| /* |
| * Copyright 2021 Alyssa Rosenzweig |
| * Copyright 2019 Collabora, Ltd. |
| * Copyright 2020 Igalia S.L. |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #include "agx_device.h" |
| #include <inttypes.h> |
| #include "clc/asahi_clc.h" |
| #include "drm-uapi/asahi_drm.h" |
| #include "util/bitscan.h" |
| #include "util/macros.h" |
| #include "util/ralloc.h" |
| #include "util/timespec.h" |
| #include "agx_abi.h" |
| #include "agx_bo.h" |
| #include "agx_compile.h" |
| #include "agx_device_virtio.h" |
| #include "agx_scratch.h" |
| #include "decode.h" |
| #include "glsl_types.h" |
| #include "layout.h" |
| #include "libagx_dgc.h" |
| #include "libagx_shaders.h" |
| |
| #include <fcntl.h> |
| #include <xf86drm.h> |
| #include "drm-uapi/dma-buf.h" |
| #include "util/blob.h" |
| #include "util/log.h" |
| #include "util/mesa-sha1.h" |
| #include "util/os_file.h" |
| #include "util/os_mman.h" |
| #include "util/os_time.h" |
| #include "util/simple_mtx.h" |
| #include "util/u_math.h" |
| #include "util/u_printf.h" |
| #include "git_sha1.h" |
| #include "nir_serialize.h" |
| #include "vdrm.h" |
| |
| static inline int |
| asahi_simple_ioctl(struct agx_device *dev, unsigned cmd, void *req) |
| { |
| if (dev->is_virtio) { |
| return agx_virtio_simple_ioctl(dev, cmd, req); |
| } else { |
| return drmIoctl(dev->fd, cmd, req); |
| } |
| } |
| |
| /* clang-format off */ |
| static const struct debug_named_value agx_debug_options[] = { |
| {"trace", AGX_DBG_TRACE, "Trace the command stream"}, |
| {"bodump", AGX_DBG_BODUMP, "Periodically dump live BOs"}, |
| {"no16", AGX_DBG_NO16, "Disable 16-bit support"}, |
| {"perf", AGX_DBG_PERF, "Print performance warnings"}, |
| #ifndef NDEBUG |
| {"dirty", AGX_DBG_DIRTY, "Disable dirty tracking"}, |
| #endif |
| {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"}, |
| {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"}, |
| {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"}, |
| {"sync", AGX_DBG_SYNC, "Synchronously wait for all submissions"}, |
| {"stats", AGX_DBG_STATS, "Show command execution statistics"}, |
| {"resource", AGX_DBG_RESOURCE, "Log resource operations"}, |
| {"batch", AGX_DBG_BATCH, "Log batches"}, |
| {"nowc", AGX_DBG_NOWC, "Disable write-combining"}, |
| {"synctvb", AGX_DBG_SYNCTVB, "Synchronous TVB growth"}, |
| {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"}, |
| {"feedback", AGX_DBG_FEEDBACK, "Debug feedback loops"}, |
| {"nomsaa", AGX_DBG_NOMSAA, "Force disable MSAA"}, |
| {"noshadow", AGX_DBG_NOSHADOW, "Force disable resource shadowing"}, |
| {"scratch", AGX_DBG_SCRATCH, "Debug scratch memory usage"}, |
| {"1queue", AGX_DBG_1QUEUE, "Force usage of a single queue for multiple contexts"}, |
| {"nosoft", AGX_DBG_NOSOFT, "Disable soft fault optimizations"}, |
| {"nomerge", AGX_DBG_NOMERGE, "Disable control stream merging"}, |
| {"bodumpverbose", AGX_DBG_BODUMPVERBOSE, "Include extra info with dumps"}, |
| DEBUG_NAMED_VALUE_END |
| }; |
| /* clang-format on */ |
| |
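/*
 * Free a BO: tear down any CPU mapping, release the VA range and prime fd,
 * then close the GEM handle itself. Ordering matters; see the comments below.
 */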
| void |
| agx_bo_free(struct agx_device *dev, struct agx_bo *bo) |
| { |
| const uint64_t handle = bo->handle; |
| |
| if (bo->_map) |
| munmap(bo->_map, bo->size); |
| |
| /* Free the VA. No need to unmap the BO or unbind the VA, as the kernel will |
| * take care of that when we close it. |
| */ |
| agx_va_free(dev, bo->va, false); |
| |
| if (bo->prime_fd != -1) |
| close(bo->prime_fd); |
| |
   /* Reset the BO (including its handle) before the GEM close. Otherwise, if
    * the kernel recycled the handle, a concurrent lookup could observe stale
    * state.
    */
| memset(bo, 0, sizeof(*bo)); |
| __sync_synchronize(); |
| |
| struct drm_gem_close args = {.handle = handle}; |
| drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args); |
| } |
| |
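/* Dispatch an array of VA bind/unbind ops in a single DRM_IOCTL_ASAHI_VM_BIND
 * ioctl. The kernel reads `num_binds` ops of `stride` bytes from `userptr`.
 */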
| static int |
| agx_drm_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops, |
| uint32_t count) |
| { |
| struct drm_asahi_vm_bind vm_bind = { |
| .num_binds = count, |
| .vm_id = dev->vm_id, |
| .userptr = (uintptr_t)ops, |
| .stride = sizeof(*ops), |
| }; |
| |
| int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_VM_BIND, &vm_bind); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_VM_BIND failed\n"); |
| } |
| |
| return ret; |
| } |
| |
| /* |
| * Convenience helper to bind a single BO regardless of kernel module. |
| */ |
| int |
| agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, |
| size_t size_B, uint64_t offset_B, uint32_t flags) |
| { |
| assert((size_B % 16384) == 0 && "alignment required"); |
| assert((offset_B % 16384) == 0 && "alignment required"); |
| assert((addr % 16384) == 0 && "alignment required"); |
| |
| struct drm_asahi_gem_bind_op op = { |
| .flags = flags, |
| .handle = bo ? bo->uapi_handle : 0, |
| .offset = offset_B, |
| .range = size_B, |
| .addr = addr, |
| }; |
| |
| return dev->ops.bo_bind(dev, &op, 1); |
| } |
| |
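/* Bind a BO for timestamp writes. On success, *handle holds the kernel's
 * object handle, which is later passed to the unbind op to release it.
 */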
| int |
| agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo, uint32_t *handle) |
| { |
| struct drm_asahi_gem_bind_object bind = { |
| .op = DRM_ASAHI_BIND_OBJECT_OP_BIND, |
| .flags = DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS, |
| .handle = bo->uapi_handle, |
| .range = bo->size, |
| }; |
| |
| int ret = dev->ops.bo_bind_object(dev, &bind); |
| *handle = bind.object_handle; |
| return ret; |
| } |
| |
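/*
 * Allocate and bind a fresh BO: create the GEM object (VM-private unless it
 * may be shared), carve a VA out of the USC heap for low-VA BOs or the main
 * heap otherwise, and bind it read-only or read-write as requested.
 */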
| static struct agx_bo * |
| agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, |
| enum agx_bo_flags flags) |
| { |
| struct agx_bo *bo; |
| unsigned handle = 0; |
| |
| /* executable implies low va */ |
| assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA)); |
| |
| struct drm_asahi_gem_create gem_create = {.size = size}; |
| |
| if (flags & AGX_BO_WRITEBACK) |
| gem_create.flags |= DRM_ASAHI_GEM_WRITEBACK; |
| |
| if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) { |
| gem_create.flags |= DRM_ASAHI_GEM_VM_PRIVATE; |
| gem_create.vm_id = dev->vm_id; |
| } |
| |
| int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_CREATE, &gem_create); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_CREATE failed: %m\n"); |
| return NULL; |
| } |
| |
| handle = gem_create.handle; |
| |
| pthread_mutex_lock(&dev->bo_map_lock); |
| bo = agx_lookup_bo(dev, handle); |
| dev->max_handle = MAX2(dev->max_handle, handle); |
| pthread_mutex_unlock(&dev->bo_map_lock); |
| |
| /* Fresh handle */ |
| assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo))); |
| |
| bo->dev = dev; |
| bo->size = gem_create.size; |
| bo->align = align; |
| bo->flags = flags; |
| bo->handle = bo->uapi_handle = handle; |
| bo->prime_fd = -1; |
| |
| enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0; |
| bo->va = agx_va_alloc(dev, size, bo->align, va_flags, 0); |
| if (!bo->va) { |
| fprintf(stderr, "Failed to allocate BO VMA\n"); |
| agx_bo_free(dev, bo); |
| return NULL; |
| } |
| |
| uint32_t bind = DRM_ASAHI_BIND_READ; |
| if (!(flags & AGX_BO_READONLY)) { |
| bind |= DRM_ASAHI_BIND_WRITE; |
| } |
| |
| ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind); |
| if (ret) { |
| agx_bo_free(dev, bo); |
| return NULL; |
| } |
| |
| return bo; |
| } |
| |
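/* CPU-map a BO read-write through the DRM mmap-offset mechanism, optionally
 * at a caller-supplied fixed address.
 */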
| static void |
| agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo, void *fixed_addr) |
| { |
| assert(bo->_map == NULL && "not double mapped"); |
| |
| struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = |
| bo->uapi_handle}; |
| int ret, flags; |
| |
| ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_MMAP_BO failed: %m\n"); |
| assert(0); |
| } |
| |
| flags = MAP_SHARED | (fixed_addr ? MAP_FIXED : 0); |
| bo->_map = os_mmap(fixed_addr, bo->size, PROT_READ | PROT_WRITE, flags, |
| dev->fd, gem_mmap_offset.offset); |
   if (bo->_map == MAP_FAILED) {
      fprintf(stderr,
              "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
              bo->_map, (long long)bo->size, dev->fd,
              (long long)gem_mmap_offset.offset);
      bo->_map = NULL;
   }
| } |
| |
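/*
 * Import a dma-buf as a BO. For a handle we have not seen before, derive the
 * size with lseek(), allocate and bind a VA, and mark the BO shared. For an
 * already-tracked handle, just take another reference under bo_map_lock.
 */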
| struct agx_bo * |
| agx_bo_import(struct agx_device *dev, int fd) |
| { |
| struct agx_bo *bo; |
| ASSERTED int ret; |
| unsigned gem_handle; |
| |
| pthread_mutex_lock(&dev->bo_map_lock); |
| |
| ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle); |
| if (ret) { |
| fprintf(stderr, "import failed: Could not map fd %d to handle\n", fd); |
| pthread_mutex_unlock(&dev->bo_map_lock); |
| return NULL; |
| } |
| |
| bo = agx_lookup_bo(dev, gem_handle); |
| dev->max_handle = MAX2(dev->max_handle, gem_handle); |
| |
| if (!bo->size) { |
| bo->dev = dev; |
| bo->size = lseek(fd, 0, SEEK_END); |
| bo->align = AIL_PAGESIZE; |
| |
      /* lseek() can fail and return -1, and a size of (size_t)-1 is not
       * something we want to hand to mmap. Be robust against that case and
       * against zero-sized imports, failing gracefully for both.
       */
| if ((bo->size == 0) || (bo->size == (size_t)-1)) { |
| goto error; |
| } |
| if (bo->size & (AIL_PAGESIZE - 1)) { |
| fprintf( |
| stderr, |
| "import failed: BO is not a multiple of the page size (0x%llx bytes)\n", |
| (long long)bo->size); |
| goto error; |
| } |
| |
| bo->flags = AGX_BO_SHARED | AGX_BO_SHAREABLE; |
| bo->handle = gem_handle; |
| bo->prime_fd = os_dupfd_cloexec(fd); |
| bo->label = "Imported BO"; |
| assert(bo->prime_fd >= 0); |
| |
| p_atomic_set(&bo->refcnt, 1); |
| bo->va = agx_va_alloc(dev, bo->size, bo->align, 0, 0); |
| |
| if (!bo->va) { |
| fprintf( |
| stderr, |
| "import failed: Could not allocate from VMA heap (0x%llx bytes)\n", |
| (long long)bo->size); |
| abort(); |
| } |
| |
| if (dev->is_virtio) { |
| bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, bo->handle); |
| } else { |
| bo->uapi_handle = bo->handle; |
| } |
| |
| ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, |
| DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); |
| if (ret) { |
| fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n", |
| (long long)bo->va->addr); |
| abort(); |
| } |
| } else { |
      /* bo->refcnt == 0 can happen if the BO was in the middle of being
       * released when agx_bo_import() acquired the lock ahead of
       * agx_bo_unreference(). In that case we cannot use agx_bo_reference()
       * directly; we have to re-initialize the refcount to 1. Note that
       * agx_bo_unreference() re-checks the refcount just after acquiring the
       * lock, so the object is not freed if agx_bo_import() claimed it in
       * the meantime.
       */
| if (p_atomic_read(&bo->refcnt) == 0) |
| p_atomic_set(&bo->refcnt, 1); |
| else |
| agx_bo_reference(bo); |
| |
      /* If this BO came back to us via import, it had better have been
       * marked shared to begin with.
       */
| assert(bo->flags & AGX_BO_SHAREABLE); |
| assert(bo->flags & AGX_BO_SHARED); |
| assert(bo->prime_fd != -1); |
| } |
| pthread_mutex_unlock(&dev->bo_map_lock); |
| |
| assert(bo->dev != NULL && "post-condition"); |
| |
| if (dev->debug & AGX_DBG_TRACE) { |
| agx_bo_map(bo); |
| agxdecode_track_alloc(dev->agxdecode, bo); |
| } |
| |
| return bo; |
| |
| error: |
| memset(bo, 0, sizeof(*bo)); |
| pthread_mutex_unlock(&dev->bo_map_lock); |
| return NULL; |
| } |
| |
| void |
| agx_bo_make_shared(struct agx_device *dev, struct agx_bo *bo) |
| { |
| assert(bo->flags & AGX_BO_SHAREABLE); |
| if (bo->flags & AGX_BO_SHARED) { |
| assert(bo->prime_fd >= 0); |
| return; |
| } |
| |
| bo->flags |= AGX_BO_SHARED; |
| assert(bo->prime_fd == -1); |
| |
| int ret = |
| drmPrimeHandleToFD(dev->fd, bo->handle, DRM_CLOEXEC, &bo->prime_fd); |
| assert(ret == 0); |
| assert(bo->prime_fd >= 0); |
| |
| /* If there is a pending writer to this BO, import it into the buffer |
| * for implicit sync. |
| */ |
| uint64_t writer = p_atomic_read_relaxed(&bo->writer); |
| if (writer) { |
| int out_sync_fd = -1; |
| int ret = drmSyncobjExportSyncFile(dev->fd, agx_bo_writer_syncobj(writer), |
| &out_sync_fd); |
| assert(ret >= 0); |
| assert(out_sync_fd >= 0); |
| |
| ret = agx_import_sync_file(dev, bo, out_sync_fd); |
| assert(ret >= 0); |
| close(out_sync_fd); |
| } |
| } |
| |
| int |
| agx_bo_export(struct agx_device *dev, struct agx_bo *bo) |
| { |
| agx_bo_make_shared(dev, bo); |
| |
| assert(bo->prime_fd >= 0); |
| return os_dupfd_cloexec(bo->prime_fd); |
| } |
| |
| static int |
| agx_bo_bind_object(struct agx_device *dev, |
| struct drm_asahi_gem_bind_object *bind) |
| { |
| int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, bind); |
| if (ret) { |
| fprintf(stderr, |
| "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n", |
| bind->handle); |
| } |
| |
| return ret; |
| } |
| |
| static int |
| agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle) |
| { |
| struct drm_asahi_gem_bind_object gem_bind = { |
| .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND, |
| .object_handle = object_handle, |
| }; |
| |
| int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind); |
| if (ret) { |
| fprintf(stderr, |
| "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (object_handle=%d)\n", |
| object_handle); |
| } |
| |
| return ret; |
| } |
| |
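/* Fetch device parameters (param group 0) into the caller's buffer, returning
 * the number of bytes the kernel wrote or a negative errno on failure.
 */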
| static ssize_t |
| agx_get_params(struct agx_device *dev, void *buf, size_t size) |
| { |
| struct drm_asahi_get_params get_param = { |
| .param_group = 0, |
| .pointer = (uint64_t)(uintptr_t)buf, |
| .size = size, |
| }; |
| |
| memset(buf, 0, size); |
| |
| int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get_param); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_GET_PARAMS failed: %m\n"); |
| return -EINVAL; |
| } |
| |
| return get_param.size; |
| } |
| |
| static int |
| agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit, |
| struct agx_submit_virt *virt) |
| { |
| return drmIoctl(dev->fd, DRM_IOCTL_ASAHI_SUBMIT, submit); |
| } |
| |
| const agx_device_ops_t agx_device_drm_ops = { |
| .bo_alloc = agx_bo_alloc, |
| .bo_bind = agx_drm_bo_bind, |
| .bo_mmap = agx_bo_mmap, |
| .get_params = agx_get_params, |
| .submit = agx_submit, |
| .bo_bind_object = agx_bo_bind_object, |
| .bo_unbind_object = agx_bo_unbind_object, |
| }; |
| |
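/* Greatest common divisor via Euclid's algorithm, used below to reduce the
 * timestamp-to-nanosecond conversion fraction to lowest terms.
 */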
| static uint64_t |
| gcd(uint64_t n, uint64_t m) |
| { |
| while (n != 0) { |
| uint64_t remainder = m % n; |
| m = n; |
| n = remainder; |
| } |
| |
| return m; |
| } |
| |
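/* Precompute a reduced fraction for converting user timestamps to
 * nanoseconds: ns = ticks * num / den. Dividing through by the GCD keeps the
 * intermediate multiplication small; e.g. a hypothetical 24 MHz tick reduces
 * 1000000000 / 24000000 to num = 125, den = 3.
 */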
| static void |
| agx_init_timestamps(struct agx_device *dev) |
| { |
| uint64_t user_ts_gcd = |
| gcd(dev->params.command_timestamp_frequency_hz, NSEC_PER_SEC); |
| |
| dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd; |
| dev->user_timestamp_to_ns.den = |
| dev->params.command_timestamp_frequency_hz / user_ts_gcd; |
| } |
| |
| bool |
| agx_open_device(void *memctx, struct agx_device *dev) |
| { |
| dev->debug = |
| debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0); |
| |
| dev->ops = agx_device_drm_ops; |
| |
| ssize_t params_size = -1; |
| |
| /* DRM version check */ |
| { |
| drmVersionPtr version = drmGetVersion(dev->fd); |
      if (!version) {
         fprintf(stderr, "cannot get version: %s\n", strerror(errno));
         return false;
      }
| |
| if (!strcmp(version->name, "asahi")) { |
| dev->is_virtio = false; |
| dev->ops = agx_device_drm_ops; |
| } else if (!strcmp(version->name, "virtio_gpu")) { |
| dev->is_virtio = true; |
| if (!agx_virtio_open_device(dev)) { |
| fprintf( |
| stderr, |
| "Error opening virtio-gpu device for Asahi native context\n"); |
            drmFreeVersion(version);
            return false;
| } |
      } else {
         drmFreeVersion(version);
         return false;
      }
| |
| drmFreeVersion(version); |
| } |
| |
| params_size = dev->ops.get_params(dev, &dev->params, sizeof(dev->params)); |
| if (params_size <= 0) { |
| assert(0); |
| return false; |
| } |
| assert(params_size >= sizeof(dev->params)); |
| |
| assert(dev->params.gpu_generation >= 13); |
| const char *variant = " Unknown"; |
| switch (dev->params.gpu_variant) { |
| case 'G': |
| variant = ""; |
| break; |
| case 'S': |
| variant = " Pro"; |
| break; |
| case 'C': |
| variant = " Max"; |
| break; |
| case 'D': |
| variant = " Ultra"; |
| break; |
| } |
| snprintf(dev->name, sizeof(dev->name), "Apple M%d%s (G%d%c %02X)", |
| dev->params.gpu_generation - 12, variant, |
| dev->params.gpu_generation, dev->params.gpu_variant, |
| dev->params.gpu_revision + 0xA0); |
| |
| /* We need a large chunk of VA space carved out for robustness. Hardware |
| * loads can shift an i32 by up to 2, for a total shift of 4. If the base |
| * address is zero, 36-bits is therefore enough to trap any zero-extended |
| * 32-bit index. For more generality we would need a larger carveout, but |
| * this is already optimal for VBOs. |
| * |
| * TODO: Maybe this should be on top instead? Might be ok. |
| */ |
| uint64_t reservation = (1ull << 36); |
| |
| /* Also reserve VA space for the printf buffer at a stable address, avoiding |
| * the need for relocs in precompiled shaders. |
| */ |
| assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS); |
| reservation += LIBAGX_PRINTF_BUFFER_SIZE; |
| |
| dev->guard_size = AIL_PAGESIZE; |
| // Put the USC heap at the bottom of the user address space, 4GiB aligned |
| dev->shader_base = |
| ALIGN_POT(MAX2(dev->params.vm_start, reservation), 0x100000000ull); |
| |
| if (dev->shader_base < reservation) { |
| /* Our robustness implementation requires the bottom unmapped */ |
| fprintf(stderr, "Unexpected address layout, can't cope\n"); |
| assert(0); |
| return false; |
| } |
| |
| uint64_t shader_size = 0x100000000ull; |
| // Put the user heap after the USC heap |
| uint64_t user_start = dev->shader_base + shader_size; |
| |
| assert(dev->shader_base >= dev->params.vm_start); |
| assert(user_start < dev->params.vm_end); |
| |
| dev->agxdecode = agxdecode_new_context(dev->shader_base); |
| |
| agx_init_timestamps(dev); |
| |
| util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512); |
| pthread_mutex_init(&dev->bo_map_lock, NULL); |
| |
| simple_mtx_init(&dev->bo_cache.lock, mtx_plain); |
| list_inithead(&dev->bo_cache.lru); |
| |
| for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) |
| list_inithead(&dev->bo_cache.buckets[i]); |
| |
| // Put the kernel heap at the top of the address space. |
| // Give it 32GB of address space, should be more than enough for any |
| // reasonable use case. |
| uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30); |
| struct drm_asahi_vm_create vm_create = { |
| .kernel_start = dev->params.vm_end - kernel_size, |
| .kernel_end = dev->params.vm_end, |
| }; |
| |
| uint64_t user_size = vm_create.kernel_start - user_start; |
| |
| int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_VM_CREATE, &vm_create); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_VM_CREATE failed: %m\n"); |
| assert(0); |
| return false; |
| } |
| |
| /* Round the user VA window to powers-of-two... */ |
| user_start = util_next_power_of_two64(user_start); |
| user_size = util_next_power_of_two64(user_size + 1) >> 1; |
| |
| /* ...so when we cut user size in half to emulate sparse buffers... */ |
| user_size /= 2; |
| |
| /* ...or maybe in quarters if necessary to disambiguate */ |
| if (user_size == user_start) { |
| user_size /= 2; |
| } |
| |
| /* ...we can distinguish the top/bottom half by an address bit */ |
| dev->sparse_ro_offset = user_size; |
| assert((user_start & dev->sparse_ro_offset) == 0); |
| assert(((user_start + (user_size - 1)) & dev->sparse_ro_offset) == 0); |
| |
| simple_mtx_init(&dev->vma_lock, mtx_plain); |
| util_vma_heap_init(&dev->main_heap, user_start, user_size); |
| util_vma_heap_init(&dev->usc_heap, dev->shader_base, shader_size); |
| |
| dev->vm_id = vm_create.vm_id; |
| |
| glsl_type_singleton_init_or_ref(); |
| |
| if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) { |
| dev->libagx_programs = libagx_g13x; |
| } else { |
| dev->libagx_programs = libagx_g13g; |
| } |
| |
| if (dev->params.gpu_generation >= 14 && dev->params.num_clusters_total > 1) { |
| dev->chip = AGX_CHIP_G14X; |
| } else if (dev->params.gpu_generation >= 14) { |
| dev->chip = AGX_CHIP_G14G; |
| } else if (dev->params.gpu_generation >= 13 && |
| dev->params.num_clusters_total > 1) { |
| dev->chip = AGX_CHIP_G13X; |
| } else { |
| dev->chip = AGX_CHIP_G13G; |
| } |
| |
| /* Bind read-only zero page at 2^32. This is in our reservation, and can be |
| * addressed with only small integers in the low/high. That lets us do some |
| * robustness optimization even without soft fault. |
| */ |
| { |
      struct agx_bo *bo = agx_bo_create(dev, 16384, 0, 0, "Zero page");
| int ret = agx_bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0, |
| DRM_ASAHI_BIND_READ); |
| if (ret) { |
| fprintf(stderr, "Failed to bind zero page"); |
| return false; |
| } |
| |
| dev->zero_bo = bo; |
| } |
| |
| { |
      struct agx_bo *bo =
         agx_bo_create(dev, AIL_PAGESIZE, 0, 0, "Scratch page");
| int ret = agx_bo_bind(dev, bo, AGX_SCRATCH_PAGE_ADDRESS, AIL_PAGESIZE, 0, |
| DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); |
| if (ret) { |
| fprintf(stderr, "Failed to bind zero page"); |
| return false; |
| } |
| |
| dev->scratch_bo = bo; |
| |
| /* The contents of the scratch page are undefined, but making them nonzero |
| * helps fuzz for bugs where we incorrectly read from the write section. |
| */ |
| memset(agx_bo_map(dev->scratch_bo), 0xCA, AIL_PAGESIZE); |
| } |
| |
   struct agx_bo *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0,
                                     AGX_BO_WRITEBACK, "Printf/abort");
| |
| ret = agx_bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS, |
| LIBAGX_PRINTF_BUFFER_SIZE, 0, |
| DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); |
| if (ret) { |
| fprintf(stderr, "Failed to bind printf buffer"); |
| return false; |
| } |
| |
| u_printf_init(&dev->printf, bo, agx_bo_map(bo)); |
| return true; |
| } |
| |
| void |
| agx_close_device(struct agx_device *dev) |
| { |
| agx_bo_unreference(dev, dev->printf.bo); |
| agx_bo_unreference(dev, dev->zero_bo); |
| agx_bo_unreference(dev, dev->scratch_bo); |
| u_printf_destroy(&dev->printf); |
| agx_bo_cache_evict_all(dev); |
| util_sparse_array_finish(&dev->bo_map); |
| agxdecode_destroy_context(dev->agxdecode); |
| |
| util_vma_heap_finish(&dev->main_heap); |
| util_vma_heap_finish(&dev->usc_heap); |
| glsl_type_singleton_decref(); |
| |
| close(dev->fd); |
| } |
| |
| uint32_t |
| agx_create_command_queue(struct agx_device *dev, |
| enum drm_asahi_priority priority) |
| { |
| |
| if (dev->debug & AGX_DBG_1QUEUE) { |
| // Abuse this lock for this, it's debug only anyway |
| simple_mtx_lock(&dev->vma_lock); |
| if (dev->queue_id) { |
| simple_mtx_unlock(&dev->vma_lock); |
| return dev->queue_id; |
| } |
| } |
| |
| struct drm_asahi_queue_create queue_create = { |
| .vm_id = dev->vm_id, |
| .priority = priority, |
| .usc_exec_base = dev->shader_base, |
| }; |
| |
| int ret = |
| asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_CREATE, &queue_create); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_QUEUE_CREATE failed: %m\n"); |
| assert(0); |
| } |
| |
| if (dev->debug & AGX_DBG_1QUEUE) { |
| dev->queue_id = queue_create.queue_id; |
| simple_mtx_unlock(&dev->vma_lock); |
| } |
| |
| return queue_create.queue_id; |
| } |
| |
| int |
| agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id) |
| { |
| if (dev->debug & AGX_DBG_1QUEUE) |
| return 0; |
| |
| struct drm_asahi_queue_destroy queue_destroy = { |
| .queue_id = queue_id, |
| }; |
| |
| return asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_DESTROY, |
| &queue_destroy); |
| } |
| |
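/* Attach a sync file to the BO's dma-buf as a write fence, so other devices
 * and processes that rely on implicit synchronization wait on it.
 */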
| int |
| agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd) |
| { |
| struct dma_buf_import_sync_file import_sync_file_ioctl = { |
| .flags = DMA_BUF_SYNC_WRITE, |
| .fd = fd, |
| }; |
| |
| assert(fd >= 0); |
| assert(bo->prime_fd != -1); |
| |
| int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE, |
| &import_sync_file_ioctl); |
| assert(ret >= 0); |
| |
| return ret; |
| } |
| |
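/* Export the BO's implicit-sync fences (reads and writes) from its dma-buf as
 * a sync file, returning the new fd on success or a negative value on failure.
 */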
| int |
| agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo) |
| { |
| struct dma_buf_export_sync_file export_sync_file_ioctl = { |
| .flags = DMA_BUF_SYNC_RW, |
| .fd = -1, |
| }; |
| |
| assert(bo->prime_fd != -1); |
| |
| int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, |
| &export_sync_file_ioctl); |
| assert(ret >= 0); |
| assert(export_sync_file_ioctl.fd >= 0); |
| |
| return ret >= 0 ? export_sync_file_ioctl.fd : ret; |
| } |
| |
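/*
 * Given a faulting GPU address, find the closest BO at or below it and log
 * whether the fault hit inside the BO or how far past its end it landed,
 * capped at 1 GiB beyond the end to avoid nonsense matches.
 */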
| void |
| agx_debug_fault(struct agx_device *dev, uint64_t addr) |
| { |
| pthread_mutex_lock(&dev->bo_map_lock); |
| |
| struct agx_bo *best = NULL; |
| |
| for (uint32_t handle = 0; handle < dev->max_handle; handle++) { |
| struct agx_bo *bo = agx_lookup_bo(dev, handle); |
| if (!bo->va) |
| continue; |
| |
| uint64_t bo_addr = bo->va->addr; |
| if (bo->flags & AGX_BO_LOW_VA) |
| bo_addr += dev->shader_base; |
| |
| if (!bo->size || bo_addr > addr) |
| continue; |
| |
| if (!best || bo_addr > best->va->addr) |
| best = bo; |
| } |
| |
| if (!best) { |
| mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr); |
| } else { |
| uint64_t start = best->va->addr; |
| uint64_t end = best->va->addr + best->size; |
| if (addr > (end + 1024 * 1024 * 1024)) { |
| /* 1GiB max as a sanity check */ |
| mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr); |
| } else if (addr > end) { |
| mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64 |
| " bytes beyond an object at 0x%" PRIx64 "..0x%" PRIx64 |
| " (%s)\n", |
| addr, addr - end, start, end - 1, best->label); |
| } else { |
| mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64 |
| " bytes into an object at 0x%" PRIx64 "..0x%" PRIx64 |
| " (%s)\n", |
| addr, addr - start, start, end - 1, best->label); |
| } |
| } |
| |
| pthread_mutex_unlock(&dev->bo_map_lock); |
| } |
| |
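/* Sample the current GPU timestamp via DRM_IOCTL_ASAHI_GET_TIME. */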
| uint64_t |
| agx_get_gpu_timestamp(struct agx_device *dev) |
| { |
| struct drm_asahi_get_time get_time = {.flags = 0}; |
| |
| int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time); |
| if (ret) { |
| fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n"); |
| } |
| |
| return get_time.gpu_timestamp; |
| } |
| |
| /* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */ |
| #define UUID_SIZE 16 |
| |
| void |
| agx_get_device_uuid(const struct agx_device *dev, void *uuid) |
| { |
| struct mesa_sha1 sha1_ctx; |
| _mesa_sha1_init(&sha1_ctx); |
| |
| /* The device UUID uniquely identifies the given device within the machine. |
| * Since we never have more than one device, this doesn't need to be a real |
| * UUID, so we use SHA1("agx" + gpu_generation + gpu_variant + gpu_revision). |
| */ |
| static const char *device_name = "agx"; |
| _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name)); |
| |
| _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_generation, |
| sizeof(dev->params.gpu_generation)); |
| _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_variant, |
| sizeof(dev->params.gpu_variant)); |
| _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_revision, |
| sizeof(dev->params.gpu_revision)); |
| |
| uint8_t sha1[SHA1_DIGEST_LENGTH]; |
| _mesa_sha1_final(&sha1_ctx, sha1); |
| |
| assert(SHA1_DIGEST_LENGTH >= UUID_SIZE); |
| memcpy(uuid, sha1, UUID_SIZE); |
| } |
| |
| void |
| agx_get_driver_uuid(void *uuid) |
| { |
| const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1; |
| |
   /* The driver UUID is used for determining sharability of images and memory
    * between two Vulkan instances in separate processes, and also for
    * determining sharability of memory objects between the Vulkan and OpenGL
    * drivers. Anyone who wants to share memory needs to check the device UUID
    * as well.
    */
| struct mesa_sha1 sha1_ctx; |
| _mesa_sha1_init(&sha1_ctx); |
| |
| _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id)); |
| |
| uint8_t sha1[SHA1_DIGEST_LENGTH]; |
| _mesa_sha1_final(&sha1_ctx, sha1); |
| |
| assert(SHA1_DIGEST_LENGTH >= UUID_SIZE); |
| memcpy(uuid, sha1, UUID_SIZE); |
| } |
| |
| unsigned |
| agx_get_num_cores(const struct agx_device *dev) |
| { |
| unsigned n = 0; |
| |
| for (unsigned cl = 0; cl < dev->params.num_clusters_total; cl++) { |
| n += util_bitcount(dev->params.core_masks[cl]); |
| } |
| |
| return n; |
| } |
| |
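/* Summarize the device properties that select precompiled code: multi-cluster
 * G13 parts and multi-die parts need extra coherency handling (e.g. to pick
 * the libagx_g13x binaries), and soft fault support unlocks robustness
 * optimizations.
 */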
| struct agx_device_key |
| agx_gather_device_key(struct agx_device *dev) |
| { |
| bool g13x_coh = (dev->params.gpu_generation == 13 && |
| dev->params.num_clusters_total > 1) || |
| dev->params.num_dies > 1; |
| |
| return (struct agx_device_key){ |
| .needs_g13x_coherency = u_tristate_make(g13x_coh), |
| .soft_fault = agx_has_soft_fault(dev), |
| }; |
| } |