/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>
#include "anv_private.h"
#include "anv_measure.h"
#include "anv_slab_bo.h"
#include "util/u_debug.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/intel_aux_map.h"
#include "common/intel_common.h"
#include "common/intel_debug_identifier.h"
#include "i915/anv_device.h"
#include "xe/anv_device.h"
#include "genxml/gen70_pack.h"
#include "genxml/genX_bits.h"
const struct gfx8_border_color anv_default_border_colors[] = {
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
};
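/* Upload the default border color table into the dynamic state pool so
 * samplers can reference the colors by offset.
 */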
static void
anv_device_init_border_colors(struct anv_device *device)
{
device->border_colors =
anv_state_pool_emit_data(&device->dynamic_state_pool,
sizeof(anv_default_border_colors),
64, anv_default_border_colors);
}
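/* Allocate a minimal 4KB batch containing just MI_BATCH_BUFFER_END,
 * for submissions that need a valid but empty batch.
 */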
static VkResult
anv_device_init_trivial_batch(struct anv_device *device)
{
VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
ANV_BO_ALLOC_BATCH_BUFFER_INTERNAL_FLAGS,
0 /* explicit_address */,
&device->trivial_batch_bo);
ANV_DMR_BO_ALLOC(&device->vk.base, device->trivial_batch_bo, result);
if (result != VK_SUCCESS)
return result;
struct anv_batch batch = {
.start = device->trivial_batch_bo->map,
.next = device->trivial_batch_bo->map,
.end = device->trivial_batch_bo->map + 4096,
};
anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
return VK_SUCCESS;
}
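/* Search a block pool for a BO containing the given 48-bit GPU address
 * and, on a hit, fill *ret with its decoder view.
 */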
static bool
get_bo_from_pool(struct intel_batch_decode_bo *ret,
struct anv_block_pool *pool,
uint64_t address)
{
anv_block_pool_foreach_bo(bo, pool) {
uint64_t bo_address = intel_48b_address(bo->offset);
if (address >= bo_address && address < (bo_address + bo->size)) {
*ret = (struct intel_batch_decode_bo) {
.addr = bo_address,
.size = bo->size,
.map = bo->map,
};
return true;
}
}
return false;
}
/* Find the buffer containing a given GPU address, for batch decoding. */
static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
struct anv_device *device = v_batch;
struct intel_batch_decode_bo ret_bo = {};
assert(ppgtt);
if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
return ret_bo;
if (device->physical->indirect_descriptors &&
get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
return ret_bo;
if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
return ret_bo;
if (device->physical->indirect_descriptors &&
get_bo_from_pool(&ret_bo, &device->indirect_push_descriptor_pool.block_pool, address))
return ret_bo;
if (device->info->has_aux_map &&
get_bo_from_pool(&ret_bo, &device->aux_tt_pool.block_pool, address))
return ret_bo;
if (!device->cmd_buffer_being_decoded)
return (struct intel_batch_decode_bo) { };
struct anv_batch_bo **bbo;
u_vector_foreach(bbo, &device->cmd_buffer_being_decoded->seen_bbos) {
/* The decoder zeroes out the top 16 bits, so we need to as well */
uint64_t bo_address = (*bbo)->bo->offset & (~0ull >> 16);
if (address >= bo_address && address < bo_address + (*bbo)->bo->size) {
return (struct intel_batch_decode_bo) {
.addr = bo_address,
.size = (*bbo)->bo->size,
.map = (*bbo)->bo->map,
};
}
uint32_t dep_words = (*bbo)->relocs.dep_words;
BITSET_WORD *deps = (*bbo)->relocs.deps;
for (uint32_t w = 0; w < dep_words; w++) {
BITSET_WORD mask = deps[w];
while (mask) {
int i = u_bit_scan(&mask);
uint32_t gem_handle = w * BITSET_WORDBITS + i;
struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
assert(bo->refcount > 0);
bo_address = bo->offset & (~0ull >> 16);
if (address >= bo_address && address < bo_address + bo->size) {
return (struct intel_batch_decode_bo) {
.addr = bo_address,
.size = bo->size,
.map = bo->map,
};
}
}
}
}
return (struct intel_batch_decode_bo) { };
}
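/* Allocator callbacks used by the common aux-map code: aux translation
 * table buffers are carved out of the device's aux_tt_pool.
 */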
struct intel_aux_map_buffer {
struct intel_buffer base;
struct anv_state state;
};
static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
if (!buf)
return NULL;
struct anv_device *device = (struct anv_device*)driver_ctx;
struct anv_state_pool *pool = &device->aux_tt_pool;
buf->state = anv_state_pool_alloc(pool, size, size);
buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
buf->base.map = buf->state.map;
buf->base.driver_bo = &buf->state;
return &buf->base;
}
static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
struct anv_device *device = (struct anv_device*)driver_ctx;
struct anv_state_pool *pool = &device->aux_tt_pool;
anv_state_pool_free(pool, buf->state);
free(buf);
}
static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
.alloc = intel_aux_map_buffer_alloc,
.free = intel_aux_map_buffer_free,
};
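/* i915 submits through a GEM context while Xe uses a VM, so dispatch the
 * per-KMD setup accordingly.
 */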
static VkResult
anv_device_setup_context_or_vm(struct anv_device *device,
const VkDeviceCreateInfo *pCreateInfo,
const uint32_t num_queues)
{
switch (device->info->kmd_type) {
case INTEL_KMD_TYPE_I915:
return anv_i915_device_setup_context(device, pCreateInfo, num_queues);
case INTEL_KMD_TYPE_XE:
return anv_xe_device_setup_vm(device);
default:
unreachable("Missing");
return VK_ERROR_UNKNOWN;
}
}
static bool
anv_device_destroy_context_or_vm(struct anv_device *device)
{
switch (device->info->kmd_type) {
case INTEL_KMD_TYPE_I915:
if (device->physical->has_vm_control)
return anv_i915_device_destroy_vm(device);
else
return intel_gem_destroy_context(device->fd, device->context_id);
case INTEL_KMD_TYPE_XE:
return anv_xe_device_destroy_vm(device);
default:
unreachable("Missing");
return false;
}
}
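/* Set up the TR-TT (translation table) state used to implement sparse
 * binding when the physical device uses the TR-TT sparse path: a timeline
 * sync to order page-table updates plus a list of in-flight batches.
 */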
static VkResult
anv_device_init_trtt(struct anv_device *device)
{
if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
!device->vk.enabled_features.sparseBinding)
return VK_SUCCESS;
struct anv_trtt *trtt = &device->trtt;
VkResult result =
vk_sync_create(&device->vk,
&device->physical->sync_syncobj_type,
VK_SYNC_IS_TIMELINE,
0 /* initial_value */,
&trtt->timeline);
if (result != VK_SUCCESS)
return result;
simple_mtx_init(&trtt->mutex, mtx_plain);
list_inithead(&trtt->in_flight_batches);
return VK_SUCCESS;
}
static void
anv_device_finish_trtt(struct anv_device *device)
{
if (device->physical->sparse_type != ANV_SPARSE_TYPE_TRTT ||
!device->vk.enabled_features.sparseBinding)
return;
struct anv_trtt *trtt = &device->trtt;
anv_sparse_trtt_garbage_collect_batches(device, true);
vk_sync_destroy(&device->vk, trtt->timeline);
simple_mtx_destroy(&trtt->mutex);
vk_free(&device->vk.alloc, trtt->l3_mirror);
vk_free(&device->vk.alloc, trtt->l2_mirror);
for (int i = 0; i < trtt->num_page_table_bos; i++) {
struct anv_bo *bo = trtt->page_table_bos[i];
ANV_DMR_BO_FREE(&device->vk.base, bo);
anv_device_release_bo(device, trtt->page_table_bos[i]);
}
vk_free(&device->vk.alloc, trtt->page_table_bos);
}
VkResult anv_CreateDevice(
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDevice* pDevice)
{
anv_wait_for_attach();
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
VkResult result;
struct anv_device *device;
bool device_has_compute_queue = false;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
/* Check requested queues and fail if we are requested to create any
* queues with flags we don't support.
*/
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
if (pCreateInfo->pQueueCreateInfos[i].flags & ~VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT)
return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
const struct anv_queue_family *family =
&physical_device->queue.families[pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex];
device_has_compute_queue |= family->engine_class == INTEL_ENGINE_CLASS_COMPUTE;
}
device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
sizeof(*device), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device)
return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_device_dispatch_table dispatch_table;
bool override_initial_entrypoints = true;
if (physical_device->instance->vk.app_info.app_name &&
!strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&anv_hitman3_device_entrypoints,
true);
override_initial_entrypoints = false;
}
if (physical_device->info.ver < 12 &&
physical_device->instance->vk.app_info.app_name &&
!strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&anv_doom64_device_entrypoints,
true);
override_initial_entrypoints = false;
}
#if DETECT_OS_ANDROID
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&anv_android_device_entrypoints,
true);
override_initial_entrypoints = false;
#endif
if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&anv_rmv_device_entrypoints,
true);
override_initial_entrypoints = false;
}
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
anv_genX(&physical_device->info, device_entrypoints),
override_initial_entrypoints);
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&anv_device_entrypoints, false);
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&wsi_device_entrypoints, false);
result = vk_device_init(&device->vk, &physical_device->vk,
&dispatch_table, pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
goto fail_alloc;
if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
struct intel_batch_decode_ctx *decoder = &device->decoder[i];
const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
intel_batch_decode_ctx_init_brw(decoder,
&physical_device->compiler->isa,
&physical_device->info,
stderr, decode_flags, NULL,
decode_get_bo, NULL, device);
intel_batch_stats_reset(decoder);
decoder->engine = physical_device->queue.families[i].engine_class;
decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
}
}
anv_device_set_physical(device, physical_device);
device->kmd_backend = anv_kmd_backend_get(device->info->kmd_type);
/* XXX(chadv): Can we dup() physicalDevice->fd here? */
device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
if (device->fd == -1) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_device;
}
switch (device->info->kmd_type) {
case INTEL_KMD_TYPE_I915:
device->vk.check_status = anv_i915_device_check_status;
break;
case INTEL_KMD_TYPE_XE:
device->vk.check_status = anv_xe_device_check_status;
break;
default:
unreachable("Missing");
}
device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
/* Only i915 needs a sync type for BO-backed memory (implicit sync). */
if (physical_device->info.kmd_type == INTEL_KMD_TYPE_I915)
device->vk.create_sync_for_memory = anv_create_sync_for_memory;
vk_device_set_drm_fd(&device->vk, device->fd);
uint32_t num_queues = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
result = anv_device_setup_context_or_vm(device, pCreateInfo, num_queues);
if (result != VK_SUCCESS)
goto fail_fd;
device->queues =
vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (device->queues == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_context_id;
}
if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_queues_alloc;
}
/* keep the page with address zero out of the allocator */
util_vma_heap_init(&device->vma_lo,
device->physical->va.low_heap.addr,
device->physical->va.low_heap.size);
util_vma_heap_init(&device->vma_hi,
device->physical->va.high_heap.addr,
device->physical->va.high_heap.size);
if (device->physical->indirect_descriptors) {
util_vma_heap_init(&device->vma_desc,
device->physical->va.indirect_descriptor_pool.addr,
device->physical->va.indirect_descriptor_pool.size);
} else {
util_vma_heap_init(&device->vma_desc,
device->physical->va.bindless_surface_state_pool.addr,
device->physical->va.bindless_surface_state_pool.size);
}
/* Always initialized because the memory types point to this heap and they
 * live on the physical device.
*/
util_vma_heap_init(&device->vma_dynamic_visible,
device->physical->va.dynamic_visible_pool.addr,
device->physical->va.dynamic_visible_pool.size);
util_vma_heap_init(&device->vma_trtt,
device->physical->va.trtt.addr,
device->physical->va.trtt.size);
list_inithead(&device->memory_objects);
list_inithead(&device->image_private_objects);
list_inithead(&device->bvh_dumps);
if (pthread_mutex_init(&device->mutex, NULL) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_vmas;
}
pthread_condattr_t condattr;
if (pthread_condattr_init(&condattr) != 0) {
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_mutex;
}
if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
pthread_condattr_destroy(&condattr);
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_mutex;
}
if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
pthread_condattr_destroy(&condattr);
result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
goto fail_mutex;
}
pthread_condattr_destroy(&condattr);
if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
anv_memory_trace_init(device);
result = anv_bo_cache_init(&device->bo_cache, device);
if (result != VK_SUCCESS)
goto fail_queue_cond;
if (!anv_slab_bo_init(device))
goto fail_cache;
anv_bo_pool_init(&device->batch_bo_pool, device, "batch",
ANV_BO_ALLOC_BATCH_BUFFER_FLAGS);
if (device->vk.enabled_extensions.KHR_acceleration_structure) {
anv_bo_pool_init(&device->bvh_bo_pool, device, "bvh build",
0 /* alloc_flags */);
}
/* Because scratch is also relative to General State Base Address, we leave
* the base address 0 and start the pool memory at an offset. This way we
* get the correct offsets in the anv_states that get allocated from it.
*/
result = anv_state_pool_init(&device->general_state_pool, device,
&(struct anv_state_pool_params) {
.name = "general pool",
.base_address = 0,
.start_offset = device->physical->va.general_state_pool.addr,
.block_size = 16384,
.max_size = device->physical->va.general_state_pool.size
});
if (result != VK_SUCCESS)
goto fail_batch_bo_pool;
result = anv_state_pool_init(&device->dynamic_state_pool, device,
&(struct anv_state_pool_params) {
.name = "dynamic pool",
.base_address = device->physical->va.dynamic_state_pool.addr,
.block_size = 16384,
.max_size = device->physical->va.dynamic_state_pool.size,
});
if (result != VK_SUCCESS)
goto fail_general_state_pool;
/* The border color pointer is limited to 24 bits, so we need to make
* sure that any such color used at any point in the program doesn't
* exceed that limit.
* We achieve that by reserving all the custom border colors we support
* right off the bat, so they are close to the base address.
*/
result = anv_state_reserved_array_pool_init(&device->custom_border_colors,
&device->dynamic_state_pool,
MAX_CUSTOM_BORDER_COLORS,
sizeof(struct gfx8_border_color), 64);
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
result = anv_state_pool_init(&device->instruction_state_pool, device,
&(struct anv_state_pool_params) {
.name = "instruction pool",
.base_address = device->physical->va.instruction_state_pool.addr,
.block_size = 16384,
.max_size = device->physical->va.instruction_state_pool.size,
});
if (result != VK_SUCCESS)
goto fail_custom_border_color_pool;
if (device->info->verx10 >= 125) {
/* Put the scratch surface states at the beginning of the internal
* surface state pool.
*/
result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
&(struct anv_state_pool_params) {
.name = "scratch surface state pool",
.base_address = device->physical->va.scratch_surface_state_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.scratch_surface_state_pool.size,
});
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
&(struct anv_state_pool_params) {
.name = "internal surface state pool",
.base_address = device->physical->va.internal_surface_state_pool.addr,
.start_offset = device->physical->va.scratch_surface_state_pool.size,
.block_size = 4096,
.max_size = device->physical->va.internal_surface_state_pool.size,
});
} else {
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
&(struct anv_state_pool_params) {
.name = "internal surface state pool",
.base_address = device->physical->va.internal_surface_state_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.internal_surface_state_pool.size,
});
}
if (result != VK_SUCCESS)
goto fail_scratch_surface_state_pool;
if (device->physical->indirect_descriptors) {
result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
&(struct anv_state_pool_params) {
.name = "bindless surface state pool",
.base_address = device->physical->va.bindless_surface_state_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.bindless_surface_state_pool.size,
});
if (result != VK_SUCCESS)
goto fail_internal_surface_state_pool;
}
if (device->info->verx10 >= 125) {
/* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
* table its own base address separately from surface state base.
*/
result = anv_state_pool_init(&device->binding_table_pool, device,
&(struct anv_state_pool_params) {
.name = "binding table pool",
.base_address = device->physical->va.binding_table_pool.addr,
.block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
.max_size = device->physical->va.binding_table_pool.size,
});
} else {
/* The binding table should be in front of the surface states in virtual
* address space so that all surface states can be expressed as relative
* offsets from the binding table location.
*/
assert(device->physical->va.binding_table_pool.addr <
device->physical->va.internal_surface_state_pool.addr);
int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
(int64_t)device->physical->va.internal_surface_state_pool.addr;
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
result = anv_state_pool_init(&device->binding_table_pool, device,
&(struct anv_state_pool_params) {
.name = "binding table pool",
.base_address = device->physical->va.internal_surface_state_pool.addr,
.start_offset = bt_pool_offset,
.block_size = BINDING_TABLE_POOL_BLOCK_SIZE,
.max_size = device->physical->va.internal_surface_state_pool.size,
});
}
if (result != VK_SUCCESS)
goto fail_bindless_surface_state_pool;
if (device->physical->indirect_descriptors) {
result = anv_state_pool_init(&device->indirect_push_descriptor_pool, device,
&(struct anv_state_pool_params) {
.name = "indirect push descriptor pool",
.base_address = device->physical->va.indirect_push_descriptor_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.indirect_push_descriptor_pool.size,
});
if (result != VK_SUCCESS)
goto fail_binding_table_pool;
}
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125) {
/* On Gfx12.5+, because of the bindless stages (Mesh, Task, RT), the only
* way we can wire up push descriptors is through the bindless heap. This
* state pool is a 1GB carve-out of the 4GB HW heap.
*/
result = anv_state_pool_init(&device->push_descriptor_buffer_pool, device,
&(struct anv_state_pool_params) {
.name = "push descriptor buffer state pool",
.base_address = device->physical->va.push_descriptor_buffer_pool.addr,
.block_size = 4096,
.max_size = device->physical->va.push_descriptor_buffer_pool.size,
});
if (result != VK_SUCCESS)
goto fail_indirect_push_descriptor_pool;
}
if (device->info->has_aux_map) {
result = anv_state_pool_init(&device->aux_tt_pool, device,
&(struct anv_state_pool_params) {
.name = "aux-tt pool",
.base_address = device->physical->va.aux_tt_pool.addr,
.block_size = 16384,
.max_size = device->physical->va.aux_tt_pool.size,
});
if (result != VK_SUCCESS)
goto fail_push_descriptor_buffer_pool;
device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
&physical_device->info);
if (!device->aux_map_ctx)
goto fail_aux_tt_pool;
}
result = anv_device_alloc_bo(device, "workaround", 8192,
ANV_BO_ALLOC_CAPTURE |
ANV_BO_ALLOC_HOST_COHERENT |
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_INTERNAL,
0 /* explicit_address */,
&device->workaround_bo);
ANV_DMR_BO_ALLOC(&device->vk.base, device->workaround_bo, result);
if (result != VK_SUCCESS)
goto fail_surface_aux_map_pool;
if (intel_needs_workaround(device->info, 14019708328)) {
result = anv_device_alloc_bo(device, "dummy_aux", 4096,
0 /* alloc_flags */,
0 /* explicit_address */,
&device->dummy_aux_bo);
ANV_DMR_BO_ALLOC(&device->vk.base, device->dummy_aux_bo, result);
if (result != VK_SUCCESS)
goto fail_alloc_device_bo;
device->isl_dev.dummy_aux_address = device->dummy_aux_bo->offset;
}
/* Programming note from MI_MEM_FENCE specification:
*
* Software must ensure STATE_SYSTEM_MEM_FENCE_ADDRESS command is
* programmed prior to programming this command.
*
* HAS 1607240579 then provides the size information: 4K
*/
if (device->info->verx10 >= 200) {
result = anv_device_alloc_bo(device, "mem_fence", 4096,
ANV_BO_ALLOC_NO_LOCAL_MEM, 0,
&device->mem_fence_bo);
ANV_DMR_BO_ALLOC(&device->vk.base, device->mem_fence_bo, result);
if (result != VK_SUCCESS)
goto fail_alloc_device_bo;
}
struct anv_address wa_addr = (struct anv_address) {
.bo = device->workaround_bo,
};
wa_addr = anv_address_add_aligned(wa_addr,
intel_debug_write_identifiers(
device->workaround_bo->map,
device->workaround_bo->size,
"Anv"), 32);
device->rt_uuid_addr = wa_addr;
memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
physical_device->rt_uuid,
sizeof(physical_device->rt_uuid));
/* Make sure the workaround address is the last one in the workaround BO,
* so that writes never overwrite other bits of data stored in the
* workaround BO.
*/
wa_addr = anv_address_add_aligned(wa_addr,
sizeof(physical_device->rt_uuid), 64);
device->workaround_address = wa_addr;
/* Make sure we don't overrun the allocated BO. */
assert(device->workaround_address.offset < device->workaround_bo->size);
/* We also need 64B (the maximum GRF size) available after the workaround
* address (see the TBIMR workaround).
*/
assert((device->workaround_bo->size -
device->workaround_address.offset) >= 64);
device->workarounds.doom64_images = NULL;
device->debug_frame_desc =
intel_debug_get_identifier_block(device->workaround_bo->map,
device->workaround_bo->size,
INTEL_DEBUG_BLOCK_TYPE_FRAME);
if (device->vk.enabled_extensions.KHR_ray_query) {
uint32_t ray_queries_size =
align(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
result = anv_device_alloc_bo(device, "ray queries",
ray_queries_size,
ANV_BO_ALLOC_INTERNAL,
0 /* explicit_address */,
&device->ray_query_bo[0]);
ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[0], result);
if (result != VK_SUCCESS)
goto fail_alloc_device_bo;
/* We need a separate ray query bo for CCS engine with Wa_14022863161. */
if (intel_needs_workaround(device->isl_dev.info, 14022863161) &&
device_has_compute_queue) {
result = anv_device_alloc_bo(device, "ray queries",
ray_queries_size,
ANV_BO_ALLOC_INTERNAL,
0 /* explicit_address */,
&device->ray_query_bo[1]);
ANV_DMR_BO_ALLOC(&device->vk.base, device->ray_query_bo[1], result);
if (result != VK_SUCCESS)
goto fail_ray_query_bo;
}
}
result = anv_device_init_trivial_batch(device);
if (result != VK_SUCCESS)
goto fail_ray_query_bo;
/* Emit the CPS states before running the initialization batch as those
* structures are referenced.
*/
if (device->info->ver >= 12 && device->info->ver < 30) {
uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
if (device->info->has_coarse_pixel_primitive_and_cb)
n_cps_states *= 5 * 5; /* 5 modes for each of the 2 combiners */
n_cps_states += 1; /* Disable CPS */
/* Each combination must be replicated for all viewports */
n_cps_states *= MAX_VIEWPORTS;
device->cps_states =
anv_state_pool_alloc(&device->dynamic_state_pool,
n_cps_states * CPS_STATE_length(device->info) * 4,
32);
if (device->cps_states.map == NULL)
goto fail_trivial_batch;
anv_genX(device->info, init_cps_device_state)(device);
}
if (device->physical->indirect_descriptors) {
/* Allocate a null surface state at surface state offset 0. This makes
* NULL descriptor handling trivial because we can just memset
* structures to zero and they have a valid descriptor.
*/
device->null_surface_state =
anv_state_pool_alloc(&device->bindless_surface_state_pool,
device->isl_dev.ss.size,
device->isl_dev.ss.align);
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
.size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
assert(device->null_surface_state.offset == 0);
} else {
/* When using direct descriptors, those can hold the null surface state
* directly. We still need a null surface for the binding table entries,
* though, but that one can live anywhere in the internal surface state
* pool.
*/
device->null_surface_state =
anv_state_pool_alloc(&device->internal_surface_state_pool,
device->isl_dev.ss.size,
device->isl_dev.ss.align);
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
.size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
}
isl_null_fill_state(&device->isl_dev, &device->host_null_surface_state,
.size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
anv_scratch_pool_init(device, &device->scratch_pool, false);
anv_scratch_pool_init(device, &device->protected_scratch_pool, true);
/* TODO(RT): Do we want some sort of data structure for this? */
memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
/* The docs say to always allocate 128KB per DSS */
const uint32_t btd_fifo_bo_size =
128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
result = anv_device_alloc_bo(device,
"rt-btd-fifo",
btd_fifo_bo_size,
ANV_BO_ALLOC_INTERNAL,
0 /* explicit_address */,
&device->btd_fifo_bo);
ANV_DMR_BO_ALLOC(&device->vk.base, device->btd_fifo_bo, result);
if (result != VK_SUCCESS)
goto fail_trivial_batch_bo_and_scratch_pool;
}
struct vk_pipeline_cache_create_info pcc_info = { .weak_ref = true, };
device->vk.mem_cache =
vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
if (!device->vk.mem_cache) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_btd_fifo_bo;
}
/* Internal shaders need their own pipeline cache because, unlike the rest
* of ANV, they won't work at all without the cache: they depend on it to
* keep their shaders resident while they run. Therefore, we need a
* special cache just for BLORP/RT that's forced to always be enabled.
*/
struct vk_pipeline_cache_create_info internal_pcc_info = {
.force_enable = true,
.weak_ref = false,
};
device->internal_cache =
vk_pipeline_cache_create(&device->vk, &internal_pcc_info, NULL);
if (device->internal_cache == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_default_pipeline_cache;
}
/* The device (currently ICL/TGL) does not have native float64 support. */
if (!device->info->has_64bit_float)
anv_load_fp64_shader(device);
if (INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
result = anv_device_print_init(device);
if (result != VK_SUCCESS)
goto fail_internal_cache;
}
device->robust_buffer_access =
device->vk.enabled_features.robustBufferAccess ||
device->vk.enabled_features.nullDescriptor;
device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
4);
p_atomic_set(&device->draw_call_count, 0);
p_atomic_set(&device->dispatch_call_count, 0);
/* Create a separate command pool for companion RCS command buffer. */
if (device->info->verx10 >= 125) {
VkCommandPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex =
anv_get_first_render_queue_index(device->physical),
};
result = vk_common_CreateCommandPool(anv_device_to_handle(device),
&pool_info, NULL,
&device->companion_rcs_cmd_pool);
if (result != VK_SUCCESS) {
goto fail_print;
}
}
result = anv_device_init_trtt(device);
if (result != VK_SUCCESS)
goto fail_companion_cmd_pool;
result = anv_device_init_rt_shaders(device);
if (result != VK_SUCCESS) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_trtt;
}
anv_device_init_blorp(device);
anv_device_init_border_colors(device);
anv_device_init_internal_kernels(device);
anv_device_init_astc_emu(device);
anv_device_perf_init(device);
anv_device_init_embedded_samplers(device);
BITSET_ONES(device->gfx_dirty_state);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_INDEX_BUFFER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SO_DECL_LIST);
if (device->info->ver < 11)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_VF_SGVS_2);
if (device->info->ver < 12) {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_DEPTH_BOUNDS);
}
if (!device->vk.enabled_extensions.EXT_sample_locations)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SAMPLE_PATTERN);
if (!device->vk.enabled_extensions.KHR_fragment_shading_rate) {
if (device->info->ver >= 30) {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_COARSE_PIXEL);
} else {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CPS);
}
}
if (!device->vk.enabled_extensions.EXT_mesh_shader) {
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_SBE_MESH);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_CLIP_MESH);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_CONTROL);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_SHADER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_MESH_DISTRIB);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_CONTROL);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_SHADER);
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_TASK_REDISTRIB);
}
if (!intel_needs_workaround(device->info, 18019816803))
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_18019816803);
if (!intel_needs_workaround(device->info, 14018283232))
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_WA_14018283232);
if (device->info->ver > 9)
BITSET_CLEAR(device->gfx_dirty_state, ANV_GFX_STATE_PMA_FIX);
device->queue_count = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queueCreateInfo =
&pCreateInfo->pQueueCreateInfos[i];
for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
result = anv_queue_init(device, &device->queues[device->queue_count],
queueCreateInfo, j);
if (result != VK_SUCCESS)
goto fail_queues;
device->queue_count++;
}
}
anv_device_utrace_init(device);
result = vk_meta_device_init(&device->vk, &device->meta_device);
if (result != VK_SUCCESS)
goto fail_utrace;
result = anv_genX(device->info, init_device_state)(device);
if (result != VK_SUCCESS)
goto fail_meta_device;
simple_mtx_init(&device->accel_struct_build.mutex, mtx_plain);
*pDevice = anv_device_to_handle(device);
return VK_SUCCESS;
fail_meta_device:
vk_meta_device_finish(&device->vk, &device->meta_device);
fail_utrace:
anv_device_utrace_finish(device);
fail_queues:
for (uint32_t i = 0; i < device->queue_count; i++)
anv_queue_finish(&device->queues[i]);
anv_device_finish_embedded_samplers(device);
anv_device_finish_blorp(device);
anv_device_finish_astc_emu(device);
anv_device_finish_internal_kernels(device);
anv_device_finish_rt_shaders(device);
fail_trtt:
anv_device_finish_trtt(device);
fail_companion_cmd_pool:
if (device->info->verx10 >= 125) {
vk_common_DestroyCommandPool(anv_device_to_handle(device),
device->companion_rcs_cmd_pool, NULL);
}
fail_print:
if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
anv_device_print_fini(device);
fail_internal_cache:
vk_pipeline_cache_destroy(device->internal_cache, NULL);
fail_default_pipeline_cache:
vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
fail_btd_fifo_bo:
if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
ANV_DMR_BO_FREE(&device->vk.base, device->btd_fifo_bo);
anv_device_release_bo(device, device->btd_fifo_bo);
}
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
anv_scratch_pool_finish(device, &device->protected_scratch_pool);
fail_trivial_batch:
ANV_DMR_BO_FREE(&device->vk.base, device->trivial_batch_bo);
anv_device_release_bo(device, device->trivial_batch_bo);
fail_ray_query_bo:
for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
if (device->ray_query_bo[i]) {
ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
anv_device_release_bo(device, device->ray_query_bo[i]);
}
}
fail_alloc_device_bo:
if (device->mem_fence_bo) {
ANV_DMR_BO_FREE(&device->vk.base, device->mem_fence_bo);
anv_device_release_bo(device, device->mem_fence_bo);
}
if (device->dummy_aux_bo) {
ANV_DMR_BO_FREE(&device->vk.base, device->dummy_aux_bo);
anv_device_release_bo(device, device->dummy_aux_bo);
}
ANV_DMR_BO_FREE(&device->vk.base, device->workaround_bo);
anv_device_release_bo(device, device->workaround_bo);
fail_surface_aux_map_pool:
if (device->info->has_aux_map) {
intel_aux_map_finish(device->aux_map_ctx);
device->aux_map_ctx = NULL;
}
fail_aux_tt_pool:
if (device->info->has_aux_map)
anv_state_pool_finish(&device->aux_tt_pool);
fail_push_descriptor_buffer_pool:
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125)
anv_state_pool_finish(&device->push_descriptor_buffer_pool);
fail_indirect_push_descriptor_pool:
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->indirect_push_descriptor_pool);
fail_binding_table_pool:
anv_state_pool_finish(&device->binding_table_pool);
fail_bindless_surface_state_pool:
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->bindless_surface_state_pool);
fail_internal_surface_state_pool:
anv_state_pool_finish(&device->internal_surface_state_pool);
fail_scratch_surface_state_pool:
if (device->info->verx10 >= 125)
anv_state_pool_finish(&device->scratch_surface_state_pool);
fail_instruction_state_pool:
anv_state_pool_finish(&device->instruction_state_pool);
fail_custom_border_color_pool:
anv_state_reserved_array_pool_finish(&device->custom_border_colors);
fail_dynamic_state_pool:
anv_state_pool_finish(&device->dynamic_state_pool);
fail_general_state_pool:
anv_state_pool_finish(&device->general_state_pool);
fail_batch_bo_pool:
if (device->vk.enabled_extensions.KHR_acceleration_structure)
anv_bo_pool_finish(&device->bvh_bo_pool);
anv_bo_pool_finish(&device->batch_bo_pool);
anv_slab_bo_deinit(device);
fail_cache:
anv_bo_cache_finish(&device->bo_cache);
fail_queue_cond:
pthread_cond_destroy(&device->queue_submit);
fail_mutex:
pthread_mutex_destroy(&device->mutex);
fail_vmas:
util_vma_heap_finish(&device->vma_trtt);
util_vma_heap_finish(&device->vma_dynamic_visible);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
pthread_mutex_destroy(&device->vma_mutex);
fail_queues_alloc:
vk_free(&device->vk.alloc, device->queues);
fail_context_id:
anv_device_destroy_context_or_vm(device);
fail_fd:
close(device->fd);
fail_device:
vk_device_finish(&device->vk);
fail_alloc:
vk_free(&device->vk.alloc, device);
return result;
}
void anv_DestroyDevice(
VkDevice _device,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device)
return;
anv_memory_trace_finish(device);
struct anv_physical_device *pdevice = device->physical;
/* Do TRTT batch garbage collection before destroying queues. */
anv_device_finish_trtt(device);
if (device->accel_struct_build.radix_sort) {
radix_sort_vk_destroy(device->accel_struct_build.radix_sort,
_device, &device->vk.alloc);
}
vk_meta_device_finish(&device->vk, &device->meta_device);
anv_device_utrace_finish(device);
for (uint32_t i = 0; i < device->queue_count; i++)
anv_queue_finish(&device->queues[i]);
vk_free(&device->vk.alloc, device->queues);
anv_device_finish_blorp(device);
anv_device_finish_rt_shaders(device);
anv_device_finish_astc_emu(device);
anv_device_finish_internal_kernels(device);
if (INTEL_DEBUG(DEBUG_SHADER_PRINT))
anv_device_print_fini(device);
vk_pipeline_cache_destroy(device->internal_cache, NULL);
vk_pipeline_cache_destroy(device->vk.mem_cache, NULL);
anv_device_finish_embedded_samplers(device);
if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
ANV_DMR_BO_FREE(&device->vk.base, device->btd_fifo_bo);
anv_device_release_bo(device, device->btd_fifo_bo);
}
if (device->info->verx10 >= 125) {
vk_common_DestroyCommandPool(anv_device_to_handle(device),
device->companion_rcs_cmd_pool, NULL);
}
anv_state_reserved_array_pool_finish(&device->custom_border_colors);
#ifdef HAVE_VALGRIND
/* We only need to free these to prevent valgrind errors. The backing
* BO will go away in a couple of lines so we don't actually leak.
*/
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
anv_state_pool_free(&device->dynamic_state_pool, device->breakpoint);
#endif
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
if (device->rt_scratch_bos[i] != NULL) {
struct anv_bo *bo = device->rt_scratch_bos[i];
ANV_DMR_BO_FREE(&device->vk.base, bo);
anv_device_release_bo(device, bo);
}
}
anv_scratch_pool_finish(device, &device->scratch_pool);
anv_scratch_pool_finish(device, &device->protected_scratch_pool);
if (device->vk.enabled_extensions.KHR_ray_query) {
for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_bo); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(device->ray_query_shadow_bos[0]); j++) {
if (device->ray_query_shadow_bos[i][j] != NULL) {
ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_shadow_bos[i][j]);
anv_device_release_bo(device, device->ray_query_shadow_bos[i][j]);
}
}
if (device->ray_query_bo[i]) {
ANV_DMR_BO_FREE(&device->vk.base, device->ray_query_bo[i]);
anv_device_release_bo(device, device->ray_query_bo[i]);
}
}
}
ANV_DMR_BO_FREE(&device->vk.base, device->workaround_bo);
anv_device_release_bo(device, device->workaround_bo);
if (device->dummy_aux_bo) {
ANV_DMR_BO_FREE(&device->vk.base, device->dummy_aux_bo);
anv_device_release_bo(device, device->dummy_aux_bo);
}
if (device->mem_fence_bo) {
ANV_DMR_BO_FREE(&device->vk.base, device->mem_fence_bo);
anv_device_release_bo(device, device->mem_fence_bo);
}
ANV_DMR_BO_FREE(&device->vk.base, device->trivial_batch_bo);
anv_device_release_bo(device, device->trivial_batch_bo);
if (device->info->has_aux_map) {
intel_aux_map_finish(device->aux_map_ctx);
device->aux_map_ctx = NULL;
anv_state_pool_finish(&device->aux_tt_pool);
}
if (device->vk.enabled_extensions.EXT_descriptor_buffer &&
device->info->verx10 >= 125)
anv_state_pool_finish(&device->push_descriptor_buffer_pool);
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->indirect_push_descriptor_pool);
anv_state_pool_finish(&device->binding_table_pool);
if (device->info->verx10 >= 125)
anv_state_pool_finish(&device->scratch_surface_state_pool);
anv_state_pool_finish(&device->internal_surface_state_pool);
if (device->physical->indirect_descriptors)
anv_state_pool_finish(&device->bindless_surface_state_pool);
anv_state_pool_finish(&device->instruction_state_pool);
anv_state_pool_finish(&device->dynamic_state_pool);
anv_state_pool_finish(&device->general_state_pool);
if (device->vk.enabled_extensions.KHR_acceleration_structure)
anv_bo_pool_finish(&device->bvh_bo_pool);
anv_bo_pool_finish(&device->batch_bo_pool);
anv_slab_bo_deinit(device);
anv_bo_cache_finish(&device->bo_cache);
util_vma_heap_finish(&device->vma_trtt);
util_vma_heap_finish(&device->vma_dynamic_visible);
util_vma_heap_finish(&device->vma_desc);
util_vma_heap_finish(&device->vma_hi);
util_vma_heap_finish(&device->vma_lo);
pthread_mutex_destroy(&device->vma_mutex);
pthread_cond_destroy(&device->queue_submit);
pthread_mutex_destroy(&device->mutex);
simple_mtx_destroy(&device->accel_struct_build.mutex);
ralloc_free(device->fp64_nir);
anv_device_destroy_context_or_vm(device);
if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
for (unsigned i = 0; i < pdevice->queue.family_count; i++) {
if (INTEL_DEBUG(DEBUG_BATCH_STATS))
intel_batch_print_stats(&device->decoder[i]);
intel_batch_decode_ctx_finish(&device->decoder[i]);
}
}
close(device->fd);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
}
VkResult anv_EnumerateInstanceLayerProperties(
uint32_t* pPropertyCount,
VkLayerProperties* pProperties)
{
if (pProperties == NULL) {
*pPropertyCount = 0;
return VK_SUCCESS;
}
/* None supported at this time */
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
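/* Wait on a BO with the given timeout (in nanoseconds). Returns
 * VK_TIMEOUT if the wait timed out and marks the device lost on any
 * other kernel error.
 */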
VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout)
{
int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
if (ret == -1 && errno == ETIME) {
return VK_TIMEOUT;
} else if (ret == -1) {
/* We don't know the real error. */
return vk_device_set_lost(&device->vk, "gem wait failed: %m");
} else {
return VK_SUCCESS;
}
}
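/* Pick the VMA heap matching the allocation flags; plain allocations fall
 * through to the high heap.
 */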
static struct util_vma_heap *
anv_vma_heap_for_flags(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
if (alloc_flags & ANV_BO_ALLOC_TRTT)
return &device->vma_trtt;
if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS)
return &device->vma_lo;
if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL)
return &device->vma_desc;
if (alloc_flags & ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL)
return &device->vma_dynamic_visible;
return &device->vma_hi;
}
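/* Reserve a GPU virtual address range. Client-visible allocations may
 * request an explicit address; otherwise the range comes from the heap
 * selected by the flags. Returns the address in canonical (sign-extended)
 * form and reports the heap used through out_vma_heap so the caller can
 * free against the same heap.
 */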
uint64_t
anv_vma_alloc(struct anv_device *device,
uint64_t size, uint64_t align,
enum anv_bo_alloc_flags alloc_flags,
uint64_t client_address,
struct util_vma_heap **out_vma_heap)
{
pthread_mutex_lock(&device->vma_mutex);
uint64_t addr = 0;
*out_vma_heap = anv_vma_heap_for_flags(device, alloc_flags);
if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
assert(*out_vma_heap == &device->vma_hi ||
*out_vma_heap == &device->vma_dynamic_visible ||
*out_vma_heap == &device->vma_trtt);
if (client_address) {
if (util_vma_heap_alloc_addr(*out_vma_heap,
client_address, size)) {
addr = client_address;
}
} else {
(*out_vma_heap)->alloc_high = false;
addr = util_vma_heap_alloc(*out_vma_heap, size, align);
(*out_vma_heap)->alloc_high = true;
}
/* We don't want to fall back to other heaps */
goto done;
}
assert(client_address == 0);
addr = util_vma_heap_alloc(*out_vma_heap, size, align);
done:
pthread_mutex_unlock(&device->vma_mutex);
assert(addr == intel_48b_address(addr));
return intel_canonical_address(addr);
}
void
anv_vma_free(struct anv_device *device,
struct util_vma_heap *vma_heap,
uint64_t address, uint64_t size)
{
assert(vma_heap == &device->vma_lo ||
vma_heap == &device->vma_hi ||
vma_heap == &device->vma_desc ||
vma_heap == &device->vma_dynamic_visible ||
vma_heap == &device->vma_trtt);
const uint64_t addr_48b = intel_48b_address(address);
pthread_mutex_lock(&device->vma_mutex);
util_vma_heap_free(vma_heap, addr_48b, size);
pthread_mutex_unlock(&device->vma_mutex);
}
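/* A minimal usage sketch (hypothetical caller): reserve a range, use it,
 * then release it against the heap the allocator reported:
 *
 *    struct util_vma_heap *heap;
 *    uint64_t addr = anv_vma_alloc(device, size, align, alloc_flags,
 *                                  0, &heap);
 *    if (addr != 0) {
 *       ...
 *       anv_vma_free(device, heap, addr, size);
 *    }
 */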
VkResult anv_AllocateMemory(
VkDevice _device,
const VkMemoryAllocateInfo* pAllocateInfo,
const VkAllocationCallbacks* pAllocator,
VkDeviceMemory* pMem)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_physical_device *pdevice = device->physical;
struct anv_device_memory *mem;
VkResult result = VK_SUCCESS;
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
VkDeviceSize aligned_alloc_size =
align64(pAllocateInfo->allocationSize, 4096);
assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
const struct anv_memory_type *mem_type =
&pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
assert(mem_type->heapIndex < pdevice->memory.heap_count);
struct anv_memory_heap *mem_heap =
&pdevice->memory.heaps[mem_type->heapIndex];
if (aligned_alloc_size > mem_heap->size)
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
if (mem_heap_used + aligned_alloc_size > mem_heap->size)
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
mem = vk_device_memory_create(&device->vk, pAllocateInfo,
pAllocator, sizeof(*mem));
if (mem == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
mem->type = mem_type;
mem->map = NULL;
mem->map_size = 0;
mem->map_delta = 0;
enum anv_bo_alloc_flags alloc_flags = 0;
const VkImportMemoryFdInfoKHR *fd_info = NULL;
const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
const struct wsi_memory_allocate_info *wsi_info = NULL;
uint64_t client_address = 0;
vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
/* VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA isn't a real enum
* value, so use a cast to avoid a compiler warning.
*/
switch ((uint32_t)ext->sType) {
case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR:
case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
/* handled by vk_device_memory_create */
break;
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
fd_info = (void *)ext;
break;
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
dedicated_info = (void *)ext;
break;
case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
(const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
client_address = addr_info->opaqueCaptureAddress;
break;
}
case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
wsi_info = (void *)ext;
break;
default:
vk_debug_ignored_stype(ext->sType);
break;
}
}
/* If i915 reported mappable/non-mappable vram regions and the
* application wants lmem to be mappable, then we need to use the
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
*/
if (pdevice->vram_mappable.size > 0 &&
pdevice->vram_non_mappable.size > 0 &&
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
if (!mem_heap->is_local_mem)
alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
if (mem->vk.alloc_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_PROTECTED_BIT)
alloc_flags |= ANV_BO_ALLOC_PROTECTED;
/* For now, always allocate AUX-TT aligned memory, regardless of dedicated
* allocations. An application can, for example, suballocate a large
* VkDeviceMemory and try to bind an image created with a CCS modifier. In
* that case we cannot disable CCS if the alignment doesn't meet the AUX-TT
* requirements, so we need to ensure both the VkDeviceMemory and the
* alignment reported through vkGetImageMemoryRequirements() meet the
* AUX-TT requirement.
*
* Allocations with the special dynamic_visible mem type are for things like
* descriptor buffers, so AUX-TT alignment is not needed here.
*/
if (device->info->has_aux_map && !mem_type->dynamic_visible)
alloc_flags |= ANV_BO_ALLOC_AUX_TT_ALIGNED;
/* If the allocation is neither dedicated nor a host-pointer import,
* allocate additional CCS space.
*
* Allocations with the special dynamic_visible mem type are for things like
* descriptor buffers, which don't need any compression.
*/
if (device->physical->alloc_aux_tt_mem &&
dedicated_info == NULL &&
mem->vk.host_ptr == NULL &&
!mem_type->dynamic_visible)
alloc_flags |= ANV_BO_ALLOC_AUX_CCS;
/* TODO: Android, ChromeOS and other applications may need another way to
* allocate buffers that can be scanned out to a display, but it should be
* pretty easy to catch those as the Xe KMD driver will print warnings in
* dmesg when scanning out buffers allocated without the proper flag set.
*/
if (wsi_info)
alloc_flags |= ANV_BO_ALLOC_SCANOUT;
struct anv_image *image = dedicated_info ?
anv_image_from_handle(dedicated_info->image) :
NULL;
mem->dedicated_image = image;
if (device->info->ver >= 20 && image &&
image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
isl_drm_modifier_has_aux(image->vk.drm_format_mod)) {
/* ISL should skip compression modifiers when no_ccs is set. */
assert(!INTEL_DEBUG(DEBUG_NO_CCS));
/* Images created with the Xe2 modifiers should be allocated into
* compressed memory, but we won't get such info from the memory type,
* refer to anv_image_is_pat_compressible(). We have to check the
* modifiers and enable compression if we can here.
*/
alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
} else if (mem_type->compressed && !INTEL_DEBUG(DEBUG_NO_CCS)) {
alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
}
/* Anything imported or exported is EXTERNAL */
if (mem->vk.export_handle_types || mem->vk.import_handle_type) {
alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
/* wsi has its own way of synchronizing with the compositor */
if (!wsi_info && image) {
/* Apply implicit sync to be compatible with clients relying on
* implicit fencing. This matches the behavior in iris i915_batch
* submit. An example client is VA-API (iHD), so only dedicated
* image scenario has to be covered.
*/
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC;
/* For color attachment, apply IMPLICIT_WRITE so a client on the
* consumer side relying on implicit fencing can have a fence to
* wait for render complete.
*/
if (pdevice->instance->external_memory_implicit_sync &&
(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_WRITE;
}
}
if (mem_type->dynamic_visible)
alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL;
if (mem->vk.ahardware_buffer) {
result = anv_import_ahw_memory(_device, mem);
if (result != VK_SUCCESS)
goto fail;
goto success;
}
/* The Vulkan spec permits handleType to be 0, in which case the struct is
* ignored.
*/
if (fd_info && fd_info->handleType) {
/* At the moment, we support only the below handle types. */
assert(fd_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
fd_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
if (alloc_flags & ANV_BO_ALLOC_COMPRESSED) {
/* First, when importing a compressed buffer on Xe2+, we can be sure
* that the buffer comes from a resource created with modifiers that
* support compression, even though the modifier info is not available
* on the allocation path. (Buffers created with modifiers that don't
* support compression must be uncompressed or resolved first for
* sharing.)
*
* We assume the source of the sharing (a GL driver or this driver)
* would create the shared buffer for scanout usage as well, for the
* same reasons. As a result, configure the imported buffer for
* scanout.
*
* Such an assumption could fit pre-Xe2 platforms as well but becomes
* more relevant on Xe2+ because the alloc flags will determine the
* bo's heap and then its PAT entry in the later vm_bind stage.
*/
assert(device->info->ver >= 20);
alloc_flags |= ANV_BO_ALLOC_SCANOUT;
}
result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
client_address, &mem->bo);
if (result != VK_SUCCESS)
goto fail;
/* For security purposes, we reject importing the bo if it's smaller
* than the requested allocation size. This prevents a malicious client
* from passing a buffer to a trusted client, lying about the size, and
* telling the trusted client to try and texture from an image that goes
* out-of-bounds. This sort of thing could lead to GPU hangs or worse
* in the trusted client. The trusted client can protect itself against
* this sort of attack but only if it can trust the buffer size.
*/
if (mem->bo->size < aligned_alloc_size) {
result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"aligned allocationSize too large for "
"VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
"%"PRIu64"B > %"PRIu64"B",
aligned_alloc_size, mem->bo->size);
anv_device_release_bo(device, mem->bo);
goto fail;
}
/* From the Vulkan spec:
*
* "Importing memory from a file descriptor transfers ownership of
* the file descriptor from the application to the Vulkan
* implementation. The application must not perform any operations on
* the file descriptor after a successful import."
*
* If the import fails, we leave the file descriptor open.
*/
close(fd_info->fd);
goto success;
}
if (mem->vk.host_ptr) {
if (mem->vk.import_handle_type ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
goto fail;
}
assert(mem->vk.import_handle_type ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
result = anv_device_import_bo_from_host_ptr(device,
mem->vk.host_ptr,
mem->vk.size,
alloc_flags,
client_address,
&mem->bo);
if (result != VK_SUCCESS)
goto fail;
goto success;
}
if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT)) {
alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
} else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
alloc_flags |= ANV_BO_ALLOC_HOST_CACHED;
} else {
/* Required to set some host caching mode so we end up with a valid PAT index. */
alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT;
}
/* Regular allocate (not importing memory). */
result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
alloc_flags, client_address, &mem->bo);
if (result != VK_SUCCESS)
goto fail;
if (image && image->vk.wsi_legacy_scanout) {
/* Some legacy (non-modifiers) consumers need the tiling to be set on
* the BO. In this case, we have a dedicated allocation.
*/
const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
result = anv_device_set_bo_tiling(device, mem->bo,
surf->row_pitch_B,
surf->tiling);
if (result != VK_SUCCESS) {
anv_device_release_bo(device, mem->bo);
goto fail;
}
}
success:
mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
if (mem_heap_used > mem_heap->size) {
p_atomic_add(&mem_heap->used, -mem->bo->size);
anv_device_release_bo(device, mem->bo);
result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
"Out of heap memory");
goto fail;
}
pthread_mutex_lock(&device->mutex);
list_addtail(&mem->link, &device->memory_objects);
pthread_mutex_unlock(&device->mutex);
ANV_RMV(heap_create, device, mem, false, 0);
ANV_DMR_BO_ALLOC_IMPORT(&mem->vk.base, mem->bo, result,
mem->vk.import_handle_type);
*pMem = anv_device_memory_to_handle(mem);
return VK_SUCCESS;
fail:
ANV_DMR_BO_ALLOC_IMPORT(&mem->vk.base, mem->bo, result,
mem->vk.import_handle_type);
vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
return result;
}
VkResult anv_GetMemoryFdKHR(
VkDevice device_h,
const VkMemoryGetFdInfoKHR* pGetFdInfo,
int* pFd)
{
ANV_FROM_HANDLE(anv_device, dev, device_h);
ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
return anv_device_export_bo(dev, mem->bo, pFd);
}
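/* Sketch (not driver code; names are illustrative): exporting this memory
* as a dma-buf to share with another process or API. The allocation must
* have been created with a VkExportMemoryAllocateInfo naming the same
* handle type, and the caller owns the returned fd.
*
*    VkMemoryGetFdInfoKHR get_fd_info = {
*       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
*       .memory = memory,
*       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
*    };
*    int fd = -1;
*    vkGetMemoryFdKHR(device, &get_fd_info, &fd);
*/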
VkResult anv_GetMemoryFdPropertiesKHR(
VkDevice _device,
VkExternalMemoryHandleTypeFlagBits handleType,
int fd,
VkMemoryFdPropertiesKHR* pMemoryFdProperties)
{
ANV_FROM_HANDLE(anv_device, device, _device);
switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
/* dma-buf can be imported as any memory type */
pMemoryFdProperties->memoryTypeBits =
(1 << device->physical->memory.type_count) - 1;
return VK_SUCCESS;
default:
/* The valid usage section for this function says:
*
* "handleType must not be one of the handle types defined as
* opaque."
*
* So opaque handle types fall into the default "unsupported" case.
*/
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
}
VkResult anv_GetMemoryHostPointerPropertiesEXT(
VkDevice _device,
VkExternalMemoryHandleTypeFlagBits handleType,
const void* pHostPointer,
VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
{
ANV_FROM_HANDLE(anv_device, device, _device);
assert(pMemoryHostPointerProperties->sType ==
VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
pMemoryHostPointerProperties->memoryTypeBits =
device->info->ver >= 20 ?
device->physical->memory.default_buffer_mem_types :
(1ull << device->physical->memory.type_count) - 1;
return VK_SUCCESS;
default:
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
}
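/* Sketch (not driver code; names are illustrative): querying which memory
* types can back a given host allocation before importing it. Note that on
* Xe2+ the returned mask is restricted to the default buffer memory types.
*
*    VkMemoryHostPointerPropertiesEXT props = {
*       .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
*    };
*    vkGetMemoryHostPointerPropertiesEXT(
*       device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
*       host_ptr, &props);
*    ...then pick a memoryTypeIndex whose bit is set in props.memoryTypeBits.
*/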
void anv_FreeMemory(
VkDevice _device,
VkDeviceMemory _mem,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
if (mem == NULL)
return;
pthread_mutex_lock(&device->mutex);
list_del(&mem->link);
pthread_mutex_unlock(&device->mutex);
if (mem->map) {
const VkMemoryUnmapInfoKHR unmap = {
.sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO_KHR,
.memory = _mem,
};
anv_UnmapMemory2KHR(_device, &unmap);
}
p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
-mem->bo->size);
ANV_DMR_BO_FREE_IMPORT(&mem->vk.base, mem->bo,
mem->vk.import_handle_type);
anv_device_release_bo(device, mem->bo);
ANV_RMV(resource_destroy, device, mem);
vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
}
VkResult anv_MapMemory2KHR(
VkDevice _device,
const VkMemoryMapInfoKHR* pMemoryMapInfo,
void** ppData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryMapInfo->memory);
if (mem == NULL) {
*ppData = NULL;
return VK_SUCCESS;
}
if (mem->vk.host_ptr) {
*ppData = mem->vk.host_ptr + pMemoryMapInfo->offset;
return VK_SUCCESS;
}
/* From the Vulkan spec version 1.0.32 docs for MapMemory:
*
* * memory must have been created with a memory type that reports
* VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
*/
if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
"Memory object not mappable.");
}
assert(pMemoryMapInfo->size > 0);
const VkDeviceSize offset = pMemoryMapInfo->offset;
const VkDeviceSize size =
vk_device_memory_range(&mem->vk, pMemoryMapInfo->offset,
pMemoryMapInfo->size);
if (size != (size_t)size) {
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
"requested size 0x%"PRIx64" does not fit in %u bits",
size, (unsigned)(sizeof(size_t) * 8));
}
/* From the Vulkan 1.2.194 spec:
*
* "memory must not be currently host mapped"
*/
if (mem->map != NULL) {
return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
"Memory object already mapped.");
}
void *placed_addr = NULL;
if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
const VkMemoryMapPlacedInfoEXT *placed_info =
vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
assert(placed_info != NULL);
placed_addr = placed_info->pPlacedAddress;
}
uint64_t map_offset, map_size;
anv_sanitize_map_params(device, mem->bo, offset, size, &map_offset, &map_size);
void *map;
VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
map_size, placed_addr, &map);
if (result != VK_SUCCESS)
return result;
mem->map = map;
mem->map_size = map_size;
mem->map_delta = (offset - map_offset);
*ppData = mem->map + mem->map_delta;
return VK_SUCCESS;
}
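/* Sketch (not driver code; names are illustrative): a placed mapping with
* VK_EXT_map_memory_placed, which takes the placed_addr path above.
* pPlacedAddress must be aligned to
* VkPhysicalDeviceMapMemoryPlacedPropertiesEXT::minPlacedMemoryMapAlignment.
*
*    VkMemoryMapPlacedInfoEXT placed_info = {
*       .sType = VK_STRUCTURE_TYPE_MEMORY_MAP_PLACED_INFO_EXT,
*       .pPlacedAddress = reserved_addr,
*    };
*    VkMemoryMapInfoKHR map_info = {
*       .sType = VK_STRUCTURE_TYPE_MEMORY_MAP_INFO_KHR,
*       .pNext = &placed_info,
*       .flags = VK_MEMORY_MAP_PLACED_BIT_EXT,
*       .memory = memory,
*       .offset = 0,
*       .size = VK_WHOLE_SIZE,
*    };
*    void *data;
*    vkMapMemory2KHR(device, &map_info, &data);
*/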
VkResult anv_UnmapMemory2KHR(
VkDevice _device,
const VkMemoryUnmapInfoKHR* pMemoryUnmapInfo)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryUnmapInfo->memory);
if (mem == NULL || mem->vk.host_ptr)
return VK_SUCCESS;
VkResult result =
anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size,
pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
if (result != VK_SUCCESS)
return result;
mem->map = NULL;
mem->map_size = 0;
mem->map_delta = 0;
return VK_SUCCESS;
}
VkResult anv_FlushMappedMemoryRanges(
VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange* pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->memory.need_flush)
return VK_SUCCESS;
/* Make sure the writes we're flushing have landed. */
__builtin_ia32_mfence();
for (uint32_t i = 0; i < memoryRangeCount; i++) {
ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
if (map_offset >= mem->map_size)
continue;
intel_flush_range(mem->map + map_offset,
MIN2(pMemoryRanges[i].size,
mem->map_size - map_offset));
}
#endif
return VK_SUCCESS;
}
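/* Sketch (not driver code; names are illustrative): after CPU writes to a
* HOST_VISIBLE but non-HOST_COHERENT mapping, the application must flush
* the range before the GPU reads it:
*
*    VkMappedMemoryRange range = {
*       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
*       .memory = memory,
*       .offset = 0,
*       .size = VK_WHOLE_SIZE,
*    };
*    vkFlushMappedMemoryRanges(device, 1, &range);
*/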
VkResult anv_InvalidateMappedMemoryRanges(
VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange* pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->memory.need_flush)
return VK_SUCCESS;
for (uint32_t i = 0; i < memoryRangeCount; i++) {
ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
if (map_offset >= mem->map_size)
continue;
intel_invalidate_range(mem->map + map_offset,
MIN2(pMemoryRanges[i].size,
mem->map_size - map_offset));
}
/* Make sure no reads get moved up above the invalidate. */
__builtin_ia32_mfence();
#endif
return VK_SUCCESS;
}
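/* Sketch (not driver code): the mirror of the flush example above; after
* GPU writes to non-coherent memory, the application must invalidate the
* range before reading it on the CPU, using the same VkMappedMemoryRange
* setup with vkInvalidateMappedMemoryRanges(device, 1, &range).
*/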
void anv_GetDeviceMemoryCommitment(
VkDevice device,
VkDeviceMemory memory,
VkDeviceSize* pCommittedMemoryInBytes)
{
*pCommittedMemoryInBytes = 0;
}
static inline clockid_t
anv_get_default_cpu_clock_id(void)
{
#ifdef CLOCK_MONOTONIC_RAW
return CLOCK_MONOTONIC_RAW;
#else
return CLOCK_MONOTONIC;
#endif
}
static inline clockid_t
vk_time_domain_to_clockid(VkTimeDomainKHR domain)
{
switch (domain) {
#ifdef CLOCK_MONOTONIC_RAW
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
return CLOCK_MONOTONIC_RAW;
#endif
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
return CLOCK_MONOTONIC;
default:
unreachable("Missing");
return CLOCK_MONOTONIC;
}
}
static inline bool
is_cpu_time_domain(VkTimeDomainKHR domain)
{
return domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR ||
domain == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
}
static inline bool
is_gpu_time_domain(VkTimeDomainKHR domain)
{
return domain == VK_TIME_DOMAIN_DEVICE_KHR;
}
VkResult anv_GetCalibratedTimestampsKHR(
VkDevice _device,
uint32_t timestampCount,
const VkCalibratedTimestampInfoKHR *pTimestampInfos,
uint64_t *pTimestamps,
uint64_t *pMaxDeviation)
{
ANV_FROM_HANDLE(anv_device, device, _device);
const uint64_t timestamp_frequency = device->info->timestamp_frequency;
const uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
uint32_t d, increment;
uint64_t begin, end;
uint64_t max_clock_period = 0;
const enum intel_kmd_type kmd_type = device->physical->info.kmd_type;
const bool has_correlate_timestamp = kmd_type == INTEL_KMD_TYPE_XE;
clockid_t cpu_clock_id = -1;
begin = end = vk_clock_gettime(anv_get_default_cpu_clock_id());
for (d = 0, increment = 1; d < timestampCount; d += increment) {
const VkTimeDomainKHR current = pTimestampInfos[d].timeDomain;
/* If we have a request pattern like this:
* - domain0 = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR or VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR
* - domain1 = VK_TIME_DOMAIN_DEVICE_KHR
* - domain2 = domain0 (optional)
*
* We can combine all of those into a single ioctl for maximum accuracy.
*/
if (has_correlate_timestamp && (d + 1) < timestampCount) {
const VkTimeDomainKHR next = pTimestampInfos[d + 1].timeDomain;
if ((is_cpu_time_domain(current) && is_gpu_time_domain(next)) ||
(is_gpu_time_domain(current) && is_cpu_time_domain(next))) {
/* We'll consume at least 2 elements. */
increment = 2;
if (is_cpu_time_domain(current))
cpu_clock_id = vk_time_domain_to_clockid(current);
else
cpu_clock_id = vk_time_domain_to_clockid(next);
uint64_t cpu_timestamp, gpu_timestamp, cpu_delta_timestamp, cpu_end_timestamp;
if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd,
kmd_type,
INTEL_ENGINE_CLASS_RENDER,
0 /* engine_instance */,
cpu_clock_id,
&cpu_timestamp,
&gpu_timestamp,
&cpu_delta_timestamp))
return vk_device_set_lost(&device->vk, "Failed to read correlated timestamps: %m");
cpu_end_timestamp = cpu_timestamp + cpu_delta_timestamp;
if (is_cpu_time_domain(current)) {
pTimestamps[d] = cpu_timestamp;
pTimestamps[d + 1] = gpu_timestamp;
} else {
pTimestamps[d] = gpu_timestamp;
pTimestamps[d + 1] = cpu_end_timestamp;
}
max_clock_period = MAX2(max_clock_period, device_period);
/* If we can consume a third element */
if ((d + 2) < timestampCount &&
is_cpu_time_domain(current) &&
current == pTimestampInfos[d + 2].timeDomain) {
pTimestamps[d + 2] = cpu_end_timestamp;
increment++;
}
/* If we're the first element, we can replace begin */
if (d == 0 && cpu_clock_id == anv_get_default_cpu_clock_id())
begin = cpu_timestamp;
/* If we're in the same clock domain as begin/end, we can set the end. */
if (cpu_clock_id == anv_get_default_cpu_clock_id())
end = cpu_end_timestamp;
continue;
}
}
/* Fall back to reading each domain individually. */
increment = 1;
switch (current) {
case VK_TIME_DOMAIN_DEVICE_KHR:
if (!intel_gem_read_render_timestamp(device->fd,
device->info->kmd_type,
&pTimestamps[d])) {
return vk_device_set_lost(&device->vk, "Failed to read the "
"TIMESTAMP register: %m");
}
max_clock_period = MAX2(max_clock_period, device_period);
break;
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
max_clock_period = MAX2(max_clock_period, 1);
break;
#ifdef CLOCK_MONOTONIC_RAW
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
pTimestamps[d] = begin;
break;
#endif
default:
pTimestamps[d] = 0;
break;
}
}
/* If the last timestamp was not obtained through the correlated-timestamp
* path, or if it was but its CPU clock is not the default one, sample the
* time again.
*/
if (increment == 1 || cpu_clock_id != anv_get_default_cpu_clock_id())
end = vk_clock_gettime(anv_get_default_cpu_clock_id());
*pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
return VK_SUCCESS;
}
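/* Sketch (not driver code; names are illustrative): requesting a correlated
* CPU/GPU timestamp pair. Putting the CPU and DEVICE domains next to each
* other lets the Xe fast path above service both with a single ioctl:
*
*    VkCalibratedTimestampInfoKHR infos[2] = {
*       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
*         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR },
*       { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
*         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR },
*    };
*    uint64_t timestamps[2], max_deviation;
*    vkGetCalibratedTimestampsKHR(device, 2, infos, timestamps, &max_deviation);
*/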
const struct intel_device_info_pat_entry *
anv_device_get_pat_entry(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
if (alloc_flags & ANV_BO_ALLOC_COMPRESSED) {
/* Compressed PAT entries are available on Xe2+. */
assert(device->info->ver >= 20);
return alloc_flags & ANV_BO_ALLOC_SCANOUT ?
&device->info->pat.compressed_scanout :
&device->info->pat.compressed;
}
if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
return &device->info->pat.cached_coherent;
if (alloc_flags & (ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_SCANOUT))
return &device->info->pat.scanout;
/* PAT indices have no actual effect on DG1 and DG2: smem will always be
* snooped by the GPU and lmem will always be write-combined (WC).
* This might change on future discrete platforms.
*/
if (anv_physical_device_has_vram(device->physical)) {
if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
return &device->info->pat.cached_coherent;
return &device->info->pat.writecombining;
}
/* From this point on we only handle integrated platforms. */
if ((alloc_flags & (ANV_BO_ALLOC_HOST_CACHED_COHERENT)) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
return &device->info->pat.cached_coherent;
else if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
return &device->info->pat.writeback_incoherent;
else
return &device->info->pat.writecombining;
}