/* Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_slab_bo.h"

enum anv_bo_slab_heap {
   ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE, /* mainly used for batch buffers, but other buffers also match */
   ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL,
   ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL,
   ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT,
   ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT,
   ANV_BO_SLAB_HEAP_SMEM_COHERENT,
   ANV_BO_SLAB_HEAP_COMPRESSED, /* used by both integrated and discrete GPUs */
   ANV_BO_SLAB_HEAP_LMEM_SMEM,
   ANV_BO_SLAB_HEAP_LMEM_ONLY,
   ANV_BO_SLAB_NOT_SUPPORTED,
};
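
/* A slab: one parent BO sub-divided into equally sized anv_bo entries that
 * are handed out through the common pb_slab machinery.
 */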
struct anv_slab {
   struct pb_slab base;

   /** The BO representing the entire slab */
   struct anv_bo *bo;

   /** Array of anv_bo structs representing BOs allocated out of this slab */
   struct anv_bo *entries;
};
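
/* Map a set of allocation flags to the slab heap able to back it, or
 * ANV_BO_SLAB_NOT_SUPPORTED when such a BO cannot be sub-allocated from a
 * slab.
 */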
static enum anv_bo_slab_heap
anv_bo_alloc_flags_to_slab_heap(struct anv_device *device,
                                enum anv_bo_alloc_flags alloc_flags)
{
   enum anv_bo_alloc_flags not_supported = ANV_BO_ALLOC_32BIT_ADDRESS |
                                           ANV_BO_ALLOC_EXTERNAL |
                                           ANV_BO_ALLOC_CAPTURE |
                                           ANV_BO_ALLOC_FIXED_ADDRESS |
                                           ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS |
                                           ANV_BO_ALLOC_DESCRIPTOR_POOL |
                                           ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE |
                                           ANV_BO_ALLOC_SCANOUT |
                                           ANV_BO_ALLOC_PROTECTED |
                                           ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL |
                                           ANV_BO_ALLOC_IMPORTED |
                                           ANV_BO_ALLOC_SLAB_PARENT;

   if (device->info->kmd_type == INTEL_KMD_TYPE_I915) {
      not_supported |= (ANV_BO_ALLOC_IMPLICIT_SYNC |
                        ANV_BO_ALLOC_IMPLICIT_WRITE);
   }

   if (alloc_flags == ANV_BO_ALLOC_BATCH_BUFFER_FLAGS ||
       alloc_flags == ANV_BO_ALLOC_BATCH_BUFFER_INTERNAL_FLAGS)
      return ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE;
   if (alloc_flags == ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL_FLAGS)
      return ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL;
   if (alloc_flags == ANV_BO_ALLOC_DESCRIPTOR_POOL_FLAGS)
      return ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL;

   if (alloc_flags & not_supported)
      return ANV_BO_SLAB_NOT_SUPPORTED;

   if (alloc_flags & ANV_BO_ALLOC_COMPRESSED)
      return ANV_BO_SLAB_HEAP_COMPRESSED;

   if (anv_physical_device_has_vram(device->physical)) {
      if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
         return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
      if (alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE))
         return ANV_BO_SLAB_HEAP_LMEM_SMEM;
      return ANV_BO_SLAB_HEAP_LMEM_ONLY;
   }

   if ((alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT)
      return ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT;
   if (alloc_flags & ANV_BO_ALLOC_HOST_CACHED)
      return ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT;
   return ANV_BO_SLAB_HEAP_SMEM_COHERENT;
}

/* Return the power-of-two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct anv_device *device, unsigned size)
{
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << device->bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);
}
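
/* Return the slab allocator responsible for entries of the given size. */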
static struct pb_slabs *
get_slabs(struct anv_device *device, uint64_t size)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);

   for (unsigned i = 0; i < num_slab_allocator; i++) {
      struct pb_slabs *slabs = &device->bo_slabs[i];

      if (size <= (1ull << (slabs->min_order + slabs->num_orders - 1)))
         return slabs;
   }

   unreachable("should have found a valid slab for this size");
   return NULL;
}

static inline bool
anv_slab_bo_is_disabled(struct anv_device *device)
{
   return device->bo_slabs[0].num_heaps == 0;
}
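
/* Try to sub-allocate a BO from a slab. Returns NULL when the request cannot
 * be served from a slab (slabs disabled, unsupported allocation flags, or the
 * rounded-up size does not fit in any slab allocator).
 */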
struct anv_bo *
anv_slab_bo_alloc(struct anv_device *device, const char *name, uint64_t requested_size,
                  uint32_t alignment, enum anv_bo_alloc_flags alloc_flags)
{
   if (anv_slab_bo_is_disabled(device))
      return NULL;

   const enum anv_bo_slab_heap slab_heap = anv_bo_alloc_flags_to_slab_heap(device, alloc_flags);
   if (slab_heap == ANV_BO_SLAB_NOT_SUPPORTED)
      return NULL;

   /* Don't use the slab allocator for small BOs when ANV_BO_ALLOC_AUX_TT_ALIGNED
    * is required and the AUX alignment is >= 1MB. Doing so causes high memory
    * consumption, leading to out-of-memory failures when several GPU
    * applications run in parallel.
    */
   if ((alloc_flags & ANV_BO_ALLOC_AUX_TT_ALIGNED) &&
       (intel_aux_map_get_alignment(device->aux_map_ctx) >= 1024 * 1024) &&
       (requested_size < (1024 * 1024 / 2)))
      return NULL;

   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
   struct pb_slabs *last_slab = &device->bo_slabs[num_slab_allocator - 1];
   const uint64_t max_slab_entry_size = BITFIELD64_BIT(last_slab->min_order + last_slab->num_orders - 1);

   if (requested_size > max_slab_entry_size)
      return NULL;

   uint64_t alloc_size = MAX2(alignment, requested_size);
   alloc_size = get_slab_pot_entry_size(device, alloc_size);
   if (alloc_size > max_slab_entry_size)
      return NULL;

   struct pb_slabs *slabs = get_slabs(device, alloc_size);
   struct pb_slab_entry *entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
   if (!entry) {
      /* Clean up and try again... */
      pb_slabs_reclaim(slabs);
      entry = pb_slab_alloc(slabs, alloc_size, slab_heap);
   }
   if (!entry)
      return NULL;

   struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);

   bo->name = name;
   bo->refcount = 1;
   bo->size = requested_size;
   bo->alloc_flags = alloc_flags;
   bo->flags = device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);

   assert(bo->flags == bo->slab_parent->flags);
   assert((intel_48b_address(bo->offset) & (alignment - 1)) == 0);

   if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
      if (anv_device_map_bo(device, bo, 0, bo->size, NULL, &bo->map) != VK_SUCCESS) {
         anv_slab_bo_free(device, bo);
         return NULL;
      }
   }

   return bo;
}
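
/* Return a slab-allocated BO to its slab, unmapping it first if needed. */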
void
anv_slab_bo_free(struct anv_device *device, struct anv_bo *bo)
{
   assert(bo->slab_parent);

   if (bo->map) {
      anv_device_unmap_bo(device, bo, bo->map, bo->size, false /* replace */);
      bo->map = NULL;
   }

   bo->refcount = 0;
   pb_slab_free(get_slabs(device, bo->size), &bo->slab_entry);
}
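
/* Number of slab heaps in use on this device: discrete GPUs use every heap up
 * to ANV_BO_SLAB_HEAP_LMEM_ONLY, integrated GPUs stop at
 * ANV_BO_SLAB_HEAP_COMPRESSED (verx10 >= 200) or
 * ANV_BO_SLAB_HEAP_SMEM_COHERENT.
 */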
static unsigned heap_max_get(struct anv_device *device)
{
   unsigned ret;

   if (anv_physical_device_has_vram(device->physical))
      ret = ANV_BO_SLAB_HEAP_LMEM_ONLY;
   else
      ret = device->info->verx10 >= 200 ? ANV_BO_SLAB_HEAP_COMPRESSED :
                                          ANV_BO_SLAB_HEAP_SMEM_COHERENT;

   return (ret + 1);
}
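
/* pb_slabs callback: an entry can be reclaimed once nothing holds a
 * reference to it anymore.
 */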
static bool
anv_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct anv_bo *bo = container_of(entry, struct anv_bo, slab_entry);

   return p_atomic_read(&bo->refcount) == 0;
}
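
/* pb_slabs callback: allocate the parent BO of a new slab and carve it into
 * equally sized entries placed on the slab's free list.
 */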
static struct pb_slab *
anv_slab_alloc(void *priv,
               unsigned heap,
               unsigned entry_size,
               unsigned group_index)
{
   struct anv_device *device = priv;
   struct anv_slab *slab = calloc(1, sizeof(struct anv_slab));
   if (!slab)
      return NULL;

   const enum anv_bo_slab_heap bo_slab_heap = heap;
   enum anv_bo_alloc_flags alloc_flags = ANV_BO_ALLOC_SLAB_PARENT;

   switch (bo_slab_heap) {
   case ANV_BO_SLAB_HEAP_SMEM_CACHED_COHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_CACHED_COHERENT |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_SMEM_CACHED_INCOHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_CACHED |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_SMEM_COHERENT:
      alloc_flags |= ANV_BO_ALLOC_HOST_COHERENT |
                     ANV_BO_ALLOC_NO_LOCAL_MEM;
      break;
   case ANV_BO_SLAB_HEAP_COMPRESSED:
      alloc_flags |= ANV_BO_ALLOC_COMPRESSED;
      break;
   case ANV_BO_SLAB_HEAP_LMEM_SMEM:
      alloc_flags |= ANV_BO_ALLOC_MAPPED |
                     ANV_BO_ALLOC_HOST_COHERENT;
      break;
   case ANV_BO_SLAB_HEAP_LMEM_ONLY:
      break;
   case ANV_BO_SLAB_HEAP_CACHED_COHERENT_CAPTURE:
      alloc_flags |= ANV_BO_ALLOC_BATCH_BUFFER_FLAGS;
      break;
   case ANV_BO_SLAB_HEAP_DYNAMIC_VISIBLE_POOL:
      alloc_flags |= ANV_BO_ALLOC_DYNAMIC_VISIBLE_POOL_FLAGS;
      break;
   case ANV_BO_SLAB_HEAP_DESCRIPTOR_POOL:
      alloc_flags |= ANV_BO_ALLOC_DESCRIPTOR_POOL_FLAGS;
      break;
   default:
      unreachable("Missing");
      return NULL;
   }

   struct pb_slabs *slabs = get_slabs(device, entry_size);
   entry_size = MAX2(entry_size, 1ULL << slabs->min_order);
   if (!util_is_power_of_two_nonzero(entry_size))
      entry_size = util_next_power_of_two(entry_size);

   unsigned slab_parent_size = entry_size * 8;
   /* Allocate at least a 2MB buffer; this allows the KMD to enable
    * transparent huge pages (THP) for this BO.
    */
   slab_parent_size = MAX2(slab_parent_size, 2 * 1024 * 1024);

   VkResult result;
   result = anv_device_alloc_bo(device, "slab_parent", slab_parent_size, alloc_flags,
                                0, &slab->bo);
   if (result != VK_SUCCESS)
      goto error_alloc_bo;

   slab_parent_size = slab->bo->size = slab->bo->actual_size;

   slab->base.num_entries = slab_parent_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto error_alloc_entries;

   list_inithead(&slab->base.free);

   for (unsigned i = 0; i < slab->base.num_entries; i++) {
      struct anv_bo *bo = &slab->entries[i];
      uint64_t offset = intel_48b_address(slab->bo->offset);

      offset += (i * entry_size);

      bo->name = "slab_child";
      bo->gem_handle = slab->bo->gem_handle;
      bo->refcount = 0;
      bo->offset = intel_canonical_address(offset);
      bo->size = entry_size;
      bo->actual_size = entry_size;
      bo->alloc_flags = alloc_flags;
      bo->vma_heap = slab->bo->vma_heap;
      bo->slab_parent = slab->bo;
      bo->slab_entry.slab = &slab->base;

      list_addtail(&bo->slab_entry.head, &slab->base.free);
   }

   return &slab->base;

error_alloc_entries:
   anv_device_release_bo(device, slab->bo);
error_alloc_bo:
   free(slab);

   return NULL;
}
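
/* pb_slabs callback: release the parent BO and free the slab bookkeeping. */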
static void
anv_slab_free(void *priv, struct pb_slab *pslab)
{
   struct anv_device *device = priv;
   struct anv_slab *slab = (void *)pslab;

   anv_device_release_bo(device, slab->bo);

   free(slab->entries);
   free(slab);
}
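
/* Initialize the per-device slab allocators, splitting the entry size orders
 * (2^8 up to 2^20 bytes) across ARRAY_SIZE(device->bo_slabs) allocators.
 * Slabs stay disabled (num_heaps == 0) when ANV_DEBUG_NO_SLAB is set or the
 * KMD lacks the required mmap features.
 */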
bool
anv_slab_bo_init(struct anv_device *device)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);
   unsigned min_slab_order = 8; /* 256 bytes */
   const unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            num_slab_allocator;

   if (unlikely(device->physical->instance->debug & ANV_DEBUG_NO_SLAB))
      return true;

   /* Slab BOs require mmap offset and partial mmap offset support. */
   if (!device->info->has_mmap_offset || !device->info->has_partial_mmap_offset)
      return true;

   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < num_slab_allocator; i++) {
      const unsigned min_order = min_slab_order;
      const unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                      max_slab_order);

      if (!pb_slabs_init(&device->bo_slabs[i], min_order, max_order,
                         heap_max_get(device), false, device,
                         anv_can_reclaim_slab,
                         anv_slab_alloc,
                         anv_slab_free)) {
         goto error_slabs_init;
      }

      min_slab_order = max_order + 1;
   }

   return true;

error_slabs_init:
   for (unsigned i = 0; i < num_slab_allocator; i++) {
      if (!device->bo_slabs[i].groups)
         break;

      pb_slabs_deinit(&device->bo_slabs[i]);
   }

   return false;
}
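
/* Tear down the slab allocators created by anv_slab_bo_init(). */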
void
anv_slab_bo_deinit(struct anv_device *device)
{
   const unsigned num_slab_allocator = ARRAY_SIZE(device->bo_slabs);

   if (anv_slab_bo_is_disabled(device))
      return;

   for (int i = 0; i < num_slab_allocator; i++) {
      if (device->bo_slabs[i].groups)
         pb_slabs_deinit(&device->bo_slabs[i]);
   }
}