/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_heap.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_queue.h"

#include "util/macros.h"
#include "util/u_math.h"

#include "nv_push.h"
#include "nv_push_cl90b5.h"

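/* Initializes a heap that sub-allocates from an nvk_mem_arena.
 *
 * mem_flags and map_flags are passed through to the arena.  The last
 * overalloc bytes of each arena chunk are withheld from the allocator as
 * padding, so overalloc must be smaller than the minimum chunk size.  If
 * contiguous is set, the arena grows as a single contiguous address range.
 */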
VkResult
nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
              enum nvkmd_mem_flags mem_flags,
              enum nvkmd_mem_map_flags map_flags,
              uint32_t overalloc, bool contiguous)
{
   VkResult result = nvk_mem_arena_init(dev, &heap->arena, mem_flags, map_flags,
                                        contiguous, NVK_MEM_ARENA_MAX_SIZE);
   if (result != VK_SUCCESS)
      return result;

   assert(overalloc < NVK_MEM_ARENA_MIN_SIZE);
   heap->overalloc = overalloc;
   util_vma_heap_init(&heap->heap, 0, 0);

   return VK_SUCCESS;
}

void
nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
{
   util_vma_heap_finish(&heap->heap);
   nvk_mem_arena_finish(dev, &heap->arena);
}

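/* Grows the heap by one arena chunk and returns the new range, minus the
 * overalloc padding, to the VMA allocator.  Must be called with the arena
 * mutex held.
 */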
static VkResult
nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
{
   const bool is_first_grow = nvk_mem_arena_size_B(&heap->arena) == 0;
   VkResult result;

   uint64_t addr, mem_size_B;
   result = nvk_mem_arena_grow_locked(dev, &heap->arena, &addr, &mem_size_B);
   if (result != VK_SUCCESS)
      return result;

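   /* Withhold the last overalloc bytes of each chunk from the allocator as
    * padding.  For a contiguous arena, the new chunk starts right where the
    * previous one ended, so the padding withheld at the end of that chunk
    * becomes usable again and is handed back along with the new chunk.
    */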
   if (nvk_mem_arena_is_contiguous(&heap->arena) && !is_first_grow) {
      util_vma_heap_free(&heap->heap, addr - heap->overalloc, mem_size_B);
   } else {
      util_vma_heap_free(&heap->heap, addr, mem_size_B - heap->overalloc);
   }

   return VK_SUCCESS;
}

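/* Allocates size bytes with the given alignment, growing the arena until
 * the allocation succeeds or a grow fails.  If map_out is non-NULL and the
 * arena is mapped, a CPU pointer to the allocation is returned as well.
 * Must be called with the arena mutex held.
 */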
static VkResult
nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
                      uint64_t size, uint32_t alignment,
                      uint64_t *addr_out, void **map_out)
{
   /* Make sure we follow the restrictions in nvk_mem_arena_map():  keep
    * mapped allocations small enough and sufficiently aligned that they
    * never straddle an arena chunk boundary, so they can be mapped with a
    * single CPU pointer.
    */
   if (map_out != NULL && nvk_mem_arena_is_mapped(&heap->arena)) {
      assert(size <= NVK_MEM_ARENA_MIN_SIZE);
      alignment = MAX2(alignment, util_next_power_of_two(size));
   }

   while (1) {
      uint64_t addr = util_vma_heap_alloc(&heap->heap, size, alignment);
      if (addr != 0) {
         *addr_out = addr;

         if (map_out != NULL) {
            if (nvk_mem_arena_is_mapped(&heap->arena))
               *map_out = nvk_mem_arena_map(&heap->arena, addr, size);
            else
               *map_out = NULL;
         }

         return VK_SUCCESS;
      }

      VkResult result = nvk_heap_grow_locked(dev, heap);
      if (result != VK_SUCCESS)
         return result;
   }
}

static void
nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
                     uint64_t addr, uint64_t size)
{
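   /* Reject zero-size frees and address-range wrap-around */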
   assert(addr + size > addr);
   util_vma_heap_free(&heap->heap, addr, size);
}

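/* Thread-safe wrapper around nvk_heap_alloc_locked() which takes the arena
 * mutex.
 */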
VkResult
nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
               uint64_t size, uint32_t alignment,
               uint64_t *addr_out, void **map_out)
{
   simple_mtx_lock(&heap->arena.mutex);
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, map_out);
   simple_mtx_unlock(&heap->arena.mutex);

   return result;
}

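/* Allocates size bytes from the heap and fills them with data, either by
 * copying through the CPU mapping when the heap is mapped for write or by
 * kicking off a copy on the device's upload queue otherwise.  On success,
 * the GPU address of the data is returned in addr_out.
 */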
VkResult
nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
                const void *data, size_t size, uint32_t alignment,
                uint64_t *addr_out)
{
   VkResult result;

   result = nvk_heap_alloc(dev, heap, size, alignment, addr_out, NULL);
   if (result != VK_SUCCESS)
      return result;

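   /* If the heap's memory is CPU-mapped for write, just copy the data in
    * directly.
    */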
   if (heap->arena.map_flags & NVKMD_MEM_MAP_WR) {
      nvk_mem_arena_copy_to_gpu(&heap->arena, *addr_out, data, size);
   } else {
      /* Otherwise, kick off an upload with the upload queue.
       *
       * This is a queued operation that the driver ensures happens before any
       * more client work via semaphores.  Because this is asynchronous and
       * heap allocations are synchronous, we have to be a bit careful here.
       * The heap only ever tracks the current known CPU state of everything
       * while the upload queue makes that state valid at some point in the
       * future.
       *
       * This can be especially tricky for very fast upload/free cycles such
       * as if the client compiles a shader, throws it away without using it,
       * and then compiles another shader that ends up at the same address.
       * What makes this all correct is the fact that everything on the
       * upload queue happens in a well-defined device-wide order.  In this
       * case the first shader will get uploaded and then the second will get
       * uploaded over top of it.  As long as we don't free the memory out
       * from under the upload queue, everything will end up in the correct
       * state by the time the client's shaders actually execute.
       */
      result = nvk_upload_queue_upload(dev, &dev->upload, *addr_out,
                                       data, size);
      if (result != VK_SUCCESS) {
         nvk_heap_free(dev, heap, *addr_out, size);
         return result;
      }
   }

   return VK_SUCCESS;
}

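/* Returns an allocation to the heap, taking the arena mutex.  The backing
 * arena memory is not released; the range is simply made available for
 * future allocations.
 */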
void
nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
              uint64_t addr, uint64_t size)
{
   simple_mtx_lock(&heap->arena.mutex);
   nvk_heap_free_locked(dev, heap, addr, size);
   simple_mtx_unlock(&heap->arena.mutex);
}