| /* |
| * Copyright © 2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "compiler/blob.h" |
| #include "util/hash_table.h" |
| #include "util/debug.h" |
| #include "anv_private.h" |
| |
| struct anv_shader_bin * |
| anv_shader_bin_create(struct anv_device *device, |
| const void *key_data, uint32_t key_size, |
| const void *kernel_data, uint32_t kernel_size, |
| const struct brw_stage_prog_data *prog_data_in, |
| uint32_t prog_data_size, const void *prog_data_param_in, |
| const struct anv_pipeline_bind_map *bind_map) |
| { |
| struct anv_shader_bin *shader; |
| struct anv_shader_bin_key *key; |
| struct brw_stage_prog_data *prog_data; |
| uint32_t *prog_data_param; |
| struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor; |
| |
| ANV_MULTIALLOC(ma); |
| anv_multialloc_add(&ma, &shader, 1); |
| anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size); |
| anv_multialloc_add_size(&ma, &prog_data, prog_data_size); |
| anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params); |
| anv_multialloc_add(&ma, &surface_to_descriptor, |
| bind_map->surface_count); |
| anv_multialloc_add(&ma, &sampler_to_descriptor, |
| bind_map->sampler_count); |
| |
| if (!anv_multialloc_alloc(&ma, &device->alloc, |
| VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) |
| return NULL; |
| |
| shader->ref_cnt = 1; |
| |
| key->size = key_size; |
| memcpy(key->data, key_data, key_size); |
| shader->key = key; |
| |
| shader->kernel = |
| anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64); |
| memcpy(shader->kernel.map, kernel_data, kernel_size); |
| shader->kernel_size = kernel_size; |
| |
| memcpy(prog_data, prog_data_in, prog_data_size); |
| memcpy(prog_data_param, prog_data_param_in, |
| prog_data->nr_params * sizeof(*prog_data_param)); |
| prog_data->param = prog_data_param; |
| shader->prog_data = prog_data; |
| shader->prog_data_size = prog_data_size; |
| |
| shader->bind_map = *bind_map; |
| typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor, |
| bind_map->surface_count); |
| shader->bind_map.surface_to_descriptor = surface_to_descriptor; |
| typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor, |
| bind_map->sampler_count); |
| shader->bind_map.sampler_to_descriptor = sampler_to_descriptor; |
| |
| return shader; |
| } |
| |
| void |
| anv_shader_bin_destroy(struct anv_device *device, |
| struct anv_shader_bin *shader) |
| { |
| assert(shader->ref_cnt == 0); |
| anv_state_pool_free(&device->instruction_state_pool, shader->kernel); |
| vk_free(&device->alloc, shader); |
| } |
| |
| static bool |
| anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader, |
| struct blob *blob) |
| { |
| bool ok; |
| |
| ok = blob_write_uint32(blob, shader->key->size); |
| ok = blob_write_bytes(blob, shader->key->data, shader->key->size); |
| |
| ok = blob_write_uint32(blob, shader->kernel_size); |
| ok = blob_write_bytes(blob, shader->kernel.map, shader->kernel_size); |
| |
| ok = blob_write_uint32(blob, shader->prog_data_size); |
| ok = blob_write_bytes(blob, shader->prog_data, shader->prog_data_size); |
| ok = blob_write_bytes(blob, shader->prog_data->param, |
| shader->prog_data->nr_params * |
| sizeof(*shader->prog_data->param)); |
| |
| ok = blob_write_uint32(blob, shader->bind_map.surface_count); |
| ok = blob_write_uint32(blob, shader->bind_map.sampler_count); |
| ok = blob_write_uint32(blob, shader->bind_map.image_count); |
| ok = blob_write_bytes(blob, shader->bind_map.surface_to_descriptor, |
| shader->bind_map.surface_count * |
| sizeof(*shader->bind_map.surface_to_descriptor)); |
| ok = blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor, |
| shader->bind_map.sampler_count * |
| sizeof(*shader->bind_map.sampler_to_descriptor)); |
| |
| return ok; |
| } |
| |
| static struct anv_shader_bin * |
| anv_shader_bin_create_from_blob(struct anv_device *device, |
| struct blob_reader *blob) |
| { |
| uint32_t key_size = blob_read_uint32(blob); |
| const void *key_data = blob_read_bytes(blob, key_size); |
| |
| uint32_t kernel_size = blob_read_uint32(blob); |
| const void *kernel_data = blob_read_bytes(blob, kernel_size); |
| |
| uint32_t prog_data_size = blob_read_uint32(blob); |
| const struct brw_stage_prog_data *prog_data = |
| blob_read_bytes(blob, prog_data_size); |
| if (blob->overrun) |
| return NULL; |
| const void *prog_data_param = |
| blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param)); |
| |
| struct anv_pipeline_bind_map bind_map; |
| bind_map.surface_count = blob_read_uint32(blob); |
| bind_map.sampler_count = blob_read_uint32(blob); |
| bind_map.image_count = blob_read_uint32(blob); |
| bind_map.surface_to_descriptor = (void *) |
| blob_read_bytes(blob, bind_map.surface_count * |
| sizeof(*bind_map.surface_to_descriptor)); |
| bind_map.sampler_to_descriptor = (void *) |
| blob_read_bytes(blob, bind_map.sampler_count * |
| sizeof(*bind_map.sampler_to_descriptor)); |
| |
| if (blob->overrun) |
| return NULL; |
| |
| return anv_shader_bin_create(device, |
| key_data, key_size, |
| kernel_data, kernel_size, |
| prog_data, prog_data_size, prog_data_param, |
| &bind_map); |
| } |
| |
| /* Remaining work: |
| * |
| * - Compact binding table layout so it's tight and not dependent on |
| * descriptor set layout. |
| * |
| * - Review prog_data struct for size and cacheability: struct |
| * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 |
| * bit quantities etc; use bit fields for all bools, eg dual_src_blend. |
| */ |
| |
| static uint32_t |
| shader_bin_key_hash_func(const void *void_key) |
| { |
| const struct anv_shader_bin_key *key = void_key; |
| return _mesa_hash_data(key->data, key->size); |
| } |
| |
| static bool |
| shader_bin_key_compare_func(const void *void_a, const void *void_b) |
| { |
| const struct anv_shader_bin_key *a = void_a, *b = void_b; |
| if (a->size != b->size) |
| return false; |
| |
| return memcmp(a->data, b->data, a->size) == 0; |
| } |
| |
| void |
| anv_pipeline_cache_init(struct anv_pipeline_cache *cache, |
| struct anv_device *device, |
| bool cache_enabled) |
| { |
| cache->device = device; |
| pthread_mutex_init(&cache->mutex, NULL); |
| |
| if (cache_enabled) { |
| cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, |
| shader_bin_key_compare_func); |
| } else { |
| cache->cache = NULL; |
| } |
| } |
| |
| void |
| anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) |
| { |
| pthread_mutex_destroy(&cache->mutex); |
| |
| if (cache->cache) { |
| /* This is a bit unfortunate. In order to keep things from randomly |
| * going away, the shader cache has to hold a reference to all shader |
| * binaries it contains. We unref them when we destroy the cache. |
| */ |
| struct hash_entry *entry; |
| hash_table_foreach(cache->cache, entry) |
| anv_shader_bin_unref(cache->device, entry->data); |
| |
| _mesa_hash_table_destroy(cache->cache, NULL); |
| } |
| } |
| |
| static struct anv_shader_bin * |
| anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, |
| const void *key_data, uint32_t key_size) |
| { |
| uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; |
| struct anv_shader_bin_key *key = (void *)vla; |
| key->size = key_size; |
| memcpy(key->data, key_data, key_size); |
| |
| struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); |
| if (entry) |
| return entry->data; |
| else |
| return NULL; |
| } |
| |
| struct anv_shader_bin * |
| anv_pipeline_cache_search(struct anv_pipeline_cache *cache, |
| const void *key_data, uint32_t key_size) |
| { |
| if (!cache->cache) |
| return NULL; |
| |
| pthread_mutex_lock(&cache->mutex); |
| |
| struct anv_shader_bin *shader = |
| anv_pipeline_cache_search_locked(cache, key_data, key_size); |
| |
| pthread_mutex_unlock(&cache->mutex); |
| |
| /* We increment refcount before handing it to the caller */ |
| if (shader) |
| anv_shader_bin_ref(shader); |
| |
| return shader; |
| } |
| |
| static struct anv_shader_bin * |
| anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, |
| const void *key_data, uint32_t key_size, |
| const void *kernel_data, uint32_t kernel_size, |
| const struct brw_stage_prog_data *prog_data, |
| uint32_t prog_data_size, |
| const void *prog_data_param, |
| const struct anv_pipeline_bind_map *bind_map) |
| { |
| struct anv_shader_bin *shader = |
| anv_pipeline_cache_search_locked(cache, key_data, key_size); |
| if (shader) |
| return shader; |
| |
| struct anv_shader_bin *bin = |
| anv_shader_bin_create(cache->device, key_data, key_size, |
| kernel_data, kernel_size, |
| prog_data, prog_data_size, prog_data_param, |
| bind_map); |
| if (!bin) |
| return NULL; |
| |
| _mesa_hash_table_insert(cache->cache, bin->key, bin); |
| |
| return bin; |
| } |
| |
| struct anv_shader_bin * |
| anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, |
| const void *key_data, uint32_t key_size, |
| const void *kernel_data, uint32_t kernel_size, |
| const struct brw_stage_prog_data *prog_data, |
| uint32_t prog_data_size, |
| const struct anv_pipeline_bind_map *bind_map) |
| { |
| if (cache->cache) { |
| pthread_mutex_lock(&cache->mutex); |
| |
| struct anv_shader_bin *bin = |
| anv_pipeline_cache_add_shader(cache, key_data, key_size, |
| kernel_data, kernel_size, |
| prog_data, prog_data_size, |
| prog_data->param, bind_map); |
| |
| pthread_mutex_unlock(&cache->mutex); |
| |
| /* We increment refcount before handing it to the caller */ |
| if (bin) |
| anv_shader_bin_ref(bin); |
| |
| return bin; |
| } else { |
| /* In this case, we're not caching it so the caller owns it entirely */ |
| return anv_shader_bin_create(cache->device, key_data, key_size, |
| kernel_data, kernel_size, |
| prog_data, prog_data_size, |
| prog_data->param, bind_map); |
| } |
| } |
| |
| struct cache_header { |
| uint32_t header_size; |
| uint32_t header_version; |
| uint32_t vendor_id; |
| uint32_t device_id; |
| uint8_t uuid[VK_UUID_SIZE]; |
| }; |
| |
| static void |
| anv_pipeline_cache_load(struct anv_pipeline_cache *cache, |
| const void *data, size_t size) |
| { |
| struct anv_device *device = cache->device; |
| struct anv_physical_device *pdevice = &device->instance->physicalDevice; |
| |
| if (cache->cache == NULL) |
| return; |
| |
| struct blob_reader blob; |
| blob_reader_init(&blob, data, size); |
| |
| struct cache_header header; |
| blob_copy_bytes(&blob, &header, sizeof(header)); |
| uint32_t count = blob_read_uint32(&blob); |
| if (blob.overrun) |
| return; |
| |
| if (header.header_size < sizeof(header)) |
| return; |
| if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) |
| return; |
| if (header.vendor_id != 0x8086) |
| return; |
| if (header.device_id != device->chipset_id) |
| return; |
| if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0) |
| return; |
| |
| for (uint32_t i = 0; i < count; i++) { |
| struct anv_shader_bin *bin = |
| anv_shader_bin_create_from_blob(device, &blob); |
| if (!bin) |
| break; |
| _mesa_hash_table_insert(cache->cache, bin->key, bin); |
| } |
| } |
| |
| static bool |
| pipeline_cache_enabled() |
| { |
| static int enabled = -1; |
| if (enabled < 0) |
| enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); |
| return enabled; |
| } |
| |
| VkResult anv_CreatePipelineCache( |
| VkDevice _device, |
| const VkPipelineCacheCreateInfo* pCreateInfo, |
| const VkAllocationCallbacks* pAllocator, |
| VkPipelineCache* pPipelineCache) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| struct anv_pipeline_cache *cache; |
| |
| assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); |
| assert(pCreateInfo->flags == 0); |
| |
| cache = vk_alloc2(&device->alloc, pAllocator, |
| sizeof(*cache), 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); |
| if (cache == NULL) |
| return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); |
| |
| if (pCreateInfo->initialDataSize > 0) |
| anv_pipeline_cache_load(cache, |
| pCreateInfo->pInitialData, |
| pCreateInfo->initialDataSize); |
| |
| *pPipelineCache = anv_pipeline_cache_to_handle(cache); |
| |
| return VK_SUCCESS; |
| } |
| |
| void anv_DestroyPipelineCache( |
| VkDevice _device, |
| VkPipelineCache _cache, |
| const VkAllocationCallbacks* pAllocator) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); |
| |
| if (!cache) |
| return; |
| |
| anv_pipeline_cache_finish(cache); |
| |
| vk_free2(&device->alloc, pAllocator, cache); |
| } |
| |
| VkResult anv_GetPipelineCacheData( |
| VkDevice _device, |
| VkPipelineCache _cache, |
| size_t* pDataSize, |
| void* pData) |
| { |
| ANV_FROM_HANDLE(anv_device, device, _device); |
| ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); |
| struct anv_physical_device *pdevice = &device->instance->physicalDevice; |
| |
| struct blob blob; |
| if (pData) { |
| blob_init_fixed(&blob, pData, *pDataSize); |
| } else { |
| blob_init_fixed(&blob, NULL, SIZE_MAX); |
| } |
| |
| struct cache_header header = { |
| .header_size = sizeof(struct cache_header), |
| .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, |
| .vendor_id = 0x8086, |
| .device_id = device->chipset_id, |
| }; |
| memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE); |
| blob_write_bytes(&blob, &header, sizeof(header)); |
| |
| uint32_t count = 0; |
| intptr_t count_offset = blob_reserve_uint32(&blob); |
| if (count_offset < 0) { |
| *pDataSize = 0; |
| blob_finish(&blob); |
| return VK_INCOMPLETE; |
| } |
| |
| VkResult result = VK_SUCCESS; |
| if (cache->cache) { |
| struct hash_entry *entry; |
| hash_table_foreach(cache->cache, entry) { |
| struct anv_shader_bin *shader = entry->data; |
| |
| size_t save_size = blob.size; |
| if (!anv_shader_bin_write_to_blob(shader, &blob)) { |
| /* If it fails reset to the previous size and bail */ |
| blob.size = save_size; |
| result = VK_INCOMPLETE; |
| break; |
| } |
| |
| count++; |
| } |
| } |
| |
| blob_overwrite_uint32(&blob, count_offset, count); |
| |
| *pDataSize = blob.size; |
| |
| blob_finish(&blob); |
| |
| return result; |
| } |
| |
| VkResult anv_MergePipelineCaches( |
| VkDevice _device, |
| VkPipelineCache destCache, |
| uint32_t srcCacheCount, |
| const VkPipelineCache* pSrcCaches) |
| { |
| ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); |
| |
| if (!dst->cache) |
| return VK_SUCCESS; |
| |
| for (uint32_t i = 0; i < srcCacheCount; i++) { |
| ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); |
| if (!src->cache) |
| continue; |
| |
| struct hash_entry *entry; |
| hash_table_foreach(src->cache, entry) { |
| struct anv_shader_bin *bin = entry->data; |
| assert(bin); |
| |
| if (_mesa_hash_table_search(dst->cache, bin->key)) |
| continue; |
| |
| anv_shader_bin_ref(bin); |
| _mesa_hash_table_insert(dst->cache, bin->key, bin); |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |