| /* |
| * Copyright © 2022 Imagination Technologies Ltd. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <assert.h> |
| #include <stdbool.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <vulkan/vulkan.h> |
| |
| #include "hwdef/rogue_hw_utils.h" |
| #include "pvr_bo.h" |
| #include "pvr_cdm_load_sr.h" |
| #include "pvr_csb.h" |
| #include "pvr_job_context.h" |
| #include "pvr_pds.h" |
| #include "pvr_private.h" |
| #include "pvr_transfer_eot.h" |
| #include "pvr_types.h" |
| #include "pvr_vdm_load_sr.h" |
| #include "pvr_vdm_store_sr.h" |
| #include "pvr_winsys.h" |
| #include "util/macros.h" |
| #include "vk_alloc.h" |
| #include "vk_log.h" |
| |
| /* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this |
| * value when constructing the control stream? |
| */ |
| /* The VDM callstack is used by the hardware to implement control stream links |
| * with a return, i.e. sub-control streams/subroutines. This value specifies the |
| * maximum callstack depth. |
| */ |
| #define PVR_VDM_CALLSTACK_MAX_DEPTH 1U |
| |
| #define ROGUE_PDS_TASK_PROGRAM_SIZE 256U |
| |
| static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device, |
| struct pvr_reset_cmd *const reset_cmd) |
| { |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| |
| /* The reset framework depends on compute support in the hw. */ |
| assert(PVR_HAS_FEATURE(dev_info, compute)); |
| |
| if (PVR_HAS_QUIRK(dev_info, 51764)) |
| pvr_finishme("Missing reset support for brn51764"); |
| |
| if (PVR_HAS_QUIRK(dev_info, 58839)) |
| pvr_finishme("Missing reset support for brn58839"); |
| |
| return VK_SUCCESS; |
| } |
| |
/* Tears down state created by pvr_ctx_reset_cmd_init().
 *
 * Currently a no-op since the init function doesn't allocate anything yet;
 * kept so callers already pair init/fini correctly.
 */
static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
                                   struct pvr_reset_cmd *reset_cmd)

{
   /* TODO: reset command cleanup. */
}
| |
| static VkResult pvr_pds_pt_store_program_create_and_upload( |
| struct pvr_device *device, |
| struct pvr_bo *pt_bo, |
| uint32_t pt_bo_size, |
| struct pvr_pds_upload *const pds_upload_out) |
| { |
| struct pvr_pds_stream_out_terminate_program program = { 0 }; |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); |
| size_t staging_buffer_size; |
| uint32_t *staging_buffer; |
| uint32_t *data_buffer; |
| uint32_t *code_buffer; |
| VkResult result; |
| |
| /* Check the bo size can be converted to dwords without any rounding. */ |
| assert(pt_bo_size % 4 == 0); |
| |
| program.pds_persistent_temp_size_to_store = pt_bo_size / 4; |
| program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr; |
| |
| pvr_pds_generate_stream_out_terminate_program(&program, |
| NULL, |
| PDS_GENERATE_SIZES, |
| dev_info); |
| |
| staging_buffer_size = (program.stream_out_terminate_pds_data_size + |
| program.stream_out_terminate_pds_code_size) * |
| sizeof(*staging_buffer); |
| |
| staging_buffer = vk_zalloc(&device->vk.alloc, |
| staging_buffer_size, |
| 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| if (!staging_buffer) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| data_buffer = staging_buffer; |
| code_buffer = |
| pvr_pds_generate_stream_out_terminate_program(&program, |
| data_buffer, |
| PDS_GENERATE_DATA_SEGMENT, |
| dev_info); |
| pvr_pds_generate_stream_out_terminate_program(&program, |
| code_buffer, |
| PDS_GENERATE_CODE_SEGMENT, |
| dev_info); |
| |
| /* This PDS program is passed to the HW via the PPP state words. These only |
| * allow the data segment address to be specified and expect the code |
| * segment to immediately follow. Assume the code alignment is the same as |
| * the data. |
| */ |
| result = |
| pvr_gpu_upload_pds(device, |
| data_buffer, |
| program.stream_out_terminate_pds_data_size, |
| PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), |
| code_buffer, |
| program.stream_out_terminate_pds_code_size, |
| PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), |
| cache_line_size, |
| pds_upload_out); |
| |
| vk_free(&device->vk.alloc, staging_buffer); |
| |
| return result; |
| } |
| |
| static VkResult pvr_pds_pt_resume_program_create_and_upload( |
| struct pvr_device *device, |
| struct pvr_bo *pt_bo, |
| uint32_t pt_bo_size, |
| struct pvr_pds_upload *const pds_upload_out) |
| { |
| struct pvr_pds_stream_out_init_program program = { 0 }; |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); |
| size_t staging_buffer_size; |
| uint32_t *staging_buffer; |
| uint32_t *data_buffer; |
| uint32_t *code_buffer; |
| VkResult result; |
| |
| /* Check the bo size can be converted to dwords without any rounding. */ |
| assert(pt_bo_size % 4 == 0); |
| |
| program.num_buffers = 1; |
| program.pds_buffer_data_size[0] = pt_bo_size / 4; |
| program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr; |
| |
| pvr_pds_generate_stream_out_init_program(&program, |
| NULL, |
| false, |
| PDS_GENERATE_SIZES, |
| dev_info); |
| |
| staging_buffer_size = (program.stream_out_init_pds_data_size + |
| program.stream_out_init_pds_code_size) * |
| sizeof(*staging_buffer); |
| |
| staging_buffer = vk_zalloc(&device->vk.alloc, |
| staging_buffer_size, |
| 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| if (!staging_buffer) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| data_buffer = staging_buffer; |
| code_buffer = |
| pvr_pds_generate_stream_out_init_program(&program, |
| data_buffer, |
| false, |
| PDS_GENERATE_DATA_SEGMENT, |
| dev_info); |
| pvr_pds_generate_stream_out_init_program(&program, |
| code_buffer, |
| false, |
| PDS_GENERATE_CODE_SEGMENT, |
| dev_info); |
| |
| /* This PDS program is passed to the HW via the PPP state words. These only |
| * allow the data segment address to be specified and expect the code |
| * segment to immediately follow. Assume the code alignment is the same as |
| * the data. |
| */ |
| result = |
| pvr_gpu_upload_pds(device, |
| data_buffer, |
| program.stream_out_init_pds_data_size, |
| PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), |
| code_buffer, |
| program.stream_out_init_pds_code_size, |
| PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), |
| cache_line_size, |
| pds_upload_out); |
| |
| vk_free(&device->vk.alloc, staging_buffer); |
| |
| return result; |
| } |
| |
| static VkResult |
| pvr_render_job_pt_programs_setup(struct pvr_device *device, |
| struct rogue_pt_programs *pt_programs) |
| { |
| VkResult result; |
| |
| result = pvr_bo_alloc(device, |
| device->heaps.pds_heap, |
| ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, |
| ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT, |
| PVR_BO_ALLOC_FLAG_CPU_ACCESS, |
| &pt_programs->store_resume_state_bo); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = pvr_pds_pt_store_program_create_and_upload( |
| device, |
| pt_programs->store_resume_state_bo, |
| ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, |
| &pt_programs->pds_store_program); |
| if (result != VK_SUCCESS) |
| goto err_free_store_resume_state_bo; |
| |
| result = pvr_pds_pt_resume_program_create_and_upload( |
| device, |
| pt_programs->store_resume_state_bo, |
| ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, |
| &pt_programs->pds_resume_program); |
| if (result != VK_SUCCESS) |
| goto err_free_pds_store_program; |
| |
| return VK_SUCCESS; |
| |
| err_free_pds_store_program: |
| pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo); |
| |
| err_free_store_resume_state_bo: |
| pvr_bo_free(device, pt_programs->store_resume_state_bo); |
| |
| return result; |
| } |
| |
/* Frees the buffers created by pvr_render_job_pt_programs_setup(). */
static void
pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
                                   struct rogue_pt_programs *pt_programs)
{
   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
   pvr_bo_free(device, pt_programs->store_resume_state_bo);
}
| |
| static void pvr_pds_ctx_sr_program_setup( |
| bool cc_enable, |
| uint64_t usc_program_upload_offset, |
| uint8_t usc_temps, |
| pvr_dev_addr_t sr_addr, |
| struct pvr_pds_shared_storing_program *const program_out) |
| { |
| /* The PDS task is the same for stores and loads. */ |
| *program_out = (struct pvr_pds_shared_storing_program){ |
| .cc_enable = cc_enable, |
| .doutw_control = { |
| .dest_store = PDS_UNIFIED_STORE, |
| .num_const64 = 2, |
| .doutw_data = { |
| [0] = sr_addr.addr, |
| [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE, |
| }, |
| .last_instruction = false, |
| }, |
| }; |
| |
| pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control, |
| usc_program_upload_offset, |
| usc_temps, |
| PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), |
| false); |
| } |
| |
| /* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to |
| * this. If there is a problem here it's likely that the same problem exists |
| * there so don't forget to update the compute function. |
| */ |
| static VkResult pvr_pds_render_ctx_sr_program_create_and_upload( |
| struct pvr_device *device, |
| uint64_t usc_program_upload_offset, |
| uint8_t usc_temps, |
| pvr_dev_addr_t sr_addr, |
| struct pvr_pds_upload *const pds_upload_out) |
| { |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); |
| const uint32_t pds_data_alignment = |
| PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; |
| |
| /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data |
| * and code size when using the PDS_GENERATE_SIZES mode. |
| */ |
| STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); |
| uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; |
| struct pvr_pds_shared_storing_program program; |
| ASSERTED uint32_t *buffer_end; |
| uint32_t code_offset; |
| |
| pvr_pds_ctx_sr_program_setup(false, |
| usc_program_upload_offset, |
| usc_temps, |
| sr_addr, |
| &program); |
| |
| pvr_pds_generate_shared_storing_program(&program, |
| &staging_buffer[0], |
| PDS_GENERATE_DATA_SEGMENT, |
| dev_info); |
| |
| code_offset = ALIGN_POT(program.data_size, pds_data_alignment); |
| |
| buffer_end = |
| pvr_pds_generate_shared_storing_program(&program, |
| &staging_buffer[code_offset], |
| PDS_GENERATE_CODE_SEGMENT, |
| dev_info); |
| |
| assert((uint32_t)(buffer_end - staging_buffer) * 4 < |
| ROGUE_PDS_TASK_PROGRAM_SIZE); |
| |
| return pvr_gpu_upload_pds(device, |
| &staging_buffer[0], |
| program.data_size, |
| PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT), |
| &staging_buffer[code_offset], |
| program.code_size, |
| PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT), |
| cache_line_size, |
| pds_upload_out); |
| } |
| |
| /* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to |
| * this. If there is a problem here it's likely that the same problem exists |
| * there so don't forget to update the render_ctx function. |
| */ |
| static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload( |
| struct pvr_device *device, |
| bool is_loading_program, |
| uint64_t usc_program_upload_offset, |
| uint8_t usc_temps, |
| pvr_dev_addr_t sr_addr, |
| struct pvr_pds_upload *const pds_upload_out) |
| { |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); |
| const uint32_t pds_data_alignment = |
| PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; |
| |
| /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data |
| * and code size when using the PDS_GENERATE_SIZES mode. |
| */ |
| STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); |
| uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; |
| struct pvr_pds_shared_storing_program program; |
| uint32_t *buffer_ptr; |
| uint32_t code_offset; |
| |
| pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421), |
| usc_program_upload_offset, |
| usc_temps, |
| sr_addr, |
| &program); |
| |
| if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { |
| pvr_pds_generate_compute_shared_loading_program(&program, |
| &staging_buffer[0], |
| PDS_GENERATE_DATA_SEGMENT, |
| dev_info); |
| } else { |
| pvr_pds_generate_shared_storing_program(&program, |
| &staging_buffer[0], |
| PDS_GENERATE_DATA_SEGMENT, |
| dev_info); |
| } |
| |
| code_offset = ALIGN_POT(program.data_size, pds_data_alignment); |
| |
| buffer_ptr = |
| pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset], |
| PDS_GENERATE_CODE_SEGMENT); |
| |
| if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { |
| buffer_ptr = pvr_pds_generate_compute_shared_loading_program( |
| &program, |
| buffer_ptr, |
| PDS_GENERATE_CODE_SEGMENT, |
| dev_info); |
| } else { |
| buffer_ptr = |
| pvr_pds_generate_shared_storing_program(&program, |
| buffer_ptr, |
| PDS_GENERATE_CODE_SEGMENT, |
| dev_info); |
| } |
| |
| assert((uint32_t)(buffer_ptr - staging_buffer) * 4 < |
| ROGUE_PDS_TASK_PROGRAM_SIZE); |
| |
| STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) == |
| PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT)); |
| |
| STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) == |
| PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT)); |
| |
| return pvr_gpu_upload_pds( |
| device, |
| &staging_buffer[0], |
| program.data_size, |
| PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT), |
| &staging_buffer[code_offset], |
| (uint32_t)(buffer_ptr - &staging_buffer[code_offset]), |
| PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT), |
| cache_line_size, |
| pds_upload_out); |
| } |
| |
/* Selects which context type the shared register (SR) store/load programs
 * are generated for.
 */
enum pvr_ctx_sr_program_target {
   PVR_CTX_SR_RENDER_TARGET,
   PVR_CTX_SR_COMPUTE_TARGET,
};
| |
| static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device, |
| enum pvr_ctx_sr_program_target target, |
| struct rogue_sr_programs *sr_programs) |
| { |
| const uint64_t store_load_state_bo_size = |
| PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) + |
| ROGUE_LLS_SHARED_REGS_RESERVE_SIZE; |
| const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); |
| uint64_t usc_store_program_upload_offset; |
| uint64_t usc_load_program_upload_offset; |
| const uint8_t *usc_load_sr_code; |
| uint32_t usc_load_sr_code_size; |
| VkResult result; |
| |
| /* Note that this is being used for both compute and render ctx. There is no |
| * compute equivalent define for the VDMCTRL unit size. |
| */ |
| /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */ |
| sr_programs->usc.unified_size = |
| DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE)); |
| |
| result = pvr_bo_alloc(device, |
| device->heaps.pds_heap, |
| store_load_state_bo_size, |
| cache_line_size, |
| PVR_WINSYS_BO_FLAG_CPU_ACCESS, |
| &sr_programs->store_load_state_bo); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| /* USC state update: SR state store. */ |
| |
| assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); |
| |
| result = pvr_gpu_upload_usc(device, |
| pvr_vdm_store_sr_code, |
| sizeof(pvr_vdm_store_sr_code), |
| cache_line_size, |
| &sr_programs->usc.store_program_bo); |
| if (result != VK_SUCCESS) |
| goto err_free_store_load_state_bo; |
| |
| usc_store_program_upload_offset = |
| sr_programs->usc.store_program_bo->vma->dev_addr.addr - |
| device->heaps.usc_heap->base_addr.addr; |
| |
| /* USC state update: SR state load. */ |
| |
| if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) { |
| STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); |
| |
| usc_load_sr_code = pvr_cdm_load_sr_code; |
| usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code); |
| } else { |
| STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); |
| |
| usc_load_sr_code = pvr_vdm_load_sr_code; |
| usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code); |
| } |
| |
| result = pvr_gpu_upload_usc(device, |
| usc_load_sr_code, |
| usc_load_sr_code_size, |
| cache_line_size, |
| &sr_programs->usc.load_program_bo); |
| if (result != VK_SUCCESS) |
| goto err_free_usc_store_program_bo; |
| |
| usc_load_program_upload_offset = |
| sr_programs->usc.load_program_bo->vma->dev_addr.addr - |
| device->heaps.usc_heap->base_addr.addr; |
| |
| /* FIXME: The number of USC temps should be output alongside |
| * pvr_vdm_store_sr_code rather than hard coded. |
| */ |
| /* Create and upload the PDS load and store programs. Point them to the |
| * appropriate USC load and store programs. |
| */ |
| switch (target) { |
| case PVR_CTX_SR_RENDER_TARGET: |
| /* PDS state update: SR state store. */ |
| result = pvr_pds_render_ctx_sr_program_create_and_upload( |
| device, |
| usc_store_program_upload_offset, |
| 8, |
| sr_programs->store_load_state_bo->vma->dev_addr, |
| &sr_programs->pds.store_program); |
| if (result != VK_SUCCESS) |
| goto err_free_usc_load_program_bo; |
| |
| /* PDS state update: SR state load. */ |
| result = pvr_pds_render_ctx_sr_program_create_and_upload( |
| device, |
| usc_load_program_upload_offset, |
| 20, |
| sr_programs->store_load_state_bo->vma->dev_addr, |
| &sr_programs->pds.load_program); |
| if (result != VK_SUCCESS) |
| goto err_free_pds_store_program_bo; |
| |
| break; |
| |
| case PVR_CTX_SR_COMPUTE_TARGET: |
| /* PDS state update: SR state store. */ |
| result = pvr_pds_compute_ctx_sr_program_create_and_upload( |
| device, |
| false, |
| usc_store_program_upload_offset, |
| 8, |
| sr_programs->store_load_state_bo->vma->dev_addr, |
| &sr_programs->pds.store_program); |
| if (result != VK_SUCCESS) |
| goto err_free_usc_load_program_bo; |
| |
| /* PDS state update: SR state load. */ |
| result = pvr_pds_compute_ctx_sr_program_create_and_upload( |
| device, |
| true, |
| usc_load_program_upload_offset, |
| 20, |
| sr_programs->store_load_state_bo->vma->dev_addr, |
| &sr_programs->pds.load_program); |
| if (result != VK_SUCCESS) |
| goto err_free_pds_store_program_bo; |
| |
| break; |
| |
| default: |
| unreachable("Invalid target."); |
| break; |
| } |
| |
| return VK_SUCCESS; |
| |
| err_free_pds_store_program_bo: |
| pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo); |
| |
| err_free_usc_load_program_bo: |
| pvr_bo_free(device, sr_programs->usc.load_program_bo); |
| |
| err_free_usc_store_program_bo: |
| pvr_bo_free(device, sr_programs->usc.store_program_bo); |
| |
| err_free_store_load_state_bo: |
| pvr_bo_free(device, sr_programs->store_load_state_bo); |
| |
| return VK_SUCCESS; |
| } |
| |
/* Frees all buffers created by pvr_ctx_sr_programs_setup(). */
static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
                                        struct rogue_sr_programs *sr_programs)
{
   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
   pvr_bo_free(device, sr_programs->usc.load_program_bo);
   pvr_bo_free(device, sr_programs->usc.store_program_bo);
   pvr_bo_free(device, sr_programs->store_load_state_bo);
}
| |
| static VkResult |
| pvr_render_ctx_switch_programs_setup(struct pvr_device *device, |
| struct pvr_render_ctx_programs *programs) |
| { |
| VkResult result; |
| |
| result = pvr_render_job_pt_programs_setup(device, &programs->pt); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = pvr_ctx_sr_programs_setup(device, |
| PVR_CTX_SR_RENDER_TARGET, |
| &programs->sr); |
| if (result != VK_SUCCESS) |
| goto err_pt_programs_cleanup; |
| |
| return VK_SUCCESS; |
| |
| err_pt_programs_cleanup: |
| pvr_render_job_pt_programs_cleanup(device, &programs->pt); |
| |
| return result; |
| } |
| |
/* Tears down what pvr_render_ctx_switch_programs_setup() created, in reverse
 * setup order.
 */
static void
pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
                                       struct pvr_render_ctx_programs *programs)
{
   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
}
| |
| static VkResult pvr_render_ctx_switch_init(struct pvr_device *device, |
| struct pvr_render_ctx *ctx) |
| { |
| struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch; |
| const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED | |
| PVR_BO_ALLOC_FLAG_CPU_ACCESS; |
| const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED | |
| PVR_BO_ALLOC_FLAG_CPU_ACCESS; |
| VkResult result; |
| |
| result = pvr_bo_alloc(device, |
| device->heaps.general_heap, |
| ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE, |
| ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT, |
| vdm_state_bo_flags, |
| &ctx_switch->vdm_state_bo); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| result = pvr_bo_alloc(device, |
| device->heaps.general_heap, |
| ROGUE_LLS_TA_STATE_BUFFER_SIZE, |
| ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT, |
| geom_state_bo_flags, |
| &ctx_switch->geom_state_bo); |
| if (result != VK_SUCCESS) |
| goto err_pvr_bo_free_vdm_state_bo; |
| |
| for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { |
| result = |
| pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]); |
| if (result) |
| goto err_programs_cleanup; |
| } |
| |
| return result; |
| |
| err_programs_cleanup: |
| for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { |
| pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]); |
| } |
| |
| pvr_bo_free(device, ctx_switch->geom_state_bo); |
| |
| err_pvr_bo_free_vdm_state_bo: |
| pvr_bo_free(device, ctx_switch->vdm_state_bo); |
| |
| return result; |
| } |
| |
/* Tears down everything created by pvr_render_ctx_switch_init(). */
static void pvr_render_ctx_switch_fini(struct pvr_device *device,
                                       struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
   }

   pvr_bo_free(device, ctx_switch->geom_state_bo);
   pvr_bo_free(device, ctx_switch->vdm_state_bo);
}
| |
/* Packs the VDMCTRL_PDS_STATE0 and _STATE1 words that kick the given PDS
 * program; the code address word (STATE2) is packed separately by callers.
 */
static void
pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
                                      uint8_t usc_unified_size,
                                      uint32_t *const state0_out,
                                      uint32_t *const state1_out)
{
   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
      state.usc_target = usc_target;
      state.usc_common_size = 0;
      state.usc_unified_size = usc_unified_size;
      state.pds_temp_size = 0;

      /* The hardware field counts in data-size units; the byte size must be
       * an exact multiple of the unit size.
       */
      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
   };

   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
   }
}
| |
/* Packs the TA_STATE_STREAM_OUT1/2 words that point the hardware at the
 * given persistent temp store/resume PDS program.
 */
static void
pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
                                          uint32_t *const stream_out1_out,
                                          uint32_t *const stream_out2_out)
{
   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.sync = true;

      /* The hardware field counts in data-size units; the byte size must be
       * an exact multiple of the unit size.
       */
      assert(pds_data_size %
                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);

      state.pds_temp_size = 0;
   }

   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
   }
}
| |
/* Fills in the static (kick-independent) winsys render context state: the
 * VDM/geometry context state base addresses plus, for each per-core program
 * set, the packed context store and resume task words.
 */
static void pvr_render_ctx_ws_static_state_init(
   struct pvr_render_ctx *ctx,
   struct pvr_winsys_render_ctx_static_state *static_state)
{
   /* Scratch destination pointers for 64-bit (q) and 32-bit (d) words. */
   uint64_t *q_dst;
   uint32_t *d_dst;

   q_dst = &static_state->vdm_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
   }

   q_dst = &static_state->geom_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;

      /* Context store state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
         /* SR store program; target ANY since any USC can run the store. */
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
                                               PVRX(VDMCTRL_USC_TARGET_ANY),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_store_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_store_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
         /* Persistent temp store program for the stream out words. */
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }

      /* Context resume state. */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
         /* SR load program; target ALL so every USC gets its state back. */
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
                                               PVRX(VDMCTRL_USC_TARGET_ALL),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset);
         }
      }

      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
         /* Persistent temp resume program for the stream out words. */
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }
   }
}
| |
/* Populates the winsys render context create info from an (already fully
 * initialized) pvr_render_ctx.
 */
static void pvr_render_ctx_ws_create_info_init(
   struct pvr_render_ctx *ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_render_ctx_create_info *create_info)
{
   create_info->priority = priority;
   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;

   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
}
| |
| VkResult pvr_render_ctx_create(struct pvr_device *device, |
| enum pvr_winsys_ctx_priority priority, |
| struct pvr_render_ctx **const ctx_out) |
| { |
| const uint64_t vdm_callstack_size = |
| sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH; |
| struct pvr_winsys_render_ctx_create_info create_info; |
| struct pvr_render_ctx *ctx; |
| VkResult result; |
| |
| ctx = vk_alloc(&device->vk.alloc, |
| sizeof(*ctx), |
| 8, |
| VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| if (!ctx) |
| return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| ctx->device = device; |
| |
| result = pvr_bo_alloc(device, |
| device->heaps.general_heap, |
| vdm_callstack_size, |
| PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT), |
| 0, |
| &ctx->vdm_callstack_bo); |
| if (result != VK_SUCCESS) |
| goto err_vk_free_ctx; |
| |
| result = pvr_render_ctx_switch_init(device, ctx); |
| if (result != VK_SUCCESS) |
| goto err_free_vdm_callstack_bo; |
| |
| result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd); |
| if (result != VK_SUCCESS) |
| goto err_render_ctx_switch_fini; |
| |
| /* ctx must be fully initialized by this point since |
| * pvr_render_ctx_ws_create_info_init() depends on this. |
| */ |
| pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info); |
| |
| result = device->ws->ops->render_ctx_create(device->ws, |
| &create_info, |
| &ctx->ws_ctx); |
| if (result != VK_SUCCESS) |
| goto err_render_ctx_reset_cmd_fini; |
| |
| *ctx_out = ctx; |
| |
| return VK_SUCCESS; |
| |
| err_render_ctx_reset_cmd_fini: |
| pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); |
| |
| err_render_ctx_switch_fini: |
| pvr_render_ctx_switch_fini(device, ctx); |
| |
| err_free_vdm_callstack_bo: |
| pvr_bo_free(device, ctx->vdm_callstack_bo); |
| |
| err_vk_free_ctx: |
| vk_free(&device->vk.alloc, ctx); |
| |
| return result; |
| } |
| |
/* Destroys a render context created by pvr_render_ctx_create(), releasing
 * resources in reverse creation order.
 */
void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   pvr_render_ctx_switch_fini(device, ctx);
   pvr_bo_free(device, ctx->vdm_callstack_bo);
   vk_free(&device->vk.alloc, ctx);
}
| |
| static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload( |
| struct pvr_device *device, |
| struct pvr_pds_upload *const pds_upload_out) |
| { |
| const uint32_t pds_data_alignment = |
| PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; |
| const struct pvr_device_runtime_info *dev_runtime_info = |
| &device->pdevice->dev_runtime_info; |
| ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U]; |
| struct pvr_pds_fence_program program = { 0 }; |
| ASSERTED uint32_t *buffer_end; |
| uint32_t code_offset; |
| uint32_t data_size; |
| |
| /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */ |
| assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) && |
| dev_runtime_info->num_phantoms >= 2)); |
| |
| pvr_pds_generate_fence_terminate_program(&program, |
| staging_buffer, |
| PDS_GENERATE_DATA_SEGMENT, |
| &device->pdevice->dev_info); |
| |
| /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size |
| * when we generate the code segment. Implement |
| * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change? |
| * This behavior doesn't seem consistent with the rest of the api. For now |
| * we store the size in a variable. |
| */ |
| data_size = program.data_size; |
| code_offset = ALIGN_POT(program.data_size, pds_data_alignment); |
| |
| buffer_end = |
| pvr_pds_generate_fence_terminate_program(&program, |
| &staging_buffer[code_offset], |
| PDS_GENERATE_CODE_SEGMENT, |
| &device->pdevice->dev_info); |
| |
| assert((uint64_t)(buffer_end - staging_buffer) * 4U < |
| ROGUE_PDS_TASK_PROGRAM_SIZE); |
| |
| return pvr_gpu_upload_pds(device, |
| staging_buffer, |
| data_size, |
| PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT), |
| &staging_buffer[code_offset], |
| program.code_size, |
| PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT), |
| 0, |
| pds_upload_out); |
| } |
| |
/* Packs the static (set-once-per-context) CDM control register state handed
 * to the winsys at compute context creation: the context store, terminate
 * and resume (load) PDS program addresses, plus the associated execution
 * parameters (data sizes, allocation sizes, dependency/fence flags).
 *
 * NOTE(review): sr[0] backs the primary register copy and sr[1] the "_b"
 * copy — presumably for double-buffered context store; confirm against the
 * firmware/winsys consumer.
 */
static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         ctx_switch->sr[0].pds.store_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      /* The register encodes the data size in hardware units; the byte size
       * must be an exact multiple of that unit.
       */
      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         ctx_switch->sr_fence_terminate_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}
| |
| static void pvr_compute_ctx_ws_create_info_init( |
| const struct pvr_compute_ctx *const ctx, |
| enum pvr_winsys_ctx_priority priority, |
| struct pvr_winsys_compute_ctx_create_info *const create_info) |
| { |
| create_info->priority = priority; |
| |
| pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info, |
| ctx, |
| &create_info->static_state); |
| } |
| |
/* Creates a compute context: allocates the CDM context resume buffer, sets
 * up the store/load (context switch) PDS/USC programs and the fence
 * terminate program, then creates the winsys context that references them.
 *
 * On failure all partially acquired resources are released in reverse order
 * via the goto-cleanup chain and the error code is returned; on success the
 * new context is returned through ctx_out and owned by the caller (freed
 * with pvr_compute_ctx_destroy()).
 */
VkResult pvr_compute_ctx_create(struct pvr_device *const device,
                                enum pvr_winsys_ctx_priority priority,
                                struct pvr_compute_ctx **const ctx_out)
{
   struct pvr_winsys_compute_ctx_create_info create_info;
   struct pvr_compute_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   /* Buffer the hardware uses to save/restore compute state across a
    * context switch; CPU-accessible and GPU-uncached.
    */
   result = pvr_bo_alloc(
      device,
      device->heaps.general_heap,
      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
      &ctx->ctx_switch.compute_state_bo);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   /* TODO: Change this so that enabling storage to B doesn't change the array
    * size. Instead of looping we could unroll this and have the second
    * programs setup depending on the B enable. Doing it that way would make
    * things more obvious.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
      result = pvr_ctx_sr_programs_setup(device,
                                         PVR_CTX_SR_COMPUTE_TARGET,
                                         &ctx->ctx_switch.sr[i]);
      if (result != VK_SUCCESS) {
         /* Only unwind the entries set up before the failing one; the
          * err_free_sr_programs label below cleans the whole array and
          * must not be used here.
          */
         for (uint32_t j = 0; j < i; j++)
            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);

         goto err_free_state_buffer;
      }
   }

   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
      device,
      &ctx->ctx_switch.sr_fence_terminate_program);
   if (result != VK_SUCCESS)
      goto err_free_sr_programs;

   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_pds_fence_terminate_program;

   result = device->ws->ops->compute_ctx_create(device->ws,
                                                &create_info,
                                                &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   *ctx_out = ctx;

   return VK_SUCCESS;

   /* Error unwinding: labels release resources in reverse acquisition
    * order; execution deliberately falls through from each label into the
    * next.
    */
err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_pds_fence_terminate_program:
   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);

err_free_sr_programs:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

err_free_state_buffer:
   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
| |
| void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx) |
| { |
| struct pvr_device *device = ctx->device; |
| |
| device->ws->ops->compute_ctx_destroy(ctx->ws_ctx); |
| |
| pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); |
| |
| pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo); |
| for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i) |
| pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]); |
| |
| pvr_bo_free(device, ctx->ctx_switch.compute_state_bo); |
| |
| vk_free(&device->vk.alloc, ctx); |
| } |
| |
| static void pvr_transfer_ctx_ws_create_info_init( |
| enum pvr_winsys_ctx_priority priority, |
| struct pvr_winsys_transfer_ctx_create_info *const create_info) |
| { |
| create_info->priority = priority; |
| } |
| |
| static VkResult pvr_transfer_ctx_setup_shaders(struct pvr_device *device, |
| struct pvr_transfer_ctx *ctx) |
| { |
| const uint32_t cache_line_size = |
| rogue_get_slc_cache_line_size(&device->pdevice->dev_info); |
| VkResult result; |
| |
| /* TODO: Setup USC fragments. */ |
| |
| /* Setup EOT program. */ |
| result = pvr_gpu_upload_usc(device, |
| pvr_transfer_eot_usc_code, |
| sizeof(pvr_transfer_eot_usc_code), |
| cache_line_size, |
| &ctx->usc_eot_bo); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| STATIC_ASSERT(ARRAY_SIZE(pvr_transfer_eot_usc_offsets) == |
| ARRAY_SIZE(ctx->transfer_mrts)); |
| for (uint32_t i = 0U; i < ARRAY_SIZE(pvr_transfer_eot_usc_offsets); i++) { |
| ctx->transfer_mrts[i] = |
| PVR_DEV_ADDR_OFFSET(ctx->usc_eot_bo->vma->dev_addr, |
| pvr_transfer_eot_usc_offsets[i]); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static void pvr_transfer_ctx_fini_shaders(struct pvr_device *device, |
| struct pvr_transfer_ctx *ctx) |
| { |
| pvr_bo_free(device, ctx->usc_eot_bo); |
| } |
| |
/* Creates a transfer context: initializes the reset command, creates the
 * winsys context, uploads the transfer USC shaders, and uploads the PDS
 * uniform/texture state program for every (i, j) combination except (0, 0).
 *
 * The context is zero-allocated so that the error path can detect which
 * pds_unitex_code entries were actually uploaded by their non-NULL bo
 * pointers. On success the new context is returned through ctx_out and
 * freed with pvr_transfer_ctx_destroy().
 */
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
                                 enum pvr_winsys_ctx_priority priority,
                                 struct pvr_transfer_ctx **const ctx_out)
{
   struct pvr_winsys_transfer_ctx_create_info create_info;
   struct pvr_transfer_ctx *ctx;
   VkResult result;

   /* vk_zalloc (not vk_alloc): the NULL checks in the cleanup paths depend
    * on the pds_unitex_code bo pointers starting out zeroed.
    */
   ctx = vk_zalloc(&device->vk.alloc,
                   sizeof(*ctx),
                   8U,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);

   result = device->ws->ops->transfer_ctx_create(device->ws,
                                                 &create_info,
                                                 &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   result = pvr_transfer_ctx_setup_shaders(device, ctx);
   if (result != VK_SUCCESS)
      goto err_destroy_transfer_ctx;

   /* Create the PDS Uniform/Tex state code segment array. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* NOTE(review): the (0, 0) combination is deliberately never
          * uploaded — presumably no uniform/texture state program is
          * needed then; its bo stays NULL (zeroed above) so the cleanup
          * loops skip it. Confirm against the users of pds_unitex_code.
          */
         if (i == 0U && j == 0U)
            continue;

         result = pvr_pds_unitex_state_program_create_and_upload(
            device,
            NULL,
            i,
            j,
            &ctx->pds_unitex_code[i][j]);
         if (result != VK_SUCCESS) {
            goto err_free_pds_unitex_bos;
         }
      }
   }

   *ctx_out = ctx;

   return VK_SUCCESS;

   /* Error unwinding: labels release resources in reverse acquisition
    * order; execution falls through from each label into the next.
    */
err_free_pds_unitex_bos:
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* Skip entries that were never created ((0, 0) and everything
          * after the failing upload).
          */
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);

err_destroy_transfer_ctx:
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}
| |
| void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx) |
| { |
| struct pvr_device *device = ctx->device; |
| |
| for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) { |
| for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) { |
| if (!ctx->pds_unitex_code[i][j].pvr_bo) |
| continue; |
| |
| pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo); |
| } |
| } |
| |
| pvr_transfer_ctx_fini_shaders(device, ctx); |
| device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx); |
| pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd); |
| vk_free(&device->vk.alloc, ctx); |
| } |