| /* |
| * Copyright © 2022 Imagination Technologies Ltd. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <assert.h> |
| #include <stdbool.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <vulkan/vulkan.h> |
| |
| #include "pvr_blit.h" |
| #include "pvr_clear.h" |
| #include "pvr_csb.h" |
| #include "pvr_formats.h" |
| #include "pvr_job_transfer.h" |
| #include "pvr_private.h" |
| #include "usc/programs/pvr_shader_factory.h" |
| #include "usc/programs/pvr_static_shaders.h" |
| #include "pvr_types.h" |
| #include "util/bitscan.h" |
| #include "util/list.h" |
| #include "util/macros.h" |
| #include "util/u_math.h" |
| #include "vk_alloc.h" |
| #include "vk_command_buffer.h" |
| #include "vk_command_pool.h" |
| #include "vk_format.h" |
| #include "vk_log.h" |
| |
| /* TODO: Investigate where this limit comes from. */ |
| #define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U |
| |
| static struct pvr_transfer_cmd * |
| pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer) |
| { |
| struct pvr_transfer_cmd *transfer_cmd; |
| |
| transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc, |
| sizeof(*transfer_cmd), |
| 8U, |
| VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); |
| if (!transfer_cmd) { |
| vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); |
| return NULL; |
| } |
| |
| /* transfer_cmd->mapping_count is already set to zero. */ |
| transfer_cmd->sources[0].filter = PVR_FILTER_POINT; |
| transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND; |
| transfer_cmd->sources[0].addr_mode = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE; |
| transfer_cmd->cmd_buffer = cmd_buffer; |
| |
| return transfer_cmd; |
| } |
| |
| static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface, |
| VkRect2D *rect, |
| pvr_dev_addr_t dev_addr, |
| VkDeviceSize offset, |
| VkFormat vk_format, |
| VkFormat image_format, |
| uint32_t width, |
| uint32_t height, |
| uint32_t stride) |
| { |
| enum pipe_format pformat = vk_format_to_pipe_format(image_format); |
| |
| surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset); |
| surface->width = width; |
| surface->height = height; |
| surface->stride = stride; |
| surface->vk_format = vk_format; |
| surface->mem_layout = PVR_MEMLAYOUT_LINEAR; |
| surface->sample_count = 1; |
| |
| /* Initialize the rectangle extent. rect->offset must be zero, since the |
| * offset has already been folded into the device address above; we rely on |
| * callers passing a zero-initialized rect rather than clearing it |
| * explicitly. |
| */ |
| rect->extent.width = width; |
| rect->extent.height = height; |
| |
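| /* For block-compressed formats the transfer works in units of blocks |
| * rather than texels, so convert the surface extent, stride and rect to |
| * block units. |
| */ |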
| if (util_format_is_compressed(pformat)) { |
| uint32_t block_width = util_format_get_blockwidth(pformat); |
| uint32_t block_height = util_format_get_blockheight(pformat); |
| |
| surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width)); |
| surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height)); |
| surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width)); |
| |
| rect->offset.x /= block_width; |
| rect->offset.y /= block_height; |
| rect->extent.width = |
| MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width)); |
| rect->extent.height = |
| MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height)); |
| } |
| } |
| |
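| /* Map a format to a bit-identical "raw" format of the same block size so |
| * that copies can reinterpret texel memory without any format conversion. |
| */ |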
| VkFormat pvr_get_raw_copy_format(VkFormat format) |
| { |
| switch (vk_format_get_blocksize(format)) { |
| case 1: |
| return VK_FORMAT_R8_UINT; |
| case 2: |
| return VK_FORMAT_R8G8_UINT; |
| case 3: |
| return VK_FORMAT_R8G8B8_UINT; |
| case 4: |
| return VK_FORMAT_R32_UINT; |
| case 6: |
| return VK_FORMAT_R16G16B16_UINT; |
| case 8: |
| return VK_FORMAT_R32G32_UINT; |
| case 12: |
| return VK_FORMAT_R32G32B32_UINT; |
| case 16: |
| return VK_FORMAT_R32G32B32A32_UINT; |
| default: |
| unreachable("Unhandled copy block size."); |
| } |
| } |
| |
| static void pvr_setup_transfer_surface(struct pvr_device *device, |
| struct pvr_transfer_cmd_surface *surface, |
| VkRect2D *rect, |
| const struct pvr_image *image, |
| uint32_t array_layer, |
| uint32_t mip_level, |
| const VkOffset3D *offset, |
| const VkExtent3D *extent, |
| float fdepth, |
| VkFormat format, |
| VkImageAspectFlags aspect_mask) |
| { |
| const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U); |
| const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U); |
| enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format); |
| enum pipe_format pformat = vk_format_to_pipe_format(format); |
| const VkImageSubresource sub_resource = { |
| .aspectMask = aspect_mask, |
| .mipLevel = mip_level, |
| .arrayLayer = array_layer, |
| }; |
| VkSubresourceLayout info; |
| uint32_t depth; |
| |
| if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED) |
| depth = MAX2(image->vk.extent.depth >> mip_level, 1U); |
| else |
| depth = 1U; |
| |
| pvr_get_image_subresource_layout(image, &sub_resource, &info); |
| |
| surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset); |
| surface->width = width; |
| surface->height = height; |
| surface->depth = depth; |
| |
| assert(info.rowPitch % vk_format_get_blocksize(format) == 0); |
| surface->stride = info.rowPitch / vk_format_get_blocksize(format); |
| |
| surface->vk_format = format; |
| surface->mem_layout = image->memlayout; |
| surface->sample_count = image->vk.samples; |
| |
| if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED) |
| surface->z_position = fdepth; |
| else |
| surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth); |
| |
| rect->offset.x = offset->x; |
| rect->offset.y = offset->y; |
| rect->extent.width = extent->width; |
| rect->extent.height = extent->height; |
| |
| if (util_format_is_compressed(image_pformat) && |
| !util_format_is_compressed(pformat)) { |
| uint32_t block_width = util_format_get_blockwidth(image_pformat); |
| uint32_t block_height = util_format_get_blockheight(image_pformat); |
| |
| surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width)); |
| surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height)); |
| surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width)); |
| |
| rect->offset.x /= block_width; |
| rect->offset.y /= block_height; |
| rect->extent.width = |
| MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width)); |
| rect->extent.height = |
| MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height)); |
| } |
| } |
| |
| void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer, |
| const VkBlitImageInfo2 *pBlitImageInfo) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage); |
| PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage); |
| struct pvr_device *device = cmd_buffer->device; |
| enum pvr_filter filter = PVR_FILTER_DONTCARE; |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| if (pBlitImageInfo->filter == VK_FILTER_LINEAR) |
| filter = PVR_FILTER_LINEAR; |
| |
| for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) { |
| const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i]; |
| |
| assert(region->srcSubresource.layerCount == |
| region->dstSubresource.layerCount); |
| const bool inverted_dst_z = |
| (region->dstOffsets[1].z < region->dstOffsets[0].z); |
| const bool inverted_src_z = |
| (region->srcOffsets[1].z < region->srcOffsets[0].z); |
| const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z |
| : region->srcOffsets[0].z; |
| const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z |
| : region->srcOffsets[1].z; |
| const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z |
| : region->dstOffsets[0].z; |
| const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z |
| : region->dstOffsets[1].z; |
| |
| const uint32_t src_width = |
| region->srcOffsets[1].x - region->srcOffsets[0].x; |
| const uint32_t src_height = |
| region->srcOffsets[1].y - region->srcOffsets[0].y; |
| uint32_t dst_width; |
| uint32_t dst_height; |
| |
| float initial_depth_offset; |
| VkExtent3D src_extent; |
| VkExtent3D dst_extent; |
| VkOffset3D dst_offset = region->dstOffsets[0]; |
| float z_slice_stride; |
| bool flip_x; |
| bool flip_y; |
| |
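| /* Normalise the destination rectangle: reversed dstOffsets mirror the |
| * blit along that axis, so record a flip and take the minimum corner as |
| * the offset. |
| */ |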
| if (region->dstOffsets[1].x > region->dstOffsets[0].x) { |
| dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x; |
| flip_x = false; |
| } else { |
| dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x; |
| flip_x = true; |
| dst_offset.x = region->dstOffsets[1].x; |
| } |
| |
| if (region->dstOffsets[1].y > region->dstOffsets[0].y) { |
| dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y; |
| flip_y = false; |
| } else { |
| dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y; |
| flip_y = true; |
| dst_offset.y = region->dstOffsets[1].y; |
| } |
| |
| /* If any of the source or destination extents is zero, skip this region |
| * and continue with the next one. |
| */ |
| if (!src_width || !src_height || !dst_width || !dst_height || |
| !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) { |
| mesa_loge("BlitImage: Region %u has an area of zero", i); |
| continue; |
| } |
| |
| src_extent = (VkExtent3D){ |
| .width = src_width, |
| .height = src_height, |
| .depth = 0U, |
| }; |
| |
| dst_extent = (VkExtent3D){ |
| .width = dst_width, |
| .height = dst_height, |
| .depth = 0U, |
| }; |
| |
| /* The z_position of a transfer surface is intended to be in the range |
| * of 0.0f <= z_position <= depth. It will be used as a texture coordinate |
| * in the source surface for cases where linear filtering is enabled, so |
| * the fractional part will need to represent the exact midpoint of a z |
| * slice range in the source texture, as it maps to each destination |
| * slice. |
| * |
| * For destination surfaces, the fractional part is discarded, so |
| * we can safely pass the slice index. |
| */ |
| |
| /* Calculate the ratio of z slices in our source region to that of our |
| * destination region, to get the number of z slices in our source region |
| * to iterate over for each destination slice. |
| * |
| * If our destination region is inverted, we iterate backwards. |
| */ |
| z_slice_stride = |
| (inverted_dst_z ? -1.0f : 1.0f) * |
| ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z)); |
| |
| /* Offset the initial depth offset by half of the z slice stride, into the |
| * blit region's z range. |
| */ |
| initial_depth_offset = |
| (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride); |
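| /* For example, blitting 4 source slices onto 2 destination slices gives |
| * z_slice_stride = 2.0 and initial_depth_offset = min_src_z + 1.0, i.e. |
| * the midpoint of the first pair of source slices. |
| */ |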
| |
| for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) { |
| struct pvr_transfer_cmd_surface src_surface = { 0 }; |
| struct pvr_transfer_cmd_surface dst_surface = { 0 }; |
| VkRect2D src_rect; |
| VkRect2D dst_rect; |
| |
| /* Get the subresource info for the src and dst images; this is |
| * required when incrementing the address of the depth slice used by |
| * the transfer surface. |
| */ |
| VkSubresourceLayout src_info, dst_info; |
| const VkImageSubresource src_sub_resource = { |
| .aspectMask = region->srcSubresource.aspectMask, |
| .mipLevel = region->srcSubresource.mipLevel, |
| .arrayLayer = region->srcSubresource.baseArrayLayer + j, |
| }; |
| const VkImageSubresource dst_sub_resource = { |
| .aspectMask = region->dstSubresource.aspectMask, |
| .mipLevel = region->dstSubresource.mipLevel, |
| .arrayLayer = region->dstSubresource.baseArrayLayer + j, |
| }; |
| |
| pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info); |
| pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info); |
| |
| /* Set up the transfer surfaces once per image layer and manually |
| * increment the depth slices below, which saves us from repeating the |
| * subresource queries. |
| */ |
| pvr_setup_transfer_surface(device, |
| &src_surface, |
| &src_rect, |
| src, |
| region->srcSubresource.baseArrayLayer + j, |
| region->srcSubresource.mipLevel, |
| ®ion->srcOffsets[0], |
| &src_extent, |
| initial_depth_offset, |
| src->vk.format, |
| region->srcSubresource.aspectMask); |
| |
| pvr_setup_transfer_surface(device, |
| &dst_surface, |
| &dst_rect, |
| dst, |
| region->dstSubresource.baseArrayLayer + j, |
| region->dstSubresource.mipLevel, |
| &dst_offset, |
| &dst_extent, |
| min_dst_z, |
| dst->vk.format, |
| region->dstSubresource.aspectMask); |
| |
| for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) { |
| struct pvr_transfer_cmd *transfer_cmd; |
| VkResult result; |
| |
| /* TODO: See if we can allocate all the transfer cmds in one go. */ |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return; |
| |
| transfer_cmd->sources[0].mappings[0].src_rect = src_rect; |
| transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect; |
| transfer_cmd->sources[0].mappings[0].flip_x = flip_x; |
| transfer_cmd->sources[0].mappings[0].flip_y = flip_y; |
| transfer_cmd->sources[0].mapping_count++; |
| |
| transfer_cmd->sources[0].surface = src_surface; |
| transfer_cmd->sources[0].filter = filter; |
| transfer_cmd->source_count = 1; |
| |
| transfer_cmd->dst = dst_surface; |
| transfer_cmd->scissor = dst_rect; |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return; |
| } |
| |
| if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) { |
| src_surface.z_position += z_slice_stride; |
| } else { |
| src_surface.dev_addr.addr += |
| src_info.depthPitch * ((uint32_t)z_slice_stride); |
| } |
| |
| if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) |
| dst_surface.z_position += 1.0f; |
| else |
| dst_surface.dev_addr.addr += dst_info.depthPitch; |
| } |
| } |
| } |
| } |
| |
| static VkFormat pvr_get_copy_format(VkFormat format) |
| { |
| switch (format) { |
| case VK_FORMAT_R8_SNORM: |
| return VK_FORMAT_R8_SINT; |
| case VK_FORMAT_R8G8_SNORM: |
| return VK_FORMAT_R8G8_SINT; |
| case VK_FORMAT_R8G8B8_SNORM: |
| return VK_FORMAT_R8G8B8_SINT; |
| case VK_FORMAT_R8G8B8A8_SNORM: |
| return VK_FORMAT_R8G8B8A8_SINT; |
| case VK_FORMAT_B8G8R8A8_SNORM: |
| return VK_FORMAT_B8G8R8A8_SINT; |
| default: |
| return format; |
| } |
| } |
| |
| static void |
| pvr_setup_surface_for_image(struct pvr_device *device, |
| struct pvr_transfer_cmd_surface *surface, |
| VkRect2D *rect, |
| const struct pvr_image *image, |
| uint32_t array_layer, |
| uint32_t array_offset, |
| uint32_t mip_level, |
| const VkOffset3D *offset, |
| const VkExtent3D *extent, |
| uint32_t depth, |
| VkFormat format, |
| const VkImageAspectFlags aspect_mask) |
| { |
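| /* For 3D images the slice is selected through the z coordinate rather |
| * than the array layer, so fold array_offset into the layer for non-3D |
| * images and use the depth parameter for 3D ones. |
| */ |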
| if (image->vk.image_type != VK_IMAGE_TYPE_3D) { |
| pvr_setup_transfer_surface(device, |
| surface, |
| rect, |
| image, |
| array_layer + array_offset, |
| mip_level, |
| offset, |
| extent, |
| 0.0f, |
| format, |
| aspect_mask); |
| } else { |
| pvr_setup_transfer_surface(device, |
| surface, |
| rect, |
| image, |
| array_layer, |
| mip_level, |
| offset, |
| extent, |
| (float)depth, |
| format, |
| aspect_mask); |
| } |
| } |
| |
| static VkResult |
| pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer, |
| enum pvr_resolve_op resolve_op, |
| const struct pvr_image *src, |
| const struct pvr_image *dst, |
| const VkImageCopy2 *region) |
| { |
| enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format); |
| enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format); |
| bool src_block_compressed = util_format_is_compressed(src_pformat); |
| bool dst_block_compressed = util_format_is_compressed(dst_pformat); |
| VkExtent3D src_extent; |
| VkExtent3D dst_extent; |
| VkFormat dst_format; |
| VkFormat src_format; |
| uint32_t dst_layers; |
| uint32_t src_layers; |
| uint32_t max_slices; |
| uint32_t flags = 0U; |
| |
| if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && |
| region->srcSubresource.aspectMask != |
| (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| /* Takes the stencil of the source and the depth of the destination and |
| * combines the two interleaved. |
| */ |
| flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE; |
| |
| if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| /* Takes the depth of the source and the stencil of the destination and |
| * combines the two interleaved. |
| */ |
| flags |= PVR_TRANSFER_CMD_FLAGS_PICKD; |
| } |
| } |
| |
| src_extent = region->extent; |
| dst_extent = region->extent; |
| |
| if (src_block_compressed && !dst_block_compressed) { |
| uint32_t block_width = util_format_get_blockwidth(src_pformat); |
| uint32_t block_height = util_format_get_blockheight(src_pformat); |
| |
| dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width)); |
| dst_extent.height = |
| MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height)); |
| } else if (!src_block_compressed && dst_block_compressed) { |
| uint32_t block_width = util_format_get_blockwidth(dst_pformat); |
| uint32_t block_height = util_format_get_blockheight(dst_pformat); |
| |
| dst_extent.width = MAX2(1U, src_extent.width * block_width); |
| dst_extent.height = MAX2(1U, src_extent.height * block_height); |
| } |
| |
| if (src->vk.samples > dst->vk.samples) { |
| /* Resolve op needs to know the actual format. */ |
| dst_format = dst->vk.format; |
| } else { |
| /* We don't care what format dst is as it's guaranteed to be size |
| * compatible with src. |
| */ |
| dst_format = pvr_get_raw_copy_format(src->vk.format); |
| } |
| src_format = dst_format; |
| |
| src_layers = |
| vk_image_subresource_layer_count(&src->vk, ®ion->srcSubresource); |
| dst_layers = |
| vk_image_subresource_layer_count(&dst->vk, ®ion->dstSubresource); |
| |
| /* In copies not involving 3D images, srcSubresource.layerCount must match |
| * dstSubresource.layerCount. In copies involving a 3D image, the slice |
| * count is the layerCount of the 2D image (if there is one) or the copy |
| * extent's depth. |
| */ |
| max_slices = MAX3(src_layers, dst_layers, region->extent.depth); |
| |
| for (uint32_t i = 0U; i < max_slices; i++) { |
| struct pvr_transfer_cmd *transfer_cmd; |
| VkResult result; |
| |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| transfer_cmd->flags |= flags; |
| transfer_cmd->sources[0].resolve_op = resolve_op; |
| |
| pvr_setup_surface_for_image( |
| cmd_buffer->device, |
| &transfer_cmd->sources[0].surface, |
| &transfer_cmd->sources[0].mappings[0U].src_rect, |
| src, |
| region->srcSubresource.baseArrayLayer, |
| i, |
| region->srcSubresource.mipLevel, |
| ®ion->srcOffset, |
| &src_extent, |
| region->srcOffset.z + i, |
| src_format, |
| region->srcSubresource.aspectMask); |
| |
| pvr_setup_surface_for_image(cmd_buffer->device, |
| &transfer_cmd->dst, |
| &transfer_cmd->scissor, |
| dst, |
| region->dstSubresource.baseArrayLayer, |
| i, |
| region->dstSubresource.mipLevel, |
| ®ion->dstOffset, |
| &dst_extent, |
| region->dstOffset.z + i, |
| dst_format, |
| region->dstSubresource.aspectMask); |
| |
| transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor; |
| transfer_cmd->sources[0].mapping_count++; |
| transfer_cmd->source_count = 1; |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return result; |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VkResult |
| pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer, |
| const struct pvr_image *src, |
| const struct pvr_image *dst, |
| const VkImageCopy2 *region) |
| { |
| enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND; |
| |
| if (src->vk.samples > 1U && dst->vk.samples < 2U) { |
| /* Integer resolve picks a single sample. */ |
| if (vk_format_is_int(src->vk.format)) |
| resolve_op = PVR_RESOLVE_SAMPLE0; |
| } |
| |
| return pvr_copy_or_resolve_image_region(cmd_buffer, |
| resolve_op, |
| src, |
| dst, |
| region); |
| } |
| |
| static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA, |
| const VkImageCopy2 *pRegionB) |
| { |
| assert(pRegionA->srcSubresource.aspectMask != 0U); |
| assert(pRegionB->srcSubresource.aspectMask != 0U); |
| |
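| /* The regions are only mergeable if their depth/stencil aspects differ, |
| * i.e. one covers depth and the other stencil. |
| */ |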
| if (!((pRegionA->srcSubresource.aspectMask ^ |
| pRegionB->srcSubresource.aspectMask) & |
| (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { |
| return false; |
| } |
| |
| /* Assert that the aspectMask matches between src and dst. These are depth |
| * and stencil images, so they are not multi-planar, and the Vulkan 1.0.223 |
| * spec requires: |
| * |
| * If neither srcImage nor dstImage has a multi-planar image format then |
| * for each element of pRegions, srcSubresource.aspectMask and |
| * dstSubresource.aspectMask must match. |
| */ |
| assert(pRegionA->srcSubresource.aspectMask == |
| pRegionA->dstSubresource.aspectMask); |
| assert(pRegionB->srcSubresource.aspectMask == |
| pRegionB->dstSubresource.aspectMask); |
| |
| if (!(pRegionA->srcSubresource.mipLevel == |
| pRegionB->srcSubresource.mipLevel && |
| pRegionA->srcSubresource.baseArrayLayer == |
| pRegionB->srcSubresource.baseArrayLayer && |
| pRegionA->srcSubresource.layerCount == |
| pRegionB->srcSubresource.layerCount)) { |
| return false; |
| } |
| |
| if (!(pRegionA->dstSubresource.mipLevel == |
| pRegionB->dstSubresource.mipLevel && |
| pRegionA->dstSubresource.baseArrayLayer == |
| pRegionB->dstSubresource.baseArrayLayer && |
| pRegionA->dstSubresource.layerCount == |
| pRegionB->dstSubresource.layerCount)) { |
| return false; |
| } |
| |
| if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x && |
| pRegionA->srcOffset.y == pRegionB->srcOffset.y && |
| pRegionA->srcOffset.z == pRegionB->srcOffset.z)) { |
| return false; |
| } |
| |
| if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x && |
| pRegionA->dstOffset.y == pRegionB->dstOffset.y && |
| pRegionA->dstOffset.z == pRegionB->dstOffset.z)) { |
| return false; |
| } |
| |
| if (!(pRegionA->extent.width == pRegionB->extent.width && |
| pRegionA->extent.height == pRegionB->extent.height && |
| pRegionA->extent.depth == pRegionB->extent.depth)) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer, |
| const VkCopyImageInfo2 *pCopyImageInfo) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage); |
| PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage); |
| |
| const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && |
| dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT; |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) { |
| VkResult result; |
| |
| /* If an application has split a copy between D24S8 images into two |
| * separate copy regions (one for the depth aspect and one for the |
| * stencil aspect), attempt to merge the two regions back into one blit. |
| * |
| * The regions can only be merged if they are identical apart from the |
| * aspectMask, where one must be depth and the other stencil. |
| * |
| * Only consecutive regions are considered for merging; non-consecutive |
| * regions are left as-is. |
| */ |
| if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) { |
| const bool ret = |
| pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i], |
| &pCopyImageInfo->pRegions[i + 1]); |
| if (ret) { |
| VkImageCopy2 region = pCopyImageInfo->pRegions[i]; |
| |
| region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT; |
| region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT; |
| |
| result = pvr_copy_or_resolve_color_image_region(cmd_buffer, |
| src, |
| dst, |
| ®ion); |
| if (result != VK_SUCCESS) |
| return; |
| |
| /* Skip the next region as it has been processed with the last |
| * region. |
| */ |
| i++; |
| |
| continue; |
| } |
| } |
| |
| result = |
| pvr_copy_or_resolve_color_image_region(cmd_buffer, |
| src, |
| dst, |
| &pCopyImageInfo->pRegions[i]); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| VkResult |
| pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer, |
| const pvr_dev_addr_t buffer_dev_addr, |
| const struct pvr_image *const image, |
| const VkBufferImageCopy2 *const region, |
| const VkFormat src_format, |
| const VkFormat dst_format, |
| const uint32_t flags) |
| { |
| enum pipe_format pformat = vk_format_to_pipe_format(dst_format); |
| uint32_t row_length_in_texels; |
| uint32_t buffer_slice_size; |
| uint32_t buffer_layer_size; |
| uint32_t height_in_blks; |
| uint32_t row_length; |
| |
| if (region->bufferRowLength == 0) |
| row_length_in_texels = region->imageExtent.width; |
| else |
| row_length_in_texels = region->bufferRowLength; |
| |
| if (region->bufferImageHeight == 0) |
| height_in_blks = region->imageExtent.height; |
| else |
| height_in_blks = region->bufferImageHeight; |
| |
| if (util_format_is_compressed(pformat)) { |
| uint32_t block_width = util_format_get_blockwidth(pformat); |
| uint32_t block_height = util_format_get_blockheight(pformat); |
| uint32_t block_size = util_format_get_blocksize(pformat); |
| |
| height_in_blks = DIV_ROUND_UP(height_in_blks, block_height); |
| row_length_in_texels = |
| DIV_ROUND_UP(row_length_in_texels, block_width) * block_size; |
| } |
| |
| row_length = row_length_in_texels * vk_format_get_blocksize(src_format); |
| |
| buffer_slice_size = height_in_blks * row_length; |
| buffer_layer_size = buffer_slice_size * region->imageExtent.depth; |
| |
| for (uint32_t i = 0; i < region->imageExtent.depth; i++) { |
| const uint32_t depth = i + (uint32_t)region->imageOffset.z; |
| |
| for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) { |
| const VkDeviceSize buffer_offset = region->bufferOffset + |
| (j * buffer_layer_size) + |
| (i * buffer_slice_size); |
| struct pvr_transfer_cmd *transfer_cmd; |
| VkResult result; |
| |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| transfer_cmd->flags = flags; |
| |
| pvr_setup_buffer_surface( |
| &transfer_cmd->sources[0].surface, |
| &transfer_cmd->sources[0].mappings[0].src_rect, |
| buffer_dev_addr, |
| buffer_offset, |
| src_format, |
| image->vk.format, |
| region->imageExtent.width, |
| region->imageExtent.height, |
| row_length_in_texels); |
| |
| transfer_cmd->sources[0].surface.depth = 1; |
| transfer_cmd->source_count = 1; |
| |
| pvr_setup_transfer_surface(cmd_buffer->device, |
| &transfer_cmd->dst, |
| &transfer_cmd->scissor, |
| image, |
| region->imageSubresource.baseArrayLayer + j, |
| region->imageSubresource.mipLevel, |
| ®ion->imageOffset, |
| ®ion->imageExtent, |
| depth, |
| dst_format, |
| region->imageSubresource.aspectMask); |
| |
| transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor; |
| transfer_cmd->sources[0].mapping_count++; |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return result; |
| } |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VkResult |
| pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer, |
| const pvr_dev_addr_t buffer_dev_addr, |
| const struct pvr_image *const image, |
| const VkBufferImageCopy2 *const region) |
| { |
| const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask; |
| VkFormat src_format; |
| VkFormat dst_format; |
| uint32_t flags = 0; |
| |
| if (vk_format_has_depth(image->vk.format) && |
| vk_format_has_stencil(image->vk.format)) { |
| flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE; |
| |
| if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) { |
| src_format = vk_format_stencil_only(image->vk.format); |
| } else { |
| src_format = vk_format_depth_only(image->vk.format); |
| flags |= PVR_TRANSFER_CMD_FLAGS_PICKD; |
| } |
| |
| dst_format = image->vk.format; |
| } else { |
| src_format = pvr_get_raw_copy_format(image->vk.format); |
| dst_format = src_format; |
| } |
| |
| return pvr_copy_buffer_to_image_region_format(cmd_buffer, |
| buffer_dev_addr, |
| image, |
| region, |
| src_format, |
| dst_format, |
| flags); |
| } |
| |
| void pvr_CmdCopyBufferToImage2( |
| VkCommandBuffer commandBuffer, |
| const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) |
| { |
| PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer); |
| PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage); |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) { |
| const VkResult result = |
| pvr_copy_buffer_to_image_region(cmd_buffer, |
| src->dev_addr, |
| dst, |
| &pCopyBufferToImageInfo->pRegions[i]); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| VkResult |
| pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer, |
| const struct pvr_image *const image, |
| const pvr_dev_addr_t buffer_dev_addr, |
| const VkBufferImageCopy2 *const region, |
| const VkFormat src_format, |
| const VkFormat dst_format) |
| { |
| enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format); |
| struct pvr_transfer_cmd_surface dst_surface = { 0 }; |
| VkImageSubresource sub_resource; |
| uint32_t buffer_image_height; |
| uint32_t buffer_row_length; |
| uint32_t buffer_slice_size; |
| uint32_t max_array_layers; |
| VkRect2D dst_rect = { 0 }; |
| uint32_t max_depth_slice; |
| VkSubresourceLayout info; |
| |
| /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to buffer. */ |
| assert(image->vk.samples == 1); |
| |
| if (region->bufferRowLength == 0) |
| buffer_row_length = region->imageExtent.width; |
| else |
| buffer_row_length = region->bufferRowLength; |
| |
| if (region->bufferImageHeight == 0) |
| buffer_image_height = region->imageExtent.height; |
| else |
| buffer_image_height = region->bufferImageHeight; |
| |
| max_array_layers = |
| region->imageSubresource.baseArrayLayer + |
| vk_image_subresource_layer_count(&image->vk, ®ion->imageSubresource); |
| |
| buffer_slice_size = buffer_image_height * buffer_row_length * |
| vk_format_get_blocksize(dst_format); |
| |
| max_depth_slice = region->imageExtent.depth + region->imageOffset.z; |
| |
| pvr_setup_buffer_surface(&dst_surface, |
| &dst_rect, |
| buffer_dev_addr, |
| region->bufferOffset, |
| dst_format, |
| image->vk.format, |
| buffer_row_length, |
| buffer_image_height, |
| buffer_row_length); |
| |
| dst_rect.extent.width = region->imageExtent.width; |
| dst_rect.extent.height = region->imageExtent.height; |
| |
| if (util_format_is_compressed(pformat)) { |
| uint32_t block_width = util_format_get_blockwidth(pformat); |
| uint32_t block_height = util_format_get_blockheight(pformat); |
| |
| dst_rect.extent.width = |
| MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width)); |
| dst_rect.extent.height = |
| MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height)); |
| } |
| |
| sub_resource = (VkImageSubresource){ |
| .aspectMask = region->imageSubresource.aspectMask, |
| .mipLevel = region->imageSubresource.mipLevel, |
| .arrayLayer = region->imageSubresource.baseArrayLayer, |
| }; |
| |
| pvr_get_image_subresource_layout(image, &sub_resource, &info); |
| |
| for (uint32_t i = region->imageSubresource.baseArrayLayer; |
| i < max_array_layers; |
| i++) { |
| struct pvr_transfer_cmd_surface src_surface = { 0 }; |
| VkRect2D src_rect = { 0 }; |
| |
| /* Note: Set the depth to the initial depth offset; the memory address (or |
| * the z_position) of the depth slice is then incremented manually in the |
| * loop below. |
| */ |
| pvr_setup_transfer_surface(cmd_buffer->device, |
| &src_surface, |
| &src_rect, |
| image, |
| i, |
| region->imageSubresource.mipLevel, |
| ®ion->imageOffset, |
| ®ion->imageExtent, |
| region->imageOffset.z, |
| src_format, |
| region->imageSubresource.aspectMask); |
| |
| for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) { |
| struct pvr_transfer_cmd *transfer_cmd; |
| VkResult result; |
| |
| /* TODO: See if we can allocate all the transfer cmds in one go. */ |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY); |
| |
| transfer_cmd->sources[0].mappings[0].src_rect = src_rect; |
| transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect; |
| transfer_cmd->sources[0].mapping_count++; |
| |
| transfer_cmd->sources[0].surface = src_surface; |
| transfer_cmd->source_count = 1; |
| |
| transfer_cmd->dst = dst_surface; |
| transfer_cmd->scissor = dst_rect; |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return result; |
| } |
| |
| dst_surface.dev_addr.addr += buffer_slice_size; |
| |
| if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) |
| src_surface.z_position += 1.0f; |
| else |
| src_surface.dev_addr.addr += info.depthPitch; |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| VkResult |
| pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer, |
| const struct pvr_image *const image, |
| const pvr_dev_addr_t buffer_dev_addr, |
| const VkBufferImageCopy2 *const region) |
| { |
| const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask; |
| |
| VkFormat src_format = pvr_get_copy_format(image->vk.format); |
| VkFormat dst_format; |
| |
| /* Color and depth aspect copies can be done using an appropriate raw format. |
| */ |
| if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) { |
| src_format = pvr_get_raw_copy_format(src_format); |
| dst_format = src_format; |
| } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| /* From the Vulkan spec: |
| * |
| * Data copied to or from the stencil aspect of any depth/stencil |
| * format is tightly packed with one VK_FORMAT_S8_UINT value per texel. |
| */ |
| dst_format = VK_FORMAT_S8_UINT; |
| } else { |
| /* YUV planes require specific formats. */ |
| dst_format = src_format; |
| } |
| |
| return pvr_copy_image_to_buffer_region_format(cmd_buffer, |
| image, |
| buffer_dev_addr, |
| region, |
| src_format, |
| dst_format); |
| } |
| |
| void pvr_CmdCopyImageToBuffer2( |
| VkCommandBuffer commandBuffer, |
| const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) |
| { |
| PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer); |
| PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage); |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) { |
| const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[i]; |
| |
| const VkResult result = pvr_copy_image_to_buffer_region(cmd_buffer, |
| src, |
| dst->dev_addr, |
| region); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| static void pvr_calc_mip_level_extents(const struct pvr_image *image, |
| uint16_t mip_level, |
| VkExtent3D *extent_out) |
| { |
| /* 3D textures are clamped to 4x4x4. */ |
| const uint32_t clamp = (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4 : 1; |
| const VkExtent3D *extent = &image->vk.extent; |
| |
| extent_out->width = MAX2(extent->width >> mip_level, clamp); |
| extent_out->height = MAX2(extent->height >> mip_level, clamp); |
| extent_out->depth = MAX2(extent->depth >> mip_level, clamp); |
| } |
| |
| static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer, |
| const struct pvr_image *image, |
| const VkClearColorValue *pColor, |
| const VkImageSubresourceRange *psRange, |
| uint32_t flags) |
| { |
| const uint32_t layer_count = |
| vk_image_subresource_layer_count(&image->vk, psRange); |
| const uint32_t max_layers = psRange->baseArrayLayer + layer_count; |
| VkFormat format = image->vk.format; |
| const VkOffset3D offset = { 0 }; |
| VkExtent3D mip_extent; |
| |
| assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers); |
| |
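| /* Emit one fill transfer per array layer, mip level and depth slice in |
| * the range. |
| */ |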
| for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) { |
| const uint32_t level_count = |
| vk_image_subresource_level_count(&image->vk, psRange); |
| const uint32_t max_level = psRange->baseMipLevel + level_count; |
| |
| assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels); |
| |
| for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) { |
| pvr_calc_mip_level_extents(image, level, &mip_extent); |
| |
| for (uint32_t depth = 0; depth < mip_extent.depth; depth++) { |
| struct pvr_transfer_cmd *transfer_cmd; |
| VkResult result; |
| |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| transfer_cmd->flags |= flags; |
| transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL; |
| |
| for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++) |
| transfer_cmd->clear_color[i].ui = pColor->uint32[i]; |
| |
| pvr_setup_transfer_surface(cmd_buffer->device, |
| &transfer_cmd->dst, |
| &transfer_cmd->scissor, |
| image, |
| layer, |
| level, |
| &offset, |
| &mip_extent, |
| depth, |
| format, |
| psRange->aspectMask); |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return result; |
| } |
| } |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer, |
| VkImage _image, |
| VkImageLayout imageLayout, |
| const VkClearColorValue *pColor, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_image, image, _image); |
| |
| for (uint32_t i = 0; i < rangeCount; i++) { |
| const VkResult result = |
| pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, |
| VkImage _image, |
| VkImageLayout imageLayout, |
| const VkClearDepthStencilValue *pDepthStencil, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange *pRanges) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_image, image, _image); |
| |
| for (uint32_t i = 0; i < rangeCount; i++) { |
| const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT | |
| VK_IMAGE_ASPECT_STENCIL_BIT; |
| VkClearColorValue clear_ds = { 0 }; |
| uint32_t flags = 0U; |
| VkResult result; |
| |
| if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && |
| pRanges[i].aspectMask != ds_aspect) { |
| /* A depth or stencil blit to a packed_depth_stencil requires a merge |
| * operation. |
| */ |
| flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE; |
| |
| if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) |
| flags |= PVR_TRANSFER_CMD_FLAGS_PICKD; |
| } |
| |
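| /* Pack depth into clear-colour channel 0 (as a float) and stencil into |
| * channel 1. |
| */ |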
| clear_ds.float32[0] = pDepthStencil->depth; |
| clear_ds.uint32[1] = pDepthStencil->stencil; |
| |
| result = |
| pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer, |
| pvr_dev_addr_t src_addr, |
| VkDeviceSize src_offset, |
| pvr_dev_addr_t dst_addr, |
| VkDeviceSize dst_offset, |
| VkDeviceSize size, |
| uint32_t fill_data, |
| bool is_fill) |
| { |
| VkDeviceSize offset = 0; |
| |
| while (offset < size) { |
| const VkDeviceSize remaining_size = size - offset; |
| struct pvr_transfer_cmd *transfer_cmd; |
| uint32_t src_align = (src_addr.addr + offset + src_offset) & 0xF; |
| uint32_t dst_align = (dst_addr.addr + offset + dst_offset) & 0xF; |
| uint32_t texel_width; |
| VkDeviceSize texels; |
| VkFormat vk_format; |
| VkResult result; |
| uint32_t height; |
| uint32_t width; |
| |
| if (is_fill) { |
| vk_format = VK_FORMAT_R32_UINT; |
| texel_width = 4U; |
| } else if (remaining_size >= 16U && (src_align % 16U) == 0 && |
| (dst_align % 16U) == 0) { |
| /* Only if both addresses are 128-bit (16-byte) aligned. */ |
| vk_format = VK_FORMAT_R32G32B32A32_UINT; |
| texel_width = 16U; |
| } else if (remaining_size >= 4U) { |
| vk_format = VK_FORMAT_R32_UINT; |
| texel_width = 4U; |
| } else { |
| vk_format = VK_FORMAT_R8_UINT; |
| texel_width = 1U; |
| } |
| |
| texels = remaining_size / texel_width; |
| |
| /* Try to do max-width rects, fall back to a 1-height rect for the |
| * remainder. |
| */ |
| if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) { |
| width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS; |
| height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS; |
| height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS); |
| } else { |
| width = texels; |
| height = 1; |
| } |
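| /* e.g. a copy of 10000 R32 texels maps to a 2048x4 rect covering 8192 |
| * texels; the remaining bytes are picked up on the next loop iteration. |
| */ |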
| |
| transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer); |
| if (!transfer_cmd) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| if (!is_fill) { |
| pvr_setup_buffer_surface( |
| &transfer_cmd->sources[0].surface, |
| &transfer_cmd->sources[0].mappings[0].src_rect, |
| src_addr, |
| offset + src_offset, |
| vk_format, |
| vk_format, |
| width, |
| height, |
| width); |
| transfer_cmd->source_count = 1; |
| } else { |
| transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL; |
| |
| for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++) |
| transfer_cmd->clear_color[i].ui = fill_data; |
| } |
| |
| pvr_setup_buffer_surface(&transfer_cmd->dst, |
| &transfer_cmd->scissor, |
| dst_addr, |
| offset + dst_offset, |
| vk_format, |
| vk_format, |
| width, |
| height, |
| width); |
| |
| if (transfer_cmd->source_count > 0) { |
| transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor; |
| |
| transfer_cmd->sources[0].mapping_count++; |
| } |
| |
| result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd); |
| if (result != VK_SUCCESS) { |
| vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd); |
| return result; |
| } |
| |
| offset += width * height * texel_width; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize dataSize, |
| const void *pData) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer); |
| struct pvr_suballoc_bo *pvr_bo; |
| VkResult result; |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
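| /* Stage pData in a temporary device buffer, then do a device-side copy |
| * into the destination buffer. |
| */ |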
| result = pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &pvr_bo); |
| if (result != VK_SUCCESS) |
| return; |
| |
| pvr_cmd_copy_buffer_region(cmd_buffer, |
| pvr_bo->dev_addr, |
| 0, |
| dst->dev_addr, |
| dstOffset, |
| dataSize, |
| 0U, |
| false); |
| } |
| |
| void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer, |
| const VkCopyBufferInfo2 *pCopyBufferInfo) |
| { |
| PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer); |
| PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer); |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) { |
| const VkResult result = |
| pvr_cmd_copy_buffer_region(cmd_buffer, |
| src->dev_addr, |
| pCopyBufferInfo->pRegions[i].srcOffset, |
| dst->dev_addr, |
| pCopyBufferInfo->pRegions[i].dstOffset, |
| pCopyBufferInfo->pRegions[i].size, |
| 0U, |
| false); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |
| |
| void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize fillSize, |
| uint32_t data) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer); |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
| fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize); |
| |
| /* From the Vulkan spec: |
| * |
| * "size is the number of bytes to fill, and must be either a multiple |
| * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of |
| * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the |
| * buffer is not a multiple of 4, then the nearest smaller multiple is |
| * used." |
| */ |
| fillSize &= ~3ULL; |
| |
| pvr_cmd_copy_buffer_region(cmd_buffer, |
| PVR_DEV_ADDR_INVALID, |
| 0, |
| dst->dev_addr, |
| dstOffset, |
| fillSize, |
| data, |
| true); |
| } |
| |
| /** |
| * \brief Returns the maximum number of layers, starting from base_layer, |
| * whose clear rects contain or match the target rectangle. |
| * |
| * \param[in] target_rect The region which the clear should contain or |
| * match. |
| * \param[in] base_layer The layer index to start at. |
| * \param[in] clear_rect_count Number of elements in clear_rects. |
| * \param[in] clear_rects Array of clear rects. |
| * |
| * \return Max number of layers that cover or match the target region. |
| */ |
| static uint32_t |
| pvr_get_max_layers_covering_target(VkRect2D target_rect, |
| uint32_t base_layer, |
| uint32_t clear_rect_count, |
| const VkClearRect *clear_rects) |
| { |
| const int32_t target_x0 = target_rect.offset.x; |
| const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width; |
| const int32_t target_y0 = target_rect.offset.y; |
| const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height; |
| |
| uint32_t layer_count = 0; |
| |
| assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX); |
| assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX); |
| |
| for (uint32_t i = 0; i < clear_rect_count; i++) { |
| const VkClearRect *clear_rect = &clear_rects[i]; |
| const uint32_t max_layer = |
| clear_rect->baseArrayLayer + clear_rect->layerCount; |
| bool target_is_covered; |
| int32_t x0, x1; |
| int32_t y0, y1; |
| |
| /* Skip empty clear rects. */ |
| if (clear_rect->layerCount == 0) |
| continue; |
| |
| assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <= |
| UINT32_MAX); |
| |
| /* Check for layer intersection. */ |
| if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer) |
| continue; |
| |
| x0 = clear_rect->rect.offset.x; |
| x1 = x0 + (int32_t)clear_rect->rect.extent.width; |
| y0 = clear_rect->rect.offset.y; |
| y1 = y0 + (int32_t)clear_rect->rect.extent.height; |
| |
| assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX); |
| assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <= |
| INT32_MAX); |
| |
| target_is_covered = x0 <= target_x0 && x1 >= target_x1; |
| target_is_covered &= y0 <= target_y0 && y1 >= target_y1; |
| |
| if (target_is_covered) |
| layer_count = MAX2(layer_count, max_layer - base_layer); |
| } |
| |
| return layer_count; |
| } |
| |
| /* Return true if the vertex shader is required to output the render target |
| * id to pick the texture array layer. |
| */ |
| static inline bool |
| pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info, |
| uint32_t rect_count, |
| const VkClearRect *rects) |
| { |
| if (!PVR_HAS_FEATURE(dev_info, gs_rta_support)) |
| return false; |
| |
| for (uint32_t i = 0; i < rect_count; i++) { |
| if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static VkResult pvr_clear_color_attachment_static_create_consts_buffer( |
| struct pvr_cmd_buffer *cmd_buffer, |
| const struct pvr_shader_factory_info *shader_info, |
| const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE], |
| ASSERTED bool uses_tile_buffer, |
| uint32_t tile_buffer_idx, |
| struct pvr_suballoc_bo **const const_shareds_buffer_out) |
| { |
| struct pvr_device *device = cmd_buffer->device; |
| struct pvr_suballoc_bo *const_shareds_buffer; |
| struct pvr_bo *tile_buffer; |
| uint64_t tile_dev_addr; |
| uint32_t *buffer; |
| VkResult result; |
| |
| /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine. |
| * Change pvr_cmd_buffer_alloc_mem() to take in an alignment? |
| */ |
| result = |
| pvr_cmd_buffer_alloc_mem(cmd_buffer, |
| device->heaps.general_heap, |
| PVR_DW_TO_BYTES(shader_info->const_shared_regs), |
| &const_shareds_buffer); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer); |
| |
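| /* Write the driver-supplied constants (clear colour components and, if |
| * used, the tile buffer address) at the shared-register locations the |
| * shader factory reported for this program. |
| */ |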
| for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) { |
| uint32_t dest_idx = shader_info->driver_const_location_map[i]; |
| |
| if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED) |
| continue; |
| |
| assert(dest_idx < shader_info->const_shared_regs); |
| |
| switch (i) { |
| case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0: |
| case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1: |
| case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2: |
| case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3: |
| buffer[dest_idx] = clear_color[i]; |
| break; |
| |
| case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER: |
| assert(uses_tile_buffer); |
| tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx]; |
| tile_dev_addr = tile_buffer->vma->dev_addr.addr; |
| buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32); |
| break; |
| |
| case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER: |
| assert(uses_tile_buffer); |
| tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx]; |
| tile_dev_addr = tile_buffer->vma->dev_addr.addr; |
| buffer[dest_idx] = (uint32_t)tile_dev_addr; |
| break; |
| |
| default: |
| unreachable("Unsupported clear attachment const type."); |
| } |
| } |
| |
| for (uint32_t i = 0; i < shader_info->num_static_const; i++) { |
| const struct pvr_static_buffer *static_buff = |
| &shader_info->static_const_buffer[i]; |
| |
| assert(static_buff->dst_idx < shader_info->const_shared_regs); |
| |
| buffer[static_buff->dst_idx] = static_buff->value; |
| } |
| |
| *const_shareds_buffer_out = const_shareds_buffer; |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult pvr_clear_color_attachment_static( |
| struct pvr_cmd_buffer *cmd_buffer, |
| const struct usc_mrt_resource *mrt_resource, |
| VkFormat format, |
| uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE], |
| uint32_t template_idx, |
| uint32_t stencil, |
| bool vs_has_rt_id_output) |
| { |
| struct pvr_device *device = cmd_buffer->device; |
| ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info; |
| ASSERTED const bool has_eight_output_registers = |
| PVR_HAS_FEATURE(dev_info, eight_output_registers); |
| const struct pvr_device_static_clear_state *dev_clear_state = |
| &device->static_clear_state; |
| const bool uses_tile_buffer = mrt_resource->type == |
| USC_MRT_RESOURCE_TYPE_MEMORY; |
| const struct pvr_pds_clear_attachment_program_info *clear_attachment_program; |
| struct pvr_pds_pixel_shader_sa_program texture_program; |
| uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT]; |
| const struct pvr_shader_factory_info *shader_info; |
| struct pvr_suballoc_bo *pds_texture_program_bo; |
| struct pvr_static_clear_ppp_template template; |
| struct pvr_suballoc_bo *const_shareds_buffer; |
| uint64_t pds_texture_program_addr; |
| struct pvr_suballoc_bo *pvr_bo; |
| uint32_t tile_buffer_idx = 0; |
| uint32_t out_reg_count; |
| uint32_t output_offset; |
| uint32_t program_idx; |
| uint32_t *buffer; |
| VkResult result; |
| |
| out_reg_count = |
| DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U); |
| |
| if (uses_tile_buffer) { |
| tile_buffer_idx = mrt_resource->mem.tile_buffer; |
| output_offset = mrt_resource->mem.offset_dw; |
| } else { |
| output_offset = mrt_resource->reg.output_reg; |
| } |
| |
| assert(has_eight_output_registers || out_reg_count + output_offset <= 4); |
| |
| program_idx = pvr_get_clear_attachment_program_index(out_reg_count, |
| output_offset, |
| uses_tile_buffer); |
| |
| shader_info = clear_attachment_collection[program_idx].info; |
| |
| result = pvr_clear_color_attachment_static_create_consts_buffer( |
| cmd_buffer, |
| shader_info, |
| clear_color, |
| uses_tile_buffer, |
| tile_buffer_idx, |
| &const_shareds_buffer); |
| if (result != VK_SUCCESS) |
| return result; |
| |
| /* clang-format off */ |
| texture_program = (struct pvr_pds_pixel_shader_sa_program){ |
| .num_texture_dma_kicks = 1, |
| .texture_dma_address = { |
| [0] = const_shareds_buffer->dev_addr.addr, |
| } |
| }; |
| /* clang-format on */ |
| |
| pvr_csb_pack (&texture_program.texture_dma_control[0], |
| PDSINST_DOUT_FIELDS_DOUTD_SRC1, |
| doutd_src1) { |
| doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE; |
| doutd_src1.bsize = shader_info->const_shared_regs; |
| } |
| |
| clear_attachment_program = |
| &dev_clear_state->pds_clear_attachment_program_info[program_idx]; |
| |
| /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine. |
| * Change pvr_cmd_buffer_alloc_mem() to take in an alignment? |
| */ |
| result = pvr_cmd_buffer_alloc_mem( |
| cmd_buffer, |
| device->heaps.pds_heap, |
| clear_attachment_program->texture_program_data_size, |
| &pds_texture_program_bo); |
| if (result != VK_SUCCESS) { |
| list_del(&const_shareds_buffer->link); |
| pvr_bo_suballoc_free(const_shareds_buffer); |
| |
| return result; |
| } |
| |
| buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo); |
| pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr - |
| device->heaps.pds_heap->base_addr.addr; |
| |
| pvr_pds_generate_pixel_shader_sa_texture_state_data( |
| &texture_program, |
| buffer, |
| &device->pdevice->dev_info); |
| |
| pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE], |
| TA_STATE_PDS_SHADERBASE, |
| shaderbase) { |
| shaderbase.addr = clear_attachment_program->pixel_program_offset; |
| } |
| |
| pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE], |
| TA_STATE_PDS_TEXUNICODEBASE, |
| texunicodebase) { |
| texunicodebase.addr = clear_attachment_program->texture_program_offset; |
| } |
| |
| pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1], |
| TA_STATE_PDS_SIZEINFO1, |
| sizeinfo1) { |
| sizeinfo1.pds_texturestatesize = DIV_ROUND_UP( |
| clear_attachment_program->texture_program_data_size, |
| ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE); |
| |
| sizeinfo1.pds_tempsize = |
| DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count, |
| ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE); |
| } |
| |
| pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2], |
| TA_STATE_PDS_SIZEINFO2, |
| sizeinfo2) { |
| sizeinfo2.usc_sharedsize = |
| DIV_ROUND_UP(shader_info->const_shared_regs, |
| ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE); |
| } |
| |
| /* Dummy coefficient loading program. */ |
| pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0; |
| |
| pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE], |
| TA_STATE_PDS_TEXTUREDATABASE, |
| texturedatabase) { |
| texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr); |
| } |
| |
| assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT); |
| template = |
| cmd_buffer->device->static_clear_state.ppp_templates[template_idx]; |
| |
| template.config.pds_state = &pds_state; |
| |
| template.config.ispctl.upass = |
| cmd_buffer->state.render_pass_info.isp_userpass; |
| |
| if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| /* clang-format off */ |
| template.config.ispa.sref = stencil & ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX; |
| /* clang-format on */ |
| } |
| |
| if (vs_has_rt_id_output) { |
| template.config.output_sel.rhw_pres = true; |
| template.config.output_sel.render_tgt_pres = true; |
| template.config.output_sel.vtxsize = 4 + 1; |
| } |
| |
| result = pvr_emit_ppp_from_template( |
| &cmd_buffer->state.current_sub_cmd->gfx.control_stream, |
| &template, |
| &pvr_bo); |
| if (result != VK_SUCCESS) { |
| list_del(&pds_texture_program_bo->link); |
| pvr_bo_suballoc_free(pds_texture_program_bo); |
| |
| list_del(&const_shareds_buffer->link); |
| pvr_bo_suballoc_free(const_shareds_buffer); |
| |
| return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); |
| } |
| |
| list_add(&pvr_bo->link, &cmd_buffer->bo_list); |
| |
| return VK_SUCCESS; |
| } |
| |
| /** |
| * \brief Record a deferred clear operation into the command buffer. |
| * |
| * Devices which don't have gs_rta_support require extra handling for RTA |
| * clears. We setup a list of deferred clear transfer commands which will be |
| * processed at the end of the graphics sub command to account for the missing |
| * feature. |
| */ |
| static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer, |
| const VkClearAttachment *attachment, |
| const VkClearRect *rect, |
| bool is_render_init) |
| { |
| struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info; |
| struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx; |
| const struct pvr_renderpass_hwsetup_render *hw_render = |
| &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx]; |
| struct pvr_transfer_cmd *transfer_cmd_list; |
| const struct pvr_image_view *image_view; |
| const struct pvr_image *image; |
| uint32_t base_layer; |
| |
| const VkOffset3D offset = { |
| .x = rect->rect.offset.x, |
| .y = rect->rect.offset.y, |
| .z = 1, |
| }; |
| const VkExtent3D extent = { |
| .width = rect->rect.extent.width, |
| .height = rect->rect.extent.height, |
| .depth = 1, |
| }; |
| |
| assert( |
| !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support)); |
| |
| transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears, |
| struct pvr_transfer_cmd, |
| rect->layerCount); |
| if (!transfer_cmd_list) { |
| return vk_command_buffer_set_error(&cmd_buffer->vk, |
| VK_ERROR_OUT_OF_HOST_MEMORY); |
| } |
| |
   /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
    *
    *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
    *     include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
    */
| if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) { |
| assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT || |
| attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT || |
| attachment->aspectMask == |
| (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); |
| |
| image_view = pass_info->attachments[hw_render->ds_attach_idx]; |
| } else if (is_render_init) { |
| uint32_t index; |
| |
| assert(attachment->colorAttachment < hw_render->color_init_count); |
| index = hw_render->color_init[attachment->colorAttachment].index; |
| |
| image_view = pass_info->attachments[index]; |
| } else { |
| const struct pvr_renderpass_hwsetup_subpass *hw_pass = |
| pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx); |
| const struct pvr_render_subpass *sub_pass = |
| &pass_info->pass->subpasses[hw_pass->index]; |
| const uint32_t attachment_idx = |
| sub_pass->color_attachments[attachment->colorAttachment]; |
| |
| assert(attachment->colorAttachment < sub_pass->color_count); |
| |
| image_view = pass_info->attachments[attachment_idx]; |
| } |
| |
| base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer; |
| image = vk_to_pvr_image(image_view->vk.image); |
| |
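   /* Record one fill transfer command per layer; these are processed at the
    * end of the graphics sub command.
    */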
| for (uint32_t i = 0; i < rect->layerCount; i++) { |
| struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i]; |
| |
      /* TODO: Add an init function for cases where we don't want to use
       * pvr_transfer_cmd_alloc(), and use it here.
       */
| *transfer_cmd = (struct pvr_transfer_cmd){ |
| .flags = PVR_TRANSFER_CMD_FLAGS_FILL, |
| .cmd_buffer = cmd_buffer, |
| .is_deferred_clear = true, |
| }; |
| |
| if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { |
| for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) { |
| transfer_cmd->clear_color[j].ui = |
| attachment->clearValue.color.uint32[j]; |
| } |
| } else { |
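         /* Depth goes into channel 0 as a float and stencil into channel 1
          * as an unsigned integer.
          */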
| transfer_cmd->clear_color[0].f = |
| attachment->clearValue.depthStencil.depth; |
| transfer_cmd->clear_color[1].ui = |
| attachment->clearValue.depthStencil.stencil; |
| } |
| |
| pvr_setup_transfer_surface(cmd_buffer->device, |
| &transfer_cmd->dst, |
| &transfer_cmd->scissor, |
| image, |
| base_layer + i, |
| 0, |
| &offset, |
| &extent, |
| 0.0f, |
| image->vk.format, |
| attachment->aspectMask); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer, |
| uint32_t attachment_count, |
| const VkClearAttachment *attachments, |
| uint32_t rect_count, |
| const VkClearRect *rects, |
| bool is_render_init) |
| { |
| const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass; |
| struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info; |
| const struct pvr_renderpass_hwsetup_subpass *hw_pass = |
| pvr_get_hw_subpass(pass, pass_info->subpass_idx); |
| struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx; |
| struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info; |
| struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index]; |
| uint32_t vs_output_size_in_bytes; |
| bool vs_has_rt_id_output; |
| |
| /* TODO: This function can be optimized so that most of the device memory |
| * gets allocated together in one go and then filled as needed. There might |
| * also be opportunities to reuse pds code and data segments. |
| */ |
| |
| assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS); |
| |
| pvr_reset_graphics_dirty_state(cmd_buffer, false); |
| |
| /* We'll be emitting to the control stream. */ |
| sub_cmd->empty_cmd = false; |
| |
| vs_has_rt_id_output = |
| pvr_clear_needs_rt_id_output(dev_info, rect_count, rects); |
| |
| /* 4 because we're expecting the USC to output X, Y, Z, and W. */ |
| vs_output_size_in_bytes = PVR_DW_TO_BYTES(4); |
| if (vs_has_rt_id_output) |
| vs_output_size_in_bytes += PVR_DW_TO_BYTES(1); |
| |
| for (uint32_t i = 0; i < attachment_count; i++) { |
| const VkClearAttachment *attachment = &attachments[i]; |
| struct pvr_pds_vertex_shader_program pds_program; |
| struct pvr_pds_upload pds_program_upload = { 0 }; |
| uint64_t current_base_array_layer = ~0; |
| VkResult result; |
| float depth; |
| |
| if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { |
| uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE]; |
| const struct usc_mrt_resource *mrt_resource; |
| uint32_t global_attachment_idx; |
| uint32_t local_attachment_idx; |
| VkFormat format; |
| |
| local_attachment_idx = attachment->colorAttachment; |
| |
| if (is_render_init) { |
| struct pvr_renderpass_hwsetup_render *hw_render; |
| |
| assert(pass->hw_setup->render_count > 0); |
| hw_render = &pass->hw_setup->renders[0]; |
| |
| mrt_resource = |
| &hw_render->init_setup.mrt_resources[local_attachment_idx]; |
| |
| assert(local_attachment_idx < hw_render->color_init_count); |
| global_attachment_idx = |
| hw_render->color_init[local_attachment_idx].index; |
| } else { |
| mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx]; |
| |
| assert(local_attachment_idx < sub_pass->color_count); |
| global_attachment_idx = |
| sub_pass->color_attachments[local_attachment_idx]; |
| } |
| |
| if (global_attachment_idx == VK_ATTACHMENT_UNUSED) |
| continue; |
| |
| assert(global_attachment_idx < pass->attachment_count); |
| format = pass->attachments[global_attachment_idx].vk_format; |
| |
| assert(format != VK_FORMAT_UNDEFINED); |
| |
| pvr_get_hw_clear_color(format, |
| attachment->clearValue.color, |
| packed_clear_color); |
| |
| result = pvr_clear_color_attachment_static(cmd_buffer, |
| mrt_resource, |
| format, |
| packed_clear_color, |
| VK_IMAGE_ASPECT_COLOR_BIT, |
| 0, |
| vs_has_rt_id_output); |
| if (result != VK_SUCCESS) |
| return; |
| } else if (hw_pass->z_replicate != -1 && |
| attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { |
| const VkClearColorValue clear_color = { |
| .float32 = { [0] = attachment->clearValue.depthStencil.depth, }, |
| }; |
| const uint32_t template_idx = attachment->aspectMask | |
| VK_IMAGE_ASPECT_COLOR_BIT; |
| const uint32_t stencil = attachment->clearValue.depthStencil.stencil; |
| uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE]; |
| const struct usc_mrt_resource *mrt_resource; |
| |
| mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate]; |
| |
| pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT, |
| clear_color, |
| packed_clear_color); |
| |
| result = pvr_clear_color_attachment_static(cmd_buffer, |
| mrt_resource, |
| VK_FORMAT_R32_SFLOAT, |
| packed_clear_color, |
| template_idx, |
| stencil, |
| vs_has_rt_id_output); |
| if (result != VK_SUCCESS) |
| return; |
| } else { |
| const uint32_t template_idx = attachment->aspectMask; |
| struct pvr_static_clear_ppp_template template; |
| struct pvr_suballoc_bo *pvr_bo; |
| |
| assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT); |
| template = |
| cmd_buffer->device->static_clear_state.ppp_templates[template_idx]; |
| |
| if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { |
| /* clang-format off */ |
| template.config.ispa.sref = |
| attachment->clearValue.depthStencil.stencil & |
| ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX; |
| /* clang-format on */ |
| } |
| |
| if (vs_has_rt_id_output) { |
| template.config.output_sel.rhw_pres = true; |
| template.config.output_sel.render_tgt_pres = true; |
| template.config.output_sel.vtxsize = 4 + 1; |
| } |
| |
| result = pvr_emit_ppp_from_template(&sub_cmd->control_stream, |
| &template, |
| &pvr_bo); |
| if (result != VK_SUCCESS) { |
| pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); |
| return; |
| } |
| |
| list_add(&pvr_bo->link, &cmd_buffer->bo_list); |
| } |
| |
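      /* The clear geometry carries a depth value; when depth isn't being
       * cleared, default to 1.0.
       */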
| if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) |
| depth = attachment->clearValue.depthStencil.depth; |
| else |
| depth = 1.0f; |
| |
| if (vs_has_rt_id_output) { |
| const struct pvr_device_static_clear_state *dev_clear_state = |
| &cmd_buffer->device->static_clear_state; |
| const struct pvr_suballoc_bo *multi_layer_vert_bo = |
| dev_clear_state->usc_multi_layer_vertex_shader_bo; |
| |
         /* We can't use the device's passthrough pds program since it doesn't
          * have iterate_instance_id enabled, so we upload a code section per
          * clear rect.
          */
| |
         /* TODO: See if we can allocate all the code section memory in one go;
          * we'd need to make sure that changing instance_id_modifier doesn't
          * change the code section size. Also check whether we can reuse the
          * same code segment for each rect: the instance_id_modifier appears
          * to be written into the data section and consumed by the pds ADD
          * instruction from there, rather than being embedded into the code
          * section.
          */
| |
| pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program, |
| multi_layer_vert_bo); |
| } else { |
| /* We can reuse the device's code section but we'll need to upload data |
| * sections so initialize the program. |
| */ |
| pvr_pds_clear_vertex_shader_program_init_base( |
| &pds_program, |
| cmd_buffer->device->static_clear_state.usc_vertex_shader_bo); |
| |
| pds_program_upload.code_offset = |
| cmd_buffer->device->static_clear_state.pds.code_offset; |
         /* TODO: The code size isn't used by pvr_clear_vdm_state(); maybe
          * change its interface to make that clear so this doesn't need to
          * be set?
          */
| pds_program_upload.code_size = |
| cmd_buffer->device->static_clear_state.pds.code_size; |
| } |
| |
| for (uint32_t j = 0; j < rect_count; j++) { |
| struct pvr_pds_upload pds_program_data_upload; |
| const VkClearRect *clear_rect = &rects[j]; |
| struct pvr_suballoc_bo *vertices_bo; |
| uint32_t vdm_cs_size_in_dw; |
| uint32_t *vdm_cs_buffer; |
| VkResult result; |
| |
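         /* Without gs_rta_support, layered clears are recorded as deferred
          * transfer commands. The geometry clear below is only kept for
          * rects that cover layer 0.
          */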
| if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) && |
| (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) { |
| result = pvr_add_deferred_rta_clear(cmd_buffer, |
| attachment, |
| clear_rect, |
| is_render_init); |
| if (result != VK_SUCCESS) |
| return; |
| |
| if (clear_rect->baseArrayLayer != 0) |
| continue; |
| } |
| |
| /* TODO: Allocate all the buffers in one go before the loop, and add |
| * support to multi-alloc bo. |
| */ |
| result = pvr_clear_vertices_upload(cmd_buffer->device, |
| &clear_rect->rect, |
| depth, |
| &vertices_bo); |
| if (result != VK_SUCCESS) { |
| pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); |
| return; |
| } |
| |
| list_add(&vertices_bo->link, &cmd_buffer->bo_list); |
| |
| if (vs_has_rt_id_output) { |
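            /* Only re-upload the PDS code section when the base array layer
             * changes; the data section is uploaded per rect.
             */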
| if (current_base_array_layer != clear_rect->baseArrayLayer) { |
| const uint32_t base_array_layer = clear_rect->baseArrayLayer; |
| struct pvr_pds_upload pds_program_code_upload; |
| |
| result = |
| pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code( |
| &pds_program, |
| cmd_buffer, |
| base_array_layer, |
| &pds_program_code_upload); |
| if (result != VK_SUCCESS) { |
| pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); |
| return; |
| } |
| |
| pds_program_upload.code_offset = |
| pds_program_code_upload.code_offset; |
               /* TODO: The code size isn't used by pvr_clear_vdm_state();
                * maybe change its interface to make that clear so this
                * doesn't need to be set?
                */
| pds_program_upload.code_size = pds_program_code_upload.code_size; |
| |
| current_base_array_layer = base_array_layer; |
| } |
| |
| result = |
| pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data( |
| &pds_program, |
| cmd_buffer, |
| vertices_bo, |
| &pds_program_data_upload); |
| if (result != VK_SUCCESS) |
| return; |
| } else { |
| result = pvr_pds_clear_vertex_shader_program_create_and_upload_data( |
| &pds_program, |
| cmd_buffer, |
| vertices_bo, |
| &pds_program_data_upload); |
| if (result != VK_SUCCESS) |
| return; |
| } |
| |
| pds_program_upload.data_offset = pds_program_data_upload.data_offset; |
| pds_program_upload.data_size = pds_program_data_upload.data_size; |
| |
| vdm_cs_size_in_dw = |
| pvr_clear_vdm_state_get_size_in_dw(dev_info, |
| clear_rect->layerCount); |
| |
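         /* Write the VDM state words for the clear draw directly into the
          * control stream, bracketed by a relocation mark.
          */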
| pvr_csb_set_relocation_mark(&sub_cmd->control_stream); |
| |
| vdm_cs_buffer = |
| pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw); |
| if (!vdm_cs_buffer) { |
| pvr_cmd_buffer_set_error_unwarned(cmd_buffer, |
| sub_cmd->control_stream.status); |
| return; |
| } |
| |
| pvr_pack_clear_vdm_state(dev_info, |
| &pds_program_upload, |
| pds_program.temps_used, |
| 4, |
| vs_output_size_in_bytes, |
| clear_rect->layerCount, |
| vdm_cs_buffer); |
| |
| pvr_csb_clear_relocation_mark(&sub_cmd->control_stream); |
| } |
| } |
| } |
| |
| void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer, |
| const VkClearAttachment *attachment, |
| const VkClearRect *rect) |
| { |
| pvr_clear_attachments(cmd_buffer, 1, attachment, 1, rect, true); |
| } |
| |
| void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer, |
| uint32_t attachmentCount, |
| const VkClearAttachment *pAttachments, |
| uint32_t rectCount, |
| const VkClearRect *pRects) |
| { |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| struct pvr_cmd_buffer_state *state = &cmd_buffer->state; |
| struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx; |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS); |
| |
   /* TODO: There are some optimizations that can be made here:
    * - For a full screen clear, update the clear values for the corresponding
    *   attachment index.
    * - For a full screen color attachment clear, add its index to a load op
    *   override to add it to the background shader. This will elide any load
    *   op loads currently in the background shader as well as the usual
    *   fragment kick for the geometry clear.
    */
| |
| /* If we have any depth/stencil clears, update the sub command depth/stencil |
| * modification and usage flags. |
| */ |
| if (state->depth_format != VK_FORMAT_UNDEFINED) { |
| uint32_t full_screen_clear_count; |
| bool has_stencil_clear = false; |
| bool has_depth_clear = false; |
| |
| for (uint32_t i = 0; i < attachmentCount; i++) { |
| const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask; |
| |
| if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) |
| has_stencil_clear = true; |
| |
| if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) |
| has_depth_clear = true; |
| |
| if (has_stencil_clear && has_depth_clear) |
| break; |
| } |
| |
| sub_cmd->modifies_stencil |= has_stencil_clear; |
| sub_cmd->modifies_depth |= has_depth_clear; |
| |
      /* We only care about clears that have a baseArrayLayer of 0, as any
       * attachment clears we move to the background shader must apply to all
       * of the attachment's subresources.
       */
| full_screen_clear_count = |
| pvr_get_max_layers_covering_target(state->render_pass_info.render_area, |
| 0, |
| rectCount, |
| pRects); |
| |
| if (full_screen_clear_count > 0) { |
| if (has_stencil_clear && |
| sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) { |
| sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER; |
| } |
| |
| if (has_depth_clear && |
| sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) { |
| sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER; |
| } |
| } |
| } |
| |
| pvr_clear_attachments(cmd_buffer, |
| attachmentCount, |
| pAttachments, |
| rectCount, |
| pRects, |
| false); |
| } |
| |
| void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer, |
| const VkResolveImageInfo2 *pResolveImageInfo) |
| { |
| PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage); |
| PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage); |
| PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer); |
| |
| PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer); |
| |
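   /* Resolves share the per-region path with color image copies, so repack
    * each resolve region as a VkImageCopy2.
    */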
| for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) { |
| VkImageCopy2 region = { |
| .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2, |
| .srcSubresource = pResolveImageInfo->pRegions[i].srcSubresource, |
| .srcOffset = pResolveImageInfo->pRegions[i].srcOffset, |
| .dstSubresource = pResolveImageInfo->pRegions[i].dstSubresource, |
| .dstOffset = pResolveImageInfo->pRegions[i].dstOffset, |
| .extent = pResolveImageInfo->pRegions[i].extent, |
| }; |
| |
| VkResult result = |
         pvr_copy_or_resolve_color_image_region(cmd_buffer, src, dst, &region);
| if (result != VK_SUCCESS) |
| return; |
| } |
| } |