/*
* Copyright © 2019 Raspberry Pi Ltd
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3dv_private.h"
#include "v3dv_meta_common.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_pack_color.h"
#include "vulkan/runtime/vk_common_entrypoints.h"
static uint32_t
meta_blit_key_hash(const void *key)
{
return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}
static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}
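/* Creates the descriptor set layout (a single combined image sampler for
* the fragment stage) and the pipeline layout (with a 20-byte vertex-stage
* push constant range) used by the blit pipelines.
*/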
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
VkDescriptorSetLayout *descriptor_set_layout,
VkPipelineLayout *pipeline_layout)
{
VkResult result;
if (*descriptor_set_layout == 0) {
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = &descriptor_set_layout_binding,
};
result =
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
&descriptor_set_layout_info,
&device->vk.alloc,
descriptor_set_layout);
if (result != VK_SUCCESS)
return false;
}
assert(*pipeline_layout == 0);
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges =
&(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
};
result =
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
&pipeline_layout_info,
&device->vk.alloc,
pipeline_layout);
return result == VK_SUCCESS;
}
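/* Initializes the blit meta state: one pipeline cache per image type plus
* the shared descriptor set and pipeline layouts.
*/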
void
v3dv_meta_blit_init(struct v3dv_device *device)
{
for (uint32_t i = 0; i < 3; i++) {
device->meta.blit.cache[i] =
_mesa_hash_table_create(NULL,
meta_blit_key_hash,
meta_blit_key_compare);
}
create_blit_pipeline_layout(device,
&device->meta.blit.ds_layout,
&device->meta.blit.p_layout);
}
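/* Destroys all cached blit pipelines and render passes along with the
* shared descriptor set and pipeline layouts.
*/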
void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
VkDevice _device = v3dv_device_to_handle(device);
for (uint32_t i = 0; i < 3; i++) {
hash_table_foreach(device->meta.blit.cache[i], entry) {
struct v3dv_meta_blit_pipeline *item = entry->data;
v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
vk_free(&device->vk.alloc, item);
}
_mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
}
if (device->meta.blit.p_layout) {
v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
&device->vk.alloc);
}
if (device->meta.blit.ds_layout) {
v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
&device->vk.alloc);
}
}
static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}
static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}
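/* Creates the descriptor set layout (a single uniform texel buffer for the
* fragment stage) and the pipeline layout (with fragment and geometry push
* constant ranges) used by the texel buffer copy pipelines.
*/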
static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
VkDescriptorSetLayout *ds_layout,
VkPipelineLayout *p_layout)
{
VkResult result;
if (*ds_layout == 0) {
VkDescriptorSetLayoutBinding ds_layout_binding = {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
VkDescriptorSetLayoutCreateInfo ds_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = &ds_layout_binding,
};
result =
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
&ds_layout_info,
&device->vk.alloc,
ds_layout);
if (result != VK_SUCCESS)
return false;
}
assert(*p_layout == 0);
/* FIXME: this is abusing the API a bit, since not all of our copy
* pipelines have a geometry shader. We could create 2 different pipeline
* layouts, but this works for us for now.
*/
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
VkPushConstantRange ranges[2] = {
{ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
{ VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
};
VkPipelineLayoutCreateInfo p_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = ds_layout,
.pushConstantRangeCount = 2,
.pPushConstantRanges = ranges,
};
result =
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
&p_layout_info,
&device->vk.alloc,
p_layout);
return result == VK_SUCCESS;
}
void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
for (uint32_t i = 0; i < 3; i++) {
device->meta.texel_buffer_copy.cache[i] =
_mesa_hash_table_create(NULL,
meta_texel_buffer_copy_key_hash,
meta_texel_buffer_copy_key_compare);
}
create_texel_buffer_copy_pipeline_layout(
device,
&device->meta.texel_buffer_copy.ds_layout,
&device->meta.texel_buffer_copy.p_layout);
}
void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
VkDevice _device = v3dv_device_to_handle(device);
for (uint32_t i = 0; i < 3; i++) {
hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
vk_free(&device->vk.alloc, item);
}
_mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
}
if (device->meta.texel_buffer_copy.p_layout) {
v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
&device->vk.alloc);
}
if (device->meta.texel_buffer_copy.ds_layout) {
v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
&device->vk.alloc);
}
}
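/* Returns a TLB-renderable format with the same per-texel (or per-block)
* size as the given format, so copies can be implemented as raw data moves,
* or VK_FORMAT_UNDEFINED if there is no suitable replacement.
*/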
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
switch (format) {
case VK_FORMAT_R8G8B8A8_SNORM:
return VK_FORMAT_R8G8B8A8_UINT;
case VK_FORMAT_R8G8_SNORM:
return VK_FORMAT_R8G8_UINT;
case VK_FORMAT_R8_SNORM:
return VK_FORMAT_R8_UINT;
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
return VK_FORMAT_A8B8G8R8_UINT_PACK32;
case VK_FORMAT_R16_UNORM:
case VK_FORMAT_R16_SNORM:
return VK_FORMAT_R16_UINT;
case VK_FORMAT_R16G16_UNORM:
case VK_FORMAT_R16G16_SNORM:
return VK_FORMAT_R16G16_UINT;
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R16G16B16A16_SNORM:
return VK_FORMAT_R16G16B16A16_UINT;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
return VK_FORMAT_R32_SFLOAT;
/* We can't render to compressed formats using the TLB so instead we use
* a compatible format with the same bpp as the compressed format. Because
* the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
* case of ETC), when we implement copies with the compatible format we
* will have to divide offsets and dimensions on the compressed image by
* the compressed block size.
*/
case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
case VK_FORMAT_BC2_UNORM_BLOCK:
case VK_FORMAT_BC2_SRGB_BLOCK:
case VK_FORMAT_BC3_SRGB_BLOCK:
case VK_FORMAT_BC3_UNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
return VK_FORMAT_R32G32B32A32_UINT;
case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case VK_FORMAT_EAC_R11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
return VK_FORMAT_R16G16B16A16_UINT;
default:
return VK_FORMAT_UNDEFINED;
}
}
/**
* Checks if we can implement an image copy or clear operation using the TLB
* hardware.
*/
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
const VkOffset3D *offset,
VkFormat *compat_format)
{
if (offset->x != 0 || offset->y != 0)
return false;
if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
if (compat_format)
*compat_format = image->vk.format;
return true;
}
/* If the image format is not TLB-supported, then check if we can use
* a compatible format instead.
*/
if (compat_format) {
*compat_format = get_compatible_tlb_format(image->vk.format);
if (*compat_format != VK_FORMAT_UNDEFINED)
return true;
}
return false;
}
/* Implements a copy using the TLB.
*
* This only works if we are copying from offset (0,0), since a TLB store for
* tile (x,y) will be written at the same tile offset into the destination.
* When this requirement is not met, we need to use a blit instead.
*
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
const VkBufferImageCopy2 *region)
{
VkFormat fb_format;
if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
(fb_format, region->imageSubresource.aspectMask,
&internal_type, &internal_bpp);
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy from compressed format using a compatible format */
const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers, false, true,
1, internal_bpp, false);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
internal_type, &job->frame_tiling);
v3dv_X(job->device, job_emit_binning_flush)(job);
v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
(job, buffer, image, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
VkFormat dst_format,
struct v3dv_image *src,
VkFormat src_format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
const VkImageBlit2 *region,
VkFilter filter,
bool dst_is_padded_image);
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
const VkBufferImageCopy2 *region)
{
bool handled = false;
/* This path uses a shader blit, which doesn't support linear images. Return
* early to avoid all the heavy lifting in preparation for the blit_shader()
* call that is bound to fail in that scenario.
*/
if (image->vk.tiling == VK_IMAGE_TILING_LINEAR &&
image->vk.image_type != VK_IMAGE_TYPE_1D) {
return handled;
}
/* Generally, the bpp of the data in the buffer matches that of the
* source image. The exception is the case where we are copying
* stencil (8bpp) from a combined d24s8 image (32bpp).
*/
uint32_t buffer_bpp = image->cpp;
VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;
/* Because we are going to implement the copy as a blit, we need to create
* a linear image from the destination buffer and we also want our blit
* source and destination formats to be the same (to avoid any format
* conversions), so we choose a canonical format that matches the
* source image bpp.
*
* The exception to the above is copying from combined depth/stencil images
* because we are copying only one aspect of the image, so we need to setup
* our formats, color write mask and source swizzle mask to match that.
*/
VkFormat dst_format;
VkFormat src_format;
VkColorComponentFlags cmask = 0; /* All components */
VkComponentMapping cswizzle = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
switch (buffer_bpp) {
case 16:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R32G32B32A32_UINT;
src_format = dst_format;
break;
case 8:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R16G16B16A16_UINT;
src_format = dst_format;
break;
case 4:
switch (copy_aspect) {
case VK_IMAGE_ASPECT_COLOR_BIT:
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
src_format = VK_FORMAT_R32_UINT;
dst_format = VK_FORMAT_R32_UINT;
} else {
/* We want to write depth in the first 24 bits of the buffer;
* however, the hardware has depth in bits 8-31, so we swizzle
* the source components to match what we want. Also, we don't
* want to write bits 24-31 in the destination.
*/
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
cmask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT;
cswizzle.r = VK_COMPONENT_SWIZZLE_G;
cswizzle.g = VK_COMPONENT_SWIZZLE_B;
cswizzle.b = VK_COMPONENT_SWIZZLE_A;
cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
}
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
/* Copying from S8D24. We want to write 8-bit stencil values only,
* so adjust the buffer bpp for that. Since the hardware stores stencil
* in the LSB, we can just do a RGBA8UI to R8UI blit.
*/
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = VK_FORMAT_R8_UINT;
buffer_bpp = 1;
break;
default:
unreachable("unsupported aspect");
return handled;
};
break;
case 2:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
dst_format = VK_FORMAT_R16_UINT;
src_format = dst_format;
break;
case 1:
assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
dst_format = VK_FORMAT_R8_UINT;
src_format = dst_format;
break;
default:
unreachable("unsupported bit-size");
return handled;
};
/* The hardware doesn't support linear depth/stencil stores, so we
* implement copies of depth/stencil aspect as color copies using a
* compatible color format.
*/
assert(vk_format_is_color(src_format));
assert(vk_format_is_color(dst_format));
copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
/* We should be able to handle the blit if we got this far */
handled = true;
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
if (region->bufferRowLength == 0)
buf_width = region->imageExtent.width;
else
buf_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buf_height = region->imageExtent.height;
else
buf_height = region->bufferImageHeight;
/* If the image is compressed, the bpp refers to blocks, not pixels */
uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
uint32_t block_height = vk_format_get_blockheight(image->vk.format);
buf_width = buf_width / block_width;
buf_height = buf_height / block_height;
/* Compute layers to copy */
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
/* Our blit interface can see the real format of the images to detect
* copies between compressed and uncompressed images and adapt the
* blit region accordingly. Here we are just doing a raw copy of
* compressed data, but we are passing an uncompressed view of the
* buffer for the blit destination image (since compressed formats are
* not renderable), so we also want to provide an uncompressed view of
* the source image.
*/
VkResult result;
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
if (vk_format_is_compressed(image->vk.format)) {
VkImage uiview;
VkImageCreateInfo uiview_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_3D,
.format = dst_format,
.extent = { buf_width, buf_height, image->vk.extent.depth },
.mipLevels = image->vk.mip_levels,
.arrayLayers = image->vk.array_layers,
.samples = image->vk.samples,
.tiling = image->vk.tiling,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)uiview,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
result =
vk_common_BindImageMemory(_device, uiview,
v3dv_device_memory_to_handle(image->mem),
image->mem_offset);
if (result != VK_SUCCESS)
return handled;
image = v3dv_image_from_handle(uiview);
}
/* Copy requested layers */
for (uint32_t i = 0; i < num_layers; i++) {
/* Create the destination blit image from the destination buffer */
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = dst_format,
.extent = { buf_width, buf_height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkImage buffer_image;
result =
v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)buffer_image,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
/* Bind the buffer memory to the image */
VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
i * buf_width * buf_height * buffer_bpp;
result =
vk_common_BindImageMemory(_device, buffer_image,
v3dv_device_memory_to_handle(buffer->mem),
buffer_offset);
if (result != VK_SUCCESS)
return handled;
/* Blit-copy the requested image extent.
*
* Since we are copying, the blit must use the same format on the
* destination and source images to avoid format conversions. The
* only exception is copying stencil, where we blit from an RGBA8UI view
* of the S8D24 source image to an R8UI destination image created from
* the buffer.
*/
const VkImageBlit2 blit_region = {
.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
.srcSubresource = {
.aspectMask = copy_aspect,
.mipLevel = region->imageSubresource.mipLevel,
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
.layerCount = 1,
},
.srcOffsets = {
{
DIV_ROUND_UP(region->imageOffset.x, block_width),
DIV_ROUND_UP(region->imageOffset.y, block_height),
region->imageOffset.z + i,
},
{
DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
block_width),
DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
block_height),
region->imageOffset.z + i + 1,
},
},
.dstSubresource = {
.aspectMask = copy_aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.dstOffsets = {
{ 0, 0, 0 },
{
DIV_ROUND_UP(region->imageExtent.width, block_width),
DIV_ROUND_UP(region->imageExtent.height, block_height),
1
},
},
};
handled = blit_shader(cmd_buffer,
v3dv_image_from_handle(buffer_image), dst_format,
image, src_format,
cmask, &cswizzle,
&blit_region, VK_FILTER_NEAREST, false);
if (!handled) {
/* This is unexpected, we should have a supported blit spec */
unreachable("Unable to blit buffer to destination image");
return false;
}
}
assert(handled);
return true;
}
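/* Image to buffer copies try the TLB path first and fall back to a shader
* blit when the TLB cannot handle the region.
*/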
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2 *info)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
cmd_buffer->state.is_transfer = true;
for (uint32_t i = 0; i < info->regionCount; i++) {
if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
continue;
if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
continue;
unreachable("Unsupported image to buffer copy.");
}
cmd_buffer->state.is_transfer = false;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageCopy2 *region)
{
/* Destination can't be raster format */
if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* We can only do full copies, so if the format is D24S8 both aspects need
* to be copied. We only need to check the dst format because the spec
* states that depth/stencil formats must match exactly.
*/
if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT;
if (region->dstSubresource.aspectMask != ds_aspects)
return false;
}
/* Don't handle copies between uncompressed and compressed formats for now.
*
* FIXME: we should be able to handle these easily, but there is no coverage
* in CTS at the moment that makes such copies with full images (which we
* require here), only partial copies. Also, in that case the code below that
* checks for "dst image complete" requires some changes, since it is
* checking against the region dimensions, which are in units of the source
* image format.
*/
if (vk_format_is_compressed(dst->vk.format) !=
vk_format_is_compressed(src->vk.format)) {
return false;
}
/* Source region must start at (0,0) */
if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
return false;
/* Destination image must be complete */
if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
return false;
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
if (region->extent.width != dst_width || region->extent.height != dst_height)
return false;
/* From vkCmdCopyImage:
*
* "When copying between compressed and uncompressed formats the extent
* members represent the texel dimensions of the source image and not
* the destination."
*/
const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
/* Account for sample count */
assert(dst->vk.samples == src->vk.samples);
if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
width *= 2;
height *= 2;
}
/* The TFU unit doesn't handle format conversions so we need the formats to
* match. On the other hand, vkCmdCopyImage allows different color formats
* on the source and destination images, but only if they are texel
* compatible. For us, this means that we can effectively ignore different
* formats and just make the copy using either of them, since we are just
* moving raw data and not making any conversions.
*
* Also, the formats supported by the TFU unit are limited, but again, since
* we are only doing raw copies here without interpreting or converting
* the underlying pixel data according to its format, we can always choose
* to use compatible formats that are supported with the TFU unit.
*/
assert(dst->cpp == src->cpp);
const struct v3dv_format *format =
v3dv_get_compatible_tfu_format(cmd_buffer->device,
dst->cpp, NULL);
/* Emit a TFU job for each layer to blit */
const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount :
region->extent.depth;
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.baseArrayLayer : region->srcOffset.z;
const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.baseArrayLayer : region->dstOffset.z;
for (uint32_t i = 0; i < layer_count; i++) {
const uint32_t dst_offset =
dst->mem->bo->offset +
v3dv_layer_offset(dst, dst_mip_level, base_dst_layer + i);
const uint32_t src_offset =
src->mem->bo->offset +
v3dv_layer_offset(src, src_mip_level, base_src_layer + i);
const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
cmd_buffer,
dst->mem->bo->handle,
dst_offset,
dst_slice->tiling,
dst_slice->padded_height,
dst->cpp,
src->mem->bo->handle,
src_offset,
src_slice->tiling,
src_slice->tiling == V3D_TILING_RASTER ?
src_slice->stride : src_slice->padded_height,
src->cpp,
width, height, format);
}
return true;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageCopy2 *region)
{
VkFormat fb_format;
if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
!v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
return false;
}
/* From the Vulkan spec, VkImageCopy valid usage:
*
* "If neither the calling command’s srcImage nor the calling command’s
* dstImage has a multi-planar image format then the aspectMask member
* of srcSubresource and dstSubresource must match."
*/
assert(region->dstSubresource.aspectMask ==
region->srcSubresource.aspectMask);
uint32_t internal_type, internal_bpp;
v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
(fb_format, region->dstSubresource.aspectMask,
&internal_type, &internal_bpp);
/* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
*
* "The number of slices of the extent (for 3D) or layers of the
* srcSubresource (for non-3D) must match the number of slices of the
* extent (for 3D) or layers of the dstSubresource (for non-3D)."
*/
assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.layerCount : region->extent.depth) ==
(dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount : region->extent.depth));
uint32_t num_layers;
if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
else
num_layers = region->extent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy to compressed image using compatible format */
const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers,
false, true, 1, internal_bpp,
src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
internal_type, &job->frame_tiling);
v3dv_X(job->device, job_emit_binning_flush)(job);
v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
/**
* Takes the image provided as argument and creates a new image that has
* the same specification and aliases the same memory storage, except that:
*
* - It has the uncompressed format passed in.
* - Its original width/height are scaled by the factors passed in.
*
* This is useful to implement copies from compressed images using the blit
* path. The idea is that we create uncompressed "image views" of both the
* source and destination images using the uncompressed format and then we
* define the copy blit in terms of that format.
*/
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *src,
float width_scale,
float height_scale,
VkFormat format)
{
assert(!vk_format_is_compressed(format));
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
VkImageCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = src->vk.image_type,
.format = format,
.extent = {
.width = src->vk.extent.width * width_scale,
.height = src->vk.extent.height * height_scale,
.depth = src->vk.extent.depth,
},
.mipLevels = src->vk.mip_levels,
.arrayLayers = src->vk.array_layers,
.samples = src->vk.samples,
.tiling = src->vk.tiling,
.usage = src->vk.usage,
};
VkImage _image;
VkResult result =
v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
if (result != VK_SUCCESS) {
v3dv_flag_oom(cmd_buffer, NULL);
return NULL;
}
struct v3dv_image *image = v3dv_image_from_handle(_image);
image->mem = src->mem;
image->mem_offset = src->mem_offset;
return image;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageCopy2 *region)
{
const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
const float block_scale_w = (float)src_block_w / (float)dst_block_w;
const float block_scale_h = (float)src_block_h / (float)dst_block_h;
/* We need to choose a single format for the blit to ensure that this is
* really a copy and there are no format conversions going on. Since we are
* going to blit, we need to make sure that the selected format can be
* both rendered to and textured from.
*/
VkFormat format;
float src_scale_w = 1.0f;
float src_scale_h = 1.0f;
float dst_scale_w = block_scale_w;
float dst_scale_h = block_scale_h;
if (vk_format_is_compressed(src->vk.format)) {
/* If we are copying from a compressed format we should be aware that we
* are going to texture from the source image, and the texture setup
* knows the actual size of the image, so we need to choose a format
* that has a per-texel (not per-block) bpp that is compatible for that
* image size. For example, for a source image with size Bw*WxBh*H
* and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
* each of the Bw*WxBh*H texels in the compressed source image is 8-bit
* (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
* so we could specify a blit with size Bw*WxBh*H and a format with
* a bpp of 8-bit per texel (R8_UINT).
*
* Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
* which is 64-bit per block (4-bit per texel), then we would need a 4-bit
* format, which we don't have, so instead we still choose an 8-bit format,
* but we
* apply a divisor to the row dimensions of the blit, since we are
* copying two texels per item.
*
* Generally, we can choose any format so long as we compute appropriate
* divisors for the width and height depending on the source image's
* bpp.
*/
assert(src->cpp == dst->cpp);
format = VK_FORMAT_R32G32_UINT;
switch (src->cpp) {
case 16:
format = VK_FORMAT_R32G32B32A32_UINT;
break;
case 8:
format = VK_FORMAT_R16G16B16A16_UINT;
break;
default:
unreachable("Unsupported compressed format");
}
/* Create image views of the src/dst images that we can interpret in
* terms of the canonical format.
*/
src_scale_w /= src_block_w;
src_scale_h /= src_block_h;
dst_scale_w /= src_block_w;
dst_scale_h /= src_block_h;
src = create_image_alias(cmd_buffer, src,
src_scale_w, src_scale_h, format);
dst = create_image_alias(cmd_buffer, dst,
dst_scale_w, dst_scale_h, format);
} else {
format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
src->vk.format : get_compatible_tlb_format(src->vk.format);
if (format == VK_FORMAT_UNDEFINED)
return false;
const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
return false;
}
/* Given an uncompressed image with size WxH, if we copy it to a compressed
* image, it will result in an image with size W*bWxH*bH, where bW and bH
* are the compressed format's block width and height. This means that
* copies between compressed and uncompressed images involve different
* image sizes, and therefore, we need to take that into account when
* setting up the source and destination blit regions below, so they are
* consistent from the point of view of the single compatible format
* selected for the copy.
*
* We should take into account that the dimensions of the region provided
* to the copy command are specified in terms of the source image. With that
* in mind, below we adjust the blit destination region to be consistent with
* the source region for the compatible format, so basically, we apply
* the block scale factor to the destination offset provided by the copy
* command (because it is specified in terms of the destination image, not
* the source), and then we just add the region copy dimensions to that
* (since the region dimensions are already specified in terms of the source
* image).
*/
const VkOffset3D src_start = {
region->srcOffset.x * src_scale_w,
region->srcOffset.y * src_scale_h,
region->srcOffset.z,
};
const VkOffset3D src_end = {
src_start.x + region->extent.width * src_scale_w,
src_start.y + region->extent.height * src_scale_h,
src_start.z + region->extent.depth,
};
const VkOffset3D dst_start = {
region->dstOffset.x * dst_scale_w,
region->dstOffset.y * dst_scale_h,
region->dstOffset.z,
};
const VkOffset3D dst_end = {
dst_start.x + region->extent.width * src_scale_w,
dst_start.y + region->extent.height * src_scale_h,
dst_start.z + region->extent.depth,
};
const VkImageBlit2 blit_region = {
.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
.srcSubresource = region->srcSubresource,
.srcOffsets = { src_start, src_end },
.dstSubresource = region->dstSubresource,
.dstOffsets = { dst_start, dst_end },
};
bool handled = blit_shader(cmd_buffer,
dst, format,
src, format,
0, NULL,
&blit_region, VK_FILTER_NEAREST, true);
/* We should have selected formats that we can blit */
assert(handled);
return handled;
}
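/* Image copies try the TFU path first, then the TLB path, and finally fall
* back to a shader blit.
*/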
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
const VkCopyImageInfo2 *info)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
assert(src->vk.samples == dst->vk.samples);
cmd_buffer->state.is_transfer = true;
for (uint32_t i = 0; i < info->regionCount; i++) {
if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
unreachable("Image copy not supported");
}
cmd_buffer->state.is_transfer = false;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2 *pCopyBufferInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
cmd_buffer->state.is_transfer = true;
for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
v3dv_X(cmd_buffer->device, meta_copy_buffer)
(cmd_buffer,
dst_buffer->mem->bo, dst_buffer->mem_offset,
src_buffer->mem->bo, src_buffer->mem_offset,
&pCopyBufferInfo->pRegions[i]);
}
cmd_buffer->state.is_transfer = false;
}
static void
destroy_update_buffer_cb(VkDevice _device,
uint64_t pobj,
VkAllocationCallbacks *alloc)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
v3dv_bo_free(device, bo);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void *pData)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
struct v3dv_bo *src_bo =
v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
if (!src_bo) {
fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
return;
}
bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
if (!ok) {
fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
return;
}
cmd_buffer->state.is_transfer = true;
memcpy(src_bo->map, pData, dataSize);
v3dv_bo_unmap(cmd_buffer->device, src_bo);
VkBufferCopy2 region = {
.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
.srcOffset = 0,
.dstOffset = dstOffset,
.size = dataSize,
};
struct v3dv_job *copy_job =
v3dv_X(cmd_buffer->device, meta_copy_buffer)
(cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
src_bo, 0, &region);
if (copy_job) {
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}
cmd_buffer->state.is_transfer = false;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
uint32_t data)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
cmd_buffer->state.is_transfer = true;
struct v3dv_bo *bo = dst_buffer->mem->bo;
/* From the Vulkan spec:
*
* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
* a multiple of 4, then the nearest smaller multiple is used."
*/
if (size == VK_WHOLE_SIZE) {
size = dst_buffer->size - dstOffset;
size -= size % 4;
}
v3dv_X(cmd_buffer->device, meta_fill_buffer)
(cmd_buffer, bo, dstOffset, size, data);
cmd_buffer->state.is_transfer = false;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy2 *region)
{
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* Destination can't be raster format */
if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* We can't copy D24S8 because buffer to image copies only copy one aspect
* at a time, and the TFU copies full images. Also, V3D stores the depth bits
* for both D24S8 and D24X8 in the 24 MSBs of each 32-bit word, but the
* Vulkan spec has the buffer data specified the other way around, so it
* is not a straight copy; we would have to swizzle the channels, which the
* TFU can't do.
*/
if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
return false;
}
/* Region must include full slice */
const uint32_t offset_x = region->imageOffset.x;
const uint32_t offset_y = region->imageOffset.y;
if (offset_x != 0 || offset_y != 0)
return false;
uint32_t width, height;
if (region->bufferRowLength == 0)
width = region->imageExtent.width;
else
width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
height = region->imageExtent.height;
else
height = region->bufferImageHeight;
if (width != image->vk.extent.width || height != image->vk.extent.height)
return false;
/* Handle region semantics for compressed images */
const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
width = DIV_ROUND_UP(width, block_w);
height = DIV_ROUND_UP(height, block_h);
/* Format must be supported for texturing via the TFU. Since we are just
* copying raw data and not converting between pixel formats, we can ignore
* the image's format and choose a compatible TFU format for the image
* texel size instead, which expands the list of formats we can handle here.
*/
const struct v3dv_format *format =
v3dv_get_compatible_tfu_format(cmd_buffer->device,
image->cpp, NULL);
const uint32_t mip_level = region->imageSubresource.mipLevel;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
assert(image->mem && image->mem->bo);
const struct v3dv_bo *dst_bo = image->mem->bo;
assert(buffer->mem && buffer->mem->bo);
const struct v3dv_bo *src_bo = buffer->mem->bo;
/* Emit a TFU job per layer to copy */
const uint32_t buffer_stride = width * image->cpp;
for (int i = 0; i < num_layers; i++) {
uint32_t layer;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
layer = region->imageSubresource.baseArrayLayer + i;
else
layer = region->imageOffset.z + i;
const uint32_t buffer_offset =
buffer->mem_offset + region->bufferOffset +
height * buffer_stride * i;
const uint32_t src_offset = src_bo->offset + buffer_offset;
const uint32_t dst_offset =
dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
cmd_buffer,
dst_bo->handle,
dst_offset,
slice->tiling,
slice->padded_height,
image->cpp,
src_bo->handle,
src_offset,
V3D_TILING_RASTER,
width,
1,
width, height, format);
}
return true;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy2 *region)
{
VkFormat fb_format;
if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
(fb_format, region->imageSubresource.aspectMask,
&internal_type, &internal_bpp);
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return true;
/* Handle copy to compressed format using a compatible format */
const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
v3dv_job_start_frame(job, width, height, num_layers, false, true,
1, internal_bpp, false);
struct v3dv_meta_framebuffer framebuffer;
v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
internal_type, &job->frame_tiling);
v3dv_X(job->device, job_emit_binning_flush)(job);
v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
(job, image, buffer, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
}
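/* Uploads buffer data to a tiled image, trying the TFU path first and the
* TLB path second. Returns false if neither path can handle the region.
*/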
static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy2 *region)
{
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
return true;
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
return true;
return false;
}
static VkResult
create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
/* If this is not the first pool we create for this command buffer,
* size it based on the size of the currently exhausted pool.
*/
uint32_t descriptor_count = 64;
if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
struct v3dv_descriptor_pool *exhausted_pool =
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
}
/* Create the descriptor pool */
cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
VkDescriptorPoolSize pool_size = {
.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = descriptor_count,
};
VkDescriptorPoolCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = descriptor_count,
.poolSizeCount = 1,
.pPoolSizes = &pool_size,
.flags = 0,
};
VkResult result =
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
&info,
&cmd_buffer->device->vk.alloc,
&cmd_buffer->meta.texel_buffer_copy.dspool);
if (result == VK_SUCCESS) {
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t) _pool,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
struct v3dv_descriptor_pool *pool =
v3dv_descriptor_pool_from_handle(_pool);
pool->is_driver_internal = true;
}
return result;
}
static VkResult
allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
VkDescriptorSet *set)
{
/* Make sure we have a descriptor pool */
VkResult result;
if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
if (result != VK_SUCCESS)
return result;
}
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
/* Allocate descriptor set */
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
VkDescriptorSetAllocateInfo info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
.descriptorSetCount = 1,
.pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
};
result = v3dv_AllocateDescriptorSets(_device, &info, set);
/* If we ran out of pool space, grow the pool and try again */
if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
if (result == VK_SUCCESS) {
info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
result = v3dv_AllocateDescriptorSets(_device, &info, set);
}
}
return result;
}
static void
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
bool is_layered,
uint8_t *key)
{
memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
uint32_t *p = (uint32_t *) key;
*p = format;
p++;
*p = cmask;
p++;
/* Note that we are using a single byte for this, so we could pack
* more data into this 32-bit slot in the future.
*/
*p = is_layered ? 1 : 0;
p++;
memcpy(p, cswizzle, sizeof(VkComponentMapping));
p += sizeof(VkComponentMapping) / sizeof(uint32_t);
assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}
static bool
create_blit_render_pass(struct v3dv_device *device,
VkFormat dst_format,
VkFormat src_format,
VkRenderPass *pass_load,
VkRenderPass *pass_no_load);
static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
const VkPipelineColorBlendStateCreateInfo *cb_state,
const VkPipelineMultisampleStateCreateInfo *ms_state,
const VkPipelineLayout layout,
VkPipeline *pipeline);
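/* Builds the vertex shader for the texel buffer copy: it only emits the
* rectangle vertex positions, with no other inputs or outputs.
*/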
static nir_shader *
get_texel_buffer_copy_vs()
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
"meta texel buffer copy vs");
nir_variable *vs_out_pos =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vec4_type(), "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
return b.shader;
}
static nir_shader *
get_texel_buffer_copy_gs()
{
/* FIXME: this creates a geometry shader that takes the index of a single
* layer to copy from push constants, so we need to emit a draw call for
* each layer that we want to copy. We could actually do better and have it
* take a range of layers; however, if we were to do this, we would need to
* be careful not to exceed the maximum number of output vertices allowed in
* a geometry shader.
*/
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
"meta texel buffer copy gs");
nir_shader *nir = b.shader;
nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
(1ull << VARYING_SLOT_LAYER);
nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
nir->info.gs.vertices_in = 3;
nir->info.gs.vertices_out = 3;
nir->info.gs.invocations = 1;
nir->info.gs.active_stream_mask = 0x1;
/* in vec4 gl_Position[3] */
nir_variable *gs_in_pos =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_array_type(glsl_vec4_type(), 3, 0),
"in_gl_Position");
gs_in_pos->data.location = VARYING_SLOT_POS;
/* out vec4 gl_Position */
nir_variable *gs_out_pos =
nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
"out_gl_Position");
gs_out_pos->data.location = VARYING_SLOT_POS;
/* out float gl_Layer */
nir_variable *gs_out_layer =
nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
"out_gl_Layer");
gs_out_layer->data.location = VARYING_SLOT_LAYER;
/* Emit output triangle */
for (uint32_t i = 0; i < 3; i++) {
/* gl_Position from shader input */
nir_deref_instr *in_pos_i =
nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
nir_ssa_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
.range = 4);
nir_store_var(&b, gs_out_layer, layer, 0x1);
nir_emit_vertex(&b, 0);
}
nir_end_primitive(&b, 0);
return nir;
}
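/* Returns gl_FragCoord, reusing an existing shader input for it if one has
* already been declared, and creating it otherwise.
*/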
static nir_ssa_def *
load_frag_coord(nir_builder *b)
{
nir_foreach_shader_in_variable(var, b->shader) {
if (var->data.location == VARYING_SLOT_POS)
return nir_load_var(b, var);
}
nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
glsl_vec4_type(), NULL);
pos->data.location = VARYING_SLOT_POS;
return nir_load_var(b, pos);
}
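/* Translates a Vulkan component swizzle into a NIR channel index, resolving
* IDENTITY to the component's own position.
*/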
static uint32_t
component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
{
if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
swz = comp;
switch (swz) {
case VK_COMPONENT_SWIZZLE_R:
return 0;
case VK_COMPONENT_SWIZZLE_G:
return 1;
case VK_COMPONENT_SWIZZLE_B:
return 2;
case VK_COMPONENT_SWIZZLE_A:
return 3;
default:
unreachable("Invalid swizzle");
};
}
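/* Builds the fragment shader for the texel buffer copy: it computes the
* texel offset from the fragment coordinate, the copy box, and the buffer
* offset/stride provided via push constants, fetches the texel from the
* uniform texel buffer and writes the swizzled result to the color output.
*/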
static nir_shader *
get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
VkComponentMapping *cswizzle)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
"meta texel buffer copy fs");
/* We only use the copy from texel buffer shader to implement
* copy_buffer_to_image_shader, which always selects a compatible integer
* format for the copy.
*/
assert(vk_format_is_int(format));
/* Fragment shader output color */
nir_variable *fs_out_color =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_uvec4_type(), "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0;
/* Texel buffer input */
const struct glsl_type *sampler_type =
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
nir_variable *sampler =
nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
/* Load the box describing the pixel region we want to copy from the
* texel buffer.
*/
nir_ssa_def *box =
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
.range = 16);
/* Load the buffer stride (this comes in texel units) */
nir_ssa_def *stride =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
.range = 4);
/* Load the buffer offset (this comes in texel units) */
nir_ssa_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
.range = 4);
nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
/* Load pixel data from texel buffer based on the x,y offset of the pixel
* within the box. Texel buffers are 1D arrays of texels.
*
* Notice that we already make sure that we only generate fragments that are
* inside the box through the scissor/viewport state, so our offset into the
* texel buffer should always be within its bounds and we don't need
* to add a check for that here.
*/
nir_ssa_def *x_offset =
nir_isub(&b, nir_channel(&b, coord, 0),
nir_channel(&b, box, 0));
nir_ssa_def *y_offset =
nir_isub(&b, nir_channel(&b, coord, 1),
nir_channel(&b, box, 1));
nir_ssa_def *texel_offset =
nir_iadd(&b, nir_iadd(&b, offset, x_offset),
nir_imul(&b, y_offset, stride));
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(texel_offset);
tex->src[1].src_type = nir_tex_src_texture_deref;
tex->src[1].src = nir_src_for_ssa(tex_deref);
tex->dest_type = nir_type_uint32;
tex->is_array = false;
tex->coord_components = 1;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
nir_builder_instr_insert(&b, &tex->instr);
uint32_t swiz[4];
swiz[0] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
swiz[1] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
swiz[2] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
swiz[3] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
nir_store_var(&b, fs_out_color, s, 0xf);
return b.shader;
}
static bool
create_texel_buffer_copy_pipeline(struct v3dv_device *device,
VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
bool is_layered,
VkRenderPass _pass,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
assert(vk_format_is_color(format));
nir_shader *vs_nir = get_texel_buffer_copy_vs();
nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
};
VkPipelineDepthStencilStateCreateInfo ds_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
};
VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
.blendEnable = false,
.colorWriteMask = cmask,
};
const VkPipelineColorBlendStateCreateInfo cb_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = blend_att_state
};
const VkPipelineMultisampleStateCreateInfo ms_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.sampleShadingEnable = false,
.pSampleMask = NULL,
.alphaToCoverageEnable = false,
.alphaToOneEnable = false,
};
return create_pipeline(device,
pass,
vs_nir, gs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
&ms_state,
pipeline_layout,
pipeline);
}
static bool
get_copy_texel_buffer_pipeline(
struct v3dv_device *device,
VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
VkImageType image_type,
bool is_layered,
struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
{
bool ok = true;
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
key);
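/* Pipelines are cached per image type, keyed by format, color write mask,
* component swizzle and whether the copy is layered.
*/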
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
_mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
key);
if (entry) {
mtx_unlock(&device->meta.mtx);
*pipeline = entry->data;
return true;
}
*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (*pipeline == NULL)
goto fail;
/* The blit render pass is compatible */
ok = create_blit_render_pass(device, format, format,
&(*pipeline)->pass,
&(*pipeline)->pass_no_load);
if (!ok)
goto fail;
ok =
create_texel_buffer_copy_pipeline(device,
format, cmask, cswizzle, is_layered,
(*pipeline)->pass,
device->meta.texel_buffer_copy.p_layout,
&(*pipeline)->pipeline);
if (!ok)
goto fail;
uint8_t *dupkey = malloc(V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
memcpy(dupkey, key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
_mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
dupkey, *pipeline);
mtx_unlock(&device->meta.mtx);
return true;
fail:
mtx_unlock(&device->meta.mtx);
VkDevice _device = v3dv_device_to_handle(device);
if (*pipeline) {
if ((*pipeline)->pass)
v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
if ((*pipeline)->pipeline)
v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
vk_free(&device->vk.alloc, *pipeline);
*pipeline = NULL;
}
return false;
}
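/**
* Copies buffer data to a color-compatible image by binding the buffer as a
* uniform texel buffer and rendering into the image: one render pass per
* layer, one draw per region.
*
* Returns true if the implementation supports the requested operation (even
* if it failed to process it, for example, due to an out-of-memory error).
*/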
static bool
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
VkImageAspectFlags aspect,
struct v3dv_image *image,
VkFormat dst_format,
VkFormat src_format,
struct v3dv_buffer *buffer,
uint32_t buffer_bpp,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
uint32_t region_count,
const VkBufferImageCopy2 *regions)
{
VkResult result;
bool handled = false;
assert(cswizzle);
/* This is a copy path, so we don't handle format conversions. The only
* exceptions are stencil to D24S8 copies, which are handled as a color
* masked R8->RGBA8 copy.
*/
assert(src_format == dst_format ||
(dst_format == VK_FORMAT_R8G8B8A8_UINT &&
src_format == VK_FORMAT_R8_UINT &&
cmask == VK_COLOR_COMPONENT_R_BIT));
/* We only handle color copies. Callers can copy D/S aspects by using
* a compatible color format and maybe a cmask/cswizzle for D24 formats.
*/
if (!vk_format_is_color(dst_format) || !vk_format_is_color(src_format))
return handled;
/* FIXME: we only handle uncompressed images for now. */
if (vk_format_is_compressed(image->vk.format))
return handled;
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
if (cmask == 0)
cmask = full_cmask;
/* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
* so we can bind it as a texel buffer. Otherwise, the buffer view
* we create below won't set up the texture state that we need for this.
*/
if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
if (v3dv_buffer_format_supports_features(
cmd_buffer->device, src_format,
VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT)) {
buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
} else {
return handled;
}
}
/* At this point we should be able to handle the copy unless an unexpected
* error occurs, such as an OOM.
*/
handled = true;
/* Compute the number of layers to copy.
*
* If we are batching (region_count > 1) all our regions have the same
* image subresource so we can take this from the first region. For 3D
* images we require the same depth extent.
*/
const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
num_layers = resource->layerCount;
} else {
assert(region_count == 1);
num_layers = regions[0].imageExtent.depth;
}
assert(num_layers > 0);
/* Get the texel buffer copy pipeline */
struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
dst_format, cmask, cswizzle,
image->vk.image_type, num_layers > 1,
&pipeline);
if (!ok)
return handled;
assert(pipeline && pipeline->pipeline && pipeline->pass);
/* Setup descriptor set for the source texel buffer. We don't have to
* register the descriptor as a private command buffer object since
* all descriptors will be freed automatically with the descriptor
* pool.
*/
VkDescriptorSet set;
result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
if (result != VK_SUCCESS)
return handled;
/* We can't pass region->bufferOffset here for the offset field because
* the texture base pointer in the texture shader state must be a 64-byte
* aligned value. Instead, we use 0 here and we pass the offset in texels
* as a push constant to the shader.
*/
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
VkBufferViewCreateInfo buffer_view_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = v3dv_buffer_to_handle(buffer),
.format = src_format,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
VkBufferView texel_buffer_view;
result = v3dv_CreateBufferView(_device, &buffer_view_info,
&cmd_buffer->device->vk.alloc,
&texel_buffer_view);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)texel_buffer_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
VkWriteDescriptorSet write = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.pTexelBufferView = &texel_buffer_view,
};
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
/* Push command buffer state before starting meta operation */
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
/* Bind common state for all layers and regions */
VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
v3dv_CmdBindPipeline(_cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline->pipeline);
v3dv_CmdBindDescriptorSets(_cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
0, 1, &set,
0, NULL);
/* Setup framebuffer.
*
* For 3D images, this creates a layered framebuffer with a number of
* layers matching the depth extent of the 3D image.
*/
uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
VkImageViewCreateInfo image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(image),
.viewType = v3dv_image_type_to_view_type(image->vk.image_type),
.format = dst_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = resource->mipLevel,
.levelCount = 1,
.baseArrayLayer = resource->baseArrayLayer,
.layerCount = num_layers,
},
};
VkImageView image_view;
result = v3dv_create_image_view(cmd_buffer->device,
&image_view_info, &image_view);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)image_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
VkFramebufferCreateInfo fb_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = pipeline->pass,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = fb_width,
.height = fb_height,
.layers = num_layers,
};
VkFramebuffer fb;
result = v3dv_CreateFramebuffer(_device, &fb_info,
&cmd_buffer->device->vk.alloc, &fb);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)fb,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
/* For each layer */
for (uint32_t l = 0; l < num_layers; l++) {
/* Start render pass for this layer.
*
* If we only have one region to copy, then we might be able to
* skip the TLB load if it is aligned to tile boundaries. All layers
* copy the same area, so we only need to check this once.
*/
bool can_skip_tlb_load = false;
VkRect2D render_area;
if (region_count == 1) {
render_area.offset.x = regions[0].imageOffset.x;
render_area.offset.y = regions[0].imageOffset.y;
render_area.extent.width = regions[0].imageExtent.width;
render_area.extent.height = regions[0].imageExtent.height;
if (l == 0) {
struct v3dv_render_pass *pipeline_pass =
v3dv_render_pass_from_handle(pipeline->pass);
can_skip_tlb_load =
cmask == full_cmask &&
v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
v3dv_framebuffer_from_handle(fb),
pipeline_pass, 0);
}
} else {
render_area.offset.x = 0;
render_area.offset.y = 0;
render_area.extent.width = fb_width;
render_area.extent.height = fb_height;
}
VkRenderPassBeginInfo rp_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
pipeline->pass,
.framebuffer = fb,
.renderArea = render_area,
.clearValueCount = 0,
};
VkSubpassBeginInfo sp_info = {
.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
.contents = VK_SUBPASS_CONTENTS_INLINE,
};
v3dv_CmdBeginRenderPass2(_cmd_buffer, &rp_info, &sp_info);
struct v3dv_job *job = cmd_buffer->state.job;
if (!job)
goto fail;
/* If we are using a layered copy we need to specify the layer for the
* Geometry Shader.
*/
if (num_layers > 1) {
uint32_t layer = resource->baseArrayLayer + l;
v3dv_CmdPushConstants(_cmd_buffer,
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
VK_SHADER_STAGE_GEOMETRY_BIT,
24, 4, &layer);
}
/* For each region */
for (uint32_t r = 0; r < region_count; r++) {
const VkBufferImageCopy2 *region = &regions[r];
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
if (region->bufferRowLength == 0)
buf_width = region->imageExtent.width;
else
buf_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buf_height = region->imageExtent.height;
else
buf_height = region->bufferImageHeight;
const VkViewport viewport = {
.x = region->imageOffset.x,
.y = region->imageOffset.y,
.width = region->imageExtent.width,
.height = region->imageExtent.height,
.minDepth = 0.0f,
.maxDepth = 1.0f
};
v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
const VkRect2D scissor = {
.offset = { region->imageOffset.x, region->imageOffset.y },
.extent = { region->imageExtent.width, region->imageExtent.height }
};
v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
const VkDeviceSize buf_offset =
region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
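/* Fragment shader push constants: the destination box as
* (x0, y0, x1, y1) with inclusive maximums, the buffer row stride in
* texels and the buffer offset in texels for this layer.
*/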
uint32_t push_data[6] = {
region->imageOffset.x,
region->imageOffset.y,
region->imageOffset.x + region->imageExtent.width - 1,
region->imageOffset.y + region->imageExtent.height - 1,
buf_width,
buf_offset,
};
v3dv_CmdPushConstants(_cmd_buffer,
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
VK_SHADER_STAGE_FRAGMENT_BIT,
0, sizeof(push_data), &push_data);
v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
} /* For each region */
VkSubpassEndInfo sp_end_info = {
.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
};
v3dv_CmdEndRenderPass2(_cmd_buffer, &sp_end_info);
} /* For each layer */
fail:
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, true);
return handled;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
VkImageAspectFlags aspect,
struct v3dv_image *image,
VkFormat dst_format,
VkFormat src_format,
struct v3dv_buffer *buffer,
uint32_t buffer_bpp,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
uint32_t region_count,
const VkBufferImageCopy2 *regions)
{
/* Since we can't sample linear images we need to upload the linear
* buffer to a tiled image that we can use as a blit source, which
* is slow.
*/
perf_debug("Falling back to blit path for buffer to image copy.\n");
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
bool handled = true;
/* Allocate memory for the tiled image. Since we copy layer by layer
* we allocate memory to hold a full layer, which is the worst case.
* For that we create a dummy image with that spec, get memory requirements
* for it and use that information to create the memory allocation.
* We will then reuse this memory store for all the regions we want to
* copy.
*/
VkImage dummy_image;
VkImageCreateInfo dummy_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = src_format,
.extent = { image->vk.extent.width, image->vk.extent.height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkResult result =
v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
if (result != VK_SUCCESS)
return handled;
VkMemoryRequirements reqs;
vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
VkDeviceMemory mem;
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = reqs.size,
.memoryTypeIndex = 0,
};
result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)mem,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
/* Obtain the layer count.
*
* If we are batching (region_count > 1) all our regions have the same
* image subresource so we can take this from the first region.
*/
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = regions[0].imageSubresource.layerCount;
else
num_layers = regions[0].imageExtent.depth;
assert(num_layers > 0);
/* Sanity check: we can only batch multiple regions together if they have
* the same framebuffer (so the same layer).
*/
assert(num_layers == 1 || region_count == 1);
const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
const uint32_t block_height = vk_format_get_blockheight(image->vk.format);
/* Copy regions by uploading each region to a temporary tiled image using
* the memory we have just allocated as storage.
*/
for (uint32_t r = 0; r < region_count; r++) {
const VkBufferImageCopy2 *region = &regions[r];
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
if (region->bufferRowLength == 0)
buf_width = region->imageExtent.width;
else
buf_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buf_height = region->imageExtent.height;
else
buf_height = region->bufferImageHeight;
/* If the image is compressed, the bpp refers to blocks, not pixels */
buf_width = buf_width / block_width;
buf_height = buf_height / block_height;
for (uint32_t i = 0; i < num_layers; i++) {
/* Create the tiled image */
VkImageCreateInfo image_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = src_format,
.extent = { buf_width, buf_height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkImage buffer_image;
VkResult result =
v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
&buffer_image);
if (result != VK_SUCCESS)
return handled;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)buffer_image,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
if (result != VK_SUCCESS)
return handled;
/* Upload buffer contents for the selected layer */
const VkDeviceSize buf_offset_bytes =
region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
const VkBufferImageCopy2 buffer_image_copy = {
.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2,
.bufferOffset = buf_offset_bytes,
.bufferRowLength = region->bufferRowLength / block_width,
.bufferImageHeight = region->bufferImageHeight / block_height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset = { 0, 0, 0 },
.imageExtent = { buf_width, buf_height, 1 }
};
handled =
create_tiled_image_from_buffer(cmd_buffer,
v3dv_image_from_handle(buffer_image),
buffer, &buffer_image_copy);
if (!handled) {
/* This is unexpected, we should have set up the upload so it
* conforms to a TFU or TLB copy.
*/
unreachable("Unable to copy buffer to image through TLB");
return false;
}
/* Blit-copy the requested image extent from the buffer image to the
* destination image.
*
* Since we are copying, the blit must use the same format on the
* destination and source images to avoid format conversions. The
* only exception is copying stencil, which we upload to an R8UI source
* image and then need to blit to an S8D24 destination (the only
* stencil format we support).
*/
const VkImageBlit2 blit_region = {
.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2,
.srcSubresource = {
.aspectMask = aspect,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.srcOffsets = {
{ 0, 0, 0 },
{ region->imageExtent.width, region->imageExtent.height, 1 },
},
.dstSubresource = {
.aspectMask = aspect,
.mipLevel = region->imageSubresource.mipLevel,
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
.layerCount = 1,
},
.dstOffsets = {
{
DIV_ROUND_UP(region->imageOffset.x, block_width),
DIV_ROUND_UP(region->imageOffset.y, block_height),
region->imageOffset.z + i,
},
{
DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
block_width),
DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
block_height),
region->imageOffset.z + i + 1,
},
},
};
handled = blit_shader(cmd_buffer,
image, dst_format,
v3dv_image_from_handle(buffer_image), src_format,
cmask, cswizzle,
&blit_region, VK_FILTER_NEAREST, true);
if (!handled) {
/* This is unexpected, we should have a supported blit spec */
unreachable("Unable to blit buffer to destination image");
return false;
}
}
}
return handled;
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
uint32_t region_count,
const VkBufferImageCopy2 *regions,
bool use_texel_buffer)
{
/* We can only call this with region_count > 1 if we can batch the regions
* together, in which case they share the same image subresource, and so
* the same aspect.
*/
VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
/* Generally, the bpp of the data in the buffer matches that of the
* destination image. The exception is the case where we are uploading
* stencil (8bpp) to a combined d24s8 image (32bpp).
*/
uint32_t buf_bpp = image->cpp;
/* We are about to upload the buffer data to an image so we can then
* blit that to our destination region. Because we are going to implement
* the copy as a blit, we want our blit source and destination formats to be
* the same (to avoid any format conversions), so we choose a canonical
* format that matches the destination image bpp.
*/
VkComponentMapping ident_swizzle = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
VkComponentMapping cswizzle = ident_swizzle;
VkColorComponentFlags cmask = 0; /* Write all components */
VkFormat src_format;
VkFormat dst_format;
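/* Pick a canonical integer format matching the destination bpp (bytes per
* pixel): 16 bytes -> RGBA32_UINT, 8 -> RGBA16_UINT, 4 -> RGBA8_UINT,
* 2 -> R16_UINT, 1 -> R8_UINT. Depth and stencil aspects are handled as
* compatible color copies, possibly with a cmask/cswizzle.
*/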
switch (buf_bpp) {
case 16:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R32G32B32A32_UINT;
dst_format = src_format;
break;
case 8:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R16G16B16A16_UINT;
dst_format = src_format;
break;
case 4:
switch (aspect) {
case VK_IMAGE_ASPECT_COLOR_BIT:
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = src_format;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = src_format;
/* For D24 formats, the Vulkan spec states that the depth component
* in the buffer is stored in the 24 LSBs, but V3D wants it in the
* 24 MSBs.
*/
if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
cmask = VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
cswizzle.r = VK_COMPONENT_SWIZZLE_R;
cswizzle.g = VK_COMPONENT_SWIZZLE_R;
cswizzle.b = VK_COMPONENT_SWIZZLE_G;
cswizzle.a = VK_COMPONENT_SWIZZLE_B;
}
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
/* Since we don't support separate stencil images, this is always a
* stencil copy to a combined depth/stencil image. We interpret the
* buffer data as a color R8UI image and implement the copy as a
* compatible color blit to an RGBA8UI destination, masking out writes
* to the GBA components (which map to the D24 component of an S8D24
* image).
*/
assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
buf_bpp = 1;
src_format = VK_FORMAT_R8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
cmask = VK_COLOR_COMPONENT_R_BIT;
break;
default:
unreachable("unsupported aspect");
return false;
};
break;
case 2:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
src_format = VK_FORMAT_R16_UINT;
dst_format = src_format;
break;
case 1:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
src_format = VK_FORMAT_R8_UINT;
dst_format = src_format;
break;
default:
unreachable("unsupported bit-size");
return false;
}
if (use_texel_buffer) {
return texel_buffer_shader_copy(cmd_buffer, aspect, image,
dst_format, src_format,
buffer, buf_bpp,
cmask, &cswizzle,
region_count, regions);
} else {
return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
dst_format, src_format,
buffer, buf_bpp,
cmask, &cswizzle,
region_count, regions);
}
}
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*/
static bool
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
const VkBufferImageCopy2 *region)
{
/* FIXME */
if (vk_format_is_depth_or_stencil(image->vk.format))
return false;
if (vk_format_is_compressed(image->vk.format))
return false;
if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
uint32_t buffer_width, buffer_height;
if (region->bufferRowLength == 0)
buffer_width = region->imageExtent.width;
else
buffer_width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
buffer_height = region->imageExtent.height;
else
buffer_height = region->bufferImageHeight;
uint32_t buffer_stride = buffer_width * image->cpp;
uint32_t buffer_layer_stride = buffer_stride * buffer_height;
uint32_t num_layers;
if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
assert(num_layers > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
cmd_buffer, -1);
if (!job)
return true;
job->cpu.copy_buffer_to_image.image = image;
job->cpu.copy_buffer_to_image.buffer = buffer;
job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
job->cpu.copy_buffer_to_image.mip_level =
region->imageSubresource.mipLevel;
job->cpu.copy_buffer_to_image.base_layer =
region->imageSubresource.baseArrayLayer;
job->cpu.copy_buffer_to_image.layer_count = num_layers;
list_addtail(&job->list_link, &cmd_buffer->jobs);
return true;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2 *info)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
cmd_buffer->state.is_transfer = true;
uint32_t r = 0;
while (r < info->regionCount) {
/* The TFU and TLB paths can only copy one region at a time and the region
* needs to start at the origin. We try these first for the common case
* where we are copying full images, since they should be the fastest.
*/
uint32_t batch_size = 1;
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
goto handled;
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
goto handled;
/* Otherwise, we are copying subrects, so we fall back to copying
* via shader and texel buffers and we try to batch the regions
* if possible. We can only batch copies if they have the same
* framebuffer spec, which is mostly determined by the image
* subresource of the region.
*/
const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
for (uint32_t s = r + 1; s < info->regionCount; s++) {
const VkImageSubresourceLayers *rsc_s =
&info->pRegions[s].imageSubresource;
if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
break;
/* For 3D images we also need to check the depth extent */
if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
info->pRegions[s].imageExtent.depth !=
info->pRegions[r].imageExtent.depth) {
break;
}
batch_size++;
}
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
batch_size, &info->pRegions[r], true)) {
goto handled;
}
/* If we still could not copy, fall back to slower paths.
*
* FIXME: we could try to batch these too, but since they are bound to be
* slow it might not be worth it and we should instead put more effort
* into handling more cases with the other paths.
*/
if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
&info->pRegions[r])) {
batch_size = 1;
goto handled;
}
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
batch_size, &info->pRegions[r], false)) {
goto handled;
}
unreachable("Unsupported buffer to image copy.");
handled:
r += batch_size;
}
cmd_buffer->state.is_transfer = false;
}
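/* Forward declaration: computes the range of 3D slices covered by the given
* blit offsets and whether the blit mirrors along the Z axis.
*/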
static void
compute_blit_3d_layers(const VkOffset3D *offsets,
uint32_t *min_layer, uint32_t *max_layer,
bool *mirror_z);
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
*
* The TFU blit path doesn't handle scaling so the blit filter parameter can
* be ignored.
*/
static bool
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
const VkImageBlit2 *region)
{
assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* Format must match */
if (src->vk.format != dst->vk.format)
return false;
/* Destination can't be raster format */
if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* Source region must start at (0,0) */
if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
return false;
/* Destination image must be complete */
if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
return false;
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
if (region->dstOffsets[1].x < dst_width - 1 ||
region->dstOffsets[1].y < dst_height - 1) {
return false;
}
/* No XY scaling */
if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
region->srcOffsets[1].y != region->dstOffsets[1].y) {
return false;
}
/* If the format is D24S8 both aspects need to be copied, since the TFU
* can't be programmed to copy only one aspect of the image.
*/
if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT;
if (region->dstSubresource.aspectMask != ds_aspects)
return false;
}
/* Our TFU blits only handle exact copies (they require the same format
* on input and output, no scaling, etc.), so there are no pixel format
* conversions involved and we can rewrite the format to use one that is
* TFU compatible based on its texel size.
*/
const struct v3dv_format *format =
v3dv_get_compatible_tfu_format(cmd_buffer->device,
dst->cpp, NULL);
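/* Note that src->cpp matches dst->cpp here since we already checked that
* both images use the same format, so the compatible TFU format derived
* from dst->cpp works for both sides of the copy.
*/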
/* Emit a TFU job for each layer to blit */
assert(region->dstSubresource.layerCount ==
region->srcSubresource.layerCount);
uint32_t min_dst_layer;
uint32_t max_dst_layer;
bool dst_mirror_z = false;
if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->dstOffsets,
&min_dst_layer, &max_dst_layer,
&dst_mirror_z);
} else {
min_dst_layer = region->dstSubresource.baseArrayLayer;
max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
}
uint32_t min_src_layer;
uint32_t max_src_layer;
bool src_mirror_z = false;
if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->srcOffsets,
&min_src_layer, &max_src_layer,
&src_mirror_z);
} else {
min_src_layer = region->srcSubresource.baseArrayLayer;
max_src_layer = min_src_layer + region->srcSubresource.layerCount;
}
/* No Z scaling for 3D images (for non-3D images both src and dst must
* have the same layerCount).
*/
if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
return false;
const uint32_t layer_count = max_dst_layer - min_dst_layer;
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
for (uint32_t i = 0; i < layer_count; i++) {
/* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
* only involves reversing the order of the slices.
*/
const uint32_t dst_layer =
dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
const uint32_t src_layer =
src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
const uint32_t dst_offset =
dst->mem->bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
const uint32_t src_offset =
src->mem->bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
v3dv_X(cmd_buffer->device, meta_emit_tfu_job)(
cmd_buffer,
dst->mem->bo->handle,
dst_offset,
dst_slice->tiling,
dst_slice->padded_height,
dst->cpp,
src->mem->bo->handle,
src_offset,
src_slice->tiling,
src_slice->tiling == V3D_TILING_RASTER ?
src_slice->stride : src_slice->padded_height,
src->cpp,
dst_width, dst_height, format);
}
return true;
}
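/* Returns true for formats where the hardware integer render-target clamp
* uses a wider type than the format itself, so the blit shader has to clamp
* in software (see the cache key comment below).
*/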
static bool
format_needs_software_int_clamp(VkFormat format)
{
switch (format) {
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
return true;
default:
return false;
};
}
static void
get_blit_pipeline_cache_key(VkFormat dst_format,
VkFormat src_format,
VkColorComponentFlags cmask,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
uint8_t *key)
{
memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
uint32_t *p = (uint32_t *) key;
*p = dst_format;
p++;
/* Generally, when blitting from a larger format to a smaller format
* the hardware takes care of clamping the source to the RT range.
* Specifically, for integer formats, this is done by using
* V3D_RENDER_TARGET_CLAMP_INT in the render target setup. However, this
* clamps to the bit-size of the render type, and some formats, such as
* rgb10a2_uint, have a 16-bit type, so it won't do what we need and we
* have to clamp in software. In these cases, we need to amend the blit
* shader with clamp code that depends on both the src and dst formats, so
* we need the src format to be part of the key.