blob: 2d43766df2f84601c3849df1ef538fe106030cc8 [file] [log] [blame]
/*
* Copyright © 2021 Raspberry Pi Ltd
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "v3dv_private.h"
#include "v3dv_meta_common.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/common/v3d_tfu.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"
/* Clear parameters handed to emit_rcl_prologue() by the meta operations in
 * this file that clear their render target.
 */
struct rcl_clear_info {
/* Clear value; the prologue reads its color[], z and s members. */
const union v3dv_clear_value *clear_value;
/* Image being cleared; NULL when there is none (the buffer fill path). */
struct v3dv_image *image;
/* Aspects to clear; color clear packets are only emitted for COLOR_BIT. */
VkImageAspectFlags aspects;
/* Mip level; used by the prologue to index image->slices[]. */
uint32_t level;
};
static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
struct v3dv_meta_framebuffer *fb,
const struct rcl_clear_info *clear_info)
{
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
struct v3dv_cl *rcl = &job->rcl;
v3dv_cl_ensure_space_with_branch(rcl, 200 +
tiling->layers * 256 *
cl_packet_length(SUPERTILE_COORDINATES));
if (job->cmd_buffer->state.oom)
return NULL;
assert(!tiling->msaa || !tiling->double_buffer);
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
config.early_z_disable = true;
config.image_width_pixels = tiling->width;
config.image_height_pixels = tiling->height;
config.number_of_render_targets = 1;
config.multisample_mode_4x = tiling->msaa;
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
config.internal_depth_type = fb->internal_depth_type;
}
if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
uint32_t clear_pad = 0;
if (clear_info->image) {
const struct v3dv_image *image = clear_info->image;
const struct v3d_resource_slice *slice =
&image->slices[clear_info->level];
if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
slice->tiling == V3D_TILING_UIF_XOR) {
int uif_block_height = v3d_utile_height(image->cpp) * 2;
uint32_t implicit_padded_height =
align(tiling->height, uif_block_height) / uif_block_height;
if (slice->padded_height_of_output_image_in_uif_blocks -
implicit_padded_height >= 15) {
clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
}
}
}
const uint32_t *color = &clear_info->clear_value->color[0];
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
clear.clear_color_low_32_bits = color[0];
clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
clear.render_target_number = 0;
};
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
clear.clear_color_mid_low_32_bits =
((color[1] >> 24) | (color[2] << 8));
clear.clear_color_mid_high_24_bits =
((color[2] >> 24) | ((color[3] & 0xffff) << 8));
clear.render_target_number = 0;
};
}
if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
clear.uif_padded_height_in_uif_blocks = clear_pad;
clear.clear_color_high_16_bits = color[3] >> 16;
clear.render_target_number = 0;
};
}
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
rt.render_target_0_internal_bpp = tiling->internal_bpp;
rt.render_target_0_internal_type = fb->internal_type;
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
}
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
};
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
init.use_auto_chained_tile_lists = true;
init.size_of_first_block_in_chained_tile_lists =
TILE_ALLOCATION_BLOCK_SIZE_64B;
}
return rcl;
}
static void
emit_frame_setup(struct v3dv_job *job,
uint32_t min_layer,
const union v3dv_clear_value *clear_value)
{
v3dv_return_if_oom(NULL, job);
const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
struct v3dv_cl *rcl = &job->rcl;
const uint32_t tile_alloc_offset =
64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
}
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
config.number_of_bin_tile_lists = 1;
config.total_frame_width_in_tiles = tiling->draw_tiles_x;
config.total_frame_height_in_tiles = tiling->draw_tiles_y;
config.supertile_width_in_tiles = tiling->supertile_width;
config.supertile_height_in_tiles = tiling->supertile_height;
config.total_frame_width_in_supertiles =
tiling->frame_width_in_supertiles;
config.total_frame_height_in_supertiles =
tiling->frame_height_in_supertiles;
}
/* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
* it here.
*/
for (int i = 0; i < 2; i++) {
cl_emit(rcl, TILE_COORDINATES, coords);
cl_emit(rcl, END_OF_LOADS, end);
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
store.buffer_to_store = NONE;
}
/* When using double-buffering, we need to clear both buffers (unless
* we only have a single tile to render).
*/
if (clear_value &&
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
clear.clear_z_stencil_buffer = true;
clear.clear_all_render_targets = true;
}
}
cl_emit(rcl, END_OF_TILE_MARKER, end);
}
cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}
/* Emits one SUPERTILE_COORDINATES packet into the job's RCL for every
 * supertile in the framebuffer's [min, max] supertile rectangle (bounds
 * inclusive), walking it row by row.
 *
 * The loop counters are unsigned to match the uint32_t bounds they are
 * initialised from and compared against.
 */
static void
emit_supertile_coordinates(struct v3dv_job *job,
                           struct v3dv_meta_framebuffer *framebuffer)
{
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (uint32_t y = min_y; y <= max_y; y++) {
      for (uint32_t x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}
static void
emit_linear_load(struct v3dv_cl *cl,
uint32_t buffer,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t stride,
uint32_t format)
{
cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
load.buffer_to_load = buffer;
load.address = v3dv_cl_address(bo, offset);
load.input_image_format = format;
load.memory_format = V3D_TILING_RASTER;
load.height_in_ub_or_stride = stride;
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
}
}
/* Emits a tile buffer store to raster (linear) memory.
 *
 * buffer: tile buffer to store from. This used to be ignored in favour of
 *         a hard-coded RENDER_TARGET_0; it is now honoured, matching
 *         emit_linear_load().
 * bo, offset: destination memory.
 * stride: value programmed as the raster stride of the destination.
 * msaa: selects all-samples decimation; otherwise only sample 0 is stored.
 * format: output image format for the store.
 *
 * The tile buffer is not cleared as part of the store.
 */
static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = buffer;
      store.address = v3dv_cl_address(bo, offset);
      store.clear_buffer_being_stored = false;
      store.output_image_format = format;
      store.memory_format = V3D_TILING_RASTER;
      store.height_in_ub_or_stride = stride;
      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
                                   V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
/* Picks the tile buffer format to use for a load (for_store == false) or a
 * store (for_store == true) of the given aspect.
 *
 * Normally this is just the render target type of the framebuffer format.
 * The exception is copying depth/stencil to or from a buffer: the hardware
 * can't do raster loads/stores of those, so the data goes through a color
 * tile buffer using a compatible color format instead.
 */
static uint32_t
choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
                  VkImageAspectFlags aspect,
                  bool for_store,
                  bool is_copy_to_buffer,
                  bool is_copy_from_buffer)
{
   const bool is_buffer_copy = is_copy_to_buffer || is_copy_from_buffer;
   if (!is_buffer_copy)
      return framebuffer->format->rt_type;

   switch (framebuffer->vk_format) {
   case VK_FORMAT_D16_UNORM:
      return V3D_OUTPUT_IMAGE_FORMAT_R16UI;

   case VK_FORMAT_D32_SFLOAT:
      return V3D_OUTPUT_IMAGE_FORMAT_R32F;

   case VK_FORMAT_X8_D24_UNORM_PACK32:
      return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;

   case VK_FORMAT_D24_UNORM_S8_UINT:
      if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)
         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;

      assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);

      if (is_copy_to_buffer) {
         /* Stencil, image -> buffer: the Vulkan spec requires the output
          * buffer to hold packed stencil values, so the store uses R8UI.
          * The load still uses RGBA8UI because the source image has 4
          * channels (3 of them holding the 24-bit depth value).
          */
         return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
                            V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      }

      /* Stencil, buffer -> image: the buffer holds packed 8-bit stencil
       * values that must land in the LSB (R channel) of the 32-bit format,
       * so the load uses R8UI. An R8UI store would write consecutive
       * stencil values over the depth channels, so the store uses RGBA8UI.
       * That puts each stencil value in the right place but overwrites the
       * depth channels (G, B, A) with undefined values; the depth aspect
       * has to be restored from the Z tile buffer, which should be
       * pre-loaded from the image before the store.
       */
      assert(is_copy_from_buffer);
      return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
                         V3D_OUTPUT_IMAGE_FORMAT_R8UI;

   default: /* Color formats */
      return framebuffer->format->rt_type;
   }
}
/* Returns whether the swizzle the device reports for this format needs an
 * R/B swap, as decided by v3dv_format_swizzle_needs_rb_swap().
 */
static inline bool
format_needs_rb_swap(struct v3dv_device *device,
                     VkFormat format)
{
   return v3dv_format_swizzle_needs_rb_swap(
      v3dv_get_format_swizzle(device, format));
}
/* Returns whether the swizzle the device reports for this format needs
 * reversing, as decided by v3dv_format_swizzle_needs_reverse().
 */
static inline bool
format_needs_reverse(struct v3dv_device *device,
                     VkFormat format)
{
   return v3dv_format_swizzle_needs_reverse(
      v3dv_get_format_swizzle(device, format));
}
/* Emits a tile buffer load of one layer/mip level of an image.
 *
 * For image <-> buffer copies the data always goes through RT0, even for
 * depth/stencil aspects, because the hardware can't do raster loads or
 * stores to/from the depth/stencil tile buffers.
 */
static void
emit_image_load(struct v3dv_device *device,
                struct v3dv_cl *cl,
                struct v3dv_meta_framebuffer *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   const uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   const bool is_buffer_copy = is_copy_to_buffer || is_copy_from_buffer;
   const bool load_to_color_tlb =
      is_buffer_copy || aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   /* When copying D24 depth to a buffer, Vulkan expects the depth value in
    * the LSB bits of each 32-bit pixel, but the hardware seems to put the
    * S8/X8 bits there and the depth bits on the MSB. Reversing the channel
    * order and then swapping R/B gives the expected layout.
    *
    * NOTE: this only works if both operations happen in exactly that order,
    * which seems to be the case for tile buffer loads. On stores the order
    * seems to be different, probably reversed, so that reversing and
    * swapping on both the load and the store preserves the original channel
    * order in memory.
    *
    * This is only needed when copying to a buffer, where depth and stencil
    * are copied as separate regions that the spec expects tightly packed.
    */
   const bool is_d24_depth =
      framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
      (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       (aspect & VK_IMAGE_ASPECT_DEPTH_BIT));

   bool rb_swap = false;
   bool chan_reverse = false;
   if (is_copy_to_buffer && is_d24_depth) {
      rb_swap = true;
      chan_reverse = true;
   } else if (!is_buffer_copy && (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
      /* Not a raw data copy (i.e. we are clearing the image), so the
       * format swizzle has to be respected.
       */
      rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
   }

   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      if (load_to_color_tlb)
         load.buffer_to_load = RENDER_TARGET_0;
      else
         load.buffer_to_load = v3dX(zs_buffer_from_aspect_bits)(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);
      load.memory_format = slice->tiling;
      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);

      load.r_b_swap = rb_swap;
      load.channel_reverse = chan_reverse;

      /* UIF layouts take the padded height, raster takes the stride; the
       * field is left untouched for every other tiling mode.
       */
      switch (slice->tiling) {
      case V3D_TILING_UIF_NO_XOR:
      case V3D_TILING_UIF_XOR:
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
         break;
      case V3D_TILING_RASTER:
         load.height_in_ub_or_stride = slice->stride;
         break;
      default:
         break;
      }

      load.decimate_mode = image->vk.samples > VK_SAMPLE_COUNT_1_BIT ?
                           V3D_DECIMATE_MODE_ALL_SAMPLES :
                           V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
/* Emits a tile buffer store to one layer/mip level of an image.
 *
 * As with emit_image_load(), image <-> buffer copies always go through RT0
 * regardless of the aspect. is_multisample_resolve selects 4x decimation
 * when the destination image itself is single-sampled.
 */
static void
emit_image_store(struct v3dv_device *device,
                 struct v3dv_cl *cl,
                 struct v3dv_meta_framebuffer *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer,
                 bool is_multisample_resolve)
{
   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   const uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   const bool is_buffer_copy = is_copy_to_buffer || is_copy_from_buffer;
   const bool store_from_color_tlb =
      is_buffer_copy || aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   /* Same channel reverse + R/B swap handling as in emit_image_load(), but
    * applied when uploading D24 depth from a buffer.
    */
   const bool is_d24_depth =
      framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
      (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       (aspect & VK_IMAGE_ASPECT_DEPTH_BIT));

   bool rb_swap = false;
   bool chan_reverse = false;
   if (is_copy_from_buffer && is_d24_depth) {
      rb_swap = true;
      chan_reverse = true;
   } else if (!is_buffer_copy && (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
      rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
   }

   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      if (store_from_color_tlb)
         store.buffer_to_store = RENDER_TARGET_0;
      else
         store.buffer_to_store = v3dX(zs_buffer_from_aspect_bits)(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      store.r_b_swap = rb_swap;
      store.channel_reverse = chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;

      /* UIF layouts take the padded height, raster takes the stride; the
       * field is left untouched for every other tiling mode.
       */
      switch (slice->tiling) {
      case V3D_TILING_UIF_NO_XOR:
      case V3D_TILING_UIF_XOR:
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
         break;
      case V3D_TILING_RASTER:
         store.height_in_ub_or_stride = slice->stride;
         break;
      default:
         break;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      } else if (is_multisample_resolve) {
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      } else {
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
      }
   }
}
static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_meta_framebuffer *framebuffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
uint32_t layer_offset,
const VkBufferImageCopy2 *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
/* Load image to TLB */
assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
layer_offset < region->imageSubresource.layerCount) ||
layer_offset < image->vk.extent.depth);
const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
region->imageSubresource.baseArrayLayer + layer_offset :
region->imageOffset.z + layer_offset;
emit_image_load(job->device, cl, framebuffer, image,
region->imageSubresource.aspectMask,
image_layer,
region->imageSubresource.mipLevel,
true, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
/* Store TLB to buffer */
uint32_t width, height;
if (region->bufferRowLength == 0)
width = region->imageExtent.width;
else
width = region->bufferRowLength;
if (region->bufferImageHeight == 0)
height = region->imageExtent.height;
else
height = region->bufferImageHeight;
/* Handle copy from compressed format */
width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
/* If we are storing stencil from a combined depth/stencil format the
* Vulkan spec states that the output buffer must have packed stencil
* values, where each stencil value is 1 byte.
*/
uint32_t cpp =
region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : image->cpp;
uint32_t buffer_stride = width * cpp;
uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
height * buffer_stride * layer_offset;
uint32_t format = choose_tlb_format(framebuffer,
region->imageSubresource.aspectMask,
true, true, false);
bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;
emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
buffer_offset, buffer_stride, msaa, format);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
/* Copies one image layer to the buffer: emits the generic tile list for
 * the layer and then the supertile coordinates of the framebuffer.
 */
static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2 *region)
{
   /* Tile list first, then the supertiles that run it */
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer,
                                           buffer, image,
                                           layer, region);

   emit_supertile_coordinates(job, framebuffer);
}
/* Emits the full RCL for an image to buffer copy: prologue, frame setup
 * (no clear), one copy per layer of the job's frame tiling, and the
 * END_OF_RENDERING packet. Nothing past the prologue is emitted on OOM.
 */
void
v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_image *image,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   for (int i = 0; i < job->frame_tiling.layers; i++) {
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, i, region);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}
static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
struct v3dv_meta_framebuffer *framebuffer,
struct v3dv_image *dst,
struct v3dv_image *src,
uint32_t layer_offset,
const VkImageResolve2 *region)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
layer_offset < region->srcSubresource.layerCount) ||
layer_offset < src->vk.extent.depth);
const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.baseArrayLayer + layer_offset :
region->srcOffset.z + layer_offset;
emit_image_load(job->device, cl, framebuffer, src,
region->srcSubresource.aspectMask,
src_layer,
region->srcSubresource.mipLevel,
false, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
layer_offset < region->dstSubresource.layerCount) ||
layer_offset < dst->vk.extent.depth);
const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.baseArrayLayer + layer_offset :
region->dstOffset.z + layer_offset;
bool is_depth_or_stencil =
region->dstSubresource.aspectMask &
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
emit_image_store(job->device, cl, framebuffer, dst,
region->dstSubresource.aspectMask,
dst_layer,
region->dstSubresource.mipLevel,
false, false, !is_depth_or_stencil);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
/* Resolves one layer: emits the generic tile list for the layer and then
 * the supertile coordinates of the framebuffer.
 */
static void
emit_resolve_image_layer(struct v3dv_job *job,
                         struct v3dv_image *dst,
                         struct v3dv_image *src,
                         struct v3dv_meta_framebuffer *framebuffer,
                         uint32_t layer,
                         const VkImageResolve2 *region)
{
   /* Tile list first, then the supertiles that run it */
   emit_resolve_image_layer_per_tile_list(job, framebuffer,
                                          dst, src,
                                          layer, region);

   emit_supertile_coordinates(job, framebuffer);
}
/* Emits the full RCL for an image resolve: prologue, frame setup (no
 * clear), one resolve per layer of the job's frame tiling, and the
 * END_OF_RENDERING packet. Nothing past the prologue is emitted on OOM.
 */
void
v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
                                  struct v3dv_image *dst,
                                  struct v3dv_image *src,
                                  struct v3dv_meta_framebuffer *framebuffer,
                                  const VkImageResolve2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   for (int i = 0; i < job->frame_tiling.layers; i++) {
      emit_resolve_image_layer(job, dst, src, framebuffer, i, region);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}
static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_bo *dst,
struct v3dv_bo *src,
uint32_t dst_offset,
uint32_t src_offset,
uint32_t stride,
uint32_t format)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
emit_linear_store(cl, RENDER_TARGET_0,
dst, dst_offset, stride, false, format);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
/* Emits the tile list and supertile coordinates for a buffer to buffer
 * copy. The raster stride used for both the load and the store is the
 * frame tiling width times item_size.
 */
void
v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
                            struct v3dv_bo *dst,
                            struct v3dv_bo *src,
                            uint32_t dst_offset,
                            uint32_t src_offset,
                            struct v3dv_meta_framebuffer *framebuffer,
                            uint32_t format,
                            uint32_t item_size)
{
   const uint32_t row_stride = job->frame_tiling.width * item_size;

   emit_copy_buffer_per_tile_list(job,
                                  dst, src,
                                  dst_offset, src_offset,
                                  row_stride, format);

   emit_supertile_coordinates(job, framebuffer);
}
/* Emits the full RCL for a buffer to buffer copy: prologue, frame setup
 * (no clear), the copy itself, and the END_OF_RENDERING packet. Nothing
 * past the prologue is emitted on OOM.
 */
void
v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *dst,
                                struct v3dv_bo *src,
                                uint32_t dst_offset,
                                uint32_t src_offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t format,
                                uint32_t item_size)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   v3dX(meta_emit_copy_buffer)(job,
                               dst, src,
                               dst_offset, src_offset,
                               framebuffer, format, item_size);

   cl_emit(rcl, END_OF_RENDERING, end);
}
/* Builds, in the job's indirect CL, the generic tile list that copies one
 * layer of src to dst: load the source layer into the TLB, then store it
 * to the destination layer. The list's start/end addresses are then
 * registered in the job's RCL.
 *
 * layer_offset is relative to each subresource's base layer (or Z offset
 * for 3D images).
 */
static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct v3dv_meta_framebuffer *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer_offset,
                                    const VkImageCopy2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   const bool src_is_3d = src->vk.image_type == VK_IMAGE_TYPE_3D;
   const bool dst_is_3d = dst->vk.image_type == VK_IMAGE_TYPE_3D;

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Source layer -> TLB */
   assert((!src_is_3d && layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   const uint32_t src_layer =
      layer_offset + (src_is_3d ? region->srcOffset.z :
                                  region->srcSubresource.baseArrayLayer);

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* TLB -> destination layer */
   assert((!dst_is_3d && layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer =
      layer_offset + (dst_is_3d ? region->dstOffset.z :
                                  region->dstSubresource.baseArrayLayer);

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, list) {
      list.start = tile_list_start;
      list.end = v3dv_cl_get_address(cl);
   }
}
/* Copies one image layer: emits the generic tile list for the layer and
 * then the supertile coordinates of the framebuffer.
 */
static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct v3dv_meta_framebuffer *framebuffer,
                      uint32_t layer,
                      const VkImageCopy2 *region)
{
   /* Tile list first, then the supertiles that run it */
   emit_copy_image_layer_per_tile_list(job, framebuffer,
                                       dst, src,
                                       layer, region);

   emit_supertile_coordinates(job, framebuffer);
}
/* Emits the full RCL for an image to image copy: prologue, frame setup
 * (no clear), one copy per layer of the job's frame tiling, and the
 * END_OF_RENDERING packet. Nothing past the prologue is emitted on OOM.
 */
void
v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               struct v3dv_meta_framebuffer *framebuffer,
                               const VkImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   for (int i = 0; i < job->frame_tiling.layers; i++) {
      emit_copy_image_layer(job, dst, src, framebuffer, i, region);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}
/* Fills in a drm_v3d_submit_tfu description for a TFU transfer from the
 * source to the destination and adds it to the command buffer as a TFU job.
 *
 * The *_padded_height_or_stride arguments are used as a padded height when
 * the corresponding tiling is UIF and, for the source only, as a stride
 * when it is raster. The destination must not be raster.
 */
void
v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
                        uint32_t dst_bo_handle,
                        uint32_t dst_offset,
                        enum v3d_tiling_mode dst_tiling,
                        uint32_t dst_padded_height_or_stride,
                        uint32_t dst_cpp,
                        uint32_t src_bo_handle,
                        uint32_t src_offset,
                        enum v3d_tiling_mode src_tiling,
                        uint32_t src_padded_height_or_stride,
                        uint32_t src_cpp,
                        uint32_t width,
                        uint32_t height,
                        const struct v3dv_format *format)
{
   /* The source handle is only listed when it differs from the
    * destination handle.
    */
   struct drm_v3d_submit_tfu tfu = {
      .ios = (height << 16) | width,
      .bo_handles = {
         dst_bo_handle,
         src_bo_handle != dst_bo_handle ? src_bo_handle : 0
      },
   };

   /* Input address */
   tfu.iia |= src_offset;

   /* Input config: memory format and texture type */
   if (src_tiling != V3D_TILING_RASTER) {
      tfu.icfg = (V3D33_TFU_ICFG_FORMAT_LINEARTILE +
                  (src_tiling - V3D_TILING_LINEARTILE)) <<
                   V3D33_TFU_ICFG_FORMAT_SHIFT;
   } else {
      tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT;
   }
   tfu.icfg |= format->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT;

   /* Output address and memory format */
   tfu.ioa = dst_offset;
   tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE +
               (dst_tiling - V3D_TILING_LINEARTILE)) <<
                V3D33_TFU_IOA_FORMAT_SHIFT;

   /* Input size: padded height in UIF blocks for UIF sources, stride in
    * pixels for raster sources, nothing for any other tiling.
    */
   if (src_tiling == V3D_TILING_UIF_NO_XOR ||
       src_tiling == V3D_TILING_UIF_XOR) {
      tfu.iis |= src_padded_height_or_stride / (2 * v3d_utile_height(src_cpp));
   } else if (src_tiling == V3D_TILING_RASTER) {
      tfu.iis |= src_padded_height_or_stride / src_cpp;
   }

   /* The TFU can handle raster sources but always produces UIF results */
   assert(dst_tiling != V3D_TILING_RASTER);

   /* If we're writing level 0 (!IOA_DIMTW), the OPAD field has to be
    * supplied for the destination: the number of extra UIF blocks beyond
    * those necessary to cover the height.
    */
   const bool dst_is_uif = dst_tiling == V3D_TILING_UIF_NO_XOR ||
                           dst_tiling == V3D_TILING_UIF_XOR;
   if (dst_is_uif) {
      uint32_t uif_block_h = 2 * v3d_utile_height(dst_cpp);
      uint32_t implicit_padded_height = align(height, uif_block_h);
      uint32_t opad = (dst_padded_height_or_stride - implicit_padded_height) /
                      uif_block_h;
      tfu.icfg |= opad << V3D33_TFU_ICFG_OPAD_SHIFT;
   }

   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}
/* Builds, in the job's indirect CL, the generic tile list that clears one
 * image layer at the given mip level. Nothing is loaded; the list only
 * stores the tile buffer to the image. The list's start/end addresses are
 * then registered in the job's RCL.
 */
static void
emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
                                     struct v3dv_meta_framebuffer *framebuffer,
                                     struct v3dv_image *image,
                                     VkImageAspectFlags aspects,
                                     uint32_t layer,
                                     uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* No loads */
   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* TLB -> image; not a buffer copy and not a multisample resolve */
   emit_image_store(job->device, cl, framebuffer, image, aspects,
                    layer, level,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, list) {
      list.start = tile_list_start;
      list.end = v3dv_cl_get_address(cl);
   }
}
/* Emits the clear tile list plus supertile coordinates for every layer in
 * [min_layer, max_layer) of the image at the given mip level.
 */
static void
emit_clear_image_layers(struct v3dv_job *job,
                        struct v3dv_image *image,
                        struct v3dv_meta_framebuffer *framebuffer,
                        VkImageAspectFlags aspects,
                        uint32_t min_layer,
                        uint32_t max_layer,
                        uint32_t level)
{
   uint32_t layer = min_layer;

   while (layer < max_layer) {
      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
                                           layer, level);
      emit_supertile_coordinates(job, framebuffer);
      layer++;
   }
}
/* Emits the full RCL for an image clear: prologue carrying the clear
 * value, frame setup with the tile buffer clear, one store per layer in
 * [min_layer, max_layer), and the END_OF_RENDERING packet. Nothing past
 * the prologue is emitted on OOM.
 */
void
v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
                                struct v3dv_image *image,
                                struct v3dv_meta_framebuffer *framebuffer,
                                const union v3dv_clear_value *clear_value,
                                VkImageAspectFlags aspects,
                                uint32_t min_layer,
                                uint32_t max_layer,
                                uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .image = image,
      .level = level,
      .aspects = aspects,
      .clear_value = clear_value,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);

   emit_clear_image_layers(job, image, framebuffer, aspects,
                           min_layer, max_layer, level);

   cl_emit(rcl, END_OF_RENDERING, end);
}
static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t stride)
{
struct v3dv_cl *cl = &job->indirect;
v3dv_cl_ensure_space(cl, 200, 1);
v3dv_return_if_oom(NULL, job);
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
cl_emit(cl, END_OF_TILE_MARKER, end);
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
branch.start = tile_list_start;
branch.end = v3dv_cl_get_address(cl);
}
}
/* Emits the tile list and supertile coordinates for a buffer fill. The
 * raster stride is the frame tiling width times 4.
 */
static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct v3dv_meta_framebuffer *framebuffer)
{
   const uint32_t row_stride = job->frame_tiling.width * 4;

   emit_fill_buffer_per_tile_list(job, bo, offset, row_stride);

   emit_supertile_coordinates(job, framebuffer);
}
void
v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
struct v3dv_bo *bo,
uint32_t offset,
struct v3dv_meta_framebuffer *framebuffer,
uint32_t data)
{
const union v3dv_clear_value clear_value = {
.color = { data, 0, 0, 0 },
};
const struct rcl_clear_info clear_info = {
.clear_value = &clear_value,
.image = NULL,
.aspects = VK_IMAGE_ASPECT_COLOR_BIT,
.level = 0,
};
struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
v3dv_return_if_oom(NULL, job);
emit_frame_setup(job, 0, &clear_value);
emit_fill_buffer(job, bo, offset, framebuffer);
cl_emit(rcl, END_OF_RENDERING, end);
}
/* Emits the generic per-tile list that uploads one layer of a buffer to
 * image copy: the tile buffer is loaded linearly from 'buffer' and then
 * stored to the selected layer and mip level of 'image'.
 *
 * 'layer' is relative to the copy region: the image layer that gets
 * written is offset by the region's base array layer or, for 3D images,
 * by the region's Z offset.
 */
static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *subres = &region->imageSubresource;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   assert((!is_3d && layer < subres->layerCount) ||
          layer < image->vk.extent.depth);

   /* Load TLB from buffer.
    *
    * A zero row length / image height in the region means that we take the
    * corresponding dimension from the image extent of the copy.
    */
   uint32_t width = region->bufferRowLength != 0 ?
      region->bufferRowLength : region->imageExtent.width;
   uint32_t height = region->bufferImageHeight != 0 ?
      region->bufferImageHeight : region->imageExtent.height;

   /* Copies to compressed formats are handled with a compatible format, so
    * convert the dimensions to units of blocks.
    */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   /* Stencil data takes 1 byte per pixel in the buffer */
   const uint32_t cpp =
      (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : image->cpp;
   const uint32_t buffer_stride = width * cpp;
   const uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   const uint32_t format = choose_tlb_format(framebuffer, subres->aspectMask,
                                             false, false, true);

   const uint32_t image_layer =
      layer + (is_3d ? region->imageOffset.z : subres->baseArrayLayer);

   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
                    buffer_offset, buffer_stride, format);

   /* Raster loads/stores of Z/S formats are not possible, so the upload goes
    * through a color tile buffer with a compatible RGBA format. The catch is
    * that a tile buffer store can't mask out channels: uploading a single
    * aspect of a combined depth/stencil image writes all four RGBA channels
    * and clobbers the other aspect with undefined values.
    *
    * To avoid losing it, the aspect that is not being copied is loaded from
    * image memory into a proper Z/S tile buffer up front, and written back
    * with a second store right after the color buffer store of the aspect
    * being copied.
    */
   VkImageAspectFlags preserved_aspect = 0;
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (subres->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         preserved_aspect = VK_IMAGE_ASPECT_STENCIL_BIT;
      } else {
         assert(subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         preserved_aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
      }
   }

   if (preserved_aspect != 0) {
      emit_image_load(job->device, cl, framebuffer, image,
                      preserved_aspect,
                      image_layer, subres->mipLevel,
                      false, false);
   }

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to image */
   emit_image_store(job->device, cl, framebuffer, image, subres->aspectMask,
                    image_layer, subres->mipLevel,
                    false, true, false);

   /* Restore the aspect we were not meant to touch */
   if (preserved_aspect != 0) {
      emit_image_store(job->device, cl, framebuffer, image,
                       preserved_aspect,
                       image_layer, subres->mipLevel,
                       false, false, false);
   }

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Point the RCL at the tile list we have just written */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
/* Emits the commands that copy a single layer of 'region' from 'buffer' to
 * 'image': the generic per-tile list for the layer, followed by the
 * supertile coordinates for 'framebuffer'.
 */
static void
emit_copy_buffer_to_layer(struct v3dv_job *job,
                          struct v3dv_image *image,
                          struct v3dv_buffer *buffer,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2 *region)
{
   emit_copy_buffer_to_layer_per_tile_list(job,
                                           framebuffer,
                                           image,
                                           buffer,
                                           layer,
                                           region);

   emit_supertile_coordinates(job, framebuffer);
}
/* Emits the full render control list for a buffer to image copy job: the
 * RCL prologue (with no clear information), the frame setup and then the
 * copy commands for every layer of the job's frame.
 *
 * Nothing else is emitted if the command buffer ran out of memory while
 * emitting the prologue.
 */
void
v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
                                         struct v3dv_image *image,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   for (uint32_t layer = 0; layer < job->frame_tiling.layers; layer++) {
      emit_copy_buffer_to_layer(job, image, buffer, framebuffer,
                                layer, region);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}
/* Computes a framebuffer size (in pixels) that can be used to process up to
 * 'num_pixels' pixels with a single job.
 *
 * The returned size never covers more than 'num_pixels' pixels, but it may
 * cover fewer: a single job can't "render" more than MAX_DIMxMAX_DIM pixels
 * and odd pixel counts get rounded down when they are split into rows. The
 * caller is expected to keep calling this with the remaining pixel count
 * (one job per call) until everything has been processed.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t dim_limit = V3D_MAX_IMAGE_DIMENSION;

   /* Start from the largest possible frame, which is what we want when
    * there are more pixels than a single job can take.
    */
   uint32_t fb_w = dim_limit;
   uint32_t fb_h = dim_limit;

   if (num_pixels <= dim_limit * dim_limit) {
      /* Start with a single row and keep folding it in half: we have to do
       * it while the row is wider than the limit, and we also choose to do
       * it while the width is even and still more than twice the height.
       */
      fb_w = num_pixels;
      fb_h = 1;
      while (fb_w > dim_limit || ((fb_w & 1) == 0 && fb_w > 2 * fb_h)) {
         fb_w /= 2;
         fb_h *= 2;
      }
   }

   assert(fb_w <= dim_limit && fb_h <= dim_limit);
   assert(fb_w * fb_h <= num_pixels);
   assert(fb_w > 0 && fb_h > 0);

   *width = fb_w;
   *height = fb_h;
}
struct v3dv_job *
v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_bo *dst,
uint32_t dst_offset,
struct v3dv_bo *src,
uint32_t src_offset,
const VkBufferCopy2 *region)
{
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
/* Select appropriate pixel format for the copy operation based on the
* size to copy and the alignment of the source and destination offsets.
*/
src_offset += region->srcOffset;
dst_offset += region->dstOffset;
uint32_t item_size = 4;
while (item_size > 1 &&
(src_offset % item_size != 0 || dst_offset % item_size != 0)) {
item_size /= 2;
}
while (item_size > 1 && region->size % item_size != 0)
item_size /= 2;
assert(region->size % item_size == 0);
uint32_t num_items = region->size / item_size;
assert(num_items > 0);
uint32_t format;
VkFormat vk_format;
switch (item_size) {
case 4:
format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
vk_format = VK_FORMAT_R8G8B8A8_UINT;
break;
case 2:
format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
vk_format = VK_FORMAT_R8G8_UINT;
break;
default:
format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
vk_format = VK_FORMAT_R8_UINT;
break;
}
struct v3dv_job *job = NULL;
while (num_items > 0) {
job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return NULL;
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_job_start_frame(job, width, height, 1, true, true,
1, internal_bpp, false);
struct v3dv_meta_framebuffer framebuffer;
v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
&job->frame_tiling);
v3dX(job_emit_binning_flush)(job);
v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
&framebuffer, format, item_size);
v3dv_cmd_buffer_finish_job(cmd_buffer);
const uint32_t items_copied = width * height;
const uint32_t bytes_copied = items_copied * item_size;
num_items -= items_copied;
src_offset += bytes_copied;
dst_offset += bytes_copied;
}
return job;
}
void
v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_bo *bo,
uint32_t offset,
uint32_t size,
uint32_t data)
{
assert(size > 0 && size % 4 == 0);
assert(offset + size <= bo->size);
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
uint32_t num_items = size / 4;
while (num_items > 0) {
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
if (!job)
return;
uint32_t width, height;
framebuffer_size_for_pixel_count(num_items, &width, &height);
v3dv_job_start_frame(job, width, height, 1, true, true,
1, internal_bpp, false);
struct v3dv_meta_framebuffer framebuffer;
v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
internal_type, &job->frame_tiling);
v3dX(job_emit_binning_flush)(job);
v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);
v3dv_cmd_buffer_finish_job(cmd_buffer);
const uint32_t items_copied = width * height;
const uint32_t bytes_copied = items_copied * 4;
num_items -= items_copied;
offset += bytes_copied;
}
}
/* Initializes the meta framebuffer 'fb' for the given format, internal type
 * and frame tiling.
 *
 * This records the Vulkan format with its translated hardware format, the
 * internal color and depth types, and the range of supertiles that covers
 * the frame described by 'tiling'.
 */
void
v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
                            VkFormat vk_format,
                            uint32_t internal_type,
                            const struct v3dv_frame_tiling *tiling)
{
   /* Format information */
   fb->vk_format = vk_format;
   fb->format = v3dX(get_format)(vk_format);
   fb->internal_type = internal_type;

   /* Only depth/stencil formats have an internal depth type of their own,
    * anything else gets 32-bit float depth.
    */
   fb->internal_depth_type = vk_format_is_depth_or_stencil(vk_format) ?
      v3dX(get_internal_depth_type)(vk_format) : V3D_INTERNAL_TYPE_DEPTH_32F;

   /* Supertile coverage always starts at 0,0 and extends up to the
    * supertile that contains the last pixel of the frame.
    */
   const uint32_t supertile_w_px = tiling->tile_width * tiling->supertile_width;
   const uint32_t supertile_h_px = tiling->tile_height * tiling->supertile_height;

   fb->min_x_supertile = 0;
   fb->max_x_supertile = (tiling->width - 1) / supertile_w_px;

   fb->min_y_supertile = 0;
   fb->max_y_supertile = (tiling->height - 1) / supertile_h_px;
}