/*
* Copyright © 2021 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "nir/nir_builder.h"
#include "pan_encoder.h"
#include "pan_shader.h"
#include "panvk_private.h"
static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
struct pan_pool *desc_pool,
const struct pan_image_view *view)
{
struct panfrost_ptr texture =
pan_pool_alloc_desc(desc_pool, TEXTURE);
size_t payload_size =
GENX(panfrost_estimate_texture_payload_size)(view);
struct panfrost_ptr surfaces =
pan_pool_alloc_aligned(desc_pool, payload_size,
pan_alignment(SURFACE_WITH_STRIDE));
GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);
return texture.gpu;
}
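/* The copy shaders only do texel fetches, so a minimal nearest-filtering
* sampler with non-normalized coordinates is enough.
*/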
static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
struct pan_pool *desc_pool)
{
struct panfrost_ptr sampler =
pan_pool_alloc_desc(desc_pool, SAMPLER);
pan_pack(sampler.cpu, SAMPLER, cfg) {
cfg.seamless_cube_map = false;
cfg.normalized_coordinates = false;
cfg.minify_nearest = true;
cfg.magnify_nearest = true;
}
return sampler.gpu;
}
static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
mali_ptr coordinates,
mali_ptr *varying_bufs,
mali_ptr *varyings)
{
struct panfrost_ptr varying =
pan_pool_alloc_desc(pool, ATTRIBUTE);
struct panfrost_ptr varying_buffer =
pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);
pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
cfg.pointer = coordinates;
cfg.stride = 4 * sizeof(uint32_t);
cfg.size = cfg.stride * 4;
}
/* Bifrost needs an empty desc to mark the end of prefetching */
pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
ATTRIBUTE_BUFFER, cfg);
pan_pack(varying.cpu, ATTRIBUTE, cfg) {
cfg.buffer_index = 0;
cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
}
*varyings = varying.gpu;
*varying_bufs = varying_buffer.gpu;
}
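/* Fill the DRAW (DCD) section shared by the tiler and compute copy jobs:
* renderer state, thread storage, push constants, destination coordinates,
* and, when the source is an image, the source-coordinate varying plus the
* texture/sampler descriptors.
*/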
static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
mali_ptr src_coords, mali_ptr dst_coords,
mali_ptr texture, mali_ptr sampler,
mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
mali_ptr push_constants, void *out)
{
pan_pack(out, DRAW, cfg) {
cfg.thread_storage = tsd;
cfg.state = rsd;
cfg.push_uniforms = push_constants;
cfg.position = dst_coords;
if (src_coords) {
panvk_meta_copy_emit_varying(pool, src_coords,
&cfg.varying_buffers,
&cfg.varyings);
}
cfg.viewport = vpd;
cfg.textures = texture;
cfg.samplers = sampler;
}
}
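/* Emit a tiler job drawing a 4-vertex triangle strip covering the
* destination rectangle, with the source sampled from the fragment shader.
*/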
static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
mali_ptr src_coords, mali_ptr dst_coords,
mali_ptr texture, mali_ptr sampler,
mali_ptr push_constants,
mali_ptr vpd, mali_ptr rsd,
mali_ptr tsd, mali_ptr tiler)
{
struct panfrost_ptr job =
pan_pool_alloc_desc(desc_pool, TILER_JOB);
panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
texture, sampler, vpd, tsd, rsd, push_constants,
pan_section_ptr(job.cpu, TILER_JOB, DRAW));
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
cfg.index_count = 4;
cfg.job_task_split = 6;
}
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
cfg.constant = 1.0f;
}
void *invoc = pan_section_ptr(job.cpu,
TILER_JOB,
INVOCATION);
panfrost_pack_work_groups_compute(invoc, 1, 4,
1, 1, 1, 1, true, false);
pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
cfg.address = tiler;
}
panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
false, false, 0, 0, &job, false);
return job;
}
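/* Emit a compute job with the given workgroup count/size, used by the
* compute-based copy and fill paths.
*/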
static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
const struct pan_compute_dim *num_wg,
const struct pan_compute_dim *wg_sz,
mali_ptr texture, mali_ptr sampler,
mali_ptr push_constants,
mali_ptr rsd, mali_ptr tsd)
{
struct panfrost_ptr job =
pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);
void *invoc = pan_section_ptr(job.cpu,
COMPUTE_JOB,
INVOCATION);
panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
wg_sz->x, wg_sz->y, wg_sz->z,
false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = 8;
}
panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
0, tsd, rsd, push_constants,
pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));
panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
false, false, 0, 0, &job, false);
return job;
}
static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
switch (texelsize) {
case 6: return MALI_RGB16UI << 12;
case 8: return MALI_RG32UI << 12;
case 12: return MALI_RGB32UI << 12;
case 16: return MALI_RGBA32UI << 12;
default: unreachable("Invalid texel size\n");
}
}
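/* Build the renderer state (and blend) descriptors used when the copy
* destination is an image. Partial component writes go through the
* fixed-function blend path with load_destination set, so the components
* outside the write mask are preserved.
*/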
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
struct pan_pool *desc_pool,
mali_ptr shader,
const struct pan_shader_info *shader_info,
enum pipe_format fmt, unsigned wrmask,
bool from_img)
{
struct panfrost_ptr rsd_ptr =
pan_pool_alloc_desc_aggregate(desc_pool,
PAN_DESC(RENDERER_STATE),
PAN_DESC_ARRAY(1, BLEND));
bool raw = util_format_get_blocksize(fmt) > 4;
unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
bool partialwrite = fullmask != wrmask && !raw;
bool readstb = fullmask != wrmask && raw;
pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(shader_info, shader, &cfg);
if (from_img) {
cfg.shader.varying_count = 1;
cfg.shader.texture_count = 1;
cfg.shader.sampler_count = 1;
}
cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
cfg.multisample_misc.sample_mask = UINT16_MAX;
cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
cfg.stencil_front.mask = 0xFF;
cfg.stencil_back = cfg.stencil_front;
cfg.properties.allow_forward_pixel_to_be_killed = true;
cfg.properties.allow_forward_pixel_to_kill =
!partialwrite && !readstb;
cfg.properties.zs_update_operation =
MALI_PIXEL_KILL_STRONG_EARLY;
cfg.properties.pixel_kill_operation =
MALI_PIXEL_KILL_FORCE_EARLY;
}
pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
cfg.round_to_fb_precision = true;
cfg.load_destination = partialwrite;
cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.internal.mode =
partialwrite ?
MALI_BLEND_MODE_FIXED_FUNCTION :
MALI_BLEND_MODE_OPAQUE;
cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
cfg.internal.fixed_function.num_comps = 4;
if (!raw) {
cfg.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(pdev, fmt, false);
cfg.internal.fixed_function.conversion.register_format =
MALI_REGISTER_FILE_FORMAT_F32;
} else {
unsigned imgtexelsz = util_format_get_blocksize(fmt);
cfg.internal.fixed_function.conversion.memory_format =
panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
cfg.internal.fixed_function.conversion.register_format =
(imgtexelsz & 2) ?
MALI_REGISTER_FILE_FORMAT_U16 :
MALI_REGISTER_FILE_FORMAT_U32;
}
}
return rsd_ptr.gpu;
}
static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
struct pan_pool *desc_pool,
mali_ptr shader,
const struct pan_shader_info *shader_info,
bool from_img)
{
struct panfrost_ptr rsd_ptr =
pan_pool_alloc_desc_aggregate(desc_pool,
PAN_DESC(RENDERER_STATE));
pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(shader_info, shader, &cfg);
if (from_img) {
cfg.shader.texture_count = 1;
cfg.shader.sampler_count = 1;
}
}
return rsd_ptr.gpu;
}
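/* Build the fragment shader for an image-to-image copy. Most copies are a
* plain texel fetch + store, but R5G6B5 <-> R8G8 copies repack the bits
* manually since the two views may not pick the same canonical format
* (see panvk_meta_copy_img_format()).
*/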
static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
enum pipe_format srcfmt,
enum pipe_format dstfmt, unsigned dstmask,
unsigned texdim, bool texisarray, bool is_ms,
struct pan_shader_info *shader_info)
{
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
GENX(pan_shader_get_compiler_options)(),
"panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
util_format_name(srcfmt), util_format_name(dstfmt),
texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");
nir_variable *coord_var =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
"coord");
coord_var->data.location = VARYING_SLOT_VAR0;
nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));
nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
tex->texture_index = 0;
tex->is_array = texisarray;
tex->dest_type = util_format_is_unorm(srcfmt) ?
nir_type_float32 : nir_type_uint32;
switch (texdim) {
case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
default: unreachable("Invalid texture dimension");
}
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(coord);
tex->coord_components = texdim + texisarray;
if (is_ms) {
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
}
nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
nir_alu_type_get_type_size(tex->dest_type), NULL);
nir_builder_instr_insert(&b, &tex->instr);
nir_ssa_def *texel = &tex->dest.ssa;
unsigned dstcompsz =
util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
unsigned ndstcomps = util_format_get_nr_components(dstfmt);
const struct glsl_type *outtype = NULL;
if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
nir_ssa_def *rgb =
nir_f2u32(&b, nir_fmul(&b, texel,
nir_vec3(&b,
nir_imm_float(&b, 31),
nir_imm_float(&b, 63),
nir_imm_float(&b, 31))));
nir_ssa_def *rg =
nir_vec2(&b,
nir_ior(&b, nir_channel(&b, rgb, 0),
nir_ishl(&b, nir_channel(&b, rgb, 1),
nir_imm_int(&b, 5))),
nir_ior(&b,
nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
nir_ishl(&b, nir_channel(&b, rgb, 2),
nir_imm_int(&b, 3))));
rg = nir_iand_imm(&b, rg, 255);
texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
} else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
nir_ssa_def *rgb =
nir_vec3(&b,
nir_channel(&b, rg, 0),
nir_ior(&b,
nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
nir_ishl(&b, nir_channel(&b, rg, 1),
nir_imm_int(&b, 3))),
nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
rgb = nir_iand(&b, rgb,
nir_vec3(&b,
nir_imm_int(&b, 31),
nir_imm_int(&b, 63),
nir_imm_int(&b, 31)));
texel = nir_fmul(&b, nir_u2f32(&b, rgb),
nir_vec3(&b,
nir_imm_float(&b, 1.0 / 31),
nir_imm_float(&b, 1.0 / 63),
nir_imm_float(&b, 1.0 / 31)));
outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
} else {
assert(srcfmt == dstfmt);
enum glsl_base_type basetype;
if (util_format_is_unorm(dstfmt)) {
basetype = GLSL_TYPE_FLOAT;
} else if (dstcompsz == 16) {
basetype = GLSL_TYPE_UINT16;
} else {
assert(dstcompsz == 32);
basetype = GLSL_TYPE_UINT;
}
if (dstcompsz == 16)
texel = nir_u2u16(&b, texel);
texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
outtype = glsl_vector_type(basetype, ndstcomps);
}
nir_variable *out =
nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
out->data.location = FRAG_RESULT_DATA0;
unsigned fullmask = (1 << ndstcomps) - 1;
if (dstcompsz > 8 && dstmask != fullmask) {
nir_ssa_def *oldtexel = nir_load_var(&b, out);
nir_ssa_def *dstcomps[4];
for (unsigned i = 0; i < ndstcomps; i++) {
if (dstmask & BITFIELD_BIT(i))
dstcomps[i] = nir_channel(&b, texel, i);
else
dstcomps[i] = nir_channel(&b, oldtexel, i);
}
texel = nir_vec(&b, dstcomps, ndstcomps);
}
nir_store_var(&b, out, texel, 0xff);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.is_blit = true,
.no_ubo_to_push = true,
};
pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
cfg.memory_format = (dstcompsz == 16 ? MALI_RG16UI : MALI_RG32UI) << 12;
cfg.register_format = dstcompsz == 16 ?
MALI_REGISTER_FILE_FORMAT_U16 :
MALI_REGISTER_FILE_FORMAT_U32;
}
inputs.bifrost.static_rt_conv = true;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->fs.sample_shading = is_ms;
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
return shader;
}
static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)
{
/* We can't use a non-compressed format when handling a tiled/AFBC
* compressed format because the tile sizes differ (4x4 blocks for
* compressed formats and 16x16 texels for non-compressed ones).
*/
assert(!util_format_is_compressed(fmt));
/* Pick blendable formats when we can, otherwise pick the UINT variant
* matching the texel size.
*/
switch (util_format_get_blocksize(fmt)) {
case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
case 12: return PIPE_FORMAT_R32G32B32_UINT;
case 8: return PIPE_FORMAT_R32G32_UINT;
case 6: return PIPE_FORMAT_R16G16B16_UINT;
case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
case 1: return PIPE_FORMAT_R8_UNORM;
default: unreachable("Unsupported format\n");
}
}
struct panvk_meta_copy_img2img_format_info {
enum pipe_format srcfmt;
enum pipe_format dstfmt;
unsigned dstmask;
} PACKED;
static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
{ PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
{ PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
{ PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
{ PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
{ PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
/* Z24S8(depth) */
{ PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
/* Z24S8(stencil) */
{ PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
{ PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
{ PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
{ PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
/* Z32S8X24(depth) */
{ PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
/* Z32S8X24(stencil) */
{ PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
{ PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
{ PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};
static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
{
STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
return i;
}
unreachable("Invalid image format\n");
}
static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);
return (1 << util_format_get_nr_components(outfmt)) - 1;
}
switch (imgfmt) {
case PIPE_FORMAT_S8_UINT:
return 1;
case PIPE_FORMAT_Z16_UNORM:
return 3;
case PIPE_FORMAT_Z16_UNORM_S8_UINT:
return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
case PIPE_FORMAT_Z24X8_UNORM:
assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
return 7;
case PIPE_FORMAT_Z32_FLOAT:
return 0xf;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
default:
unreachable("Invalid depth format\n");
}
}
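/* Copy one VkImageCopy2 region by drawing a quad per layer/slice: each
* layer gets its own batch with the destination bound as a preloaded
* render target and the source bound as a texture.
*/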
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_image *src,
const struct panvk_image *dst,
const VkImageCopy2 *region)
{
struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
struct panvk_meta_copy_img2img_format_info key = {
.srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
.dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
.dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
region->dstSubresource.aspectMask),
};
assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);
unsigned texdimidx =
panvk_meta_copy_tex_type(src->pimage.layout.dim,
src->pimage.layout.array_size > 1);
unsigned fmtidx =
panvk_meta_copy_img2img_format_idx(key);
unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;
struct pan_image_view srcview = {
.format = key.srcfmt,
.dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
.image = &src->pimage,
.nr_samples = src->pimage.layout.nr_samples,
.first_level = region->srcSubresource.mipLevel,
.last_level = region->srcSubresource.mipLevel,
.first_layer = region->srcSubresource.baseArrayLayer,
.last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
.swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
};
struct pan_image_view dstview = {
.format = key.dstfmt,
.dim = MALI_TEXTURE_DIMENSION_2D,
.image = &dst->pimage,
.nr_samples = dst->pimage.layout.nr_samples,
.first_level = region->dstSubresource.mipLevel,
.last_level = region->dstSubresource.mipLevel,
.swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
};
unsigned minx = MAX2(region->dstOffset.x, 0);
unsigned miny = MAX2(region->dstOffset.y, 0);
unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);
mali_ptr vpd =
panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
minx, miny, maxx, maxy);
float dst_rect[] = {
minx, miny, 0.0, 1.0,
maxx + 1, miny, 0.0, 1.0,
minx, maxy + 1, 0.0, 1.0,
maxx + 1, maxy + 1, 0.0, 1.0,
};
mali_ptr dst_coords =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
sizeof(dst_rect), 64);
/* TODO: don't force preloads of dst resources if unneeded */
unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
cmdbuf->state.fb.crc_valid[0] = false;
*fbinfo = (struct pan_fb_info){
.width = width,
.height = height,
.extent.minx = minx & ~31,
.extent.miny = miny & ~31,
.extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
.extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
.nr_samples = dst->pimage.layout.nr_samples,
.rt_count = 1,
.rts[0].view = &dstview,
.rts[0].preload = true,
.rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
};
mali_ptr texture =
panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
mali_ptr sampler =
panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);
panvk_per_arch(cmd_close_batch)(cmdbuf);
minx = MAX2(region->srcOffset.x, 0);
miny = MAX2(region->srcOffset.y, 0);
maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
assert(region->dstOffset.z >= 0);
unsigned first_src_layer = MAX2(0, region->srcOffset.z);
unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
for (unsigned l = 0; l < nlayers; l++) {
unsigned src_l = l + first_src_layer;
float src_rect[] = {
minx, miny, src_l, 1.0,
maxx + 1, miny, src_l, 1.0,
minx, maxy + 1, src_l, 1.0,
maxx + 1, maxy + 1, src_l, 1.0,
};
mali_ptr src_coords =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
sizeof(src_rect), 64);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
dstview.first_layer = dstview.last_layer = l + first_dst_layer;
batch->blit.src = src->pimage.data.bo;
batch->blit.dst = dst->pimage.data.bo;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
mali_ptr tsd, tiler;
tsd = batch->tls.gpu;
tiler = batch->tiler.descs.gpu;
struct panfrost_ptr job;
job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
src_coords, dst_coords,
texture, sampler, 0,
vpd, rsd, tsd, tiler);
util_dynarray_append(&batch->jobs, void *, job.cpu);
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
}
static void
panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
{
STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
for (unsigned texdim = 1; texdim <= 3; texdim++) {
unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
/* No MSAA on 3D textures */
if (texdim == 3 && is_ms) continue;
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
panvk_meta_copy_img2img_fmts[i].srcfmt,
panvk_meta_copy_img2img_fmts[i].dstfmt,
panvk_meta_copy_img2img_fmts[i].dstmask,
texdim, false, is_ms, &shader_info);
dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
shader, &shader_info,
panvk_meta_copy_img2img_fmts[i].dstfmt,
panvk_meta_copy_img2img_fmts[i].dstmask,
true);
if (texdim == 3)
continue;
memset(&shader_info, 0, sizeof(shader_info));
texdimidx = panvk_meta_copy_tex_type(texdim, true);
assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
shader =
panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
panvk_meta_copy_img2img_fmts[i].srcfmt,
panvk_meta_copy_img2img_fmts[i].dstfmt,
panvk_meta_copy_img2img_fmts[i].dstmask,
texdim, true, is_ms, &shader_info);
dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
shader, &shader_info,
panvk_meta_copy_img2img_fmts[i].dstfmt,
panvk_meta_copy_img2img_fmts[i].dstmask,
true);
}
}
}
void
panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
const VkCopyImageInfo2 *pCopyImageInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);
for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
}
}
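/* Size in bytes of one buffer texel for the given image format and
* component mask (depth/stencil aspects may only copy a subset of the
* components).
*/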
static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
{
unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
unsigned nbufcomps = util_bitcount(mask);
if (nbufcomps == util_format_get_nr_components(imgfmt))
return imgtexelsz;
/* Special case for Z24 buffers which are not tightly packed */
if (mask == 7 && imgtexelsz == 4)
return 4;
/* Special case for S8 extraction from Z32_S8X24 */
if (mask == 2 && imgtexelsz == 8)
return 1;
unsigned compsz =
util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
assert(!(compsz % 8));
return nbufcomps * compsz / 8;
}
static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
{
/* Pick blendable formats when we can, and the UINT variant matching the
* texel size otherwise.
*/
switch (util_format_get_blocksize(imgfmt)) {
case 1: return PIPE_FORMAT_R8_UNORM;
/* AFBC stores things differently for RGB565, so
* we can't simply map to R8G8 in that case */
case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
case 6: return PIPE_FORMAT_R16G16B16_UINT;
case 8: return PIPE_FORMAT_R32G32_UINT;
case 12: return PIPE_FORMAT_R32G32B32_UINT;
case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
default: unreachable("Invalid format\n");
}
}
struct panvk_meta_copy_format_info {
enum pipe_format imgfmt;
unsigned mask;
} PACKED;
static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
{ PIPE_FORMAT_R8_UNORM, 0x1 },
{ PIPE_FORMAT_R8G8_UNORM, 0x3 },
{ PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
{ PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
{ PIPE_FORMAT_R16G16B16_UINT, 0x7 },
{ PIPE_FORMAT_R32G32_UINT, 0x3 },
{ PIPE_FORMAT_R32G32B32_UINT, 0x7 },
{ PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
/* S8 -> Z24S8 */
{ PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
/* S8 -> Z32_S8X24 */
{ PIPE_FORMAT_R32G32_UINT, 0x2 },
/* Z24X8 -> Z24S8 */
{ PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
/* Z32 -> Z32_S8X24 */
{ PIPE_FORMAT_R32G32_UINT, 0x1 },
};
struct panvk_meta_copy_buf2img_info {
struct {
mali_ptr ptr;
struct {
unsigned line;
unsigned surf;
} stride;
} buf;
} PACKED;
#define panvk_meta_copy_buf2img_get_info_field(b, field) \
nir_load_push_constant((b), 1, \
sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
nir_imm_int(b, 0), \
.base = offsetof(struct panvk_meta_copy_buf2img_info, field), \
.range = ~0)
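/* Build the fragment shader for a buffer-to-image copy: the buffer address
* and strides come in as push constants, the texel is fetched with a global
* load at the coordinates interpolated from the source rectangle varying,
* then written to the render target.
*/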
static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
struct panvk_meta_copy_format_info key,
struct pan_shader_info *shader_info)
{
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
GENX(pan_shader_get_compiler_options)(),
"panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
util_format_name(key.imgfmt),
key.mask);
nir_variable *coord_var =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_vector_type(GLSL_TYPE_FLOAT, 3),
"coord");
coord_var->data.location = VARYING_SLOT_VAR0;
nir_ssa_def *coord = nir_load_var(&b, coord_var);
coord = nir_f2u32(&b, coord);
nir_ssa_def *bufptr =
panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
nir_ssa_def *buflinestride =
panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
nir_ssa_def *bufsurfstride =
panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);
unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
unsigned writemask = key.mask;
nir_ssa_def *offset =
nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
offset = nir_iadd(&b, offset,
nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
offset = nir_iadd(&b, offset,
nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
unsigned imgcompsz =
(imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
unsigned nimgcomps = imgtexelsz / imgcompsz;
unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
unsigned nbufcomps = buftexelsz / bufcompsz;
assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
assert(nbufcomps <= 4 && nimgcomps <= 4);
nir_ssa_def *texel =
nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);
enum glsl_base_type basetype;
if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
texel = nir_vec3(&b,
nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
texel = nir_fmul(&b,
nir_u2f32(&b, texel),
nir_vec3(&b,
nir_imm_float(&b, 1.0f / 31),
nir_imm_float(&b, 1.0f / 63),
nir_imm_float(&b, 1.0f / 31)));
nimgcomps = 3;
basetype = GLSL_TYPE_FLOAT;
} else if (imgcompsz == 1) {
assert(bufcompsz == 1);
/* Blendable formats are unorm and the fixed-function blend unit
* takes float values.
*/
texel = nir_fmul(&b, nir_u2f32(&b, texel),
nir_imm_float(&b, 1.0f / 255));
basetype = GLSL_TYPE_FLOAT;
} else {
texel = nir_u2uN(&b, texel, imgcompsz * 8);
basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
}
/* We always pass the texel using 32-bit regs for now */
nir_variable *out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vector_type(basetype, nimgcomps),
"out");
out->data.location = FRAG_RESULT_DATA0;
uint16_t fullmask = (1 << nimgcomps) - 1;
assert(fullmask >= writemask);
if (fullmask != writemask) {
unsigned first_written_comp = ffs(writemask) - 1;
nir_ssa_def *oldtexel = NULL;
if (imgcompsz > 1)
oldtexel = nir_load_var(&b, out);
nir_ssa_def *texel_comps[4];
for (unsigned i = 0; i < nimgcomps; i++) {
if (writemask & BITFIELD_BIT(i))
texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
else if (imgcompsz > 1)
texel_comps[i] = nir_channel(&b, oldtexel, i);
else
texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
}
texel = nir_vec(&b, texel_comps, nimgcomps);
}
nir_store_var(&b, out, texel, 0xff);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.is_blit = true,
.no_ubo_to_push = true,
};
pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
cfg.register_format = imgcompsz == 2 ?
MALI_REGISTER_FILE_FORMAT_U16 :
MALI_REGISTER_FILE_FORMAT_U32;
}
inputs.bifrost.static_rt_conv = true;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
return shader;
}
static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
{
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
return i;
}
unreachable("Invalid image format\n");
}
static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *buf,
const struct panvk_image *img,
const VkBufferImageCopy2 *region)
{
struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
unsigned minx = MAX2(region->imageOffset.x, 0);
unsigned miny = MAX2(region->imageOffset.y, 0);
unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
mali_ptr vpd =
panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
minx, miny, maxx, maxy);
float dst_rect[] = {
minx, miny, 0.0, 1.0,
maxx + 1, miny, 0.0, 1.0,
minx, maxy + 1, 0.0, 1.0,
maxx + 1, maxy + 1, 0.0, 1.0,
};
mali_ptr dst_coords =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
sizeof(dst_rect), 64);
struct panvk_meta_copy_format_info key = {
.imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
.mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
region->imageSubresource.aspectMask),
};
unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;
const struct vk_image_buffer_layout buflayout =
vk_image_buffer_copy_layout(&img->vk, region);
struct panvk_meta_copy_buf2img_info info = {
.buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
.buf.stride.line = buflayout.row_stride_B,
.buf.stride.surf = buflayout.image_stride_B,
};
mali_ptr pushconsts =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
struct pan_image_view view = {
.format = key.imgfmt,
.dim = MALI_TEXTURE_DIMENSION_2D,
.image = &img->pimage,
.nr_samples = img->pimage.layout.nr_samples,
.first_level = region->imageSubresource.mipLevel,
.last_level = region->imageSubresource.mipLevel,
.swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
};
/* TODO: don't force preloads of dst resources if unneeded */
cmdbuf->state.fb.crc_valid[0] = false;
*fbinfo = (struct pan_fb_info){
.width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
.height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
.extent.minx = minx,
.extent.maxx = maxx,
.extent.miny = miny,
.extent.maxy = maxy,
.nr_samples = 1,
.rt_count = 1,
.rts[0].view = &view,
.rts[0].preload = true,
.rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
};
panvk_per_arch(cmd_close_batch)(cmdbuf);
assert(region->imageSubresource.layerCount == 1 ||
region->imageExtent.depth == 1);
assert(region->imageOffset.z >= 0);
unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
for (unsigned l = 0; l < nlayers; l++) {
float src_rect[] = {
0, 0, l, 1.0,
region->imageExtent.width, 0, l, 1.0,
0, region->imageExtent.height, l, 1.0,
region->imageExtent.width, region->imageExtent.height, l, 1.0,
};
mali_ptr src_coords =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
sizeof(src_rect), 64);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
view.first_layer = view.last_layer = l + first_layer;
batch->blit.src = buf->bo;
batch->blit.dst = img->pimage.data.bo;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
mali_ptr tsd, tiler;
tsd = batch->tls.gpu;
tiler = batch->tiler.descs.gpu;
struct panfrost_ptr job;
job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
src_coords, dst_coords,
0, 0, pushconsts,
vpd, rsd, tsd, tiler);
util_dynarray_append(&batch->jobs, void *, job.cpu);
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
}
static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
{
STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
panvk_meta_copy_buf2img_fmts[i],
&shader_info);
dev->meta.copy.buf2img[i].rsd =
panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
shader, &shader_info,
panvk_meta_copy_buf2img_fmts[i].imgfmt,
panvk_meta_copy_buf2img_fmts[i].mask,
false);
}
}
void
panvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
}
}
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
{ PIPE_FORMAT_R8_UINT, 0x1 },
{ PIPE_FORMAT_R8G8_UINT, 0x3 },
{ PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
{ PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
{ PIPE_FORMAT_R16G16B16_UINT, 0x7 },
{ PIPE_FORMAT_R32G32_UINT, 0x3 },
{ PIPE_FORMAT_R32G32B32_UINT, 0x7 },
{ PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
/* S8 -> Z24S8 */
{ PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
/* S8 -> Z32_S8X24 */
{ PIPE_FORMAT_R32G32_UINT, 0x2 },
/* Z24X8 -> Z24S8 */
{ PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
/* Z32 -> Z32_S8X24 */
{ PIPE_FORMAT_R32G32_UINT, 0x1 },
};
static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
/* Pick blendable formats when we can, and the UINT variant matching the
* texel size otherwise.
*/
switch (util_format_get_blocksize(imgfmt)) {
case 1: return PIPE_FORMAT_R8_UINT;
/* AFBC stores things differently for RGB565, so
* we can't simply map to R8G8 in that case */
case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
case 6: return PIPE_FORMAT_R16G16B16_UINT;
case 8: return PIPE_FORMAT_R32G32_UINT;
case 12: return PIPE_FORMAT_R32G32B32_UINT;
case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
default: unreachable("Invalid format\n");
}
}
struct panvk_meta_copy_img2buf_info {
struct {
mali_ptr ptr;
struct {
unsigned line;
unsigned surf;
} stride;
} buf;
struct {
struct {
unsigned x, y, z;
} offset;
struct {
unsigned minx, miny, maxx, maxy;
} extent;
} img;
} PACKED;
#define panvk_meta_copy_img2buf_get_info_field(b, field) \
nir_load_push_constant((b), 1, \
sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
nir_imm_int(b, 0), \
.base = offsetof(struct panvk_meta_copy_img2buf_info, field), \
.range = ~0)
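/* Build the compute shader for an image-to-buffer copy: each invocation
* fetches one texel with txf, repacks it to the buffer layout and writes it
* with a global store. The bounds check is needed because the dispatch is
* rounded up to 16x16 tiles.
*/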
static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
struct panvk_meta_copy_format_info key,
unsigned texdim, unsigned texisarray,
struct pan_shader_info *shader_info)
{
unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
/* FIXME: Won't work on compute queues, but we can't do that with
* a compute shader if the destination is an AFBC surface.
*/
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
GENX(pan_shader_get_compiler_options)(),
"panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
texdim, texisarray ? "[]" : "",
util_format_name(key.imgfmt),
key.mask);
nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
nir_ssa_def *bufptr =
panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
nir_ssa_def *buflinestride =
panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
nir_ssa_def *bufsurfstride =
panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
nir_ssa_def *imgminx =
panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
nir_ssa_def *imgminy =
panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
nir_ssa_def *imgmaxx =
panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
nir_ssa_def *imgmaxy =
panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
nir_ssa_def *imgcoords, *inbounds;
switch (texdim + texisarray) {
case 1:
imgcoords =
nir_iadd(&b,
nir_channel(&b, coord, 0),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
inbounds =
nir_iand(&b,
nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
break;
case 2:
imgcoords =
nir_vec2(&b,
nir_iadd(&b,
nir_channel(&b, coord, 0),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
nir_iadd(&b,
nir_channel(&b, coord, 1),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
inbounds =
nir_iand(&b,
nir_iand(&b,
nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
nir_iand(&b,
nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
break;
case 3:
imgcoords =
nir_vec3(&b,
nir_iadd(&b,
nir_channel(&b, coord, 0),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
nir_iadd(&b,
nir_channel(&b, coord, 1),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
nir_iadd(&b,
nir_channel(&b, coord, 2),
panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z)));
inbounds =
nir_iand(&b,
nir_iand(&b,
nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
nir_iand(&b,
nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
break;
default:
unreachable("Invalid texture dimension\n");
}
nir_push_if(&b, inbounds);
/* FIXME: doesn't work for tiled+compressed formats since tiles are 4x4
* blocks instead of 16x16 texels in that case, and there's nothing we can
* do to force the tile size to 4x4 in the render path.
* This being said, compressed textures are not compatible with AFBC, so we
* could use a compute shader arranging the blocks properly.
*/
nir_ssa_def *offset =
nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
offset = nir_iadd(&b, offset,
nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
offset = nir_iadd(&b, offset,
nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
unsigned imgcompsz = imgtexelsz <= 4 ?
1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
unsigned nimgcomps = imgtexelsz / imgcompsz;
assert(nimgcomps <= 4);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
tex->op = nir_texop_txf;
tex->texture_index = 0;
tex->is_array = texisarray;
tex->dest_type = util_format_is_unorm(key.imgfmt) ?
nir_type_float32 : nir_type_uint32;
switch (texdim) {
case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
default: unreachable("Invalid texture dimension");
}
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(imgcoords);
tex->coord_components = texdim + texisarray;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
nir_alu_type_get_type_size(tex->dest_type), NULL);
nir_builder_instr_insert(&b, &tex->instr);
nir_ssa_def *texel = &tex->dest.ssa;
unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
unsigned nbufcomps = util_bitcount(fullmask);
if (key.mask != fullmask) {
nir_ssa_def *bufcomps[4];
nbufcomps = 0;
for (unsigned i = 0; i < nimgcomps; i++) {
if (key.mask & BITFIELD_BIT(i))
bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
}
texel = nir_vec(&b, bufcomps, nbufcomps);
}
unsigned bufcompsz = buftexelsz / nbufcomps;
if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
texel = nir_fmul(&b, texel,
nir_vec3(&b,
nir_imm_float(&b, 31),
nir_imm_float(&b, 63),
nir_imm_float(&b, 31)));
texel = nir_f2u16(&b, texel);
texel = nir_ior(&b, nir_channel(&b, texel, 0),
nir_ior(&b,
nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
imgcompsz = 2;
bufcompsz = 2;
nbufcomps = 1;
nimgcomps = 1;
} else if (imgcompsz == 1) {
nir_ssa_def *packed = nir_channel(&b, texel, 0);
for (unsigned i = 1; i < nbufcomps; i++) {
packed = nir_ior(&b, packed,
nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
nir_imm_int(&b, i * 8)));
}
texel = packed;
bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
nbufcomps = 1;
}
assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
assert(nbufcomps <= 4 && nimgcomps <= 4);
texel = nir_u2uN(&b, texel, bufcompsz * 8);
nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
nir_pop_if(&b, NULL);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.is_blit = true,
.no_ubo_to_push = true,
};
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
return shader;
}
static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
{
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
return i;
}
unreachable("Invalid texel size\n");
}
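/* Copy one VkBufferImageCopy2 region from an image to a buffer with a
* single compute job covering all layers/slices.
*/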
static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *buf,
const struct panvk_image *img,
const VkBufferImageCopy2 *region)
{
struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
struct panvk_meta_copy_format_info key = {
.imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
.mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
region->imageSubresource.aspectMask),
};
unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
unsigned texdimidx =
panvk_meta_copy_tex_type(img->pimage.layout.dim,
img->pimage.layout.array_size > 1);
unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;
struct panvk_meta_copy_img2buf_info info = {
.buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
.buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
.img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
.img.extent.minx = MAX2(region->imageOffset.x, 0),
.img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
};
if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
info.img.extent.maxy = region->imageSubresource.layerCount - 1;
} else {
info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
info.img.offset.z = MAX2(region->imageOffset.z, 0);
info.img.extent.miny = MAX2(region->imageOffset.y, 0);
info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
}
info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
info.buf.stride.line;
mali_ptr pushconsts =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
struct pan_image_view view = {
.format = key.imgfmt,
.dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
.image = &img->pimage,
.nr_samples = img->pimage.layout.nr_samples,
.first_level = region->imageSubresource.mipLevel,
.last_level = region->imageSubresource.mipLevel,
.first_layer = region->imageSubresource.baseArrayLayer,
.last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
.swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
};
mali_ptr texture =
panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
mali_ptr sampler =
panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);
panvk_per_arch(cmd_close_batch)(cmdbuf);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
struct pan_tls_info tlsinfo = { 0 };
batch->blit.src = img->pimage.data.bo;
batch->blit.dst = buf->bo;
batch->tls =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);
mali_ptr tsd = batch->tls.gpu;
struct pan_compute_dim wg_sz = {
16,
img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
1,
};
struct pan_compute_dim num_wg = {
(ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
region->imageSubresource.layerCount :
(ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
};
struct panfrost_ptr job =
panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
&batch->scoreboard, &num_wg, &wg_sz,
texture, sampler,
pushconsts, rsd, tsd);
util_dynarray_append(&batch->jobs, void *, job.cpu);
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
{
STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
for (unsigned texdim = 1; texdim <= 3; texdim++) {
unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
panvk_meta_copy_img2buf_fmts[i],
texdim, false, &shader_info);
dev->meta.copy.img2buf[texdimidx][i].rsd =
panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
&dev->meta.desc_pool.base,
shader, &shader_info, true);
if (texdim == 3)
continue;
memset(&shader_info, 0, sizeof(shader_info));
texdimidx = panvk_meta_copy_tex_type(texdim, true);
assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
shader =
panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
panvk_meta_copy_img2buf_fmts[i],
texdim, true, &shader_info);
dev->meta.copy.img2buf[texdimidx][i].rsd =
panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
&dev->meta.desc_pool.base,
shader, &shader_info, true);
}
}
}
void
panvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);
for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]);
}
}
struct panvk_meta_copy_buf2buf_info {
mali_ptr src;
mali_ptr dst;
} PACKED;
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
nir_load_push_constant((b), 1, \
sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
nir_imm_int(b, 0), \
.base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \
.range = ~0)
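/* Build the compute shader for a buffer-to-buffer copy: each invocation
* moves one block of blksz bytes from src to dst.
*/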
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
unsigned blksz,
struct pan_shader_info *shader_info)
{
/* FIXME: Won't work on compute queues, but we can't do that with
* a compute shader if the destination is an AFBC surface.
*/
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
GENX(pan_shader_get_compiler_options)(),
"panvk_meta_copy_buf2buf(blksz=%d)",
blksz);
nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
nir_ssa_def *offset =
nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
nir_ssa_def *srcptr =
nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
nir_ssa_def *dstptr =
nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);
unsigned compsz = blksz < 4 ? blksz : 4;
unsigned ncomps = blksz / compsz;
nir_store_global(&b, dstptr, blksz,
nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
(1 << ncomps) - 1);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.is_blit = true,
.no_ubo_to_push = true,
};
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
return shader;
}
static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
{
for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1 << i, &shader_info);
dev->meta.copy.buf2buf[i].rsd =
panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
shader, &shader_info, false);
}
}
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *src,
const struct panvk_buffer *dst,
const VkBufferCopy2 *region)
{
struct panvk_meta_copy_buf2buf_info info = {
.src = panvk_buffer_gpu_ptr(src, region->srcOffset),
.dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
};
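/* The copy is split into blocks whose size is the largest power of two
* (up to 16 bytes) dividing the source pointer, destination pointer and
* size. For instance, src = 0x1004, dst = 0x2000, size = 0x100 gives
* (0x1004 | 0x2000 | 0x100) & 15 = 4, ffs() = 3, so 4-byte blocks; if all
* three are 16-byte aligned, ffs() returns 0 and 16-byte blocks are used.
*/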
unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
unsigned log2blksz = alignment ? alignment - 1 : 4;
assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
mali_ptr pushconsts =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
panvk_per_arch(cmd_close_batch)(cmdbuf);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
mali_ptr tsd = batch->tls.gpu;
unsigned nblocks = region->size >> log2blksz;
struct pan_compute_dim num_wg = { nblocks, 1, 1 };
struct pan_compute_dim wg_sz = { 1, 1, 1};
struct panfrost_ptr job =
panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
&num_wg, &wg_sz,
0, 0, pushconsts, rsd, tsd);
util_dynarray_append(&batch->jobs, void *, job.cpu);
batch->blit.src = src->bo;
batch->blit.dst = dst->bo;
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
void
panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2 *pCopyBufferInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);
for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
}
}
struct panvk_meta_fill_buf_info {
mali_ptr start;
uint32_t val;
} PACKED;
#define panvk_meta_fill_buf_get_info_field(b, field) \
nir_load_push_constant((b), 1, \
sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
nir_imm_int(b, 0), \
.base = offsetof(struct panvk_meta_fill_buf_info, field), \
.range = ~0)
static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
struct pan_shader_info *shader_info)
{
/* FIXME: Won't work on compute queues, but we can't do that with
* a compute shader if the destination is an AFBC surface.
*/
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
GENX(pan_shader_get_compiler_options)(),
"panvk_meta_fill_buf()");
nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
nir_ssa_def *offset =
nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
nir_ssa_def *ptr =
nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);
nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.is_blit = true,
.no_ubo_to_push = true,
};
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
return shader;
}
static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
struct pan_pool *bin_pool,
struct pan_pool *desc_pool)
{
struct pan_shader_info shader_info;
mali_ptr shader =
panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);
struct panfrost_ptr rsd_ptr =
pan_pool_alloc_desc_aggregate(desc_pool,
PAN_DESC(RENDERER_STATE));
pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, shader, &cfg);
}
return rsd_ptr.gpu;
}
static void
panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
{
dev->meta.copy.fillbuf.rsd =
panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
&dev->meta.desc_pool.base);
}
static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *dst,
VkDeviceSize size, VkDeviceSize offset,
uint32_t val)
{
struct panvk_meta_fill_buf_info info = {
.start = panvk_buffer_gpu_ptr(dst, offset),
.val = val,
};
size = panvk_buffer_range(dst, offset, size);
/* From the Vulkan spec:
*
* "size is the number of bytes to fill, and must be either a multiple
* of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
* the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
* buffer is not a multiple of 4, then the nearest smaller multiple is
* used."
*/
size &= ~3ull;
assert(!(offset & 3) && !(size & 3));
unsigned nwords = size / sizeof(uint32_t);
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;
mali_ptr pushconsts =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
panvk_per_arch(cmd_close_batch)(cmdbuf);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
mali_ptr tsd = batch->tls.gpu;
struct pan_compute_dim num_wg = { nwords, 1, 1 };
struct pan_compute_dim wg_sz = { 1, 1, 1};
struct panfrost_ptr job =
panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
&num_wg, &wg_sz,
0, 0, pushconsts, rsd, tsd);
util_dynarray_append(&batch->jobs, void *, job.cpu);
batch->blit.dst = dst->bo;
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
void
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize fillSize,
uint32_t data)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
}
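/* vkCmdUpdateBuffer is implemented by uploading the data to a temporary
* GPU buffer and then reusing the 4-byte buffer-to-buffer copy pipeline.
*/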
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_buffer *dst, VkDeviceSize offset,
VkDeviceSize size, const void *data)
{
struct panvk_meta_copy_buf2buf_info info = {
.src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
.dst = panvk_buffer_gpu_ptr(dst, offset),
};
unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;
mali_ptr rsd =
cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
mali_ptr pushconsts =
pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
panvk_per_arch(cmd_close_batch)(cmdbuf);
struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
mali_ptr tsd = batch->tls.gpu;
unsigned nblocks = size >> log2blksz;
struct pan_compute_dim num_wg = { nblocks, 1, 1 };
struct pan_compute_dim wg_sz = { 1, 1, 1};
struct panfrost_ptr job =
panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
&num_wg, &wg_sz,
0, 0, pushconsts, rsd, tsd);
util_dynarray_append(&batch->jobs, void *, job.cpu);
batch->blit.dst = dst->bo;
panvk_per_arch(cmd_close_batch)(cmdbuf);
}
void
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void *pData)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
}
void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
panvk_meta_copy_img2img_init(dev, false);
panvk_meta_copy_img2img_init(dev, true);
panvk_meta_copy_buf2img_init(dev);
panvk_meta_copy_img2buf_init(dev);
panvk_meta_copy_buf2buf_init(dev);
panvk_meta_fill_buf_init(dev);
}