blob: 50bd9ee4b6fd131ed082c46a0b33fd4c67282592 [file]
/*
* Copyright © 2024 Collabora Ltd.
* Copyright © 2024 Arm Ltd.
*
* SPDX-License-Identifier: MIT
*/
#include "panvk_buffer.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_meta.h"
#include "panvk_device_memory.h"
#include "panvk_entrypoints.h"
#include "pan_desc.h"
#include "pan_util.h"
/*
 * Translate an attachment loadOp into the clear/preload flags used by
 * pan_fb_info. Flags are only ever set, never cleared, so callers can
 * accumulate the result of multiple attachments.
 */
static void
att_set_clear_preload(const VkRenderingAttachmentInfo *att, bool *clear, bool *preload)
{
   const VkAttachmentLoadOp load_op = att->loadOp;

   if (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
      *clear = true;
   } else if (load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
      *preload = true;
   } else if (load_op == VK_ATTACHMENT_LOAD_OP_NONE) {
      /* Neither cleared nor preloaded. */
   } else if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) {
      /* This is a very frustrating corner case. From the spec:
       *
       *    VK_ATTACHMENT_STORE_OP_NONE specifies the contents within the
       *    render area are not accessed by the store operation as long as
       *    no values are written to the attachment during the render pass.
       *
       * With VK_ATTACHMENT_LOAD_OP_DONT_CARE + VK_ATTACHMENT_STORE_OP_NONE,
       * we need to preserve the contents throughout partial renders. The
       * easiest way to do that is forcing a preload, so that partial stores
       * for unused attachments will be no-op'd by writing existing contents.
       *
       * TODO: disable preload when we have clean_pixel_write_enable = false
       * as an optimization
       */
      if (att->storeOp == VK_ATTACHMENT_STORE_OP_NONE)
         *preload = true;
   } else {
      unreachable("Unsupported loadOp");
   }
}
/*
 * Record color attachment @index into the command-buffer graphics state and
 * the hardware framebuffer description (pan_fb_info).
 *
 * Called from cmd_init_render_state() for each non-NULL color attachment.
 */
static void
render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf,
                                  const VkRenderingAttachmentInfo *att,
                                  uint32_t index)
{
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(cmdbuf->vk.base.device->physical);
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;
   VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
   struct panvk_image *img =
      container_of(iview->vk.image, struct panvk_image, vk);

   state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(index);
   state->render.color_attachments.iviews[index] = iview;
   state->render.color_attachments.fmts[index] = iview->vk.format;
   state->render.color_attachments.samples[index] = img->vk.samples;

#if PAN_ARCH < 9
   /* Pre-v9 submission tracks BOs explicitly: collect the backing BO of
    * every image plane referenced by the view. */
   for (uint8_t p = 0; p < ARRAY_SIZE(iview->pview.planes); p++) {
      struct pan_image_plane_ref pref =
         pan_image_view_get_plane(&iview->pview, p);

      if (!pref.image)
         continue;

      assert(pref.plane_idx < ARRAY_SIZE(img->planes));
      assert(img->planes[pref.plane_idx].mem->bo != NULL);
      state->render.fb.bos[state->render.fb.bo_count++] =
         img->planes[pref.plane_idx].mem->bo;
   }
#endif

   fbinfo->rts[index].view = &iview->pview;
   fbinfo->rts[index].crc_valid = &state->render.fb.crc_valid[index];

   /* The FB sample count is the max over all bound attachments. */
   state->render.fb.nr_samples =
      MAX2(state->render.fb.nr_samples,
           pan_image_view_get_nr_samples(&iview->pview));

   if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
      /* Pre-pack the clear color into the tile-buffer layout expected by
       * the hardware. */
      enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
      union pipe_color_union *col =
         (union pipe_color_union *)&att->clearValue.color;

      pan_pack_color(phys_dev->formats.blendable,
                     fbinfo->rts[index].clear_value, col, fmt, false);
   }

   att_set_clear_preload(att, &fbinfo->rts[index].clear,
                         &fbinfo->rts[index].preload);

   if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
      /* Only record the resolve target here; the resolve itself is issued
       * later by cmd_resolve_attachments(). */
      struct panvk_resolve_attachment *resolve_info =
         &state->render.color_attachments.resolve[index];
      VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);

      resolve_info->mode = att->resolveMode;
      resolve_info->dst_iview = resolve_iview;
   }
}
/*
 * Record the depth attachment into the command-buffer graphics state and
 * the pan_fb_info ZS description, patching the view format/planes so the
 * view addresses only the depth data of packed depth/stencil formats.
 */
static void
render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf,
                              const VkRenderingAttachmentInfo *att)
{
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;
   VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
   struct panvk_image *img =
      container_of(iview->vk.image, struct panvk_image, vk);

#if PAN_ARCH < 9
   /* Depth plane always comes first. */
   state->render.fb.bos[state->render.fb.bo_count++] = img->planes[0].mem->bo;
#endif

   state->render.z_attachment.fmt = iview->vk.format;
   state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;

   /* Work on a copy of the view so we can patch its format/planes without
    * modifying the image view object itself. */
   state->render.zs_pview = iview->pview;
   fbinfo->zs.view.zs = &state->render.zs_pview;

   /* D32_S8 is a multiplanar format, so we need to adjust the format of the
    * depth-only view to match the one of the depth plane.
    */
   if (iview->pview.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
      state->render.zs_pview.format = PIPE_FORMAT_Z32_FLOAT;

   /* Depth view only references plane 0 (the depth plane). */
   state->render.zs_pview.planes[0] = (struct pan_image_plane_ref){
      .image = &img->planes[0].image,
      .plane_idx = 0,
   };
   state->render.zs_pview.planes[1] = (struct pan_image_plane_ref){0};

   /* The FB sample count is the max over all bound attachments. */
   state->render.fb.nr_samples =
      MAX2(state->render.fb.nr_samples,
           pan_image_view_get_nr_samples(&iview->pview));
   state->render.z_attachment.iview = iview;

   /* D24S8 is a single plane format where the depth/stencil are interleaved.
    * If we touch the depth component, we need to make sure the stencil
    * component is preserved, hence the preload, and the view format adjusment.
    */
   if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
      fbinfo->zs.preload.s = true;
      /* NOTE: cmdbuf->state.gfx.render.zs_pview is the same object as
       * state->render.zs_pview above. */
      cmdbuf->state.gfx.render.zs_pview.format =
         PIPE_FORMAT_Z24_UNORM_S8_UINT;
   } else {
      state->render.zs_pview.format =
         vk_format_to_pipe_format(vk_format_depth_only(img->vk.format));
   }

   if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
      fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;

   att_set_clear_preload(att, &fbinfo->zs.clear.z, &fbinfo->zs.preload.z);

   if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
      /* Only record the resolve target here; the resolve itself is issued
       * later by cmd_resolve_attachments(). */
      struct panvk_resolve_attachment *resolve_info =
         &state->render.z_attachment.resolve;
      VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);

      resolve_info->mode = att->resolveMode;
      resolve_info->dst_iview = resolve_iview;
   }
}
/*
 * Record the stencil attachment into the command-buffer graphics state and
 * the pan_fb_info ZS description. Must run after the depth attachment has
 * been recorded (if any), since the D24S8 combining logic below inspects
 * state->render.z_attachment and fbinfo->zs.view.zs.
 */
static void
render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf,
                              const VkRenderingAttachmentInfo *att)
{
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;
   VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
   struct panvk_image *img =
      container_of(iview->vk.image, struct panvk_image, vk);

#if PAN_ARCH < 9
   /* The stencil plane is always last. */
   state->render.fb.bos[state->render.fb.bo_count++] =
      img->planes[img->plane_count - 1].mem->bo;
#endif

   state->render.s_attachment.fmt = iview->vk.format;
   state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;

   /* Work on a copy of the view so we can patch its format/planes without
    * modifying the image view object itself. */
   state->render.s_pview = iview->pview;
   fbinfo->zs.view.s = &state->render.s_pview;

   /* D32_S8 is a multiplanar format, so we need to adjust the format of the
    * stencil-only view to match the one of the stencil plane.
    */
   state->render.s_pview.format = img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT
                                     ? PIPE_FORMAT_Z24_UNORM_S8_UINT
                                     : PIPE_FORMAT_S8_UINT;

   if (img->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
      /* Stencil lives in the second image plane for D32_S8. */
      state->render.s_pview.planes[0] = (struct pan_image_plane_ref){0};
      state->render.s_pview.planes[1] = (struct pan_image_plane_ref){
         .image = &img->planes[1].image,
         .plane_idx = 0,
      };
   } else {
      /* Single-plane formats: stencil is in (or interleaved with) plane 0. */
      state->render.s_pview.planes[0] = (struct pan_image_plane_ref){
         .image = &img->planes[0].image,
         .plane_idx = 0,
      };
      state->render.s_pview.planes[1] = (struct pan_image_plane_ref){0};
   }

   /* The FB sample count is the max over all bound attachments. */
   state->render.fb.nr_samples =
      MAX2(state->render.fb.nr_samples,
           pan_image_view_get_nr_samples(&iview->pview));
   state->render.s_attachment.iview = iview;

   /* If the depth and stencil attachments point to the same image,
    * and the format is D24S8, we can combine them in a single view
    * addressing both components.
    */
   if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
       state->render.z_attachment.iview &&
       state->render.z_attachment.iview->vk.image == iview->vk.image) {
      state->render.zs_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
      fbinfo->zs.preload.s = false;
      fbinfo->zs.view.s = NULL;

      /* If there was no depth attachment, and the image format is D24S8,
       * we use the depth+stencil slot, so we can benefit from AFBC, which
       * is not supported on the stencil-only slot on Bifrost.
       */
   } else if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
              fbinfo->zs.view.zs == NULL) {
      fbinfo->zs.view.zs = &state->render.s_pview;
      state->render.s_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
      fbinfo->zs.preload.z = true;
      fbinfo->zs.view.s = NULL;
   }

   if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
      fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil;

   att_set_clear_preload(att, &fbinfo->zs.clear.s, &fbinfo->zs.preload.s);

   if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
      /* Only record the resolve target here; the resolve itself is issued
       * later by cmd_resolve_attachments(). */
      struct panvk_resolve_attachment *resolve_info =
         &state->render.s_attachment.resolve;
      VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);

      resolve_info->mode = att->resolveMode;
      resolve_info->dst_iview = resolve_iview;
   }
}
/*
 * Reset and populate the graphics render state at vkCmdBeginRendering()
 * time: clears per-pass state, records all bound attachments, and derives
 * the framebuffer extent from the render area and attachment sizes.
 */
void
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
                                      const VkRenderingInfo *pRenderingInfo)
{
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(cmdbuf->vk.base.device->physical);
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;
   /* Tracks the smallest attachment extent; the FB can't be larger than
    * the smallest bound attachment. */
   uint32_t att_width = UINT32_MAX, att_height = UINT32_MAX;

   state->render.flags = pRenderingInfo->flags;
   BITSET_SET(state->dirty, PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE);

#if PAN_ARCH < 9
   /* Reset the explicit BO tracking; attachments re-add their BOs below. */
   state->render.fb.bo_count = 0;
   memset(state->render.fb.bos, 0, sizeof(state->render.fb.bos));
#endif

   state->render.first_provoking_vertex = U_TRISTATE_UNSET;
#if PAN_ARCH >= 10
   state->render.maybe_set_tds_provoking_vertex = NULL;
   state->render.maybe_set_fbds_provoking_vertex = NULL;
#endif

   /* Wipe all per-pass attachment state before re-recording it. */
   memset(state->render.fb.crc_valid, 0, sizeof(state->render.fb.crc_valid));
   memset(&state->render.color_attachments, 0,
          sizeof(state->render.color_attachments));
   memset(&state->render.z_attachment, 0, sizeof(state->render.z_attachment));
   memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment));
   state->render.bound_attachments = 0;

   /* With multiview, the layer count is implied by the view mask. */
   cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ?
                                          util_last_bit(pRenderingInfo->viewMask) :
                                          pRenderingInfo->layerCount;
   cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;

   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = pan_query_optimal_tib_size(phys_dev->model),
      .z_tile_buf_budget = pan_query_optimal_z_tib_size(phys_dev->model),
      .nr_samples = 0,
      .rt_count = pRenderingInfo->colorAttachmentCount,
   };
   /* Default to 1 sample; attachments raise this via MAX2 as they bind. */
   cmdbuf->state.gfx.render.fb.nr_samples = 1;

   assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));

   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att =
         &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);

      if (!iview)
         continue;

      render_state_set_color_attachment(cmdbuf, att, i);
      att_width = MIN2(iview->vk.extent.width, att_width);
      att_height = MIN2(iview->vk.extent.height, att_height);
   }

   if (pRenderingInfo->pDepthAttachment &&
       pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);

      if (iview) {
         assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
         render_state_set_z_attachment(cmdbuf, att);
         att_width = MIN2(iview->vk.extent.width, att_width);
         att_height = MIN2(iview->vk.extent.height, att_height);
      }
   }

   if (pRenderingInfo->pStencilAttachment &&
       pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);

      if (iview) {
         assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
         render_state_set_s_attachment(cmdbuf, att);
         att_width = MIN2(iview->vk.extent.width, att_width);
         att_height = MIN2(iview->vk.extent.height, att_height);
      }
   }

   /* Render area as an inclusive min/max extent. */
   fbinfo->extent.minx = pRenderingInfo->renderArea.offset.x;
   fbinfo->extent.maxx = pRenderingInfo->renderArea.offset.x +
                         pRenderingInfo->renderArea.extent.width - 1;
   fbinfo->extent.miny = pRenderingInfo->renderArea.offset.y;
   fbinfo->extent.maxy = pRenderingInfo->renderArea.offset.y +
                         pRenderingInfo->renderArea.extent.height - 1;

   if (state->render.bound_attachments) {
      fbinfo->width = att_width;
      fbinfo->height = att_height;
   } else {
      /* No attachments: size the FB to cover the render area. */
      fbinfo->width = fbinfo->extent.maxx + 1;
      fbinfo->height = fbinfo->extent.maxy + 1;
   }

   assert(fbinfo->width && fbinfo->height);
}
/*
 * Latch the sample count into the FB info and pick a tile size, the first
 * time tiler/framebuffer descriptors are about to be emitted. On subsequent
 * calls this only verifies the sample count hasn't changed.
 */
void
panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   /* In case we already emitted tiler/framebuffer descriptors, we ensure
    * that the sample count didn't change (this should never happen) */
   if (fbinfo->nr_samples != 0) {
      assert(fbinfo->nr_samples == cmdbuf->state.gfx.render.fb.nr_samples);
      return;
   }

   /* In case we never emitted tiler/framebuffer descriptors, we emit the
    * current sample count and compute tile size */
   fbinfo->nr_samples = cmdbuf->state.gfx.render.fb.nr_samples;
   GENX(pan_select_tile_size)(fbinfo);

#if PAN_ARCH != 6
   if (fbinfo->cbuf_allocation > fbinfo->tile_buf_budget) {
      vk_perf(VK_LOG_OBJS(&cmdbuf->vk.base),
              "Using too much tile-memory, disabling pipelining");
   }
#endif
}
/*
 * Emit the pending multisample resolves recorded by the
 * render_state_set_*_attachment() helpers, using vk_meta's resolve path.
 * Does nothing if no attachment requested a resolve.
 */
void
panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   bool needs_resolve = false;
   unsigned bound_atts = cmdbuf->state.gfx.render.bound_attachments;
   unsigned color_att_count =
      util_last_bit(bound_atts & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS);
   VkRenderingAttachmentInfo color_atts[MAX_RTS];

   /* Rebuild VkRenderingAttachmentInfo entries from the recorded state so we
    * can hand them to vk_meta_resolve_rendering(). */
   for (uint32_t i = 0; i < color_att_count; i++) {
      const struct panvk_resolve_attachment *resolve_info =
         &cmdbuf->state.gfx.render.color_attachments.resolve[i];
      struct panvk_image_view *src_iview =
         cmdbuf->state.gfx.render.color_attachments.iviews[i];

      color_atts[i] = (VkRenderingAttachmentInfo){
         .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
         .imageView = panvk_image_view_to_handle(src_iview),
         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
         .resolveMode = resolve_info->mode,
         .resolveImageView =
            panvk_image_view_to_handle(resolve_info->dst_iview),
         .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
         needs_resolve = true;
   }

   const struct panvk_resolve_attachment *resolve_info =
      &cmdbuf->state.gfx.render.z_attachment.resolve;
   struct panvk_image_view *src_iview =
      cmdbuf->state.gfx.render.z_attachment.iview;

   VkRenderingAttachmentInfo z_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = panvk_image_view_to_handle(src_iview),
      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
      .resolveMode = resolve_info->mode,
      .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
      .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
      needs_resolve = true;

   resolve_info = &cmdbuf->state.gfx.render.s_attachment.resolve;
   src_iview = cmdbuf->state.gfx.render.s_attachment.iview;

   VkRenderingAttachmentInfo s_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = panvk_image_view_to_handle(src_iview),
      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
      .resolveMode = resolve_info->mode,
      .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
      .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
      needs_resolve = true;

   if (!needs_resolve)
      return;

#if PAN_ARCH >= 10
   /* insert a barrier for resolve */
   const VkMemoryBarrier2 mem_barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
      .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                       VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT
   };
   const VkDependencyInfo dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &mem_barrier,
   };

   panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
                                       &dep_info);
#endif

   /* Render area is stored as an inclusive min/max extent; convert back to
    * offset+extent for the meta pass. */
   const VkRenderingInfo render_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .renderArea =
         {
            .offset.x = fbinfo->extent.minx,
            .offset.y = fbinfo->extent.miny,
            .extent.width = fbinfo->extent.maxx - fbinfo->extent.minx + 1,
            .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1,
         },
      .layerCount = cmdbuf->state.gfx.render.layer_count,
      .viewMask = cmdbuf->state.gfx.render.view_mask,
      .colorAttachmentCount = color_att_count,
      .pColorAttachments = color_atts,
      .pDepthAttachment = &z_att,
      .pStencilAttachment = &s_att,
   };

   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_cmd_meta_graphics_save_ctx save = {0};

   /* Save/restore the user's graphics state around the meta draw. */
   panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save);
   vk_meta_resolve_rendering(&cmdbuf->vk, &dev->meta, &render_info);
   panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save);
}
void
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
                                     const VkRenderingInfo *render_info)
{
   /* We force preloading for all active attachments when the render area is
    * unaligned or when a barrier flushes prior draw calls in the middle of a
    * render pass. The two cases can be distinguished by whether a
    * render_info is provided.
    *
    * When the render area is unaligned, we force preloading to preserve
    * contents falling outside of the render area. We also make sure the
    * initial attachment clears are performed.
    */
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;
   /* One entry per color RT plus depth and stencil. */
   VkClearAttachment clear_atts[MAX_RTS + 2];
   uint32_t clear_att_count = 0;

   if (!state->render.bound_attachments)
      return;

   for (unsigned i = 0; i < fbinfo->rt_count; i++) {
      if (!fbinfo->rts[i].view)
         continue;

      fbinfo->rts[i].preload = true;

      /* A pending FB-level clear can't coexist with a forced preload:
       * convert it into an explicit vkCmdClearAttachments below. */
      if (fbinfo->rts[i].clear) {
         if (render_info) {
            const VkRenderingAttachmentInfo *att =
               &render_info->pColorAttachments[i];

            clear_atts[clear_att_count++] = (VkClearAttachment){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .colorAttachment = i,
               .clearValue = att->clearValue,
            };
         }

         fbinfo->rts[i].clear = false;
      }
   }

   if (fbinfo->zs.view.zs) {
      fbinfo->zs.preload.z = true;

      if (fbinfo->zs.clear.z) {
         if (render_info) {
            const VkRenderingAttachmentInfo *att =
               render_info->pDepthAttachment;

            clear_atts[clear_att_count++] = (VkClearAttachment){
               .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
               .clearValue = att->clearValue,
            };
         }

         fbinfo->zs.clear.z = false;
      }
   }

   /* Stencil may be bound through its own view, or live in a combined
    * depth+stencil view (see render_state_set_s_attachment()). */
   if (fbinfo->zs.view.s ||
       (fbinfo->zs.view.zs &&
        util_format_is_depth_and_stencil(fbinfo->zs.view.zs->format))) {
      fbinfo->zs.preload.s = true;

      if (fbinfo->zs.clear.s) {
         if (render_info) {
            const VkRenderingAttachmentInfo *att =
               render_info->pStencilAttachment;

            clear_atts[clear_att_count++] = (VkClearAttachment){
               .aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
               .clearValue = att->clearValue,
            };
         }

         fbinfo->zs.clear.s = false;
      }
   }

#if PAN_ARCH >= 10
   /* insert a barrier for preload */
   const VkMemoryBarrier2 mem_barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
      .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                       VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
   };
   const VkDependencyInfo dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &mem_barrier,
   };

   panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
                                       &dep_info);
#endif

   /* Replay the converted clears as an explicit clear-attachments op. */
   if (clear_att_count && render_info) {
      VkClearRect clear_rect = {
         .rect = render_info->renderArea,
         .baseArrayLayer = 0,
         .layerCount = render_info->viewMask ? 1 : render_info->layerCount,
      };

      panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
                                          clear_att_count, clear_atts, 1,
                                          &clear_rect);
   }
}
/*
 * Force a framebuffer preload when the render area isn't aligned on the
 * meta tile size, so that pixels outside the render area survive the pass.
 */
void
panvk_per_arch(cmd_preload_render_area_border)(
   struct panvk_cmd_buffer *cmdbuf, const VkRenderingInfo *render_info)
{
   const unsigned tile_sz = pan_meta_tile_size(PAN_ARCH);
   struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
   struct pan_fb_info *fbinfo = &state->render.fb.info;

   /* The min corner must sit on a meta-tile boundary. */
   bool min_aligned =
      ((fbinfo->extent.minx | fbinfo->extent.miny) % tile_sz) == 0;
   /* The max corner must either reach the framebuffer edge or end exactly
    * on a meta-tile boundary. */
   bool maxx_aligned = fbinfo->extent.maxx + 1 == fbinfo->width ||
                       (fbinfo->extent.maxx % tile_sz) == (tile_sz - 1);
   bool maxy_aligned = fbinfo->extent.maxy + 1 == fbinfo->height ||
                       (fbinfo->extent.maxy % tile_sz) == (tile_sz - 1);

   /* If the render area is aligned on the meta tile size, we're good. */
   if (!(min_aligned && maxx_aligned && maxy_aligned))
      panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
}
/*
 * Build the input-attachment-map (IAM) sysvals from the dynamic
 * input-attachment-location state, mapping each input attachment index to
 * its color/ZS target and format-conversion descriptor.
 *
 * IAM slot 0 is reserved for MESA_VK_ATTACHMENT_NO_INDEX (depth/stencil
 * without an explicit index); explicitly-indexed attachments land at
 * index + 1. Unused slots are filled with ~0 by the memset below.
 */
static void
prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
{
   const struct vk_input_attachment_location_state *ial =
      &cmdbuf->vk.dynamic_graphics_state.ial;
   struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
   /* Unknown color attachment count: assume every RT may be referenced. */
   uint32_t catt_count =
      ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
         ? MAX_RTS
         : ial->color_attachment_count;

   /* ~0 marks an unused IAM entry. */
   memset(iam, ~0, sizeof(iam));

   assert(catt_count <= MAX_RTS);
   for (uint32_t i = 0; i < catt_count; i++) {
      /* Skip unmapped locations and attachments that aren't bound. */
      if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
          !(cmdbuf->state.gfx.render.bound_attachments &
            MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
         continue;

      VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
      struct mali_internal_conversion_packed conv;
      /* +1: slot 0 is reserved for the NO_INDEX depth/stencil entry. */
      uint32_t ia_idx = ial->color_map[i] + 1;

      assert(ia_idx < ARRAY_SIZE(iam));
      iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);

      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
         cfg.memory_format =
            GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
#if PAN_ARCH < 9
         cfg.register_format =
            vk_format_is_uint(fmt)   ? MALI_REGISTER_FILE_FORMAT_U32
            : vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
                                     : MALI_REGISTER_FILE_FORMAT_F32;
#endif
      }

      iam[ia_idx].conversion = conv.opaque[0];
   }

   if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
      uint32_t ia_idx =
         ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;

      assert(ia_idx < ARRAY_SIZE(iam));
      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;

#if PAN_ARCH < 9
      /* On v7, we need to pass the depth format around. If we use a conversion
       * of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
      VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
      enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
      struct mali_internal_conversion_packed conv;

      pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
         cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
         cfg.memory_format =
            GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
      }

      iam[ia_idx].conversion = conv.opaque[0];
#endif
   }

   if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
      uint32_t ia_idx =
         ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->stencil_att + 1;

      assert(ia_idx < ARRAY_SIZE(iam));
      iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
   }

   /* Upload the whole map, flagging the touched sysval FAUs as dirty. */
   for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
      set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
}
/* This value has been selected to get
 * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
 */
#define MIN_DEPTH_CLIP_RANGE 37.7E-06f

/*
 * Refresh the graphics sysval FAUs for an upcoming draw, and flag the
 * VS/FS push-uniform state dirty only when a sysval that the corresponding
 * shader actually reads has changed.
 */
void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
                                         const struct panvk_draw_info *info)
{
   const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
   const struct panvk_shader *fs = get_fs(cmdbuf);
   uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
   /* Tracks which sysval FAUs this call modified. */
   BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};

   set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
                  dev->printf.bo->addr.dev);
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
                  noperspective_varyings);
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.base_instance, info->instance.base);
#if PAN_ARCH < 9
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
                  info->vertex.raw_offset);
   set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
#endif

   if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
      for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) {
         set_gfx_sysval(cmdbuf, dirty_sysvals, blend.constants[i],
                        CLAMP(cb->blend_constants[i], 0.0f, 1.0f));
      }
   }

   if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
       dyn_gfx_state_dirty(cmdbuf, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
      const struct vk_rasterization_state *rs =
         &cmdbuf->vk.dynamic_graphics_state.rs;
      const struct vk_viewport_state *vp =
         &cmdbuf->vk.dynamic_graphics_state.vp;
      const VkViewport *viewport = &vp->viewports[0];

      /* Doing the viewport transform in the vertex shader and then depth
       * clipping with the viewport depth range gets a similar result to
       * clipping in clip-space, but loses precision when the viewport depth
       * range is very small. When minDepth == maxDepth, this completely
       * flattens the clip-space depth and results in never clipping.
       *
       * To work around this, set a lower limit on depth range when clipping is
       * enabled. This results in slightly incorrect fragment depth values, and
       * doesn't help with the precision loss, but at least clipping isn't
       * completely broken.
       */
      float z_min = viewport->minDepth;
      float z_max = viewport->maxDepth;

      if (vk_rasterization_state_depth_clip_enable(rs) &&
          fabsf(z_max - z_min) < MIN_DEPTH_CLIP_RANGE) {
         /* Preserve the direction of the depth range (inverted or not). */
         float z_sign = z_min <= z_max ? 1.0f : -1.0f;
         float z_center = 0.5f * (z_max + z_min);

         /* Bump offset off-center if necessary, to not go out of range */
         z_center = CLAMP(z_center, 0.5f * MIN_DEPTH_CLIP_RANGE,
                          1.0f - 0.5f * MIN_DEPTH_CLIP_RANGE);
         z_min = z_center - 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
         z_max = z_center + 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
      }

      /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
       * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
       * end of the section, the spec defines:
       *
       * px = width
       * py = height
       * pz = maxDepth - minDepth if negativeOneToOne is false
       * pz = (maxDepth - minDepth) / 2 if negativeOneToOne is true
       */
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.x,
                     0.5f * viewport->width);
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.y,
                     0.5f * viewport->height);
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.z,
                     vp->depth_clip_negative_one_to_one ?
                     0.5f * (z_max - z_min) : z_max - z_min);

      /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
       * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
       * end of the section, the spec defines:
       *
       * ox = x + width/2
       * oy = y + height/2
       * oz = minDepth if negativeOneToOne is false
       * oz = (maxDepth + minDepth) / 2 if negativeOneToOne is true
       */
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.x,
                     (0.5f * viewport->width) + viewport->x);
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.y,
                     (0.5f * viewport->height) + viewport->y);
      set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.z,
                     vp->depth_clip_negative_one_to_one ?
                     0.5f * (z_min + z_max) : z_min);
   }

   if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
      prepare_iam_sysvals(cmdbuf, dirty_sysvals);

   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;

#if PAN_ARCH < 9
   /* Pre-v9: descriptor-set tables are passed through sysvals. */
   struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
   struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
      set_gfx_sysval(cmdbuf, dirty_sysvals,
                     desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS],
                     vs_desc_state->dyn_ssbos);
   }

   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
      set_gfx_sysval(cmdbuf, dirty_sysvals,
                     desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS],
                     fs_desc_state->dyn_ssbos);
   }

   for (uint32_t i = 0; i < MAX_SETS; i++) {
      uint32_t used_set_mask =
         vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);

      if (used_set_mask & BITFIELD_BIT(i)) {
         set_gfx_sysval(cmdbuf, dirty_sysvals, desc.sets[i],
                        desc_state->sets[i]->descs.dev);
      }
   }
#endif

   /* We mask the dirty sysvals by the shader usage, and only flag
    * the push uniforms dirty if those intersect. */
   BITSET_DECLARE(dirty_shader_sysvals, MAX_SYSVAL_FAUS);

   BITSET_AND(dirty_shader_sysvals, dirty_sysvals, vs->fau.used_sysvals);
   if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
      gfx_state_set_dirty(cmdbuf, VS_PUSH_UNIFORMS);

   if (fs) {
      BITSET_AND(dirty_shader_sysvals, dirty_sysvals, fs->fau.used_sysvals);

      /* If blend constants are not read by the blend shader, we can consider
       * they are not read at all, so clear the dirty bits to avoid re-emitting
       * FAUs when we can. */
      if (!cmdbuf->state.gfx.cb.info.shader_loads_blend_const)
         BITSET_CLEAR_RANGE(dirty_shader_sysvals, 0, 3);

      if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
         gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
   }
}
/*
 * Bind a range of vertex buffers, recording GPU address and bound range per
 * slot. A NULL buffer handle clears the corresponding slot.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindVertexBuffers2)(VkCommandBuffer commandBuffer,
                                      uint32_t firstBinding,
                                      uint32_t bindingCount,
                                      const VkBuffer *pBuffers,
                                      const VkDeviceSize *pOffsets,
                                      const VkDeviceSize *pSizes,
                                      const VkDeviceSize *pStrides)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(firstBinding + bindingCount <= MAX_VBS);

   /* Strides are dynamic state handled by the common runtime. */
   if (pStrides)
      vk_cmd_set_vertex_binding_strides(&cmdbuf->vk, firstBinding,
                                        bindingCount, pStrides);

   for (uint32_t i = 0; i < bindingCount; i++) {
      VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
      uint32_t slot = firstBinding + i;

      if (!buffer) {
         /* NULL handle: unbind the slot. */
         cmdbuf->state.gfx.vb.bufs[slot].address = 0;
         cmdbuf->state.gfx.vb.bufs[slot].size = 0;
         continue;
      }

      VkDeviceSize range = pSizes ? pSizes[i] : VK_WHOLE_SIZE;

      cmdbuf->state.gfx.vb.bufs[slot].address =
         panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
      cmdbuf->state.gfx.vb.bufs[slot].size =
         panvk_buffer_range(buffer, pOffsets[i], range);
   }

   /* The bound-buffer count only grows; unbinding doesn't shrink it. */
   cmdbuf->state.gfx.vb.count =
      MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
   gfx_state_set_dirty(cmdbuf, VB);
}
/*
 * Bind the index buffer, recording its GPU address, bound range and index
 * size. A NULL buffer handle (NullDescriptors) binds a zero-sized buffer.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindIndexBuffer2)(VkCommandBuffer commandBuffer,
                                    VkBuffer buffer, VkDeviceSize offset,
                                    VkDeviceSize size, VkIndexType indexType)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, buffer);

   if (buf) {
      cmdbuf->state.gfx.ib.size = panvk_buffer_range(buf, offset, size);
      assert(cmdbuf->state.gfx.ib.size <= UINT32_MAX);
      cmdbuf->state.gfx.ib.dev_addr = panvk_buffer_gpu_ptr(buf, offset);
#if PAN_ARCH < 9
      /* buf is known non-NULL here, so only the host mapping needs
       * checking. */
      cmdbuf->state.gfx.ib.host_addr =
         buf->host_ptr ? buf->host_ptr + offset : NULL;
#endif
   } else {
      cmdbuf->state.gfx.ib.size = 0;
      /* In case of NullDescriptors, we need to set a non-NULL address and rely
       * on out-of-bounds behavior against the zero size of the buffer. Note
       * that this only works for v10+, as v9 does not have a way to specify the
       * index buffer size. */
      cmdbuf->state.gfx.ib.dev_addr = PAN_ARCH >= 10 ? 0x1000 : 0;
#if PAN_ARCH < 9
      cmdbuf->state.gfx.ib.host_addr = NULL;
#endif
   }

   cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
   gfx_state_set_dirty(cmdbuf, IB);
}