/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_hw_pass.h"
#include "pvr_formats.h"
#include "pvr_private.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"
struct pvr_render_int_subpass {
/* Points to the input subpass. This is set to NULL once the subpass has been
* scheduled.
*/
struct pvr_render_subpass *subpass;
/* Count of other subpasses which have this subpass as a dependency. */
uint32_t out_subpass_count;
/* Pointers to the other subpasses which have this subpass as a dependency.
*/
struct pvr_render_int_subpass **out_subpasses;
/* Count of subpasses on which this subpass is dependent and which haven't
* been scheduled yet.
*/
uint32_t in_subpass_count;
};
struct pvr_renderpass_resource {
/* Resource type allocated for render target. */
enum usc_mrt_resource_type type;
union {
/* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
struct {
/* The output register to use. */
uint32_t output_reg;
/* The offset in bytes within the output register. */
uint32_t offset;
} reg;
/* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
struct {
/* The index of the tile buffer to use. */
uint32_t tile_buffer;
/* The offset (in dwords) within the tile buffer. */
uint32_t offset_dw;
} mem;
};
};
struct pvr_render_int_attachment {
/* Points to the corresponding input attachment. */
struct pvr_render_pass_attachment *attachment;
/* True if this attachment is referenced in the currently open render. */
bool is_used;
/* Operation to use when this attachment is non-resident and referenced as a
* color or depth attachment.
*/
VkAttachmentLoadOp load_op;
/* Operation to use for the stencil component when this attachment is
* non-resident and referenced as a color or depth attachment.
*/
VkAttachmentLoadOp stencil_load_op;
/* Count of uses of this attachment in unscheduled subpasses. */
uint32_t remaining_count;
/* Count of uses of the stencil component of this attachment in unscheduled
* subpasses.
*/
uint32_t stencil_remaining_count;
/* If this attachment has currently allocated on-chip storage then details of
* the allocated location.
*/
struct usc_mrt_resource resource;
/* Index of the subpass in the current render where the attachment is first
* used. VK_ATTACHMENT_UNUSED if the attachment isn't used in the current
* render.
*/
int32_t first_use;
/* Index of the subpass in the current render where the attachment is last
* used.
*/
int32_t last_use;
/* Index of the subpass (global) where the attachment is last read. */
int32_t last_read;
/* If this attachment has currently allocated on-chip storage then the entry
* in context.active_surf_list.
*/
struct list_head link;
/* During pvr_close_render: if this attachment has allocated on-chip storage
* then the index in pvr_renderpass_hwsetup_render.eot_setup.mrt_resources
* with details of the storage location. Otherwise -1.
*/
int32_t mrt_idx;
/* Index of the last render where the attachment was the source of an MSAA
* resolve.
*/
int32_t last_resolve_src_render;
/* Index of the last render where the attachment was the destination of an
* MSAA resolve.
*/
int32_t last_resolve_dst_render;
/* true if the attachment is used with a z replicate in the current render.
*/
bool z_replicate;
/* true if this attachment can be resolved by the PBE. */
bool is_pbe_downscalable;
/* true if this attachment requires an EOT attachment. */
bool eot_surf_required;
};
/* Which parts of the output registers/a tile buffer are currently allocated. */
struct pvr_renderpass_alloc_buffer {
/* Bit array. A bit is set if the corresponding dword is allocated. */
BITSET_DECLARE(allocs, 8U);
};
struct pvr_renderpass_alloc {
/* Which pixel output registers are allocated. */
struct pvr_renderpass_alloc_buffer output_reg;
/* Range of allocated output registers. */
uint32_t output_regs_count;
/* Number of tile buffers allocated. */
uint32_t tile_buffers_count;
/* Which parts of each tile buffer are allocated. Length is
* tile_buffers_count.
*/
struct pvr_renderpass_alloc_buffer *tile_buffers;
};
struct pvr_renderpass_subpass {
/* A pointer to the input subpass description. */
struct pvr_render_subpass *input_subpass;
/* true if the depth attachment for this subpass has z replication enabled.
*/
bool z_replicate;
/* Which pixel output registers/tile buffer locations are allocated during
* this subpass.
*/
struct pvr_renderpass_alloc alloc;
};
struct pvr_renderpass_context {
/* Internal information about each input attachment. */
struct pvr_render_int_attachment *int_attach;
/* Internal information about each input subpass. */
struct pvr_render_int_subpass *int_subpasses;
/* Input structure. */
struct pvr_render_pass *pass;
/* Output structure. */
struct pvr_renderpass_hwsetup *hw_setup;
/* In-progress render. */
struct pvr_renderpass_hwsetup_render *hw_render;
/* Information about each subpass in the current render. */
struct pvr_renderpass_subpass *subpasses;
/* Which parts of color storage are currently allocated. */
struct pvr_renderpass_alloc alloc;
/* Attachment which is currently allocated the on-chip depth/stencil. */
struct pvr_render_int_attachment *int_ds_attach;
/* Attachment which is loaded into the on-chip depth/stencil at the start of
* the render.
*/
struct pvr_render_int_attachment *ds_load_surface;
/* Attachment which the depth/stencil attachment should be resolved to at the
* end of the render.
*/
struct pvr_render_int_attachment *ds_resolve_surface;
/* Count of surfaces which are allocated on-chip color storage. */
uint32_t active_surfaces;
/* List of attachment/ranges which are allocated on-chip color storage. */
struct list_head active_surf_list;
const VkAllocationCallbacks *allocator;
};
struct pvr_render_int_subpass_dsts {
struct pvr_renderpass_resource *color;
struct pvr_renderpass_resource incoming_zrep;
struct pvr_renderpass_resource existing_zrep;
};
struct pvr_render_subpass_depth_params {
bool existing_ds_is_input;
bool incoming_ds_is_input;
uint32_t existing_ds_attach;
};
struct pvr_renderpass_storage_firstuse_buffer {
/* For each pixel output register/tile buffer location: true if the output
* register has been allocated in the current render.
*/
bool used[8U];
};
struct pvr_renderpass_storage_firstuse {
/* First use information for pixel output registers. */
struct pvr_renderpass_storage_firstuse_buffer output_reg;
/* First use information for tile buffers. */
struct pvr_renderpass_storage_firstuse_buffer *tile_buffers;
};
static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
{
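/* For depth formats this returns the block size in bits, for color formats
 * the PBE accumulation size in bits, and 0 for stencil-only formats. Callers
 * divide the result by 32 to get the number of dwords of on-chip storage.
 * Illustrative example (assumed accumulation size): a color format that
 * accumulates to 4 bytes yields 32 here, i.e. one dword after
 * DIV_ROUND_UP(32, 32U).
 */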
if (util_format_has_depth(vk_format_description(vk_format)))
return vk_format_get_blocksizebits(vk_format);
if (!vk_format_has_stencil(vk_format))
return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
return 0;
}
/** Copy information about allocated color storage. */
static VkResult pvr_copy_alloc(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *dst,
struct pvr_renderpass_alloc *src)
{
dst->output_reg = src->output_reg;
dst->output_regs_count = src->output_regs_count;
dst->tile_buffers_count = src->tile_buffers_count;
if (dst->tile_buffers_count > 0U) {
dst->tile_buffers =
vk_alloc(ctx->allocator,
sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!dst->tile_buffers)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
memcpy(dst->tile_buffers,
src->tile_buffers,
sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count);
} else {
dst->tile_buffers = NULL;
}
return VK_SUCCESS;
}
/** Free information about allocated color storage. */
static void pvr_free_alloc(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *alloc)
{
if (alloc->tile_buffers)
vk_free(ctx->allocator, alloc->tile_buffers);
memset(alloc, 0U, sizeof(*alloc));
}
static void pvr_reset_render(struct pvr_renderpass_context *ctx)
{
ctx->int_ds_attach = NULL;
ctx->active_surfaces = 0U;
list_inithead(&ctx->active_surf_list);
memset(&ctx->alloc.output_reg, 0U, sizeof(ctx->alloc.output_reg));
ctx->alloc.output_regs_count = 0U;
ctx->alloc.tile_buffers_count = 0U;
ctx->alloc.tile_buffers = NULL;
ctx->hw_render = NULL;
ctx->subpasses = NULL;
ctx->ds_load_surface = NULL;
}
/** Gets the amount of memory to allocate per-core for a tile buffer. */
static uint32_t
pvr_get_tile_buffer_size_per_core(const struct pvr_device *device)
{
uint32_t clusters =
PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, num_clusters, 1U);
/* Round the number of clusters up to the next power of two. */
if (!PVR_HAS_FEATURE(&device->pdevice->dev_info, tile_per_usc))
clusters = util_next_power_of_two(clusters);
/* Tile buffer is (total number of partitions across all clusters) * 16 * 16
* (quadrant size in pixels).
*/
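/* Illustrative example with assumed values: a device reporting 4 clusters
 * (no tile_per_usc, so already a power of two) and a
 * total_reserved_partition_size of 1024 would get 1024 * 4 * sizeof(uint32_t)
 * = 16384 bytes per core.
 */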
return device->pdevice->dev_runtime_info.total_reserved_partition_size *
clusters * sizeof(uint32_t);
}
/**
* Gets the amount of memory to allocate for a tile buffer on the current BVNC.
*/
uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device)
{
/* On a multicore system duplicate the buffer for each core. */
return pvr_get_tile_buffer_size_per_core(device) *
rogue_get_max_num_cores(&device->pdevice->dev_info);
}
static void
pvr_finalise_mrt_setup(const struct pvr_device *device,
struct pvr_renderpass_hwsetup_render *hw_render,
struct usc_mrt_setup *mrt)
{
mrt->num_output_regs = hw_render->output_regs_count;
mrt->num_tile_buffers = hw_render->tile_buffers_count;
mrt->tile_buffer_size = pvr_get_tile_buffer_size(device);
}
/**
* Copy information about the number of pixel output registers and tile buffers
* required for the current render to the output structure.
*/
static void pvr_finalise_po_alloc(const struct pvr_device *device,
struct pvr_renderpass_context *ctx)
{
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
/* The number of output registers must be a power of two. */
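/* For example (illustrative values): 3 allocated dwords round up to 4 output
 * registers.
 */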
hw_render->output_regs_count =
util_next_power_of_two(ctx->alloc.output_regs_count);
assert(ctx->alloc.tile_buffers_count <= ctx->pass->max_tilebuffer_count);
hw_render->tile_buffers_count = ctx->alloc.tile_buffers_count;
/* Copy the number of output registers and tile buffers to each subpass. */
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_hwsetup_subpass *hw_subpass =
&hw_render->subpasses[i];
pvr_finalise_mrt_setup(device, hw_render, &hw_subpass->setup);
}
pvr_finalise_mrt_setup(device, hw_render, &hw_render->init_setup);
pvr_finalise_mrt_setup(device, hw_render, &hw_render->eot_setup);
}
/** Mark that device memory must be allocated for an attachment. */
static void pvr_mark_surface_alloc(struct pvr_renderpass_context *ctx,
struct pvr_render_int_attachment *int_attach)
{
const uint32_t attach_idx = int_attach - ctx->int_attach;
assert(attach_idx < ctx->pass->attachment_count);
ctx->hw_setup->surface_allocate[attach_idx] = true;
}
/**
* Check if there is space in a buffer for storing a render target of a
* specified size.
*/
static int32_t
pvr_is_space_in_buffer(const struct pvr_device_info *dev_info,
struct pvr_renderpass_alloc_buffer *buffer,
uint32_t pixel_size)
{
const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
uint32_t alignment = 1U;
if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
/* For a 64-bit/128-bit source format: the start offset must be even. */
if (pixel_size == 2U || pixel_size == 4U)
alignment = 2U;
}
assert(pixel_size <= max_out_regs);
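/* First-fit scan: try each (aligned) start offset and return the first run
 * of pixel_size free dwords. Illustrative example (assumed state): with
 * dwords 0 and 1 already allocated and pixel_size == 2 on a pbe2_in_xe core,
 * offset 0 is skipped and 2 is returned.
 */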
for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U))
return i;
}
return -1;
}
static VkResult
pvr_surface_setup_render_init(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_storage_firstuse *first_use,
struct usc_mrt_resource const *resource,
struct pvr_render_pass_attachment *attachment,
VkAttachmentLoadOp load_op,
bool *use_render_init)
{
const uint32_t pixel_size =
DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
struct pvr_renderpass_storage_firstuse_buffer *buffer;
uint32_t start;
/* Check if this is the first use of all the allocated registers. */
if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
buffer = &first_use->output_reg;
start = resource->reg.output_reg;
} else {
assert(resource->mem.tile_buffer < ctx->alloc.tile_buffers_count);
buffer = &first_use->tile_buffers[resource->mem.tile_buffer];
start = resource->mem.offset_dw;
}
*use_render_init = true;
for (uint32_t i = 0U; i < pixel_size; i++) {
/* Don't initialize at the render level if the output registers were
* previously allocated to a different attachment.
*/
if (buffer->used[start + i])
*use_render_init = false;
/* Don't use render init for future attachments allocated to the same
* registers.
*/
buffer->used[start + i] = true;
}
if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
*use_render_init = false;
if (*use_render_init) {
struct pvr_renderpass_colorinit *new_color_init;
struct usc_mrt_resource *new_mrt;
/* Initialize the storage at the start of the render. */
new_color_init = vk_realloc(ctx->allocator,
hw_render->color_init,
sizeof(hw_render->color_init[0U]) *
(hw_render->color_init_count + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!new_color_init)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
hw_render->color_init = new_color_init;
hw_render->color_init[hw_render->color_init_count].index =
attachment->index;
hw_render->color_init[hw_render->color_init_count].op = load_op;
/* Set the destination for the attachment load/clear. */
assert(hw_render->init_setup.num_render_targets ==
hw_render->color_init_count);
new_mrt = vk_realloc(ctx->allocator,
hw_render->init_setup.mrt_resources,
sizeof(hw_render->init_setup.mrt_resources[0U]) *
(hw_render->init_setup.num_render_targets + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!new_mrt)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
hw_render->init_setup.mrt_resources = new_mrt;
hw_render->init_setup
.mrt_resources[hw_render->init_setup.num_render_targets] = *resource;
hw_render->init_setup.num_render_targets++;
hw_render->color_init_count++;
}
return VK_SUCCESS;
}
static VkResult
pvr_subpass_setup_render_init(struct pvr_renderpass_context *ctx)
{
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
struct pvr_renderpass_storage_firstuse first_use = { 0 };
bool first_ds = true;
VkResult result;
if (ctx->alloc.tile_buffers_count > 0U) {
first_use.tile_buffers = vk_zalloc(ctx->allocator,
sizeof(first_use.tile_buffers[0U]) *
ctx->alloc.tile_buffers_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!first_use.tile_buffers)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
}
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_hwsetup_subpass *hw_subpass =
&hw_render->subpasses[i];
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
/* If this is the first depth attachment in the render then clear at the
* render level, not the subpass level.
*/
if (first_ds &&
(hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR ||
hw_subpass->stencil_clear)) {
struct pvr_render_int_attachment *int_ds_attach;
assert(input_subpass->depth_stencil_attachment !=
VK_ATTACHMENT_UNUSED);
assert(input_subpass->depth_stencil_attachment <
ctx->pass->attachment_count);
int_ds_attach =
&ctx->int_attach[input_subpass->depth_stencil_attachment];
assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
hw_render->ds_attach_idx == int_ds_attach->attachment->index);
hw_render->ds_attach_idx = int_ds_attach->attachment->index;
if (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)
hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
if (hw_subpass->stencil_clear) {
hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
hw_subpass->stencil_clear = false;
}
}
if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
first_ds = false;
for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
struct usc_mrt_resource *mrt = &hw_subpass->setup.mrt_resources[j];
const uint32_t attach_idx = input_subpass->color_attachments[j];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
assert(pvr_get_accum_format_bitsize(
int_attach->attachment->vk_format) > 0U);
/* Is this the first use of the attachment? */
if (int_attach->first_use == (int32_t)i) {
/* Set if we should initialize the attachment storage at the
* render level.
*/
bool use_render_init;
result = pvr_surface_setup_render_init(ctx,
&first_use,
mrt,
int_attach->attachment,
hw_subpass->color_initops[j],
&use_render_init);
if (result != VK_SUCCESS) {
vk_free(ctx->allocator, first_use.tile_buffers);
return result;
}
/* On success don't initialize the attachment at the subpass level.
*/
if (use_render_init)
hw_subpass->color_initops[j] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
} else {
/* This attachment is already present in on-chip storage so don't
* do anything.
*/
assert(hw_subpass->color_initops[j] ==
VK_ATTACHMENT_LOAD_OP_DONT_CARE);
}
}
}
vk_free(ctx->allocator, first_use.tile_buffers);
return VK_SUCCESS;
}
static void
pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer *buffer,
uint32_t start,
uint32_t pixel_size)
{
assert(!BITSET_TEST_RANGE(buffer->allocs, start, start + pixel_size - 1U));
BITSET_SET_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}
static VkResult
pvr_mark_storage_allocated(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *alloc,
struct pvr_render_pass_attachment *attachment,
struct pvr_renderpass_resource *resource)
{
/* Number of dwords to allocate for the attachment. */
const uint32_t pixel_size =
DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);
if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
/* Update the locations used in the pixel output registers. */
pvr_mark_storage_allocated_in_buffer(&alloc->output_reg,
resource->reg.output_reg,
pixel_size);
/* Update the range of pixel output registers used. */
alloc->output_regs_count =
MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
} else {
assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);
if (resource->mem.tile_buffer >= alloc->tile_buffers_count) {
/* Grow the number of tile buffers. */
struct pvr_renderpass_alloc_buffer *new_tile_buffers = vk_realloc(
ctx->allocator,
alloc->tile_buffers,
sizeof(alloc->tile_buffers[0U]) * (resource->mem.tile_buffer + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!new_tile_buffers)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
alloc->tile_buffers = new_tile_buffers;
memset(
&alloc->tile_buffers[alloc->tile_buffers_count],
0U,
sizeof(alloc->tile_buffers[0U]) *
(resource->mem.tile_buffer + 1U - alloc->tile_buffers_count));
alloc->tile_buffers_count = resource->mem.tile_buffer + 1U;
assert(alloc->tile_buffers_count <= ctx->pass->max_tilebuffer_count);
}
/* Update the locations used in the tile buffer. */
pvr_mark_storage_allocated_in_buffer(
&alloc->tile_buffers[resource->mem.tile_buffer],
resource->mem.offset_dw,
pixel_size);
/* The hardware makes the bit depth of the on-chip storage and memory
* storage the same so make sure the memory storage is large enough to
* accommodate the largest render target.
*/
alloc->output_regs_count =
MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
}
return VK_SUCCESS;
}
static VkResult
pvr_surface_alloc_color_storage(const struct pvr_device_info *dev_info,
struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *alloc,
struct pvr_render_pass_attachment *attachment,
struct pvr_renderpass_resource *resource)
{
/* Number of dwords to allocate for the attachment. */
const uint32_t pixel_size =
DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);
/* Try allocating pixel output registers. */
const int32_t output_reg =
pvr_is_space_in_buffer(dev_info, &alloc->output_reg, pixel_size);
if (output_reg != -1) {
resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
resource->reg.output_reg = (uint32_t)output_reg;
resource->reg.offset = 0U;
} else {
uint32_t i;
/* Mark the attachment as using a tile buffer. */
resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;
/* Try allocating from an existing tile buffer. */
for (i = 0U; i < alloc->tile_buffers_count; i++) {
const int32_t tile_buffer_offset =
pvr_is_space_in_buffer(dev_info,
&alloc->tile_buffers[i],
pixel_size);
if (tile_buffer_offset != -1) {
resource->mem.tile_buffer = i;
resource->mem.offset_dw = (uint32_t)tile_buffer_offset;
break;
}
}
if (i == alloc->tile_buffers_count) {
/* Check for reaching the maximum number of tile buffers. */
if (alloc->tile_buffers_count == ctx->pass->max_tilebuffer_count)
return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
/* Use a newly allocated tile buffer. */
resource->mem.tile_buffer = i;
resource->mem.offset_dw = 0U;
}
}
/* Update which parts of the pixel outputs/tile buffers are used. */
return pvr_mark_storage_allocated(ctx, alloc, attachment, resource);
}
/** Free the storage allocated to an attachment. */
static void
pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer *buffer,
struct pvr_render_int_attachment *int_attach,
uint32_t start)
{
const uint32_t pixel_size = DIV_ROUND_UP(
pvr_get_accum_format_bitsize(int_attach->attachment->vk_format),
32U);
BITSET_CLEAR_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}
/** Free the storage allocated to an attachment. */
static void
pvr_free_surface_storage(struct pvr_renderpass_context *ctx,
struct pvr_render_int_attachment *int_attach)
{
struct usc_mrt_resource *resource = &int_attach->resource;
struct pvr_renderpass_alloc *alloc = &ctx->alloc;
assert(resource->type != USC_MRT_RESOURCE_TYPE_INVALID);
/* Mark the storage as free. */
if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
pvr_free_buffer_storage(&alloc->output_reg,
int_attach,
resource->reg.output_reg);
} else {
struct pvr_renderpass_alloc_buffer *tile_buffer;
assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);
assert(resource->mem.tile_buffer < alloc->tile_buffers_count);
tile_buffer = &alloc->tile_buffers[resource->mem.tile_buffer];
pvr_free_buffer_storage(tile_buffer, int_attach, resource->mem.offset_dw);
}
/* Mark that the attachment doesn't have allocated storage. */
resource->type = USC_MRT_RESOURCE_TYPE_INVALID;
/* Remove from the list of surfaces with allocated on-chip storage. */
assert(ctx->active_surfaces > 0U);
ctx->active_surfaces--;
list_del(&int_attach->link);
}
static void pvr_reset_surface(struct pvr_renderpass_context *ctx,
struct pvr_render_int_attachment *int_attach)
{
/* Reset information about the range of uses. */
int_attach->first_use = int_attach->last_use = -1;
int_attach->z_replicate = false;
pvr_free_surface_storage(ctx, int_attach);
}
static void
pvr_make_surface_active(struct pvr_renderpass_context *ctx,
struct pvr_render_int_attachment *int_attach,
uint32_t subpass_num)
{
/* Add to the list of surfaces with on-chip storage. */
assert(int_attach->first_use == -1);
int_attach->first_use = subpass_num;
ctx->active_surfaces++;
list_addtail(&int_attach->link, &ctx->active_surf_list);
}
/**
* For a subpass, copy details of the storage locations for the input/color
* attachments to the output structure.
*/
static VkResult
pvr_copy_storage_details(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_hwsetup_subpass *hw_subpass,
struct pvr_renderpass_subpass *subpass)
{
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
const uint32_t max_rts =
input_subpass->color_count + input_subpass->input_count;
VkResult result;
if (max_rts == 0)
return VK_SUCCESS;
hw_subpass->setup.mrt_resources =
vk_zalloc(ctx->allocator,
sizeof(hw_subpass->setup.mrt_resources[0U]) * max_rts,
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!hw_subpass->setup.mrt_resources) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_copy_storage_details;
}
for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
const uint32_t attach_idx = input_subpass->color_attachments[i];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
/* Record for the subpass where the color attachment is stored. */
assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
hw_subpass->setup.mrt_resources[i] = int_attach->resource;
}
hw_subpass->setup.num_render_targets = input_subpass->color_count;
if (input_subpass->input_count == 0)
return VK_SUCCESS;
/* Allocate the access information for this subpass's input attachments. */
hw_subpass->input_access = vk_alloc(ctx->allocator,
sizeof(hw_subpass->input_access[0U]) *
input_subpass->input_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!hw_subpass->input_access) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_copy_storage_details;
}
for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
const uint32_t attach_idx = input_subpass->input_attachments[i];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID) {
bool is_color = false;
/* Access the input attachment from on-chip storage. */
if (int_attach->z_replicate) {
hw_subpass->input_access[i].type =
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE;
} else {
hw_subpass->input_access[i].type =
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP;
}
/* If this attachment is also a color attachment then point to the
* color attachment's resource.
*/
for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
if (input_subpass->color_attachments[j] == (int32_t)attach_idx) {
hw_subpass->input_access[i].on_chip_rt = j;
is_color = true;
break;
}
}
if (!is_color) {
const uint32_t num_rts = hw_subpass->setup.num_render_targets;
hw_subpass->input_access[i].on_chip_rt = num_rts;
hw_subpass->setup.num_render_targets++;
/* Record the location of the storage for the attachment. */
hw_subpass->setup.mrt_resources[num_rts] = int_attach->resource;
}
} else {
/* Access the input attachment from memory. */
hw_subpass->input_access[i].type =
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
hw_subpass->input_access[i].on_chip_rt = -1;
}
}
return VK_SUCCESS;
end_copy_storage_details:
if (hw_subpass->input_access) {
vk_free(ctx->allocator, hw_subpass->input_access);
hw_subpass->input_access = NULL;
}
if (hw_subpass->setup.mrt_resources) {
vk_free(ctx->allocator, hw_subpass->setup.mrt_resources);
hw_subpass->setup.mrt_resources = NULL;
}
return result;
}
/**
* For a subpass copy details of any storage location for a replicated version
* of the depth attachment to the output structure.
*/
static VkResult
pvr_copy_z_replicate_details(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_hwsetup_subpass *hw_subpass,
struct pvr_renderpass_subpass *subpass)
{
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
struct pvr_render_int_attachment *int_ds_attach;
uint32_t z_replicate;
bool found = false;
assert(input_subpass->depth_stencil_attachment >= 0U &&
input_subpass->depth_stencil_attachment <
(int32_t)ctx->pass->attachment_count);
int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
assert(hw_subpass->z_replicate == -1);
/* Is the replicated depth also an input attachment? */
for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
const uint32_t attach_idx = input_subpass->input_attachments[i];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
if (int_attach == int_ds_attach) {
z_replicate = hw_subpass->input_access[i].on_chip_rt;
found = true;
break;
}
}
if (!found)
z_replicate = hw_subpass->setup.num_render_targets;
/* If the Z replicate attachment isn't also an input attachment then grow the
* array of locations.
*/
assert(z_replicate <= hw_subpass->setup.num_render_targets);
if (z_replicate == hw_subpass->setup.num_render_targets) {
struct usc_mrt_resource *mrt =
vk_realloc(ctx->allocator,
hw_subpass->setup.mrt_resources,
sizeof(hw_subpass->setup.mrt_resources[0U]) *
(hw_subpass->setup.num_render_targets + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!mrt)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
hw_subpass->setup.mrt_resources = mrt;
hw_subpass->setup.num_render_targets++;
}
/* Copy the location of the Z replicate. */
assert(int_ds_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
hw_subpass->setup.mrt_resources[z_replicate] = int_ds_attach->resource;
hw_subpass->z_replicate = z_replicate;
return VK_SUCCESS;
}
static void pvr_dereference_surface(struct pvr_renderpass_context *ctx,
int32_t attach_idx,
uint32_t subpass_num)
{
struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
assert(int_attach->remaining_count > 0U);
int_attach->remaining_count--;
if (int_attach->remaining_count == 0U) {
if (int_attach->first_use != -1)
int_attach->last_use = subpass_num;
if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID)
pvr_free_surface_storage(ctx, int_attach);
}
if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
assert(int_attach->stencil_remaining_count > 0U);
int_attach->stencil_remaining_count--;
}
}
static void pvr_free_render(struct pvr_renderpass_context *ctx)
{
pvr_free_alloc(ctx, &ctx->alloc);
if (ctx->subpasses) {
for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++)
pvr_free_alloc(ctx, &ctx->subpasses[i].alloc);
vk_free(ctx->allocator, ctx->subpasses);
ctx->subpasses = NULL;
}
}
static bool pvr_render_has_side_effects(struct pvr_renderpass_context *ctx)
{
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
struct pvr_render_pass *pass = ctx->pass;
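/* A render is treated as having side effects if it clears and then stores
 * the depth or stencil, clears and stores any attachment written at the end
 * of tile, or performs an MSAA resolve.
 */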
if ((hw_render->depth_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
hw_render->depth_store) ||
(hw_render->stencil_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
hw_render->stencil_store)) {
return true;
}
for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
const struct pvr_renderpass_hwsetup_eot_surface *eot_attach =
&hw_render->eot_surfaces[i];
const struct pvr_render_pass_attachment *attachment =
&pass->attachments[eot_attach->attachment_idx];
if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) {
return true;
}
if (eot_attach->need_resolve)
return true;
}
return false;
}
static VkResult pvr_close_render(const struct pvr_device *device,
struct pvr_renderpass_context *ctx)
{
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
struct pvr_renderpass_hwsetup_eot_surface *eot_attach;
struct usc_mrt_setup *eot_setup;
int32_t mrt_idx;
VkResult result;
/* Render already closed. */
if (!hw_render)
return VK_SUCCESS;
/* Set up the render and allocate resources for color/depth loads and clears. */
result = pvr_subpass_setup_render_init(ctx);
if (result != VK_SUCCESS)
return result;
/* Reset surfaces whose last use was in the current render. */
list_for_each_entry_safe (struct pvr_render_int_attachment,
int_attach,
&ctx->active_surf_list,
link) {
if (int_attach->last_use != -1) {
assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
pvr_reset_surface(ctx, int_attach);
}
}
/* Check if the depth attachment has uses in future subpasses. */
if (ctx->int_ds_attach) {
/* Store the depth to the attachment at the end of the render. */
if (ctx->int_ds_attach->remaining_count > 0U)
hw_render->depth_store = true;
/* Store the stencil to the attachment at the end of the render. */
if (ctx->int_ds_attach->stencil_remaining_count > 0U)
hw_render->stencil_store = true;
if (hw_render->depth_store || hw_render->stencil_store) {
assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
hw_render->ds_attach_idx ==
ctx->int_ds_attach->attachment->index);
hw_render->ds_attach_idx = ctx->int_ds_attach->attachment->index;
/* Allocate memory for the attachment. */
pvr_mark_surface_alloc(ctx, ctx->int_ds_attach);
}
/* Load the depth and stencil before the next use. */
ctx->int_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
ctx->int_ds_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
}
eot_setup = &hw_render->eot_setup;
memset(eot_setup, 0U, sizeof(*eot_setup));
/* Set the number of pixel output registers/tile buffers allocated for the
* render and copy the information to all subpasses and the EOT program.
*/
pvr_finalise_po_alloc(device, ctx);
/* If any attachments are used with z replicate then they will be stored to by
* the ISP, so remove them from the list of surfaces to store using the PBE.
*/
list_for_each_entry_safe (struct pvr_render_int_attachment,
int_attach,
&ctx->active_surf_list,
link) {
if (int_attach->z_replicate)
pvr_reset_surface(ctx, int_attach);
}
/* Number of surfaces with allocated on-chip storage. */
eot_setup->num_render_targets = ctx->active_surfaces;
eot_setup->mrt_resources = vk_alloc(ctx->allocator,
sizeof(eot_setup->mrt_resources[0U]) *
eot_setup->num_render_targets,
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!eot_setup->mrt_resources)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* Record the location of the on-chip storage. */
mrt_idx = 0U;
list_for_each_entry_safe (struct pvr_render_int_attachment,
int_attach,
&ctx->active_surf_list,
link) {
assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
assert(int_attach->remaining_count > 0U);
if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
assert(int_attach->stencil_remaining_count > 0U);
/* Copy the location of the source data for this attachment. */
eot_setup->mrt_resources[mrt_idx] = int_attach->resource;
assert(int_attach->mrt_idx == -1);
int_attach->mrt_idx = mrt_idx;
mrt_idx++;
}
assert(mrt_idx == (int32_t)eot_setup->num_render_targets);
hw_render->eot_surface_count = 0U;
hw_render->pbe_emits = 0U;
/* Count the number of surfaces to store to at the end of the render. */
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
const uint32_t resolve_output =
input_subpass->resolve_attachments
? input_subpass->resolve_attachments[j]
: VK_ATTACHMENT_UNUSED;
struct pvr_render_int_attachment *color_attach;
if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
continue;
color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];
if (list_is_linked(&color_attach->link)) {
uint32_t rem_count = resolve_output == VK_ATTACHMENT_UNUSED ? 0U
: 1U;
/* If a color attachment is resolved it will have an extra
* remaining usage.
*/
if (color_attach->remaining_count > rem_count &&
!color_attach->eot_surf_required) {
color_attach->eot_surf_required = true;
hw_render->eot_surface_count++;
}
}
if (resolve_output != VK_ATTACHMENT_UNUSED) {
struct pvr_render_int_attachment *int_resolve_attach =
&ctx->int_attach[resolve_output];
if (!int_resolve_attach->eot_surf_required) {
int_resolve_attach->eot_surf_required = true;
hw_render->eot_surface_count++;
}
}
}
}
assert(hw_render->eot_surface_count <= 16U);
hw_render->eot_surfaces = vk_alloc(ctx->allocator,
sizeof(hw_render->eot_surfaces[0U]) *
hw_render->eot_surface_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!hw_render->eot_surfaces)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
eot_attach = hw_render->eot_surfaces;
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
const uint32_t resolve_output =
input_subpass->resolve_attachments
? input_subpass->resolve_attachments[j]
: VK_ATTACHMENT_UNUSED;
struct pvr_render_int_attachment *color_attach;
if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
continue;
color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];
if (resolve_output != VK_ATTACHMENT_UNUSED) {
struct pvr_render_int_attachment *resolve_src =
&ctx->int_attach[input_subpass->color_attachments[j]];
struct pvr_render_int_attachment *resolve_dst =
&ctx->int_attach[resolve_output];
assert(resolve_dst->eot_surf_required);
resolve_dst->eot_surf_required = false;
/* Dereference the source to the resolve. */
assert(resolve_src->remaining_count > 0U);
resolve_src->remaining_count--;
/* Allocate device memory for the resolve destination. */
pvr_mark_surface_alloc(ctx, resolve_dst);
/* The attachment has been written so load the attachment the
* next time it is referenced.
*/
resolve_dst->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
eot_attach->mrt_idx = resolve_src->mrt_idx;
eot_attach->attachment_idx = resolve_dst->attachment->index;
eot_attach->src_attachment_idx = resolve_src->attachment->index;
eot_attach->need_resolve = true;
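/* Decide how the resolve is performed: sources the PBE can't downscale must
 * go through a transfer resolve, sources with no further uses can be
 * resolved directly by the PBE, and the remaining cases are left undecided
 * until the spare PBE emits are counted below.
 */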
if (!resolve_src->is_pbe_downscalable) {
/* Resolve src must be stored for transfer resolve. */
assert(resolve_src->remaining_count > 0U);
eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
} else if (resolve_src->remaining_count == 0U) {
eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
hw_render->pbe_emits++;
} else {
eot_attach->resolve_type = PVR_RESOLVE_TYPE_INVALID;
}
eot_attach++;
}
if (color_attach->eot_surf_required) {
assert(color_attach->remaining_count > 0U);
pvr_mark_surface_alloc(ctx, color_attach);
assert(color_attach->mrt_idx >= 0);
assert(color_attach->mrt_idx <
(int32_t)hw_render->eot_setup.num_render_targets);
eot_attach->mrt_idx = color_attach->mrt_idx;
eot_attach->attachment_idx = color_attach->attachment->index;
eot_attach->need_resolve = false;
eot_attach++;
hw_render->pbe_emits++;
color_attach->eot_surf_required = false;
}
}
}
assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);
/* Count the number of extra resolves we can do through the PBE. */
for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
eot_attach = &hw_render->eot_surfaces[i];
if (eot_attach->need_resolve &&
eot_attach->resolve_type == PVR_RESOLVE_TYPE_INVALID) {
if (hw_render->pbe_emits == PVR_NUM_PBE_EMIT_REGS) {
eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
} else {
eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
hw_render->pbe_emits++;
}
}
}
assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);
/* Check for side effects in the final render. */
hw_render->has_side_effects = pvr_render_has_side_effects(ctx);
/* Reset active surfaces. */
list_for_each_entry_safe (struct pvr_render_int_attachment,
int_attach,
&ctx->active_surf_list,
link) {
int_attach->mrt_idx = -1;
pvr_reset_surface(ctx, int_attach);
}
assert(ctx->active_surfaces == 0U);
assert(list_is_empty(&ctx->active_surf_list));
pvr_free_render(ctx);
pvr_reset_render(ctx);
return VK_SUCCESS;
}
static bool pvr_is_input(struct pvr_render_subpass *subpass,
uint32_t attach_idx)
{
if (attach_idx == VK_ATTACHMENT_UNUSED)
return false;
for (uint32_t i = 0U; i < subpass->input_count; i++) {
if (subpass->input_attachments[i] == attach_idx)
return true;
}
return false;
}
static bool
pvr_depth_zls_conflict(struct pvr_renderpass_context *ctx,
struct pvr_render_int_attachment *int_ds_attach,
bool existing_ds_is_input)
{
if (!ctx->int_ds_attach)
return false;
/* No conflict if the incoming subpass doesn't have a depth/stencil
* attachment.
*/
if (!int_ds_attach)
return false;
/* No conflict if the incoming depth/stencil attachment is the same as the
* existing one.
*/
if (ctx->int_ds_attach == int_ds_attach)
return false;
/* If the existing depth/stencil attachment is used later, then we can't
* overwrite it.
*
* The exception is if the only use is as an input attachment in the incoming
* subpass in which case we can use the Z replicate feature to save the
* value.
*/
if (ctx->int_ds_attach->remaining_count > 0U &&
!(existing_ds_is_input && ctx->int_ds_attach->remaining_count == 1U)) {
return true;
}
if (ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
ctx->int_ds_attach->stencil_remaining_count > 0U) {
return true;
}
/* We can't load mid-render so fail if the new depth/stencil attachment is
* already initialized.
*/
if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
return true;
if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
return true;
}
return false;
}
static void
pvr_set_surface_resource(struct pvr_render_int_attachment *int_attach,
struct pvr_renderpass_resource *resource)
{
int_attach->resource.type = resource->type;
switch (resource->type) {
case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
int_attach->resource.reg.output_reg = resource->reg.output_reg;
int_attach->resource.reg.offset = resource->reg.offset;
break;
case USC_MRT_RESOURCE_TYPE_MEMORY:
int_attach->resource.mem.tile_buffer = resource->mem.tile_buffer;
int_attach->resource.mem.offset_dw = resource->mem.offset_dw;
break;
default:
break;
}
}
static bool pvr_equal_resources(struct pvr_renderpass_resource *resource1,
struct pvr_renderpass_resource *resource2)
{
if (resource1->type != resource2->type)
return false;
switch (resource1->type) {
case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
return resource1->reg.output_reg == resource2->reg.output_reg &&
resource1->reg.offset == resource2->reg.offset;
case USC_MRT_RESOURCE_TYPE_MEMORY:
return resource1->mem.tile_buffer == resource2->mem.tile_buffer &&
resource1->mem.offset_dw == resource2->mem.offset_dw;
default:
return true;
}
}
static VkResult
pvr_enable_z_replicate(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_hwsetup_render *hw_render,
int32_t replicate_attach_idx,
struct pvr_renderpass_resource *replicate_dst)
{
struct pvr_render_int_attachment *int_attach =
&ctx->int_attach[replicate_attach_idx];
int32_t first_use = -1;
/* If Z replication was already enabled for the attachment then nothing more
* to do.
*/
if (!int_attach->z_replicate) {
/* Copy details of the storage for the replicated value to the attachment.
*/
assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
assert(replicate_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
pvr_set_surface_resource(int_attach, replicate_dst);
} else {
assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
assert(replicate_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
}
/* Find the first subpass where the attachment is written. */
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
if (input_subpass->depth_stencil_attachment == replicate_attach_idx) {
first_use = i;
break;
}
}
assert(first_use >= 0);
/* For all subpasses from the first write. */
for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
/* If the subpass writes to the attachment then enable z replication. */
if (input_subpass->depth_stencil_attachment == replicate_attach_idx &&
!subpass->z_replicate) {
subpass->z_replicate = true;
if (i != (hw_render->subpass_count - 1U)) {
/* Copy the details of the storage for replicated value. */
const VkResult result =
pvr_copy_z_replicate_details(ctx,
&ctx->hw_render->subpasses[i],
subpass);
if (result != VK_SUCCESS)
return result;
}
}
}
if (!int_attach->z_replicate) {
/* Add the storage for the replicated value to locations in use at each
* subpass.
*/
for (uint32_t i = first_use; i < (hw_render->subpass_count - 1U); i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
pvr_mark_storage_allocated(ctx,
&subpass->alloc,
int_attach->attachment,
replicate_dst);
}
/* Add the depth attachment to the list of surfaces with allocated
* storage.
*/
pvr_make_surface_active(ctx, int_attach, first_use);
int_attach->z_replicate = true;
}
return VK_SUCCESS;
}
static bool pvr_is_pending_resolve_dest(struct pvr_renderpass_context *ctx,
uint32_t attach_idx)
{
struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
return int_attach->last_resolve_dst_render != -1 &&
int_attach->last_resolve_dst_render ==
(int32_t)(ctx->hw_setup->render_count - 1U);
}
static bool pvr_is_pending_resolve_src(struct pvr_renderpass_context *ctx,
uint32_t attach_idx)
{
struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
return int_attach->last_resolve_src_render != -1 &&
int_attach->last_resolve_src_render ==
(int32_t)(ctx->hw_setup->render_count - 1U);
}
static bool pvr_exceeds_pbe_registers(struct pvr_renderpass_context *ctx,
struct pvr_render_subpass *subpass)
{
int32_t live_outputs[PVR_NUM_PBE_EMIT_REGS];
uint32_t num_live_outputs = 0U;
/* Count all color outputs so far. */
for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++) {
struct pvr_render_subpass *input_subpass =
ctx->subpasses[i].input_subpass;
for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
const uint32_t global_color_attach =
input_subpass->color_attachments[j];
struct pvr_render_int_attachment *int_attach;
bool found = false;
if (global_color_attach == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[global_color_attach];
if (int_attach->last_read <= (int32_t)subpass->index)
continue;
for (uint32_t k = 0U; k < num_live_outputs; k++) {
if (live_outputs[k] == global_color_attach) {
found = true;
break;
}
}
if (!found)
live_outputs[num_live_outputs++] = global_color_attach;
}
}
assert(num_live_outputs <= PVR_NUM_PBE_EMIT_REGS);
/* Check if adding all the color outputs of the new subpass to the render
* would exceed the limit.
*/
for (uint32_t i = 0U; i < subpass->color_count; i++) {
const uint32_t global_color_attach = subpass->color_attachments[i];
struct pvr_render_int_attachment *int_attach;
bool found = false;
if (global_color_attach == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[global_color_attach];
if (int_attach->last_read <= (int32_t)subpass->index)
continue;
for (uint32_t j = 0U; j < num_live_outputs; j++) {
if (live_outputs[j] == global_color_attach) {
found = true;
break;
}
}
if (!found) {
if (num_live_outputs >= PVR_NUM_PBE_EMIT_REGS)
return true;
live_outputs[num_live_outputs++] = global_color_attach;
}
}
return false;
}
static void pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer *dst,
struct pvr_renderpass_alloc_buffer *src)
{
for (uint32_t i = 0U; i < ARRAY_SIZE(dst->allocs); i++)
dst->allocs[i] |= src->allocs[i];
}
static VkResult pvr_merge_alloc(struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *dst,
struct pvr_renderpass_alloc *src)
{
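/* Merge src's allocations into dst: union the output register bitset, take
 * the larger register count, and grow dst's tile buffer array (zeroing the
 * new entries) before unioning each tile buffer bitset.
 */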
pvr_merge_alloc_buffer(&dst->output_reg, &src->output_reg);
dst->output_regs_count =
MAX2(dst->output_regs_count, src->output_regs_count);
if (dst->tile_buffers_count < src->tile_buffers_count) {
struct pvr_renderpass_alloc_buffer *new_tile_buffers =
vk_realloc(ctx->allocator,
dst->tile_buffers,
sizeof(dst->tile_buffers[0U]) * src->tile_buffers_count,
8U,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!new_tile_buffers)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
dst->tile_buffers = new_tile_buffers;
memset(dst->tile_buffers + dst->tile_buffers_count,
0U,
sizeof(dst->tile_buffers[0U]) *
(src->tile_buffers_count - dst->tile_buffers_count));
dst->tile_buffers_count = src->tile_buffers_count;
}
for (uint32_t i = 0U; i < src->tile_buffers_count; i++)
pvr_merge_alloc_buffer(&dst->tile_buffers[i], &src->tile_buffers[i]);
return VK_SUCCESS;
}
static VkResult
pvr_is_z_replicate_space_available(const struct pvr_device_info *dev_info,
struct pvr_renderpass_context *ctx,
struct pvr_renderpass_alloc *alloc,
uint32_t attach_idx,
struct pvr_renderpass_resource *resource)
{
struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
struct pvr_render_int_attachment *int_attach;
struct pvr_renderpass_alloc combined_alloc;
uint32_t first_use;
VkResult result;
/* If z replication was already enabled by a previous subpass then storage
* will already be allocated.
*/
assert(attach_idx < ctx->pass->attachment_count);
int_attach = &ctx->int_attach[attach_idx];
if (int_attach->z_replicate) {
assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
return VK_SUCCESS;
}
/* Get the registers used in any subpass after the depth is first written.
* Start with registers used in the incoming subpass.
*/
result = pvr_copy_alloc(ctx, &combined_alloc, alloc);
if (result != VK_SUCCESS)
return result;
if (hw_render) {
/* Find the subpass where the depth is first written. */
first_use = hw_render->subpass_count;
for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
struct pvr_render_subpass *input_subpass = subpass->input_subpass;
if (input_subpass->depth_stencil_attachment == (int32_t)attach_idx) {
first_use = i;
break;
}
}
/* Merge in registers used in previous subpasses. */
for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
result = pvr_merge_alloc(ctx, &combined_alloc, &subpass->alloc);
if (result != VK_SUCCESS) {
pvr_free_alloc(ctx, &combined_alloc);
return result;
}
}
}
result = pvr_surface_alloc_color_storage(dev_info,
ctx,
&combined_alloc,
int_attach->attachment,
resource);
pvr_free_alloc(ctx, &combined_alloc);
if (result != VK_SUCCESS)
return result;
return pvr_mark_storage_allocated(ctx,
alloc,
int_attach->attachment,
resource);
}
static VkResult
pvr_is_subpass_space_available(const struct pvr_device_info *dev_info,
struct pvr_renderpass_context *ctx,
struct pvr_render_subpass *subpass,
struct pvr_render_subpass_depth_params *sp_depth,
struct pvr_renderpass_alloc *alloc,
struct pvr_render_int_subpass_dsts *sp_dsts)
{
VkResult result;
/* Mark pointers in return structures as not allocated. */
sp_dsts->color = NULL;
alloc->tile_buffers = NULL;
/* Allocate space for which locations are in use after this subpass. */
result = pvr_copy_alloc(ctx, alloc, &ctx->alloc);
if (result != VK_SUCCESS)
return result;
/* Allocate space to store our results. */
if (subpass->color_count > 0U) {
sp_dsts->color =
vk_alloc(ctx->allocator,
sizeof(sp_dsts->color[0U]) * subpass->color_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!sp_dsts->color) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto err_free_alloc;
}
} else {
sp_dsts->color = NULL;
}
sp_dsts->existing_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
sp_dsts->incoming_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
for (uint32_t i = 0U; i < subpass->color_count; i++) {
const uint32_t attach_idx = subpass->color_attachments[i];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
assert(pvr_get_accum_format_bitsize(int_attach->attachment->vk_format) >
0U);
/* Is the attachment not yet allocated on-chip storage? */
if (int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID) {
result = pvr_surface_alloc_color_storage(dev_info,
ctx,
alloc,
int_attach->attachment,
&sp_dsts->color[i]);
if (result != VK_SUCCESS)
goto err_free_alloc;
/* Avoid merging subpasses which result in tile buffers having to be
* used. The benefit of merging must be weighed against the cost of
* writing/reading to tile buffers.
*/
if (ctx->hw_render &&
sp_dsts->color[i].type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
result = vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
goto err_free_alloc;
}
} else {
sp_dsts->color[i].type = USC_MRT_RESOURCE_TYPE_INVALID;
}
}
if (sp_depth->existing_ds_is_input) {
result = pvr_is_z_replicate_space_available(dev_info,
ctx,
alloc,
sp_depth->existing_ds_attach,
&sp_dsts->existing_zrep);
if (result != VK_SUCCESS)
goto err_free_alloc;
}
if (sp_depth->incoming_ds_is_input) {
if (sp_depth->existing_ds_attach != subpass->depth_stencil_attachment) {
result = pvr_is_z_replicate_space_available(
dev_info,
ctx,
alloc,
subpass->depth_stencil_attachment,
&sp_dsts->incoming_zrep);
if (result != VK_SUCCESS)
goto err_free_alloc;
} else {
sp_dsts->incoming_zrep = sp_dsts->existing_zrep;
}
}
return VK_SUCCESS;
err_free_alloc:
pvr_free_alloc(ctx, alloc);
if (sp_dsts->color)
vk_free(ctx->allocator, sp_dsts->color);
sp_dsts->color = NULL;
return result;
}
static bool
pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
struct pvr_renderpass_context *ctx,
struct pvr_render_subpass *subpass,
struct pvr_render_subpass_depth_params *sp_depth,
struct pvr_render_int_attachment *int_ds_attach,
struct pvr_renderpass_alloc *new_alloc,
struct pvr_render_int_subpass_dsts *sp_dsts)
{
VkResult result;
bool ret;
/* Mark pointers in return structures as not allocated. */
sp_dsts->color = NULL;
new_alloc->tile_buffers = NULL;
/* The hardware doesn't support replicating the stencil, so we need to store
* the depth to memory if a stencil attachment is used as an input
* attachment.
*/
if (sp_depth->existing_ds_is_input &&
ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
return false;
}
if (sp_depth->incoming_ds_is_input && int_ds_attach &&
int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
ctx->hw_render) {
return false;
}
/* Can't mix multiple sample counts in the same render. */
if (ctx->hw_render &&
ctx->hw_render->sample_count != subpass->sample_count) {
return false;
}
/* If the depth is used by both the render and the incoming subpass and
* either the existing depth must be saved or the new depth must be loaded
* then we can't merge.
*/
ret = pvr_depth_zls_conflict(ctx,
int_ds_attach,
sp_depth->existing_ds_is_input);
if (ret)
return false;
/* Check if any of the subpass's dependencies are marked that the two
* subpasses can't be in the same render.
*/
for (uint32_t i = 0U; i < subpass->dep_count; i++) {
const uint32_t dep = subpass->dep_list[i];
if (subpass->flush_on_dep[i] && ctx->hw_setup->subpass_map[dep].render ==
(ctx->hw_setup->render_count - 1U)) {
return false;
}
}
/* Check if one of the input/color attachments is written by an MSAA resolve
* in an existing subpass in the current render.
*/
for (uint32_t i = 0U; i < subpass->input_count; i++) {
const uint32_t attach_idx = subpass->input_attachments[i];
if (attach_idx != VK_ATTACHMENT_UNUSED &&
pvr_is_pending_resolve_dest(ctx, attach_idx)) {
return false;
}
}
for (uint32_t i = 0U; i < subpass->color_count; i++) {
if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED &&
(pvr_is_pending_resolve_dest(ctx, subpass->color_attachments[i]) ||
pvr_is_pending_resolve_src(ctx, subpass->color_attachments[i]))) {
return false;
}
if (subpass->resolve_attachments &&
subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED &&
pvr_is_pending_resolve_dest(ctx, subpass->resolve_attachments[i])) {
return false;
}
}
/* No chance of exceeding PBE registers in a single subpass. */
if (ctx->hw_render) {
ret = pvr_exceeds_pbe_registers(ctx, subpass);
if (ret)
return false;
}
/* Check we can allocate storage for the new subpass's color attachments and
* any z replications.
*/
result = pvr_is_subpass_space_available(dev_info,
ctx,
subpass,
sp_depth,
new_alloc,
sp_dsts);
if (result != VK_SUCCESS)
return false;
return true;
}
static VkResult
pvr_merge_subpass(const struct pvr_device *device,
struct pvr_renderpass_context *ctx,
struct pvr_render_subpass *input_subpass,
struct pvr_renderpass_hwsetup_subpass **const hw_subpass_out)
{
struct pvr_renderpass_hwsetup_subpass *new_hw_subpasses;
struct pvr_renderpass_hwsetup_subpass *hw_subpass;
struct pvr_render_int_attachment *int_ds_attach;
struct pvr_renderpass_hwsetup_render *hw_render;
struct pvr_render_subpass_depth_params sp_depth;
struct pvr_renderpass_subpass *new_subpasses;
struct pvr_render_int_subpass_dsts sp_dsts;
struct pvr_renderpass_subpass *subpass;
struct pvr_renderpass_alloc alloc;
VkResult result;
bool ret;
/* Depth attachment for the incoming subpass. */
if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
else
int_ds_attach = NULL;
/* Attachment ID for the existing depth attachment. */
if (ctx->int_ds_attach)
sp_depth.existing_ds_attach = ctx->int_ds_attach - ctx->int_attach;
else
sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;
/* Is the incoming depth attachment used as an input to the incoming subpass?
*/
sp_depth.incoming_ds_is_input =
pvr_is_input(input_subpass, input_subpass->depth_stencil_attachment);
/* Is the current depth attachment used as an input to the incoming subpass?
*/
sp_depth.existing_ds_is_input =
pvr_is_input(input_subpass, sp_depth.existing_ds_attach);
/* Can the incoming subpass be combined with the existing render? This also
* checks whether space is available for the subpass results and returns the
* allocated locations.
*/
ret = pvr_can_combine_with_render(&device->pdevice->dev_info,
ctx,
input_subpass,
&sp_depth,
int_ds_attach,
&alloc,
&sp_dsts);
if (!ret) {
result = pvr_close_render(device, ctx);
if (result != VK_SUCCESS)
goto end_merge_subpass;
sp_depth.existing_ds_is_input = false;
sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;
/* Allocate again in a new render. */
result = pvr_is_subpass_space_available(&device->pdevice->dev_info,
ctx,
input_subpass,
&sp_depth,
&alloc,
&sp_dsts);
assert(result != VK_ERROR_TOO_MANY_OBJECTS);
if (result != VK_SUCCESS)
goto end_merge_subpass;
}
/* If there isn't an in-progress render then allocate one. */
if (!ctx->hw_render) {
struct pvr_renderpass_hwsetup *hw_setup = ctx->hw_setup;
struct pvr_renderpass_hwsetup_render *new_hw_render = vk_realloc(
ctx->allocator,
hw_setup->renders,
sizeof(hw_setup->renders[0U]) * (hw_setup->render_count + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!new_hw_render) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_merge_subpass;
}
hw_setup->renders = new_hw_render;
ctx->hw_render = &hw_setup->renders[hw_setup->render_count];
memset(ctx->hw_render, 0U, sizeof(*hw_render));
ctx->hw_render->ds_attach_idx = VK_ATTACHMENT_UNUSED;
hw_setup->render_count++;
ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
ctx->hw_render->sample_count = input_subpass->sample_count;
}
/* Allocate a new subpass in the in-progress render. */
hw_render = ctx->hw_render;
new_hw_subpasses = vk_realloc(ctx->allocator,
hw_render->subpasses,
sizeof(hw_render->subpasses[0U]) *
(hw_render->subpass_count + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!new_hw_subpasses) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_merge_subpass;
}
hw_render->subpasses = new_hw_subpasses;
hw_subpass = &hw_render->subpasses[hw_render->subpass_count];
new_subpasses =
vk_realloc(ctx->allocator,
ctx->subpasses,
sizeof(ctx->subpasses[0U]) * (hw_render->subpass_count + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!new_subpasses) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_merge_subpass;
}
ctx->subpasses = new_subpasses;
subpass = &ctx->subpasses[hw_render->subpass_count];
subpass->input_subpass = input_subpass;
subpass->z_replicate = false;
/* Save the allocation state at the subpass. */
result = pvr_copy_alloc(ctx, &subpass->alloc, &alloc);
if (result != VK_SUCCESS)
goto end_merge_subpass;
hw_render->subpass_count++;
memset(hw_subpass, 0U, sizeof(*hw_subpass));
hw_subpass->index = input_subpass->index;
hw_subpass->z_replicate = -1;
hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
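/* Set up load/clear operations when the incoming subpass uses a
* depth/stencil attachment which isn't already attached to the render.
*/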
if (int_ds_attach && ctx->int_ds_attach != int_ds_attach) {
bool setup_render_ds = false;
bool stencil_load = false;
bool depth_load = false;
if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
depth_load = true;
setup_render_ds = true;
hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_LOAD;
hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_LOAD;
assert(!ctx->ds_load_surface);
ctx->ds_load_surface = int_ds_attach;
} else if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_CLEAR;
}
if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
if (int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
stencil_load = true;
setup_render_ds = true;
hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_LOAD;
} else if (int_ds_attach->stencil_load_op ==
VK_ATTACHMENT_LOAD_OP_CLEAR) {
hw_subpass->stencil_clear = true;
}
}
/* If the depth is loaded then allocate external memory for the depth
* attachment.
*/
if (depth_load || stencil_load)
pvr_mark_surface_alloc(ctx, int_ds_attach);
if (setup_render_ds) {
assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED);
hw_render->ds_attach_idx = int_ds_attach->attachment->index;
}
ctx->int_ds_attach = int_ds_attach;
}
/* Set up the initialization operations for the subpass's color attachments. */
hw_subpass->color_initops = vk_alloc(ctx->allocator,
sizeof(hw_subpass->color_initops[0U]) *
input_subpass->color_count,
8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!hw_subpass->color_initops) {
result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_merge_subpass;
}
for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
const uint32_t attach_idx = input_subpass->color_attachments[i];
struct pvr_render_int_attachment *int_attach;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
if (int_attach->first_use == -1) {
hw_subpass->color_initops[i] = int_attach->load_op;
/* If the attachment is loaded then off-chip memory must be
* allocated for it.
*/
if (int_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
pvr_mark_surface_alloc(ctx, int_attach);
/* The attachment has been written so load the attachment the next
* time it is referenced.
*/
int_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
} else {
hw_subpass->color_initops[i] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
}
/* Copy the destinations allocated for the color attachments. */
for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
const uint32_t attach_idx = input_subpass->color_attachments[i];
struct pvr_render_int_attachment *int_attach;
struct pvr_renderpass_resource *attach_dst;
if (attach_idx == VK_ATTACHMENT_UNUSED)
continue;
int_attach = &ctx->int_attach[attach_idx];
attach_dst = &sp_dsts.color[i];
if (int_attach->first_use == -1) {
assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
assert(attach_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
pvr_set_surface_resource(int_attach, attach_dst);
/* If this attachment is being used for the first time then add it
* to the active list.
*/
pvr_make_surface_active(ctx,
int_attach,
hw_render->subpass_count - 1U);
} else {
assert(attach_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
}
}
/* We can't directly read the on-chip depth, so mark subpasses where the
* depth is written to replicate the value into part of the color storage.
*/
if (sp_depth.existing_ds_is_input) {
result = pvr_enable_z_replicate(ctx,
hw_render,
sp_depth.existing_ds_attach,
&sp_dsts.existing_zrep);
if (result != VK_SUCCESS)
goto end_merge_subpass;
}
if (sp_depth.incoming_ds_is_input) {
if (input_subpass->depth_stencil_attachment !=
sp_depth.existing_ds_attach) {
result =
pvr_enable_z_replicate(ctx,
hw_render,
input_subpass->depth_stencil_attachment,
&sp_dsts.incoming_zrep);
if (result != VK_SUCCESS)
goto end_merge_subpass;
} else {
assert(pvr_equal_resources(&sp_dsts.existing_zrep,
&sp_dsts.incoming_zrep));
}
}
/* Copy the locations of color/input attachments to the output structure.
* N.B. Need to do this after Z replication in case the replicated depth is
* an input attachment for the incoming subpass.
*/
result = pvr_copy_storage_details(ctx, hw_subpass, subpass);
if (result != VK_SUCCESS)
goto end_merge_subpass;
if (subpass->z_replicate) {
result = pvr_copy_z_replicate_details(ctx, hw_subpass, subpass);
if (result != VK_SUCCESS)
goto end_merge_subpass;
}
/* Copy the allocation state at the subpass. This will then be updated if
* this was the last use of any attachment.
*/
pvr_free_alloc(ctx, &ctx->alloc);
ctx->alloc = alloc;
/* Free information about subpass destinations. */
if (sp_dsts.color)
vk_free(ctx->allocator, sp_dsts.color);
*hw_subpass_out = hw_subpass;
return VK_SUCCESS;
end_merge_subpass:
if (sp_dsts.color)
vk_free(ctx->allocator, sp_dsts.color);
pvr_free_alloc(ctx, &alloc);
return result;
}
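
/* Drop a reference on each color attachment written by the subpass. */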
static void
pvr_dereference_color_output_list(struct pvr_renderpass_context *ctx,
uint32_t subpass_num,
struct pvr_render_subpass *subpass)
{
for (uint32_t i = 0U; i < subpass->color_count; i++) {
const uint32_t attach_idx = subpass->color_attachments[i];
if (attach_idx != VK_ATTACHMENT_UNUSED)
pvr_dereference_surface(ctx, attach_idx, subpass_num);
}
}
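
/* Drop a reference on each used attachment in the list. */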
static void pvr_dereference_surface_list(struct pvr_renderpass_context *ctx,
uint32_t subpass_num,
uint32_t *attachments,
uint32_t count)
{
for (uint32_t i = 0U; i < count; i++) {
if (attachments[i] != VK_ATTACHMENT_UNUSED)
pvr_dereference_surface(ctx, attachments[i], subpass_num);
}
}
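
/* Schedule a single subpass into the hardware setup.
*
* Merges the subpass into the current render, records which hardware
* render/subpass it was assigned to, releases references on the attachments
* it uses, tracks MSAA resolve sources and destinations and decrements the
* unscheduled dependency counts of the subpasses which depend on it.
*/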
static VkResult pvr_schedule_subpass(const struct pvr_device *device,
struct pvr_renderpass_context *ctx,
uint32_t subpass_idx)
{
struct pvr_renderpass_hwsetup_subpass *hw_subpass;
struct pvr_renderpass_hwsetup_render *hw_render;
struct pvr_render_int_subpass *int_subpass;
struct pvr_render_subpass *subpass;
uint32_t subpass_num;
VkResult result;
int_subpass = &ctx->int_subpasses[subpass_idx];
subpass = int_subpass->subpass;
result = pvr_merge_subpass(device, ctx, subpass, &hw_subpass);
if (result != VK_SUCCESS)
return result;
hw_render = ctx->hw_render;
subpass_num = hw_render->subpass_count - 1U;
/* Record where the subpass was scheduled. */
ctx->hw_setup->subpass_map[subpass_idx].render =
ctx->hw_setup->render_count - 1U;
ctx->hw_setup->subpass_map[subpass_idx].subpass = subpass_num;
/* Check whether this subpass was the last use of any attachments. */
pvr_dereference_color_output_list(ctx, subpass_num, subpass);
pvr_dereference_surface_list(ctx,
subpass_num,
subpass->input_attachments,
subpass->input_count);
if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
struct pvr_render_int_attachment *int_depth_attach =
&ctx->int_attach[subpass->depth_stencil_attachment];
assert(int_depth_attach->remaining_count > 0U);
int_depth_attach->remaining_count--;
if (int_depth_attach->remaining_count == 0U) {
if (int_depth_attach->first_use != -1)
int_depth_attach->last_use = subpass_num;
if (int_depth_attach->z_replicate)
pvr_free_surface_storage(ctx, int_depth_attach);
}
if (int_depth_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
assert(int_depth_attach->stencil_remaining_count > 0U);
int_depth_attach->stencil_remaining_count--;
}
/* The depth attachment has initialized data so load it from memory if it
* is referenced again.
*/
int_depth_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
int_depth_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
}
/* Mark surfaces which have been the source or destination of an MSAA resolve
* in the current render.
*/
for (uint32_t i = 0U; i < subpass->color_count; i++) {
struct pvr_render_int_attachment *resolve_src;
struct pvr_render_int_attachment *resolve_dst;
if (!subpass->resolve_attachments)
break;
if (subpass->resolve_attachments[i] == VK_ATTACHMENT_UNUSED)
continue;
assert(subpass->color_attachments[i] <
(int32_t)ctx->pass->attachment_count);
resolve_src = &ctx->int_attach[subpass->color_attachments[i]];
assert(subpass->resolve_attachments[i] <
(int32_t)ctx->pass->attachment_count);
resolve_dst = &ctx->int_attach[subpass->resolve_attachments[i]];
/* Mark the resolve source. */
assert(resolve_src->last_resolve_src_render <
(int32_t)(ctx->hw_setup->render_count - 1U));
resolve_src->last_resolve_src_render = ctx->hw_setup->render_count - 1U;
/* Mark the resolve destination. */
assert(resolve_dst->last_resolve_dst_render <
(int32_t)(ctx->hw_setup->render_count - 1U));
resolve_dst->last_resolve_dst_render = ctx->hw_setup->render_count - 1U;
/* If we can't downscale through the PBE then the source must be stored so
* it can be downscaled with a transfer operation.
*/
if (!resolve_src->is_pbe_downscalable &&
resolve_src->last_read < (int32_t)ctx->pass->subpass_count) {
resolve_src->last_read = (int32_t)ctx->pass->subpass_count;
resolve_src->remaining_count++;
}
}
/* For each subpass dependent on this subpass, decrement its count of
* unscheduled dependencies.
*/
for (uint32_t i = 0U; i < int_subpass->out_subpass_count; i++) {
struct pvr_render_int_subpass *int_dst_subpass =
int_subpass->out_subpasses[i];
assert(int_dst_subpass->in_subpass_count > 0U);
int_dst_subpass->in_subpass_count--;
}
return VK_SUCCESS;
}
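
/* Count how many times attach_idx appears in an attachment list. */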
static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
uint32_t size,
uint32_t attach_idx)
{
uint32_t count = 0U;
for (uint32_t i = 0U; i < size; i++) {
if (attachments[i] == attach_idx)
count++;
}
return count;
}
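
/* Count the uses of attach_idx as a color attachment in the subpass. A
* color attachment which is also resolved counts twice.
*/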
static uint32_t
pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
uint32_t attach_idx)
{
uint32_t count = 0U;
for (uint32_t i = 0U; i < subpass->color_count; i++) {
if (subpass->color_attachments[i] == attach_idx) {
count++;
if (subpass->resolve_attachments &&
subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
count++;
}
}
return count;
}
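
/* Free all memory owned by a hardware render pass setup. */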
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
struct pvr_renderpass_hwsetup *hw_setup)
{
for (uint32_t i = 0U; i < hw_setup->render_count; i++) {
struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
vk_free(alloc, hw_render->eot_surfaces);
vk_free(alloc, hw_render->eot_setup.mrt_resources);
vk_free(alloc, hw_render->init_setup.mrt_resources);
vk_free(alloc, hw_render->color_init);
for (uint32_t j = 0U; j < hw_render->subpass_count; j++) {
struct pvr_renderpass_hwsetup_subpass *subpass =
&hw_render->subpasses[j];
vk_free(alloc, subpass->color_initops);
vk_free(alloc, subpass->input_access);
vk_free(alloc, subpass->setup.mrt_resources);
}
vk_free(alloc, hw_render->subpasses);
}
vk_free(alloc, hw_setup->renders);
vk_free(alloc, hw_setup);
}
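
/* Build the hardware render pass setup for a render pass.
*
* Sets up the internal per-attachment and per-subpass state, builds the
* reverse dependency graph, then schedules the subpasses into hardware
* renders in dependency order, closing a render whenever the next subpass
* can't be merged (or after every subpass if disable_merge is set).
*/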
VkResult pvr_create_renderpass_hwsetup(
struct pvr_device *device,
const VkAllocationCallbacks *alloc,
struct pvr_render_pass *pass,
bool disable_merge,
struct pvr_renderpass_hwsetup **const hw_setup_out)
{
struct pvr_render_int_attachment *int_attachments;
struct pvr_render_int_subpass *int_subpasses;
struct pvr_renderpass_hw_map *subpass_map;
struct pvr_renderpass_hwsetup *hw_setup;
struct pvr_renderpass_context *ctx;
bool *surface_allocate;
VkResult result;
VK_MULTIALLOC(ma);
vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
vk_multialloc_add(&ma,
&surface_allocate,
__typeof__(*surface_allocate),
pass->attachment_count);
vk_multialloc_add(&ma,
&subpass_map,
__typeof__(*subpass_map),
pass->subpass_count);
if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
hw_setup->surface_allocate = surface_allocate;
hw_setup->subpass_map = subpass_map;
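/* The context and the arrays hanging off it are temporary; they are freed
* once the hardware setup has been built.
*/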
VK_MULTIALLOC(ma_ctx);
vk_multialloc_add(&ma_ctx, &ctx, __typeof__(*ctx), 1);
vk_multialloc_add(&ma_ctx,
&int_attachments,
__typeof__(*int_attachments),
pass->attachment_count);
vk_multialloc_add(&ma_ctx,
&int_subpasses,
__typeof__(*int_subpasses),
pass->subpass_count);
if (!vk_multialloc_zalloc(&ma_ctx,
alloc,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
vk_free(alloc, hw_setup);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
ctx->pass = pass;
ctx->hw_setup = hw_setup;
ctx->int_attach = int_attachments;
ctx->int_subpasses = int_subpasses;
ctx->allocator = alloc;
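/* Set up the internal state for each attachment: the size of its
* accumulation format, its MRT descriptor and its reference counts.
*/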
for (uint32_t i = 0U; i < pass->attachment_count; i++) {
struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
struct pvr_render_int_attachment *int_attach = &ctx->int_attach[i];
const VkFormat format = attachment->vk_format;
uint32_t pixel_size_in_chunks;
uint32_t pixel_size_in_bits;
/* TODO: Add support for packing multiple attachments into the same
* register.
*/
const uint32_t part_bits = 0;
if (vk_format_is_color(format) &&
pvr_get_pbe_accum_format(attachment->vk_format) ==
PVR_PBE_ACCUM_FORMAT_INVALID) {
/* The VkFormat isn't supported as a color attachment so use a size of 0.
* Vulkan doesn't seem to restrict vkCreateRenderPass() to supported
* formats only.
*/
pixel_size_in_bits = 0;
} else {
pixel_size_in_bits =
pvr_get_accum_format_bitsize(attachment->vk_format);
}
int_attach->resource.type = USC_MRT_RESOURCE_TYPE_INVALID;
int_attach->resource.intermediate_size =
DIV_ROUND_UP(pixel_size_in_bits, CHAR_BIT);
int_attach->resource.mrt_desc.intermediate_size =
int_attach->resource.intermediate_size;
pixel_size_in_chunks = DIV_ROUND_UP(pixel_size_in_bits, 32U);
for (uint32_t j = 0U; j < pixel_size_in_chunks; j++)
int_attach->resource.mrt_desc.valid_mask[j] = ~0;
if (part_bits > 0U) {
int_attach->resource.mrt_desc.valid_mask[pixel_size_in_chunks] =
BITFIELD_MASK(part_bits);
}
int_attach->load_op = pass->attachments[i].load_op;
int_attach->stencil_load_op = pass->attachments[i].stencil_load_op;
int_attach->attachment = attachment;
int_attach->first_use = -1;
int_attach->last_use = -1;
int_attach->last_read = -1;
int_attach->mrt_idx = -1;
int_attach->last_resolve_dst_render = -1;
int_attach->last_resolve_src_render = -1;
int_attach->z_replicate = false;
int_attach->is_pbe_downscalable = attachment->is_pbe_downscalable;
/* Count the number of references to this attachment in subpasses. */
for (uint32_t j = 0U; j < pass->subpass_count; j++) {
struct pvr_render_subpass *subpass = &pass->subpasses[j];
const uint32_t color_output_uses =
pvr_count_uses_in_color_output_list(subpass, i);
const uint32_t input_attachment_uses =
pvr_count_uses_in_list(subpass->input_attachments,
subpass->input_count,
i);
if (color_output_uses != 0U || input_attachment_uses != 0U)
int_attach->last_read = j;
int_attach->remaining_count +=
color_output_uses + input_attachment_uses;
if ((uint32_t)subpass->depth_stencil_attachment == i)
int_attach->remaining_count++;
}
if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
int_attach->stencil_remaining_count = int_attach->remaining_count;
if (pass->attachments[i].stencil_store_op ==
VK_ATTACHMENT_STORE_OP_STORE) {
int_attach->stencil_remaining_count++;
}
}
if (pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
int_attach->remaining_count++;
int_attach->last_read = pass->subpass_count;
}
}
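
/* Set up the internal state for each subpass; every subpass starts with all
* of its dependencies unscheduled.
*/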
for (uint32_t i = 0U; i < pass->subpass_count; i++) {
struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
int_subpass->subpass = &pass->subpasses[i];
int_subpass->out_subpass_count = 0U;
int_subpass->out_subpasses = NULL;
int_subpass->in_subpass_count = int_subpass->subpass->dep_count;
}
/* For each dependency of a subpass, create an edge in the opposite
* direction.
*/
for (uint32_t i = 0U; i < pass->subpass_count; i++) {
struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
for (uint32_t j = 0U; j < int_subpass->in_subpass_count; j++) {
uint32_t src_idx = int_subpass->subpass->dep_list[j];
struct pvr_render_int_subpass *int_src_subpass;
struct pvr_render_int_subpass **out_subpasses;
assert(src_idx < pass->subpass_count);
int_src_subpass = &ctx->int_subpasses[src_idx];
out_subpasses =
vk_realloc(ctx->allocator,
int_src_subpass->out_subpasses,
sizeof(int_src_subpass->out_subpasses[0U]) *
(int_src_subpass->out_subpass_count + 1U),
8U,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!out_subpasses) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto end_create_renderpass_hwsetup;
}
int_src_subpass->out_subpasses = out_subpasses;
int_src_subpass->out_subpasses[int_src_subpass->out_subpass_count] =
int_subpass;
int_src_subpass->out_subpass_count++;
}
}
pvr_reset_render(ctx);
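/* Schedule the subpasses in dependency order: on each iteration pick a
* subpass whose dependencies have all been scheduled.
*/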
for (uint32_t i = 0U; i < pass->subpass_count; i++) {
uint32_t j;
/* Find a subpass with no unscheduled dependencies. */
for (j = 0U; j < pass->subpass_count; j++) {
struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[j];
if (int_subpass->subpass && int_subpass->in_subpass_count == 0U)
break;
}
assert(j < pass->subpass_count);
result = pvr_schedule_subpass(device, ctx, j);
if (result != VK_SUCCESS)
goto end_create_renderpass_hwsetup;
if (disable_merge) {
result = pvr_close_render(device, ctx);
if (result != VK_SUCCESS)
goto end_create_renderpass_hwsetup;
}
ctx->int_subpasses[j].subpass = NULL;
}
/* Finalise the last in-progress render. */
result = pvr_close_render(device, ctx);
end_create_renderpass_hwsetup:
if (result != VK_SUCCESS) {
pvr_free_render(ctx);
if (hw_setup) {
pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
hw_setup = NULL;
}
}
for (uint32_t i = 0U; i < pass->subpass_count; i++) {
struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
if (int_subpass->out_subpass_count > 0U)
vk_free(alloc, int_subpass->out_subpasses);
}
vk_free(alloc, ctx);
*hw_setup_out = hw_setup;
return result;
}