blob: faa2d626de9e2bd92afc1170f327a483b799e50d [file] [log] [blame] [edit]
/**************************************************************************
*
* Copyright 2007 VMware, Inc.
* Copyright 2012 Marek Olšák <maraeo@gmail.com>
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* This converts the VBO's vertex attribute/array information into
* Gallium vertex state and binds it.
*
* Authors:
* Keith Whitwell <keithw@vmware.com>
* Marek Olšák <maraeo@gmail.com>
*/
#include "st_context.h"
#include "st_atom.h"
#include "st_draw.h"
#include "st_program.h"
#include "cso_cache/cso_context.h"
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/u_upload_mgr.h"
#include "util/u_threaded_context.h"
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
#include "main/arrayobj.h"
/* The following enums are boolean-like template parameters used to generate
 * specialized variants of the array-update code at compile time.  For each
 * switch, one value is the generic path that always works and the other is a
 * faster specialization that is only valid under extra preconditions.
 */

/* Whether vertex buffers are written directly into the threaded-context
 * (TC) batch instead of a local array.
 */
enum st_fill_tc_set_vb {
   FILL_TC_SET_VB_OFF, /* always works */
   FILL_TC_SET_VB_ON, /* specialized version (faster) */
};

/* Whether the VAO fast path (no derived-array updates needed) is taken. */
enum st_use_vao_fast_path {
   VAO_FAST_PATH_OFF, /* more complicated version (slower) */
   VAO_FAST_PATH_ON, /* always works (faster) */
};

/* Whether zero-stride (current value) attribs may occur, which requires
 * popcount-based vertex element indexing and an extra uploaded buffer.
 */
enum st_allow_zero_stride_attribs {
   ZERO_STRIDE_ATTRIBS_OFF, /* specialized version (faster) */
   ZERO_STRIDE_ATTRIBS_ON, /* always works */
};

/* Whether vertex attrib indices are equal to their vertex buffer indices. */
enum st_identity_attrib_mapping {
   IDENTITY_ATTRIB_MAPPING_OFF, /* always works */
   IDENTITY_ATTRIB_MAPPING_ON, /* specialized version (faster) */
};

/* Whether user (client-memory) vertex buffers may occur. */
enum st_allow_user_buffers {
   USER_BUFFERS_OFF, /* specialized version (faster) */
   USER_BUFFERS_ON, /* always works */
};

/* Whether vertex elements must be (re)built in addition to vertex buffers. */
enum st_update_velems {
   UPDATE_VELEMS_OFF, /* specialized version (faster) */
   UPDATE_VELEMS_ON, /* always works */
};
/* Always inline, so that the compiler can see that "velements" lives on the
 * caller's stack and optimize the stores accordingly.
 *
 * Fill in vertex element "idx" from a GL vertex format plus buffer-binding
 * parameters.  The GL format must already have been translated to a pipe
 * format (vformat->_PipeFormat != PIPE_FORMAT_NONE).
 */
static void ALWAYS_INLINE
init_velement(struct pipe_vertex_element *velements,
              const struct gl_vertex_format *vformat,
              int src_offset, unsigned src_stride,
              unsigned instance_divisor,
              int vbo_index, bool dual_slot, int idx)
{
   struct pipe_vertex_element *ve = &velements[idx];

   ve->src_format = vformat->_PipeFormat;
   assert(ve->src_format); /* the GL format must map to a pipe format */

   ve->src_offset = src_offset;
   ve->src_stride = src_stride;
   ve->instance_divisor = instance_divisor;
   ve->vertex_buffer_index = vbo_index;
   ve->dual_slot = dual_slot;
}
/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 *
 * Translate the VAO's enabled vertex arrays into pipe vertex buffers
 * (appended to vbuffer at *num_vbuffers) and, when UPDATE_VELEMS is on,
 * into pipe vertex elements in "velements".
 *
 * Parameters:
 *   dual_slot_inputs - bitmask of vertex shader inputs occupying 2 slots
 *   inputs_read      - all vertex shader inputs (used for element indexing)
 *   mask             - the subset of inputs to process here (enabled arrays)
 *
 * The template parameters select the specialized variants described by the
 * enums at the top of this file.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
setup_arrays(struct gl_context *ctx,
             const struct gl_vertex_array_object *vao,
             const GLbitfield dual_slot_inputs,
             const GLbitfield inputs_read,
             GLbitfield mask,
             struct cso_velems_state *velements,
             struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Set up enabled vertex arrays. */
   if (USE_VAO_FAST_PATH) {
      /* The attribute map remaps attribs for ARB_vertex_program aliasing;
       * with identity mapping it's not needed at all.
       */
      const GLubyte *attribute_map =
         !HAS_IDENTITY_ATTRIB_MAPPING ?
            _mesa_vao_attribute_map[vao->_AttributeMapMode] : NULL;
      struct pipe_context *pipe = ctx->pipe;
      struct tc_buffer_list *next_buffer_list = NULL;

      if (FILL_TC_SET_VB)
         next_buffer_list = tc_get_next_buffer_list(pipe);

      /* Note: I did try to unroll this loop by passing the number of
       * iterations as a template parameter, but it resulted in more overhead.
       */
      while (mask) {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&mask);
         const struct gl_array_attributes *attrib;
         const struct gl_vertex_buffer_binding *binding;

         if (HAS_IDENTITY_ATTRIB_MAPPING) {
            /* Attrib i uses buffer binding i directly. */
            attrib = &vao->VertexAttrib[attr];
            binding = &vao->BufferBinding[attr];
         } else {
            attrib = &vao->VertexAttrib[attribute_map[attr]];
            binding = &vao->BufferBinding[attrib->BufferBindingIndex];
         }

         const unsigned bufidx = (*num_vbuffers)++;

         /* Set the vertex buffer. */
         if (!ALLOW_USER_BUFFERS || binding->BufferObj) {
            assert(binding->BufferObj);
            /* This takes a reference on the buffer; released when the
             * vertex buffer state is unbound or replaced.
             */
            struct pipe_resource *buf =
               _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
            vbuffer[bufidx].buffer.resource = buf;
            vbuffer[bufidx].is_user_buffer = false;
            vbuffer[bufidx].buffer_offset = binding->Offset +
                                            attrib->RelativeOffset;
            if (FILL_TC_SET_VB)
               tc_track_vertex_buffer(pipe, bufidx, buf, next_buffer_list);
         } else {
            /* Client-memory array; the TC path never sees these. */
            vbuffer[bufidx].buffer.user = attrib->Ptr;
            vbuffer[bufidx].is_user_buffer = true;
            vbuffer[bufidx].buffer_offset = 0;
            assert(!FILL_TC_SET_VB);
         }

         if (!UPDATE_VELEMS)
            continue;

         /* Determine the vertex element index without popcnt
          * if !ALLOW_ZERO_STRIDE_ATTRIBS, which means that we don't need
          * to leave any holes for zero-stride attribs, thus the mapping from
          * vertex elements to vertex buffers is identity.
          */
         unsigned index;

         if (ALLOW_ZERO_STRIDE_ATTRIBS) {
            assert(POPCNT != POPCNT_INVALID);
            index = util_bitcount_fast<POPCNT>(inputs_read &
                                               BITFIELD_MASK(attr));
         } else {
            index = bufidx;
            assert(index == util_bitcount(inputs_read &
                                          BITFIELD_MASK(attr)));
         }

         /* Set the vertex element. */
         init_velement(velements->velems, &attrib->Format, 0, binding->Stride,
                       binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr), index);
      }
      return;
   }

   /* The slow path needs more fields initialized, which is not done if it's
    * disabled.
    */
   assert(!ctx->Const.UseVAOFastPath || vao->SharedAndImmutable);

   /* Require these because we don't use them here and we don't want to
    * generate identical template variants.
    */
   assert(!FILL_TC_SET_VB);
   assert(ALLOW_ZERO_STRIDE_ATTRIBS);
   assert(!HAS_IDENTITY_ATTRIB_MAPPING);
   assert(ALLOW_USER_BUFFERS);
   assert(UPDATE_VELEMS);

   /* Slow path: walk buffer bindings and, for each, all attribs bound to it,
    * so several attribs can share one vertex buffer slot.
    */
   while (mask) {
      /* The attribute index to start pulling a binding */
      const gl_vert_attrib i = (gl_vert_attrib)(ffs(mask) - 1);
      const struct gl_vertex_buffer_binding *const binding
         = _mesa_draw_buffer_binding(vao, i);

      const unsigned bufidx = (*num_vbuffers)++;

      if (binding->BufferObj) {
         /* Set the binding */
         vbuffer[bufidx].buffer.resource =
            _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
         vbuffer[bufidx].is_user_buffer = false;
         vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
      } else {
         /* Set the binding */
         const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
         vbuffer[bufidx].buffer.user = ptr;
         vbuffer[bufidx].is_user_buffer = true;
         vbuffer[bufidx].buffer_offset = 0;
      }

      const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
      GLbitfield attrmask = mask & boundmask;

      /* Mark the those attributes as processed */
      mask &= ~boundmask;

      /* We can assume that we have array for the binding */
      assert(attrmask);

      /* Walk attributes belonging to the binding */
      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&attrmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_array_attrib(vao, attr);
         const GLuint off = _mesa_draw_attributes_relative_offset(attrib);

         assert(POPCNT != POPCNT_INVALID);
         init_velement(velements->velems, &attrib->Format, off,
                       binding->Stride, binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr),
                       util_bitcount_fast<POPCNT>(inputs_read &
                                                  BITFIELD_MASK(attr)));
      } while (attrmask);
   }
}
/* Only used by the select/feedback mode. */
void
st_setup_arrays(struct st_context *st,
const struct gl_vertex_program *vp,
const struct st_common_variant *vp_variant,
struct cso_velems_state *velements,
struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
struct gl_context *ctx = st->ctx;
GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_ON,
ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
USER_BUFFERS_ON, UPDATE_VELEMS_ON>
(ctx, ctx->Array._DrawVAO, vp->Base.DualSlotInputs,
vp_variant->vert_attrib_mask,
vp_variant->vert_attrib_mask & enabled_arrays,
velements, vbuffer, num_vbuffers);
}
/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 *
 * Upload the current (zero-stride) vertex attribute values into a single
 * vertex buffer appended at *num_vbuffers.
 */
/* Upload the current attribute values for all attribs in "curmask" into one
 * vertex buffer and (optionally) point the matching vertex elements at it.
 * Does nothing when curmask is 0.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_setup_current(struct st_context *st,
                 const GLbitfield dual_slot_inputs,
                 const GLbitfield inputs_read,
                 GLbitfield curmask,
                 struct cso_velems_state *velements,
                 struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Process values that should have better been uniforms in the application */
   if (curmask) {
      struct gl_context *ctx = st->ctx;

      assert(POPCNT != POPCNT_INVALID);
      unsigned num_attribs = util_bitcount_fast<POPCNT>(curmask);
      unsigned num_dual_attribs = util_bitcount_fast<POPCNT>(curmask &
                                                             dual_slot_inputs);
      /* num_attribs includes num_dual_attribs, so adding num_dual_attribs
       * doubles the size of those attribs.
       */
      unsigned max_size = (num_attribs + num_dual_attribs) * 16;

      const unsigned bufidx = (*num_vbuffers)++;
      vbuffer[bufidx].is_user_buffer = false;
      vbuffer[bufidx].buffer.resource = NULL;
      /* vbuffer[bufidx].buffer_offset is set below */

      /* Use const_uploader for zero-stride vertex attributes, because
       * it may use a better memory placement than stream_uploader.
       * The reason is that zero-stride attributes can be fetched many
       * times (thousands of times), so a better placement is going to
       * perform better.
       */
      struct u_upload_mgr *uploader = st->can_bind_const_buffer_as_vertex ?
                                      st->pipe->const_uploader :
                                      st->pipe->stream_uploader;
      uint8_t *ptr = NULL;

      /* Allocates (and maps) GPU-visible memory; also sets buffer_offset
       * and the buffer resource for this slot.
       */
      u_upload_alloc(uploader, 0, max_size, 16,
                     &vbuffer[bufidx].buffer_offset,
                     &vbuffer[bufidx].buffer.resource, (void**)&ptr);
      uint8_t *cursor = ptr;

      if (FILL_TC_SET_VB) {
         struct pipe_context *pipe = ctx->pipe;
         tc_track_vertex_buffer(pipe, bufidx, vbuffer[bufidx].buffer.resource,
                                tc_get_next_buffer_list(pipe));
      }

      /* Pack each current attrib value consecutively into the upload. */
      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = attrib->Format._ElementSize;

         /* When the current attribs are set (e.g. via glColor3ub or
          * glVertexAttrib2s), they are always converted to float32 or int32
          * or dual slots being 2x int32, so they are always dword-aligned.
          * glBegin/End behaves in the same way. It's really an internal Mesa
          * inefficiency that is convenient here, which is why this assertion
          * is always true.
          */
         assert(size % 4 == 0); /* assume a hw-friendly alignment */
         memcpy(cursor, attrib->Ptr, size);

         if (UPDATE_VELEMS) {
            /* Zero stride and zero divisor: the value never advances. */
            init_velement(velements->velems, &attrib->Format, cursor - ptr,
                          0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                          util_bitcount_fast<POPCNT>(inputs_read &
                                                     BITFIELD_MASK(attr)));
         }

         cursor += size;
      } while (curmask);

      /* Always unmap. The uploader might use explicit flushes. */
      u_upload_unmap(uploader);
   }
}
/* Only used by the select/feedback mode.
 *
 * Bind the current (zero-stride) attribute values as individual user vertex
 * buffers, one buffer per attribute, instead of uploading them.
 */
void
st_setup_current_user(struct st_context *st,
                      const struct gl_vertex_program *vp,
                      const struct st_common_variant *vp_variant,
                      struct cso_velems_state *velements,
                      struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;

   /* Attribs that the shader reads but that have no enabled array come from
    * the current attribute values.
    */
   GLbitfield curmask = inputs_read & ~_mesa_get_enabled_vertex_arrays(ctx);

   /* For each such attribute, make an own user buffer binding. */
   while (curmask) {
      const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
      const struct gl_array_attributes *const attrib
         = _mesa_draw_current_attrib(ctx, attr);
      const unsigned bufidx = (*num_vbuffers)++;

      vbuffer[bufidx].buffer.user = attrib->Ptr;
      vbuffer[bufidx].is_user_buffer = true;
      vbuffer[bufidx].buffer_offset = 0;

      /* Zero stride and divisor: the same value is fetched for all vertices. */
      init_velement(velements->velems, &attrib->Format, 0, 0, 0,
                    bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                    util_bitcount(inputs_read & BITFIELD_MASK(attr)));
   }
}
/* The core vertex-array update: gathers vertex buffers and elements from the
 * draw VAO (plus current attribs) and binds them via cso or directly into the
 * threaded context.  Each template instantiation is one specialized variant;
 * see the enums at the top of this file.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_update_array_templ(struct st_context *st,
                      const GLbitfield enabled_arrays,
                      const GLbitfield enabled_user_arrays,
                      const GLbitfield nonzero_divisor_arrays)
{
   struct gl_context *ctx = st->ctx;

   /* vertex program validation must be done before this */
   /* _NEW_PROGRAM, ST_NEW_VS_STATE */
   const struct gl_vertex_program *vp =
      (struct gl_vertex_program *)ctx->VertexProgram._Current;
   const struct st_common_variant *vp_variant = st->vp_variant;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;
   const GLbitfield userbuf_arrays =
      ALLOW_USER_BUFFERS ? inputs_read & enabled_user_arrays : 0;
   bool uses_user_vertex_buffers = userbuf_arrays != 0;

   /* Draws reading user arrays with divisor 0 need index bounds to upload. */
   st->draw_needs_minmax_index =
      (userbuf_arrays & ~nonzero_divisor_arrays) != 0;

   struct pipe_vertex_buffer vbuffer_local[PIPE_MAX_ATTRIBS];
   struct pipe_vertex_buffer *vbuffer;
   unsigned num_vbuffers = 0, num_vbuffers_tc;
   struct cso_velems_state velements;

   if (FILL_TC_SET_VB) {
      /* Write vertex buffers straight into the TC call slot; the exact
       * buffer count must be known up front.
       */
      assert(!uses_user_vertex_buffers);
      assert(POPCNT != POPCNT_INVALID);
      num_vbuffers_tc = util_bitcount_fast<POPCNT>(inputs_read &
                                                   enabled_arrays);
      /* Add up to 1 vertex buffer for zero-stride vertex attribs. */
      num_vbuffers_tc += ALLOW_ZERO_STRIDE_ATTRIBS &&
                         inputs_read & ~enabled_arrays;
      vbuffer = tc_add_set_vertex_buffers_call(st->pipe, num_vbuffers_tc);
   } else {
      vbuffer = vbuffer_local;
   }

   /* ST_NEW_VERTEX_ARRAYS */
   /* Setup arrays */
   setup_arrays<POPCNT, FILL_TC_SET_VB, USE_VAO_FAST_PATH,
                ALLOW_ZERO_STRIDE_ATTRIBS, HAS_IDENTITY_ATTRIB_MAPPING,
                ALLOW_USER_BUFFERS, UPDATE_VELEMS>
      (ctx, ctx->Array._DrawVAO, dual_slot_inputs, inputs_read,
       inputs_read & enabled_arrays, &velements, vbuffer, &num_vbuffers);

   /* _NEW_CURRENT_ATTRIB */
   /* Setup zero-stride attribs. */
   if (ALLOW_ZERO_STRIDE_ATTRIBS) {
      st_setup_current<POPCNT, FILL_TC_SET_VB, UPDATE_VELEMS>
         (st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
          &velements, vbuffer, &num_vbuffers);
   } else {
      assert(!(inputs_read & ~enabled_arrays));
   }

   if (FILL_TC_SET_VB)
      assert(num_vbuffers == num_vbuffers_tc);

   if (UPDATE_VELEMS) {
      struct cso_context *cso = st->cso_context;

      velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;

      /* Set vertex buffers and elements. */
      if (FILL_TC_SET_VB) {
         /* Buffers were already written into the TC call above. */
         cso_set_vertex_elements(cso, &velements);
      } else {
         cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
                                             uses_user_vertex_buffers, vbuffer);
      }
      /* The driver should clear this after it has processed the update. */
      ctx->Array.NewVertexElements = false;
      st->uses_user_vertex_buffers = uses_user_vertex_buffers;
   } else {
      /* Only vertex buffers. */
      if (!FILL_TC_SET_VB)
         cso_set_vertex_buffers(st->cso_context, num_vbuffers, true, vbuffer);

      /* This can change only when we update vertex elements. */
      assert(st->uses_user_vertex_buffers == uses_user_vertex_buffers);
   }
}
/* Signature of all st_update_array_templ instantiations stored in the table. */
typedef void (*update_array_func)(struct st_context *st,
                                  const GLbitfield enabled_arrays,
                                  const GLbitfield enabled_user_attribs,
                                  const GLbitfield nonzero_divisor_attribs);

/* This just initializes the table of all st_update_array variants. */
struct st_update_array_table {
   /* Indexed by: [POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
    * [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS].
    */
   update_array_func funcs[2][2][2][2][2][2];

   /* Store the variant for one combination of template parameters, after
    * folding away combinations that would generate identical code.
    */
   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
            st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
            st_allow_user_buffers ALLOW_USER_BUFFERS,
            st_update_velems UPDATE_VELEMS>
   void init_one()
   {
      /* These conditions reduce the number of compiled variants. */
      /* The TC path is only valid without user buffers.
       */
      constexpr st_fill_tc_set_vb fill_tc_set_vb =
         !ALLOW_USER_BUFFERS ? FILL_TC_SET_VB : FILL_TC_SET_VB_OFF;
      /* POPCNT is unused without zero-stride attribs and without TC. */
      constexpr util_popcnt popcnt =
         !ALLOW_ZERO_STRIDE_ATTRIBS && !fill_tc_set_vb ?
            POPCNT_INVALID : POPCNT;

      funcs[POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
           [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS] =
         st_update_array_templ<
            popcnt,
            fill_tc_set_vb,
            VAO_FAST_PATH_ON,
            ALLOW_ZERO_STRIDE_ATTRIBS,
            HAS_IDENTITY_ATTRIB_MAPPING,
            ALLOW_USER_BUFFERS,
            UPDATE_VELEMS>;
   }

   /* We have to do this in stages because of the combinatorial explosion of
    * variants.
    */
   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS>
   void init_last_3_args()
   {
      /* All 8 combinations of the last 3 template parameters. */
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF, USER_BUFFERS_OFF,
               UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
   }

   /* All 8 combinations of the first 3 template parameters. */
   st_update_array_table()
   {
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
   }
};

/* Filled at static-initialization time by the constructor above. */
static st_update_array_table update_array_table;
/* Entry point installed by st_init_update_array: derives the runtime
 * conditions for the current draw VAO and dispatches to the matching
 * st_update_array_templ variant.
 */
template<util_popcnt POPCNT,
         st_use_vao_fast_path USE_VAO_FAST_PATH> void ALWAYS_INLINE
st_update_array_impl(struct st_context *st)
{
   struct gl_context *ctx = st->ctx;
   struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
   const GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
   GLbitfield enabled_user_arrays;
   GLbitfield nonzero_divisor_arrays;

   assert(vao->_EnabledWithMapMode ==
          _mesa_vao_enable_to_vp_inputs(vao->_AttributeMapMode, vao->Enabled));

   /* The slow path must refresh derived VAO state unless the VAO is
    * immutable (shared).
    */
   if (!USE_VAO_FAST_PATH && !vao->SharedAndImmutable)
      _mesa_update_vao_derived_arrays(ctx, vao, false);

   _mesa_get_derived_vao_masks(ctx, enabled_arrays, &enabled_user_arrays,
                               &nonzero_divisor_arrays);

   /* Execute the slow path without using multiple C++ template variants. */
   if (!USE_VAO_FAST_PATH) {
      st_update_array_templ<POPCNT, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                            ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                            USER_BUFFERS_ON, UPDATE_VELEMS_ON>
         (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
      return;
   }

   /* The fast path that selects from multiple C++ template variants. */
   const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
   const GLbitfield enabled_arrays_read = inputs_read & enabled_arrays;

   /* Check cso_context whether it goes directly to TC. */
   bool fill_tc_set_vbs = st->cso_context->draw_vbo == tc_draw_vbo;
   bool has_zero_stride_attribs = inputs_read & ~enabled_arrays;

   /* Attribs whose mapping differs from identity because of the VAO's
    * attribute map mode (position/generic0 aliasing).
    */
   uint32_t non_identity_attrib_mapping =
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_IDENTITY ? 0 :
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_POSITION ? VERT_BIT_GENERIC0
                                                            : VERT_BIT_POS;
   bool has_identity_mapping = !(enabled_arrays_read &
                                 (vao->NonIdentityBufferAttribMapping |
                                  non_identity_attrib_mapping));

   /* has_user_buffers is always false with glthread. */
   bool has_user_buffers = inputs_read & enabled_user_arrays;

   /* Changing from user to non-user buffers and vice versa can switch between
    * cso and u_vbuf, which means that we need to update vertex elements even
    * when they have not changed.
    */
   bool update_velems = ctx->Array.NewVertexElements ||
                        st->uses_user_vertex_buffers != has_user_buffers;

   update_array_table.funcs[POPCNT][fill_tc_set_vbs][has_zero_stride_attribs]
                           [has_identity_mapping][has_user_buffers]
                           [update_velems]
      (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
}
/* The default callback that must be present before st_init_update_array
 * selects the driver-dependent variant.  It must never actually be called;
 * reaching it means st_init_update_array was skipped during context setup.
 */
void
st_update_array(struct st_context *st)
{
   unreachable("st_init_update_array not called");
}
void
st_init_update_array(struct st_context *st)
{
st_update_func_t *func = &st->update_functions[ST_NEW_VERTEX_ARRAYS_INDEX];
if (util_get_cpu_caps()->has_popcnt) {
if (st->ctx->Const.UseVAOFastPath)
*func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_ON>;
else
*func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_OFF>;
} else {
if (st->ctx->Const.UseVAOFastPath)
*func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_ON>;
else
*func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_OFF>;
}
}
/* Create a pipe_vertex_state object for display lists from the given VAO
 * and optional index buffer.  Returns NULL on failure.
 *
 * Expects the VAO to resolve to exactly one interleaved vertex buffer.
 */
struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
                               const struct gl_vertex_array_object *vao,
                               struct gl_buffer_object *indexbuf,
                               uint32_t enabled_arrays)
{
   struct st_context *st = st_context(ctx);
   const GLbitfield inputs_read = enabled_arrays;
   const GLbitfield dual_slot_inputs = 0; /* always zero */
   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
   unsigned num_vbuffers = 0;
   struct cso_velems_state velements;

   /* This should use the slow path because there is only 1 interleaved
    * vertex buffer.
    */
   setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                USER_BUFFERS_ON, UPDATE_VELEMS_ON>
      (ctx, vao, dual_slot_inputs, inputs_read, inputs_read, &velements,
       vbuffer, &num_vbuffers);

   if (num_vbuffers != 1) {
      assert(!"this should never happen with display lists");
      /* Don't leak the buffer references setup_arrays took; in release
       * builds (assert compiled out) this path returns normally.
       */
      for (unsigned i = 0; i < num_vbuffers; i++)
         pipe_vertex_buffer_unreference(&vbuffer[i]);
      return NULL;
   }

   velements.count = util_bitcount(inputs_read);

   struct pipe_screen *screen = st->screen;
   struct pipe_vertex_state *state =
      screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
                                  velements.count,
                                  indexbuf ?
                                  indexbuf->buffer : NULL,
                                  enabled_arrays);

   /* create_vertex_state takes its own references; drop ours. */
   for (unsigned i = 0; i < num_vbuffers; i++)
      pipe_vertex_buffer_unreference(&vbuffer[i]);
   return state;
}