blob: a41e2720465d1392b626db8bda77ad89260b3f79 [file] [log] [blame]
/*
* Copyright © 2006 - 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "elk_compiler.h"
#include "elk_disasm.h"
#include "elk_eu.h"
#include "elk_prim.h"
#include "dev/intel_debug.h"
/* Per-compilation state shared by all of the SF (setup) program emitters
* in this file.
*/
struct elk_sf_compile {
/* Code generator that every instruction is emitted into. */
struct elk_codegen func;
/* Private copy of the key handed to elk_compile_sf(). */
struct elk_sf_prog_key key;
/* Program metadata filled in here and copied back to the caller. */
struct elk_sf_prog_data prog_data;
/* Scalars located in r1 by alloc_regs() ("values computed by fixed
* function unit").  pv is retyped to D and used as the index for the
* computed jump in the flat-shading code (presumably the provoking
* vertex number - confirm).
*/
struct elk_reg pv;
struct elk_reg det;
struct elk_reg dx0;
struct elk_reg dx2;
struct elk_reg dy0;
struct elk_reg dy2;
/* z and 1/w passed in separately:
*/
struct elk_reg z[3];
struct elk_reg inv_w[3];
/* The vertices:
*/
struct elk_reg vert[3];
/* Temporaries, allocated after last vertex reg.
*/
struct elk_reg inv_det;
struct elk_reg a1_sub_a0;
struct elk_reg a2_sub_a0;
struct elk_reg tmp;
/* Message registers m1..m3 holding the outputs written to the URB:
* the d/dx coefficients, the d/dy coefficients and the start value.
*/
struct elk_reg m1Cx;
struct elk_reg m2Cy;
struct elk_reg m3C0;
/* Number of vertices of the primitive being set up (1, 2 or 3). */
GLuint nr_verts;
/* Number of GRFs occupied by each vertex (two VUE slots per GRF). */
GLuint nr_attr_regs;
/* Number of attribute registers the setup loops iterate over. */
GLuint nr_setup_regs;
/* Offset, in registers (pairs of VUE slots), of the first register read
* relative to the start of the VUE.
*/
int urb_entry_read_offset;
/** The last known value of the f0.0 flag register. */
unsigned flag_value;
/* Private copy of the VUE map; elk_compile_sf() may append a slot to it. */
struct intel_vue_map vue_map;
};
/**
 * Map one half of a vertex attribute register to its VUE slot index.
 *
 * \p reg is counted from the first register read by the SF program, so the
 * URB entry read offset is added back before converting registers to slots
 * (two slots per register).  \p half selects the first (0) or second (1)
 * slot held by the register.
 */
static inline int vert_reg_to_vue_slot(struct elk_sf_compile *c, GLuint reg,
                                       int half)
{
   const GLuint absolute_reg = reg + c->urb_entry_read_offset;

   return absolute_reg * 2 + half;
}
/**
 * Look up which varying lives in the given half of the given vertex
 * register (half=0 is the first half of the register, half=1 the second).
 *
 * The result is whatever the VUE map stores for the corresponding slot.
 */
static inline int vert_reg_to_varying(struct elk_sf_compile *c, GLuint reg,
                                      int half)
{
   return c->vue_map.slot_to_varying[vert_reg_to_vue_slot(c, reg, half)];
}
/**
 * Return the 4-wide register region that holds \p vue_slot for the vertex
 * whose first register is \p vert.
 *
 * Two slots share one GRF: the slot's register is found relative to
 * vert.nr after removing the URB entry read offset, and odd slots start at
 * subregister 4.
 */
static struct elk_reg get_vue_slot(struct elk_sf_compile *c,
                                   struct elk_reg vert,
                                   int vue_slot)
{
   const GLuint reg_delta = vue_slot / 2 - c->urb_entry_read_offset;
   const GLuint half = vue_slot % 2;

   return elk_vec4_grf(vert.nr + reg_delta, half * 4);
}
/**
 * Determine the register corresponding to the given varying.
 *
 * \param c        compile state providing the VUE map and read offset
 * \param vert     first register of the vertex to look in
 * \param varying  varying slot to locate
 * \return the 4-wide register region holding that varying for \p vert
 *
 * The varying must live in a slot that is actually read by the SF program.
 * urb_entry_read_offset counts registers, each of which covers two VUE
 * slots, so the first readable slot is 2 * urb_entry_read_offset.  Any
 * smaller slot number (including a negative one) would make the unsigned
 * register delta computed by get_vue_slot() wrap around.
 */
static struct elk_reg get_varying(struct elk_sf_compile *c,
                                  struct elk_reg vert,
                                  GLuint varying)
{
   int vue_slot = c->vue_map.varying_to_slot[varying];

   assert(vue_slot >= 2 * c->urb_entry_read_offset);
   return get_vue_slot(c, vert, vue_slot);
}
/* Report whether the program key's attribute bitmask includes \p attr. */
static bool
have_attr(struct elk_sf_compile *c, GLuint attr)
{
   return (c->key.attrs & BITFIELD64_BIT(attr)) != 0;
}
/***********************************************************************
* Twoside lighting
*/
/* Emit MOVs that overwrite the front colors of one vertex with its back
 * face colors.  A color pair is only copied when both COLn and BFCn are
 * present in the key's attribute mask.
 */
static void copy_bfc( struct elk_sf_compile *c,
                      struct elk_reg vert )
{
   struct elk_codegen *p = &c->func;

   for (GLuint n = 0; n < 2; n++) {
      const GLuint front = VARYING_SLOT_COL0 + n;
      const GLuint back = VARYING_SLOT_BFC0 + n;

      if (!have_attr(c, front) || !have_attr(c, back))
         continue;

      elk_MOV(p,
              get_varying(c, vert, front),
              get_varying(c, vert, back));
   }
}
/* Emit code that, for back-facing primitives, replaces the front colors of
 * every vertex with the back face colors.  Facing is decided by comparing
 * the determinant against zero; the direction of the comparison depends on
 * the key's frontface_ccw setting.
 */
static void do_twoside_color( struct elk_sf_compile *c )
{
   struct elk_codegen *p = &c->func;

   /* The clip program has already taken care of unfilled triangles. */
   if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
      return;

   /* Only do the selection if the vertex shader provides a backface color.
    * The VS promises to set up the front color whenever the backface color
    * is provided, but it may contain junk if never written to.
    */
   const bool have_pair0 =
      have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0);
   if (!have_pair0) {
      const bool have_pair1 =
         have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1);
      if (!have_pair1)
         return;
   }

   const GLuint backface_conditional =
      c->key.frontface_ccw ? ELK_CONDITIONAL_G : ELK_CONDITIONAL_L;

   /* Use ELK_EXECUTE_4 together with a 4-wide compare so that all channels
    * are active inside the IF.  The clipping code runs with NoMask, so
    * there this is not needed and ELK_EXECUTE_1 can be used for all
    * comparisons.
    */
   elk_CMP(p, vec4(elk_null_reg()), backface_conditional, c->det, elk_imm_f(0));
   elk_IF(p, ELK_EXECUTE_4);
   {
      switch (c->nr_verts) {
      case 3:
         copy_bfc(c, c->vert[2]);
         FALLTHROUGH;
      case 2:
         copy_bfc(c, c->vert[1]);
         FALLTHROUGH;
      case 1:
         copy_bfc(c, c->vert[0]);
         break;
      default:
         break;
      }
   }
   elk_ENDIF(p);
}
/***********************************************************************
* Flat shading
*/
/* Emit one MOV per flat-shaded VUE slot, copying the slot from the vertex
 * starting at \p src to the vertex starting at \p dst.  The number of MOVs
 * emitted equals count_flatshaded_attributes(); the computed jumps in the
 * flat-shading code rely on that.
 */
static void copy_flatshaded_attributes(struct elk_sf_compile *c,
                                       struct elk_reg dst,
                                       struct elk_reg src)
{
   struct elk_codegen *p = &c->func;

   for (int slot = 0; slot < c->vue_map.num_slots; slot++) {
      if (c->key.interp_mode[slot] != INTERP_MODE_FLAT)
         continue;

      elk_MOV(p,
              get_vue_slot(c, dst, slot),
              get_vue_slot(c, src, slot));
   }
}
/* Count the VUE slots whose interpolation mode in the key is
 * INTERP_MODE_FLAT.
 */
static int count_flatshaded_attributes(struct elk_sf_compile *c)
{
   int flat = 0;

   for (int slot = 0; slot < c->vue_map.num_slots; slot++) {
      if (c->key.interp_mode[slot] == INTERP_MODE_FLAT) {
         flat++;
      }
   }

   return flat;
}
/* Need to use a computed jump to copy flatshaded attributes as the
* vertices are ordered according to y-coordinate before reaching this
* point, so the PV could be anywhere.
*
* The emitted code is three consecutive sections, one for each value of
* c->pv (0, 1, 2).  Section N copies the flat-shaded attributes of vert[N]
* into the other two vertices.  Sections 0 and 1 are 2*nr MOVs followed by
* one JMPI to the end, which is why pv is scaled by (nr*2+1); section 2 is
* just the final 2*nr MOVs.
*/
static void do_flatshade_triangle( struct elk_sf_compile *c )
{
struct elk_codegen *p = &c->func;
/* Number of flat-shaded slots, i.e. MOVs emitted per copy. */
GLuint nr;
/* Scale applied to every jump distance; doubled on ver 5.
* NOTE(review): presumably JMPI counts in smaller units there - confirm.
*/
GLuint jmpi = 1;
/* Already done in clip program:
*/
if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
return;
if (p->devinfo->ver == 5)
jmpi = 2;
nr = count_flatshaded_attributes(c);
/* Turn pv into the distance to the start of its section and jump there. */
elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr*2+1)));
elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
/* Section 0: vert[0] is the source. */
copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
/* Skip section 1 (2*nr MOVs + 1 JMPI) and section 2 (2*nr MOVs). */
elk_JMPI(p, elk_imm_d(jmpi*(nr*4+1)), ELK_PREDICATE_NONE);
/* Section 1: vert[1] is the source. */
copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
/* Skip section 2 (2*nr MOVs). */
elk_JMPI(p, elk_imm_d(jmpi*nr*2), ELK_PREDICATE_NONE);
/* Section 2: vert[2] is the source. */
copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
}
/* Line variant of the flat-shading copy.  Two sections are emitted, one
* for each value of c->pv (0, 1): section 0 is nr MOVs copying vert[0] to
* vert[1] followed by a JMPI over section 1, hence the (nr+1) scale applied
* to pv; section 1 is nr MOVs copying vert[1] to vert[0].
*/
static void do_flatshade_line( struct elk_sf_compile *c )
{
struct elk_codegen *p = &c->func;
/* Number of flat-shaded slots, i.e. MOVs emitted per copy. */
GLuint nr;
/* Scale applied to every jump distance; doubled on ver 5.
* NOTE(review): presumably JMPI counts in smaller units there - confirm.
*/
GLuint jmpi = 1;
/* Already done in clip program:
*/
if (c->key.primitive == ELK_SF_PRIM_UNFILLED_TRIS)
return;
if (p->devinfo->ver == 5)
jmpi = 2;
nr = count_flatshaded_attributes(c);
/* Turn pv into the distance to the start of its section and jump there. */
elk_MUL(p, c->pv, c->pv, elk_imm_d(jmpi*(nr+1)));
elk_JMPI(p, c->pv, ELK_PREDICATE_NONE);
/* Section 0: vert[0] is the source. */
copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
/* Skip section 1 (nr MOVs).
* NOTE(review): this immediate is UD while do_flatshade_triangle() uses D
* for the same purpose - confirm whether the difference is intentional.
*/
elk_JMPI(p, elk_imm_ud(jmpi*nr), ELK_PREDICATE_NONE);
/* Section 1: vert[1] is the source. */
copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
}
/***********************************************************************
* Triangle setup.
*/
/* Assign every register used by the setup programs.
 *
 * Layout: r1 holds the scalars computed by the fixed function unit, r2 the
 * per-vertex z and 1/w pairs, r3 onwards the vertices (nr_attr_regs
 * registers each, for c->nr_verts vertices), followed by four temporaries.
 * The total number of GRFs used is recorded in prog_data.total_grf.  The
 * outputs live in message registers m1..m3.
 */
static void alloc_regs( struct elk_sf_compile *c )
{
   GLuint v;
   GLuint next_grf;

   /* r1: values computed by the fixed function unit. */
   c->pv = retype(elk_vec1_grf(1, 1), ELK_REGISTER_TYPE_D);
   c->det = elk_vec1_grf(1, 2);
   c->dx0 = elk_vec1_grf(1, 3);
   c->dx2 = elk_vec1_grf(1, 4);
   c->dy0 = elk_vec1_grf(1, 5);
   c->dy2 = elk_vec1_grf(1, 6);

   /* r2: z and 1/w are passed in separately, interleaved per vertex. */
   for (v = 0; v < 3; v++) {
      c->z[v] = elk_vec1_grf(2, 2 * v);
      c->inv_w[v] = elk_vec1_grf(2, 2 * v + 1);
   }

   /* r3...: the vertices, nr_attr_regs registers apiece. */
   next_grf = 3;
   for (v = 0; v < c->nr_verts; v++) {
      c->vert[v] = elk_vec8_grf(next_grf, 0);
      next_grf += c->nr_attr_regs;
   }

   /* Temporaries follow the last vertex register. */
   c->inv_det = elk_vec1_grf(next_grf++, 0);
   c->a1_sub_a0 = elk_vec8_grf(next_grf++, 0);
   c->a2_sub_a0 = elk_vec8_grf(next_grf++, 0);
   c->tmp = elk_vec8_grf(next_grf++, 0);

   /* Record how many GRFs the program needs. */
   c->prog_data.total_grf = next_grf;

   /* Outputs of this program - the interpolation coefficients used for
    * rasterization.
    */
   c->m1Cx = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 1, 0);
   c->m2Cy = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 2, 0);
   c->m3C0 = elk_vec8_reg(ELK_MESSAGE_REGISTER_FILE, 3, 0);
}
static void copy_z_inv_w( struct elk_sf_compile *c )
{
struct elk_codegen *p = &c->func;
GLuint i;
/* Copy both scalars with a single MOV:
*/
for (i = 0; i < c->nr_verts; i++)
elk_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
}
/* Emit the math instruction that computes inv_det = 1 / det at full
 * precision.
 *
 * NOTE(review): the original author remarked that this appears to invert
 * all 8 elements just to obtain 1/det in position 2.
 */
static void invert_det( struct elk_sf_compile *c)
{
   struct elk_codegen *p = &c->func;

   elk_gfx4_math(p,
                 c->inv_det,
                 ELK_MATH_FUNCTION_INV,
                 0,
                 c->det,
                 ELK_MATH_PRECISION_FULL);
}
/* Compute the channel masks for one setup register (a pair of attributes).
 *
 * \param reg        index of the setup register
 * \param pc         out: channels to process at all; the low nibble is
 *                   always set, the high nibble only when the second half
 *                   of the register holds a varying
 * \param pc_persp   out: channels needing the multiply by 1/w (SMOOTH)
 * \param pc_linear  out: channels needing interpolation coefficients
 *                   (SMOOTH or NOPERSPECTIVE)
 * \return true when \p reg is the last setup register
 */
static bool
calculate_masks(struct elk_sf_compile *c,
                GLuint reg,
                GLushort *pc,
                GLushort *pc_persp,
                GLushort *pc_linear)
{
   GLushort active = 0;
   GLushort persp = 0;
   GLushort linear = 0;

   for (int half = 0; half < 2; half++) {
      const GLushort nibble = 0xf << (4 * half);
      enum glsl_interp_mode interp;

      /* Maybe only process one attribute on the final round: the second
       * half is skipped when no varying is stored there.
       */
      if (half == 1 &&
          vert_reg_to_varying(c, reg, 1) == ELK_VARYING_SLOT_COUNT)
         break;

      active |= nibble;

      interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, half)];
      if (interp == INTERP_MODE_SMOOTH) {
         linear |= nibble;
         persp |= nibble;
      } else if (interp == INTERP_MODE_NOPERSPECTIVE) {
         linear |= nibble;
      }
   }

   *pc_persp = persp;
   *pc_linear = linear;
   *pc = active;

   return reg == c->nr_setup_regs - 1;
}
/* Work out the predicate mask of the channels in a register (two
 * attributes) that need point sprite coordinate replacement.
 *
 * A half is selected when it holds a texture coordinate whose bit is set in
 * the key's point_sprite_coord_replace mask, or when it holds the point
 * coordinate varying.  The first half maps to 0x0f, the second to 0xf0.
 */
static uint16_t
calculate_point_sprite_mask(struct elk_sf_compile *c, GLuint reg)
{
   uint16_t pc = 0;

   for (int half = 0; half < 2; half++) {
      const int varying = vert_reg_to_varying(c, reg, half);
      const bool is_texcoord =
         varying >= VARYING_SLOT_TEX0 && varying <= VARYING_SLOT_TEX7;
      bool replace = (varying == ELK_VARYING_SLOT_PNTC);

      if (is_texcoord &&
          (c->key.point_sprite_coord_replace &
           (1 << (varying - VARYING_SLOT_TEX0))))
         replace = true;

      if (replace)
         pc |= 0x0f << (4 * half);
   }

   return pc;
}
/* Make subsequently emitted instructions execute only on the channels set
 * in \p value.
 *
 * A value of 0xff leaves predication off.  Otherwise f0.0 is loaded with
 * the mask, unless c->flag_value shows it already holds that mask, and
 * normal predication is turned on.  The flag load itself is emitted while
 * predication is off.
 */
static void
set_predicate_control_flag_value(struct elk_codegen *p,
                                 struct elk_sf_compile *c,
                                 unsigned value)
{
   elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);

   if (value == 0xff)
      return;

   if (c->flag_value != value) {
      elk_MOV(p, elk_flag_reg(0, 0), elk_imm_uw(value));
      c->flag_value = value;
   }

   elk_set_default_predicate_control(p, ELK_PREDICATE_NORMAL);
}
/* Emit the setup program for triangles.
*
* After inverting the determinant, copying z and 1/w into the vertices and
* doing the optional two-sided color and flat-shading fixups, one URB write
* is emitted per setup register (a pair of attributes).  Each write carries
* the d/dx coefficients (m1), the d/dy coefficients (m2) and the value at
* vertex 0 (m3).
*
* allocate: when true, registers are assigned with alloc_regs() first;
* elk_emit_anyprim_setup() passes false because it has already done so.
*/
static void elk_emit_tri_setup(struct elk_sf_compile *c, bool allocate)
{
struct elk_codegen *p = &c->func;
GLuint i;
/* 0xff never needs loading into f0.0, so this makes the first real mask
* always emit a flag MOV.
*/
c->flag_value = 0xff;
c->nr_verts = 3;
if (allocate)
alloc_regs(c);
invert_det(c);
copy_z_inv_w(c);
if (c->key.do_twoside_color)
do_twoside_color(c);
if (c->key.contains_flat_varying)
do_flatshade_triangle(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct elk_reg a0 = offset(c->vert[0], i);
struct elk_reg a1 = offset(c->vert[1], i);
struct elk_reg a2 = offset(c->vert[2], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
/* Perspective-correct channels are multiplied by 1/w in place. */
if (pc_persp)
{
set_predicate_control_flag_value(p, c, pc_persp);
elk_MUL(p, a0, a0, c->inv_w[0]);
elk_MUL(p, a1, a1, c->inv_w[1]);
elk_MUL(p, a2, a2, c->inv_w[2]);
}
/* Calculate coefficients for interpolated values:
*/
if (pc_linear)
{
set_predicate_control_flag_value(p, c, pc_linear);
elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
elk_ADD(p, c->a2_sub_a0, a2, negate(a0));
/* calculate dA/dx
*
* NOTE(review): the MUL to the null register presumably primes the
* accumulator for the following MAC, giving
* tmp = a1_sub_a0 * dy2 - a2_sub_a0 * dy0 - confirm.
*/
elk_MUL(p, elk_null_reg(), c->a1_sub_a0, c->dy2);
elk_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
/* calculate dA/dy
*/
elk_MUL(p, elk_null_reg(), c->a2_sub_a0, c->dx0);
elk_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
{
set_predicate_control_flag_value(p, c, pc);
/* start point for interpolation
*/
elk_MOV(p, c->m3C0, a0);
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
* the send instruction.  The write for the last setup register also
* carries the EOT/complete flags.
*/
elk_urb_WRITE(p,
elk_null_reg(),
0,
elk_vec8_grf(0, 0), /* r0, will be copied to m0 */
last ? ELK_URB_WRITE_EOT_COMPLETE
: ELK_URB_WRITE_NO_FLAGS,
4, /* msg len */
0, /* response len */
i*4, /* offset */
ELK_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
}
}
/* Leave predication off for whatever is emitted next. */
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
}
/* Emit the setup program for lines.
*
* Same overall structure as the triangle version but with two vertices:
* per setup register the coefficients are (a1 - a0) * dx0 * inv_det for m1
* and (a1 - a0) * dy0 * inv_det for m2, with vertex 0's value in m3.
*
* allocate: when true, registers are assigned with alloc_regs() first;
* elk_emit_anyprim_setup() passes false because it has already done so.
*/
static void elk_emit_line_setup(struct elk_sf_compile *c, bool allocate)
{
struct elk_codegen *p = &c->func;
GLuint i;
/* 0xff never needs loading into f0.0, so this makes the first real mask
* always emit a flag MOV.
*/
c->flag_value = 0xff;
c->nr_verts = 2;
if (allocate)
alloc_regs(c);
invert_det(c);
copy_z_inv_w(c);
if (c->key.contains_flat_varying)
do_flatshade_line(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct elk_reg a0 = offset(c->vert[0], i);
struct elk_reg a1 = offset(c->vert[1], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
/* Perspective-correct channels are multiplied by 1/w in place. */
if (pc_persp)
{
set_predicate_control_flag_value(p, c, pc_persp);
elk_MUL(p, a0, a0, c->inv_w[0]);
elk_MUL(p, a1, a1, c->inv_w[1]);
}
/* Calculate coefficients for position, color:
*/
if (pc_linear) {
set_predicate_control_flag_value(p, c, pc_linear);
elk_ADD(p, c->a1_sub_a0, a1, negate(a0));
elk_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
elk_MUL(p, c->m1Cx, c->tmp, c->inv_det);
elk_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
elk_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
{
set_predicate_control_flag_value(p, c, pc);
/* start point for interpolation
*/
elk_MOV(p, c->m3C0, a0);
/* Copy m0..m3 to URB.  The write for the last setup register also
* carries the EOT/complete flags.
*/
elk_urb_WRITE(p,
elk_null_reg(),
0,
elk_vec8_grf(0, 0),
last ? ELK_URB_WRITE_EOT_COMPLETE
: ELK_URB_WRITE_NO_FLAGS,
4, /* msg len */
0, /* response len */
i*4, /* urb destination offset */
ELK_URB_SWIZZLE_TRANSPOSE);
}
}
/* Leave predication off for whatever is emitted next. */
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
}
/* Emit the setup program for point sprites.
*
* Per setup register, channels selected for coordinate replacement get
* synthesized coefficients, while all remaining channels are treated as
* constant across the point (zero deltas, vertex value as the start).
*
* allocate: when true, registers are assigned with alloc_regs() first;
* elk_emit_anyprim_setup() passes false because it has already done so.
*/
static void elk_emit_point_sprite_setup(struct elk_sf_compile *c, bool allocate)
{
struct elk_codegen *p = &c->func;
GLuint i;
/* 0xff never needs loading into f0.0, so this makes the first real mask
* always emit a flag MOV.
*/
c->flag_value = 0xff;
c->nr_verts = 1;
if (allocate)
alloc_regs(c);
copy_z_inv_w(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
struct elk_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear, pc_coord_replace;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
pc_coord_replace = calculate_point_sprite_mask(c, i);
/* Replaced coordinates are generated below, so they skip the 1/w multiply. */
pc_persp &= ~pc_coord_replace;
if (pc_persp) {
set_predicate_control_flag_value(p, c, pc_persp);
elk_MUL(p, a0, a0, c->inv_w[0]);
}
/* Point sprite coordinate replacement: A texcoord with this
* enabled gets replaced with the value (x, y, 0, 1) where x and
* y vary from 0 to 1 across the horizontal and vertical of the
* point.
*/
if (pc_coord_replace) {
set_predicate_control_flag_value(p, c, pc_coord_replace);
/* Calculate 1.0/PointWidth */
elk_gfx4_math(&c->func,
c->tmp,
ELK_MATH_FUNCTION_INV,
0,
c->dx0,
ELK_MATH_PRECISION_FULL);
/* Align16 mode is needed for the per-component writemasks below. */
elk_set_default_access_mode(p, ELK_ALIGN_16);
/* dA/dx, dA/dy: zero everywhere except d(x)/dx and d(y)/dy, which are
* 1/PointWidth; the y slope is negated for a lower-left origin.
*/
elk_MOV(p, c->m1Cx, elk_imm_f(0.0));
elk_MOV(p, c->m2Cy, elk_imm_f(0.0));
elk_MOV(p, elk_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
if (c->key.sprite_origin_lower_left) {
elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
} else {
elk_MOV(p, elk_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
}
/* attribute constant offset: w is 1; y also starts at 1 for a
* lower-left origin.
*/
elk_MOV(p, c->m3C0, elk_imm_f(0.0));
if (c->key.sprite_origin_lower_left) {
elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_YW), elk_imm_f(1.0));
} else {
elk_MOV(p, elk_writemask(c->m3C0, WRITEMASK_W), elk_imm_f(1.0));
}
elk_set_default_access_mode(p, ELK_ALIGN_1);
}
/* Channels that are not replaced are constant across the point. */
if (pc & ~pc_coord_replace) {
set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
elk_MOV(p, c->m1Cx, elk_imm_ud(0));
elk_MOV(p, c->m2Cy, elk_imm_ud(0));
elk_MOV(p, c->m3C0, a0); /* constant value */
}
set_predicate_control_flag_value(p, c, pc);
/* Copy m0..m3 to URB.  The write for the last setup register also
* carries the EOT/complete flags.
*/
elk_urb_WRITE(p,
elk_null_reg(),
0,
elk_vec8_grf(0, 0),
last ? ELK_URB_WRITE_EOT_COMPLETE
: ELK_URB_WRITE_NO_FLAGS,
4, /* msg len */
0, /* response len */
i*4, /* urb destination offset */
ELK_URB_SWIZZLE_TRANSPOSE);
}
/* Leave predication off for whatever is emitted next. */
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
}
/* Points setup - several simplifications as all attributes are
* constant across the face of the point (point sprites excluded!)
*
* The delta registers m1 and m2 are zeroed once up front; per setup
* register only the start value m3 is filled in before the URB write.
*
* allocate: when true, registers are assigned with alloc_regs() first;
* elk_emit_anyprim_setup() passes false because it has already done so.
*/
static void elk_emit_point_setup(struct elk_sf_compile *c, bool allocate)
{
struct elk_codegen *p = &c->func;
GLuint i;
/* 0xff never needs loading into f0.0, so this makes the first real mask
* always emit a flag MOV.
*/
c->flag_value = 0xff;
c->nr_verts = 1;
if (allocate)
alloc_regs(c);
copy_z_inv_w(c);
elk_MOV(p, c->m1Cx, elk_imm_ud(0)); /* zero - move out of loop */
elk_MOV(p, c->m2Cy, elk_imm_ud(0)); /* zero - move out of loop */
for (i = 0; i < c->nr_setup_regs; i++)
{
struct elk_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
/* This seems odd as the values are all constant, but the
* fragment shader will be expecting it:
*/
set_predicate_control_flag_value(p, c, pc_persp);
elk_MUL(p, a0, a0, c->inv_w[0]);
}
/* The delta values are always zero, just send the starting
* coordinate. Again, this is to fit in with the interpolation
* code in the fragment shader.
*/
{
set_predicate_control_flag_value(p, c, pc);
elk_MOV(p, c->m3C0, a0); /* constant value */
/* Copy m0..m3 to URB.  The write for the last setup register also
* carries the EOT/complete flags.
*/
elk_urb_WRITE(p,
elk_null_reg(),
0,
elk_vec8_grf(0, 0),
last ? ELK_URB_WRITE_EOT_COMPLETE
: ELK_URB_WRITE_NO_FLAGS,
4, /* msg len */
0, /* response len */
i*4, /* urb destination offset */
ELK_URB_SWIZZLE_TRANSPOSE);
}
}
/* Leave predication off for whatever is emitted next. */
elk_set_default_predicate_control(p, ELK_PREDICATE_NONE);
}
/* Emit a program that handles any primitive type by testing the payload at
* run time and dispatching to the triangle, line, point sprite or point
* setup code, all of which are emitted back to back.
*
* Registers are allocated once for three vertices and shared by all four
* variants, which are therefore called with allocate == false.  Each test is
* an AND into the null register with a Z condition modifier followed by a
* predicated forward JMPI that skips the variant when the test finds no
* matching bit.
*/
static void elk_emit_anyprim_setup( struct elk_sf_compile *c )
{
struct elk_codegen *p = &c->func;
/* r1.0 viewed as a UW: used as the shift count that builds primmask. */
struct elk_reg payload_prim = elk_uw1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0);
/* r1.0 viewed as a UD: tested for the sprite point enable bit. */
struct elk_reg payload_attr = get_element_ud(elk_vec1_reg(ELK_GENERAL_REGISTER_FILE, 1, 0), 0);
struct elk_reg primmask;
/* Index of the pending forward JMPI within p->store. */
int jmp;
struct elk_reg v1_null_ud = vec1(retype(elk_null_reg(), ELK_REGISTER_TYPE_UD));
c->nr_verts = 3;
alloc_regs(c);
/* primmask = 1 << payload_prim, kept in element 0 of the tmp register. */
primmask = retype(get_element(c->tmp, 0), ELK_REGISTER_TYPE_UD);
elk_MOV(p, primmask, elk_imm_ud(1));
elk_SHL(p, primmask, primmask, payload_prim);
/* Triangle-like primitives. */
elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_TRILIST) |
(1<<_3DPRIM_TRISTRIP) |
(1<<_3DPRIM_TRIFAN) |
(1<<_3DPRIM_TRISTRIP_REVERSE) |
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
/* Jump target is filled in by elk_land_fwd_jump() once the variant is emitted. */
jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
elk_emit_tri_setup(c, false);
elk_land_fwd_jump(p, jmp);
/* Line-like primitives. */
elk_AND(p, v1_null_ud, primmask, elk_imm_ud((1<<_3DPRIM_LINELIST) |
(1<<_3DPRIM_LINESTRIP) |
(1<<_3DPRIM_LINELOOP) |
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
elk_emit_line_setup(c, false);
elk_land_fwd_jump(p, jmp);
/* Point sprites, selected by a bit in the payload rather than primmask. */
elk_AND(p, v1_null_ud, payload_attr, elk_imm_ud(1<<ELK_SPRITE_POINT_ENABLE));
elk_inst_set_cond_modifier(p->devinfo, elk_last_inst, ELK_CONDITIONAL_Z);
jmp = elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL) - p->store;
elk_emit_point_sprite_setup(c, false);
elk_land_fwd_jump(p, jmp);
/* Everything else is handled as a plain point. */
elk_emit_point_setup( c, false );
}
/**
 * Compile an SF (setup) program for the given key.
 *
 * \param compiler             compiler whose ISA description is used
 * \param mem_ctx              memory context handed to the code generator
 * \param key                  program key; selects the primitive type and
 *                             the optional features
 * \param prog_data            out: receives the program metadata
 * \param vue_map              VUE map to compile against; it is copied, the
 *                             caller's map is not modified
 * \param final_assembly_size  out: size of the returned program, as
 *                             reported by elk_get_program()
 * \return the assembled program
 */
const unsigned *
elk_compile_sf(const struct elk_compiler *compiler,
               void *mem_ctx,
               const struct elk_sf_prog_key *key,
               struct elk_sf_prog_data *prog_data,
               struct intel_vue_map *vue_map,
               unsigned *final_assembly_size)
{
   struct elk_sf_compile c;
   struct intel_vue_map *map = &c.vue_map;

   memset(&c, 0, sizeof(c));

   /* Set up the code generator and take private copies of the inputs. */
   elk_init_codegen(&compiler->isa, &c.func, mem_ctx);
   c.key = *key;
   c.vue_map = *vue_map;

   if (c.key.do_point_coord) {
      /* gl_PointCoord is an FS rather than a VS builtin, so the VUE map
       * generated at the VS stage does not contain it.  Append it to our
       * copy by hand so that the SF program produces the interpolation
       * coefficients the FS needs.
       */
      map->varying_to_slot[ELK_VARYING_SLOT_PNTC] = map->num_slots;
      map->slot_to_varying[map->num_slots] = ELK_VARYING_SLOT_PNTC;
      map->num_slots++;
   }

   c.urb_entry_read_offset = ELK_SF_URB_ENTRY_READ_OFFSET;
   c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
   c.nr_setup_regs = c.nr_attr_regs;

   c.prog_data.urb_read_length = c.nr_attr_regs;
   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;

   /* Emit code for the requested primitive type, or for all of them when
    * the type is only known at run time.
    */
   switch (key->primitive) {
   case ELK_SF_PRIM_TRIANGLES:
      c.nr_verts = 3;
      elk_emit_tri_setup(&c, true);
      break;

   case ELK_SF_PRIM_LINES:
      c.nr_verts = 2;
      elk_emit_line_setup(&c, true);
      break;

   case ELK_SF_PRIM_POINTS:
      c.nr_verts = 1;
      if (key->do_point_sprite) {
         elk_emit_point_sprite_setup(&c, true);
      } else {
         elk_emit_point_setup(&c, true);
      }
      break;

   case ELK_SF_PRIM_UNFILLED_TRIS:
      c.nr_verts = 3;
      elk_emit_anyprim_setup(&c);
      break;

   default:
      unreachable("not reached");
   }

   /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
    * source), which makes instruction compaction difficult, so
    * elk_compact_instructions() is deliberately not run here.
    */

   *prog_data = c.prog_data;

   const unsigned *assembly = elk_get_program(&c.func, final_assembly_size);

   if (INTEL_DEBUG(DEBUG_SF)) {
      fprintf(stderr, "sf:\n");
      elk_disassemble_with_labels(&compiler->isa,
                                  assembly, 0, *final_assembly_size, stderr);
      fprintf(stderr, "\n");
   }

   return assembly;
}