/*
* XGL
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* LunarG
*/
#include "gpu.h"
#include "shader.h"
#include "pipeline.h"
#include "compiler/shader/compiler_interface.h"
#include "compiler/pipeline/pipeline_compiler_interface.h"
#include "compiler/pipeline/brw_context.h"
#include "compiler/pipeline/brw_shader.h"
#include "compiler/mesa-utils/src/mesa/main/context.h"
#include "compiler/mesa-utils/src/glsl/ralloc.h"
#include "compiler/pipeline/brw_device_info.h"
#include "compiler/pipeline/brw_wm.h"
static void initialize_brw_context(struct brw_context *brw,
                                   const struct intel_gpu *gpu)
{
    // create a stripped down context for compilation
    initialize_mesa_context_to_defaults(&brw->ctx);

    //
    // init the things pulled from DRI in brwCreateContext
    //
    struct brw_device_info *devInfo = rzalloc(brw, struct brw_device_info);
    switch (intel_gpu_gen(gpu)) {
    case INTEL_GEN(7.5):
        devInfo->gen = 7;
        devInfo->is_haswell = true;
        break;
    case INTEL_GEN(7):
        devInfo->gen = 7;
        break;
    case INTEL_GEN(6):
        devInfo->gen = 6;
        break;
    default:
        assert(!"unsupported GEN");
        break;
    }

    devInfo->gt = gpu->gt;
    devInfo->has_llc = true;
    devInfo->has_pln = true;
    devInfo->has_compr4 = true;
    devInfo->has_negative_rhw_bug = false;
    devInfo->needs_unlit_centroid_workaround = true;

    // hand code values until we have something to pull from
    // use brw_device_info_hsw_gt3
    brw->intelScreen = rzalloc(brw, struct intel_screen);
    brw->intelScreen->devinfo = devInfo;

    brw->gen = brw->intelScreen->devinfo->gen;
    brw->gt = brw->intelScreen->devinfo->gt;
    brw->is_g4x = brw->intelScreen->devinfo->is_g4x;
    brw->is_baytrail = brw->intelScreen->devinfo->is_baytrail;
    brw->is_haswell = brw->intelScreen->devinfo->is_haswell;
    brw->has_llc = brw->intelScreen->devinfo->has_llc;
    brw->has_pln = brw->intelScreen->devinfo->has_pln;
    brw->has_compr4 = brw->intelScreen->devinfo->has_compr4;
    brw->has_negative_rhw_bug = brw->intelScreen->devinfo->has_negative_rhw_bug;
    brw->needs_unlit_centroid_workaround =
        brw->intelScreen->devinfo->needs_unlit_centroid_workaround;

    brw->vs.base.stage = MESA_SHADER_VERTEX;
    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
    brw->wm.base.stage = MESA_SHADER_FRAGMENT;

    //
    // init what remains of intel_screen
    //
    brw->intelScreen->deviceID = 0;
    brw->intelScreen->program_id = 0;

    brw_vec4_alloc_reg_set(brw->intelScreen);

    brw->shader_prog = brw_new_shader_program(&brw->ctx, 0);
}
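
// Free the shader program and the compile context created by
// intel_create_brw_context().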
static inline void pipeline_destroy_compile(struct brw_context *brw) {
    ralloc_free(brw->shader_prog);
    ralloc_free(brw);
}
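
// Debug helper: dump a buffer as 32-bit words, four per row.  buflen is in
// bytes; note that the offset printed at the start of each row is the word
// index, not a byte offset.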
static void hexdump(FILE *fp, void *ptr, int buflen) {
    unsigned int *buf = (unsigned int*)ptr;
    int i, j;
    for (i=0; i<(buflen/4); i+=4) {
        fprintf(fp,"%06x: ", i);
        for (j=0; j<4; j++)
            if (i+j < (buflen/4))
                fprintf(fp,"%08x ", buf[i+j]);
            else
                fprintf(fp," ");
        fprintf(fp,"\n");
    }
    fflush(fp);
}
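
// Debug dump of the fields common to all stages (brw_stage_prog_data):
// binding table layout plus the push and pull constant tables.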
static void base_prog_dump(FILE *fp, struct brw_stage_prog_data* base)
{
    fprintf(fp, "data->base.binding_table.size_bytes = %u\n",
                 base->binding_table.size_bytes);
    fprintf(fp, "data->base.binding_table.pull_constants_start = %u\n",
                 base->binding_table.pull_constants_start);
    fprintf(fp, "data->base.binding_table.texture_start = %u\n",
                 base->binding_table.texture_start);
    fprintf(fp, "data->base.binding_table.gather_texture_start = %u\n",
                 base->binding_table.gather_texture_start);
    fprintf(fp, "data->base.binding_table.ubo_start = %u\n",
                 base->binding_table.ubo_start);
    fprintf(fp, "data->base.binding_table.abo_start = %u\n",
                 base->binding_table.abo_start);
    fprintf(fp, "data->base.binding_table.shader_time_start = %u\n",
                 base->binding_table.shader_time_start);

    fprintf(fp, "data->base.nr_params = %u\n",
                 base->nr_params);
    fprintf(fp, "data->base.nr_pull_params = %u\n",
                 base->nr_pull_params);

    fprintf(fp, "== push constants: ==\n");
    fprintf(fp, "data->base.nr_params = %u\n",
                 base->nr_params);

    for (int i = 0; i < base->nr_params; ++i) {
        fprintf(fp, "data->base.param[%d] = %p\n",
                     i, base->param[i]);
        fprintf(fp, "*data->base.param[%d] = %f\n",
                     i, *base->param[i]);
    }

    fprintf(fp, "== pull constants: ==\n");
    fprintf(fp, "data->base.nr_pull_params = %u\n",
                 base->nr_pull_params);

    for (int i = 0; i < base->nr_pull_params; ++i) {
        fprintf(fp, "data->base.pull_param[%d] = %p\n",
                     i, base->pull_param[i]);
        fprintf(fp, "*data->base.pull_param[%d] = %f\n",
                     i, *base->pull_param[i]);
    }
}
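
// Debug dump of the VS-specific program data: VUE map, URB/CURB read
// lengths, register usage, and the vertexid/instanceid system value flags.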
static void vs_data_dump(FILE *fp, struct brw_vs_prog_data *data)
{
    fprintf(fp, "\n=== begin brw_vs_prog_data ===\n");

    base_prog_dump(fp, &data->base.base);

    fprintf(fp, "data->base.vue_map.slots_valid = 0x%" PRIX64 "\n",
                 data->base.vue_map.slots_valid);

    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
        fprintf(fp, "data->base.vue_map.varying_to_slot[%i] = %i\n", i,
                     (int) data->base.vue_map.varying_to_slot[i]);

    for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
        fprintf(fp, "data->base.vue_map.slot_to_varying[%i] = %i\n", i,
                     (int) data->base.vue_map.slot_to_varying[i]);

    fprintf(fp, "data->base.vue_map.num_slots = %i\n",
                 data->base.vue_map.num_slots);
    fprintf(fp, "data->base.dispatch_grf_start_reg = %u\n",
                 data->base.dispatch_grf_start_reg);
    fprintf(fp, "data->base.curb_read_length = %u\n",
                 data->base.curb_read_length);
    fprintf(fp, "data->base.urb_read_length = %u\n",
                 data->base.urb_read_length);
    fprintf(fp, "data->base.total_grf = %u\n",
                 data->base.total_grf);
    fprintf(fp, "data->base.total_scratch = %u\n",
                 data->base.total_scratch);
    fprintf(fp, "data->base.urb_entry_size = %u\n",
                 data->base.urb_entry_size);

    fprintf(fp, "data->inputs_read = 0x%" PRIX64 "\n",
                 data->inputs_read);
    fprintf(fp, "data->uses_vertexid = %s\n",
                 data->uses_vertexid ? "true" : "false");
    fprintf(fp, "data->uses_instanceid = %s\n",
                 data->uses_instanceid ? "true" : "false");

    fprintf(fp, "=== end brw_vs_prog_data ===\n");

    fflush(fp);
}
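
// Debug dump of the FS/WM-specific program data: varying inputs, SIMD8 and
// SIMD16 register layout, render-target binding table start, and the
// urb_setup map.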
static void fs_data_dump(FILE *fp, struct brw_wm_prog_data* data)
{
    fprintf(fp, "\n=== begin brw_wm_prog_data ===\n");

    base_prog_dump(fp, &data->base);

    fprintf(fp, "data->curb_read_length = %u\n",
                 data->curb_read_length);
    fprintf(fp, "data->num_varying_inputs = %u\n",
                 data->num_varying_inputs);
    fprintf(fp, "data->first_curbe_grf = %u\n",
                 data->first_curbe_grf);
    fprintf(fp, "data->first_curbe_grf_16 = %u\n",
                 data->first_curbe_grf_16);
    fprintf(fp, "data->reg_blocks = %u\n",
                 data->reg_blocks);
    fprintf(fp, "data->reg_blocks_16 = %u\n",
                 data->reg_blocks_16);
    fprintf(fp, "data->total_scratch = %u\n",
                 data->total_scratch);
    fprintf(fp, "data->binding_table.render_target_start = %u\n",
                 data->binding_table.render_target_start);

    fprintf(fp, "data->dual_src_blend = %s\n",
                 data->dual_src_blend ? "true" : "false");
    fprintf(fp, "data->uses_pos_offset = %s\n",
                 data->uses_pos_offset ? "true" : "false");
    fprintf(fp, "data->uses_omask = %s\n",
                 data->uses_omask ? "true" : "false");

    fprintf(fp, "data->prog_offset_16 = %u\n",
                 data->prog_offset_16);
    fprintf(fp, "data->barycentric_interp_modes = %u\n",
                 data->barycentric_interp_modes);

    for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
        fprintf(fp, "data->urb_setup[%i] = %i\n",
                     i, data->urb_setup[i]);
    }

    fprintf(fp, "=== end brw_wm_prog_data ===\n");

    fflush(fp);
}
extern "C" {
struct brw_context *intel_create_brw_context(const struct intel_gpu *gpu)
{
    // create a brw_context
    struct brw_context *brw = rzalloc(NULL, struct brw_context);

    // initialize it and ralloc its sub-structures, with brw as their parent
    initialize_brw_context(brw, gpu);

    return brw;
}
// invoke backend compiler to generate ISA and supporting data structures
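// The dumps below are gated on INTEL_DEBUG & DEBUG_VS / DEBUG_WM; if this
// build keeps Mesa's stock intel_debug plumbing, that corresponds to running
// with INTEL_DEBUG=vs (or wm) set in the environment.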
XGL_RESULT intel_pipeline_shader_compile(struct intel_pipeline_shader *pipe_shader,
                                         const struct intel_gpu *gpu,
                                         const struct intel_ir *ir)
{
    /* XXX how about constness? */
    struct gl_shader_program *sh_prog = (struct gl_shader_program *) ir;
    XGL_RESULT status = XGL_SUCCESS;

    struct brw_context *brw = intel_create_brw_context(gpu);

    // LunarG : TODO - should this have been set for us somewhere?
    sh_prog->Type = sh_prog->Shaders[0]->Stage;

    if (brw_link_shader(&brw->ctx, sh_prog)) {

        // first take at standalone backend compile
        switch (sh_prog->Shaders[0]->Type) {
        case GL_VERTEX_SHADER:
        {
            pipe_shader->codeSize = get_vs_program_size(brw->shader_prog);

            pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
            if (!pipe_shader->pCode) {
                status = XGL_ERROR_OUT_OF_MEMORY;
                break;
            }

            // copy the ISA out of our compile context, it is about to poof away
            memcpy(pipe_shader->pCode, get_vs_program(brw->shader_prog), pipe_shader->codeSize);

            struct brw_vs_prog_data *data = get_vs_prog_data(brw->shader_prog);

            if (data->uses_vertexid)
                pipe_shader->uses |= INTEL_SHADER_USE_VID;

            if (data->uses_instanceid)
                pipe_shader->uses |= INTEL_SHADER_USE_IID;

            uint32_t user_attr_read = 0;
            for (int i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
                if (data->inputs_read & BITFIELD64_BIT(i)) {
                    user_attr_read |= (1 << (i - VERT_ATTRIB_GENERIC0));
                }
            }
            pipe_shader->user_attributes_read = user_attr_read;

            pipe_shader->enable_user_clip = sh_prog->Vert.UsesClipDistance;

            // These are really best guesses, and will require more work to
            // understand as we turn on more features
            pipe_shader->in_count = data->base.urb_read_length;            // = 1;
            pipe_shader->out_count = data->base.vue_map.num_slots;         // = 2;
            pipe_shader->urb_grf_start = data->base.dispatch_grf_start_reg;// = 1;
            pipe_shader->surface_count = data->base.base.binding_table.size_bytes / 4;

            if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
                printf("out_count: %d\n", pipe_shader->out_count);
                vs_data_dump(stdout, data);

                fprintf(stdout, "\nISA generated by compiler:\n");
                fprintf(stdout, "ISA size: %i\n", (int) pipe_shader->codeSize);
                hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
                fflush(stdout);
            }
        }
        break;

        case GL_FRAGMENT_SHADER:
        {
            // Start pulling bits out of our compile result.
            // See upload_ps_state() for notes on what I believe each of these values is.
            // I would prefer to find a way to pull this data out without exposing
            // the internals of the compiler, but it hasn't presented itself yet.
            pipe_shader->codeSize = get_wm_program_size(brw->shader_prog);

            pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
            if (!pipe_shader->pCode) {
                status = XGL_ERROR_OUT_OF_MEMORY;
                break;
            }

            // copy the ISA out of our compile context, it is about to poof away
            memcpy(pipe_shader->pCode, get_wm_program(brw->shader_prog), pipe_shader->codeSize);

            struct brw_wm_prog_data *data = get_wm_prog_data(brw->shader_prog);

            pipe_shader->surface_count = data->base.binding_table.size_bytes / 4;
            pipe_shader->urb_grf_start = data->first_curbe_grf;
            pipe_shader->in_count = data->num_varying_inputs;

            // Ensure this is 1:1, or create a converter
            pipe_shader->barycentric_interps = data->barycentric_interp_modes;

            struct brw_stage_state *stage_state = &brw->wm.base;
            pipe_shader->sampler_count = stage_state->sampler_count;

            // TODO - Figure out multiple FS outputs
            pipe_shader->out_count = 1;

            // TODO - pipe_shader->uses is not filled in for fragment shaders yet

            if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
                // print out the supporting structures generated by the BE compile:
                fs_data_dump(stdout, data);

                printf("in_count: %d\n", pipe_shader->in_count);

                fprintf(stdout, "\nISA generated by compiler:\n");
                fprintf(stdout, "ISA size: %i\n", (int) pipe_shader->codeSize);
                hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
                fflush(stdout);
            }
        }
        break;

        case GL_GEOMETRY_SHADER:
        case GL_COMPUTE_SHADER:
        default:
            assert(0);
            status = XGL_ERROR_BAD_PIPELINE_DATA;
        }
    } else {
        assert(0);
        status = XGL_ERROR_BAD_PIPELINE_DATA;
    }

    pipeline_destroy_compile(brw);

    return status;
}
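
// Disassemble a compiled kernel to stderr by pointing a throwaway
// brw_compile at the ISA and reusing brw_dump_compile().  Note that the
// brw_context created here is not freed.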
void intel_disassemble_kernel(const struct intel_gpu *gpu,
                              const void *kernel, XGL_SIZE size)
{
    struct brw_compile c;

    memset(&c, 0, sizeof(c));
    c.brw = intel_create_brw_context(gpu);
    c.store = (struct brw_instruction *) kernel;

    brw_dump_compile(&c, stderr, 0, size);
}
} // extern "C"