// icd/intel/compiler/pipeline/pipeline_compiler_interface.cpp - third_party/vulkan_loader_and_validation_layers - Git at Google

 /*
  * XGL
  *
  * Copyright (C) 2014 LunarG, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *   LunarG
  */

 #include "gpu.h"
 #include "shader.h"
 #include "pipeline.h"
 #include "compiler/shader/compiler_interface.h"
 #include "compiler/pipeline/pipeline_compiler_interface.h"
 #include "compiler/pipeline/brw_context.h"
 #include "compiler/pipeline/brw_shader.h"
 #include "compiler/mesa-utils/src/mesa/main/context.h"
 #include "compiler/mesa-utils/src/glsl/ralloc.h"
 #include "compiler/pipeline/brw_device_info.h"
 #include "compiler/pipeline/brw_wm.h"


 static void initialize_brw_context(struct brw_context *brw,
                                    const struct intel_gpu *gpu)
 {

     // create a stripped down context for compilation
     initialize_mesa_context_to_defaults(&brw->ctx);

     //
     // init the things pulled from DRI in brwCreateContext
     //
     struct brw_device_info *devInfo = rzalloc(brw, struct brw_device_info);
     switch (intel_gpu_gen(gpu)) {
     case INTEL_GEN(7.5):
         devInfo->gen = 7;
         devInfo->is_haswell = true;
         break;
     case INTEL_GEN(7):
         devInfo->gen = 7;
         break;
     case INTEL_GEN(6):
         devInfo->gen = 6;
         break;
     default:
         assert(!"unsupported GEN");
         break;
     }

     devInfo->gt = gpu->gt;
     devInfo->has_llc = true;
     devInfo->has_pln = true;
     devInfo->has_compr4 = true;
     devInfo->has_negative_rhw_bug = false;
     devInfo->needs_unlit_centroid_workaround = true;

     // hand code values until we have something to pull from
     // use brw_device_info_hsw_gt3
     brw->intelScreen = rzalloc(brw, struct intel_screen);
     brw->intelScreen->devinfo = devInfo;

     brw->gen = brw->intelScreen->devinfo->gen;
     brw->gt = brw->intelScreen->devinfo->gt;
     brw->is_g4x = brw->intelScreen->devinfo->is_g4x;
     brw->is_baytrail = brw->intelScreen->devinfo->is_baytrail;
     brw->is_haswell = brw->intelScreen->devinfo->is_haswell;
     brw->has_llc = brw->intelScreen->devinfo->has_llc;
     brw->has_pln = brw->intelScreen->devinfo->has_pln;
     brw->has_compr4 = brw->intelScreen->devinfo->has_compr4;
     brw->has_negative_rhw_bug = brw->intelScreen->devinfo->has_negative_rhw_bug;
     brw->needs_unlit_centroid_workaround =
        brw->intelScreen->devinfo->needs_unlit_centroid_workaround;

     brw->vs.base.stage = MESA_SHADER_VERTEX;
     brw->gs.base.stage = MESA_SHADER_GEOMETRY;
     brw->wm.base.stage = MESA_SHADER_FRAGMENT;

     //
     // init what remains of intel_screen
     //
     brw->intelScreen->deviceID = 0;
     brw->intelScreen->program_id = 0;

     brw_vec4_alloc_reg_set(brw->intelScreen);

     brw->shader_prog = brw_new_shader_program(&brw->ctx, 0);
 }

 /*
  * Release a compile context: the shader program is freed first, then the
  * brw_context allocation itself.
  */
 static inline void pipeline_destroy_compile(struct brw_context *brw)
 {
     // program first, while brw is still valid to read from
     ralloc_free(brw->shader_prog);

     ralloc_free(brw);
 }

 /*
  * Write a buffer to fp as rows of up to four 32-bit words in hex.
  *
  * Each row starts with the index of its first word (a word index, not a
  * byte offset). Only whole words are shown: buflen / 4 of them. Missing
  * words in the last row are padded with three spaces each.
  */
 static void hexdump(FILE *fp, void *ptr, int buflen) {
     const unsigned int *words = (const unsigned int *)ptr;
     const int word_count = buflen / 4;

     for (int row = 0; row < word_count; row += 4) {
         fprintf(fp, "%06x: ", row);

         for (int col = 0; col < 4; col++) {
             const int idx = row + col;

             if (idx >= word_count)
                 fprintf(fp, "   ");
             else
                 fprintf(fp, "%08x ", words[idx]);
         }

         fprintf(fp, "\n");
     }

     fflush(fp);
 }

 /*
  * Print the stage-independent part of a compiled program's data to fp:
  * the binding table layout, the push/pull constant counts, and then every
  * push and pull constant entry (pointer and value).
  *
  * The original loops ran nr_params / nr_pull_params times but always
  * printed element 0; each iteration now prints its own element.
  */
 static void base_prog_dump(FILE *fp, struct brw_stage_prog_data* base)
 {
     fprintf(fp, "data->base.binding_table.size_bytes = %u\n",
                  base->binding_table.size_bytes);
     fprintf(fp, "data->base.binding_table.pull_constants_start = %u\n",
                  base->binding_table.pull_constants_start);
     fprintf(fp, "data->base.binding_table.texture_start = %u\n",
                  base->binding_table.texture_start);
     fprintf(fp, "data->base.binding_table.gather_texture_start = %u\n",
                  base->binding_table.gather_texture_start);
     fprintf(fp, "data->base.binding_table.ubo_start = %u\n",
                  base->binding_table.ubo_start);
     fprintf(fp, "data->base.binding_table.abo_start = %u\n",
                  base->binding_table.abo_start);
     fprintf(fp, "data->base.binding_table.shader_time_start = %u\n",
                  base->binding_table.shader_time_start);

     fprintf(fp, "data->base.nr_params = %u\n",
                  base->nr_params);
     fprintf(fp, "data->base.nr_pull_params = %u\n",
                  base->nr_pull_params);

     fprintf(fp, "== push constants: ==\n");
     fprintf(fp, "data->base.nr_params = %u\n",
                  base->nr_params);
     // %p requires a void pointer, hence the casts
     fprintf(fp, "data->base.param = %p\n",
                  (const void *) base->param);

     for (unsigned i = 0; i < base->nr_params; ++i) {
         fprintf(fp, "data->base.param[%u] = %p\n",
                      i, (const void *) base->param[i]);
         fprintf(fp, "*data->base.param[%u] = %f\n",
                      i, *base->param[i]);
     }

     fprintf(fp, "== pull constants: ==\n");
     fprintf(fp, "data->base.nr_pull_params = %u\n",
                  base->nr_pull_params);
     fprintf(fp, "data->base.pull_param = %p\n",
                  (const void *) base->pull_param);

     for (unsigned i = 0; i < base->nr_pull_params; ++i) {
         fprintf(fp, "data->base.pull_param[%u] = %p\n",
                      i, (const void *) base->pull_param[i]);
         fprintf(fp, "*data->base.pull_param[%u] = %f\n",
                      i, *base->pull_param[i]);
     }
 }

 /*
  * Print a brw_vs_prog_data to fp: the shared stage data, the VUE map
  * (valid-slot mask and both slot/varying lookup tables), the dispatch and
  * URB sizing fields, and the vertex-shader specific input flags.
  */
 static void vs_data_dump(FILE *fp, struct brw_vs_prog_data *data)
 {
     const char *vertexid_str = data->uses_vertexid ? "true" : "false";
     const char *instanceid_str = data->uses_instanceid ? "true" : "false";
     int slot;

     fputs("\n=== begin brw_vs_prog_data ===\n", fp);

     base_prog_dump(fp, &data->base.base);

     fprintf(fp, "data->base.vue_map.slots_valid = 0x%" PRIX64 "\n",
             data->base.vue_map.slots_valid);

     // varying -> slot table
     for (slot = 0; slot < BRW_VARYING_SLOT_COUNT; slot++) {
         fprintf(fp, "data->base.vue_map.varying_to_slot[%i] = %i\n",
                 slot, (int) data->base.vue_map.varying_to_slot[slot]);
     }

     // slot -> varying table
     for (slot = 0; slot < BRW_VARYING_SLOT_COUNT; slot++) {
         fprintf(fp, "data->base.vue_map.slot_to_varying[%i] = %i\n",
                 slot, (int) data->base.vue_map.slot_to_varying[slot]);
     }

     fprintf(fp, "data->base.vue_map.num_slots = %i\n",
             data->base.vue_map.num_slots);
     fprintf(fp, "data->base.dispatch_grf_start_reg = %u\n",
             data->base.dispatch_grf_start_reg);
     fprintf(fp, "data->base.curb_read_length = %u\n",
             data->base.curb_read_length);
     fprintf(fp, "data->base.urb_read_length = %u\n",
             data->base.urb_read_length);
     fprintf(fp, "data->base.total_grf = %u\n",
             data->base.total_grf);
     fprintf(fp, "data->base.total_scratch = %u\n",
             data->base.total_scratch);
     fprintf(fp, "data->base.urb_entry_size = %u\n",
             data->base.urb_entry_size);

     fprintf(fp, "data->inputs_read = 0x%" PRIX64 "\n",
             data->inputs_read);
     fprintf(fp, "data->uses_vertexid = %s\n", vertexid_str);
     fprintf(fp, "data->uses_instanceid = %s\n", instanceid_str);

     fputs("=== end brw_vs_prog_data ===\n", fp);

     fflush(fp);
 }

 /*
  * Print a brw_wm_prog_data to fp: the shared stage data, the register and
  * scratch sizing fields, the fragment-shader feature flags, and the
  * urb_setup table.
  */
 static void fs_data_dump(FILE *fp, struct brw_wm_prog_data* data)
 {
     const char *dual_src_str = data->dual_src_blend ? "true" : "false";
     const char *pos_offset_str = data->uses_pos_offset ? "true" : "false";
     const char *omask_str = data->uses_omask ? "true" : "false";
     int slot;

     fputs("\n=== begin brw_wm_prog_data ===\n", fp);

     base_prog_dump(fp, &data->base);

     fprintf(fp, "data->curb_read_length = %u\n",
             data->curb_read_length);
     fprintf(fp, "data->num_varying_inputs = %u\n",
             data->num_varying_inputs);

     fprintf(fp, "data->first_curbe_grf = %u\n",
             data->first_curbe_grf);
     fprintf(fp, "data->first_curbe_grf_16 = %u\n",
             data->first_curbe_grf_16);
     fprintf(fp, "data->reg_blocks = %u\n",
             data->reg_blocks);
     fprintf(fp, "data->reg_blocks_16 = %u\n",
             data->reg_blocks_16);
     fprintf(fp, "data->total_scratch = %u\n",
             data->total_scratch);
     fprintf(fp, "data->binding_table.render_target_start = %u\n",
             data->binding_table.render_target_start);

     fprintf(fp, "data->dual_src_blend = %s\n", dual_src_str);
     fprintf(fp, "data->uses_pos_offset = %s\n", pos_offset_str);
     fprintf(fp, "data->uses_omask = %s\n", omask_str);
     fprintf(fp, "data->prog_offset_16 = %u\n",
             data->prog_offset_16);

     fprintf(fp, "data->barycentric_interp_modes = %u\n",
             data->barycentric_interp_modes);

     for (slot = 0; slot < VARYING_SLOT_MAX; slot++)
         fprintf(fp, "data->urb_setup[%i] = %i\n",
                 slot, data->urb_setup[slot]);

     fputs("=== end brw_wm_prog_data ===\n", fp);

     fflush(fp);
 }

 extern "C" {

 /*
  * Allocate a zeroed brw_context and set it up for standalone compilation.
  *
  * gpu - hardware description used to pick generation-specific settings
  *
  * Returns the new context, or NULL if the allocation fails. The context is
  * a ralloc root (no parent); the caller releases it with ralloc_free().
  */
 struct brw_context *intel_create_brw_context(const struct intel_gpu *gpu)
 {
     struct brw_context *brw = rzalloc(NULL, struct brw_context);

     // initialize_brw_context() dereferences brw right away, so stop here
     // instead of crashing when the allocation failed
     if (!brw)
         return NULL;

     // sub structures are ralloc'ed with brw as their parent (not on the stack)
     initialize_brw_context(brw, gpu);

     return brw;
 }

 /*
  * Invoke the backend compiler to generate ISA and the supporting data
  * for one shader, and copy the results into pipe_shader.
  *
  * pipe_shader - receives the code (pCode/codeSize, allocated with
  *               icd_alloc) and the per-stage counts and flags
  * gpu         - hardware description used to build the compile context
  * ir          - treated as a struct gl_shader_program; note that its Type
  *               field is written here despite the const qualifier
  *
  * Returns XGL_SUCCESS, XGL_ERROR_OUT_OF_MEMORY when the compile context or
  * the code buffer cannot be allocated, or XGL_ERROR_BAD_PIPELINE_DATA when
  * linking fails or the stage is not a vertex or fragment shader.
  */
 XGL_RESULT intel_pipeline_shader_compile(struct intel_pipeline_shader *pipe_shader,
                                          const struct intel_gpu *gpu,
                                          const struct intel_ir *ir)
 {
     /* XXX how about constness? */
     struct gl_shader_program *sh_prog = (struct gl_shader_program *) ir;
     XGL_RESULT status = XGL_SUCCESS;

     struct brw_context *brw = intel_create_brw_context(gpu);
     if (!brw)
         return XGL_ERROR_OUT_OF_MEMORY;

     // LunarG : TODO - should this have been set for us somewhere?
     sh_prog->Type = sh_prog->Shaders[0]->Stage;

     if (brw_link_shader(&brw->ctx, sh_prog)) {

         // first take at standalone backend compile
         switch(sh_prog->Shaders[0]->Type) {
         case GL_VERTEX_SHADER:
         {
             pipe_shader->codeSize = get_vs_program_size(brw->shader_prog);

             pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
             if (!pipe_shader->pCode) {
                 status = XGL_ERROR_OUT_OF_MEMORY;
                 break;
             }

             // copy the ISA out of our compile context, it is about to poof away
             memcpy(pipe_shader->pCode, get_vs_program(brw->shader_prog), pipe_shader->codeSize);

             struct brw_vs_prog_data *data = get_vs_prog_data(brw->shader_prog);

             if (data->uses_vertexid)
                 pipe_shader->uses |= INTEL_SHADER_USE_VID;

             if (data->uses_instanceid)
                 pipe_shader->uses |= INTEL_SHADER_USE_IID;

             // compact the generic attribute bits of inputs_read so that
             // VERT_ATTRIB_GENERIC0 lands in bit 0; shift an unsigned one so
             // the mask never goes through a signed shift
             uint32_t user_attr_read = 0;
             for (int i=VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
                 if (data->inputs_read & BITFIELD64_BIT(i)) {
                     user_attr_read |= (1u << (i - VERT_ATTRIB_GENERIC0));
                 }
             }
             pipe_shader->user_attributes_read = user_attr_read;

             pipe_shader->enable_user_clip = sh_prog->Vert.UsesClipDistance;

             // These are really best guesses, and will require more work to
             // understand as we turn on more features
             pipe_shader->in_count = data->base.urb_read_length;// = 1;
             pipe_shader->out_count = data->base.vue_map.num_slots;// = 2;
             pipe_shader->urb_grf_start = data->base.dispatch_grf_start_reg;// = 1;
             // one binding table entry per 4 bytes
             pipe_shader->surface_count = data->base.base.binding_table.size_bytes / 4;

             if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
                 printf("out_count: %d\n", pipe_shader->out_count);

                 vs_data_dump(stdout, data);

                 fprintf(stdout,"\nISA generated by compiler:\n");
                 // cast so the argument matches %i whatever codeSize's type is
                 fprintf(stdout,"ISA size: %i\n", (int) pipe_shader->codeSize);
                 hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
                 fflush(stdout);
             }
         }
             break;

         case GL_FRAGMENT_SHADER:
         {
             // Start pulling bits out of our compile result.
             // see upload_ps_state() for references about what I believe each of these values are

             // I would prefer to find a way to pull this data out without exposing
             // the internals of the compiler, but it hasn't presented itself yet

             pipe_shader->codeSize = get_wm_program_size(brw->shader_prog);

             pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
             if (!pipe_shader->pCode) {
                 status = XGL_ERROR_OUT_OF_MEMORY;
                 break;
             }

             // copy the ISA out of our compile context, it is about to poof away
             memcpy(pipe_shader->pCode, get_wm_program(brw->shader_prog), pipe_shader->codeSize);

             struct brw_wm_prog_data *data = get_wm_prog_data(brw->shader_prog);

             pipe_shader->surface_count = data->base.binding_table.size_bytes / 4;
             pipe_shader->urb_grf_start = data->first_curbe_grf;
             pipe_shader->in_count      = data->num_varying_inputs;

             // Ensure this is 1:1, or create a converter
             pipe_shader->barycentric_interps = data->barycentric_interp_modes;

             struct brw_stage_state *stage_state = &brw->wm.base;
             pipe_shader->sampler_count = stage_state->sampler_count;

             // TODO - Figure out multiple FS outputs
             pipe_shader->out_count = 1;
             // TODO - pipe_shader->uses is not updated for fragment shaders

             if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
                 // print out the supporting structures generated by the BE compile:
                 fs_data_dump(stdout, data);

                 printf("in_count: %d\n", pipe_shader->in_count);

                 fprintf(stdout,"\nISA generated by compiler:\n");
                 // cast so the argument matches %i whatever codeSize's type is
                 fprintf(stdout,"ISA size: %i\n", (int) pipe_shader->codeSize);
                 hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
                 fflush(stdout);
             }
         }
             break;

         case GL_GEOMETRY_SHADER:
         case GL_COMPUTE_SHADER:
         default:
             assert(0);
             status = XGL_ERROR_BAD_PIPELINE_DATA;
             break;
         }
     } else {
         assert(0);
         status = XGL_ERROR_BAD_PIPELINE_DATA;
     }

     // the compile context is released on every path, success or failure
     pipeline_destroy_compile(brw);

     return status;
 }

 void intel_disassemble_kernel(const struct intel_gpu *gpu,
                               const void *kernel, XGL_SIZE size)
 {
     struct brw_compile c;

     memset(&c, 0, sizeof(c));
     c.brw = intel_create_brw_context(gpu);
     c.store = (struct brw_instruction *) kernel;

     brw_dump_compile(&c, stderr, 0, size);
 }

 } // extern "C"
	/*
	* XGL
	*
	* Copyright (C) 2014 LunarG, Inc.
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included
	* in all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	* DEALINGS IN THE SOFTWARE.
	*
	* Authors:
	* LunarG
	*/

	#include "gpu.h"
	#include "shader.h"
	#include "pipeline.h"
	#include "compiler/shader/compiler_interface.h"
	#include "compiler/pipeline/pipeline_compiler_interface.h"
	#include "compiler/pipeline/brw_context.h"
	#include "compiler/pipeline/brw_shader.h"
	#include "compiler/mesa-utils/src/mesa/main/context.h"
	#include "compiler/mesa-utils/src/glsl/ralloc.h"
	#include "compiler/pipeline/brw_device_info.h"
	#include "compiler/pipeline/brw_wm.h"


	static void initialize_brw_context(struct brw_context *brw,
	const struct intel_gpu *gpu)
	{

	// create a stripped down context for compilation
	initialize_mesa_context_to_defaults(&brw->ctx);

	//
	// init the things pulled from DRI in brwCreateContext
	//
	struct brw_device_info *devInfo = rzalloc(brw, struct brw_device_info);
	switch (intel_gpu_gen(gpu)) {
	case INTEL_GEN(7.5):
	devInfo->gen = 7;
	devInfo->is_haswell = true;
	break;
	case INTEL_GEN(7):
	devInfo->gen = 7;
	break;
	case INTEL_GEN(6):
	devInfo->gen = 6;
	break;
	default:
	assert(!"unsupported GEN");
	break;
	}

	devInfo->gt = gpu->gt;
	devInfo->has_llc = true;
	devInfo->has_pln = true;
	devInfo->has_compr4 = true;
	devInfo->has_negative_rhw_bug = false;
	devInfo->needs_unlit_centroid_workaround = true;

	// hand code values until we have something to pull from
	// use brw_device_info_hsw_gt3
	brw->intelScreen = rzalloc(brw, struct intel_screen);
	brw->intelScreen->devinfo = devInfo;

	brw->gen = brw->intelScreen->devinfo->gen;
	brw->gt = brw->intelScreen->devinfo->gt;
	brw->is_g4x = brw->intelScreen->devinfo->is_g4x;
	brw->is_baytrail = brw->intelScreen->devinfo->is_baytrail;
	brw->is_haswell = brw->intelScreen->devinfo->is_haswell;
	brw->has_llc = brw->intelScreen->devinfo->has_llc;
	brw->has_pln = brw->intelScreen->devinfo->has_pln;
	brw->has_compr4 = brw->intelScreen->devinfo->has_compr4;
	brw->has_negative_rhw_bug = brw->intelScreen->devinfo->has_negative_rhw_bug;
	brw->needs_unlit_centroid_workaround =
	brw->intelScreen->devinfo->needs_unlit_centroid_workaround;

	brw->vs.base.stage = MESA_SHADER_VERTEX;
	brw->gs.base.stage = MESA_SHADER_GEOMETRY;
	brw->wm.base.stage = MESA_SHADER_FRAGMENT;

	//
	// init what remains of intel_screen
	//
	brw->intelScreen->deviceID = 0;
	brw->intelScreen->program_id = 0;

	brw_vec4_alloc_reg_set(brw->intelScreen);

	brw->shader_prog = brw_new_shader_program(&brw->ctx, 0);
	}

	/*
	 * Release a compile context: the shader program is freed first, then the
	 * brw_context allocation itself.
	 */
	static inline void pipeline_destroy_compile(struct brw_context *brw)
	{
	    // program first, while brw is still valid to read from
	    ralloc_free(brw->shader_prog);

	    ralloc_free(brw);
	}

	/*
	 * Write a buffer to fp as rows of up to four 32-bit words in hex.
	 *
	 * Each row starts with the index of its first word (a word index, not a
	 * byte offset). Only whole words are shown: buflen / 4 of them. Missing
	 * words in the last row are padded with three spaces each.
	 *
	 * The pointer declarators on fp, ptr and buf had been stripped from this
	 * copy, which made it uncompilable; they are restored here.
	 */
	static void hexdump(FILE *fp, void *ptr, int buflen) {
	    const unsigned int *words = (const unsigned int *)ptr;
	    const int word_count = buflen / 4;

	    for (int row = 0; row < word_count; row += 4) {
	        fprintf(fp, "%06x: ", row);

	        for (int col = 0; col < 4; col++) {
	            const int idx = row + col;

	            if (idx >= word_count)
	                fprintf(fp, "   ");
	            else
	                fprintf(fp, "%08x ", words[idx]);
	        }

	        fprintf(fp, "\n");
	    }

	    fflush(fp);
	}

	static void base_prog_dump(FILE fp, struct brw_stage_prog_data base)
	{
	fprintf(fp, "data->base.binding_table.size_bytes = %u\n",
	base->binding_table.size_bytes);
	fprintf(fp, "data->base.binding_table.pull_constants_start = %u\n",
	base->binding_table.pull_constants_start);
	fprintf(fp, "data->base.binding_table.texture_start = %u\n",
	base->binding_table.texture_start);
	fprintf(fp, "data->base.binding_table.gather_texture_start = %u\n",
	base->binding_table.gather_texture_start);
	fprintf(fp, "data->base.binding_table.ubo_start = %u\n",
	base->binding_table.ubo_start);
	fprintf(fp, "data->base.binding_table.abo_start = %u\n",
	base->binding_table.abo_start);
	fprintf(fp, "data->base.binding_table.shader_time_start = %u\n",
	base->binding_table.shader_time_start);

	fprintf(fp, "data->base.nr_params = %u\n",
	base->nr_params);
	fprintf(fp, "data->base.nr_pull_params = %u\n",
	base->nr_pull_params);

	fprintf(fp, "== push constants: ==\n");
	fprintf(fp, "data->base.nr_params = %u\n",
	base->nr_params);

	for (int i = 0; i < base->nr_params; ++i) {
	fprintf(fp, "data->base.param = %p\n",
	base->param);
	fprintf(fp, "*data->base.param = %p\n",
	*base->param);
	fprintf(fp, "**data->base.param = %f\n",
	**base->param);
	}

	fprintf(fp, "== pull constants: ==\n");
	fprintf(fp, "data->base.nr_pull_params = %u\n",
	base->nr_pull_params);

	for (int i = 0; i < base->nr_pull_params; ++i) {
	fprintf(fp, "data->base.pull_param = %p\n",
	base->pull_param);
	fprintf(fp, "*data->base.pull_param = %p\n",
	*base->pull_param);
	fprintf(fp, "**data->base.pull_param = %f\n",
	**base->pull_param);
	}
	}

	static void vs_data_dump(FILE fp, struct brw_vs_prog_data data)
	{
	fprintf(fp, "\n=== begin brw_vs_prog_data ===\n");

	base_prog_dump(fp, &data->base.base);

	fprintf(fp, "data->base.vue_map.slots_valid = 0x%" PRIX64 "\n",
	data->base.vue_map.slots_valid);

	for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
	fprintf(fp, "data->base.vue_map.varying_to_slot[%i] = %i\n", i,
	(int) data->base.vue_map.varying_to_slot[i]);

	for (int i = 0; i < BRW_VARYING_SLOT_COUNT; ++i)
	fprintf(fp, "data->base.vue_map.slot_to_varying[%i] = %i\n", i,
	(int) data->base.vue_map.slot_to_varying[i]);

	fprintf(fp, "data->base.vue_map.num_slots = %i\n",
	data->base.vue_map.num_slots);
	fprintf(fp, "data->base.dispatch_grf_start_reg = %u\n",
	data->base.dispatch_grf_start_reg);
	fprintf(fp, "data->base.curb_read_length = %u\n",
	data->base.curb_read_length);
	fprintf(fp, "data->base.urb_read_length = %u\n",
	data->base.urb_read_length);
	fprintf(fp, "data->base.total_grf = %u\n",
	data->base.total_grf);
	fprintf(fp, "data->base.total_scratch = %u\n",
	data->base.total_scratch);
	fprintf(fp, "data->base.urb_entry_size = %u\n",
	data->base.urb_entry_size);

	fprintf(fp, "data->inputs_read = 0x%" PRIX64 "\n",
	data->inputs_read);
	fprintf(fp, "data->uses_vertexid = %s\n",
	data->uses_vertexid ? "true" : "false");
	fprintf(fp, "data->uses_instanceid = %s\n",
	data->uses_instanceid ? "true" : "false");

	fprintf(fp, "=== end brw_vs_prog_data ===\n");

	fflush(fp);
	}

	static void fs_data_dump(FILE fp, struct brw_wm_prog_data data)
	{
	fprintf(fp, "\n=== begin brw_wm_prog_data ===\n");

	base_prog_dump(fp, &data->base);

	fprintf(fp, "data->curb_read_length = %u\n",
	data->curb_read_length);
	fprintf(fp, "data->num_varying_inputs = %u\n",
	data->num_varying_inputs);

	fprintf(fp, "data->first_curbe_grf = %u\n",
	data->first_curbe_grf);
	fprintf(fp, "data->first_curbe_grf_16 = %u\n",
	data->first_curbe_grf_16);
	fprintf(fp, "data->reg_blocks = %u\n",
	data->reg_blocks);
	fprintf(fp, "data->reg_blocks_16 = %u\n",
	data->reg_blocks_16);
	fprintf(fp, "data->total_scratch = %u\n",
	data->total_scratch);
	fprintf(fp, "data->binding_table.render_target_start = %u\n",
	data->binding_table.render_target_start);

	fprintf(fp, "data->dual_src_blend = %s\n",
	data->dual_src_blend ? "true" : "false");
	fprintf(fp, "data->uses_pos_offset = %s\n",
	data->uses_pos_offset ? "true" : "false");
	fprintf(fp, "data->uses_omask = %s\n",
	data->uses_omask ? "true" : "false");
	fprintf(fp, "data->prog_offset_16 = %u\n",
	data->prog_offset_16);

	fprintf(fp, "data->barycentric_interp_modes = %u\n",
	data->barycentric_interp_modes);

	for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
	fprintf(fp, "data->urb_setup[%i] = %i\n",
	i, data->urb_setup[i]);
	}

	fprintf(fp, "=== end brw_wm_prog_data ===\n");

	fflush(fp);
	}

	extern "C" {

	/*
	 * Allocate a zeroed brw_context and set it up for standalone compilation.
	 *
	 * gpu - hardware description used to pick generation-specific settings
	 *
	 * Returns the new context, or NULL if the allocation fails. The context is
	 * a ralloc root (no parent); the caller releases it with ralloc_free().
	 *
	 * The pointer declarators on the return type and on gpu had been stripped
	 * from this copy (the body returns a pointer); they are restored here.
	 */
	struct brw_context *intel_create_brw_context(const struct intel_gpu *gpu)
	{
	    struct brw_context *brw = rzalloc(NULL, struct brw_context);

	    // initialize_brw_context() dereferences brw right away, so stop here
	    // instead of crashing when the allocation failed
	    if (!brw)
	        return NULL;

	    // sub structures are ralloc'ed with brw as their parent (not on the stack)
	    initialize_brw_context(brw, gpu);

	    return brw;
	}

	/*
	 * Invoke the backend compiler to generate ISA and the supporting data
	 * for one shader, and copy the results into pipe_shader.
	 *
	 * pipe_shader - receives the code (pCode/codeSize, allocated with
	 *               icd_alloc) and the per-stage counts and flags
	 * gpu         - hardware description used to build the compile context
	 * ir          - treated as a struct gl_shader_program; note that its Type
	 *               field is written here despite the const qualifier
	 *
	 * Returns XGL_SUCCESS, XGL_ERROR_OUT_OF_MEMORY when the compile context or
	 * the code buffer cannot be allocated, or XGL_ERROR_BAD_PIPELINE_DATA when
	 * linking fails or the stage is not a vertex or fragment shader.
	 *
	 * This copy had lost the pointer declarator on sh_prog and carried
	 * escaped "\|=" operators; both are repaired here.
	 */
	XGL_RESULT intel_pipeline_shader_compile(struct intel_pipeline_shader *pipe_shader,
	                                         const struct intel_gpu *gpu,
	                                         const struct intel_ir *ir)
	{
	    /* XXX how about constness? */
	    struct gl_shader_program *sh_prog = (struct gl_shader_program *) ir;
	    XGL_RESULT status = XGL_SUCCESS;

	    struct brw_context *brw = intel_create_brw_context(gpu);
	    if (!brw)
	        return XGL_ERROR_OUT_OF_MEMORY;

	    // LunarG : TODO - should this have been set for us somewhere?
	    sh_prog->Type = sh_prog->Shaders[0]->Stage;

	    if (brw_link_shader(&brw->ctx, sh_prog)) {

	        // first take at standalone backend compile
	        switch(sh_prog->Shaders[0]->Type) {
	        case GL_VERTEX_SHADER:
	        {
	            pipe_shader->codeSize = get_vs_program_size(brw->shader_prog);

	            pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
	            if (!pipe_shader->pCode) {
	                status = XGL_ERROR_OUT_OF_MEMORY;
	                break;
	            }

	            // copy the ISA out of our compile context, it is about to poof away
	            memcpy(pipe_shader->pCode, get_vs_program(brw->shader_prog), pipe_shader->codeSize);

	            struct brw_vs_prog_data *data = get_vs_prog_data(brw->shader_prog);

	            if (data->uses_vertexid)
	                pipe_shader->uses |= INTEL_SHADER_USE_VID;

	            if (data->uses_instanceid)
	                pipe_shader->uses |= INTEL_SHADER_USE_IID;

	            // compact the generic attribute bits of inputs_read so that
	            // VERT_ATTRIB_GENERIC0 lands in bit 0; shift an unsigned one so
	            // the mask never goes through a signed shift
	            uint32_t user_attr_read = 0;
	            for (int i=VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
	                if (data->inputs_read & BITFIELD64_BIT(i)) {
	                    user_attr_read |= (1u << (i - VERT_ATTRIB_GENERIC0));
	                }
	            }
	            pipe_shader->user_attributes_read = user_attr_read;

	            pipe_shader->enable_user_clip = sh_prog->Vert.UsesClipDistance;

	            // These are really best guesses, and will require more work to
	            // understand as we turn on more features
	            pipe_shader->in_count = data->base.urb_read_length;// = 1;
	            pipe_shader->out_count = data->base.vue_map.num_slots;// = 2;
	            pipe_shader->urb_grf_start = data->base.dispatch_grf_start_reg;// = 1;
	            // one binding table entry per 4 bytes
	            pipe_shader->surface_count = data->base.base.binding_table.size_bytes / 4;

	            if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	                printf("out_count: %d\n", pipe_shader->out_count);

	                vs_data_dump(stdout, data);

	                fprintf(stdout,"\nISA generated by compiler:\n");
	                // cast so the argument matches %i whatever codeSize's type is
	                fprintf(stdout,"ISA size: %i\n", (int) pipe_shader->codeSize);
	                hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
	                fflush(stdout);
	            }
	        }
	            break;

	        case GL_FRAGMENT_SHADER:
	        {
	            // Start pulling bits out of our compile result.
	            // see upload_ps_state() for references about what I believe each of these values are

	            // I would prefer to find a way to pull this data out without exposing
	            // the internals of the compiler, but it hasn't presented itself yet

	            pipe_shader->codeSize = get_wm_program_size(brw->shader_prog);

	            pipe_shader->pCode = icd_alloc(pipe_shader->codeSize, 0, XGL_SYSTEM_ALLOC_INTERNAL_SHADER);
	            if (!pipe_shader->pCode) {
	                status = XGL_ERROR_OUT_OF_MEMORY;
	                break;
	            }

	            // copy the ISA out of our compile context, it is about to poof away
	            memcpy(pipe_shader->pCode, get_wm_program(brw->shader_prog), pipe_shader->codeSize);

	            struct brw_wm_prog_data *data = get_wm_prog_data(brw->shader_prog);

	            pipe_shader->surface_count = data->base.binding_table.size_bytes / 4;
	            pipe_shader->urb_grf_start = data->first_curbe_grf;
	            pipe_shader->in_count = data->num_varying_inputs;

	            // Ensure this is 1:1, or create a converter
	            pipe_shader->barycentric_interps = data->barycentric_interp_modes;

	            struct brw_stage_state *stage_state = &brw->wm.base;
	            pipe_shader->sampler_count = stage_state->sampler_count;

	            // TODO - Figure out multiple FS outputs
	            pipe_shader->out_count = 1;
	            // TODO - pipe_shader->uses is not updated for fragment shaders

	            if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	                // print out the supporting structures generated by the BE compile:
	                fs_data_dump(stdout, data);

	                printf("in_count: %d\n", pipe_shader->in_count);

	                fprintf(stdout,"\nISA generated by compiler:\n");
	                // cast so the argument matches %i whatever codeSize's type is
	                fprintf(stdout,"ISA size: %i\n", (int) pipe_shader->codeSize);
	                hexdump(stdout, pipe_shader->pCode, pipe_shader->codeSize);
	                fflush(stdout);
	            }
	        }
	            break;

	        case GL_GEOMETRY_SHADER:
	        case GL_COMPUTE_SHADER:
	        default:
	            assert(0);
	            status = XGL_ERROR_BAD_PIPELINE_DATA;
	            break;
	        }
	    } else {
	        assert(0);
	        status = XGL_ERROR_BAD_PIPELINE_DATA;
	    }

	    // the compile context is released on every path, success or failure
	    pipeline_destroy_compile(brw);

	    return status;
	}

	void intel_disassemble_kernel(const struct intel_gpu *gpu,
	const void *kernel, XGL_SIZE size)
	{
	struct brw_compile c;

	memset(&c, 0, sizeof(c));
	c.brw = intel_create_brw_context(gpu);
	c.store = (struct brw_instruction *) kernel;

	brw_dump_compile(&c, stderr, 0, size);
	}

	} // extern "C"