| /* |
| * |
| * Copyright (C) 2015-2016 Valve Corporation |
| * Copyright (C) 2015-2016 LunarG, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| * Author: Chia-I Wu <olvaffe@gmail.com> |
| * Author: Chia-I Wu <olv@lunarg.com> |
| * Author: Courtney Goeltzenleuchter <courtney@LunarG.com> |
| * Author: GregF <greg@LunarG.com> |
| * Author: Tony Barbour <tony@LunarG.com> |
| * |
| */ |
| |
| #include "genhw/genhw.h" |
| #include "compiler/pipeline/pipeline_compiler_interface.h" |
| #include "cmd.h" |
| #include "format.h" |
| #include "shader.h" |
| #include "pipeline.h" |
| #include "mem.h" |
| |
| static int translate_blend_func(VkBlendOp func) |
| { |
| switch (func) { |
| case VK_BLEND_OP_ADD: return GEN6_BLENDFUNCTION_ADD; |
| case VK_BLEND_OP_SUBTRACT: return GEN6_BLENDFUNCTION_SUBTRACT; |
| case VK_BLEND_OP_REVERSE_SUBTRACT: return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT; |
| case VK_BLEND_OP_MIN: return GEN6_BLENDFUNCTION_MIN; |
| case VK_BLEND_OP_MAX: return GEN6_BLENDFUNCTION_MAX; |
| default: |
| assert(!"unknown blend func"); |
| return GEN6_BLENDFUNCTION_ADD; |
| }; |
| } |
| |
| static int translate_blend(VkBlendFactor blend) |
| { |
| switch (blend) { |
| case VK_BLEND_FACTOR_ZERO: return GEN6_BLENDFACTOR_ZERO; |
| case VK_BLEND_FACTOR_ONE: return GEN6_BLENDFACTOR_ONE; |
| case VK_BLEND_FACTOR_SRC_COLOR: return GEN6_BLENDFACTOR_SRC_COLOR; |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: return GEN6_BLENDFACTOR_INV_SRC_COLOR; |
| case VK_BLEND_FACTOR_DST_COLOR: return GEN6_BLENDFACTOR_DST_COLOR; |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: return GEN6_BLENDFACTOR_INV_DST_COLOR; |
| case VK_BLEND_FACTOR_SRC_ALPHA: return GEN6_BLENDFACTOR_SRC_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return GEN6_BLENDFACTOR_INV_SRC_ALPHA; |
| case VK_BLEND_FACTOR_DST_ALPHA: return GEN6_BLENDFACTOR_DST_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return GEN6_BLENDFACTOR_INV_DST_ALPHA; |
| case VK_BLEND_FACTOR_CONSTANT_COLOR: return GEN6_BLENDFACTOR_CONST_COLOR; |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: return GEN6_BLENDFACTOR_INV_CONST_COLOR; |
| case VK_BLEND_FACTOR_CONSTANT_ALPHA: return GEN6_BLENDFACTOR_CONST_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: return GEN6_BLENDFACTOR_INV_CONST_ALPHA; |
| case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE; |
| case VK_BLEND_FACTOR_SRC1_COLOR: return GEN6_BLENDFACTOR_SRC1_COLOR; |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: return GEN6_BLENDFACTOR_INV_SRC1_COLOR; |
| case VK_BLEND_FACTOR_SRC1_ALPHA: return GEN6_BLENDFACTOR_SRC1_ALPHA; |
| case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return GEN6_BLENDFACTOR_INV_SRC1_ALPHA; |
| default: |
| assert(!"unknown blend factor"); |
| return GEN6_BLENDFACTOR_ONE; |
| }; |
| } |
| |
| static int translate_compare_func(VkCompareOp func) |
| { |
| switch (func) { |
| case VK_COMPARE_OP_NEVER: return GEN6_COMPAREFUNCTION_NEVER; |
| case VK_COMPARE_OP_LESS: return GEN6_COMPAREFUNCTION_LESS; |
| case VK_COMPARE_OP_EQUAL: return GEN6_COMPAREFUNCTION_EQUAL; |
| case VK_COMPARE_OP_LESS_OR_EQUAL: return GEN6_COMPAREFUNCTION_LEQUAL; |
| case VK_COMPARE_OP_GREATER: return GEN6_COMPAREFUNCTION_GREATER; |
| case VK_COMPARE_OP_NOT_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; |
| case VK_COMPARE_OP_GREATER_OR_EQUAL: return GEN6_COMPAREFUNCTION_GEQUAL; |
| case VK_COMPARE_OP_ALWAYS: return GEN6_COMPAREFUNCTION_ALWAYS; |
| default: |
| assert(!"unknown compare_func"); |
| return GEN6_COMPAREFUNCTION_NEVER; |
| } |
| } |
| |
| static int translate_stencil_op(VkStencilOp op) |
| { |
| switch (op) { |
| case VK_STENCIL_OP_KEEP: return GEN6_STENCILOP_KEEP; |
| case VK_STENCIL_OP_ZERO: return GEN6_STENCILOP_ZERO; |
| case VK_STENCIL_OP_REPLACE: return GEN6_STENCILOP_REPLACE; |
| case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return GEN6_STENCILOP_INCRSAT; |
| case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return GEN6_STENCILOP_DECRSAT; |
| case VK_STENCIL_OP_INVERT: return GEN6_STENCILOP_INVERT; |
| case VK_STENCIL_OP_INCREMENT_AND_WRAP: return GEN6_STENCILOP_INCR; |
| case VK_STENCIL_OP_DECREMENT_AND_WRAP: return GEN6_STENCILOP_DECR; |
| default: |
| assert(!"unknown stencil op"); |
| return GEN6_STENCILOP_KEEP; |
| } |
| } |
| |
| static int translate_sample_count(VkSampleCountFlagBits samples) |
| { |
| switch (samples) { |
| case VK_SAMPLE_COUNT_1_BIT: return 1; |
| case VK_SAMPLE_COUNT_2_BIT: return 2; |
| case VK_SAMPLE_COUNT_4_BIT: return 4; |
| case VK_SAMPLE_COUNT_8_BIT: return 8; |
| case VK_SAMPLE_COUNT_16_BIT: return 16; |
| case VK_SAMPLE_COUNT_32_BIT: return 32; |
| case VK_SAMPLE_COUNT_64_BIT: return 64; |
| default: |
| assert(!"unknown sample count"); |
| return 1; |
| } |
| } |
| |
| struct intel_pipeline_create_info { |
| VkFlags use_pipeline_dynamic_state; |
| VkGraphicsPipelineCreateInfo graphics; |
| VkPipelineVertexInputStateCreateInfo vi; |
| VkPipelineInputAssemblyStateCreateInfo ia; |
| VkPipelineDepthStencilStateCreateInfo db; |
| VkPipelineColorBlendStateCreateInfo cb; |
| VkPipelineRasterizationStateCreateInfo rs; |
| VkPipelineTessellationStateCreateInfo tess; |
| VkPipelineMultisampleStateCreateInfo ms; |
| VkPipelineViewportStateCreateInfo vp; |
| |
| VkComputePipelineCreateInfo compute; |
| |
| VkPipelineShaderStageCreateInfo vs; |
| VkPipelineShaderStageCreateInfo tcs; |
| VkPipelineShaderStageCreateInfo tes; |
| VkPipelineShaderStageCreateInfo gs; |
| VkPipelineShaderStageCreateInfo fs; |
| }; |
| |
/* subpixel sample position offsets, in S1.3 fixed point (range [-8, 7]) */
| struct intel_pipeline_sample_position { |
| int8_t x, y; |
| }; |
| |
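/*
 * Pack one sample position into a byte: bias each S1.3 offset by 8 so the
 * signed range [-8, 7] maps to the unsigned nibble range [0, 15], with X in
 * the high nibble and Y in the low nibble.  For example, { -4, -4 } packs
 * to 0x44.
 */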
| static uint8_t pack_sample_position(const struct intel_dev *dev, |
| const struct intel_pipeline_sample_position *pos) |
| { |
| return (pos->x + 8) << 4 | (pos->y + 8); |
| } |
| |
| void intel_pipeline_init_default_sample_patterns(const struct intel_dev *dev, |
| uint8_t *pat_1x, uint8_t *pat_2x, |
| uint8_t *pat_4x, uint8_t *pat_8x, |
| uint8_t *pat_16x) |
| { |
| static const struct intel_pipeline_sample_position default_1x[1] = { |
| { 0, 0 }, |
| }; |
| static const struct intel_pipeline_sample_position default_2x[2] = { |
| { -4, -4 }, |
| { 4, 4 }, |
| }; |
| static const struct intel_pipeline_sample_position default_4x[4] = { |
| { -2, -6 }, |
| { 6, -2 }, |
| { -6, 2 }, |
| { 2, 6 }, |
| }; |
| static const struct intel_pipeline_sample_position default_8x[8] = { |
| { -1, 1 }, |
| { 1, 5 }, |
| { 3, -5 }, |
| { 5, 3 }, |
| { -7, -1 }, |
| { -3, -7 }, |
| { 7, -3 }, |
| { -5, 7 }, |
| }; |
| static const struct intel_pipeline_sample_position default_16x[16] = { |
| { 0, 2 }, |
| { 3, 0 }, |
| { -3, -2 }, |
| { -2, -4 }, |
| { 4, 3 }, |
| { 5, 1 }, |
| { 6, -1 }, |
| { 2, -6 }, |
| { -4, 5 }, |
| { -5, -5 }, |
| { -1, -7 }, |
| { 7, -3 }, |
| { -7, 4 }, |
| { 1, -8 }, |
| { -6, 6 }, |
| { -8, 7 }, |
| }; |
| int i; |
| |
| pat_1x[0] = pack_sample_position(dev, default_1x); |
| for (i = 0; i < 2; i++) |
| pat_2x[i] = pack_sample_position(dev, &default_2x[i]); |
| for (i = 0; i < 4; i++) |
| pat_4x[i] = pack_sample_position(dev, &default_4x[i]); |
| for (i = 0; i < 8; i++) |
| pat_8x[i] = pack_sample_position(dev, &default_8x[i]); |
| for (i = 0; i < 16; i++) |
| pat_16x[i] = pack_sample_position(dev, &default_16x[i]); |
| } |
| |
| struct intel_pipeline_shader *intel_pipeline_shader_create_meta(struct intel_dev *dev, |
| enum intel_dev_meta_shader id) |
| { |
| struct intel_pipeline_shader *sh; |
| VkResult ret; |
| |
| sh = intel_alloc(dev, sizeof(*sh), sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
| if (!sh) |
| return NULL; |
| memset(sh, 0, sizeof(*sh)); |
| |
| ret = intel_pipeline_shader_compile_meta(sh, dev->gpu, id); |
| if (ret != VK_SUCCESS) { |
| intel_free(dev, sh); |
| return NULL; |
| } |
| |
| switch (id) { |
| case INTEL_DEV_META_VS_FILL_MEM: |
| case INTEL_DEV_META_VS_COPY_MEM: |
| case INTEL_DEV_META_VS_COPY_MEM_UNALIGNED: |
| sh->max_threads = intel_gpu_get_max_threads(dev->gpu, |
| VK_SHADER_STAGE_VERTEX_BIT); |
| break; |
| default: |
| sh->max_threads = intel_gpu_get_max_threads(dev->gpu, |
| VK_SHADER_STAGE_FRAGMENT_BIT); |
| break; |
| } |
| |
| return sh; |
| } |
| |
| void intel_pipeline_shader_destroy(struct intel_dev *dev, |
| struct intel_pipeline_shader *sh) |
| { |
| intel_pipeline_shader_cleanup(sh, dev->gpu); |
| intel_free(dev, sh); |
| } |
| |
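/*
 * Compile a single shader stage and fold its scratch-space needs into the
 * pipeline: each stage's scratch region starts at the next 1KB boundary
 * past the previous stage's region.
 */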
| static VkResult pipeline_build_shader(struct intel_pipeline *pipeline, |
| const VkPipelineShaderStageCreateInfo *sh_info, |
| struct intel_pipeline_shader *sh) |
| { |
| struct intel_shader_module *mod = |
| intel_shader_module(sh_info->module); |
| const struct intel_ir *ir = |
| intel_shader_module_get_ir(mod, sh_info->stage); |
| VkResult ret; |
| |
| if (!ir) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| ret = intel_pipeline_shader_compile(sh, |
| pipeline->dev->gpu, pipeline->pipeline_layout, sh_info, ir); |
| |
| if (ret != VK_SUCCESS) |
| return ret; |
| |
| sh->max_threads = |
| intel_gpu_get_max_threads(pipeline->dev->gpu, sh_info->stage); |
| |
| /* 1KB aligned */ |
| sh->scratch_offset = u_align(pipeline->scratch_size, 1024); |
| pipeline->scratch_size = sh->scratch_offset + |
| sh->per_thread_scratch_size * sh->max_threads; |
| |
| pipeline->active_shaders |= sh_info->stage; |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult pipeline_build_shaders(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| VkResult ret = VK_SUCCESS; |
| |
| if (ret == VK_SUCCESS && info->vs.module) |
| ret = pipeline_build_shader(pipeline, &info->vs, &pipeline->vs); |
| if (ret == VK_SUCCESS && info->tcs.module) |
        ret = pipeline_build_shader(pipeline, &info->tcs, &pipeline->tcs);
| if (ret == VK_SUCCESS && info->tes.module) |
        ret = pipeline_build_shader(pipeline, &info->tes, &pipeline->tes);
| if (ret == VK_SUCCESS && info->gs.module) |
| ret = pipeline_build_shader(pipeline, &info->gs, &pipeline->gs); |
| if (ret == VK_SUCCESS && info->fs.module) |
| ret = pipeline_build_shader(pipeline, &info->fs, &pipeline->fs); |
| |
| if (ret == VK_SUCCESS && info->compute.stage.module) { |
| ret = pipeline_build_shader(pipeline, |
| &info->compute.stage, &pipeline->cs); |
| } |
| |
| return ret; |
| } |
| static uint32_t *pipeline_cmd_ptr(struct intel_pipeline *pipeline, int cmd_len) |
| { |
| uint32_t *ptr; |
| |
| assert(pipeline->cmd_len + cmd_len < INTEL_PSO_CMD_ENTRIES); |
| ptr = &pipeline->cmds[pipeline->cmd_len]; |
| pipeline->cmd_len += cmd_len; |
| return ptr; |
| } |
| |
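/* Translate input-assembly state into the hardware primitive type. */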
| static VkResult pipeline_build_ia(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info* info) |
| { |
| pipeline->topology = info->ia.topology; |
| pipeline->disable_vs_cache = false; |
| |
| switch (info->ia.topology) { |
| case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: |
| pipeline->prim_type = GEN6_3DPRIM_POINTLIST; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: |
| pipeline->prim_type = GEN6_3DPRIM_LINELIST; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: |
| pipeline->prim_type = GEN6_3DPRIM_LINESTRIP; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: |
| pipeline->prim_type = GEN6_3DPRIM_TRILIST; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: |
| pipeline->prim_type = GEN6_3DPRIM_TRISTRIP; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: |
| pipeline->prim_type = GEN6_3DPRIM_TRIFAN; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: |
| pipeline->prim_type = GEN6_3DPRIM_LINELIST_ADJ; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: |
| pipeline->prim_type = GEN6_3DPRIM_LINESTRIP_ADJ; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: |
| pipeline->prim_type = GEN6_3DPRIM_TRILIST_ADJ; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: |
| pipeline->prim_type = GEN6_3DPRIM_TRISTRIP_ADJ; |
| break; |
| case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: |
| pipeline->prim_type = GEN7_3DPRIM_PATCHLIST_1 + |
| info->tess.patchControlPoints - 1; |
| break; |
| default: |
| assert(!"unsupported primitive topology format"); |
| break; |
| } |
| |
| if (info->ia.primitiveRestartEnable) { |
| pipeline->primitive_restart = true; |
| pipeline->primitive_restart_index = 0; |
| } else { |
| pipeline->primitive_restart = false; |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
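/* Translate rasterization state into 3DSTATE_SF and 3DSTATE_CLIP fields. */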
| static VkResult pipeline_build_rs_state(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info* info) |
| { |
| const VkPipelineRasterizationStateCreateInfo *rs_state = &info->rs; |
| bool ccw; |
| |
| pipeline->depthClipEnable = !rs_state->depthClampEnable; |
| pipeline->rasterizerDiscardEnable = rs_state->rasterizerDiscardEnable; |
| pipeline->depthBiasEnable = rs_state->depthBiasEnable; |
| |
| switch (rs_state->polygonMode) { |
| case VK_POLYGON_MODE_POINT: |
| pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_POINT | |
| GEN7_SF_DW1_BACKFACE_POINT; |
| break; |
| case VK_POLYGON_MODE_LINE: |
| pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_WIREFRAME | |
| GEN7_SF_DW1_BACKFACE_WIREFRAME; |
| break; |
| case VK_POLYGON_MODE_FILL: |
| default: |
| pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_SOLID | |
| GEN7_SF_DW1_BACKFACE_SOLID; |
| break; |
| } |
| |
    ccw = (rs_state->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE);
    /* program the hardware front-face winding to match the API setting */
| |
| if (ccw) { |
| pipeline->cmd_sf_fill |= GEN7_SF_DW1_FRONTWINDING_CCW; |
| pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_FRONTWINDING_CCW; |
| } |
| |
| switch (rs_state->cullMode) { |
| case VK_CULL_MODE_NONE: |
| default: |
| pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_NONE; |
| pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_NONE; |
| break; |
| case VK_CULL_MODE_FRONT_BIT: |
| pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_FRONT; |
| pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_FRONT; |
| break; |
| case VK_CULL_MODE_BACK_BIT: |
| pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BACK; |
| pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BACK; |
| break; |
| case VK_CULL_MODE_FRONT_AND_BACK: |
| pipeline->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BOTH; |
| pipeline->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BOTH; |
| break; |
| } |
| |
| /* only GEN7+ needs cull mode in 3DSTATE_CLIP */ |
| if (intel_gpu_gen(pipeline->dev->gpu) == INTEL_GEN(6)) |
| pipeline->cmd_clip_cull = 0; |
| |
| return VK_SUCCESS; |
| } |
| |
| static void pipeline_destroy(struct intel_obj *obj) |
| { |
| struct intel_pipeline *pipeline = intel_pipeline_from_obj(obj); |
| |
| if (pipeline->active_shaders & SHADER_VERTEX_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->vs, pipeline->dev->gpu); |
| } |
| |
| if (pipeline->active_shaders & SHADER_TESS_CONTROL_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->tcs, pipeline->dev->gpu); |
| } |
| |
| if (pipeline->active_shaders & SHADER_TESS_EVAL_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->tes, pipeline->dev->gpu); |
| } |
| |
| if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->gs, pipeline->dev->gpu); |
| } |
| |
| if (pipeline->active_shaders & SHADER_FRAGMENT_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->fs, pipeline->dev->gpu); |
| } |
| |
| if (pipeline->active_shaders & SHADER_COMPUTE_FLAG) { |
| intel_pipeline_shader_cleanup(&pipeline->cs, pipeline->dev->gpu); |
| } |
| |
| intel_base_destroy(&pipeline->obj.base); |
| } |
| |
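/*
 * Partition the Gen6 URB between VS and GS entries and emit 3DSTATE_URB.
 * The VS entry size is the larger of its input and output vec4 counts, in
 * bytes, rounded up to whole 1024-bit rows; e.g. 8 vec4s = 128 bytes = one
 * row.
 */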
| static void pipeline_build_urb_alloc_gen6(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const struct intel_gpu *gpu = pipeline->dev->gpu; |
| const int urb_size = ((gpu->gt == 2) ? 64 : 32) * 1024; |
| const struct intel_pipeline_shader *vs = &pipeline->vs; |
| const struct intel_pipeline_shader *gs = &pipeline->gs; |
| int vs_entry_size, gs_entry_size; |
| int vs_size, gs_size; |
| |
| INTEL_GPU_ASSERT(gpu, 6, 6); |
| |
| vs_entry_size = ((vs->in_count >= vs->out_count) ? |
| vs->in_count : vs->out_count); |
    gs_entry_size = (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) ?
        gs->out_count : 0;
| |
| /* in bytes */ |
| vs_entry_size *= sizeof(float) * 4; |
| gs_entry_size *= sizeof(float) * 4; |
| |
| if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) { |
| vs_size = urb_size / 2; |
| gs_size = vs_size; |
| } else { |
| vs_size = urb_size; |
| gs_size = 0; |
| } |
| |
| /* 3DSTATE_URB */ |
| { |
| const uint8_t cmd_len = 3; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | |
| (cmd_len - 2); |
| int vs_alloc_size, gs_alloc_size; |
| int vs_entry_count, gs_entry_count; |
| uint32_t *dw; |
| |
| /* in 1024-bit rows */ |
| vs_alloc_size = (vs_entry_size + 128 - 1) / 128; |
| gs_alloc_size = (gs_entry_size + 128 - 1) / 128; |
| |
| /* valid range is [1, 5] */ |
| if (!vs_alloc_size) |
| vs_alloc_size = 1; |
| if (!gs_alloc_size) |
| gs_alloc_size = 1; |
| assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); |
| |
| /* valid range is [24, 256], multiples of 4 */ |
| vs_entry_count = (vs_size / 128 / vs_alloc_size) & ~3; |
| if (vs_entry_count > 256) |
| vs_entry_count = 256; |
| assert(vs_entry_count >= 24); |
| |
| /* valid range is [0, 256], multiples of 4 */ |
| gs_entry_count = (gs_size / 128 / gs_alloc_size) & ~3; |
| if (gs_entry_count > 256) |
| gs_entry_count = 256; |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| |
| dw[0] = dw0; |
| dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | |
| vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; |
| dw[2] = gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | |
| (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; |
| } |
| } |
| |
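/*
 * Gen7/7.5 URB setup: the start of the URB is reserved for the push
 * constant buffers (PCBs), so only the space past urb_offset is split
 * between VS and GS; HS and DS are programmed with offsets only, with no
 * entries allocated.
 */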
| static void pipeline_build_urb_alloc_gen7(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const struct intel_gpu *gpu = pipeline->dev->gpu; |
| const int urb_size = ((gpu->gt == 3) ? 512 : |
| (gpu->gt == 2) ? 256 : 128) * 1024; |
| const struct intel_pipeline_shader *vs = &pipeline->vs; |
| const struct intel_pipeline_shader *gs = &pipeline->gs; |
| /* some space is reserved for PCBs */ |
| int urb_offset = ((gpu->gt == 3) ? 32 : 16) * 1024; |
| int vs_entry_size, gs_entry_size; |
| int vs_size, gs_size; |
| |
| INTEL_GPU_ASSERT(gpu, 7, 7.5); |
| |
| vs_entry_size = ((vs->in_count >= vs->out_count) ? |
| vs->in_count : vs->out_count); |
    gs_entry_size = (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) ?
        gs->out_count : 0;
| |
| /* in bytes */ |
| vs_entry_size *= sizeof(float) * 4; |
| gs_entry_size *= sizeof(float) * 4; |
| |
| if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) { |
| vs_size = (urb_size - urb_offset) / 2; |
| gs_size = vs_size; |
| } else { |
| vs_size = urb_size - urb_offset; |
| gs_size = 0; |
| } |
| |
| /* 3DSTATE_URB_* */ |
| { |
| const uint8_t cmd_len = 2; |
| int vs_alloc_size, gs_alloc_size; |
| int vs_entry_count, gs_entry_count; |
| uint32_t *dw; |
| |
| /* in 512-bit rows */ |
| vs_alloc_size = (vs_entry_size + 64 - 1) / 64; |
| gs_alloc_size = (gs_entry_size + 64 - 1) / 64; |
| |
| if (!vs_alloc_size) |
| vs_alloc_size = 1; |
| if (!gs_alloc_size) |
| gs_alloc_size = 1; |
| |
| /* avoid performance decrease due to banking */ |
| if (vs_alloc_size == 5) |
| vs_alloc_size = 6; |
| |
| /* in multiples of 8 */ |
| vs_entry_count = (vs_size / 64 / vs_alloc_size) & ~7; |
| assert(vs_entry_count >= 32); |
| |
| gs_entry_count = (gs_size / 64 / gs_alloc_size) & ~7; |
| |
| if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) { |
| const int max_vs_entry_count = |
| (gpu->gt >= 2) ? 1664 : 640; |
| const int max_gs_entry_count = |
| (gpu->gt >= 2) ? 640 : 256; |
| if (vs_entry_count >= max_vs_entry_count) |
| vs_entry_count = max_vs_entry_count; |
| if (gs_entry_count >= max_gs_entry_count) |
| gs_entry_count = max_gs_entry_count; |
| } else { |
| const int max_vs_entry_count = |
| (gpu->gt == 2) ? 704 : 512; |
| const int max_gs_entry_count = |
| (gpu->gt == 2) ? 320 : 192; |
| if (vs_entry_count >= max_vs_entry_count) |
| vs_entry_count = max_vs_entry_count; |
| if (gs_entry_count >= max_gs_entry_count) |
| gs_entry_count = max_gs_entry_count; |
| } |
| |
        dw = pipeline_cmd_ptr(pipeline, cmd_len * 4);
| dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2); |
| dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT | |
| (vs_alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | |
| vs_entry_count; |
| |
| dw += 2; |
| if (gs_size) |
| urb_offset += vs_size; |
| dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2); |
| dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT | |
| (gs_alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | |
| gs_entry_count; |
| |
| dw += 2; |
| dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2); |
| dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT; |
| |
| dw += 2; |
| dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2); |
| dw[1] = (urb_offset / 8192) << GEN7_URB_DW1_OFFSET__SHIFT; |
| } |
| } |
| |
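/*
 * Emit 3DSTATE_VERTEX_ELEMENTS: one VERTEX_ELEMENT_STATE entry per VS
 * input location, plus one extra element when the shader reads VertexID
 * or InstanceID.
 */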
| static void pipeline_build_vertex_elements(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const struct intel_pipeline_shader *vs = &pipeline->vs; |
| uint8_t cmd_len; |
| uint32_t *dw; |
| uint32_t i, j; |
| uint32_t attr_count; |
| uint32_t attrs_processed; |
| int comps[4]; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5); |
| |
| attr_count = u_popcountll(vs->inputs_read); |
| cmd_len = 1 + 2 * attr_count; |
| if (vs->uses & (INTEL_SHADER_USE_VID | INTEL_SHADER_USE_IID)) |
| cmd_len += 2; |
| |
| if (cmd_len == 1) |
| return; |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| |
| dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | |
| (cmd_len - 2); |
| dw++; |
| |
| /* VERTEX_ELEMENT_STATE */ |
| for (i = 0, attrs_processed = 0; attrs_processed < attr_count; i++) { |
        const VkVertexInputAttributeDescription *attr = NULL;
| |
| /* |
| * The compiler will pack the shader references and then |
| * indicate which locations are used via the bitmask in |
| * vs->inputs_read. |
| */ |
        if (!(vs->inputs_read & (1ULL << i))) {
| continue; |
| } |
| |
| /* |
| * For each bit set in the vs->inputs_read we'll need |
| * to find the corresponding attribute record and then |
| * set up the next HW vertex element based on that attribute. |
| */ |
| for (j = 0; j < info->vi.vertexAttributeDescriptionCount; j++) { |
| if (info->vi.pVertexAttributeDescriptions[j].location == i) { |
                attr = &info->vi.pVertexAttributeDescriptions[j];
| attrs_processed++; |
| break; |
| } |
| } |
| assert(attr != NULL); |
| |
| const int format = |
| intel_format_translate_color(pipeline->dev->gpu, attr->format); |
| |
| comps[0] = GEN6_VFCOMP_STORE_0; |
| comps[1] = GEN6_VFCOMP_STORE_0; |
| comps[2] = GEN6_VFCOMP_STORE_0; |
| comps[3] = icd_format_is_int(attr->format) ? |
| GEN6_VFCOMP_STORE_1_INT : GEN6_VFCOMP_STORE_1_FP; |
| |
| switch (icd_format_get_channel_count(attr->format)) { |
| case 4: comps[3] = GEN6_VFCOMP_STORE_SRC; /* fall through */ |
| case 3: comps[2] = GEN6_VFCOMP_STORE_SRC; /* fall through */ |
| case 2: comps[1] = GEN6_VFCOMP_STORE_SRC; /* fall through */ |
| case 1: comps[0] = GEN6_VFCOMP_STORE_SRC; break; |
| default: |
| break; |
| } |
| |
| assert(attr->offset <= 2047); |
| |
| dw[0] = attr->binding << GEN6_VE_DW0_VB_INDEX__SHIFT | |
| GEN6_VE_DW0_VALID | |
| format << GEN6_VE_DW0_FORMAT__SHIFT | |
| attr->offset; |
| |
| dw[1] = comps[0] << GEN6_VE_DW1_COMP0__SHIFT | |
| comps[1] << GEN6_VE_DW1_COMP1__SHIFT | |
| comps[2] << GEN6_VE_DW1_COMP2__SHIFT | |
| comps[3] << GEN6_VE_DW1_COMP3__SHIFT; |
| |
| dw += 2; |
| } |
| |
| if (vs->uses & (INTEL_SHADER_USE_VID | INTEL_SHADER_USE_IID)) { |
| comps[0] = (vs->uses & INTEL_SHADER_USE_VID) ? |
| GEN6_VFCOMP_STORE_VID : GEN6_VFCOMP_STORE_0; |
| comps[1] = (vs->uses & INTEL_SHADER_USE_IID) ? |
| GEN6_VFCOMP_STORE_IID : GEN6_VFCOMP_NOSTORE; |
| comps[2] = GEN6_VFCOMP_NOSTORE; |
| comps[3] = GEN6_VFCOMP_NOSTORE; |
| |
| dw[0] = GEN6_VE_DW0_VALID; |
| dw[1] = comps[0] << GEN6_VE_DW1_COMP0__SHIFT | |
| comps[1] << GEN6_VE_DW1_COMP1__SHIFT | |
| comps[2] << GEN6_VE_DW1_COMP2__SHIFT | |
| comps[3] << GEN6_VE_DW1_COMP3__SHIFT; |
| |
| dw += 2; |
| } |
| } |
| |
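/*
 * Build 3DSTATE_SBE, which routes attributes written by the last
 * geometry-producing stage (GS if present, else VS) to the FS inputs.
 */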
| static void pipeline_build_fragment_SBE(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const struct intel_pipeline_shader *fs = &pipeline->fs; |
| uint8_t cmd_len; |
| uint32_t *body; |
| uint32_t attr_skip, attr_count; |
| uint32_t vue_offset, vue_len; |
| uint32_t i; |
| |
| // If GS is active, use its outputs |
| const struct intel_pipeline_shader *src = |
| (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) |
| ? &pipeline->gs |
| : &pipeline->vs; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5); |
| |
| cmd_len = 14; |
| |
| if (intel_gpu_gen(pipeline->dev->gpu) >= INTEL_GEN(7)) |
| body = pipeline_cmd_ptr(pipeline, cmd_len); |
| else |
| body = pipeline->cmd_3dstate_sbe; |
| |
| assert(!fs->reads_user_clip || src->enable_user_clip); |
| attr_skip = src->outputs_offset; |
| if (src->enable_user_clip != fs->reads_user_clip) { |
| attr_skip += 2; |
| } |
| assert(src->out_count >= attr_skip); |
| attr_count = src->out_count - attr_skip; |
| |
| // LUNARG TODO: We currently are only handling 16 attrs; |
| // ultimately, we need to handle 32 |
| assert(fs->in_count <= 16); |
| assert(attr_count <= 16); |
| |
| vue_offset = attr_skip / 2; |
| vue_len = (attr_count + 1) / 2; |
| if (!vue_len) |
| vue_len = 1; |
| |
| body[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | |
| (cmd_len - 2); |
| |
| // LUNARG TODO: If the attrs needed by the FS are exactly |
| // what is written by the VS, we don't need to enable |
| // swizzling, improving performance. Even if we swizzle, |
| // we can improve performance by reducing vue_len to |
| // just include the values needed by the FS: |
| // vue_len = ceiling((max_vs_out + 1)/2) |
| |
| body[1] = GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE | |
| fs->in_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | |
| vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | |
| vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; |
| |
| /* Vulkan default is point origin upper left */ |
| body[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; |
| |
| uint16_t src_slot[fs->in_count]; |
| int32_t fs_in = 0; |
    int32_t src_out = -(vue_offset * 2 - src->outputs_offset);
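    /*
     * Walk all 64 possible varying slots and record, for each FS input,
     * the slot the upstream stage wrote it to; generic inputs the
     * upstream stage never writes are overridden to constant zero.
     */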
    for (i = 0; i < 64; i++) {
        bool srcWrites = src->outputs_written & (1ULL << i);
        bool fsReads = fs->inputs_read & (1ULL << i);
| |
| if (fsReads) { |
| assert(src_out >= 0); |
| assert(fs_in < fs->in_count); |
| src_slot[fs_in] = src_out; |
| |
| if (!srcWrites) { |
| // If the vertex shader did not write this input, we cannot |
| // program the SBE to read it. Our choices are to allow it to |
| // read junk from a GRF, or get zero. We're choosing zero. |
| if (i >= fs->generic_input_start) { |
| src_slot[fs_in] = GEN8_SBE_SWIZ_CONST_0000 | |
| GEN8_SBE_SWIZ_OVERRIDE_X | |
| GEN8_SBE_SWIZ_OVERRIDE_Y | |
| GEN8_SBE_SWIZ_OVERRIDE_Z | |
| GEN8_SBE_SWIZ_OVERRIDE_W; |
| } |
| } |
| |
| fs_in += 1; |
| } |
| if (srcWrites) { |
| src_out += 1; |
| } |
| } |
| |
| for (i = 0; i < 8; i++) { |
| uint16_t hi, lo; |
| |
| /* no attr swizzles */ |
| if (i * 2 + 1 < fs->in_count) { |
| lo = src_slot[i * 2]; |
| hi = src_slot[i * 2 + 1]; |
| } else if (i * 2 < fs->in_count) { |
| lo = src_slot[i * 2]; |
| hi = 0; |
| } else { |
| hi = 0; |
| lo = 0; |
| } |
| |
| body[2 + i] = hi << GEN8_SBE_SWIZ_HIGH__SHIFT | lo; |
| } |
| |
| if (info->ia.topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) |
| body[10] = fs->point_sprite_enables; |
| else |
| body[10] = 0; |
| |
| body[11] = 0; /* constant interpolation enables */ |
| body[12] = 0; /* WrapShortest enables */ |
| body[13] = 0; |
| } |
| |
| static void pipeline_build_gs(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| // gen7_emit_3DSTATE_GS done by cmd_pipeline |
| } |
| |
| static void pipeline_build_hs(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const uint8_t cmd_len = 7; |
| const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); |
| uint32_t *dw; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5); |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| dw[0] = dw0; |
| dw[1] = 0; |
| dw[2] = 0; |
| dw[3] = 0; |
| dw[4] = 0; |
| dw[5] = 0; |
| dw[6] = 0; |
| } |
| |
| static void pipeline_build_te(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const uint8_t cmd_len = 4; |
| const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2); |
| uint32_t *dw; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5); |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| dw[0] = dw0; |
| dw[1] = 0; |
| dw[2] = 0; |
| dw[3] = 0; |
| } |
| |
| static void pipeline_build_ds(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| const uint8_t cmd_len = 6; |
| const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); |
| uint32_t *dw; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 7, 7.5); |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| dw[0] = dw0; |
| dw[1] = 0; |
| dw[2] = 0; |
| dw[3] = 0; |
| dw[4] = 0; |
| dw[5] = 0; |
| } |
| |
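/*
 * Pack stencil state (DEPTH_STENCIL_STATE DW0) and depth-test state (DW2)
 * for the command buffer to copy out at bind time.
 */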
| static void pipeline_build_depth_stencil(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| pipeline->cmd_depth_stencil = 0; |
| |
| if (info->db.stencilTestEnable) { |
| pipeline->cmd_depth_stencil = 1 << 31 | |
| translate_compare_func(info->db.front.compareOp) << 28 | |
| translate_stencil_op(info->db.front.failOp) << 25 | |
| translate_stencil_op(info->db.front.depthFailOp) << 22 | |
| translate_stencil_op(info->db.front.passOp) << 19 | |
| 1 << 15 | |
| translate_compare_func(info->db.back.compareOp) << 12 | |
| translate_stencil_op(info->db.back.failOp) << 9 | |
| translate_stencil_op(info->db.back.depthFailOp) << 6 | |
| translate_stencil_op(info->db.back.passOp) << 3; |
| } |
| |
| pipeline->stencilTestEnable = info->db.stencilTestEnable; |
| |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 360: |
| * |
| * "Enabling the Depth Test function without defining a Depth Buffer is |
| * UNDEFINED." |
| * |
| * From the Sandy Bridge PRM, volume 2 part 1, page 375: |
| * |
| * "A Depth Buffer must be defined before enabling writes to it, or |
| * operation is UNDEFINED." |
| * |
| * TODO We do not check these yet. |
| */ |
| if (info->db.depthTestEnable) { |
| pipeline->cmd_depth_test = GEN6_ZS_DW2_DEPTH_TEST_ENABLE | |
| translate_compare_func(info->db.depthCompareOp) << 27; |
| } else { |
| pipeline->cmd_depth_test = GEN6_COMPAREFUNCTION_ALWAYS << 27; |
| } |
| |
| if (info->db.depthWriteEnable) |
| pipeline->cmd_depth_test |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE; |
| } |
| |
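/* Record the rasterization sample count and emit 3DSTATE_SAMPLE_MASK. */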
| static void pipeline_build_msaa(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| uint32_t cmd, cmd_len; |
| uint32_t *dw; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5); |
| |
| pipeline->sample_count = |
| translate_sample_count(info->ms.rasterizationSamples); |
| |
| pipeline->alphaToCoverageEnable = info->ms.alphaToCoverageEnable; |
| pipeline->alphaToOneEnable = info->ms.alphaToOneEnable; |
| |
| /* 3DSTATE_SAMPLE_MASK */ |
| cmd = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK); |
| cmd_len = 2; |
| |
| dw = pipeline_cmd_ptr(pipeline, cmd_len); |
| dw[0] = cmd | (cmd_len - 2); |
| if (info->ms.pSampleMask) { |
| /* "Bit B of mask word M corresponds to sample 32*M + B." |
| * "The array is sized to a length of ceil(rasterizationSamples / 32) words." |
| * "If pSampleMask is NULL, it is treated as if the mask has all bits enabled," |
| * "i.e. no coverage is removed from primitives." |
| */ |
| assert(pipeline->sample_count / 32 == 0); |
| dw[1] = *info->ms.pSampleMask & ((1 << pipeline->sample_count) - 1); |
| } else { |
| dw[1] = (1 << pipeline->sample_count) - 1; |
| } |
| |
| pipeline->cmd_sample_mask = dw[1]; |
| } |
| |
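/*
 * Build one BLEND_STATE entry (two dwords) per color attachment; any
 * remaining render targets get all channel writes disabled.
 */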
| static void pipeline_build_cb(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| uint32_t i; |
| |
| INTEL_GPU_ASSERT(pipeline->dev->gpu, 6, 7.5); |
| STATIC_ASSERT(ARRAY_SIZE(pipeline->cmd_cb) >= INTEL_MAX_RENDER_TARGETS*2); |
| assert(info->cb.attachmentCount <= INTEL_MAX_RENDER_TARGETS); |
| |
| uint32_t *dw = pipeline->cmd_cb; |
| |
| for (i = 0; i < info->cb.attachmentCount; i++) { |
| const VkPipelineColorBlendAttachmentState *att = &info->cb.pAttachments[i]; |
| uint32_t dw0, dw1; |
| dw0 = 0; |
| dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | |
| GEN6_RT_DW1_PRE_BLEND_CLAMP | |
| GEN6_RT_DW1_POST_BLEND_CLAMP; |
| |
| if (att->blendEnable) { |
| dw0 = 1 << 31 | |
| translate_blend_func(att->alphaBlendOp) << 26 | |
| translate_blend(att->srcAlphaBlendFactor) << 20 | |
| translate_blend(att->dstAlphaBlendFactor) << 15 | |
| translate_blend_func(att->colorBlendOp) << 11 | |
| translate_blend(att->srcColorBlendFactor) << 5 | |
| translate_blend(att->dstColorBlendFactor); |
| |
| if (att->alphaBlendOp != att->colorBlendOp || |
| att->srcAlphaBlendFactor != att->srcColorBlendFactor || |
| att->dstAlphaBlendFactor != att->dstColorBlendFactor) |
| dw0 |= 1 << 30; |
| |
| pipeline->dual_source_blend_enable = icd_pipeline_cb_att_needs_dual_source_blending(att); |
| } |
| |
| if (info->cb.logicOpEnable && info->cb.logicOp != VK_LOGIC_OP_COPY) { |
| int logicop; |
| |
| switch (info->cb.logicOp) { |
| case VK_LOGIC_OP_CLEAR: logicop = GEN6_LOGICOP_CLEAR; break; |
| case VK_LOGIC_OP_AND: logicop = GEN6_LOGICOP_AND; break; |
| case VK_LOGIC_OP_AND_REVERSE: logicop = GEN6_LOGICOP_AND_REVERSE; break; |
| case VK_LOGIC_OP_AND_INVERTED: logicop = GEN6_LOGICOP_AND_INVERTED; break; |
| case VK_LOGIC_OP_NO_OP: logicop = GEN6_LOGICOP_NOOP; break; |
| case VK_LOGIC_OP_XOR: logicop = GEN6_LOGICOP_XOR; break; |
| case VK_LOGIC_OP_OR: logicop = GEN6_LOGICOP_OR; break; |
| case VK_LOGIC_OP_NOR: logicop = GEN6_LOGICOP_NOR; break; |
| case VK_LOGIC_OP_EQUIVALENT: logicop = GEN6_LOGICOP_EQUIV; break; |
| case VK_LOGIC_OP_INVERT: logicop = GEN6_LOGICOP_INVERT; break; |
| case VK_LOGIC_OP_OR_REVERSE: logicop = GEN6_LOGICOP_OR_REVERSE; break; |
| case VK_LOGIC_OP_COPY_INVERTED: logicop = GEN6_LOGICOP_COPY_INVERTED; break; |
| case VK_LOGIC_OP_OR_INVERTED: logicop = GEN6_LOGICOP_OR_INVERTED; break; |
| case VK_LOGIC_OP_NAND: logicop = GEN6_LOGICOP_NAND; break; |
| case VK_LOGIC_OP_SET: logicop = GEN6_LOGICOP_SET; break; |
| default: |
| assert(!"unknown logic op"); |
| logicop = GEN6_LOGICOP_CLEAR; |
| break; |
| } |
| |
| dw1 |= GEN6_RT_DW1_LOGICOP_ENABLE | |
| logicop << GEN6_RT_DW1_LOGICOP_FUNC__SHIFT; |
| } |
| |
| if (!(att->colorWriteMask & 0x1)) |
| dw1 |= GEN6_RT_DW1_WRITE_DISABLE_R; |
| if (!(att->colorWriteMask & 0x2)) |
| dw1 |= GEN6_RT_DW1_WRITE_DISABLE_G; |
| if (!(att->colorWriteMask & 0x4)) |
| dw1 |= GEN6_RT_DW1_WRITE_DISABLE_B; |
| if (!(att->colorWriteMask & 0x8)) |
| dw1 |= GEN6_RT_DW1_WRITE_DISABLE_A; |
| |
| dw[2 * i] = dw0; |
| dw[2 * i + 1] = dw1; |
| } |
| |
    for (i = info->cb.attachmentCount; i < INTEL_MAX_RENDER_TARGETS; i++) {
| dw[2 * i] = 0; |
| dw[2 * i + 1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT | |
| GEN6_RT_DW1_PRE_BLEND_CLAMP | |
| GEN6_RT_DW1_POST_BLEND_CLAMP | |
| GEN6_RT_DW1_WRITE_DISABLE_R | |
| GEN6_RT_DW1_WRITE_DISABLE_G | |
| GEN6_RT_DW1_WRITE_DISABLE_B | |
| GEN6_RT_DW1_WRITE_DISABLE_A; |
| } |
}
| |
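/*
 * Snapshot every piece of state the app did not declare dynamic, so that
 * binding the pipeline can program it without dynamic-state objects.
 */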
| static void pipeline_build_state(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_VIEWPORT) { |
| pipeline->state.viewport.viewport_count = info->vp.viewportCount; |
| memcpy(pipeline->state.viewport.viewports, info->vp.pViewports, info->vp.viewportCount * sizeof(VkViewport)); |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_SCISSOR) { |
| pipeline->state.viewport.scissor_count = info->vp.scissorCount; |
| memcpy(pipeline->state.viewport.scissors, info->vp.pScissors, info->vp.scissorCount * sizeof(VkRect2D)); |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_LINE_WIDTH) { |
| pipeline->state.line_width.line_width = info->rs.lineWidth; |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BIAS) { |
| pipeline->state.depth_bias.depth_bias = info->rs.depthBiasConstantFactor; |
| pipeline->state.depth_bias.depth_bias_clamp = info->rs.depthBiasClamp; |
| pipeline->state.depth_bias.slope_scaled_depth_bias = info->rs.depthBiasSlopeFactor; |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_BLEND_CONSTANTS) { |
| pipeline->state.blend.blend_const[0] = info->cb.blendConstants[0]; |
| pipeline->state.blend.blend_const[1] = info->cb.blendConstants[1]; |
| pipeline->state.blend.blend_const[2] = info->cb.blendConstants[2]; |
| pipeline->state.blend.blend_const[3] = info->cb.blendConstants[3]; |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BOUNDS) { |
| pipeline->state.depth_bounds.min_depth_bounds = info->db.minDepthBounds; |
| pipeline->state.depth_bounds.max_depth_bounds = info->db.maxDepthBounds; |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_COMPARE_MASK) { |
| pipeline->state.stencil.front.stencil_compare_mask = info->db.front.compareMask; |
| pipeline->state.stencil.back.stencil_compare_mask = info->db.back.compareMask; |
| } |
    if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_WRITE_MASK) {
        pipeline->state.stencil.front.stencil_write_mask = info->db.front.writeMask;
| pipeline->state.stencil.back.stencil_write_mask = info->db.back.writeMask; |
| } |
| if (info->use_pipeline_dynamic_state & INTEL_USE_PIPELINE_DYNAMIC_STENCIL_REFERENCE) { |
| pipeline->state.stencil.front.stencil_reference = info->db.front.reference; |
| pipeline->state.stencil.back.stencil_reference = info->db.back.reference; |
| } |
| |
| pipeline->state.use_pipeline_dynamic_state = info->use_pipeline_dynamic_state; |
| } |
| static VkResult pipeline_build_all(struct intel_pipeline *pipeline, |
| const struct intel_pipeline_create_info *info) |
| { |
| VkResult ret; |
| |
| pipeline_build_state(pipeline, info); |
| |
| ret = pipeline_build_shaders(pipeline, info); |
| if (ret != VK_SUCCESS) |
| return ret; |
| |
| /* TODOVV: Move test to validation layer |
| * This particular test is based on a limit imposed by |
| * INTEL_MAX_VERTEX_BINDING_COUNT, which should be migrated |
| * to API-defined maxVertexInputBindings setting and then |
| * this check can be in DeviceLimits layer |
| */ |
| if (info->vi.vertexBindingDescriptionCount > ARRAY_SIZE(pipeline->vb) || |
| info->vi.vertexAttributeDescriptionCount > ARRAY_SIZE(pipeline->vb)) { |
| return VK_ERROR_VALIDATION_FAILED_EXT; |
| } |
| |
| pipeline->vb_count = info->vi.vertexBindingDescriptionCount; |
| memcpy(pipeline->vb, info->vi.pVertexBindingDescriptions, |
| sizeof(pipeline->vb[0]) * pipeline->vb_count); |
| |
| pipeline_build_vertex_elements(pipeline, info); |
| pipeline_build_fragment_SBE(pipeline, info); |
| pipeline_build_msaa(pipeline, info); |
| pipeline_build_depth_stencil(pipeline, info); |
| |
| if (intel_gpu_gen(pipeline->dev->gpu) >= INTEL_GEN(7)) { |
| pipeline_build_urb_alloc_gen7(pipeline, info); |
| pipeline_build_gs(pipeline, info); |
| pipeline_build_hs(pipeline, info); |
| pipeline_build_te(pipeline, info); |
| pipeline_build_ds(pipeline, info); |
| |
| pipeline->wa_flags = INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE | |
| INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL | |
| INTEL_CMD_WA_GEN7_PRE_VS_DEPTH_STALL_WRITE | |
| INTEL_CMD_WA_GEN7_POST_COMMAND_CS_STALL | |
| INTEL_CMD_WA_GEN7_POST_COMMAND_DEPTH_STALL; |
| } else { |
| pipeline_build_urb_alloc_gen6(pipeline, info); |
| |
| pipeline->wa_flags = INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE | |
| INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL; |
| } |
| |
| ret = pipeline_build_ia(pipeline, info); |
| |
| if (ret == VK_SUCCESS) |
| ret = pipeline_build_rs_state(pipeline, info); |
| |
| if (ret == VK_SUCCESS) { |
| pipeline_build_cb(pipeline, info); |
| pipeline->cb_state = info->cb; |
| pipeline->tess_state = info->tess; |
| } |
| |
| return ret; |
| } |
| |
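/*
 * Flatten the chained create info into one struct: copy each optional
 * sub-state block locally, sort shader stages into per-stage slots, and
 * work out which dynamic states are taken from the pipeline.
 */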
| static VkResult pipeline_create_info_init(struct intel_pipeline_create_info *info, |
| const VkGraphicsPipelineCreateInfo *vkinfo) |
| { |
| memset(info, 0, sizeof(*info)); |
| |
| /* |
| * Do we need to set safe defaults in case the app doesn't provide all of |
| * the necessary create infos? |
| */ |
| info->ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; |
| info->ms.pSampleMask = NULL; |
| |
| memcpy(&info->graphics, vkinfo, sizeof (info->graphics)); |
| |
    void *dst = NULL;
| for (uint32_t i = 0; i < vkinfo->stageCount; i++) { |
| const VkPipelineShaderStageCreateInfo *thisStage = &vkinfo->pStages[i]; |
| switch (thisStage->stage) { |
| case VK_SHADER_STAGE_VERTEX_BIT: |
| dst = &info->vs; |
| break; |
| case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: |
| dst = &info->tcs; |
| break; |
| case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: |
| dst = &info->tes; |
| break; |
| case VK_SHADER_STAGE_GEOMETRY_BIT: |
| dst = &info->gs; |
| break; |
| case VK_SHADER_STAGE_FRAGMENT_BIT: |
| dst = &info->fs; |
| break; |
| case VK_SHADER_STAGE_COMPUTE_BIT: |
| dst = &info->compute; |
| break; |
        default:
            assert(!"unsupported shader stage");
            dst = NULL;
            break;
        }
        /* skip unrecognized stages instead of copying through a stale or
         * uninitialized pointer */
        if (dst != NULL)
            memcpy(dst, thisStage, sizeof(VkPipelineShaderStageCreateInfo));
| } |
| |
| if (vkinfo->pVertexInputState != NULL) { |
| memcpy(&info->vi, vkinfo->pVertexInputState, sizeof (info->vi)); |
| } |
| if (vkinfo->pInputAssemblyState != NULL) { |
| memcpy(&info->ia, vkinfo->pInputAssemblyState, sizeof (info->ia)); |
| } |
| if (vkinfo->pDepthStencilState != NULL) { |
| memcpy(&info->db, vkinfo->pDepthStencilState, sizeof (info->db)); |
| } |
| if (vkinfo->pColorBlendState != NULL) { |
| memcpy(&info->cb, vkinfo->pColorBlendState, sizeof (info->cb)); |
| } |
| if (vkinfo->pRasterizationState != NULL) { |
| memcpy(&info->rs, vkinfo->pRasterizationState, sizeof (info->rs)); |
| } |
| if (vkinfo->pTessellationState != NULL) { |
| memcpy(&info->tess, vkinfo->pTessellationState, sizeof (info->tess)); |
| } |
| if (vkinfo->pMultisampleState != NULL) { |
| memcpy(&info->ms, vkinfo->pMultisampleState, sizeof (info->ms)); |
| } |
| if (vkinfo->pViewportState != NULL) { |
| memcpy(&info->vp, vkinfo->pViewportState, sizeof (info->vp)); |
| } |
| |
| /* by default, take all dynamic state from the pipeline */ |
| info->use_pipeline_dynamic_state = INTEL_USE_PIPELINE_DYNAMIC_VIEWPORT | |
| INTEL_USE_PIPELINE_DYNAMIC_SCISSOR | |
| INTEL_USE_PIPELINE_DYNAMIC_BLEND_CONSTANTS | |
| INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BIAS | |
| INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BOUNDS | |
| INTEL_USE_PIPELINE_DYNAMIC_LINE_WIDTH | |
| INTEL_USE_PIPELINE_DYNAMIC_STENCIL_COMPARE_MASK | |
| INTEL_USE_PIPELINE_DYNAMIC_STENCIL_REFERENCE | |
| INTEL_USE_PIPELINE_DYNAMIC_STENCIL_WRITE_MASK; |
| if (vkinfo->pDynamicState != NULL) { |
| for (uint32_t i = 0; i < vkinfo->pDynamicState->dynamicStateCount; i++) { |
| /* Mark dynamic state indicated by app as not using pipeline state */ |
| switch (vkinfo->pDynamicState->pDynamicStates[i]) { |
| case VK_DYNAMIC_STATE_VIEWPORT: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_VIEWPORT; |
| break; |
| case VK_DYNAMIC_STATE_SCISSOR: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_SCISSOR; |
| break; |
| case VK_DYNAMIC_STATE_LINE_WIDTH: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_LINE_WIDTH; |
| break; |
| case VK_DYNAMIC_STATE_DEPTH_BIAS: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BIAS; |
| break; |
| case VK_DYNAMIC_STATE_BLEND_CONSTANTS: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_BLEND_CONSTANTS; |
| break; |
| case VK_DYNAMIC_STATE_DEPTH_BOUNDS: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_DEPTH_BOUNDS; |
| break; |
| case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_STENCIL_COMPARE_MASK; |
| break; |
| case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_STENCIL_WRITE_MASK; |
| break; |
| case VK_DYNAMIC_STATE_STENCIL_REFERENCE: |
| info->use_pipeline_dynamic_state &= ~INTEL_USE_PIPELINE_DYNAMIC_STENCIL_REFERENCE; |
| break; |
| default: |
| assert(!"Invalid dynamic state"); |
| break; |
| } |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| static VkResult graphics_pipeline_create(struct intel_dev *dev, |
| const VkGraphicsPipelineCreateInfo *info_, |
| struct intel_pipeline **pipeline_ret) |
| { |
| struct intel_pipeline_create_info info; |
| struct intel_pipeline *pipeline; |
| VkResult ret; |
| |
| ret = pipeline_create_info_init(&info, info_); |
| |
| if (ret != VK_SUCCESS) |
| return ret; |
| |
| pipeline = (struct intel_pipeline *) intel_base_create(&dev->base.handle, |
| sizeof (*pipeline), dev->base.dbg, |
| VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT, info_, 0); |
| if (!pipeline) |
| return VK_ERROR_OUT_OF_HOST_MEMORY; |
| |
| pipeline->dev = dev; |
| pipeline->pipeline_layout = intel_pipeline_layout(info.graphics.layout); |
| |
| pipeline->obj.destroy = pipeline_destroy; |
| |
| ret = pipeline_build_all(pipeline, &info); |
| if (ret != VK_SUCCESS) { |
| pipeline_destroy(&pipeline->obj); |
| return ret; |
| } |
| |
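    /*
     * Allocate the scratch space that pipeline_build_shader() accumulated
     * for all stages in pipeline->scratch_size.
     */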
    VkMemoryAllocateInfo mem_alloc;
    mem_alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    mem_alloc.pNext = NULL;
    mem_alloc.allocationSize = pipeline->scratch_size;
    mem_alloc.memoryTypeIndex = 0;
    intel_mem_alloc(dev, &mem_alloc, &pipeline->obj.mem);
| |
| *pipeline_ret = pipeline; |
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( |
| VkDevice device, |
| const VkPipelineCacheCreateInfo* pCreateInfo, |
| const VkAllocationCallbacks* pAllocator, |
| VkPipelineCache* pPipelineCache) |
| { |
    // non-dispatchable objects only need to be 64 bits currently
    *((uint64_t *)pPipelineCache) = 1;
| return VK_SUCCESS; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( |
| VkDevice device, |
| VkPipelineCache pipelineCache, |
| const VkAllocationCallbacks* pAllocator) |
| { |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( |
| VkDevice device, |
| VkPipelineCache pipelineCache, |
| size_t* pDataSize, |
| void* pData) |
| { |
| return VK_ERROR_VALIDATION_FAILED_EXT; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( |
| VkDevice device, |
| VkPipelineCache dstCache, |
| uint32_t srcCacheCount, |
| const VkPipelineCache* pSrcCaches) |
| { |
| return VK_ERROR_VALIDATION_FAILED_EXT; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( |
| VkDevice device, |
| VkPipelineCache pipelineCache, |
| uint32_t createInfoCount, |
| const VkGraphicsPipelineCreateInfo* pCreateInfos, |
| const VkAllocationCallbacks* pAllocator, |
| VkPipeline* pPipelines) |
| { |
| struct intel_dev *dev = intel_dev(device); |
| uint32_t i; |
| VkResult res = VK_SUCCESS; |
| bool one_succeeded = false; |
| |
| for (i = 0; i < createInfoCount; i++) { |
| res = graphics_pipeline_create(dev, &(pCreateInfos[i]), |
| (struct intel_pipeline **) &(pPipelines[i])); |
        // return NULL handle for unsuccessful creates
| if (res != VK_SUCCESS) |
| pPipelines[i] = VK_NULL_HANDLE; |
| else |
| one_succeeded = true; |
| } |
    // return VK_SUCCESS if any of the requested creates succeeded
| if (one_succeeded) |
| return VK_SUCCESS; |
| else |
| return res; |
| } |
| |
| VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( |
| VkDevice device, |
| VkPipelineCache pipelineCache, |
| uint32_t createInfoCount, |
| const VkComputePipelineCreateInfo* pCreateInfos, |
| const VkAllocationCallbacks* pAllocator, |
| VkPipeline* pPipelines) |
| { |
| return VK_ERROR_VALIDATION_FAILED_EXT; |
| } |
| |
| VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( |
| VkDevice device, |
| VkPipeline pipeline, |
| const VkAllocationCallbacks* pAllocator) |
{
| struct intel_obj *obj = intel_obj(pipeline); |
| |
| intel_mem_free(obj->mem); |
| obj->destroy(obj); |
| } |