blob: 01b60103320ca3b572f27c353c99a4bfcd3b5896 [file] [log] [blame] [edit]
/*
* Copyright 2016-2021 Robert Konrad
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/*
* At your option, you may choose to accept this material under either:
* 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
* 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
*/
#include "spirv_hlsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;
enum class ImageFormatNormalizedState
{
None = 0,
Unorm = 1,
Snorm = 2
};
static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt)
{
switch (fmt)
{
case ImageFormatR8:
case ImageFormatR16:
case ImageFormatRg8:
case ImageFormatRg16:
case ImageFormatRgba8:
case ImageFormatRgba16:
case ImageFormatRgb10A2:
return ImageFormatNormalizedState::Unorm;
case ImageFormatR8Snorm:
case ImageFormatR16Snorm:
case ImageFormatRg8Snorm:
case ImageFormatRg16Snorm:
case ImageFormatRgba8Snorm:
case ImageFormatRgba16Snorm:
return ImageFormatNormalizedState::Snorm;
default:
break;
}
return ImageFormatNormalizedState::None;
}
static unsigned image_format_to_components(ImageFormat fmt)
{
switch (fmt)
{
case ImageFormatR8:
case ImageFormatR16:
case ImageFormatR8Snorm:
case ImageFormatR16Snorm:
case ImageFormatR16f:
case ImageFormatR32f:
case ImageFormatR8i:
case ImageFormatR16i:
case ImageFormatR32i:
case ImageFormatR8ui:
case ImageFormatR16ui:
case ImageFormatR32ui:
return 1;
case ImageFormatRg8:
case ImageFormatRg16:
case ImageFormatRg8Snorm:
case ImageFormatRg16Snorm:
case ImageFormatRg16f:
case ImageFormatRg32f:
case ImageFormatRg8i:
case ImageFormatRg16i:
case ImageFormatRg32i:
case ImageFormatRg8ui:
case ImageFormatRg16ui:
case ImageFormatRg32ui:
return 2;
case ImageFormatR11fG11fB10f:
return 3;
case ImageFormatRgba8:
case ImageFormatRgba16:
case ImageFormatRgb10A2:
case ImageFormatRgba8Snorm:
case ImageFormatRgba16Snorm:
case ImageFormatRgba16f:
case ImageFormatRgba32f:
case ImageFormatRgba8i:
case ImageFormatRgba16i:
case ImageFormatRgba32i:
case ImageFormatRgba8ui:
case ImageFormatRgba16ui:
case ImageFormatRgba32ui:
case ImageFormatRgb10a2ui:
return 4;
case ImageFormatUnknown:
return 4; // Assume 4.
default:
SPIRV_CROSS_THROW("Unrecognized typed image format.");
}
}
static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
{
switch (fmt)
{
case ImageFormatR8:
case ImageFormatR16:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "unorm float";
case ImageFormatRg8:
case ImageFormatRg16:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "unorm float2";
case ImageFormatRgba8:
case ImageFormatRgba16:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "unorm float4";
case ImageFormatRgb10A2:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "unorm float4";
case ImageFormatR8Snorm:
case ImageFormatR16Snorm:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "snorm float";
case ImageFormatRg8Snorm:
case ImageFormatRg16Snorm:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "snorm float2";
case ImageFormatRgba8Snorm:
case ImageFormatRgba16Snorm:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "snorm float4";
case ImageFormatR16f:
case ImageFormatR32f:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "float";
case ImageFormatRg16f:
case ImageFormatRg32f:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "float2";
case ImageFormatRgba16f:
case ImageFormatRgba32f:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "float4";
case ImageFormatR11fG11fB10f:
if (basetype != SPIRType::Float)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "float3";
case ImageFormatR8i:
case ImageFormatR16i:
case ImageFormatR32i:
if (basetype != SPIRType::Int)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "int";
case ImageFormatRg8i:
case ImageFormatRg16i:
case ImageFormatRg32i:
if (basetype != SPIRType::Int)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "int2";
case ImageFormatRgba8i:
case ImageFormatRgba16i:
case ImageFormatRgba32i:
if (basetype != SPIRType::Int)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "int4";
case ImageFormatR8ui:
case ImageFormatR16ui:
case ImageFormatR32ui:
if (basetype != SPIRType::UInt)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "uint";
case ImageFormatRg8ui:
case ImageFormatRg16ui:
case ImageFormatRg32ui:
if (basetype != SPIRType::UInt)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "uint2";
case ImageFormatRgba8ui:
case ImageFormatRgba16ui:
case ImageFormatRgba32ui:
if (basetype != SPIRType::UInt)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "uint4";
case ImageFormatRgb10a2ui:
if (basetype != SPIRType::UInt)
SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
return "uint4";
case ImageFormatUnknown:
switch (basetype)
{
case SPIRType::Float:
return "float4";
case SPIRType::Int:
return "int4";
case SPIRType::UInt:
return "uint4";
default:
SPIRV_CROSS_THROW("Unsupported base type for image.");
}
default:
SPIRV_CROSS_THROW("Unrecognized typed image format.");
}
}
string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
auto &imagetype = get<SPIRType>(type.image.type);
const char *dim = nullptr;
bool typed_load = false;
uint32_t components = 4;
bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable);
switch (type.image.dim)
{
case Dim1D:
typed_load = type.image.sampled == 2;
dim = "1D";
break;
case Dim2D:
typed_load = type.image.sampled == 2;
dim = "2D";
break;
case Dim3D:
typed_load = type.image.sampled == 2;
dim = "3D";
break;
case DimCube:
if (type.image.sampled == 2)
SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
dim = "Cube";
break;
case DimRect:
SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO
case DimBuffer:
if (type.image.sampled == 1)
return join("Buffer<", type_to_glsl(imagetype), components, ">");
else if (type.image.sampled == 2)
{
if (interlocked_resources.count(id))
return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
">");
typed_load = !force_image_srv && type.image.sampled == 2;
const char *rw = force_image_srv ? "" : "RW";
return join(rw, "Buffer<",
typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
join(type_to_glsl(imagetype), components),
">");
}
else
SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
case DimSubpassData:
dim = "2D";
typed_load = false;
break;
default:
SPIRV_CROSS_THROW("Invalid dimension.");
}
const char *arrayed = type.image.arrayed ? "Array" : "";
const char *ms = type.image.ms ? "MS" : "";
const char *rw = typed_load && !force_image_srv ? "RW" : "";
if (force_image_srv)
typed_load = false;
if (typed_load && interlocked_resources.count(id))
rw = "RasterizerOrdered";
return join(rw, "Texture", dim, ms, arrayed, "<",
typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
join(type_to_glsl(imagetype), components),
">");
}
string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/)
{
auto &imagetype = get<SPIRType>(type.image.type);
string res;
switch (imagetype.basetype)
{
case SPIRType::Int:
res = "i";
break;
case SPIRType::UInt:
res = "u";
break;
default:
break;
}
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
return res + "subpassInput" + (type.image.ms ? "MS" : "");
// If we're emulating subpassInput with samplers, force sampler2D
// so we don't have to specify format.
if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
{
// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
if (type.image.dim == DimBuffer && type.image.sampled == 1)
res += "sampler";
else
res += type.image.sampled == 2 ? "image" : "texture";
}
else
res += "sampler";
switch (type.image.dim)
{
case Dim1D:
res += "1D";
break;
case Dim2D:
res += "2D";
break;
case Dim3D:
res += "3D";
break;
case DimCube:
res += "CUBE";
break;
case DimBuffer:
res += "Buffer";
break;
case DimSubpassData:
res += "2D";
break;
default:
SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
}
if (type.image.ms)
res += "MS";
if (type.image.arrayed)
res += "Array";
return res;
}
string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id)
{
if (hlsl_options.shader_model <= 30)
return image_type_hlsl_legacy(type, id);
else
return image_type_hlsl_modern(type, id);
}
// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
{
// Ignore the pointer type since GLSL doesn't have pointers.
switch (type.basetype)
{
case SPIRType::Struct:
// Need OpName lookup here to get a "sensible" name for a struct.
if (backend.explicit_struct_type)
return join("struct ", to_name(type.self));
else
return to_name(type.self);
case SPIRType::Image:
case SPIRType::SampledImage:
return image_type_hlsl(type, id);
case SPIRType::Sampler:
return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState";
case SPIRType::Void:
return "void";
default:
break;
}
if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return "bool";
case SPIRType::Int:
return backend.basic_int_type;
case SPIRType::UInt:
return backend.basic_uint_type;
case SPIRType::AtomicCounter:
return "atomic_uint";
case SPIRType::Half:
if (hlsl_options.enable_16bit_types)
return "half";
else
return "min16float";
case SPIRType::Short:
if (hlsl_options.enable_16bit_types)
return "int16_t";
else
return "min16int";
case SPIRType::UShort:
if (hlsl_options.enable_16bit_types)
return "uint16_t";
else
return "min16uint";
case SPIRType::Float:
return "float";
case SPIRType::Double:
return "double";
case SPIRType::Int64:
if (hlsl_options.shader_model < 60)
SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
return "int64_t";
case SPIRType::UInt64:
if (hlsl_options.shader_model < 60)
SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0.");
return "uint64_t";
case SPIRType::AccelerationStructure:
return "RaytracingAccelerationStructure";
case SPIRType::RayQuery:
return "RayQuery<RAY_FLAG_NONE>";
default:
return "???";
}
}
else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bool", type.vecsize);
case SPIRType::Int:
return join("int", type.vecsize);
case SPIRType::UInt:
return join("uint", type.vecsize);
case SPIRType::Half:
return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize);
case SPIRType::Short:
return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize);
case SPIRType::UShort:
return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize);
case SPIRType::Float:
return join("float", type.vecsize);
case SPIRType::Double:
return join("double", type.vecsize);
case SPIRType::Int64:
return join("i64vec", type.vecsize);
case SPIRType::UInt64:
return join("u64vec", type.vecsize);
default:
return "???";
}
}
else
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bool", type.columns, "x", type.vecsize);
case SPIRType::Int:
return join("int", type.columns, "x", type.vecsize);
case SPIRType::UInt:
return join("uint", type.columns, "x", type.vecsize);
case SPIRType::Half:
return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize);
case SPIRType::Short:
return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize);
case SPIRType::UShort:
return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize);
case SPIRType::Float:
return join("float", type.columns, "x", type.vecsize);
case SPIRType::Double:
return join("double", type.columns, "x", type.vecsize);
// Matrix types not supported for int64/uint64.
default:
return "???";
}
}
}
void CompilerHLSL::emit_header()
{
for (auto &header : header_lines)
statement(header);
if (header_lines.size() > 0)
{
statement("");
}
}
void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
{
add_resource_name(var.self);
// The global copies of I/O variables should not contain interpolation qualifiers.
// These are emitted inside the interface structs.
auto &flags = ir.meta[var.self].decoration.decoration_flags;
auto old_flags = flags;
flags.reset();
statement("static ", variable_decl(var), ";");
flags = old_flags;
}
const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
// Input and output variables are handled specially in HLSL backend.
// The variables are declared as global, private variables, and do not need any qualifiers.
if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
var.storage == StorageClassPushConstant)
{
return "uniform ";
}
return "";
}
void CompilerHLSL::emit_builtin_outputs_in_struct()
{
auto &execution = get_entry_point();
bool legacy = hlsl_options.shader_model <= 30;
active_output_builtins.for_each_bit([&](uint32_t i) {
const char *type = nullptr;
const char *semantic = nullptr;
auto builtin = static_cast<BuiltIn>(i);
switch (builtin)
{
case BuiltInPosition:
type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
semantic = legacy ? "POSITION" : "SV_Position";
break;
case BuiltInSampleMask:
if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher.");
type = "uint";
semantic = "SV_Coverage";
break;
case BuiltInFragDepth:
type = "float";
if (legacy)
{
semantic = "DEPTH";
}
else
{
if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater))
semantic = "SV_DepthGreaterEqual";
else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess))
semantic = "SV_DepthLessEqual";
else
semantic = "SV_Depth";
}
break;
case BuiltInClipDistance:
{
static const char *types[] = { "float", "float2", "float3", "float4" };
// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
if (execution.model == ExecutionModelMeshEXT)
{
if (clip_distance_count > 4)
SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders.");
if (clip_distance_count == 1)
{
// Avoids having to hack up access_chain code. Makes it trivially indexable.
statement("float gl_ClipDistance[1] : SV_ClipDistance;");
}
else
{
// Replace array with vector directly, avoids any weird fixup path.
statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;");
}
}
else
{
for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
{
uint32_t to_declare = clip_distance_count - clip;
if (to_declare > 4)
to_declare = 4;
uint32_t semantic_index = clip / 4;
statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
" : SV_ClipDistance", semantic_index, ";");
}
}
break;
}
case BuiltInCullDistance:
{
static const char *types[] = { "float", "float2", "float3", "float4" };
// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
if (execution.model == ExecutionModelMeshEXT)
{
if (cull_distance_count > 4)
SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");
if (cull_distance_count == 1)
{
// Avoids having to hack up access_chain code. Makes it trivially indexable.
statement("float gl_CullDistance[1] : SV_CullDistance;");
}
else
{
// Replace array with vector directly, avoids any weird fixup path.
statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
}
}
else
{
for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
{
uint32_t to_declare = cull_distance_count - cull;
if (to_declare > 4)
to_declare = 4;
uint32_t semantic_index = cull / 4;
statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
" : SV_CullDistance", semantic_index, ";");
}
}
break;
}
case BuiltInPointSize:
// If point_size_compat is enabled, just ignore PointSize.
// PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
// even if it means working around the missing feature.
if (legacy)
{
type = "float";
semantic = "PSIZE";
}
else if (!hlsl_options.point_size_compat)
SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
break;
case BuiltInLayer:
case BuiltInPrimitiveId:
case BuiltInViewportIndex:
case BuiltInPrimitiveShadingRateKHR:
case BuiltInCullPrimitiveEXT:
// per-primitive attributes handled separatly
break;
case BuiltInPrimitivePointIndicesEXT:
case BuiltInPrimitiveLineIndicesEXT:
case BuiltInPrimitiveTriangleIndicesEXT:
// meshlet local-index buffer handled separatly
break;
default:
SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
}
if (type && semantic)
statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
});
}
void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
{
active_output_builtins.for_each_bit([&](uint32_t i) {
const char *type = nullptr;
const char *semantic = nullptr;
auto builtin = static_cast<BuiltIn>(i);
switch (builtin)
{
case BuiltInLayer:
{
if (hlsl_options.shader_model < 50)
SPIRV_CROSS_THROW("Render target array index output is only supported in SM 5.0 or higher.");
type = "uint";
semantic = "SV_RenderTargetArrayIndex";
break;
}
case BuiltInPrimitiveId:
type = "uint";
semantic = "SV_PrimitiveID";
break;
case BuiltInViewportIndex:
type = "uint";
semantic = "SV_ViewportArrayIndex";
break;
case BuiltInPrimitiveShadingRateKHR:
type = "uint";
semantic = "SV_ShadingRate";
break;
case BuiltInCullPrimitiveEXT:
type = "bool";
semantic = "SV_CullPrimitive";
break;
default:
break;
}
if (type && semantic)
statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
});
}
void CompilerHLSL::emit_builtin_inputs_in_struct()
{
bool legacy = hlsl_options.shader_model <= 30;
active_input_builtins.for_each_bit([&](uint32_t i) {
const char *type = nullptr;
const char *semantic = nullptr;
auto builtin = static_cast<BuiltIn>(i);
switch (builtin)
{
case BuiltInFragCoord:
type = "float4";
semantic = legacy ? "VPOS" : "SV_Position";
break;
case BuiltInVertexId:
case BuiltInVertexIndex:
if (legacy)
SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower.");
type = "uint";
semantic = "SV_VertexID";
break;
case BuiltInPrimitiveId:
type = "uint";
semantic = "SV_PrimitiveID";
break;
case BuiltInInstanceId:
case BuiltInInstanceIndex:
if (legacy)
SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower.");
type = "uint";
semantic = "SV_InstanceID";
break;
case BuiltInSampleId:
if (legacy)
SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower.");
type = "uint";
semantic = "SV_SampleIndex";
break;
case BuiltInSampleMask:
if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher.");
type = "uint";
semantic = "SV_Coverage";
break;
case BuiltInGlobalInvocationId:
type = "uint3";
semantic = "SV_DispatchThreadID";
break;
case BuiltInLocalInvocationId:
type = "uint3";
semantic = "SV_GroupThreadID";
break;
case BuiltInLocalInvocationIndex:
type = "uint";
semantic = "SV_GroupIndex";
break;
case BuiltInWorkgroupId:
type = "uint3";
semantic = "SV_GroupID";
break;
case BuiltInFrontFacing:
type = "bool";
semantic = "SV_IsFrontFace";
break;
case BuiltInViewIndex:
if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment))
SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher.");
type = "uint";
semantic = "SV_ViewID";
break;
case BuiltInNumWorkgroups:
case BuiltInSubgroupSize:
case BuiltInSubgroupLocalInvocationId:
case BuiltInSubgroupEqMask:
case BuiltInSubgroupLtMask:
case BuiltInSubgroupLeMask:
case BuiltInSubgroupGtMask:
case BuiltInSubgroupGeMask:
case BuiltInBaseVertex:
case BuiltInBaseInstance:
// Handled specially.
break;
case BuiltInHelperInvocation:
if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher.");
break;
case BuiltInClipDistance:
// HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors.
for (uint32_t clip = 0; clip < clip_distance_count; clip += 4)
{
uint32_t to_declare = clip_distance_count - clip;
if (to_declare > 4)
to_declare = 4;
uint32_t semantic_index = clip / 4;
static const char *types[] = { "float", "float2", "float3", "float4" };
statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
" : SV_ClipDistance", semantic_index, ";");
}
break;
case BuiltInCullDistance:
// HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
{
uint32_t to_declare = cull_distance_count - cull;
if (to_declare > 4)
to_declare = 4;
uint32_t semantic_index = cull / 4;
static const char *types[] = { "float", "float2", "float3", "float4" };
statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
" : SV_CullDistance", semantic_index, ";");
}
break;
case BuiltInPointCoord:
// PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
if (hlsl_options.point_coord_compat)
break;
else
SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
case BuiltInLayer:
if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
type = "uint";
semantic = "SV_RenderTargetArrayIndex";
break;
default:
SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
}
if (type && semantic)
statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
});
}
uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
{
// TODO: Need to verify correctness.
uint32_t elements = 0;
if (type.basetype == SPIRType::Struct)
{
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i]));
}
else
{
uint32_t array_multiplier = 1;
for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
{
if (type.array_size_literal[i])
array_multiplier *= type.array[i];
else
array_multiplier *= evaluate_constant_u32(type.array[i]);
}
elements += array_multiplier * type.columns;
}
return elements;
}
string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
{
string res;
//if (flags & (1ull << DecorationSmooth))
// res += "linear ";
if (flags.get(DecorationFlat))
res += "nointerpolation ";
if (flags.get(DecorationNoPerspective))
res += "noperspective ";
if (flags.get(DecorationCentroid))
res += "centroid ";
if (flags.get(DecorationPatch))
res += "patch "; // Seems to be different in actual HLSL.
if (flags.get(DecorationSample))
res += "sample ";
if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
res += "precise "; // Not supported?
return res;
}
std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc)
{
if (em == ExecutionModelVertex && sc == StorageClassInput)
{
// We have a vertex attribute - we should look at remapping it if the user provided
// vertex attribute hints.
for (auto &attribute : remap_vertex_attributes)
if (attribute.location == location)
return attribute.semantic;
}
// Not a vertex attribute, or no remap_vertex_attributes entry.
return join("TEXCOORD", location);
}
std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var)
{
// We cannot emit static const initializer for block constants for practical reasons,
// so just inline the initializer.
// FIXME: There is a theoretical problem here if someone tries to composite extract
// into this initializer since we don't declare it properly, but that is somewhat non-sensical.
auto &type = get<SPIRType>(var.basetype);
bool is_block = has_decoration(type.self, DecorationBlock);
auto *c = maybe_get<SPIRConstant>(var.initializer);
if (is_block && c)
return constant_expression(*c);
else
return CompilerGLSL::to_initializer_expression(var);
}
void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index,
uint32_t location,
std::unordered_set<uint32_t> &active_locations)
{
auto &execution = get_entry_point();
auto type = get<SPIRType>(var.basetype);
auto semantic = to_semantic(location, execution.model, var.storage);
auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index));
auto &mbr_type = get<SPIRType>(type.member_types[member_index]);
statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)),
type_to_glsl(mbr_type),
" ", mbr_name, type_to_array_glsl(mbr_type),
" : ", semantic, ";");
// Structs and arrays should consume more locations.
uint32_t consumed_locations = type_to_consumed_locations(mbr_type);
for (uint32_t i = 0; i < consumed_locations; i++)
active_locations.insert(location + i);
}
void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations)
{
auto &execution = get_entry_point();
auto type = get<SPIRType>(var.basetype);
string binding;
bool use_location_number = true;
bool need_matrix_unroll = false;
bool legacy = hlsl_options.shader_model <= 30;
if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
{
// Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1.
uint32_t index = get_decoration(var.self, DecorationIndex);
uint32_t location = get_decoration(var.self, DecorationLocation);
if (index != 0 && location != 0)
SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL.");
binding = join(legacy ? "COLOR" : "SV_Target", location + index);
use_location_number = false;
if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP)
type.vecsize = 4;
}
else if (var.storage == StorageClassInput && execution.model == ExecutionModelVertex)
{
need_matrix_unroll = true;
if (legacy) // Inputs must be floating-point in legacy targets.
type.basetype = SPIRType::Float;
}
const auto get_vacant_location = [&]() -> uint32_t {
for (uint32_t i = 0; i < 64; i++)
if (!active_locations.count(i))
return i;
SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted.");
};
auto name = to_name(var.self);
if (use_location_number)
{
uint32_t location_number;
// If an explicit location exists, use it with TEXCOORD[N] semantic.
// Otherwise, pick a vacant location.
if (has_decoration(var.self, DecorationLocation))
location_number = get_decoration(var.self, DecorationLocation);
else
location_number = get_vacant_location();
// Allow semantic remap if specified.
auto semantic = to_semantic(location_number, execution.model, var.storage);
if (need_matrix_unroll && type.columns > 1)
{
if (!type.array.empty())
SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported.");
// Unroll matrices.
for (uint32_t i = 0; i < type.columns; i++)
{
SPIRType newtype = type;
newtype.columns = 1;
string effective_semantic;
if (hlsl_options.flatten_matrix_vertex_input_semantics)
effective_semantic = to_semantic(location_number, execution.model, var.storage);
else
effective_semantic = join(semantic, "_", i);
statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)),
variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";");
active_locations.insert(location_number++);
}
}
else
{
auto decl_type = type;
if (execution.model == ExecutionModelMeshEXT)
{
decl_type.array.erase(decl_type.array.begin());
decl_type.array_size_literal.erase(decl_type.array_size_literal.begin());
}
statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ",
semantic, ";");
// Structs and arrays should consume more locations.
uint32_t consumed_locations = type_to_consumed_locations(decl_type);
for (uint32_t i = 0; i < consumed_locations; i++)
active_locations.insert(location_number + i);
}
}
else
{
statement(variable_decl(type, name), " : ", binding, ";");
}
}
std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage)
{
switch (builtin)
{
case BuiltInVertexId:
return "gl_VertexID";
case BuiltInInstanceId:
return "gl_InstanceID";
case BuiltInNumWorkgroups:
{
if (!num_workgroups_builtin)
SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. "
"Cannot emit code for this builtin.");
auto &var = get<SPIRVariable>(num_workgroups_builtin);
auto &type = get<SPIRType>(var.basetype);
auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0));
ParsedIR::sanitize_underscores(ret);
return ret;
}
case BuiltInPointCoord:
// Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set.
return "float2(0.5f, 0.5f)";
case BuiltInSubgroupLocalInvocationId:
return "WaveGetLaneIndex()";
case BuiltInSubgroupSize:
return "WaveGetLaneCount()";
case BuiltInHelperInvocation:
return "IsHelperLane()";
default:
return CompilerGLSL::builtin_to_glsl(builtin, storage);
}
}
void CompilerHLSL::emit_builtin_variables()
{
Bitset builtins = active_input_builtins;
builtins.merge_or(active_output_builtins);
std::unordered_map<uint32_t, ID> builtin_to_initializer;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (!is_builtin_variable(var) || var.storage != StorageClassOutput || !var.initializer)
return;
auto *c = this->maybe_get<SPIRConstant>(var.initializer);
if (!c)
return;
auto &type = this->get<SPIRType>(var.basetype);
if (type.basetype == SPIRType::Struct)
{
uint32_t member_count = uint32_t(type.member_types.size());
for (uint32_t i = 0; i < member_count; i++)
{
if (has_member_decoration(type.self, i, DecorationBuiltIn))
{
builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] =
c->subconstants[i];
}
}
}
else if (has_decoration(var.self, DecorationBuiltIn))
builtin_to_initializer[get_decoration(var.self, DecorationBuiltIn)] = var.initializer;
});
// Emit global variables for the interface variables which are statically used by the shader.
builtins.for_each_bit([&](uint32_t i) {
const char *type = nullptr;
auto builtin = static_cast<BuiltIn>(i);
uint32_t array_size = 0;
string init_expr;
auto init_itr = builtin_to_initializer.find(builtin);
if (init_itr != builtin_to_initializer.end())
init_expr = join(" = ", to_expression(init_itr->second));
if (get_execution_model() == ExecutionModelMeshEXT)
{
if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId ||
builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT ||
builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT ||
builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT)
{
return;
}
}
switch (builtin)
{
case BuiltInFragCoord:
case BuiltInPosition:
type = "float4";
break;
case BuiltInFragDepth:
type = "float";
break;
case BuiltInVertexId:
case BuiltInVertexIndex:
case BuiltInInstanceIndex:
type = "int";
if (hlsl_options.support_nonzero_base_vertex_base_instance)
base_vertex_info.used = true;
break;
case BuiltInBaseVertex:
case BuiltInBaseInstance:
type = "int";
base_vertex_info.used = true;
break;
case BuiltInInstanceId:
case BuiltInSampleId:
type = "int";
break;
case BuiltInPointSize:
if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30)
{
// Just emit the global variable, it will be ignored.
type = "float";
break;
}
else
SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
case BuiltInGlobalInvocationId:
case BuiltInLocalInvocationId:
case BuiltInWorkgroupId:
type = "uint3";
break;
case BuiltInLocalInvocationIndex:
type = "uint";
break;
case BuiltInFrontFacing:
type = "bool";
break;
case BuiltInNumWorkgroups:
case BuiltInPointCoord:
// Handled specially.
break;
case BuiltInSubgroupLocalInvocationId:
case BuiltInSubgroupSize:
if (hlsl_options.shader_model < 60)
SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
break;
case BuiltInSubgroupEqMask:
case BuiltInSubgroupLtMask:
case BuiltInSubgroupLeMask:
case BuiltInSubgroupGtMask:
case BuiltInSubgroupGeMask:
if (hlsl_options.shader_model < 60)
SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops.");
type = "uint4";
break;
case BuiltInHelperInvocation:
if (hlsl_options.shader_model < 50)
SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation.");
break;
case BuiltInClipDistance:
array_size = clip_distance_count;
type = "float";
break;
case BuiltInCullDistance:
array_size = cull_distance_count;
type = "float";
break;
case BuiltInSampleMask:
type = "int";
break;
case BuiltInPrimitiveId:
case BuiltInViewIndex:
case BuiltInLayer:
type = "uint";
break;
case BuiltInViewportIndex:
case BuiltInPrimitiveShadingRateKHR:
case BuiltInPrimitiveLineIndicesEXT:
case BuiltInCullPrimitiveEXT:
type = "uint";
break;
default:
SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin)));
}
StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput;
if (type)
{
if (array_size)
statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";");
else
statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";");
}
// SampleMask can be both in and out with sample builtin, in this case we have already
// declared the input variable and we need to add the output one now.
if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i))
{
statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";");
}
});
if (base_vertex_info.used)
{
string binding_info;
if (base_vertex_info.explicit_binding)
{
binding_info = join(" : register(b", base_vertex_info.register_index);
if (base_vertex_info.register_space)
binding_info += join(", space", base_vertex_info.register_space);
binding_info += ")";
}
statement("cbuffer SPIRV_Cross_VertexInfo", binding_info);
begin_scope();
statement("int SPIRV_Cross_BaseVertex;");
statement("int SPIRV_Cross_BaseInstance;");
end_scope_decl();
statement("");
}
}
void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space)
{
if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
{
base_vertex_info.explicit_binding = true;
base_vertex_info.register_space = register_space;
base_vertex_info.register_index = register_index;
}
}
void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding)
{
if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
base_vertex_info.explicit_binding = false;
}
bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const
{
if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE)
return base_vertex_info.used;
else
return false;
}
void CompilerHLSL::emit_composite_constants()
{
// HLSL cannot declare structs or arrays inline, so we must move them out to
// global constants directly.
bool emitted = false;
ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) {
if (c.specialization)
return;
auto &type = this->get<SPIRType>(c.constant_type);
if (type.basetype == SPIRType::Struct && is_builtin_type(type))
return;
if (type.basetype == SPIRType::Struct || !type.array.empty())
{
add_resource_name(c.self);
auto name = to_name(c.self);
statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
emitted = true;
}
});
if (emitted)
statement("");
}
void CompilerHLSL::emit_specialization_constants_and_structs()
{
bool emitted = false;
SpecializationConstant wg_x, wg_y, wg_z;
ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
std::unordered_set<TypeID> io_block_types;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
interface_variable_exists_in_entry_point(var.self) &&
has_decoration(type.self, DecorationBlock))
{
io_block_types.insert(type.self);
}
});
auto loop_lock = ir.create_loop_hard_lock();
for (auto &id_ : ir.ids_for_constant_undef_or_type)
{
auto &id = ir.ids[id_];
if (id.get_type() == TypeConstant)
{
auto &c = id.get<SPIRConstant>();
if (c.self == workgroup_size_id)
{
statement("static const uint3 gl_WorkGroupSize = ",
constant_expression(get<SPIRConstant>(workgroup_size_id)), ";");
emitted = true;
}
else if (c.specialization)
{
auto &type = get<SPIRType>(c.constant_type);
add_resource_name(c.self);
auto name = to_name(c.self);
if (has_decoration(c.self, DecorationSpecId))
{
// HLSL does not support specialization constants, so fallback to macros.
c.specialization_constant_macro_name =
constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
statement("#ifndef ", c.specialization_constant_macro_name);
statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c));
statement("#endif");
statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";");
}
else
statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";");
emitted = true;
}
}
else if (id.get_type() == TypeConstantOp)
{
auto &c = id.get<SPIRConstantOp>();
auto &type = get<SPIRType>(c.basetype);
add_resource_name(c.self);
auto name = to_name(c.self);
statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";");
emitted = true;
}
else if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
bool is_non_io_block = has_decoration(type.self, DecorationBlock) &&
io_block_types.count(type.self) == 0;
bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock);
if (type.basetype == SPIRType::Struct && type.array.empty() &&
!type.pointer && !is_non_io_block && !is_buffer_block)
{
if (emitted)
statement("");
emitted = false;
emit_struct(type);
}
}
else if (id.get_type() == TypeUndef)
{
auto &undef = id.get<SPIRUndef>();
auto &type = this->get<SPIRType>(undef.basetype);
// OpUndef can be void for some reason ...
if (type.basetype == SPIRType::Void)
return;
string initializer;
if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
emitted = true;
}
}
if (emitted)
statement("");
}
void CompilerHLSL::replace_illegal_names()
{
static const unordered_set<string> keywords = {
// Additional HLSL specific keywords.
// From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords
"AppendStructuredBuffer", "asm", "asm_fragment",
"BlendState", "bool", "break", "Buffer", "ByteAddressBuffer",
"case", "cbuffer", "centroid", "class", "column_major", "compile",
"compile_fragment", "CompileShader", "const", "continue", "ComputeShader",
"ConsumeStructuredBuffer",
"default", "DepthStencilState", "DepthStencilView", "discard", "do",
"double", "DomainShader", "dword",
"else", "export", "false", "float", "for", "fxgroup",
"GeometryShader", "groupshared", "half", "HullShader",
"indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface",
"line", "lineadj", "linear", "LineStream",
"matrix", "min16float", "min10float", "min16int", "min16uint",
"namespace", "nointerpolation", "noperspective", "NULL",
"out", "OutputPatch",
"payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point",
"PointStream", "precise", "RasterizerState", "RenderTargetView",
"return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer",
"RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D",
"RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState",
"SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state",
"static", "string", "struct", "switch", "StructuredBuffer", "tbuffer",
"technique", "technique10", "technique11", "texture", "Texture1D",
"Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray",
"Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle",
"triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned",
"vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while",
};
CompilerGLSL::replace_illegal_names(keywords);
CompilerGLSL::replace_illegal_names();
}
void CompilerHLSL::emit_resources()
{
auto &execution = get_entry_point();
replace_illegal_names();
switch (execution.model)
{
case ExecutionModelGeometry:
case ExecutionModelTessellationControl:
case ExecutionModelTessellationEvaluation:
case ExecutionModelMeshEXT:
fixup_implicit_builtin_block_names(execution.model);
break;
default:
break;
}
emit_specialization_constants_and_structs();
emit_composite_constants();
bool emitted = false;
// Output UBOs and SSBOs
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
has_block_flags)
{
emit_buffer_block(var);
emitted = true;
}
});
// Output push constant blocks
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
!is_hidden_variable(var))
{
emit_push_constant_block(var);
emitted = true;
}
});
if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 &&
active_output_builtins.get(BuiltInPosition))
{
statement("uniform float4 gl_HalfPixel;");
emitted = true;
}
bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30;
// Output Uniform Constants (values, samplers, images, etc).
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
// If we're remapping separate samplers and images, only emit the combined samplers.
if (skip_separate_image_sampler)
{
// Sampler buffers are always used without a sampler, and they will also work in regular D3D.
bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
bool separate_sampler = type.basetype == SPIRType::Sampler;
if (!sampler_buffer && (separate_image || separate_sampler))
return;
}
if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable &&
type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
!is_hidden_variable(var))
{
emit_uniform(var);
emitted = true;
}
});
if (emitted)
statement("");
emitted = false;
// Emit builtin input and output variables here.
emit_builtin_variables();
if (execution.model != ExecutionModelMeshEXT)
{
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer &&
(var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) &&
interface_variable_exists_in_entry_point(var.self))
{
// Builtin variables are handled separately.
emit_interface_block_globally(var);
emitted = true;
}
});
}
if (emitted)
statement("");
emitted = false;
require_input = false;
require_output = false;
unordered_set<uint32_t> active_inputs;
unordered_set<uint32_t> active_outputs;
struct IOVariable
{
const SPIRVariable *var;
uint32_t location;
uint32_t block_member_index;
bool block;
};
SmallVector<IOVariable> input_variables;
SmallVector<IOVariable> output_variables;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
bool block = has_decoration(type.self, DecorationBlock);
if (var.storage != StorageClassInput && var.storage != StorageClassOutput)
return;
if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) &&
interface_variable_exists_in_entry_point(var.self))
{
if (block)
{
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
uint32_t location = get_declared_member_location(var, i, false);
if (var.storage == StorageClassInput)
input_variables.push_back({ &var, location, i, true });
else
output_variables.push_back({ &var, location, i, true });
}
}
else
{
uint32_t location = get_decoration(var.self, DecorationLocation);
if (var.storage == StorageClassInput)
input_variables.push_back({ &var, location, 0, false });
else
output_variables.push_back({ &var, location, 0, false });
}
}
});
const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool {
// Sort input and output variables based on, from more robust to less robust:
// - Location
// - Variable has a location
// - Name comparison
// - Variable has a name
// - Fallback: ID
bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation);
bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation);
if (has_location_a && has_location_b)
return a.location < b.location;
else if (has_location_a && !has_location_b)
return true;
else if (!has_location_a && has_location_b)
return false;
const auto &name1 = to_name(a.var->self);
const auto &name2 = to_name(b.var->self);
if (name1.empty() && name2.empty())
return a.var->self < b.var->self;
else if (name1.empty())
return true;
else if (name2.empty())
return false;
return name1.compare(name2) < 0;
};
auto input_builtins = active_input_builtins;
input_builtins.clear(BuiltInNumWorkgroups);
input_builtins.clear(BuiltInPointCoord);
input_builtins.clear(BuiltInSubgroupSize);
input_builtins.clear(BuiltInSubgroupLocalInvocationId);
input_builtins.clear(BuiltInSubgroupEqMask);
input_builtins.clear(BuiltInSubgroupLtMask);
input_builtins.clear(BuiltInSubgroupLeMask);
input_builtins.clear(BuiltInSubgroupGtMask);
input_builtins.clear(BuiltInSubgroupGeMask);
if (!input_variables.empty() || !input_builtins.empty())
{
require_input = true;
statement("struct SPIRV_Cross_Input");
begin_scope();
sort(input_variables.begin(), input_variables.end(), variable_compare);
for (auto &var : input_variables)
{
if (var.block)
emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs);
else
emit_interface_block_in_struct(*var.var, active_inputs);
}
emit_builtin_inputs_in_struct();
end_scope_decl();
statement("");
}
const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT;
if (!output_variables.empty() || !active_output_builtins.empty())
{
sort(output_variables.begin(), output_variables.end(), variable_compare);
require_output = !is_mesh_shader;
statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output");
begin_scope();
for (auto &var : output_variables)
{
if (is_per_primitive_variable(*var.var))
continue;
if (var.block && is_mesh_shader && var.block_member_index != 0)
continue;
if (var.block && !is_mesh_shader)
emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs);
else
emit_interface_block_in_struct(*var.var, active_outputs);
}
emit_builtin_outputs_in_struct();
if (!is_mesh_shader)
emit_builtin_primitive_outputs_in_struct();
end_scope_decl();
statement("");
if (is_mesh_shader)
{
statement("struct gl_MeshPerPrimitiveEXT");
begin_scope();
for (auto &var : output_variables)
{
if (!is_per_primitive_variable(*var.var))
continue;
if (var.block && var.block_member_index != 0)
continue;
emit_interface_block_in_struct(*var.var, active_outputs);
}
emit_builtin_primitive_outputs_in_struct();
end_scope_decl();
statement("");
}
}
// Global variables.
for (auto global : global_variables)
{
auto &var = get<SPIRVariable>(global);
if (is_hidden_variable(var, true))
continue;
if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader)
continue;
if (var.storage != StorageClassOutput)
{
if (!variable_is_lut(var))
{
add_resource_name(var.self);
const char *storage = nullptr;
switch (var.storage)
{
case StorageClassWorkgroup:
case StorageClassTaskPayloadWorkgroupEXT:
storage = "groupshared";
break;
default:
storage = "static";
break;
}
string initializer;
if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
!var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
{
initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
}
statement(storage, " ", variable_decl(var), initializer, ";");
emitted = true;
}
}
}
if (emitted)
statement("");
if (requires_op_fmod)
{
static const char *types[] = {
"float",
"float2",
"float3",
"float4",
};
for (auto &type : types)
{
statement(type, " mod(", type, " x, ", type, " y)");
begin_scope();
statement("return x - y * floor(x / y);");
end_scope();
statement("");
}
}
emit_texture_size_variants(required_texture_size_variants.srv, "4", false, "");
for (uint32_t norm = 0; norm < 3; norm++)
{
for (uint32_t comp = 0; comp < 4; comp++)
{
static const char *qualifiers[] = { "", "unorm ", "snorm " };
static const char *vecsizes[] = { "", "2", "3", "4" };
emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true,
qualifiers[norm]);
}
}
if (requires_fp16_packing)
{
// HLSL does not pack into a single word sadly :(
statement("uint spvPackHalf2x16(float2 value)");
begin_scope();
statement("uint2 Packed = f32tof16(value);");
statement("return Packed.x | (Packed.y << 16);");
end_scope();
statement("");
statement("float2 spvUnpackHalf2x16(uint value)");
begin_scope();
statement("return f16tof32(uint2(value & 0xffff, value >> 16));");
end_scope();
statement("");
}
if (requires_uint2_packing)
{
statement("uint64_t spvPackUint2x32(uint2 value)");
begin_scope();
statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);");
end_scope();
statement("");
statement("uint2 spvUnpackUint2x32(uint64_t value)");
begin_scope();
statement("uint2 Unpacked;");
statement("Unpacked.x = uint(value & 0xffffffff);");
statement("Unpacked.y = uint(value >> 32);");
statement("return Unpacked;");
end_scope();
statement("");
}
if (requires_explicit_fp16_packing)
{
// HLSL does not pack into a single word sadly :(
statement("uint spvPackFloat2x16(min16float2 value)");
begin_scope();
statement("uint2 Packed = f32tof16(value);");
statement("return Packed.x | (Packed.y << 16);");
end_scope();
statement("");
statement("min16float2 spvUnpackFloat2x16(uint value)");
begin_scope();
statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
end_scope();
statement("");
}
// HLSL does not seem to have builtins for these operation, so roll them by hand ...
if (requires_unorm8_packing)
{
statement("uint spvPackUnorm4x8(float4 value)");
begin_scope();
statement("uint4 Packed = uint4(round(saturate(value) * 255.0));");
statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);");
end_scope();
statement("");
statement("float4 spvUnpackUnorm4x8(uint value)");
begin_scope();
statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);");
statement("return float4(Packed) / 255.0;");
end_scope();
statement("");
}
if (requires_snorm8_packing)
{
statement("uint spvPackSnorm4x8(float4 value)");
begin_scope();
statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;");
statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));");
end_scope();
statement("");
statement("float4 spvUnpackSnorm4x8(uint value)");
begin_scope();
statement("int SignedValue = int(value);");
statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;");
statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);");
end_scope();
statement("");
}
if (requires_unorm16_packing)
{
statement("uint spvPackUnorm2x16(float2 value)");
begin_scope();
statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));");
statement("return Packed.x | (Packed.y << 16);");
end_scope();
statement("");
statement("float2 spvUnpackUnorm2x16(uint value)");
begin_scope();
statement("uint2 Packed = uint2(value & 0xffff, value >> 16);");
statement("return float2(Packed) / 65535.0;");
end_scope();
statement("");
}
if (requires_snorm16_packing)
{
statement("uint spvPackSnorm2x16(float2 value)");
begin_scope();
statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;");
statement("return uint(Packed.x | (Packed.y << 16));");
end_scope();
statement("");
statement("float2 spvUnpackSnorm2x16(uint value)");
begin_scope();
statement("int SignedValue = int(value);");
statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;");
statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);");
end_scope();
statement("");
}
if (requires_bitfield_insert)
{
static const char *types[] = { "uint", "uint2", "uint3", "uint4" };
for (auto &type : types)
{
statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)");
begin_scope();
statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));");
statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);");
end_scope();
statement("");
}
}
if (requires_bitfield_extract)
{
static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" };
for (auto &type : unsigned_types)
{
statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)");
begin_scope();
statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);");
statement("return (Base >> Offset) & Mask;");
end_scope();
statement("");
}
// In this overload, we will have to do sign-extension, which we will emulate by shifting up and down.
static const char *signed_types[] = { "int", "int2", "int3", "int4" };
for (auto &type : signed_types)
{
statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)");
begin_scope();
statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);");
statement(type, " Masked = (Base >> Offset) & Mask;");
statement("int ExtendShift = (32 - Count) & 31;");
statement("return (Masked << ExtendShift) >> ExtendShift;");
end_scope();
statement("");
}
}
if (requires_inverse_2x2)
{
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float2x2 spvInverse(float2x2 m)");
begin_scope();
statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement("adj[0][0] = m[1][1];");
statement("adj[0][1] = -m[0][1];");
statement_no_indent("");
statement("adj[1][0] = -m[1][0];");
statement("adj[1][1] = m[0][0];");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
}
if (requires_inverse_3x3)
{
statement("// Returns the determinant of a 2x2 matrix.");
statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
begin_scope();
statement("return a1 * b2 - b1 * a2;");
end_scope();
statement_no_indent("");
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float3x3 spvInverse(float3x3 m)");
begin_scope();
statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
statement_no_indent("");
statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
statement_no_indent("");
statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
}
if (requires_inverse_4x4)
{
if (!requires_inverse_3x3)
{
statement("// Returns the determinant of a 2x2 matrix.");
statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
begin_scope();
statement("return a1 * b2 - b1 * a2;");
end_scope();
statement("");
}
statement("// Returns the determinant of a 3x3 matrix.");
statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
"float c2, float c3)");
begin_scope();
statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * "
"spvDet2x2(a2, a3, "
"b2, b3);");
end_scope();
statement_no_indent("");
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float4x4 spvInverse(float4x4 m)");
begin_scope();
statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement(
"adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
"m[3][3]);");
statement(
"adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
"m[3][3]);");
statement(
"adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
"m[3][3]);");
statement(
"adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
"m[2][3]);");
statement_no_indent("");
statement(
"adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
"m[3][3]);");
statement(
"adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
"m[3][3]);");
statement(
"adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
"m[3][3]);");
statement(
"adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
"m[2][3]);");
statement_no_indent("");
statement(
"adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
"m[3][3]);");
statement(
"adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
"m[3][3]);");
statement(
"adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
"m[3][3]);");
statement(
"adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
"m[2][3]);");
statement_no_indent("");
statement(
"adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
"m[3][2]);");
statement(
"adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
"m[3][2]);");
statement(
"adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
"m[3][2]);");
statement(
"adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
"m[2][2]);");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
"* m[3][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
}
if (requires_scalar_reflect)
{
// FP16/FP64? No templates in HLSL.
statement("float spvReflect(float i, float n)");
begin_scope();
statement("return i - 2.0 * dot(n, i) * n;");
end_scope();
statement("");
}
if (requires_scalar_refract)
{
// FP16/FP64? No templates in HLSL.
statement("float spvRefract(float i, float n, float eta)");
begin_scope();
statement("float NoI = n * i;");
statement("float NoI2 = NoI * NoI;");
statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
statement("if (k < 0.0)");
begin_scope();
statement("return 0.0;");
end_scope();
statement("else");
begin_scope();
statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
end_scope();
end_scope();
statement("");
}
if (requires_scalar_faceforward)
{
// FP16/FP64? No templates in HLSL.
statement("float spvFaceForward(float n, float i, float nref)");
begin_scope();
statement("return i * nref < 0.0 ? n : -n;");
end_scope();
statement("");
}
for (TypeID type_id : composite_selection_workaround_types)
{
// Need out variable since HLSL does not support returning arrays.
auto &type = get<SPIRType>(type_id);
auto type_str = type_to_glsl(type);
auto type_arr_str = type_to_array_glsl(type);
statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ",
type_str, " true_val", type_arr_str, ", ",
type_str, " false_val", type_arr_str, ")");
begin_scope();
statement("if (cond)");
begin_scope();
statement("out_value = true_val;");
end_scope();
statement("else");
begin_scope();
statement("out_value = false_val;");
end_scope();
end_scope();
statement("");
}
}
void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
const char *type_qualifier)
{
if (variant_mask == 0)
return;
static const char *types[QueryTypeCount] = { "float", "int", "uint" };
static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
"Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
"Texture2DMS", "Texture2DMSArray" };
static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };
static const char *ret_types[QueryDimCount] = {
"uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3",
};
static const uint32_t return_arguments[QueryDimCount] = {
1, 2, 2, 3, 3, 1, 2, 3, 2, 3,
};
for (uint32_t index = 0; index < QueryDimCount; index++)
{
for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++)
{
uint32_t bit = 16 * type_index + index;
uint64_t mask = 1ull << bit;
if ((variant_mask & mask) == 0)
continue;
statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""),
dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ",
(uav ? "" : "uint Level, "), "out uint Param)");
begin_scope();
statement(ret_types[index], " ret;");
switch (return_arguments[index])
{
case 1:
if (has_lod[index] && !uav)
statement("Tex.GetDimensions(Level, ret.x, Param);");
else
{
statement("Tex.GetDimensions(ret.x);");
statement("Param = 0u;");
}
break;
case 2:
if (has_lod[index] && !uav)
statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);");
else if (!uav)
statement("Tex.GetDimensions(ret.x, ret.y, Param);");
else
{
statement("Tex.GetDimensions(ret.x, ret.y);");
statement("Param = 0u;");
}
break;
case 3:
if (has_lod[index] && !uav)
statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);");
else if (!uav)
statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);");
else
{
statement("Tex.GetDimensions(ret.x, ret.y, ret.z);");
statement("Param = 0u;");
}
break;
}
statement("return ret;");
end_scope();
statement("");
}
}
}
void CompilerHLSL::analyze_meshlet_writes()
{
uint32_t id_per_vertex = 0;
uint32_t id_per_primitive = 0;
bool need_per_primitive = false;
bool need_per_vertex = false;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
bool block = has_decoration(type.self, DecorationBlock);
if (var.storage == StorageClassOutput && block && is_builtin_variable(var))
{
auto flags = get_buffer_block_flags(var.self);
if (flags.get(DecorationPerPrimitiveEXT))
id_per_primitive = var.self;
else
id_per_vertex = var.self;
}
else if (var.storage == StorageClassOutput)
{
Bitset flags;
if (block)
flags = get_buffer_block_flags(var.self);
else
flags = get_decoration_bitset(var.self);
if (flags.get(DecorationPerPrimitiveEXT))
need_per_primitive = true;
else
need_per_vertex = true;
}
});
// If we have per-primitive outputs, and no per-primitive builtins,
// empty version of gl_MeshPerPrimitiveEXT will be emitted.
// If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block.
const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
auto &execution = get_entry_point();
uint32_t op_type = ir.increase_bound_by(4);
uint32_t op_arr = op_type + 1;
uint32_t op_ptr = op_type + 2;
uint32_t op_var = op_type + 3;
auto &type = set<SPIRType>(op_type);
type.basetype = SPIRType::Struct;
set_name(op_type, block_name);
set_decoration(op_type, DecorationBlock);
if (per_primitive)
set_decoration(op_type, DecorationPerPrimitiveEXT);
auto &arr = set<SPIRType>(op_arr, type);
arr.parent_type = type.self;
arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
arr.array_size_literal.push_back(true);
auto &ptr = set<SPIRType>(op_ptr, arr);
ptr.parent_type = arr.self;
ptr.pointer = true;
ptr.pointer_depth++;
ptr.storage = StorageClassOutput;
set_decoration(op_ptr, DecorationBlock);
set_name(op_ptr, block_name);
auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
if (per_primitive)
set_decoration(op_var, DecorationPerPrimitiveEXT);
set_name(op_var, instance_name);
execution.interface_variables.push_back(var.self);
return op_var;
};
if (id_per_vertex == 0 && need_per_vertex)
id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
if (id_per_primitive == 0 && need_per_primitive)
id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
unordered_set<uint32_t> processed_func_ids;
analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
}
void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
std::unordered_set<uint32_t> &processed_func_ids)
{
// Avoid processing a function more than once
if (processed_func_ids.find(func_id) != processed_func_ids.end())
return;
processed_func_ids.insert(func_id);
auto &func = get<SPIRFunction>(func_id);
// Recursively establish global args added to functions on which we depend.
for (auto& block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
for (auto &i : b.ops)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
switch (op)
{
case OpFunctionCall:
{
// Then recurse into the function itself to extract globals used internally in the function
uint32_t inner_func_id = ops[2];
analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
auto &inner_func = get<SPIRFunction>(inner_func_id);
for (auto &iarg : inner_func.arguments)
{
if (!iarg.alias_global_variable)
continue;
bool already_declared = false;
for (auto &arg : func.arguments)
{
if (arg.id == iarg.id)
{
already_declared = true;
break;
}
}
if (!already_declared)
{
// basetype is effectively ignored here since we declare the argument
// with explicit types. Just pass down a valid type.
func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
iarg.read_count, iarg.write_count, true });
}
}
break;
}
case OpStore:
case OpLoad:
case OpInBoundsAccessChain:
case OpAccessChain:
case OpPtrAccessChain:
case OpInBoundsPtrAccessChain:
case OpArrayLength:
{
auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]);
if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
{
bool already_declared = false;
auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
uint32_t var_id = var->self;
if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
builtin_type != BuiltInPrimitivePointIndicesEXT &&
builtin_type != BuiltInPrimitiveLineIndicesEXT &&
builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
{
var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex;
}
for (auto &arg : func.arguments)
{
if (arg.id == var_id)
{
already_declared = true;
break;
}
}
if (!already_declared)
{
// basetype is effectively ignored here since we declare the argument
// with explicit types. Just pass down a valid type.
uint32_t type_id = expression_type_id(var_id);
if (var->storage == StorageClassTaskPayloadWorkgroupEXT)
func.arguments.push_back({ type_id, var_id, 1u, 0u, true });
else
func.arguments.push_back({ type_id, var_id, 1u, 1u, true });
}
}
break;
}
default:
break;
}
}
}
}
string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
auto &flags = get_member_decoration_bitset(type.self, index);
// HLSL can emit row_major or column_major decoration in any struct.
// Do not try to merge combined decorations for children like in GLSL.
// Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major".
// The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
if (flags.get(DecorationColMajor))
return "row_major ";
else if (flags.get(DecorationRowMajor))
return "column_major ";
return "";
}
void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier, uint32_t base_offset)
{
auto &membertype = get<SPIRType>(member_type_id);
Bitset memberflags;
auto &memb = ir.meta[type.self].members;
if (index < memb.size())
memberflags = memb[index].decoration_flags;
string packing_offset;
bool is_push_constant = type.storage == StorageClassPushConstant;
if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) &&
has_member_decoration(type.self, index, DecorationOffset))
{
uint32_t offset = memb[index].offset - base_offset;
if (offset & 3)
SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL.");
static const char *packing_swizzle[] = { "", ".y", ".z", ".w" };
packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")");
}
statement(layout_for_member(type, index), qualifier,
variable_decl(membertype, to_member_name(type, index)), packing_offset, ";");
}
void CompilerHLSL::emit_rayquery_function(const char *commited, const char *candidate, const uint32_t *ops)
{
flush_variable_declaration(ops[0]);
uint32_t is_commited = evaluate_constant_u32(ops[3]);
emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_commited ? commited : candidate), false);
}
void CompilerHLSL::emit_mesh_tasks(SPIRBlock &block)
{
if (block.mesh.payload != 0)
{
statement("DispatchMesh(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ",
to_unpacked_expression(block.mesh.groups[2]), ", ", to_unpacked_expression(block.mesh.payload), ");");
}
else
{
SPIRV_CROSS_THROW("Amplification shader in HLSL must have payload");
}
}
void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock);
if (flattened_buffer_blocks.count(var.self))
{
emit_buffer_block_flattened(var);
}
else if (is_uav)
{
Bitset flags = ir.get_buffer_block_flags(var);
bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self);
bool is_coherent = flags.get(DecorationCoherent) && !is_readonly;
bool is_interlocked = interlocked_resources.count(var.self) > 0;
const char *type_name = "ByteAddressBuffer ";
if (!is_readonly)
type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
add_resource_name(var.self);
statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
to_resource_binding(var), ";");
}
else
{
if (type.array.empty())
{
// Flatten the top-level struct so we can use packoffset,
// this restriction is similar to GLSL where layout(offset) is not possible on sub-structs.
flattened_structs[var.self] = false;
// Prefer the block name if possible.
auto buffer_name = to_name(type.self, false);
if (ir.meta[type.self].decoration.alias.empty() ||
resource_names.find(buffer_name) != end(resource_names) ||
block_names.find(buffer_name) != end(block_names))
{
buffer_name = get_block_fallback_name(var.self);
}
add_variable(block_names, resource_names, buffer_name);
// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
// This cannot conflict with anything else, so we're safe now.
if (buffer_name.empty())
buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
uint32_t failed_index = 0;
if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index))
set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
else
{
SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ",
failed_index, " (name: ", to_member_name(type, failed_index),
") cannot be expressed with either HLSL packing layout or packoffset."));
}
block_names.insert(buffer_name);
// Save for post-reflection later.
declared_block_names[var.self] = buffer_name;
type.member_name_cache.clear();
// var.self can be used as a backup name for the block name,
// so we need to make sure we don't disturb the name here on a recompile.
// It will need to be reset if we have to recompile.
preserve_alias_on_reset(var.self);
add_resource_name(var.self);
statement("cbuffer ", buffer_name, to_resource_binding(var));
begin_scope();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
auto backup_name = get_member_name(type.self, i);
auto member_name = to_member_name(type, i);
member_name = join(to_name(var.self), "_", member_name);
ParsedIR::sanitize_underscores(member_name);
set_member_name(type.self, i, member_name);
emit_struct_member(type, member, i, "");
set_member_name(type.self, i, backup_name);
i++;
}
end_scope_decl();
statement("");
}
else
{
if (hlsl_options.shader_model < 51)
SPIRV_CROSS_THROW(
"Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");
add_resource_name(type.self);
add_resource_name(var.self);
// ConstantBuffer<T> does not support packoffset, so it is unuseable unless everything aligns as we expect.
uint32_t failed_index = 0;
if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index))
{
SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self),
"), member index ", failed_index, " (name: ", to_member_name(type, failed_index),
") cannot be expressed with normal HLSL packing rules."));
}
emit_struct(get<SPIRType>(type.self));
statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type),
to_resource_binding(var), ";");
}
}
}
void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
{
emit_buffer_block_flattened(var);
}
else if (root_constants_layout.empty())
{
emit_buffer_block(var);
}
else
{
for (const auto &layout : root_constants_layout)
{
auto &type = get<SPIRType>(var.basetype);
uint32_t failed_index = 0;
if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start,
layout.end))
set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
else
{
SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")",
", member index ", failed_index, " (name: ", to_member_name(type, failed_index),
") cannot be expressed with either HLSL packing layout or packoffset."));
}
flattened_structs[var.self] = false;
type.member_name_cache.clear();
add_resource_name(var.self);
auto &memb = ir.meta[type.self].members;
statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self),
to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space));
begin_scope();
// Index of the next field in the generated root constant constant buffer
auto constant_index = 0u;
// Iterate over all member of the push constant and check which of the fields
// fit into the given root constant layout.
for (auto i = 0u; i < memb.size(); i++)
{
const auto offset = memb[i].offset;
if (layout.start <= offset && offset < layout.end)
{
const auto &member = type.member_types[i];
add_member_name(type, constant_index);
auto backup_name = get_member_name(type.self, i);
auto member_name = to_member_name(type, i);
member_name = join(to_name(var.self), "_", member_name);
ParsedIR::sanitize_underscores(member_name);
set_member_name(type.self, constant_index, member_name);
emit_struct_member(type, member, i, "", layout.start);
set_member_name(type.self, constant_index, backup_name);
constant_index++;
}
}
end_scope_decl();
}
}
}
string CompilerHLSL::to_sampler_expression(uint32_t id)
{
auto expr = join("_", to_non_uniform_aware_expression(id));
auto index = expr.find_first_of('[');
if (index == string::npos)
{
return expr + "_sampler";
}
else
{
// We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead.
return expr.insert(index, "_sampler");
}
}
void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty())
{
set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
}
else
{
// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
}
}
string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
{
string arg_str = CompilerGLSL::to_func_call_arg(arg, id);
if (hlsl_options.shader_model <= 30)
return arg_str;
// Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL.
auto &type = expression_type(id);
// We don't have to consider combined image samplers here via OpSampledImage because
// those variables cannot be passed as arguments to functions.
// Only global SampledImage variables may be used as arguments.
if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
arg_str += ", " + to_sampler_expression(id);
return arg_str;
}
string CompilerHLSL::get_inner_entry_point_name() const
{
auto &execution = get_entry_point();
if (hlsl_options.use_entry_point_name)
{
auto name = join(execution.name, "_inner");
ParsedIR::sanitize_underscores(name);
return name;
}
if (execution.model == ExecutionModelVertex)
return "vert_main";
else if (execution.model == ExecutionModelFragment)
return "frag_main";
else if (execution.model == ExecutionModelGLCompute)
return "comp_main";
else if (execution.model == ExecutionModelMeshEXT)
return "mesh_main";
else if (execution.model == ExecutionModelTaskEXT)
return "task_main";
else
SPIRV_CROSS_THROW("Unsupported execution model.");
}
void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
{
if (func.self != ir.default_entry_point)
add_function_overload(func);
// Avoid shadow declarations.
local_variable_names = resource_names;
string decl;
auto &type = get<SPIRType>(func.return_type);
if (type.array.empty())
{
decl += flags_to_qualifiers_glsl(type, return_flags);
decl += type_to_glsl(type);
decl += " ";
}
else
{
// We cannot return arrays in HLSL, so "return" through an out variable.
decl = "void ";
}
if (func.self == ir.default_entry_point)
{
decl += get_inner_entry_point_name();
processing_entry_point = true;
}
else
decl += to_name(func.self);
decl += "(";
SmallVector<string> arglist;
if (!type.array.empty())
{
// Fake array returns by writing to an out array instead.
string out_argument;
out_argument += "out ";
out_argument += type_to_glsl(type);
out_argument += " ";
out_argument += "spvReturnValue";
out_argument += type_to_array_glsl(type);
arglist.push_back(std::move