| /* |
| * Copyright 2016-2021 Robert Konrad |
| * SPDX-License-Identifier: Apache-2.0 OR MIT |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| /* |
| * At your option, you may choose to accept this material under either: |
| * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
| * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
| */ |
| |
| #include "spirv_hlsl.hpp" |
| #include "GLSL.std.450.h" |
| #include <algorithm> |
| #include <assert.h> |
| |
| using namespace spv; |
| using namespace SPIRV_CROSS_NAMESPACE; |
| using namespace std; |
| |
| enum class ImageFormatNormalizedState |
| { |
| None = 0, |
| Unorm = 1, |
| Snorm = 2 |
| }; |
| |
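| // Maps a SPIR-V image format onto the normalized-state qualifier its HLSL |
| // typed UAV declaration needs, e.g. Rgba8 -> Unorm ("unorm float4") and |
| // Rgba8Snorm -> Snorm. Non-normalized formats map to None. |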
| static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt) |
| { |
| switch (fmt) |
| { |
| case ImageFormatR8: |
| case ImageFormatR16: |
| case ImageFormatRg8: |
| case ImageFormatRg16: |
| case ImageFormatRgba8: |
| case ImageFormatRgba16: |
| case ImageFormatRgb10A2: |
| return ImageFormatNormalizedState::Unorm; |
| |
| case ImageFormatR8Snorm: |
| case ImageFormatR16Snorm: |
| case ImageFormatRg8Snorm: |
| case ImageFormatRg16Snorm: |
| case ImageFormatRgba8Snorm: |
| case ImageFormatRgba16Snorm: |
| return ImageFormatNormalizedState::Snorm; |
| |
| default: |
| break; |
| } |
| |
| return ImageFormatNormalizedState::None; |
| } |
| |
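| // Returns how many components a typed image format carries. This becomes the |
| // vector width of the Buffer<>/Texture<> template argument, e.g. Rg16f -> 2. |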
| static unsigned image_format_to_components(ImageFormat fmt) |
| { |
| switch (fmt) |
| { |
| case ImageFormatR8: |
| case ImageFormatR16: |
| case ImageFormatR8Snorm: |
| case ImageFormatR16Snorm: |
| case ImageFormatR16f: |
| case ImageFormatR32f: |
| case ImageFormatR8i: |
| case ImageFormatR16i: |
| case ImageFormatR32i: |
| case ImageFormatR8ui: |
| case ImageFormatR16ui: |
| case ImageFormatR32ui: |
| return 1; |
| |
| case ImageFormatRg8: |
| case ImageFormatRg16: |
| case ImageFormatRg8Snorm: |
| case ImageFormatRg16Snorm: |
| case ImageFormatRg16f: |
| case ImageFormatRg32f: |
| case ImageFormatRg8i: |
| case ImageFormatRg16i: |
| case ImageFormatRg32i: |
| case ImageFormatRg8ui: |
| case ImageFormatRg16ui: |
| case ImageFormatRg32ui: |
| return 2; |
| |
| case ImageFormatR11fG11fB10f: |
| return 3; |
| |
| case ImageFormatRgba8: |
| case ImageFormatRgba16: |
| case ImageFormatRgb10A2: |
| case ImageFormatRgba8Snorm: |
| case ImageFormatRgba16Snorm: |
| case ImageFormatRgba16f: |
| case ImageFormatRgba32f: |
| case ImageFormatRgba8i: |
| case ImageFormatRgba16i: |
| case ImageFormatRgba32i: |
| case ImageFormatRgba8ui: |
| case ImageFormatRgba16ui: |
| case ImageFormatRgba32ui: |
| case ImageFormatRgb10a2ui: |
| return 4; |
| |
| case ImageFormatUnknown: |
| return 4; // Assume 4. |
| |
| default: |
| SPIRV_CROSS_THROW("Unrecognized typed image format."); |
| } |
| } |
| |
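| // Builds the HLSL template argument for a typed resource from the SPIR-V |
| // image format, validating that the format matches the image's sampled type, |
| // e.g. Rgba8 + Float -> "unorm float4", R32ui + UInt -> "uint". |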
| static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) |
| { |
| switch (fmt) |
| { |
| case ImageFormatR8: |
| case ImageFormatR16: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "unorm float"; |
| case ImageFormatRg8: |
| case ImageFormatRg16: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "unorm float2"; |
| case ImageFormatRgba8: |
| case ImageFormatRgba16: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "unorm float4"; |
| case ImageFormatRgb10A2: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "unorm float4"; |
| |
| case ImageFormatR8Snorm: |
| case ImageFormatR16Snorm: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "snorm float"; |
| case ImageFormatRg8Snorm: |
| case ImageFormatRg16Snorm: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "snorm float2"; |
| case ImageFormatRgba8Snorm: |
| case ImageFormatRgba16Snorm: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "snorm float4"; |
| |
| case ImageFormatR16f: |
| case ImageFormatR32f: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "float"; |
| case ImageFormatRg16f: |
| case ImageFormatRg32f: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "float2"; |
| case ImageFormatRgba16f: |
| case ImageFormatRgba32f: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "float4"; |
| |
| case ImageFormatR11fG11fB10f: |
| if (basetype != SPIRType::Float) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "float3"; |
| |
| case ImageFormatR8i: |
| case ImageFormatR16i: |
| case ImageFormatR32i: |
| if (basetype != SPIRType::Int) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "int"; |
| case ImageFormatRg8i: |
| case ImageFormatRg16i: |
| case ImageFormatRg32i: |
| if (basetype != SPIRType::Int) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "int2"; |
| case ImageFormatRgba8i: |
| case ImageFormatRgba16i: |
| case ImageFormatRgba32i: |
| if (basetype != SPIRType::Int) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "int4"; |
| |
| case ImageFormatR8ui: |
| case ImageFormatR16ui: |
| case ImageFormatR32ui: |
| if (basetype != SPIRType::UInt) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "uint"; |
| case ImageFormatRg8ui: |
| case ImageFormatRg16ui: |
| case ImageFormatRg32ui: |
| if (basetype != SPIRType::UInt) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "uint2"; |
| case ImageFormatRgba8ui: |
| case ImageFormatRgba16ui: |
| case ImageFormatRgba32ui: |
| if (basetype != SPIRType::UInt) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "uint4"; |
| case ImageFormatRgb10a2ui: |
| if (basetype != SPIRType::UInt) |
| SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); |
| return "uint4"; |
| |
| case ImageFormatUnknown: |
| switch (basetype) |
| { |
| case SPIRType::Float: |
| return "float4"; |
| case SPIRType::Int: |
| return "int4"; |
| case SPIRType::UInt: |
| return "uint4"; |
| default: |
| SPIRV_CROSS_THROW("Unsupported base type for image."); |
| } |
| |
| default: |
| SPIRV_CROSS_THROW("Unrecognized typed image format."); |
| } |
| } |
| |
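| // Emits SM 4.0+ resource declarations, e.g. a sampled 2D float image becomes |
| // "Texture2D<float4>" and a storage image with format Rgba8 becomes |
| // "RWTexture2D<unorm float4>". NonWritable UAVs can be demoted to SRVs via |
| // the nonwritable_uav_texture_as_srv option. |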
| string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) |
| { |
| auto &imagetype = get<SPIRType>(type.image.type); |
| const char *dim = nullptr; |
| bool typed_load = false; |
| uint32_t components = 4; |
| |
| bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable); |
| |
| switch (type.image.dim) |
| { |
| case Dim1D: |
| typed_load = type.image.sampled == 2; |
| dim = "1D"; |
| break; |
| case Dim2D: |
| typed_load = type.image.sampled == 2; |
| dim = "2D"; |
| break; |
| case Dim3D: |
| typed_load = type.image.sampled == 2; |
| dim = "3D"; |
| break; |
| case DimCube: |
| if (type.image.sampled == 2) |
| SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL."); |
| dim = "Cube"; |
| break; |
| case DimRect: |
| SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO |
| case DimBuffer: |
| if (type.image.sampled == 1) |
| return join("Buffer<", type_to_glsl(imagetype), components, ">"); |
| else if (type.image.sampled == 2) |
| { |
| if (interlocked_resources.count(id)) |
| return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), |
| ">"); |
| |
| typed_load = !force_image_srv; |
| |
| const char *rw = force_image_srv ? "" : "RW"; |
| return join(rw, "Buffer<", |
| typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : |
| join(type_to_glsl(imagetype), components), |
| ">"); |
| } |
| else |
| SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce at runtime."); |
| case DimSubpassData: |
| dim = "2D"; |
| typed_load = false; |
| break; |
| default: |
| SPIRV_CROSS_THROW("Invalid dimension."); |
| } |
| const char *arrayed = type.image.arrayed ? "Array" : ""; |
| const char *ms = type.image.ms ? "MS" : ""; |
| const char *rw = typed_load && !force_image_srv ? "RW" : ""; |
| |
| if (force_image_srv) |
| typed_load = false; |
| |
| if (typed_load && interlocked_resources.count(id)) |
| rw = "RasterizerOrdered"; |
| |
| return join(rw, "Texture", dim, ms, arrayed, "<", |
| typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : |
| join(type_to_glsl(imagetype), components), |
| ">"); |
| } |
| |
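| // SM 3.0 and older use legacy effects-style declarations (sampler2D, |
| // samplerCUBE, ...), so no template argument is emitted here. |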
| string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/) |
| { |
| auto &imagetype = get<SPIRType>(type.image.type); |
| string res; |
| |
| switch (imagetype.basetype) |
| { |
| case SPIRType::Int: |
| res = "i"; |
| break; |
| case SPIRType::UInt: |
| res = "u"; |
| break; |
| default: |
| break; |
| } |
| |
| if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) |
| return res + "subpassInput" + (type.image.ms ? "MS" : ""); |
| |
| // If we're emulating subpassInput with samplers, force the sampler |
| // prefix so we don't have to specify a format. |
| if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) |
| { |
| // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. |
| if (type.image.dim == DimBuffer && type.image.sampled == 1) |
| res += "sampler"; |
| else |
| res += type.image.sampled == 2 ? "image" : "texture"; |
| } |
| else |
| res += "sampler"; |
| |
| switch (type.image.dim) |
| { |
| case Dim1D: |
| res += "1D"; |
| break; |
| case Dim2D: |
| res += "2D"; |
| break; |
| case Dim3D: |
| res += "3D"; |
| break; |
| case DimCube: |
| res += "CUBE"; |
| break; |
| |
| case DimBuffer: |
| res += "Buffer"; |
| break; |
| |
| case DimSubpassData: |
| res += "2D"; |
| break; |
| default: |
| SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); |
| } |
| |
| if (type.image.ms) |
| res += "MS"; |
| if (type.image.arrayed) |
| res += "Array"; |
| |
| return res; |
| } |
| |
| string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id) |
| { |
| if (hlsl_options.shader_model <= 30) |
| return image_type_hlsl_legacy(type, id); |
| else |
| return image_type_hlsl_modern(type, id); |
| } |
| |
| // The optional id parameter indicates the object whose type we are trying |
| // to find the description for. Most type descriptions do not depend on a |
| // specific object's use of that type. |
| string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) |
| { |
| // Ignore the pointer type since HLSL doesn't have pointers. |
| |
| switch (type.basetype) |
| { |
| case SPIRType::Struct: |
| // Need OpName lookup here to get a "sensible" name for a struct. |
| if (backend.explicit_struct_type) |
| return join("struct ", to_name(type.self)); |
| else |
| return to_name(type.self); |
| |
| case SPIRType::Image: |
| case SPIRType::SampledImage: |
| return image_type_hlsl(type, id); |
| |
| case SPIRType::Sampler: |
| return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState"; |
| |
| case SPIRType::Void: |
| return "void"; |
| |
| default: |
| break; |
| } |
| |
| if (type.vecsize == 1 && type.columns == 1) // Scalar builtin |
| { |
| switch (type.basetype) |
| { |
| case SPIRType::Boolean: |
| return "bool"; |
| case SPIRType::Int: |
| return backend.basic_int_type; |
| case SPIRType::UInt: |
| return backend.basic_uint_type; |
| case SPIRType::AtomicCounter: |
| return "atomic_uint"; |
| case SPIRType::Half: |
| if (hlsl_options.enable_16bit_types) |
| return "half"; |
| else |
| return "min16float"; |
| case SPIRType::Short: |
| if (hlsl_options.enable_16bit_types) |
| return "int16_t"; |
| else |
| return "min16int"; |
| case SPIRType::UShort: |
| if (hlsl_options.enable_16bit_types) |
| return "uint16_t"; |
| else |
| return "min16uint"; |
| case SPIRType::Float: |
| return "float"; |
| case SPIRType::Double: |
| return "double"; |
| case SPIRType::Int64: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); |
| return "int64_t"; |
| case SPIRType::UInt64: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); |
| return "uint64_t"; |
| default: |
| return "???"; |
| } |
| } |
| else if (type.vecsize > 1 && type.columns == 1) // Vector builtin |
| { |
| switch (type.basetype) |
| { |
| case SPIRType::Boolean: |
| return join("bool", type.vecsize); |
| case SPIRType::Int: |
| return join("int", type.vecsize); |
| case SPIRType::UInt: |
| return join("uint", type.vecsize); |
| case SPIRType::Half: |
| return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize); |
| case SPIRType::Short: |
| return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize); |
| case SPIRType::UShort: |
| return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize); |
| case SPIRType::Float: |
| return join("float", type.vecsize); |
| case SPIRType::Double: |
| return join("double", type.vecsize); |
| case SPIRType::Int64: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); |
| return join("int64_t", type.vecsize); |
| case SPIRType::UInt64: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); |
| return join("uint64_t", type.vecsize); |
| default: |
| return "???"; |
| } |
| } |
| else |
| { |
| switch (type.basetype) |
| { |
| case SPIRType::Boolean: |
| return join("bool", type.columns, "x", type.vecsize); |
| case SPIRType::Int: |
| return join("int", type.columns, "x", type.vecsize); |
| case SPIRType::UInt: |
| return join("uint", type.columns, "x", type.vecsize); |
| case SPIRType::Half: |
| return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize); |
| case SPIRType::Short: |
| return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize); |
| case SPIRType::UShort: |
| return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize); |
| case SPIRType::Float: |
| return join("float", type.columns, "x", type.vecsize); |
| case SPIRType::Double: |
| return join("double", type.columns, "x", type.vecsize); |
| // Matrix types not supported for int64/uint64. |
| default: |
| return "???"; |
| } |
| } |
| } |
| |
| void CompilerHLSL::emit_header() |
| { |
| for (auto &header : header_lines) |
| statement(header); |
| |
| if (!header_lines.empty()) |
| statement(""); |
| } |
| |
| void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var) |
| { |
| add_resource_name(var.self); |
| |
| // The global copies of I/O variables should not contain interpolation qualifiers. |
| // These are emitted inside the interface structs. |
| auto &flags = ir.meta[var.self].decoration.decoration_flags; |
| auto old_flags = flags; |
| flags.reset(); |
| statement("static ", variable_decl(var), ";"); |
| flags = old_flags; |
| } |
| |
| const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) |
| { |
| // Input and output variables are handled specially in the HLSL backend. |
| // The variables are declared as global, private variables, and do not need any qualifiers. |
| if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || |
| var.storage == StorageClassPushConstant) |
| { |
| return "uniform "; |
| } |
| |
| return ""; |
| } |
| |
| void CompilerHLSL::emit_builtin_outputs_in_struct() |
| { |
| auto &execution = get_entry_point(); |
| |
| bool legacy = hlsl_options.shader_model <= 30; |
| active_output_builtins.for_each_bit([&](uint32_t i) { |
| const char *type = nullptr; |
| const char *semantic = nullptr; |
| auto builtin = static_cast<BuiltIn>(i); |
| switch (builtin) |
| { |
| case BuiltInPosition: |
| type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4"; |
| semantic = legacy ? "POSITION" : "SV_Position"; |
| break; |
| |
| case BuiltInSampleMask: |
| if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) |
| SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); |
| type = "uint"; |
| semantic = "SV_Coverage"; |
| break; |
| |
| case BuiltInFragDepth: |
| type = "float"; |
| if (legacy) |
| { |
| semantic = "DEPTH"; |
| } |
| else |
| { |
| if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater)) |
| semantic = "SV_DepthGreaterEqual"; |
| else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess)) |
| semantic = "SV_DepthLessEqual"; |
| else |
| semantic = "SV_Depth"; |
| } |
| break; |
| |
| case BuiltInClipDistance: |
| // HLSL is a bit weird here: use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. |
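| // For example, clip_distance_count == 5 emits: |
| // float4 gl_ClipDistance0 : SV_ClipDistance0; |
| // float gl_ClipDistance1 : SV_ClipDistance1; |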
| for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) |
| { |
| uint32_t to_declare = clip_distance_count - clip; |
| if (to_declare > 4) |
| to_declare = 4; |
| |
| uint32_t semantic_index = clip / 4; |
| |
| static const char *types[] = { "float", "float2", "float3", "float4" }; |
| statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, |
| " : SV_ClipDistance", semantic_index, ";"); |
| } |
| break; |
| |
| case BuiltInCullDistance: |
| // HLSL is a bit weird here: use SV_CullDistance0, SV_CullDistance1 and so on with vectors. |
| for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) |
| { |
| uint32_t to_declare = cull_distance_count - cull; |
| if (to_declare > 4) |
| to_declare = 4; |
| |
| uint32_t semantic_index = cull / 4; |
| |
| static const char *types[] = { "float", "float2", "float3", "float4" }; |
| statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, |
| " : SV_CullDistance", semantic_index, ";"); |
| } |
| break; |
| |
| case BuiltInPointSize: |
| // If point_size_compat is enabled, just ignore PointSize. |
| // PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders, |
| // even if it means working around the missing feature. |
| if (hlsl_options.point_size_compat) |
| break; |
| else |
| SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); |
| |
| default: |
| SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); |
| } |
| |
| if (type && semantic) |
| statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); |
| }); |
| } |
| |
| void CompilerHLSL::emit_builtin_inputs_in_struct() |
| { |
| bool legacy = hlsl_options.shader_model <= 30; |
| active_input_builtins.for_each_bit([&](uint32_t i) { |
| const char *type = nullptr; |
| const char *semantic = nullptr; |
| auto builtin = static_cast<BuiltIn>(i); |
| switch (builtin) |
| { |
| case BuiltInFragCoord: |
| type = "float4"; |
| semantic = legacy ? "VPOS" : "SV_Position"; |
| break; |
| |
| case BuiltInVertexId: |
| case BuiltInVertexIndex: |
| if (legacy) |
| SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower."); |
| type = "uint"; |
| semantic = "SV_VertexID"; |
| break; |
| |
| case BuiltInInstanceId: |
| case BuiltInInstanceIndex: |
| if (legacy) |
| SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower."); |
| type = "uint"; |
| semantic = "SV_InstanceID"; |
| break; |
| |
| case BuiltInSampleId: |
| if (legacy) |
| SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower."); |
| type = "uint"; |
| semantic = "SV_SampleIndex"; |
| break; |
| |
| case BuiltInSampleMask: |
| if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) |
| SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher."); |
| type = "uint"; |
| semantic = "SV_Coverage"; |
| break; |
| |
| case BuiltInGlobalInvocationId: |
| type = "uint3"; |
| semantic = "SV_DispatchThreadID"; |
| break; |
| |
| case BuiltInLocalInvocationId: |
| type = "uint3"; |
| semantic = "SV_GroupThreadID"; |
| break; |
| |
| case BuiltInLocalInvocationIndex: |
| type = "uint"; |
| semantic = "SV_GroupIndex"; |
| break; |
| |
| case BuiltInWorkgroupId: |
| type = "uint3"; |
| semantic = "SV_GroupID"; |
| break; |
| |
| case BuiltInFrontFacing: |
| type = "bool"; |
| semantic = "SV_IsFrontFace"; |
| break; |
| |
| case BuiltInNumWorkgroups: |
| case BuiltInSubgroupSize: |
| case BuiltInSubgroupLocalInvocationId: |
| case BuiltInSubgroupEqMask: |
| case BuiltInSubgroupLtMask: |
| case BuiltInSubgroupLeMask: |
| case BuiltInSubgroupGtMask: |
| case BuiltInSubgroupGeMask: |
| // Handled specially. |
| break; |
| |
| case BuiltInClipDistance: |
| // HLSL is a bit weird here: use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. |
| for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) |
| { |
| uint32_t to_declare = clip_distance_count - clip; |
| if (to_declare > 4) |
| to_declare = 4; |
| |
| uint32_t semantic_index = clip / 4; |
| |
| static const char *types[] = { "float", "float2", "float3", "float4" }; |
| statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, |
| " : SV_ClipDistance", semantic_index, ";"); |
| } |
| break; |
| |
| case BuiltInCullDistance: |
| // HLSL is a bit weird here: use SV_CullDistance0, SV_CullDistance1 and so on with vectors. |
| for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) |
| { |
| uint32_t to_declare = cull_distance_count - cull; |
| if (to_declare > 4) |
| to_declare = 4; |
| |
| uint32_t semantic_index = cull / 4; |
| |
| static const char *types[] = { "float", "float2", "float3", "float4" }; |
| statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, |
| " : SV_CullDistance", semantic_index, ";"); |
| } |
| break; |
| |
| case BuiltInPointCoord: |
| // PointCoord is not supported, but provide a way to just ignore that, similar to PointSize. |
| if (hlsl_options.point_coord_compat) |
| break; |
| else |
| SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); |
| |
| default: |
| SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); |
| } |
| |
| if (type && semantic) |
| statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";"); |
| }); |
| } |
| |
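| // A location is consumed per matrix column and per array element, so e.g. a |
| // float4x4 consumes 4 locations and a float2 array of size 3 consumes 3. |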
| uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const |
| { |
| // TODO: Need to verify correctness. |
| uint32_t elements = 0; |
| |
| if (type.basetype == SPIRType::Struct) |
| { |
| for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
| elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i])); |
| } |
| else |
| { |
| uint32_t array_multiplier = 1; |
| for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) |
| { |
| if (type.array_size_literal[i]) |
| array_multiplier *= type.array[i]; |
| else |
| array_multiplier *= evaluate_constant_u32(type.array[i]); |
| } |
| elements += array_multiplier * type.columns; |
| } |
| return elements; |
| } |
| |
| string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) |
| { |
| string res; |
| //if (flags & (1ull << DecorationSmooth)) |
| // res += "linear "; |
| if (flags.get(DecorationFlat)) |
| res += "nointerpolation "; |
| if (flags.get(DecorationNoPerspective)) |
| res += "noperspective "; |
| if (flags.get(DecorationCentroid)) |
| res += "centroid "; |
| if (flags.get(DecorationPatch)) |
| res += "patch "; // Seems to be different in actual HLSL. |
| if (flags.get(DecorationSample)) |
| res += "sample "; |
| if (flags.get(DecorationInvariant) && backend.support_precise_qualifier) |
| res += "precise "; |
| |
| return res; |
| } |
| |
| std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc) |
| { |
| if (em == ExecutionModelVertex && sc == StorageClassInput) |
| { |
| // We have a vertex attribute - we should look at remapping it if the user provided |
| // vertex attribute hints. |
| for (auto &attribute : remap_vertex_attributes) |
| if (attribute.location == location) |
| return attribute.semantic; |
| } |
| |
| // Not a vertex attribute, or no remap_vertex_attributes entry. |
| return join("TEXCOORD", location); |
| } |
| |
| std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var) |
| { |
| // We cannot emit static const initializer for block constants for practical reasons, |
| // so just inline the initializer. |
| // FIXME: There is a theoretical problem here if someone tries to composite extract |
| // into this initializer since we don't declare it properly, but that is somewhat nonsensical. |
| auto &type = get<SPIRType>(var.basetype); |
| bool is_block = has_decoration(type.self, DecorationBlock); |
| auto *c = maybe_get<SPIRConstant>(var.initializer); |
| if (is_block && c) |
| return constant_expression(*c); |
| else |
| return CompilerGLSL::to_initializer_expression(var); |
| } |
| |
| void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, |
| uint32_t location, |
| std::unordered_set<uint32_t> &active_locations) |
| { |
| auto &execution = get_entry_point(); |
| auto type = get<SPIRType>(var.basetype); |
| auto semantic = to_semantic(location, execution.model, var.storage); |
| auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index)); |
| auto &mbr_type = get<SPIRType>(type.member_types[member_index]); |
| |
| statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), |
| type_to_glsl(mbr_type), |
| " ", mbr_name, type_to_array_glsl(mbr_type), |
| " : ", semantic, ";"); |
| |
| // Structs and arrays should consume more locations. |
| uint32_t consumed_locations = type_to_consumed_locations(mbr_type); |
| for (uint32_t i = 0; i < consumed_locations; i++) |
| active_locations.insert(location + i); |
| } |
| |
| void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations) |
| { |
| auto &execution = get_entry_point(); |
| auto type = get<SPIRType>(var.basetype); |
| |
| string binding; |
| bool use_location_number = true; |
| bool legacy = hlsl_options.shader_model <= 30; |
| if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) |
| { |
| // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1. |
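| // E.g. a variable at location 0 with DecorationIndex 1 lands on SV_Target1. |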
| uint32_t index = get_decoration(var.self, DecorationIndex); |
| uint32_t location = get_decoration(var.self, DecorationLocation); |
| |
| if (index != 0 && location != 0) |
| SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL."); |
| |
| binding = join(legacy ? "COLOR" : "SV_Target", location + index); |
| use_location_number = false; |
| if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP) |
| type.vecsize = 4; |
| } |
| |
| const auto get_vacant_location = [&]() -> uint32_t { |
| for (uint32_t i = 0; i < 64; i++) |
| if (!active_locations.count(i)) |
| return i; |
| SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted."); |
| }; |
| |
| bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; |
| |
| auto name = to_name(var.self); |
| if (use_location_number) |
| { |
| uint32_t location_number; |
| |
| // If an explicit location exists, use it with TEXCOORD[N] semantic. |
| // Otherwise, pick a vacant location. |
| if (has_decoration(var.self, DecorationLocation)) |
| location_number = get_decoration(var.self, DecorationLocation); |
| else |
| location_number = get_vacant_location(); |
| |
| // Allow semantic remap if specified. |
| auto semantic = to_semantic(location_number, execution.model, var.storage); |
| |
| if (need_matrix_unroll && type.columns > 1) |
| { |
| if (!type.array.empty()) |
| SPIRV_CROSS_THROW("Arrays of matrices used as input/output are not supported."); |
| |
| // Unroll matrices. |
| for (uint32_t i = 0; i < type.columns; i++) |
| { |
| SPIRType newtype = type; |
| newtype.columns = 1; |
| |
| string effective_semantic; |
| if (hlsl_options.flatten_matrix_vertex_input_semantics) |
| effective_semantic = to_semantic(location_number, execution.model, var.storage); |
| else |
| effective_semantic = join(semantic, "_", i); |
| |
| statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), |
| variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); |
| active_locations.insert(location_number++); |
| } |
| } |
| else |
| { |
| statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", |
| semantic, ";"); |
| |
| // Structs and arrays should consume more locations. |
| uint32_t consumed_locations = type_to_consumed_locations(type); |
| for (uint32_t i = 0; i < consumed_locations; i++) |
| active_locations.insert(location_number + i); |
| } |
| } |
| else |
| statement(variable_decl(type, name), " : ", binding, ";"); |
| } |
| |
| std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) |
| { |
| switch (builtin) |
| { |
| case BuiltInVertexId: |
| return "gl_VertexID"; |
| case BuiltInInstanceId: |
| return "gl_InstanceID"; |
| case BuiltInNumWorkgroups: |
| { |
| if (!num_workgroups_builtin) |
| SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. " |
| "Cannot emit code for this builtin."); |
| |
| auto &var = get<SPIRVariable>(num_workgroups_builtin); |
| auto &type = get<SPIRType>(var.basetype); |
| auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); |
| ParsedIR::sanitize_underscores(ret); |
| return ret; |
| } |
| case BuiltInPointCoord: |
| // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. |
| return "float2(0.5f, 0.5f)"; |
| case BuiltInSubgroupLocalInvocationId: |
| return "WaveGetLaneIndex()"; |
| case BuiltInSubgroupSize: |
| return "WaveGetLaneCount()"; |
| |
| default: |
| return CompilerGLSL::builtin_to_glsl(builtin, storage); |
| } |
| } |
| |
| void CompilerHLSL::emit_builtin_variables() |
| { |
| Bitset builtins = active_input_builtins; |
| builtins.merge_or(active_output_builtins); |
| |
| bool need_base_vertex_info = false; |
| |
| std::unordered_map<uint32_t, ID> builtin_to_initializer; |
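| // Gather constant initializers for output builtins, looking through builtin |
| // blocks (e.g. gl_PerVertex) so the initializers can be replayed on the |
| // static builtin globals declared below. |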
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| if (!is_builtin_variable(var) || var.storage != StorageClassOutput || !var.initializer) |
| return; |
| |
| auto *c = this->maybe_get<SPIRConstant>(var.initializer); |
| if (!c) |
| return; |
| |
| auto &type = this->get<SPIRType>(var.basetype); |
| if (type.basetype == SPIRType::Struct) |
| { |
| uint32_t member_count = uint32_t(type.member_types.size()); |
| for (uint32_t i = 0; i < member_count; i++) |
| { |
| if (has_member_decoration(type.self, i, DecorationBuiltIn)) |
| { |
| builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] = |
| c->subconstants[i]; |
| } |
| } |
| } |
| else if (has_decoration(var.self, DecorationBuiltIn)) |
| builtin_to_initializer[get_decoration(var.self, DecorationBuiltIn)] = var.initializer; |
| }); |
| |
| // Emit global variables for the interface variables which are statically used by the shader. |
| builtins.for_each_bit([&](uint32_t i) { |
| const char *type = nullptr; |
| auto builtin = static_cast<BuiltIn>(i); |
| uint32_t array_size = 0; |
| |
| string init_expr; |
| auto init_itr = builtin_to_initializer.find(builtin); |
| if (init_itr != builtin_to_initializer.end()) |
| init_expr = join(" = ", to_expression(init_itr->second)); |
| |
| switch (builtin) |
| { |
| case BuiltInFragCoord: |
| case BuiltInPosition: |
| type = "float4"; |
| break; |
| |
| case BuiltInFragDepth: |
| type = "float"; |
| break; |
| |
| case BuiltInVertexId: |
| case BuiltInVertexIndex: |
| case BuiltInInstanceIndex: |
| type = "int"; |
| if (hlsl_options.support_nonzero_base_vertex_base_instance) |
| need_base_vertex_info = true; |
| break; |
| |
| case BuiltInInstanceId: |
| case BuiltInSampleId: |
| type = "int"; |
| break; |
| |
| case BuiltInPointSize: |
| if (hlsl_options.point_size_compat) |
| { |
| // Just emit the global variable, it will be ignored. |
| type = "float"; |
| break; |
| } |
| else |
| SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); |
| |
| case BuiltInGlobalInvocationId: |
| case BuiltInLocalInvocationId: |
| case BuiltInWorkgroupId: |
| type = "uint3"; |
| break; |
| |
| case BuiltInLocalInvocationIndex: |
| type = "uint"; |
| break; |
| |
| case BuiltInFrontFacing: |
| type = "bool"; |
| break; |
| |
| case BuiltInNumWorkgroups: |
| case BuiltInPointCoord: |
| // Handled specially. |
| break; |
| |
| case BuiltInSubgroupLocalInvocationId: |
| case BuiltInSubgroupSize: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); |
| break; |
| |
| case BuiltInSubgroupEqMask: |
| case BuiltInSubgroupLtMask: |
| case BuiltInSubgroupLeMask: |
| case BuiltInSubgroupGtMask: |
| case BuiltInSubgroupGeMask: |
| if (hlsl_options.shader_model < 60) |
| SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); |
| type = "uint4"; |
| break; |
| |
| case BuiltInClipDistance: |
| array_size = clip_distance_count; |
| type = "float"; |
| break; |
| |
| case BuiltInCullDistance: |
| array_size = cull_distance_count; |
| type = "float"; |
| break; |
| |
| case BuiltInSampleMask: |
| type = "int"; |
| break; |
| |
| default: |
| SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); |
| } |
| |
| StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; |
| |
| if (type) |
| { |
| if (array_size) |
| statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";"); |
| else |
| statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";"); |
| } |
| |
| // SampleMask can be both an input and an output with the sample builtin. In that case we have |
| // already declared the input variable, and we need to add the output one now. |
| if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) |
| { |
| statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";"); |
| } |
| }); |
| |
| if (need_base_vertex_info) |
| { |
| statement("cbuffer SPIRV_Cross_VertexInfo"); |
| begin_scope(); |
| statement("int SPIRV_Cross_BaseVertex;"); |
| statement("int SPIRV_Cross_BaseInstance;"); |
| end_scope_decl(); |
| statement(""); |
| } |
| } |
| |
| void CompilerHLSL::emit_composite_constants() |
| { |
| // HLSL cannot declare structs or arrays inline, so we must move them out to |
| // global constants directly. |
| bool emitted = false; |
| |
| ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) { |
| if (c.specialization) |
| return; |
| |
| auto &type = this->get<SPIRType>(c.constant_type); |
| |
| if (type.basetype == SPIRType::Struct && is_builtin_type(type)) |
| return; |
| |
| if (type.basetype == SPIRType::Struct || !type.array.empty()) |
| { |
| auto name = to_name(c.self); |
| statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); |
| emitted = true; |
| } |
| }); |
| |
| if (emitted) |
| statement(""); |
| } |
| |
| void CompilerHLSL::emit_specialization_constants_and_structs() |
| { |
| bool emitted = false; |
| SpecializationConstant wg_x, wg_y, wg_z; |
| ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); |
| |
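| // Record which block types are I/O blocks so struct emission below can tell |
| // them apart from UBO/SSBO blocks, which must not be emitted as plain structs. |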
| std::unordered_set<TypeID> io_block_types; |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) && |
| !var.remapped_variable && type.pointer && !is_builtin_variable(var) && |
| interface_variable_exists_in_entry_point(var.self) && |
| has_decoration(type.self, DecorationBlock)) |
| { |
| io_block_types.insert(type.self); |
| } |
| }); |
| |
| auto loop_lock = ir.create_loop_hard_lock(); |
| for (auto &id_ : ir.ids_for_constant_or_type) |
| { |
| auto &id = ir.ids[id_]; |
| |
| if (id.get_type() == TypeConstant) |
| { |
| auto &c = id.get<SPIRConstant>(); |
| |
| if (c.self == workgroup_size_id) |
| { |
| statement("static const uint3 gl_WorkGroupSize = ", |
| constant_expression(get<SPIRConstant>(workgroup_size_id)), ";"); |
| emitted = true; |
| } |
| else if (c.specialization) |
| { |
| auto &type = get<SPIRType>(c.constant_type); |
| auto name = to_name(c.self); |
| |
| // HLSL does not support specialization constants, so fall back to macros. |
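| // E.g. a constant with SpecId 3 typically expands to: |
| // #ifndef SPIRV_CROSS_CONSTANT_ID_3 |
| // #define SPIRV_CROSS_CONSTANT_ID_3 <default value> |
| // #endif |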
| c.specialization_constant_macro_name = |
| constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); |
| |
| statement("#ifndef ", c.specialization_constant_macro_name); |
| statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); |
| statement("#endif"); |
| statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); |
| emitted = true; |
| } |
| } |
| else if (id.get_type() == TypeConstantOp) |
| { |
| auto &c = id.get<SPIRConstantOp>(); |
| auto &type = get<SPIRType>(c.basetype); |
| auto name = to_name(c.self); |
| statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); |
| emitted = true; |
| } |
| else if (id.get_type() == TypeType) |
| { |
| auto &type = id.get<SPIRType>(); |
| bool is_non_io_block = has_decoration(type.self, DecorationBlock) && |
| io_block_types.count(type.self) == 0; |
| bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock); |
| if (type.basetype == SPIRType::Struct && type.array.empty() && |
| !type.pointer && !is_non_io_block && !is_buffer_block) |
| { |
| if (emitted) |
| statement(""); |
| emitted = false; |
| |
| emit_struct(type); |
| } |
| } |
| } |
| |
| if (emitted) |
| statement(""); |
| } |
| |
| void CompilerHLSL::replace_illegal_names() |
| { |
| static const unordered_set<string> keywords = { |
| // Additional HLSL specific keywords. |
| "line", "linear", "matrix", "point", "row_major", "sampler", "vector" |
| }; |
| |
| CompilerGLSL::replace_illegal_names(keywords); |
| CompilerGLSL::replace_illegal_names(); |
| } |
| |
| void CompilerHLSL::declare_undefined_values() |
| { |
| bool emitted = false; |
| ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) { |
| auto &type = this->get<SPIRType>(undef.basetype); |
| // OpUndef can be void for some reason ... |
| if (type.basetype == SPIRType::Void) |
| return; |
| |
| string initializer; |
| if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) |
| initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); |
| |
| statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); |
| emitted = true; |
| }); |
| |
| if (emitted) |
| statement(""); |
| } |
| |
| void CompilerHLSL::emit_resources() |
| { |
| auto &execution = get_entry_point(); |
| |
| replace_illegal_names(); |
| |
| emit_specialization_constants_and_structs(); |
| emit_composite_constants(); |
| |
| bool emitted = false; |
| |
| // Output UBOs and SSBOs |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| |
| bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; |
| bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || |
| ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); |
| |
| if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && |
| has_block_flags) |
| { |
| emit_buffer_block(var); |
| emitted = true; |
| } |
| }); |
| |
| // Output push constant blocks |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && |
| !is_hidden_variable(var)) |
| { |
| emit_push_constant_block(var); |
| emitted = true; |
| } |
| }); |
| |
| if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30) |
| { |
| statement("uniform float4 gl_HalfPixel;"); |
| emitted = true; |
| } |
| |
| bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30; |
| |
| // Output Uniform Constants (values, samplers, images, etc). |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| |
| // If we're remapping separate samplers and images, only emit the combined samplers. |
| if (skip_separate_image_sampler) |
| { |
| // Sampler buffers are always used without a sampler, and they will also work in regular D3D. |
| bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; |
| bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; |
| bool separate_sampler = type.basetype == SPIRType::Sampler; |
| if (!sampler_buffer && (separate_image || separate_sampler)) |
| return; |
| } |
| |
| if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && |
| type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && |
| !is_hidden_variable(var)) |
| { |
| emit_uniform(var); |
| emitted = true; |
| } |
| }); |
| |
| if (emitted) |
| statement(""); |
| emitted = false; |
| |
| // Emit builtin input and output variables here. |
| emit_builtin_variables(); |
| |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| |
| if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && |
| (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && |
| interface_variable_exists_in_entry_point(var.self)) |
| { |
| // Builtin variables are handled separately. |
| emit_interface_block_globally(var); |
| emitted = true; |
| } |
| }); |
| |
| if (emitted) |
| statement(""); |
| emitted = false; |
| |
| require_input = false; |
| require_output = false; |
| unordered_set<uint32_t> active_inputs; |
| unordered_set<uint32_t> active_outputs; |
| |
| struct IOVariable |
| { |
| const SPIRVariable *var; |
| uint32_t location; |
| uint32_t block_member_index; |
| bool block; |
| }; |
| |
| SmallVector<IOVariable> input_variables; |
| SmallVector<IOVariable> output_variables; |
| |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| bool block = has_decoration(type.self, DecorationBlock); |
| |
| if (var.storage != StorageClassInput && var.storage != StorageClassOutput) |
| return; |
| |
| if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && |
| interface_variable_exists_in_entry_point(var.self)) |
| { |
| if (block) |
| { |
| for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
| { |
| uint32_t location = get_declared_member_location(var, i, false); |
| if (var.storage == StorageClassInput) |
| input_variables.push_back({ &var, location, i, true }); |
| else |
| output_variables.push_back({ &var, location, i, true }); |
| } |
| } |
| else |
| { |
| uint32_t location = get_decoration(var.self, DecorationLocation); |
| if (var.storage == StorageClassInput) |
| input_variables.push_back({ &var, location, 0, false }); |
| else |
| output_variables.push_back({ &var, location, 0, false }); |
| } |
| } |
| }); |
| |
| const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool { |
| // Sort input and output variables based on, from more robust to less robust: |
| // - Location |
| // - Variable has a location |
| // - Name comparison |
| // - Variable has a name |
| // - Fallback: ID |
| bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation); |
| bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation); |
| |
| if (has_location_a && has_location_b) |
| return a.location < b.location; |
| else if (has_location_a && !has_location_b) |
| return true; |
| else if (!has_location_a && has_location_b) |
| return false; |
| |
| const auto &name1 = to_name(a.var->self); |
| const auto &name2 = to_name(b.var->self); |
| |
| if (name1.empty() && name2.empty()) |
| return a.var->self < b.var->self; |
| else if (name1.empty()) |
| return true; |
| else if (name2.empty()) |
| return false; |
| |
| return name1.compare(name2) < 0; |
| }; |
| |
| auto input_builtins = active_input_builtins; |
| input_builtins.clear(BuiltInNumWorkgroups); |
| input_builtins.clear(BuiltInPointCoord); |
| input_builtins.clear(BuiltInSubgroupSize); |
| input_builtins.clear(BuiltInSubgroupLocalInvocationId); |
| input_builtins.clear(BuiltInSubgroupEqMask); |
| input_builtins.clear(BuiltInSubgroupLtMask); |
| input_builtins.clear(BuiltInSubgroupLeMask); |
| input_builtins.clear(BuiltInSubgroupGtMask); |
| input_builtins.clear(BuiltInSubgroupGeMask); |
| |
| if (!input_variables.empty() || !input_builtins.empty()) |
| { |
| require_input = true; |
| statement("struct SPIRV_Cross_Input"); |
| |
| begin_scope(); |
| sort(input_variables.begin(), input_variables.end(), variable_compare); |
| for (auto &var : input_variables) |
| { |
| if (var.block) |
| emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs); |
| else |
| emit_interface_block_in_struct(*var.var, active_inputs); |
| } |
| emit_builtin_inputs_in_struct(); |
| end_scope_decl(); |
| statement(""); |
| } |
| |
| if (!output_variables.empty() || !active_output_builtins.empty()) |
| { |
| require_output = true; |
| statement("struct SPIRV_Cross_Output"); |
| |
| begin_scope(); |
| sort(output_variables.begin(), output_variables.end(), variable_compare); |
| for (auto &var : output_variables) |
| { |
| if (var.block) |
| emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); |
| else |
| emit_interface_block_in_struct(*var.var, active_outputs); |
| } |
| emit_builtin_outputs_in_struct(); |
| end_scope_decl(); |
| statement(""); |
| } |
| |
| // Global variables. |
| for (auto global : global_variables) |
| { |
| auto &var = get<SPIRVariable>(global); |
| if (is_hidden_variable(var, true)) |
| continue; |
| |
| if (var.storage != StorageClassOutput) |
| { |
| if (!variable_is_lut(var)) |
| { |
| add_resource_name(var.self); |
| |
| const char *storage = nullptr; |
| switch (var.storage) |
| { |
| case StorageClassWorkgroup: |
| storage = "groupshared"; |
| break; |
| |
| default: |
| storage = "static"; |
| break; |
| } |
| |
| string initializer; |
| if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && |
| !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) |
| { |
| initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); |
| } |
| statement(storage, " ", variable_decl(var), initializer, ";"); |
| |
| emitted = true; |
| } |
| } |
| } |
| |
| if (emitted) |
| statement(""); |
| |
| declare_undefined_values(); |
| |
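| // GLSL's mod() uses floored division (x - y * floor(x / y)), while HLSL's |
| // fmod() truncates toward zero, so emit a helper with the GLSL semantics. |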
| if (requires_op_fmod) |
| { |
| static const char *types[] = { |
| "float", |
| "float2", |
| "float3", |
| "float4", |
| }; |
| |
| for (auto &type : types) |
| { |
| statement(type, " mod(", type, " x, ", type, " y)"); |
| begin_scope(); |
| statement("return x - y * floor(x / y);"); |
| end_scope(); |
| statement(""); |
| } |
| } |
| |
| emit_texture_size_variants(required_texture_size_variants.srv, "4", false, ""); |
| for (uint32_t norm = 0; norm < 3; norm++) |
| { |
| for (uint32_t comp = 0; comp < 4; comp++) |
| { |
| static const char *qualifiers[] = { "", "unorm ", "snorm " }; |
| static const char *vecsizes[] = { "", "2", "3", "4" }; |
| emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true, |
| qualifiers[norm]); |
| } |
| } |
| |
| if (requires_fp16_packing) |
| { |
| // HLSL does not pack into a single word sadly :( |
| statement("uint spvPackHalf2x16(float2 value)"); |
| begin_scope(); |
| statement("uint2 Packed = f32tof16(value);"); |
| statement("return Packed.x | (Packed.y << 16);"); |
| end_scope(); |
| statement(""); |
| |
| statement("float2 spvUnpackHalf2x16(uint value)"); |
| begin_scope(); |
| statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_uint2_packing) |
| { |
| statement("uint64_t spvPackUint2x32(uint2 value)"); |
| begin_scope(); |
| statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);"); |
| end_scope(); |
| statement(""); |
| |
| statement("uint2 spvUnpackUint2x32(uint64_t value)"); |
| begin_scope(); |
| statement("uint2 Unpacked;"); |
| statement("Unpacked.x = uint(value & 0xffffffff);"); |
| statement("Unpacked.y = uint(value >> 32);"); |
| statement("return Unpacked;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_explicit_fp16_packing) |
| { |
| // HLSL does not pack into a single word sadly :( |
| statement("uint spvPackFloat2x16(min16float2 value)"); |
| begin_scope(); |
| statement("uint2 Packed = f32tof16(value);"); |
| statement("return Packed.x | (Packed.y << 16);"); |
| end_scope(); |
| statement(""); |
| |
| statement("min16float2 spvUnpackFloat2x16(uint value)"); |
| begin_scope(); |
| statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); |
| end_scope(); |
| statement(""); |
| } |
| |
| // HLSL does not seem to have builtins for these operations, so roll them by hand ... |
| if (requires_unorm8_packing) |
| { |
| statement("uint spvPackUnorm4x8(float4 value)"); |
| begin_scope(); |
| statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); |
| statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); |
| end_scope(); |
| statement(""); |
| |
| statement("float4 spvUnpackUnorm4x8(uint value)"); |
| begin_scope(); |
| statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); |
| statement("return float4(Packed) / 255.0;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_snorm8_packing) |
| { |
| statement("uint spvPackSnorm4x8(float4 value)"); |
| begin_scope(); |
| statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); |
| statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); |
| end_scope(); |
| statement(""); |
| |
| statement("float4 spvUnpackSnorm4x8(uint value)"); |
| begin_scope(); |
| statement("int SignedValue = int(value);"); |
| statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); |
| statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_unorm16_packing) |
| { |
| statement("uint spvPackUnorm2x16(float2 value)"); |
| begin_scope(); |
| statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); |
| statement("return Packed.x | (Packed.y << 16);"); |
| end_scope(); |
| statement(""); |
| |
| statement("float2 spvUnpackUnorm2x16(uint value)"); |
| begin_scope(); |
| statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); |
| statement("return float2(Packed) / 65535.0;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_snorm16_packing) |
| { |
| statement("uint spvPackSnorm2x16(float2 value)"); |
| begin_scope(); |
| statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); |
| statement("return uint(Packed.x | (Packed.y << 16));"); |
| end_scope(); |
| statement(""); |
| |
| statement("float2 spvUnpackSnorm2x16(uint value)"); |
| begin_scope(); |
| statement("int SignedValue = int(value);"); |
| statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); |
| statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);"); |
| end_scope(); |
| statement(""); |
| } |
| |
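| // HLSL has no bitfieldInsert/bitfieldExtract style intrinsics, so |
| // OpBitFieldInsert and the extract opcodes are emulated with the shift/mask |
| // helpers below. |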
| if (requires_bitfield_insert) |
| { |
| static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; |
| for (auto &type : types) |
| { |
| statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); |
| begin_scope(); |
| statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); |
| statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); |
| end_scope(); |
| statement(""); |
| } |
| } |
| |
| if (requires_bitfield_extract) |
| { |
| static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; |
| for (auto &type : unsigned_types) |
| { |
| statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); |
| begin_scope(); |
| statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); |
| statement("return (Base >> Offset) & Mask;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. |
| static const char *signed_types[] = { "int", "int2", "int3", "int4" }; |
| for (auto &type : signed_types) |
| { |
| statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); |
| begin_scope(); |
| statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); |
| statement(type, " Masked = (Base >> Offset) & Mask;"); |
| statement("int ExtendShift = (32 - Count) & 31;"); |
| statement("return (Masked << ExtendShift) >> ExtendShift;"); |
| end_scope(); |
| statement(""); |
| } |
| } |
| |
| if (requires_inverse_2x2) |
| { |
| statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); |
| statement("// adjoint and dividing by the determinant. The input matrix is not modified."); |
| statement("float2x2 spvInverse(float2x2 m)"); |
| begin_scope(); |
| statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); |
| statement_no_indent(""); |
| statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); |
| statement("adj[0][0] = m[1][1];"); |
| statement("adj[0][1] = -m[0][1];"); |
| statement_no_indent(""); |
| statement("adj[1][0] = -m[1][0];"); |
| statement("adj[1][1] = m[0][0];"); |
| statement_no_indent(""); |
| statement("// Calculate the determinant as a combination of the cofactors of the first row."); |
| statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); |
| statement_no_indent(""); |
| statement("// Divide the classical adjoint matrix by the determinant."); |
| statement("// If determinant is zero, matrix is not invertible, so leave it unchanged."); |
| statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_inverse_3x3) |
| { |
| statement("// Returns the determinant of a 2x2 matrix."); |
| statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); |
| begin_scope(); |
| statement("return a1 * b2 - b1 * a2;"); |
| end_scope(); |
| statement_no_indent(""); |
| statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); |
| statement("// adjoint and dividing by the determinant. The input matrix is not modified."); |
| statement("float3x3 spvInverse(float3x3 m)"); |
| begin_scope(); |
| statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); |
| statement_no_indent(""); |
| statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); |
| statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); |
| statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); |
| statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); |
| statement_no_indent(""); |
| statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); |
| statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); |
| statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); |
| statement_no_indent(""); |
| statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); |
| statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); |
| statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); |
| statement_no_indent(""); |
| statement("// Calculate the determinant as a combination of the cofactors of the first row."); |
| statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); |
| statement_no_indent(""); |
| statement("// Divide the classical adjoint matrix by the determinant."); |
| statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); |
| statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_inverse_4x4) |
| { |
| if (!requires_inverse_3x3) |
| { |
| statement("// Returns the determinant of a 2x2 matrix."); |
| statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); |
| begin_scope(); |
| statement("return a1 * b2 - b1 * a2;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| statement("// Returns the determinant of a 3x3 matrix."); |
| statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " |
| "float c2, float c3)"); |
| begin_scope(); |
| statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " |
| "spvDet2x2(a2, a3, " |
| "b2, b3);"); |
| end_scope(); |
| statement_no_indent(""); |
| statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); |
| statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); |
| statement("float4x4 spvInverse(float4x4 m)"); |
| begin_scope(); |
| statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); |
| statement_no_indent(""); |
| statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); |
| statement( |
| "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " |
| "m[2][3]);"); |
| statement_no_indent(""); |
| statement( |
| "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " |
| "m[3][3]);"); |
| statement( |
| "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " |
| "m[2][3]);"); |
| statement_no_indent(""); |
| statement( |
| "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " |
| "m[3][3]);"); |
| statement( |
| "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " |
| "m[3][3]);"); |
| statement( |
| "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " |
| "m[3][3]);"); |
| statement( |
| "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " |
| "m[2][3]);"); |
| statement_no_indent(""); |
| statement( |
| "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " |
| "m[3][2]);"); |
| statement( |
| "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " |
| "m[3][2]);"); |
| statement( |
| "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " |
| "m[3][2]);"); |
| statement( |
| "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " |
| "m[2][2]);"); |
| statement_no_indent(""); |
| statement("// Calculate the determinant as a combination of the cofactors of the first row."); |
| statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " |
| "* m[3][0]);"); |
| statement_no_indent(""); |
| statement("// Divide the classical adjoint matrix by the determinant."); |
| statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); |
| statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_scalar_reflect) |
| { |
| // FP16/FP64? No templates in HLSL. |
| statement("float spvReflect(float i, float n)"); |
| begin_scope(); |
| statement("return i - 2.0 * dot(n, i) * n;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_scalar_refract) |
| { |
| // FP16/FP64? No templates in HLSL. |
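		// Mirrors the GLSL refract() definition, specialized to scalars:
		// k = 1 - eta^2 * (1 - dot(N, I)^2); the result is 0 when k < 0,
		// and eta * I - (eta * dot(N, I) + sqrt(k)) * N otherwise.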
| statement("float spvRefract(float i, float n, float eta)"); |
| begin_scope(); |
| statement("float NoI = n * i;"); |
| statement("float NoI2 = NoI * NoI;"); |
| statement("float k = 1.0 - eta * eta * (1.0 - NoI2);"); |
| statement("if (k < 0.0)"); |
| begin_scope(); |
| statement("return 0.0;"); |
| end_scope(); |
| statement("else"); |
| begin_scope(); |
| statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); |
| end_scope(); |
| end_scope(); |
| statement(""); |
| } |
| |
| if (requires_scalar_faceforward) |
| { |
| // FP16/FP64? No templates in HLSL. |
| statement("float spvFaceForward(float n, float i, float nref)"); |
| begin_scope(); |
| statement("return i * nref < 0.0 ? n : -n;"); |
| end_scope(); |
| statement(""); |
| } |
| |
| for (TypeID type_id : composite_selection_workaround_types) |
| { |
| // Need out variable since HLSL does not support returning arrays. |
| auto &type = get<SPIRType>(type_id); |
| auto type_str = type_to_glsl(type); |
| auto type_arr_str = type_to_array_glsl(type); |
| statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ", |
| type_str, " true_val", type_arr_str, ", ", |
| type_str, " false_val", type_arr_str, ")"); |
| begin_scope(); |
| statement("if (cond)"); |
| begin_scope(); |
| statement("out_value = true_val;"); |
| end_scope(); |
| statement("else"); |
| begin_scope(); |
| statement("out_value = false_val;"); |
| end_scope(); |
| end_scope(); |
| statement(""); |
| } |
| } |
| |
| void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, |
| const char *type_qualifier) |
| { |
| if (variant_mask == 0) |
| return; |
| |
| static const char *types[QueryTypeCount] = { "float", "int", "uint" }; |
| static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", |
| "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", |
| "Texture2DMS", "Texture2DMSArray" }; |
| |
| static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; |
| |
| static const char *ret_types[QueryDimCount] = { |
| "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", |
| }; |
| |
| static const uint32_t return_arguments[QueryDimCount] = { |
| 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, |
| }; |
| |
| for (uint32_t index = 0; index < QueryDimCount; index++) |
| { |
| for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) |
| { |
| uint32_t bit = 16 * type_index + index; |
| uint64_t mask = 1ull << bit; |
| |
| if ((variant_mask & mask) == 0) |
| continue; |
| |
| statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), |
| dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", |
| (uav ? "" : "uint Level, "), "out uint Param)"); |
| begin_scope(); |
| statement(ret_types[index], " ret;"); |
| switch (return_arguments[index]) |
| { |
| case 1: |
| if (has_lod[index] && !uav) |
| statement("Tex.GetDimensions(Level, ret.x, Param);"); |
| else |
| { |
| statement("Tex.GetDimensions(ret.x);"); |
| statement("Param = 0u;"); |
| } |
| break; |
| case 2: |
| if (has_lod[index] && !uav) |
| statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); |
| else if (!uav) |
| statement("Tex.GetDimensions(ret.x, ret.y, Param);"); |
| else |
| { |
| statement("Tex.GetDimensions(ret.x, ret.y);"); |
| statement("Param = 0u;"); |
| } |
| break; |
| case 3: |
| if (has_lod[index] && !uav) |
| statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); |
| else if (!uav) |
| statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); |
| else |
| { |
| statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); |
| statement("Param = 0u;"); |
| } |
| break; |
| } |
| |
| statement("return ret;"); |
| end_scope(); |
| statement(""); |
| } |
| } |
| } |
| |
| string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) |
| { |
| auto &flags = get_member_decoration_bitset(type.self, index); |
| |
| // HLSL can emit row_major or column_major decoration in any struct. |
| // Do not try to merge combined decorations for children like in GLSL. |
| |
	// Flip the convention. HLSL is a bit odd in that the memory layout is column-major, but the language API is "row-major".
	// The way to deal with this is to multiply everything in inverse order, and reverse the memory layout.
| if (flags.get(DecorationColMajor)) |
| return "row_major "; |
| else if (flags.get(DecorationRowMajor)) |
| return "column_major "; |
| |
| return ""; |
| } |
| |
| void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
| const string &qualifier, uint32_t base_offset) |
| { |
| auto &membertype = get<SPIRType>(member_type_id); |
| |
| Bitset memberflags; |
| auto &memb = ir.meta[type.self].members; |
| if (index < memb.size()) |
| memberflags = memb[index].decoration_flags; |
| |
| string packing_offset; |
| bool is_push_constant = type.storage == StorageClassPushConstant; |
| |
| if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && |
| has_member_decoration(type.self, index, DecorationOffset)) |
| { |
| uint32_t offset = memb[index].offset - base_offset; |
| if (offset & 3) |
| SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); |
| |
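		// packoffset() addresses 16-byte registers (c#), with a swizzle selecting the 4-byte lane
		// within the register; e.g. a byte offset of 20 maps to c1.y, since 20 / 16 == 1 and
		// (20 & 15) >> 2 == 1.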
| static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; |
| packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); |
| } |
| |
| statement(layout_for_member(type, index), qualifier, |
| variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); |
| } |
| |
| void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) |
| { |
| auto &type = get<SPIRType>(var.basetype); |
| |
| bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); |
| |
| if (is_uav) |
| { |
| Bitset flags = ir.get_buffer_block_flags(var); |
| bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); |
| bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; |
| bool is_interlocked = interlocked_resources.count(var.self) > 0; |
| const char *type_name = "ByteAddressBuffer "; |
| if (!is_readonly) |
| type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; |
| add_resource_name(var.self); |
| statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), |
| to_resource_binding(var), ";"); |
| } |
| else |
| { |
| if (type.array.empty()) |
| { |
			// Flatten the top-level struct so we can use packoffset;
			// this restriction is similar to GLSL, where layout(offset) is not possible on sub-structs.
| flattened_structs[var.self] = false; |
| |
| // Prefer the block name if possible. |
| auto buffer_name = to_name(type.self, false); |
| if (ir.meta[type.self].decoration.alias.empty() || |
| resource_names.find(buffer_name) != end(resource_names) || |
| block_names.find(buffer_name) != end(block_names)) |
| { |
| buffer_name = get_block_fallback_name(var.self); |
| } |
| |
| add_variable(block_names, resource_names, buffer_name); |
| |
| // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. |
| // This cannot conflict with anything else, so we're safe now. |
| if (buffer_name.empty()) |
| buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); |
| |
| uint32_t failed_index = 0; |
| if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) |
| set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); |
| else |
| { |
| SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", |
| failed_index, " (name: ", to_member_name(type, failed_index), |
| ") cannot be expressed with either HLSL packing layout or packoffset.")); |
| } |
| |
| block_names.insert(buffer_name); |
| |
| // Save for post-reflection later. |
| declared_block_names[var.self] = buffer_name; |
| |
| type.member_name_cache.clear(); |
			// var.self can be used as a backup name for the block name,
			// so make sure we don't disturb the name here on a recompile;
			// it will be reset if we have to recompile.
| preserve_alias_on_reset(var.self); |
| add_resource_name(var.self); |
| statement("cbuffer ", buffer_name, to_resource_binding(var)); |
| begin_scope(); |
| |
| uint32_t i = 0; |
| for (auto &member : type.member_types) |
| { |
| add_member_name(type, i); |
| auto backup_name = get_member_name(type.self, i); |
| auto member_name = to_member_name(type, i); |
| member_name = join(to_name(var.self), "_", member_name); |
| ParsedIR::sanitize_underscores(member_name); |
| set_member_name(type.self, i, member_name); |
| emit_struct_member(type, member, i, ""); |
| set_member_name(type.self, i, backup_name); |
| i++; |
| } |
| |
| end_scope_decl(); |
| statement(""); |
| } |
| else |
| { |
| if (hlsl_options.shader_model < 51) |
| SPIRV_CROSS_THROW( |
| "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1."); |
| |
| add_resource_name(type.self); |
| add_resource_name(var.self); |
| |
			// ConstantBuffer<T> does not support packoffset, so it is unusable unless everything aligns as we expect.
| uint32_t failed_index = 0; |
| if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) |
| { |
| SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self), |
| "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), |
| ") cannot be expressed with normal HLSL packing rules.")); |
| } |
| |
| emit_struct(get<SPIRType>(type.self)); |
| statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), |
| to_resource_binding(var), ";"); |
| } |
| } |
| } |
| |
| void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) |
| { |
| if (root_constants_layout.empty()) |
| { |
| emit_buffer_block(var); |
| } |
| else |
| { |
| for (const auto &layout : root_constants_layout) |
| { |
| auto &type = get<SPIRType>(var.basetype); |
| |
| uint32_t failed_index = 0; |
| if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, |
| layout.end)) |
| set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); |
| else |
| { |
| SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", |
| ", member index ", failed_index, " (name: ", to_member_name(type, failed_index), |
| ") cannot be expressed with either HLSL packing layout or packoffset.")); |
| } |
| |
| flattened_structs[var.self] = false; |
| type.member_name_cache.clear(); |
| add_resource_name(var.self); |
| auto &memb = ir.meta[type.self].members; |
| |
| statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), |
| to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); |
| begin_scope(); |
| |
			// Index of the next field in the generated root-constant cbuffer.
| auto constant_index = 0u; |
| |
			// Iterate over all members of the push constant block and check which fields
			// fit into the given root constant layout.
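			// E.g. with a root constant range covering bytes [16, 48), a member at SPIR-V offset 32
			// is emitted with a packoffset 16 bytes into this cbuffer, since layout.start is passed
			// as the base offset below.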
| for (auto i = 0u; i < memb.size(); i++) |
| { |
| const auto offset = memb[i].offset; |
| if (layout.start <= offset && offset < layout.end) |
| { |
| const auto &member = type.member_types[i]; |
| |
| add_member_name(type, constant_index); |
| auto backup_name = get_member_name(type.self, i); |
| auto member_name = to_member_name(type, i); |
| member_name = join(to_name(var.self), "_", member_name); |
| ParsedIR::sanitize_underscores(member_name); |
| set_member_name(type.self, constant_index, member_name); |
| emit_struct_member(type, member, i, "", layout.start); |
| set_member_name(type.self, constant_index, backup_name); |
| |
| constant_index++; |
| } |
| } |
| |
| end_scope_decl(); |
| } |
| } |
| } |
| |
| string CompilerHLSL::to_sampler_expression(uint32_t id) |
| { |
| auto expr = join("_", to_non_uniform_aware_expression(id)); |
| auto index = expr.find_first_of('['); |
| if (index == string::npos) |
| { |
| return expr + "_sampler"; |
| } |
| else |
| { |
| // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead. |
| return expr.insert(index, "_sampler"); |
| } |
| } |
| |
| void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) |
| { |
| if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty()) |
| { |
| set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id); |
| } |
| else |
| { |
| // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. |
| emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); |
| } |
| } |
| |
| string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) |
| { |
| string arg_str = CompilerGLSL::to_func_call_arg(arg, id); |
| |
| if (hlsl_options.shader_model <= 30) |
| return arg_str; |
| |
| // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL. |
| auto &type = expression_type(id); |
| |
| // We don't have to consider combined image samplers here via OpSampledImage because |
| // those variables cannot be passed as arguments to functions. |
| // Only global SampledImage variables may be used as arguments. |
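	// E.g. a call foo(uTex) becomes foo(uTex, _uTex_sampler) in the emitted HLSL
	// (uTex being an arbitrary example name).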
| if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) |
| arg_str += ", " + to_sampler_expression(id); |
| |
| return arg_str; |
| } |
| |
| void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) |
| { |
| if (func.self != ir.default_entry_point) |
| add_function_overload(func); |
| |
| auto &execution = get_entry_point(); |
| // Avoid shadow declarations. |
| local_variable_names = resource_names; |
| |
| string decl; |
| |
| auto &type = get<SPIRType>(func.return_type); |
| if (type.array.empty()) |
| { |
| decl += flags_to_qualifiers_glsl(type, return_flags); |
| decl += type_to_glsl(type); |
| decl += " "; |
| } |
| else |
| { |
| // We cannot return arrays in HLSL, so "return" through an out variable. |
| decl = "void "; |
| } |
| |
| if (func.self == ir.default_entry_point) |
| { |
| if (execution.model == ExecutionModelVertex) |
| decl += "vert_main"; |
| else if (execution.model == ExecutionModelFragment) |
| decl += "frag_main"; |
| else if (execution.model == ExecutionModelGLCompute) |
| decl += "comp_main"; |
| else |
| SPIRV_CROSS_THROW("Unsupported execution model."); |
| processing_entry_point = true; |
| } |
| else |
| decl += to_name(func.self); |
| |
| decl += "("; |
| SmallVector<string> arglist; |
| |
| if (!type.array.empty()) |
| { |
| // Fake array returns by writing to an out array instead. |
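		// E.g. a function returning float4[2] is declared as
		// void foo(out float4 spvReturnValue[2], ...).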
| string out_argument; |
| out_argument += "out "; |
| out_argument += type_to_glsl(type); |
| out_argument += " "; |
| out_argument += "spvReturnValue"; |
| out_argument += type_to_array_glsl(type); |
| arglist.push_back(std::move(out_argument)); |
| } |
| |
| for (auto &arg : func.arguments) |
| { |
| // Do not pass in separate images or samplers if we're remapping |
| // to combined image samplers. |
| if (skip_argument(arg.id)) |
| continue; |
| |
| // Might change the variable name if it already exists in this function. |
| // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation |
| // to use same name for variables. |
| // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
| add_local_variable_name(arg.id); |
| |
| arglist.push_back(argument_decl(arg)); |
| |
| // Flatten a combined sampler to two separate arguments in modern HLSL. |
| auto &arg_type = get<SPIRType>(arg.type); |
| if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage && |
| arg_type.image.dim != DimBuffer) |
| { |
| // Manufacture automatic sampler arg for SampledImage texture |
| arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", |
| to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); |
| } |
| |
| // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
| auto *var = maybe_get<SPIRVariable>(arg.id); |
| if (var) |
| var->parameter = &arg; |
| } |
| |
| for (auto &arg : func.shadow_arguments) |
| { |
| // Might change the variable name if it already exists in this function. |
| // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation |
| // to use same name for variables. |
| // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. |
| add_local_variable_name(arg.id); |
| |
| arglist.push_back(argument_decl(arg)); |
| |
| // Hold a pointer to the parameter so we can invalidate the readonly field if needed. |
| auto *var = maybe_get<SPIRVariable>(arg.id); |
| if (var) |
| var->parameter = &arg; |
| } |
| |
| decl += merge(arglist); |
| decl += ")"; |
| statement(decl); |
| } |
| |
| void CompilerHLSL::emit_hlsl_entry_point() |
| { |
| SmallVector<string> arguments; |
| |
| if (require_input) |
| arguments.push_back("SPIRV_Cross_Input stage_input"); |
| |
| auto &execution = get_entry_point(); |
| |
| switch (execution.model) |
| { |
| case ExecutionModelGLCompute: |
| { |
| SpecializationConstant wg_x, wg_y, wg_z; |
| get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); |
| |
| uint32_t x = execution.workgroup_size.x; |
| uint32_t y = execution.workgroup_size.y; |
| uint32_t z = execution.workgroup_size.z; |
| |
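		// Specialization constants are emitted as preprocessor macros
		// (e.g. SPIRV_CROSS_CONSTANT_ID_0), so the numthreads attribute can stay
		// overridable at compile time instead of being baked to a literal.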
| auto x_expr = wg_x.id ? get<SPIRConstant>(wg_x.id).specialization_constant_macro_name : to_string(x); |
| auto y_expr = wg_y.id ? get<SPIRConstant>(wg_y.id).specialization_constant_macro_name : to_string(y); |
| auto z_expr = wg_z.id ? get<SPIRConstant>(wg_z.id).specialization_constant_macro_name : to_string(z); |
| |
| statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]"); |
| break; |
| } |
| case ExecutionModelFragment: |
| if (execution.flags.get(ExecutionModeEarlyFragmentTests)) |
| statement("[earlydepthstencil]"); |
| break; |
| default: |
| break; |
| } |
| |
| statement(require_output ? "SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")"); |
| begin_scope(); |
| bool legacy = hlsl_options.shader_model <= 30; |
| |
| // Copy builtins from entry point arguments to globals. |
| active_input_builtins.for_each_bit([&](uint32_t i) { |
| auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassInput); |
| switch (static_cast<BuiltIn>(i)) |
| { |
| case BuiltInFragCoord: |
			// VPOS in D3D9 is sampled at integer locations, so apply a half-pixel offset to be consistent.
| // TODO: Do we need an option here? Any reason why a D3D9 shader would be used |
| // on a D3D10+ system with a different rasterization config? |
| if (legacy) |
| statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); |
| else |
| { |
| statement(builtin, " = stage_input.", builtin, ";"); |
				// .zw are undefined in D3D9, so only do this fixup on SM4+.
| statement(builtin, ".w = 1.0 / ", builtin, ".w;"); |
| } |
| break; |
| |
| case BuiltInVertexId: |
| case BuiltInVertexIndex: |
| case BuiltInInstanceIndex: |
| // D3D semantics are uint, but shader wants int. |
| if (hlsl_options.support_nonzero_base_vertex_base_instance) |
| { |
| if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex) |
| statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;"); |
| else |
| statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;"); |
| } |
| else |
| statement(builtin, " = int(stage_input.", builtin, ");"); |
| break; |
| |
| case BuiltInInstanceId: |
| // D3D semantics are uint, but shader wants int. |
| statement(builtin, " = int(stage_input.", builtin, ");"); |
| break; |
| |
| case BuiltInNumWorkgroups: |
| case BuiltInPointCoord: |
| case BuiltInSubgroupSize: |
| case BuiltInSubgroupLocalInvocationId: |
| break; |
| |
| case BuiltInSubgroupEqMask: |
| // Emulate these ... |
| // No 64-bit in HLSL, so have to do it in 32-bit and unroll. |
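			// HLSL shift counts use only the low five bits, so the wrapped subtraction still
			// lands on bit (lane & 31) in every component; the conditionals below then zero
			// every component that does not own the current lane.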
| statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));"); |
| statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;"); |
| statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;"); |
| statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;"); |
| statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;"); |
| break; |
| |
| case BuiltInSubgroupGeMask: |
| // Emulate these ... |
| // No 64-bit in HLSL, so have to do it in 32-bit and unroll. |
| statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);"); |
| statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;"); |
| statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;"); |
| statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;"); |
| statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;"); |
| statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;"); |
| statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;"); |
| break; |
| |
| case BuiltInSubgroupGtMask: |
| // Emulate these ... |
| // No 64-bit in HLSL, so have to do it in 32-bit and unroll. |
| statement("uint gt_lane_index = WaveGetLaneIndex() + 1;"); |
| statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);"); |
| statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;"); |
| statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;"); |
| statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;"); |
| statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;"); |
| statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;"); |
| statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;"); |
| statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;"); |
| break; |
| |
| case BuiltInSubgroupLeMask: |
| // Emulate these ... |
| // No 64-bit in HLSL, so have to do it in 32-bit and unroll. |
| statement("uint le_lane_index = WaveGetLaneIndex() + 1;"); |
| statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;"); |
| statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;"); |
| statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;"); |
| statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;"); |
| statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;"); |
| statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;"); |
| statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;"); |
| statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;"); |
| break; |
| |
| case BuiltInSubgroupLtMask: |
| // Emulate these ... |
| // No 64-bit in HLSL, so have to do it in 32-bit and unroll. |
| statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;"); |
| statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;"); |
| statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;"); |
| statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;"); |
| statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;"); |
| statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;"); |
| statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;"); |
| break; |
| |
| case BuiltInClipDistance: |
| for (uint32_t clip = 0; clip < clip_distance_count; clip++) |
| statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], |
| ";"); |
| break; |
| |
| case BuiltInCullDistance: |
| for (uint32_t cull = 0; cull < cull_distance_count; cull++) |
| statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], |
| ";"); |
| break; |
| |
| default: |
| statement(builtin, " = stage_input.", builtin, ";"); |
| break; |
| } |
| }); |
| |
| // Copy from stage input struct to globals. |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| bool block = has_decoration(type.self, DecorationBlock); |
| |
| if (var.storage != StorageClassInput) |
| return; |
| |
| bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; |
| |
| if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && |
| interface_variable_exists_in_entry_point(var.self)) |
| { |
| if (block) |
| { |
| auto type_name = to_name(type.self); |
| auto var_name = to_name(var.self); |
| for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) |
| { |
| auto mbr_name = to_member_name(type, mbr_idx); |
| auto flat_name = join(type_name, "_", mbr_name); |
| statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";"); |
| } |
| } |
| else |
| { |
| auto name = to_name(var.self); |
| auto &mtype = this->get<SPIRType>(var.basetype); |
| if (need_matrix_unroll && mtype.columns > 1) |
| { |
| // Unroll matrices. |
| for (uint32_t col = 0; col < mtype.columns; col++) |
| statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); |
| } |
| else |
| { |
| statement(name, " = stage_input.", name, ";"); |
| } |
| } |
| } |
| }); |
| |
| // Run the shader. |
| if (execution.model == ExecutionModelVertex) |
| statement("vert_main();"); |
| else if (execution.model == ExecutionModelFragment) |
| statement("frag_main();"); |
| else if (execution.model == ExecutionModelGLCompute) |
| statement("comp_main();"); |
| else |
| SPIRV_CROSS_THROW("Unsupported shader stage."); |
| |
| // Copy stage outputs. |
| if (require_output) |
| { |
| statement("SPIRV_Cross_Output stage_output;"); |
| |
| // Copy builtins from globals to return struct. |
| active_output_builtins.for_each_bit([&](uint32_t i) { |
| // PointSize doesn't exist in HLSL. |
| if (i == BuiltInPointSize) |
| return; |
| |
| switch (static_cast<BuiltIn>(i)) |
| { |
| case BuiltInClipDistance: |
| for (uint32_t clip = 0; clip < clip_distance_count; clip++) |
| statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[", |
| clip, "];"); |
| break; |
| |
| case BuiltInCullDistance: |
| for (uint32_t cull = 0; cull < cull_distance_count; cull++) |
| statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[", |
| cull, "];"); |
| break; |
| |
| default: |
| { |
| auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput); |
| statement("stage_output.", builtin_expr, " = ", builtin_expr, ";"); |
| break; |
| } |
| } |
| }); |
| |
| ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { |
| auto &type = this->get<SPIRType>(var.basetype); |
| bool block = has_decoration(type.self, DecorationBlock); |
| |
| if (var.storage != StorageClassOutput) |
| return; |
| |
| if (!var.remapped_variable && type.pointer && |
| !is_builtin_variable(var) && |
| interface_variable_exists_in_entry_point(var.self)) |
| { |
| if (block) |
| { |
| // I/O blocks need to flatten output. |
| auto type_name = to_name(type.self); |
| auto var_name = to_name(var.self); |
| for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) |
| { |
| auto mbr_name = to_member_name(type, mbr_idx); |
| auto flat_name = join(type_name, "_", mbr_name); |
| statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";"); |
| } |
| } |
| else |
| { |
| auto name = to_name(var.self); |
| |
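				// In SM 2/3, fragment color outputs are effectively float4,
				// so pad narrower vectors with zeroes.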
| if (legacy && execution.model == ExecutionModelFragment) |
| { |
| string output_filler; |
| for (uint32_t size = type.vecsize; size < 4; ++size) |
| output_filler += ", 0.0"; |
| |
| statement("stage_output.", name, " = float4(", name, output_filler, ");"); |
| } |
| else |
| { |
| statement("stage_output.", name, " = ", name, ";"); |
| } |
| } |
| } |
| }); |
| |
| statement("return stage_output;"); |
| } |
| |
| end_scope(); |
| } |
| |
| void CompilerHLSL::emit_fixup() |
| { |
| if (is_vertex_like_shader()) |
| { |
| // Do various mangling on the gl_Position. |
| if (hlsl_options.shader_model <= 30) |
| { |
| statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * " |
| "gl_Position.w;"); |
| statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * " |
| "gl_Position.w;"); |
| } |
| |
| if (options.vertex.flip_vert_y) |
| statement("gl_Position.y = -gl_Position.y;"); |
| if (options.vertex.fixup_clipspace) |
| statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;"); |
| } |
| } |
| |
| void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) |
| { |
| if (sparse) |
| SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); |
| |
| auto *ops = stream(i); |
| auto op = static_cast<Op>(i.op); |
| uint32_t length = i.length; |
| |
| SmallVector<uint32_t> inherited_expressions; |
| |
| uint32_t result_type = ops[0]; |
| uint32_t id = ops[1]; |
| VariableID img = ops[2]; |
| uint32_t coord = ops[3]; |
| uint32_t dref = 0; |
| uint32_t comp = 0; |
| bool gather = false; |
| bool proj = false; |
| const uint32_t *opt = nullptr; |
| auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img); |
| |
| if (combined_image && has_decoration(img, DecorationNonUniform)) |
| { |
| set_decoration(combined_image->image, DecorationNonUniform); |
| set_decoration(combined_image->sampler, DecorationNonUniform); |
| } |
| |
| auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img); |
| |
| inherited_expressions.push_back(coord); |
| |
| switch (op) |
| { |
| case OpImageSampleDrefImplicitLod: |
| case OpImageSampleDrefExplicitLod: |
| dref = ops[4]; |
| opt = &ops[5]; |
| length -= 5; |
| break; |
| |
| case OpImageSampleProjDrefImplicitLod: |
| case OpImageSampleProjDrefExplicitLod: |
| dref = ops[4]; |
| proj = true; |
| opt = &ops[5]; |
| length -= 5; |
| break; |
| |
| case OpImageDrefGather: |
| dref = ops[4]; |
| opt = &ops[5]; |
| gather = true; |
| length -= 5; |
| break; |
| |
| case OpImageGather: |
| comp = ops[4]; |
| opt = &ops[5]; |
| gather = true; |
| length -= 5; |
| break; |
| |
| case OpImageSampleProjImplicitLod: |
| case OpImageSampleProjExplicitLod: |
| opt = &ops[4]; |
| length -= 4; |
| proj = true; |
| break; |
| |
| case OpImageQueryLod: |
| opt = &ops[4]; |
| length -= 4; |
| break; |
| |
| default: |
| opt = &ops[4]; |
| length -= 4; |
| break; |
| } |
| |
| auto &imgtype = expression_type(img); |
| uint32_t coord_components = 0; |
| switch (imgtype.image.dim) |
| { |
| case spv::Dim1D: |
| coord_components = 1; |
| break; |
| case spv::Dim2D: |
| coord_components = 2; |
| break; |
| case spv::Dim3D: |
| coord_components = 3; |
| break; |
| case spv::DimCube: |
| coord_components = 3; |
| break; |
| case spv::DimBuffer: |
| coord_components = 1; |
| break; |
| default: |
| coord_components = 2; |
| break; |
| } |
| |
| if (dref) |
| inherited_expressions.push_back(dref); |
| |
| if (imgtype.image.arrayed) |
| coord_components++; |
| |
| uint32_t bias = 0; |
| uint32_t lod = 0; |
| uint32_t grad_x = 0; |
| uint32_t grad_y = 0; |
| uint32_t coffset = 0; |
| uint32_t offset = 0; |
| uint32_t coffsets = 0; |
| uint32_t sample = 0; |
| uint32_t minlod = 0; |
| uint32_t flags = 0; |
| |
| if (length) |
| { |
| flags = opt[0]; |
| opt++; |
| length--; |
| } |
| |
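	// Consume optional image operands in SPIR-V declaration order. Note that Grad consumes
	// two words (ddx and ddy), which is why ImageOperandsGradMask is tested twice below.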
| auto test = [&](uint32_t &v, uint32_t flag) { |
| if (length && (flags & flag)) |
| { |
| v = *opt++; |
| inherited_expressions.push_back(v); |
| length--; |
| } |
| }; |
| |
| test(bias, ImageOperandsBiasMask); |
| test(lod, ImageOperandsLodMask); |
| test(grad_x, ImageOperandsGradMask); |
| test(grad_y, ImageOperandsGradMask); |
| test(coffset, ImageOperandsConstOffsetMask); |
| test(offset, ImageOperandsOffsetMask); |
| test(coffsets, ImageOperandsConstOffsetsMask); |
| test(sample, ImageOperandsSampleMask); |
| test(minlod, ImageOperandsMinLodMask); |
| |
| string expr; |
| string texop; |
| |
| if (minlod != 0) |
| SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL."); |
| |
| if (op == OpImageFetch) |
| { |
| if (hlsl_options.shader_model < 40) |
| { |
| SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3."); |
| } |
| texop += img_expr; |
| texop += ".Load"; |
| } |
| else if (op == OpImageQueryLod) |
| { |
| texop += img_expr; |
| texop += ".CalculateLevelOfDetail"; |
| } |
| else |
| { |
| auto &imgformat = get<SPIRType>(imgtype.image.type); |
| if (imgformat.basetype != SPIRType::Float) |
| { |
| SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL."); |
| } |
| |
| if (hlsl_options.shader_model >= 40) |
| { |
| texop += img_expr; |
| |
| if (is_depth_image(imgtype, img)) |
|