blob: 4a4f77a51ccbb4c46a4eb6ba330ac798a3112e09 [file] [log] [blame]
/*
* Copyright 2016-2019 The Brenwill Workshop Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "spirv_msl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>
#include <numeric>
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;
static const uint32_t k_unknown_location = ~0u;
static const uint32_t k_unknown_component = ~0u;
static const uint32_t k_aux_mbr_idx_swizzle_const = 0u;
CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_)
: CompilerGLSL(move(spirv_))
{
}
CompilerMSL::CompilerMSL(const uint32_t *ir_, size_t word_count)
: CompilerGLSL(ir_, word_count)
{
}
CompilerMSL::CompilerMSL(const ParsedIR &ir_)
: CompilerGLSL(ir_)
{
}
CompilerMSL::CompilerMSL(ParsedIR &&ir_)
: CompilerGLSL(std::move(ir_))
{
}
void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va)
{
vtx_attrs_by_location[va.location] = va;
if (va.builtin != BuiltInMax && !vtx_attrs_by_builtin.count(va.builtin))
vtx_attrs_by_builtin[va.builtin] = va;
}
void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
{
resource_bindings.push_back({ binding, false });
}
void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
{
if (desc_set < kMaxArgumentBuffers)
argument_buffer_discrete_mask |= 1u << desc_set;
}
bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location)
{
return vtx_attrs_in_use.count(location) != 0;
}
bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding)
{
auto itr = find_if(begin(resource_bindings), end(resource_bindings),
[&](const std::pair<MSLResourceBinding, bool> &resource) -> bool {
return model == resource.first.stage && desc_set == resource.first.desc_set &&
binding == resource.first.binding;
});
return itr != end(resource_bindings) && itr->second;
}
void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
{
fragment_output_components[location] = components;
}
void CompilerMSL::build_implicit_builtins()
{
bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex;
bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl;
if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params)
{
bool has_frag_coord = false;
bool has_sample_id = false;
bool has_vertex_idx = false;
bool has_base_vertex = false;
bool has_instance_idx = false;
bool has_base_instance = false;
bool has_invocation_id = false;
bool has_primitive_id = false;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
return;
if (need_subpass_input && ir.meta[var.self].decoration.builtin_type == BuiltInFragCoord)
{
builtin_frag_coord_id = var.self;
has_frag_coord = true;
}
if (need_sample_pos && ir.meta[var.self].decoration.builtin_type == BuiltInSampleId)
{
builtin_sample_id_id = var.self;
has_sample_id = true;
}
if (need_vertex_params)
{
switch (ir.meta[var.self].decoration.builtin_type)
{
case BuiltInVertexIndex:
builtin_vertex_idx_id = var.self;
has_vertex_idx = true;
break;
case BuiltInBaseVertex:
builtin_base_vertex_id = var.self;
has_base_vertex = true;
break;
case BuiltInInstanceIndex:
builtin_instance_idx_id = var.self;
has_instance_idx = true;
break;
case BuiltInBaseInstance:
builtin_base_instance_id = var.self;
has_base_instance = true;
break;
default:
break;
}
}
if (need_tesc_params)
{
switch (ir.meta[var.self].decoration.builtin_type)
{
case BuiltInInvocationId:
builtin_invocation_id_id = var.self;
has_invocation_id = true;
break;
case BuiltInPrimitiveId:
builtin_primitive_id_id = var.self;
has_primitive_id = true;
break;
default:
break;
}
}
});
if (!has_frag_coord && need_subpass_input)
{
uint32_t offset = ir.increase_bound_by(3);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
uint32_t var_id = offset + 2;
// Create gl_FragCoord.
SPIRType vec4_type;
vec4_type.basetype = SPIRType::Float;
vec4_type.width = 32;
vec4_type.vecsize = 4;
set<SPIRType>(type_id, vec4_type);
SPIRType vec4_type_ptr;
vec4_type_ptr = vec4_type;
vec4_type_ptr.pointer = true;
vec4_type_ptr.parent_type = type_id;
vec4_type_ptr.storage = StorageClassInput;
auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr);
ptr_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord);
builtin_frag_coord_id = var_id;
}
if (!has_sample_id && need_sample_pos)
{
uint32_t offset = ir.increase_bound_by(3);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
uint32_t var_id = offset + 2;
// Create gl_SampleID.
SPIRType uint_type;
uint_type.basetype = SPIRType::UInt;
uint_type.width = 32;
set<SPIRType>(type_id, uint_type);
SPIRType uint_type_ptr;
uint_type_ptr = uint_type;
uint_type_ptr.pointer = true;
uint_type_ptr.parent_type = type_id;
uint_type_ptr.storage = StorageClassInput;
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId);
builtin_sample_id_id = var_id;
}
if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
SPIRType uint_type;
uint_type.basetype = SPIRType::UInt;
uint_type.width = 32;
set<SPIRType>(type_id, uint_type);
SPIRType uint_type_ptr;
uint_type_ptr = uint_type;
uint_type_ptr.pointer = true;
uint_type_ptr.parent_type = type_id;
uint_type_ptr.storage = StorageClassInput;
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
if (!has_vertex_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_VertexIndex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex);
builtin_vertex_idx_id = var_id;
}
if (!has_base_vertex)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_BaseVertex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex);
builtin_base_vertex_id = var_id;
}
if (!has_instance_idx)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_InstanceIndex.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
builtin_instance_idx_id = var_id;
}
if (!has_base_instance)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_BaseInstance.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance);
builtin_base_instance_id = var_id;
}
}
if (need_tesc_params && (!has_invocation_id || !has_primitive_id))
{
uint32_t offset = ir.increase_bound_by(2);
uint32_t type_id = offset;
uint32_t type_ptr_id = offset + 1;
SPIRType uint_type;
uint_type.basetype = SPIRType::UInt;
uint_type.width = 32;
set<SPIRType>(type_id, uint_type);
SPIRType uint_type_ptr;
uint_type_ptr = uint_type;
uint_type_ptr.pointer = true;
uint_type_ptr.parent_type = type_id;
uint_type_ptr.storage = StorageClassInput;
auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
ptr_type.self = type_id;
if (!has_invocation_id)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_InvocationID.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId);
builtin_invocation_id_id = var_id;
}
if (!has_primitive_id)
{
uint32_t var_id = ir.increase_bound_by(1);
// Create gl_PrimitiveID.
set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId);
builtin_primitive_id_id = var_id;
}
}
}
if (needs_aux_buffer_def)
{
uint32_t offset = ir.increase_bound_by(5);
uint32_t type_id = offset;
uint32_t type_arr_id = offset + 1;
uint32_t struct_id = offset + 2;
uint32_t struct_ptr_id = offset + 3;
uint32_t var_id = offset + 4;
// Create a buffer to hold extra data, including the swizzle constants.
SPIRType uint_type;
uint_type.basetype = SPIRType::UInt;
uint_type.width = 32;
set<SPIRType>(type_id, uint_type);
SPIRType uint_type_arr = uint_type;
uint_type_arr.array.push_back(0);
uint_type_arr.array_size_literal.push_back(true);
uint_type_arr.parent_type = type_id;
set<SPIRType>(type_arr_id, uint_type_arr);
set_decoration(type_arr_id, DecorationArrayStride, 4);
SPIRType struct_type;
struct_type.basetype = SPIRType::Struct;
struct_type.member_types.push_back(type_arr_id);
auto &type = set<SPIRType>(struct_id, struct_type);
type.self = struct_id;
set_decoration(struct_id, DecorationBlock);
set_name(struct_id, "spvAux");
set_member_name(struct_id, k_aux_mbr_idx_swizzle_const, "swizzleConst");
set_member_decoration(struct_id, k_aux_mbr_idx_swizzle_const, DecorationOffset, 0);
SPIRType struct_type_ptr = struct_type;
struct_type_ptr.pointer = true;
struct_type_ptr.parent_type = struct_id;
struct_type_ptr.storage = StorageClassUniform;
auto &ptr_type = set<SPIRType>(struct_ptr_id, struct_type_ptr);
ptr_type.self = struct_id;
set<SPIRVariable>(var_id, struct_ptr_id, StorageClassUniform);
set_name(var_id, "spvAuxBuffer");
// This should never match anything.
set_decoration(var_id, DecorationDescriptorSet, 0xFFFFFFFE);
set_decoration(var_id, DecorationBinding, msl_options.aux_buffer_index);
aux_buffer_id = var_id;
}
}
static string create_sampler_address(const char *prefix, MSLSamplerAddress addr)
{
switch (addr)
{
case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE:
return join(prefix, "address::clamp_to_edge");
case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO:
return join(prefix, "address::clamp_to_zero");
case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER:
return join(prefix, "address::clamp_to_border");
case MSL_SAMPLER_ADDRESS_REPEAT:
return join(prefix, "address::repeat");
case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT:
return join(prefix, "address::mirrored_repeat");
default:
SPIRV_CROSS_THROW("Invalid sampler addressing mode.");
}
}
SPIRType &CompilerMSL::get_stage_in_struct_type()
{
auto &si_var = get<SPIRVariable>(stage_in_var_id);
return get_variable_data_type(si_var);
}
SPIRType &CompilerMSL::get_stage_out_struct_type()
{
auto &so_var = get<SPIRVariable>(stage_out_var_id);
return get_variable_data_type(so_var);
}
SPIRType &CompilerMSL::get_patch_stage_in_struct_type()
{
auto &si_var = get<SPIRVariable>(patch_stage_in_var_id);
return get_variable_data_type(si_var);
}
SPIRType &CompilerMSL::get_patch_stage_out_struct_type()
{
auto &so_var = get<SPIRVariable>(patch_stage_out_var_id);
return get_variable_data_type(so_var);
}
std::string CompilerMSL::get_tess_factor_struct_name()
{
if (get_entry_point().flags.get(ExecutionModeTriangles))
return "MTLTriangleTessellationFactorsHalf";
return "MTLQuadTessellationFactorsHalf";
}
void CompilerMSL::emit_entry_point_declarations()
{
// FIXME: Get test coverage here ...
// Emit constexpr samplers here.
for (auto &samp : constexpr_samplers)
{
auto &var = get<SPIRVariable>(samp.first);
auto &type = get<SPIRType>(var.basetype);
if (type.basetype == SPIRType::Sampler)
add_resource_name(samp.first);
SmallVector<string> args;
auto &s = samp.second;
if (s.coord != MSL_SAMPLER_COORD_NORMALIZED)
args.push_back("coord::pixel");
if (s.min_filter == s.mag_filter)
{
if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
args.push_back("filter::linear");
}
else
{
if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST)
args.push_back("min_filter::linear");
if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST)
args.push_back("mag_filter::linear");
}
switch (s.mip_filter)
{
case MSL_SAMPLER_MIP_FILTER_NONE:
// Default
break;
case MSL_SAMPLER_MIP_FILTER_NEAREST:
args.push_back("mip_filter::nearest");
break;
case MSL_SAMPLER_MIP_FILTER_LINEAR:
args.push_back("mip_filter::linear");
break;
default:
SPIRV_CROSS_THROW("Invalid mip filter.");
}
if (s.s_address == s.t_address && s.s_address == s.r_address)
{
if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
args.push_back(create_sampler_address("", s.s_address));
}
else
{
if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
args.push_back(create_sampler_address("s_", s.s_address));
if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
args.push_back(create_sampler_address("t_", s.t_address));
if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE)
args.push_back(create_sampler_address("r_", s.r_address));
}
if (s.compare_enable)
{
switch (s.compare_func)
{
case MSL_SAMPLER_COMPARE_FUNC_ALWAYS:
args.push_back("compare_func::always");
break;
case MSL_SAMPLER_COMPARE_FUNC_NEVER:
args.push_back("compare_func::never");
break;
case MSL_SAMPLER_COMPARE_FUNC_EQUAL:
args.push_back("compare_func::equal");
break;
case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL:
args.push_back("compare_func::not_equal");
break;
case MSL_SAMPLER_COMPARE_FUNC_LESS:
args.push_back("compare_func::less");
break;
case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL:
args.push_back("compare_func::less_equal");
break;
case MSL_SAMPLER_COMPARE_FUNC_GREATER:
args.push_back("compare_func::greater");
break;
case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL:
args.push_back("compare_func::greater_equal");
break;
default:
SPIRV_CROSS_THROW("Invalid sampler compare function.");
}
}
if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER ||
s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER)
{
switch (s.border_color)
{
case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK:
args.push_back("border_color::opaque_black");
break;
case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE:
args.push_back("border_color::opaque_white");
break;
case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK:
args.push_back("border_color::transparent_black");
break;
default:
SPIRV_CROSS_THROW("Invalid sampler border color.");
}
}
if (s.anisotropy_enable)
args.push_back(join("max_anisotropy(", s.max_anisotropy, ")"));
if (s.lod_clamp_enable)
{
args.push_back(join("lod_clamp(", convert_to_string(s.lod_clamp_min, current_locale_radix_character), ", ",
convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")"));
}
statement("constexpr sampler ",
type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first),
"(", merge(args), ");");
}
// Emit buffer arrays here.
for (uint32_t array_id : buffer_arrays)
{
const auto &var = get<SPIRVariable>(array_id);
const auto &type = get_variable_data_type(var);
string name = to_name(array_id);
statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
begin_scope();
for (uint32_t i = 0; i < type.array[0]; ++i)
statement(name + "_" + convert_to_string(i) + ",");
end_scope_decl();
statement_no_indent("");
}
// For some reason, without this, we end up emitting the arrays twice.
buffer_arrays.clear();
}
string CompilerMSL::compile()
{
// Do not deal with GLES-isms like precision, older extensions and such.
options.vulkan_semantics = true;
options.es = false;
options.version = 450;
backend.null_pointer_literal = "nullptr";
backend.float_literal_suffix = false;
backend.uint32_t_literal_suffix = true;
backend.int16_t_literal_suffix = "";
backend.uint16_t_literal_suffix = "u";
backend.basic_int_type = "int";
backend.basic_uint_type = "uint";
backend.basic_int8_type = "char";
backend.basic_uint8_type = "uchar";
backend.basic_int16_type = "short";
backend.basic_uint16_type = "ushort";
backend.discard_literal = "discard_fragment()";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.use_initializer_list = true;
backend.use_typed_initializer_list = true;
backend.native_row_major_matrix = false;
backend.unsized_array_supported = false;
backend.can_declare_arrays_inline = false;
backend.can_return_array = false;
backend.boolean_mix_support = false;
backend.allow_truncated_access_chain = true;
backend.array_is_value_type = false;
backend.comparison_image_samples_scalar = true;
backend.native_pointers = true;
backend.nonuniform_qualifier = "";
capture_output_to_buffer = msl_options.capture_output_to_buffer;
is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
replace_illegal_names();
struct_member_padding.clear();
build_function_control_flow_graphs_and_analyze();
update_active_builtins();
analyze_image_and_sampler_usage();
analyze_sampled_image_usage();
build_implicit_builtins();
fixup_image_load_store_access();
set_enabled_interface_variables(get_active_interface_variables());
if (aux_buffer_id)
active_interface_variables.insert(aux_buffer_id);
// Preprocess OpCodes to extract the need to output additional header content
preprocess_op_codes();
// Create structs to hold input, output and uniform variables.
// Do output first to ensure out. is declared at top of entry function.
qual_pos_var_name = "";
stage_out_var_id = add_interface_block(StorageClassOutput);
patch_stage_out_var_id = add_interface_block(StorageClassOutput, true);
stage_in_var_id = add_interface_block(StorageClassInput);
if (get_execution_model() == ExecutionModelTessellationEvaluation)
patch_stage_in_var_id = add_interface_block(StorageClassInput, true);
if (get_execution_model() == ExecutionModelTessellationControl)
stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput);
if (is_tessellation_shader())
stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput);
// Metal vertex functions that define no output must disable rasterization and return void.
if (!stage_out_var_id)
is_rasterization_disabled = true;
// Convert the use of global variables to recursively-passed function parameters
localize_global_variables();
extract_global_variables_from_functions();
// Mark any non-stage-in structs to be tightly packed.
mark_packable_structs();
// Add fixup hooks required by shader inputs and outputs. This needs to happen before
// the loop, so the hooks aren't added multiple times.
fix_up_shader_inputs_outputs();
// If we are using argument buffers, we create argument buffer structures for them here.
// These buffers will be used in the entry point, not the individual resources.
if (msl_options.argument_buffers)
{
if (!msl_options.supports_msl_version(2, 0))
SPIRV_CROSS_THROW("Argument buffers can only be used with MSL 2.0 and up.");
analyze_argument_buffers();
}
uint32_t pass_count = 0;
do
{
if (pass_count >= 3)
SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
reset();
// Start bindings at zero.
next_metal_resource_index_buffer = 0;
next_metal_resource_index_texture = 0;
next_metal_resource_index_sampler = 0;
// Move constructor for this type is broken on GCC 4.9 ...
buffer.reset();
emit_header();
emit_specialization_constants_and_structs();
emit_resources();
emit_custom_functions();
emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());
pass_count++;
} while (is_forcing_recompilation());
return buffer.str();
}
// Register the need to output any custom functions.
void CompilerMSL::preprocess_op_codes()
{
OpCodePreprocessor preproc(*this);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), preproc);
suppress_missing_prototypes = preproc.suppress_missing_prototypes;
if (preproc.uses_atomics)
{
add_header_line("#include <metal_atomic>");
add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
}
// Metal vertex functions that write to resources must disable rasterization and return void.
if (preproc.uses_resource_write)
is_rasterization_disabled = true;
// Tessellation control shaders are run as compute functions in Metal, and so
// must capture their output to a buffer.
if (get_execution_model() == ExecutionModelTessellationControl)
{
is_rasterization_disabled = true;
capture_output_to_buffer = true;
}
}
// Move the Private and Workgroup global variables to the entry function.
// Non-constant variables cannot have global scope in Metal.
void CompilerMSL::localize_global_variables()
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
auto iter = global_variables.begin();
while (iter != global_variables.end())
{
uint32_t v_id = *iter;
auto &var = get<SPIRVariable>(v_id);
if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
{
if (!variable_is_lut(var))
entry_func.add_local_variable(v_id);
iter = global_variables.erase(iter);
}
else
iter++;
}
}
// For any global variable accessed directly by a function,
// extract that variable and add it as an argument to that function.
void CompilerMSL::extract_global_variables_from_functions()
{
// Uniforms
unordered_set<uint32_t> global_var_ids;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (var.storage == StorageClassInput || var.storage == StorageClassOutput ||
var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant ||
var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer)
{
global_var_ids.insert(var.self);
}
});
// Local vars that are declared in the main function and accessed directly by a function
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
for (auto &var : entry_func.local_variables)
if (get<SPIRVariable>(var).storage != StorageClassFunction)
global_var_ids.insert(var);
std::set<uint32_t> added_arg_ids;
unordered_set<uint32_t> processed_func_ids;
extract_global_variables_from_function(ir.default_entry_point, added_arg_ids, global_var_ids, processed_func_ids);
}
// MSL does not support the use of global variables for shader input content.
// For any global variable accessed directly by the specified function, extract that variable,
// add it as an argument to that function, and the arg to the added_arg_ids collection.
void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids,
unordered_set<uint32_t> &global_var_ids,
unordered_set<uint32_t> &processed_func_ids)
{
// Avoid processing a function more than once
if (processed_func_ids.find(func_id) != processed_func_ids.end())
{
// Return function global variables
added_arg_ids = function_global_vars[func_id];
return;
}
processed_func_ids.insert(func_id);
auto &func = get<SPIRFunction>(func_id);
// Recursively establish global args added to functions on which we depend.
for (auto block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
for (auto &i : b.ops)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
switch (op)
{
case OpLoad:
case OpInBoundsAccessChain:
case OpAccessChain:
case OpPtrAccessChain:
{
uint32_t base_id = ops[2];
if (global_var_ids.find(base_id) != global_var_ids.end())
added_arg_ids.insert(base_id);
auto &type = get<SPIRType>(ops[0]);
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
{
// Implicitly reads gl_FragCoord.
assert(builtin_frag_coord_id != 0);
added_arg_ids.insert(builtin_frag_coord_id);
}
break;
}
case OpFunctionCall:
{
// First see if any of the function call args are globals
for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
{
uint32_t arg_id = ops[arg_idx];
if (global_var_ids.find(arg_id) != global_var_ids.end())
added_arg_ids.insert(arg_id);
}
// Then recurse into the function itself to extract globals used internally in the function
uint32_t inner_func_id = ops[2];
std::set<uint32_t> inner_func_args;
extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
processed_func_ids);
added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end());
break;
}
case OpStore:
{
uint32_t base_id = ops[0];
if (global_var_ids.find(base_id) != global_var_ids.end())
added_arg_ids.insert(base_id);
break;
}
case OpSelect:
{
uint32_t base_id = ops[3];
if (global_var_ids.find(base_id) != global_var_ids.end())
added_arg_ids.insert(base_id);
base_id = ops[4];
if (global_var_ids.find(base_id) != global_var_ids.end())
added_arg_ids.insert(base_id);
break;
}
default:
break;
}
// TODO: Add all other operations which can affect memory.
// We should consider a more unified system here to reduce boiler-plate.
// This kind of analysis is done in several places ...
}
}
function_global_vars[func_id] = added_arg_ids;
// Add the global variables as arguments to the function
if (func_id != ir.default_entry_point)
{
bool added_in = false;
bool added_out = false;
for (uint32_t arg_id : added_arg_ids)
{
auto &var = get<SPIRVariable>(arg_id);
uint32_t type_id = var.basetype;
auto *p_type = &get<SPIRType>(type_id);
BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn));
if (((is_tessellation_shader() && var.storage == StorageClassInput) ||
(get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) &&
!(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) &&
(!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance ||
p_type->basetype == SPIRType::Struct))
{
// Tessellation control shaders see inputs and per-vertex outputs as arrays.
// Similarly, tessellation evaluation shaders see per-vertex inputs as arrays.
// We collected them into a structure; we must pass the array of this
// structure to the function.
std::string name;
if (var.storage == StorageClassInput)
{
if (added_in)
continue;
name = input_wg_var_name;
arg_id = stage_in_ptr_var_id;
added_in = true;
}
else if (var.storage == StorageClassOutput)
{
if (added_out)
continue;
name = "gl_out";
arg_id = stage_out_ptr_var_id;
added_out = true;
}
type_id = get<SPIRVariable>(arg_id).basetype;
p_type = &get<SPIRType>(type_id);
uint32_t next_id = ir.increase_bound_by(1);
func.add_parameter(type_id, next_id, true);
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
set_name(next_id, name);
}
else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct)
{
// Get the pointee type
type_id = get_pointee_type_id(type_id);
p_type = &get<SPIRType>(type_id);
uint32_t mbr_idx = 0;
for (auto &mbr_type_id : p_type->member_types)
{
BuiltIn builtin = BuiltInMax;
bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin);
if (is_builtin && has_active_builtin(builtin, var.storage))
{
// Add a arg variable with the same type and decorations as the member
uint32_t next_ids = ir.increase_bound_by(2);
uint32_t ptr_type_id = next_ids + 0;
uint32_t var_id = next_ids + 1;
// Make sure we have an actual pointer type,
// so that we will get the appropriate address space when declaring these builtins.
auto &ptr = set<SPIRType>(ptr_type_id, get<SPIRType>(mbr_type_id));
ptr.self = mbr_type_id;
ptr.storage = var.storage;
ptr.pointer = true;
ptr.parent_type = mbr_type_id;
func.add_parameter(mbr_type_id, var_id, true);
set<SPIRVariable>(var_id, ptr_type_id, StorageClassFunction);
ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx];
}
mbr_idx++;
}
}
else
{
uint32_t next_id = ir.increase_bound_by(1);
func.add_parameter(type_id, next_id, true);
set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
// Ensure the existing variable has a valid name and the new variable has all the same meta info
set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
ir.meta[next_id] = ir.meta[arg_id];
}
}
}
}
// For all variables that are some form of non-input-output interface block, mark that all the structs
// that are recursively contained within the type referenced by that variable should be packed tightly.
void CompilerMSL::mark_packable_structs()
{
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
if (var.storage != StorageClassFunction && !is_hidden_variable(var))
{
auto &type = this->get<SPIRType>(var.basetype);
if (type.pointer &&
(type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) &&
(has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
mark_as_packable(type);
}
});
}
// If the specified type is a struct, it and any nested structs
// are marked as packable with the SPIRVCrossDecorationPacked decoration,
void CompilerMSL::mark_as_packable(SPIRType &type)
{
// If this is not the base type (eg. it's a pointer or array), tunnel down
if (type.parent_type)
{
mark_as_packable(get<SPIRType>(type.parent_type));
return;
}
if (type.basetype == SPIRType::Struct)
{
set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
// Recurse
size_t mbr_cnt = type.member_types.size();
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
uint32_t mbr_type_id = type.member_types[mbr_idx];
auto &mbr_type = get<SPIRType>(mbr_type_id);
mark_as_packable(mbr_type);
if (mbr_type.type_alias)
{
auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias);
mark_as_packable(mbr_type_alias);
}
}
}
}
// If a vertex attribute exists at the location, it is marked as being used by this shader
void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage)
{
if ((get_execution_model() == ExecutionModelVertex || is_tessellation_shader()) && (storage == StorageClassInput))
vtx_attrs_in_use.insert(location);
}
uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const
{
auto itr = fragment_output_components.find(location);
if (itr == end(fragment_output_components))
return 4;
else
return itr->second;
}
uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components)
{
uint32_t new_type_id = ir.increase_bound_by(1);
auto &type = set<SPIRType>(new_type_id, get<SPIRType>(type_id));
type.vecsize = components;
type.self = new_type_id;
type.parent_type = type_id;
type.pointer = false;
return new_type_id;
}
void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var, bool strip_array)
{
bool is_builtin = is_builtin_variable(var);
BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
bool is_flat = has_decoration(var.self, DecorationFlat);
bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
bool is_centroid = has_decoration(var.self, DecorationCentroid);
bool is_sample = has_decoration(var.self, DecorationSample);
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
var.basetype = type_id;
type_id = get_pointee_type_id(var.basetype);
if (strip_array && is_array(get<SPIRType>(type_id)))
type_id = get<SPIRType>(type_id).parent_type;
auto &type = get<SPIRType>(type_id);
uint32_t target_components = 0;
uint32_t type_components = type.vecsize;
bool padded_output = false;
// Check if we need to pad fragment output to match a certain number of components.
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
{
uint32_t locn = get_decoration(var.self, DecorationLocation);
target_components = get_target_components_for_fragment_location(locn);
if (type_components < target_components)
{
// Make a new type here.
type_id = build_extended_vector_type(type_id, target_components);
padded_output = true;
}
}
ib_type.member_types.push_back(type_id);
// Give the member a name
string mbr_name = ensure_valid_name(to_expression(var.self), "m");
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
// Update the original variable reference to include the structure reference
string qual_var_name = ib_var_ref + "." + mbr_name;
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
if (padded_output)
{
entry_func.add_local_variable(var.self);
vars_needing_early_declaration.push_back(var.self);
entry_func.fixup_hooks_out.push_back([=, &var]() {
SPIRType &padded_type = this->get<SPIRType>(type_id);
statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";");
});
}
else if (!strip_array)
ir.meta[var.self].decoration.qualified_alias = qual_var_name;
if (var.storage == StorageClassOutput && var.initializer != 0)
{
entry_func.fixup_hooks_in.push_back(
[=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); });
}
// Copy the variable location from the original variable to the member
if (get_decoration_bitset(var.self).get(DecorationLocation))
{
uint32_t locn = get_decoration(var.self, DecorationLocation);
if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader()))
{
type_id = ensure_correct_attribute_type(var.basetype, locn);
var.basetype = type_id;
type_id = get_pointee_type_id(type_id);
if (strip_array && is_array(get<SPIRType>(type_id)))
type_id = get<SPIRType>(type_id).parent_type;
ib_type.member_types[ib_mbr_idx] = type_id;
}
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = vtx_attrs_by_builtin[builtin].location;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
if (get_decoration_bitset(var.self).get(DecorationComponent))
{
uint32_t comp = get_decoration(var.self, DecorationComponent);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
}
if (get_decoration_bitset(var.self).get(DecorationIndex))
{
uint32_t index = get_decoration(var.self, DecorationIndex);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
}
// Mark the member as builtin if needed
if (is_builtin)
{
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
if (builtin == BuiltInPosition && storage == StorageClassOutput)
qual_pos_var_name = qual_var_name;
}
// Copy interpolation decorations if needed
if (is_flat)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
if (is_noperspective)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
if (is_centroid)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
if (is_sample)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
}
void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var, bool strip_array)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
uint32_t elem_cnt = 0;
if (is_matrix(var_type))
{
if (is_array(var_type))
SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
elem_cnt = var_type.columns;
}
else if (is_array(var_type))
{
if (var_type.array.size() != 1)
SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
elem_cnt = to_array_size_literal(var_type);
}
bool is_builtin = is_builtin_variable(var);
BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
bool is_flat = has_decoration(var.self, DecorationFlat);
bool is_noperspective = has_decoration(var.self, DecorationNoPerspective);
bool is_centroid = has_decoration(var.self, DecorationCentroid);
bool is_sample = has_decoration(var.self, DecorationSample);
auto *usable_type = &var_type;
if (usable_type->pointer)
usable_type = &get<SPIRType>(usable_type->parent_type);
while (is_array(*usable_type) || is_matrix(*usable_type))
usable_type = &get<SPIRType>(usable_type->parent_type);
// If a builtin, force it to have the proper name.
if (is_builtin)
set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction));
entry_func.add_local_variable(var.self);
// We need to declare the variable early and at entry-point scope.
vars_needing_early_declaration.push_back(var.self);
for (uint32_t i = 0; i < elem_cnt; i++)
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t target_components = 0;
bool padded_output = false;
uint32_t type_id = usable_type->self;
// Check if we need to pad fragment output to match a certain number of components.
if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
{
uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
target_components = get_target_components_for_fragment_location(locn);
if (usable_type->vecsize < target_components)
{
// Make a new type here.
type_id = build_extended_vector_type(usable_type->self, target_components);
padded_output = true;
}
}
ib_type.member_types.push_back(get_pointee_type_id(type_id));
// Give the member a name
string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m");
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
// There is no qualified alias since we need to flatten the internal array on return.
if (get_decoration_bitset(var.self).get(DecorationLocation))
{
uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
if (storage == StorageClassInput &&
(get_execution_model() == ExecutionModelVertex || is_tessellation_shader()))
{
var.basetype = ensure_correct_attribute_type(var.basetype, locn);
uint32_t mbr_type_id = ensure_correct_attribute_type(usable_type->self, locn);
ib_type.member_types[ib_mbr_idx] = mbr_type_id;
}
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = vtx_attrs_by_builtin[builtin].location + i;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
if (get_decoration_bitset(var.self).get(DecorationIndex))
{
uint32_t index = get_decoration(var.self, DecorationIndex);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index);
}
// Copy interpolation decorations if needed
if (is_flat)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
if (is_noperspective)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
if (is_centroid)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
if (is_sample)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
if (!strip_array)
{
switch (storage)
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back(
[=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); });
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back([=, &var]() {
if (padded_output)
{
auto &padded_type = this->get<SPIRType>(type_id);
statement(
ib_var_ref, ".", mbr_name, " = ",
remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")),
";");
}
else
statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];");
});
break;
default:
break;
}
}
}
}
uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array)
{
auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
uint32_t location = get_decoration(var.self, DecorationLocation);
for (uint32_t i = 0; i < mbr_idx; i++)
{
auto &mbr_type = get<SPIRType>(type.member_types[i]);
// Start counting from any place we have a new location decoration.
if (has_member_decoration(type.self, mbr_idx, DecorationLocation))
location = get_member_decoration(type.self, mbr_idx, DecorationLocation);
uint32_t location_count = 1;
if (mbr_type.columns > 1)
location_count = mbr_type.columns;
if (!mbr_type.array.empty())
for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++)
location_count *= to_array_size_literal(mbr_type, j);
location += location_count;
}
return location;
}
void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var,
uint32_t mbr_idx, bool strip_array)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
BuiltIn builtin;
bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
bool is_flat =
has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
has_decoration(var.self, DecorationNoPerspective);
bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
has_decoration(var.self, DecorationCentroid);
bool is_sample =
has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);
uint32_t mbr_type_id = var_type.member_types[mbr_idx];
auto &mbr_type = get<SPIRType>(mbr_type_id);
uint32_t elem_cnt = 0;
if (is_matrix(mbr_type))
{
if (is_array(mbr_type))
SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables.");
elem_cnt = mbr_type.columns;
}
else if (is_array(mbr_type))
{
if (mbr_type.array.size() != 1)
SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables.");
elem_cnt = to_array_size_literal(mbr_type);
}
auto *usable_type = &mbr_type;
if (usable_type->pointer)
usable_type = &get<SPIRType>(usable_type->parent_type);
while (is_array(*usable_type) || is_matrix(*usable_type))
usable_type = &get<SPIRType>(usable_type->parent_type);
for (uint32_t i = 0; i < elem_cnt; i++)
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
ib_type.member_types.push_back(usable_type->self);
// Give the member a name
string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m");
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
{
uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (has_decoration(var.self, DecorationLocation))
{
uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array) + i;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = vtx_attrs_by_builtin[builtin].location + i;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays make little sense.");
// Copy interpolation decorations if needed
if (is_flat)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
if (is_noperspective)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
if (is_centroid)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
if (is_sample)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx);
// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
if (!strip_array)
{
switch (storage)
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref,
".", mbr_name, ";");
});
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".",
to_member_name(var_type, mbr_idx), "[", i, "];");
});
break;
default:
break;
}
}
}
}
void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx,
bool strip_array)
{
auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
BuiltIn builtin = BuiltInMax;
bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
bool is_flat =
has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat);
bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) ||
has_decoration(var.self, DecorationNoPerspective);
bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) ||
has_decoration(var.self, DecorationCentroid);
bool is_sample =
has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample);
// Add a reference to the member to the interface struct.
uint32_t mbr_type_id = var_type.member_types[mbr_idx];
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin);
var_type.member_types[mbr_idx] = mbr_type_id;
ib_type.member_types.push_back(mbr_type_id);
// Give the member a name
string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m");
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
// Update the original variable reference to include the structure reference
string qual_var_name = ib_var_ref + "." + mbr_name;
if (is_builtin && !strip_array)
{
// For the builtin gl_PerVertex, we cannot treat it as a block anyways,
// so redirect to qualified name.
set_member_qualified_name(var_type.self, mbr_idx, qual_var_name);
}
else if (!strip_array)
{
// Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate.
switch (storage)
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";");
});
break;
case StorageClassOutput:
entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";");
});
break;
default:
break;
}
}
// Copy the variable location from the original variable to the member
if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
{
uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader()))
{
mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn);
var_type.member_types[mbr_idx] = mbr_type_id;
ib_type.member_types[ib_mbr_idx] = mbr_type_id;
}
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (has_decoration(var.self, DecorationLocation))
{
// The block itself might have a location and in this case, all members of the block
// receive incrementing locations.
uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array);
if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader()))
{
mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn);
var_type.member_types[mbr_idx] = mbr_type_id;
ib_type.member_types[ib_mbr_idx] = mbr_type_id;
}
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = 0;
auto builtin_itr = vtx_attrs_by_builtin.find(builtin);
if (builtin_itr != end(vtx_attrs_by_builtin))
locn = builtin_itr->second.location;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, storage);
}
// Copy the component location, if present.
if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
{
uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp);
}
// Mark the member as builtin if needed
if (is_builtin)
{
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin);
if (builtin == BuiltInPosition && storage == StorageClassOutput)
qual_pos_var_name = qual_var_name;
}
// Copy interpolation decorations if needed
if (is_flat)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat);
if (is_noperspective)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective);
if (is_centroid)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid);
if (is_sample)
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self);
set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx);
}
// In Metal, the tessellation levels are stored as tightly packed half-precision floating point values.
// But, stage-in attribute offsets and strides must be multiples of four, so we can't pass the levels
// individually. Therefore, we must pass them as vectors. Triangles get a single float4, with the outer
// levels in 'xyz' and the inner level in 'w'. Quads get a float4 containing the outer levels and a
// float2 containing the inner levels.
void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type,
SPIRVariable &var)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
auto &var_type = get_variable_element_type(var);
BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
// Force the variable to have the proper name.
set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction));
if (get_entry_point().flags.get(ExecutionModeTriangles))
{
// Triangles are tricky, because we want only one member in the struct.
// We need to declare the variable early and at entry-point scope.
entry_func.add_local_variable(var.self);
vars_needing_early_declaration.push_back(var.self);
string mbr_name = "gl_TessLevel";
// If we already added the other one, we can skip this step.
if (!added_builtin_tess_level)
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t type_id = build_extended_vector_type(var_type.self, 4);
ib_type.member_types.push_back(type_id);
// Give the member a name
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
// There is no qualified alias since we need to flatten the internal array on return.
if (get_decoration_bitset(var.self).get(DecorationLocation))
{
uint32_t locn = get_decoration(var.self, DecorationLocation);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, StorageClassInput);
}
else if (vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = vtx_attrs_by_builtin[builtin].location;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, StorageClassInput);
}
added_builtin_tess_level = true;
}
switch (builtin)
{
case BuiltInTessLevelOuter:
entry_func.fixup_hooks_in.push_back([=, &var]() {
statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;");
statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;");
statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;");
});
break;
case BuiltInTessLevelInner:
entry_func.fixup_hooks_in.push_back(
[=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); });
break;
default:
assert(false);
break;
}
}
else
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2);
// Change the type of the variable, too.
uint32_t ptr_type_id = ir.increase_bound_by(1);
auto &new_var_type = set<SPIRType>(ptr_type_id, get<SPIRType>(type_id));
new_var_type.pointer = true;
new_var_type.storage = StorageClassInput;
new_var_type.parent_type = type_id;
var.basetype = ptr_type_id;
ib_type.member_types.push_back(type_id);
// Give the member a name
string mbr_name = to_expression(var.self);
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
// Since vectors can be indexed like arrays, there is no need to unpack this. We can
// just refer to the vector directly. So give it a qualified alias.
string qual_var_name = ib_var_ref + "." + mbr_name;
ir.meta[var.self].decoration.qualified_alias = qual_var_name;
if (get_decoration_bitset(var.self).get(DecorationLocation))
{
uint32_t locn = get_decoration(var.self, DecorationLocation);
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, StorageClassInput);
}
else if (vtx_attrs_by_builtin.count(builtin))
{
uint32_t locn = vtx_attrs_by_builtin[builtin].location;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn);
mark_location_as_used_by_shader(locn, StorageClassInput);
}
}
}
void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type,
SPIRVariable &var, bool strip_array)
{
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
// Tessellation control I/O variables and tessellation evaluation per-point inputs are
// usually declared as arrays. In these cases, we want to add the element type to the
// interface block, since in Metal it's the interface block itself which is arrayed.
auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
bool is_builtin = is_builtin_variable(var);
auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
if (var_type.basetype == SPIRType::Struct)
{
if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array)
{
// For I/O blocks or structs, we will need to pass the block itself around
// to functions if they are used globally in leaf functions.
// Rather than passing down member by member,
// we unflatten I/O blocks while running the shader,
// and pass the actual struct type down to leaf functions.
// We then unflatten inputs, and flatten outputs in the "fixup" stages.
entry_func.add_local_variable(var.self);
vars_needing_early_declaration.push_back(var.self);
}
if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock))
{
// In Metal tessellation shaders, the interface block itself is arrayed. This makes things
// very complicated, since stage-in structures in MSL don't support nested structures.
// Luckily, for stage-out when capturing output, we can avoid this and just add
// composite members directly, because the stage-out structure is stored to a buffer,
// not returned.
add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
}
else
{
// Flatten the struct members into the interface struct
for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++)
{
builtin = BuiltInMax;
is_builtin = is_member_builtin(var_type, mbr_idx, &builtin);
auto &mbr_type = get<SPIRType>(var_type.member_types[mbr_idx]);
if (!is_builtin || has_active_builtin(builtin, storage))
{
if ((!is_builtin ||
(storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) &&
(storage == StorageClassInput || storage == StorageClassOutput) &&
(is_matrix(mbr_type) || is_array(mbr_type)))
{
add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx,
strip_array);
}
else
{
add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx,
strip_array);
}
}
}
}
}
else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput &&
!strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner))
{
add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var);
}
else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char ||
type_is_integral(var_type) || type_is_floating_point(var_type) || var_type.basetype == SPIRType::Boolean)
{
if (!is_builtin || has_active_builtin(builtin, storage))
{
// MSL does not allow matrices or arrays in input or output variables, so need to handle it specially.
if ((!is_builtin || (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) &&
(storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) &&
(is_matrix(var_type) || is_array(var_type)))
{
add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
}
else
{
add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array);
}
}
}
}
// Fix up the mapping of variables to interface member indices, which is used to compile access chains
// for per-vertex variables in a tessellation control shader.
void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id)
{
// Only needed for tessellation shaders.
if (get_execution_model() != ExecutionModelTessellationControl &&
!(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput))
return;
bool in_array = false;
for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++)
{
auto &mbr_dec = ir.meta[ib_type_id].members[i];
uint32_t var_id = mbr_dec.extended.ib_orig_id;
if (!var_id)
continue;
auto &var = get<SPIRVariable>(var_id);
// Unfortunately, all this complexity is needed to handle flattened structs and/or
// arrays.
if (storage == StorageClassInput)
{
auto &type = get_variable_element_type(var);
if (is_array(type) || is_matrix(type))
{
if (in_array)
continue;
in_array = true;
set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i);
}
else
{
if (type.basetype == SPIRType::Struct)
{
uint32_t mbr_idx =
get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex);
auto &mbr_type = get<SPIRType>(type.member_types[mbr_idx]);
if (is_array(mbr_type) || is_matrix(mbr_type))
{
if (in_array)
continue;
in_array = true;
set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i);
}
else
{
in_array = false;
set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i);
}
}
else
{
in_array = false;
set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i);
}
}
}
else
set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i);
}
}
// Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput.
// Returns the ID of the newly added variable, or zero if no variable was added.
uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
{
// Accumulate the variables that should appear in the interface struct
SmallVector<SPIRVariable *> vars;
bool incl_builtins = (storage == StorageClassOutput || is_tessellation_shader());
ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) {
auto &type = this->get<SPIRType>(var.basetype);
BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn));
if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) &&
!is_hidden_variable(var, incl_builtins) && type.pointer &&
(has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch &&
(!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize ||
bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer ||
bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask ||
(get_execution_model() == ExecutionModelTessellationEvaluation &&
(bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner))))
{
vars.push_back(&var);
}
});
// If no variables qualify, leave.
// For patch input in a tessellation evaluation shader, the per-vertex stage inputs
// are included in a special patch control point array.
if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id))
return 0;
// Add a new typed variable for this interface structure.
// The initializer expression is allocated here, but populated when the function
// declaraion is emitted, because it is cleared after each compilation pass.
uint32_t next_id = ir.increase_bound_by(3);
uint32_t ib_type_id = next_id++;
auto &ib_type = set<SPIRType>(ib_type_id);
ib_type.basetype = SPIRType::Struct;
ib_type.storage = storage;
set_decoration(ib_type_id, DecorationBlock);
uint32_t ib_var_id = next_id++;
auto &var = set<SPIRVariable>(ib_var_id, ib_type_id, storage, 0);
var.initializer = next_id++;
string ib_var_ref;
auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
switch (storage)
{
case StorageClassInput:
ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name;
if (get_execution_model() == ExecutionModelTessellationControl)
{
// Add a hook to populate the shared workgroup memory containing
// the gl_in array.
entry_func.fixup_hooks_in.push_back([=]() {
// Can't use PatchVertices yet; the hook for that may not have run yet.
statement("if (", to_expression(builtin_invocation_id_id), " < ", "spvIndirectParams[0])");
statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref,
";");
statement("threadgroup_barrier(mem_flags::mem_threadgroup);");
statement("if (", to_expression(builtin_invocation_id_id), " >= ", get_entry_point().output_vertices,
")");
statement(" return;");
});
}
break;
case StorageClassOutput:
{
ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name;
// Add the output interface struct as a local variable to the entry function.
// If the entry point should return the output struct, set the entry function
// to return the output interface struct, otherwise to return nothing.
// Indicate the output var requires early initialization.
bool ep_should_return_output = !get_is_rasterization_disabled();
uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0;
if (!capture_output_to_buffer)
{
entry_func.add_local_variable(ib_var_id);
for (auto &blk_id : entry_func.blocks)
{
auto &blk = get<SPIRBlock>(blk_id);
if (blk.terminator == SPIRBlock::Return)
blk.return_value = rtn_id;
}
vars_needing_early_declaration.push_back(ib_var_id);
}
else
{
switch (get_execution_model())
{
case ExecutionModelVertex:
case ExecutionModelTessellationEvaluation:
// Instead of declaring a struct variable to hold the output and then
// copying that to the output buffer, we'll declare the output variable
// as a reference to the final output element in the buffer. Then we can
// avoid the extra copy.
entry_func.fixup_hooks_in.push_back([=]() {
if (stage_out_var_id)
{
// The first member of the indirect buffer is always the number of vertices
// to draw.
statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ",
output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ",
to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ",
to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id),
"];");
}
});
break;
case ExecutionModelTessellationControl:
if (patch)
entry_func.fixup_hooks_in.push_back([=]() {
statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ",
patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "];");
});
else
entry_func.fixup_hooks_in.push_back([=]() {
statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &",
output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ",
get_entry_point().output_vertices, "];");
});
break;
default:
break;
}
}
break;
}
default:
break;
}
set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref);
set_name(ib_var_id, ib_var_ref);
for (auto p_var : vars)
{
bool strip_array =
(get_execution_model() == ExecutionModelTessellationControl ||
(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) &&
!patch;
add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, strip_array);
}
// Sort the members of the structure by their locations.
MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location);
member_sorter.sort();
// The member indices were saved to the original variables, but after the members
// were sorted, those indices are now likely incorrect. Fix those up now.
if (!patch)
fix_up_interface_member_indices(storage, ib_type_id);
// For patch inputs, add one more member, holding the array of control point data.
if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch &&
stage_in_var_id)
{
uint32_t pcp_type_id = ir.increase_bound_by(1);
auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
pcp_type.basetype = SPIRType::ControlPointArray;
pcp_type.parent_type = pcp_type.type_alias = get_stage_in_struct_type().self;
pcp_type.storage = storage;
ir.meta[pcp_type_id] = ir.meta[ib_type.self];
uint32_t mbr_idx = uint32_t(ib_type.member_types.size());
ib_type.member_types.push_back(pcp_type_id);
set_member_name(ib_type.self, mbr_idx, "gl_in");
}
return ib_var_id;
}
uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage)
{
if (!ib_var_id)
return 0;
uint32_t ib_ptr_var_id;
uint32_t next_id = ir.increase_bound_by(3);
auto &ib_type = expression_type(ib_var_id);
if (get_execution_model() == ExecutionModelTessellationControl)
{
// Tessellation control per-vertex I/O is presented as an array, so we must
// do the same with our struct here.
uint32_t ib_ptr_type_id = next_id++;
auto &ib_ptr_type = set<SPIRType>(ib_ptr_type_id, ib_type);
ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self;
ib_ptr_type.pointer = true;
ib_ptr_type.storage = storage == StorageClassInput ? StorageClassWorkgroup : StorageClassStorageBuffer;
ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self];
// To ensure that get_variable_data_type() doesn't strip off the pointer,
// which we need, use another pointer.
uint32_t ib_ptr_ptr_type_id = next_id++;
auto &ib_ptr_ptr_type = set<SPIRType>(ib_ptr_ptr_type_id, ib_ptr_type);
ib_ptr_ptr_type.parent_type = ib_ptr_type_id;
ib_ptr_ptr_type.type_alias = ib_type.self;
ib_ptr_ptr_type.storage = StorageClassFunction;
ir.meta[ib_ptr_ptr_type_id] = ir.meta[ib_type.self];
ib_ptr_var_id = next_id;
set<SPIRVariable>(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0);
set_name(ib_ptr_var_id, storage == StorageClassInput ? input_wg_var_name : "gl_out");
}
else
{
// Tessellation evaluation per-vertex inputs are also presented as arrays.
// But, in Metal, this array uses a very special type, 'patch_control_point<T>',
// which is a container that can be used to access the control point data.
// To represent this, a special 'ControlPointArray' type has been added to the
// SPIRV-Cross type system. It should only be generated by and seen in the MSL
// backend (i.e. this one).
uint32_t pcp_type_id = next_id++;
auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type);
pcp_type.basetype = SPIRType::ControlPointArray;
pcp_type.parent_type = pcp_type.type_alias = ib_type.self;
pcp_type.storage = storage;
ir.meta[pcp_type_id] = ir.meta[ib_type.self];
ib_ptr_var_id = next_id;
set<SPIRVariable>(ib_ptr_var_id, pcp_type_id, storage, 0);
set_name(ib_ptr_var_id, "gl_in");
ir.meta[ib_ptr_var_id].decoration.qualified_alias = join(patch_stage_in_var_name, ".gl_in");
}
return ib_ptr_var_id;
}
// Ensure that the type is compatible with the builtin.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return it's ID.
uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin)
{
auto &type = get<SPIRType>(type_id);
if ((builtin == BuiltInSampleMask && is_array(type)) ||
((builtin == BuiltInLayer || builtin == BuiltInViewportIndex) && type.basetype != SPIRType::UInt))
{
uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
uint32_t base_type_id = next_id++;
auto &base_type = set<SPIRType>(base_type_id);
base_type.basetype = SPIRType::UInt;
base_type.width = 32;
if (!type.pointer)
return base_type_id;
uint32_t ptr_type_id = next_id++;
auto &ptr_type = set<SPIRType>(ptr_type_id);
ptr_type = base_type;
ptr_type.pointer = true;
ptr_type.storage = type.storage;
ptr_type.parent_type = base_type_id;
return ptr_type_id;
}
return type_id;
}
// Ensure that the type is compatible with the vertex attribute.
// If it is, simply return the given type ID.
// Otherwise, create a new type, and return its ID.
uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t location)
{
auto &type = get<SPIRType>(type_id);
auto p_va = vtx_attrs_by_location.find(location);
if (p_va == end(vtx_attrs_by_location))
return type_id;
switch (p_va->second.format)
{
case MSL_VERTEX_FORMAT_UINT8:
{
switch (type.basetype)
{
case SPIRType::UByte:
case SPIRType::UShort:
case SPIRType::UInt:
return type_id;
case SPIRType::Short:
case SPIRType::Int:
break;
default:
SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
}
uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
uint32_t base_type_id = next_id++;
auto &base_type = set<SPIRType>(base_type_id);
base_type = type;
base_type.basetype = type.basetype == SPIRType::Short ? SPIRType::UShort : SPIRType::UInt;
base_type.pointer = false;
if (!type.pointer)
return base_type_id;
uint32_t ptr_type_id = next_id++;
auto &ptr_type = set<SPIRType>(ptr_type_id);
ptr_type = base_type;
ptr_type.pointer = true;
ptr_type.storage = type.storage;
ptr_type.parent_type = base_type_id;
return ptr_type_id;
}
case MSL_VERTEX_FORMAT_UINT16:
{
switch (type.basetype)
{
case SPIRType::UShort:
case SPIRType::UInt:
return type_id;
case SPIRType::Int:
break;
default:
SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
}
uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1);
uint32_t base_type_id = next_id++;
auto &base_type = set<SPIRType>(base_type_id);
base_type = type;
base_type.basetype = SPIRType::UInt;
base_type.pointer = false;
if (!type.pointer)
return base_type_id;
uint32_t ptr_type_id = next_id++;
auto &ptr_type = set<SPIRType>(ptr_type_id);
ptr_type = base_type;
ptr_type.pointer = true;
ptr_type.storage = type.storage;
ptr_type.parent_type = base_type_id;
return ptr_type_id;
}
default:
case MSL_VERTEX_FORMAT_OTHER:
break;
}
return type_id;
}
// Sort the members of the struct type by offset, and pack and then pad members where needed
// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing
// occurs first, followed by padding, because packing a member reduces both its size and its
// natural alignment, possibly requiring a padding member to be added ahead of it.
void CompilerMSL::align_struct(SPIRType &ib_type)
{
uint32_t &ib_type_id = ib_type.self;
// Sort the members of the interface structure by their offset.
// They should already be sorted per SPIR-V spec anyway.
MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset);
member_sorter.sort();
uint32_t mbr_cnt = uint32_t(ib_type.member_types.size());
// Test the alignment of each member, and if a member should be closer to the previous
// member than the default spacing expects, it is likely that the previous member is in
// a packed format. If so, and the previous member is packable, pack it.
// For example...this applies to any 3-element vector that is followed by a scalar.
uint32_t curr_offset = 0;
for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
{
if (is_member_packable(ib_type, mbr_idx))
{
set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked);
set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType,
ib_type.member_types[mbr_idx]);
}
// Align current offset to the current member's default alignment.
size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask);
// Fetch the member offset as declared in the SPIRV.
uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset);
if (mbr_offset > aligned_curr_offset)
{
// Since MSL and SPIR-V have slightly different struct member alignment and
// size rules, we'll pad to standard C-packing rules. If the member is farther
// away than C-packing, expects, add an inert padding member before the the member.
MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx);
struct_member_padding[key] = mbr_offset - curr_offset;
}
// Increment the current offset to be positioned immediately after the current member.
// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
if (mbr_idx + 1 < mbr_cnt)
curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
}
}
// Returns whether the specified struct member supports a packable type
// variation that is smaller than the unpacked variation of that type.
bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
{
// We've already marked it as packable
if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
return true;
auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
uint32_t component_size = mbr_type.width / 8;
uint32_t unpacked_mbr_size;
if (mbr_type.vecsize == 3)
unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
else
unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
// Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
// but we will use the same mechanism.
if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 &&
type_struct_member_array_stride(ib_type, index) == 4 * component_size)
{
return true;
}
// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
// This can happen for struct A { float a }; A a[]; in std140 layout.
// TODO: Emit a padded struct which can be used for this purpose.
if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct)
{
size_t declared_struct_size = get_declared_struct_size(mbr_type);
size_t alignment = get_declared_struct_member_alignment(ib_type, index);
declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1);
if (type_struct_member_array_stride(ib_type, index) > declared_struct_size)
return true;
}
// TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
//if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16)
// SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout.");
// Only vectors or 3-row matrices need to be packed.
if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
return false;
// Only row-major matrices need to be packed.
if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
return false;
if (is_array(mbr_type))
{
// If member is an array, and the array stride is larger than the type needs, don't pack it.
// Take into consideration multi-dimentional arrays.
uint32_t md_elem_cnt = 1;
size_t last_elem_idx = mbr_type.array.size() - 1;
for (uint32_t i = 0; i < last_elem_idx; i++)
md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u);
uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt;
uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
return unpacked_array_stride > array_stride;
}
else
{
uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
// For vectors, pack if the member's offset doesn't conform to the
// type's usual alignment. For example, a float3 at offset 4.
if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size))
return true;
// Pack if there is not enough space between this member and next.
// If last member, only pack if it's a row-major matrix.
if (index < ib_type.member_types.size() - 1)
{
uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
}
else
return is_matrix(mbr_type);
}
}
// Returns a combination of type ID and member index for use as hash key
MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index)
{
MSLStructMemberKey k = type_id;
k <<= 32;
k += index;
return k;
}
void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
{
if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) ||
get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0)
{
CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
}
else
{
// Special handling when storing to a float[] or float2[] in std140 layout.
auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
string lhs = to_dereferenced_expression(lhs_expression);
string rhs = to_pointer_expression(rhs_expression);
// Unpack the expression so we can store to it with a float or float2.
// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
if (is_scalar(type) && is_array(type))
lhs = enclose_expression(lhs) + ".x";
else if (is_vector(type) && type.vecsize == 2 && is_array(type))
lhs = enclose_expression(lhs) + ".xy";
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(lhs_expression);
}
}
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
{
const SPIRType *packed_type = nullptr;
if (packed_type_id)
packed_type = &get<SPIRType>(packed_type_id);
// float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
return enclose_expression(expr_str) + ".x";
else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
return enclose_expression(expr_str) + ".xy";
else
return join(type_to_glsl(type), "(", expr_str, ")");
}
// Emits the file header info
void CompilerMSL::emit_header()
{
// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
if (suppress_missing_prototypes)
statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
for (auto &pragma : pragma_lines)
statement(pragma);
if (!pragma_lines.empty() || suppress_missing_prototypes)
statement("");
statement("#include <metal_stdlib>");
statement("#include <simd/simd.h>");
for (auto &header : header_lines)
statement(header);
statement("");
statement("using namespace metal;");
statement("");
for (auto &td : typedef_lines)
statement(td);
if (!typedef_lines.empty())
statement("");
}
void CompilerMSL::add_pragma_line(const string &line)
{
auto rslt = pragma_lines.insert(line);
if (rslt.second)
force_recompile();
}
void CompilerMSL::add_typedef_line(const string &line)
{
auto rslt = typedef_lines.insert(line);
if (rslt.second)
force_recompile();
}
// Emits any needed custom function bodies.
void CompilerMSL::emit_custom_functions()
{
for (uint32_t i = SPVFuncImplArrayCopyMultidimMax; i >= 2; i--)
if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i)))
spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1));
for (auto &spv_func : spv_function_implementations)
{
switch (spv_func)
{
case SPVFuncImplMod:
statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()");
statement("template<typename Tx, typename Ty>");
statement("Tx mod(Tx x, Ty y)");
begin_scope();
statement("return x - y * floor(x / y);");
end_scope();
statement("");
break;
case SPVFuncImplRadians:
statement("// Implementation of the GLSL radians() function");
statement("template<typename T>");
statement("T radians(T d)");
begin_scope();
statement("return d * T(0.01745329251);");
end_scope();
statement("");
break;
case SPVFuncImplDegrees:
statement("// Implementation of the GLSL degrees() function");
statement("template<typename T>");
statement("T degrees(T r)");
begin_scope();
statement("return r * T(57.2957795131);");
end_scope();
statement("");
break;
case SPVFuncImplFindILsb:
statement("// Implementation of the GLSL findLSB() function");
statement("template<typename T>");
statement("T findLSB(T x)");
begin_scope();
statement("return select(ctz(x), T(-1), x == T(0));");
end_scope();
statement("");
break;
case SPVFuncImplFindUMsb:
statement("// Implementation of the unsigned GLSL findMSB() function");
statement("template<typename T>");
statement("T findUMSB(T x)");
begin_scope();
statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));");
end_scope();
statement("");
break;
case SPVFuncImplFindSMsb:
statement("// Implementation of the signed GLSL findMSB() function");
statement("template<typename T>");
statement("T findSMSB(T x)");
begin_scope();
statement("T v = select(x, T(-1) - x, x < T(0));");
statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));");
end_scope();
statement("");
break;
case SPVFuncImplSSign:
statement("// Implementation of the GLSL sign() function for integer types");
statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>");
statement("T sign(T x)");
begin_scope();
statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));");
end_scope();
statement("");
break;
case SPVFuncImplArrayCopy:
statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via "
"assignment.");
statement("template<typename T, uint N>");
statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])");
begin_scope();
statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
end_scope();
statement("");
statement("template<typename T, uint N>");
statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])");
begin_scope();
statement("for (uint i = 0; i < N; dst[i] = src[i], i++);");
end_scope();
statement("");
break;
case SPVFuncImplArrayOfArrayCopy2Dim:
case SPVFuncImplArrayOfArrayCopy3Dim:
case SPVFuncImplArrayOfArrayCopy4Dim:
case SPVFuncImplArrayOfArrayCopy5Dim:
case SPVFuncImplArrayOfArrayCopy6Dim:
{
static const char *function_name_tags[] = {
"FromStack",
"FromConstant",
};
static const char *src_address_space[] = {
"thread const",
"constant",
};
for (uint32_t variant = 0; variant < 2; variant++)
{
uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase;
string tmp = "template<typename T";
for (uint8_t i = 0; i < dimensions; i++)
{
tmp += ", uint ";
tmp += 'A' + i;
}
tmp += ">";
statement(tmp);
string array_arg;
for (uint8_t i = 0; i < dimensions; i++)
{
array_arg += "[";
array_arg += 'A' + i;
array_arg += "]";
}
statement("void spvArrayCopy", function_name_tags[variant], dimensions, "(thread T (&dst)", array_arg,
", ", src_address_space[variant], " T (&src)", array_arg, ")");
begin_scope();
statement("for (uint i = 0; i < A; i++)");
begin_scope();
statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);");
end_scope();
end_scope();
statement("");
}
break;
}
case SPVFuncImplTexelBufferCoords:
{
string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width);
statement("// Returns 2D texture coords corresponding to 1D texel buffer coords");
statement("uint2 spvTexelBufferCoord(uint tc)");
begin_scope();
statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");"));
end_scope();
statement("");
break;
}
case SPVFuncImplInverse4x4:
statement("// Returns the determinant of a 2x2 matrix.");
statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)");
begin_scope();
statement("return a1 * b2 - b1 * a2;");
end_scope();
statement("");
statement("// Returns the determinant of a 3x3 matrix.");
statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, "
"float c2, float c3)");
begin_scope();
statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, "
"b2, b3);");
end_scope();
statement("");
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float4x4 spvInverse4x4(float4x4 m)");
begin_scope();
statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
"m[3][3]);");
statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], "
"m[3][3]);");
statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], "
"m[3][3]);");
statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], "
"m[2][3]);");
statement_no_indent("");
statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
"m[3][3]);");
statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], "
"m[3][3]);");
statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], "
"m[3][3]);");
statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], "
"m[2][3]);");
statement_no_indent("");
statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
"m[3][3]);");
statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], "
"m[3][3]);");
statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], "
"m[3][3]);");
statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], "
"m[2][3]);");
statement_no_indent("");
statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
"m[3][2]);");
statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], "
"m[3][2]);");
statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], "
"m[3][2]);");
statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], "
"m[2][2]);");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
"* m[3][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
break;
case SPVFuncImplInverse3x3:
if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0)
{
statement("// Returns the determinant of a 2x2 matrix.");
statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)");
begin_scope();
statement("return a1 * b2 - b1 * a2;");
end_scope();
statement("");
}
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float3x3 spvInverse3x3(float3x3 m)");
begin_scope();
statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
statement_no_indent("");
statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
statement_no_indent("");
statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
break;
case SPVFuncImplInverse2x2:
statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
statement("float2x2 spvInverse2x2(float2x2 m)");
begin_scope();
statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
statement_no_indent("");
statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
statement("adj[0][0] = m[1][1];");
statement("adj[0][1] = -m[0][1];");
statement_no_indent("");
statement("adj[1][0] = -m[1][0];");
statement("adj[1][1] = m[0][0];");
statement_no_indent("");
statement("// Calculate the determinant as a combination of the cofactors of the first row.");
statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
statement_no_indent("");
statement("// Divide the classical adjoint matrix by the determinant.");
statement("// If determinant is zero, matrix is not invertable, so leave it unchanged.");
statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor2x3:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)");
begin_scope();
statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor2x4:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)");
begin_scope();
statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], "
"m[1][3]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor3x2:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)");
begin_scope();
statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor3x4:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)");
begin_scope();
statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], "
"m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor4x2:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)");
begin_scope();
statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), "
"float2(m[1][1], m[3][1]));");
end_scope();
statement("");
break;
case SPVFuncImplRowMajor4x3:
statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization.");
statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)");
begin_scope();
statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), "
"float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));");
end_scope();
statement("");
break;
case SPVFuncImplTextureSwizzle:
statement("enum class spvSwizzle : uint");
begin_scope();
statement("none = 0,");
statement("zero,");
statement("one,");
statement("red,");
statement("green,");
statement("blue,");
statement("alpha");
end_scope_decl();
statement("");
statement("template<typename T> struct spvRemoveReference { typedef T type; };");
statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };");
statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };");
statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
"spvRemoveReference<T>::type& x)");
begin_scope();
statement("return static_cast<thread T&&>(x);");
end_scope();
statement("template<typename T> inline constexpr thread T&& spvForward(thread typename "
"spvRemoveReference<T>::type&& x)");
begin_scope();
statement("return static_cast<thread T&&>(x);");
end_scope();
statement("");
statement("template<typename T>");
statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)");
begin_scope();
statement("switch (s)");
begin_scope();
statement("case spvSwizzle::none:");
statement(" return c;");
statement("case spvSwizzle::zero:");
statement(" return 0;");
statement("case spvSwizzle::one:");
statement(" return 1;");
statement("case spvSwizzle::red:");
statement(" return x.r;");
statement("case spvSwizzle::green:");
statement(" return x.g;");
statement("case spvSwizzle::blue:");
statement(" return x.b;");
statement("case spvSwizzle::alpha:");
statement(" return x.a;");
end_scope();
end_scope();
statement("");
statement("// Wrapper function that swizzles texture samples and fetches.");
statement("template<typename T>");
statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)");
begin_scope();
statement("if (!s)");
statement(" return x;");
statement("return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), "
"spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) "
"& 0xFF)), "
"spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));");
end_scope();
statement("");
statement("template<typename T>");
statement("inline T spvTextureSwizzle(T x, uint s)");
begin_scope();
statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;");
end_scope();
statement("");
statement("// Wrapper function that swizzles texture gathers.");
statement("template<typename T, typename Tex, typename... Ts>");
statement(
"inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) "
"METAL_CONST_ARG(c)");
begin_scope();
statement("if (sw)");
begin_scope();
statement("switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))");
begin_scope();
statement("case spvSwizzle::none:");
statement(" break;");
statement("case spvSwizzle::zero:");
statement(" return vec<T, 4>(0, 0, 0, 0);");
statement("case spvSwizzle::one:");
statement(" return vec<T, 4>(1, 1, 1, 1);");
statement("case spvSwizzle::red:");
statement(" return t.gather(s, spvForward<Ts>(params)..., component::x);");
statement("case spvSwizzle::green:");
statement(" return t.gather(s, spvForward<Ts>(params)..., component::y);");
statement("case spvSwizzle::blue:");
statement(" return t.gather(s, spvForward<Ts>(params)..., component::z);");
statement("case spvSwizzle::alpha:");
statement(" return t.gather(s, spvForward<Ts>(params)..., component::w);");
end_scope();