Merge pull request #821 from cdavis5e/pass-sampled-images
MSL: Fix passing a sampled image to a function.
diff --git a/main.cpp b/main.cpp
index 4e4605b..7899109 100644
--- a/main.cpp
+++ b/main.cpp
@@ -492,6 +492,7 @@
bool support_nonzero_baseinstance = true;
bool msl_swizzle_texture_samples = false;
bool msl_ios = false;
+ bool msl_pad_fragment_output = false;
vector<PLSArg> pls_in;
vector<PLSArg> pls_out;
vector<Remap> remaps;
@@ -546,6 +547,7 @@
"\t[--msl-version <MMmmpp>]\n"
"\t[--msl-swizzle-texture-samples]\n"
"\t[--msl-ios]\n"
+ "\t[--msl-pad-fragment-output]\n"
"\t[--hlsl]\n"
"\t[--reflect]\n"
"\t[--shader-model]\n"
@@ -714,6 +716,7 @@
cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; });
cbs.add("--msl-swizzle-texture-samples", [&args](CLIParser &) { args.msl_swizzle_texture_samples = true; });
cbs.add("--msl-ios", [&args](CLIParser &) { args.msl_ios = true; });
+ cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; });
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
auto old_name = parser.next_string();
@@ -843,6 +846,7 @@
msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples;
if (args.msl_ios)
msl_opts.platform = CompilerMSL::Options::iOS;
+ msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output;
msl_comp->set_msl_options(msl_opts);
}
else if (args.hlsl)
diff --git a/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
new file mode 100644
index 0000000..53aafa5
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
@@ -0,0 +1,35 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+ float4 FragColors_0 [[color(0)]];
+ float4 FragColors_1 [[color(1)]];
+ float4 FragColor2 [[color(2)]];
+ float4 FragColor3 [[color(3)]];
+};
+
+struct main0_in
+{
+ float3 vColor [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+ main0_out out = {};
+ float FragColors[2] = {};
+ float2 FragColor2 = {};
+ float3 FragColor3 = {};
+ FragColors[0] = in.vColor.x;
+ FragColors[1] = in.vColor.y;
+ FragColor2 = in.vColor.xz;
+ FragColor3 = in.vColor.zzz;
+ out.FragColors_0 = float4(FragColors[0]);
+ out.FragColors_1 = float4(FragColors[1]);
+ out.FragColor2 = FragColor2.xyyy;
+ out.FragColor3 = FragColor3.xyzz;
+ return out;
+}
+
diff --git a/reference/shaders-hlsl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-hlsl-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..20afdb5
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,48 @@
+static const float4 _68[4] = { 0.0f.xxxx, 1.0f.xxxx, 2.0f.xxxx, 3.0f.xxxx };
+
+static float4 gl_Position;
+static int Index1;
+static int Index2;
+
+struct SPIRV_Cross_Input
+{
+ int Index1 : TEXCOORD0;
+ int Index2 : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+ float4 gl_Position : SV_Position;
+};
+
+float4 consume_constant_arrays2(float4 positions[4], float4 positions2[4])
+{
+ float4 indexable[4] = positions;
+ float4 indexable_1[4] = positions2;
+ return indexable[Index1] + indexable_1[Index2];
+}
+
+float4 consume_constant_arrays(float4 positions[4], float4 positions2[4])
+{
+ return consume_constant_arrays2(positions, positions2);
+}
+
+void vert_main()
+{
+ float4 LUT2[4];
+ LUT2[0] = 10.0f.xxxx;
+ LUT2[1] = 11.0f.xxxx;
+ LUT2[2] = 12.0f.xxxx;
+ LUT2[3] = 13.0f.xxxx;
+ gl_Position = consume_constant_arrays(_68, LUT2);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+ Index1 = stage_input.Index1;
+ Index2 = stage_input.Index2;
+ vert_main();
+ SPIRV_Cross_Output stage_output;
+ stage_output.gl_Position = gl_Position;
+ return stage_output;
+}
diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..ab56313
--- /dev/null
+++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,60 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
+
+struct main0_out
+{
+ float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+ int Index1 [[attribute(0)]];
+ int Index2 [[attribute(1)]];
+};
+
+// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
+template<typename T, uint N>
+void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])
+{
+ for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+template<typename T, uint N>
+void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])
+{
+ for (uint i = 0; i < N; dst[i] = src[i], i++);
+}
+
+float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+{
+ float4 indexable[4];
+ spvArrayCopyFromStack1(indexable, positions);
+ float4 indexable_1[4];
+ spvArrayCopyFromStack1(indexable_1, positions2);
+ return indexable[Index1] + indexable_1[Index2];
+}
+
+float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
+{
+ return consume_constant_arrays2(positions, positions2, Index1, Index2);
+}
+
+vertex main0_out main0(main0_in in [[stage_in]])
+{
+ float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
+ main0_out out = {};
+ float4 LUT2[4];
+ LUT2[0] = float4(10.0);
+ LUT2[1] = float4(11.0);
+ LUT2[2] = float4(12.0);
+ LUT2[3] = float4(13.0);
+ out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2);
+ return out;
+}
+
diff --git a/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
new file mode 100644
index 0000000..2d339c4
--- /dev/null
+++ b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
@@ -0,0 +1,42 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+ float4 FragColors_0 [[color(0)]];
+ float4 FragColors_1 [[color(1)]];
+ float4 FragColor2 [[color(2)]];
+ float4 FragColor3 [[color(3)]];
+};
+
+struct main0_in
+{
+ float3 vColor [[user(locn0)]];
+};
+
+void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3)
+{
+ FragColors[0] = vColor.x;
+ FragColors[1] = vColor.y;
+ FragColor2 = vColor.xz;
+ FragColor3 = vColor.zzz;
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+ main0_out out = {};
+ float FragColors[2] = {};
+ float2 FragColor2 = {};
+ float3 FragColor3 = {};
+ set_globals(FragColors, in.vColor, FragColor2, FragColor3);
+ out.FragColors_0 = float4(FragColors[0]);
+ out.FragColors_1 = float4(FragColors[1]);
+ out.FragColor2 = FragColor2.xyyy;
+ out.FragColor3 = FragColor3.xyzz;
+ return out;
+}
+
diff --git a/reference/shaders-no-opt/vert/pass-array-by-value.vert b/reference/shaders-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..45d4210
--- /dev/null
+++ b/reference/shaders-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(location = 0) in int Index1;
+layout(location = 1) in int Index2;
+
+vec4 consume_constant_arrays2(vec4 positions[4], vec4 positions2[4])
+{
+ vec4 indexable[4] = positions;
+ vec4 indexable_1[4] = positions2;
+ return indexable[Index1] + indexable_1[Index2];
+}
+
+vec4 consume_constant_arrays(vec4 positions[4], vec4 positions2[4])
+{
+ return consume_constant_arrays2(positions, positions2);
+}
+
+void main()
+{
+ vec4 LUT2[4];
+ LUT2[0] = vec4(10.0);
+ LUT2[1] = vec4(11.0);
+ LUT2[2] = vec4(12.0);
+ LUT2[3] = vec4(13.0);
+ gl_Position = consume_constant_arrays(vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0)), LUT2);
+}
+
diff --git a/shaders-hlsl-no-opt/vert/pass-array-by-value.vert b/shaders-hlsl-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..2c142a7
--- /dev/null
+++ b/shaders-hlsl-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(location = 0) in int Index1;
+layout(location = 1) in int Index2;
+
+vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
+{
+ return positions[Index1] + positions2[Index2];
+}
+
+vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
+{
+ return consume_constant_arrays2(positions, positions2);
+}
+
+const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
+
+void main()
+{
+ vec4 LUT2[4];
+ LUT2[0] = vec4(10.0);
+ LUT2[1] = vec4(11.0);
+ LUT2[2] = vec4(12.0);
+ LUT2[3] = vec4(13.0);
+ gl_Position = consume_constant_arrays(LUT1, LUT2);
+}
diff --git a/shaders-msl-no-opt/vert/pass-array-by-value.vert b/shaders-msl-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..2c142a7
--- /dev/null
+++ b/shaders-msl-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(location = 0) in int Index1;
+layout(location = 1) in int Index2;
+
+vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
+{
+ return positions[Index1] + positions2[Index2];
+}
+
+vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
+{
+ return consume_constant_arrays2(positions, positions2);
+}
+
+const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
+
+void main()
+{
+ vec4 LUT2[4];
+ LUT2[0] = vec4(10.0);
+ LUT2[1] = vec4(11.0);
+ LUT2[2] = vec4(12.0);
+ LUT2[3] = vec4(13.0);
+ gl_Position = consume_constant_arrays(LUT1, LUT2);
+}
diff --git a/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
new file mode 100644
index 0000000..240c59b
--- /dev/null
+++ b/shaders-msl/frag/fragment-component-padding.pad-fragment.frag
@@ -0,0 +1,18 @@
+#version 450
+layout(location = 0) out float FragColors[2];
+layout(location = 2) out vec2 FragColor2;
+layout(location = 3) out vec3 FragColor3;
+layout(location = 0) in vec3 vColor;
+
+void set_globals()
+{
+ FragColors[0] = vColor.x;
+ FragColors[1] = vColor.y;
+ FragColor2 = vColor.xz;
+ FragColor3 = vColor.zzz;
+}
+
+void main()
+{
+ set_globals();
+}
diff --git a/shaders-no-opt/vert/pass-array-by-value.vert b/shaders-no-opt/vert/pass-array-by-value.vert
new file mode 100644
index 0000000..2c142a7
--- /dev/null
+++ b/shaders-no-opt/vert/pass-array-by-value.vert
@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(location = 0) in int Index1;
+layout(location = 1) in int Index2;
+
+vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
+{
+ return positions[Index1] + positions2[Index2];
+}
+
+vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
+{
+ return consume_constant_arrays2(positions, positions2);
+}
+
+const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
+
+void main()
+{
+ vec4 LUT2[4];
+ LUT2[0] = vec4(10.0);
+ LUT2[1] = vec4(11.0);
+ LUT2[2] = vec4(12.0);
+ LUT2[3] = vec4(13.0);
+ gl_Position = consume_constant_arrays(LUT1, LUT2);
+}
diff --git a/spirv_common.hpp b/spirv_common.hpp
index c106008..57820d0 100644
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@@ -812,6 +812,11 @@
// Need to defer this, because they might rely on things which change during compilation.
std::vector<std::function<void()>> fixup_hooks_in;
+ // On function entry, make sure to copy a constant array into thread addr space to work around
+ // the case where we are passing a constant array by value to a function on backends which do not
+ // consider arrays to be value types.
+ std::vector<uint32_t> constant_arrays_needed_on_stack;
+
bool active = false;
bool flush_undeclared = true;
bool do_combined_parameters = true;
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index d9d04e1..1a3f08c 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -9605,6 +9605,14 @@
current_function = &func;
auto &entry_block = get<SPIRBlock>(func.entry_block);
+ sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
+ for (auto &array : func.constant_arrays_needed_on_stack)
+ {
+ auto &c = get<SPIRConstant>(array);
+ auto &type = get<SPIRType>(c.constant_type);
+ statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
+ }
+
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 0041491..4f81ae3 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -82,6 +82,11 @@
resource_bindings.push_back(&p_res_bindings[i]);
}
+void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
+{
+ fragment_output_components[location] = components;
+}
+
void CompilerMSL::build_implicit_builtins()
{
bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition);
@@ -779,6 +784,27 @@
p_va->used_by_shader = true;
}
+uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const
+{
+ auto itr = fragment_output_components.find(location);
+ if (itr == end(fragment_output_components))
+ return 4;
+ else
+ return itr->second;
+}
+
+uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components)
+{
+ uint32_t new_type_id = ir.increase_bound_by(1);
+ auto &type = set<SPIRType>(new_type_id, get<SPIRType>(type_id));
+ type.vecsize = components;
+ type.self = new_type_id;
+ type.parent_type = 0;
+ type.pointer = false;
+
+ return new_type_id;
+}
+
void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref,
SPIRType &ib_type, SPIRVariable &var)
{
@@ -793,6 +819,26 @@
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin);
var.basetype = type_id;
+
+ auto &type = get<SPIRType>(type_id);
+ uint32_t target_components = 0;
+ uint32_t type_components = type.vecsize;
+ bool padded_output = false;
+
+ // Check if we need to pad fragment output to match a certain number of components.
+ if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
+ get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
+ {
+ uint32_t locn = get_decoration(var.self, DecorationLocation);
+ target_components = get_target_components_for_fragment_location(locn);
+ if (type_components < target_components)
+ {
+ // Make a new type here.
+ type_id = build_extended_vector_type(type_id, target_components);
+ padded_output = true;
+ }
+ }
+
ib_type.member_types.push_back(get_pointee_type_id(type_id));
// Give the member a name
@@ -801,7 +847,20 @@
// Update the original variable reference to include the structure reference
string qual_var_name = ib_var_ref + "." + mbr_name;
- ir.meta[var.self].decoration.qualified_alias = qual_var_name;
+
+ if (padded_output)
+ {
+ auto &entry_func = get<SPIRFunction>(ir.default_entry_point);
+ entry_func.add_local_variable(var.self);
+ vars_needing_early_declaration.push_back(var.self);
+
+ entry_func.fixup_hooks_out.push_back([=, &var]() {
+ SPIRType &padded_type = this->get<SPIRType>(type_id);
+ statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";");
+ });
+ }
+ else
+ ir.meta[var.self].decoration.qualified_alias = qual_var_name;
// Copy the variable location from the original variable to the member
if (get_decoration_bitset(var.self).get(DecorationLocation))
@@ -890,7 +949,26 @@
{
// Add a reference to the variable type to the interface struct.
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size());
- ib_type.member_types.push_back(usable_type->self);
+
+ uint32_t target_components = 0;
+ bool padded_output = false;
+ uint32_t type_id = usable_type->self;
+
+ // Check if we need to pad fragment output to match a certain number of components.
+ if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components &&
+ get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput)
+ {
+ uint32_t locn = get_decoration(var.self, DecorationLocation) + i;
+ target_components = get_target_components_for_fragment_location(locn);
+ if (usable_type->vecsize < target_components)
+ {
+ // Make a new type here.
+ type_id = build_extended_vector_type(usable_type->self, target_components);
+ padded_output = true;
+ }
+ }
+
+ ib_type.member_types.push_back(get_pointee_type_id(type_id));
// Give the member a name
string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m");
@@ -930,12 +1008,21 @@
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back(
- [=]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); });
+ [=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); });
break;
case StorageClassOutput:
- entry_func.fixup_hooks_out.push_back(
- [=]() { statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); });
+ entry_func.fixup_hooks_out.push_back([=, &var]() {
+ if (padded_output)
+ {
+ auto &padded_type = this->get<SPIRType>(type_id);
+ statement(ib_var_ref, ".", mbr_name, " = ",
+ remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")),
+ ";");
+ }
+ else
+ statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];");
+ });
break;
default:
@@ -1053,14 +1140,14 @@
switch (storage)
{
case StorageClassInput:
- entry_func.fixup_hooks_in.push_back([=]() {
+ entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, ".",
mbr_name, ";");
});
break;
case StorageClassOutput:
- entry_func.fixup_hooks_out.push_back([=]() {
+ entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx),
"[", i, "];");
});
@@ -1115,13 +1202,13 @@
switch (storage)
{
case StorageClassInput:
- entry_func.fixup_hooks_in.push_back([=]() {
+ entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() {
statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";");
});
break;
case StorageClassOutput:
- entry_func.fixup_hooks_out.push_back([=]() {
+ entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() {
statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";");
});
break;
@@ -3684,7 +3771,31 @@
// Manufacture automatic sampler arg for SampledImage texture.
string CompilerMSL::to_func_call_arg(uint32_t id)
{
- string arg_str = CompilerGLSL::to_func_call_arg(id);
+ string arg_str;
+
+ auto *c = maybe_get<SPIRConstant>(id);
+ if (c && !get<SPIRType>(c->constant_type).array.empty())
+ {
+ // If we are passing a constant array directly to a function for some reason,
+ // the callee will expect an argument in thread const address space
+ // (since we can only bind to arrays with references in MSL).
+ // To resolve this, we must emit a copy in this address space.
+ // This kind of code gen should be rare enough that performance is not a real concern.
+ // Inline the SPIR-V to avoid this kind of suboptimal codegen.
+ //
+ // We risk calling this inside a continue block (invalid code),
+ // so just create a thread local copy in the current function.
+ arg_str = join("_", id, "_array_copy");
+ auto &constants = current_function->constant_arrays_needed_on_stack;
+ auto itr = find(begin(constants), end(constants), id);
+ if (itr == end(constants))
+ {
+ force_recompile = true;
+ constants.push_back(id);
+ }
+ }
+ else
+ arg_str = CompilerGLSL::to_func_call_arg(id);
// Manufacture automatic sampler arg if the arg is a SampledImage texture.
auto &type = expression_type(id);
@@ -4514,8 +4625,26 @@
(storage == StorageClassFunction || storage == StorageClassGeneric))
{
// If the argument is a pure value and not an opaque type, we will pass by value.
- decl += " ";
- decl += to_expression(name_id);
+ if (is_array(type))
+ {
+ // We are receiving an array by value. This is problematic.
+ // We cannot be sure of the target address space since we are supposed to receive a copy,
+ // but this is not possible with MSL without some extra work.
+ // We will have to assume we're getting a reference in thread address space.
+ // If we happen to get a reference in constant address space, the caller must emit a copy and pass that.
+ // Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from
+ // non-constant arrays, but we can create thread const from constant.
+ decl = string("thread const ") + decl;
+ decl += " (&";
+ decl += to_expression(name_id);
+ decl += ")";
+ decl += type_to_array_glsl(type);
+ }
+ else
+ {
+ decl += " ";
+ decl += to_expression(name_id);
+ }
}
else if (is_array(type) && !type_is_image)
{
@@ -4581,12 +4710,254 @@
// FIXME: MSL and GLSL are doing two different things here.
// Agree on convention and remove this override.
static const unordered_set<string> keywords = {
- "kernel", "vertex", "fragment", "compute", "bias",
+ "kernel",
+ "vertex",
+ "fragment",
+ "compute",
+ "bias",
+ "assert",
+ "VARIABLE_TRACEPOINT",
+ "STATIC_DATA_TRACEPOINT",
+ "STATIC_DATA_TRACEPOINT_V",
+ "METAL_ALIGN",
+ "METAL_ASM",
+ "METAL_CONST",
+ "METAL_DEPRECATED",
+ "METAL_ENABLE_IF",
+ "METAL_FUNC",
+ "METAL_INTERNAL",
+ "METAL_NON_NULL_RETURN",
+ "METAL_NORETURN",
+ "METAL_NOTHROW",
+ "METAL_PURE",
+ "METAL_UNAVAILABLE",
+ "METAL_IMPLICIT",
+ "METAL_EXPLICIT",
+ "METAL_CONST_ARG",
+ "METAL_ARG_UNIFORM",
+ "METAL_ZERO_ARG",
+ "METAL_VALID_LOD_ARG",
+ "METAL_VALID_LEVEL_ARG",
+ "METAL_VALID_STORE_ORDER",
+ "METAL_VALID_LOAD_ORDER",
+ "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
+ "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
+ "METAL_VALID_RENDER_TARGET",
+ "is_function_constant_defined",
+ "CHAR_BIT",
+ "SCHAR_MAX",
+ "SCHAR_MIN",
+ "UCHAR_MAX",
+ "CHAR_MAX",
+ "CHAR_MIN",
+ "USHRT_MAX",
+ "SHRT_MAX",
+ "SHRT_MIN",
+ "UINT_MAX",
+ "INT_MAX",
+ "INT_MIN",
+ "FLT_DIG",
+ "FLT_MANT_DIG",
+ "FLT_MAX_10_EXP",
+ "FLT_MAX_EXP",
+ "FLT_MIN_10_EXP",
+ "FLT_MIN_EXP",
+ "FLT_RADIX",
+ "FLT_MAX",
+ "FLT_MIN",
+ "FLT_EPSILON",
+ "FP_ILOGB0",
+ "FP_ILOGBNAN",
+ "MAXFLOAT",
+ "HUGE_VALF",
+ "INFINITY",
+ "NAN",
+ "M_E_F",
+ "M_LOG2E_F",
+ "M_LOG10E_F",
+ "M_LN2_F",
+ "M_LN10_F",
+ "M_PI_F",
+ "M_PI_2_F",
+ "M_PI_4_F",
+ "M_1_PI_F",
+ "M_2_PI_F",
+ "M_2_SQRTPI_F",
+ "M_SQRT2_F",
+ "M_SQRT1_2_F",
+ "HALF_DIG",
+ "HALF_MANT_DIG",
+ "HALF_MAX_10_EXP",
+ "HALF_MAX_EXP",
+ "HALF_MIN_10_EXP",
+ "HALF_MIN_EXP",
+ "HALF_RADIX",
+ "HALF_MAX",
+ "HALF_MIN",
+ "HALF_EPSILON",
+ "MAXHALF",
+ "HUGE_VALH",
+ "M_E_H",
+ "M_LOG2E_H",
+ "M_LOG10E_H",
+ "M_LN2_H",
+ "M_LN10_H",
+ "M_PI_H",
+ "M_PI_2_H",
+ "M_PI_4_H",
+ "M_1_PI_H",
+ "M_2_PI_H",
+ "M_2_SQRTPI_H",
+ "M_SQRT2_H",
+ "M_SQRT1_2_H",
+ "DBL_DIG",
+ "DBL_MANT_DIG",
+ "DBL_MAX_10_EXP",
+ "DBL_MAX_EXP",
+ "DBL_MIN_10_EXP",
+ "DBL_MIN_EXP",
+ "DBL_RADIX",
+ "DBL_MAX",
+ "DBL_MIN",
+ "DBL_EPSILON",
+ "HUGE_VAL",
+ "M_E",
+ "M_LOG2E",
+ "M_LOG10E",
+ "M_LN2",
+ "M_LN10",
+ "M_PI",
+ "M_PI_2",
+ "M_PI_4",
+ "M_1_PI",
+ "M_2_PI",
+ "M_2_SQRTPI",
+ "M_SQRT2",
+ "M_SQRT1_2",
};
static const unordered_set<string> illegal_func_names = {
"main",
"saturate",
+ "assert",
+ "VARIABLE_TRACEPOINT",
+ "STATIC_DATA_TRACEPOINT",
+ "STATIC_DATA_TRACEPOINT_V",
+ "METAL_ALIGN",
+ "METAL_ASM",
+ "METAL_CONST",
+ "METAL_DEPRECATED",
+ "METAL_ENABLE_IF",
+ "METAL_FUNC",
+ "METAL_INTERNAL",
+ "METAL_NON_NULL_RETURN",
+ "METAL_NORETURN",
+ "METAL_NOTHROW",
+ "METAL_PURE",
+ "METAL_UNAVAILABLE",
+ "METAL_IMPLICIT",
+ "METAL_EXPLICIT",
+ "METAL_CONST_ARG",
+ "METAL_ARG_UNIFORM",
+ "METAL_ZERO_ARG",
+ "METAL_VALID_LOD_ARG",
+ "METAL_VALID_LEVEL_ARG",
+ "METAL_VALID_STORE_ORDER",
+ "METAL_VALID_LOAD_ORDER",
+ "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER",
+ "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS",
+ "METAL_VALID_RENDER_TARGET",
+ "is_function_constant_defined",
+ "CHAR_BIT",
+ "SCHAR_MAX",
+ "SCHAR_MIN",
+ "UCHAR_MAX",
+ "CHAR_MAX",
+ "CHAR_MIN",
+ "USHRT_MAX",
+ "SHRT_MAX",
+ "SHRT_MIN",
+ "UINT_MAX",
+ "INT_MAX",
+ "INT_MIN",
+ "FLT_DIG",
+ "FLT_MANT_DIG",
+ "FLT_MAX_10_EXP",
+ "FLT_MAX_EXP",
+ "FLT_MIN_10_EXP",
+ "FLT_MIN_EXP",
+ "FLT_RADIX",
+ "FLT_MAX",
+ "FLT_MIN",
+ "FLT_EPSILON",
+ "FP_ILOGB0",
+ "FP_ILOGBNAN",
+ "MAXFLOAT",
+ "HUGE_VALF",
+ "INFINITY",
+ "NAN",
+ "M_E_F",
+ "M_LOG2E_F",
+ "M_LOG10E_F",
+ "M_LN2_F",
+ "M_LN10_F",
+ "M_PI_F",
+ "M_PI_2_F",
+ "M_PI_4_F",
+ "M_1_PI_F",
+ "M_2_PI_F",
+ "M_2_SQRTPI_F",
+ "M_SQRT2_F",
+ "M_SQRT1_2_F",
+ "HALF_DIG",
+ "HALF_MANT_DIG",
+ "HALF_MAX_10_EXP",
+ "HALF_MAX_EXP",
+ "HALF_MIN_10_EXP",
+ "HALF_MIN_EXP",
+ "HALF_RADIX",
+ "HALF_MAX",
+ "HALF_MIN",
+ "HALF_EPSILON",
+ "MAXHALF",
+ "HUGE_VALH",
+ "M_E_H",
+ "M_LOG2E_H",
+ "M_LOG10E_H",
+ "M_LN2_H",
+ "M_LN10_H",
+ "M_PI_H",
+ "M_PI_2_H",
+ "M_PI_4_H",
+ "M_1_PI_H",
+ "M_2_PI_H",
+ "M_2_SQRTPI_H",
+ "M_SQRT2_H",
+ "M_SQRT1_2_H",
+ "DBL_DIG",
+ "DBL_MANT_DIG",
+ "DBL_MAX_10_EXP",
+ "DBL_MAX_EXP",
+ "DBL_MIN_10_EXP",
+ "DBL_MIN_EXP",
+ "DBL_RADIX",
+ "DBL_MAX",
+ "DBL_MIN",
+ "DBL_EPSILON",
+ "HUGE_VAL",
+ "M_E",
+ "M_LOG2E",
+ "M_LOG10E",
+ "M_LN2",
+ "M_LN10",
+ "M_PI",
+ "M_PI_2",
+ "M_PI_4",
+ "M_1_PI",
+ "M_2_PI",
+ "M_2_SQRTPI",
+ "M_SQRT2",
+ "M_SQRT1_2",
};
ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &) {
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index eff1b25..f4520dd 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -168,6 +168,10 @@
bool disable_rasterization = false;
bool swizzle_texture_samples = false;
+ // Fragment output in MSL must have at least as many components as the render pass.
+ // Add support to explicitly pad out components.
+ bool pad_fragment_output_components = false;
+
bool is_ios()
{
return platform == iOS;
@@ -312,6 +316,10 @@
// The remapped sampler must not be an array of samplers.
void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler);
+ // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect
+ // to use for a particular location. The default is 4 if number of components is not overridden.
+ void set_fragment_output_components(uint32_t location, uint32_t components);
+
protected:
void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op);
void emit_instruction(const Instruction &instr) override;
@@ -428,6 +436,7 @@
Options msl_options;
std::set<SPVFuncImpl> spv_function_implementations;
std::unordered_map<uint32_t, MSLVertexAttr *> vtx_attrs_by_location;
+ std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
@@ -451,6 +460,9 @@
std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers;
std::vector<uint32_t> buffer_arrays;
+ uint32_t get_target_components_for_fragment_location(uint32_t location) const;
+ uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components);
+
// OpcodeHandler that handles several MSL preprocessing operations.
struct OpCodePreprocessor : OpcodeHandler
{
diff --git a/test_shaders.py b/test_shaders.py
index 045c255..92fe5c4 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -129,7 +129,7 @@
raise
except subprocess.CalledProcessError:
print('Error compiling Metal shader: ' + msl_path)
- sys.exit(1)
+ raise RuntimeError('Failed to compile Metal shader')
def cross_compile_msl(shader, spirv, opt):
spirv_path = create_temporary()
@@ -152,6 +152,8 @@
msl_args.append('--msl-swizzle-texture-samples')
if '.ios.' in shader:
msl_args.append('--msl-ios')
+ if '.pad-fragment.' in shader:
+ msl_args.append('--msl-pad-fragment-output')
subprocess.check_call(msl_args)
@@ -201,12 +203,15 @@
subprocess.check_call(['fxc', '-nologo', shader_model_hlsl(shader), win_path])
except OSError as oe:
if (oe.errno != errno.ENOENT): # Ignore not found errors
+ print('Failed to run FXC.')
+ ignore_fxc = True
raise
else:
+ print('Could not find FXC.')
ignore_fxc = True
except subprocess.CalledProcessError:
print('Failed compiling HLSL shader:', shader, 'with FXC.')
- sys.exit(1)
+ raise RuntimeError('Failed compiling HLSL shader')
def shader_to_sm(shader):
if '.sm60.' in shader:
@@ -382,7 +387,8 @@
# Otherwise, fail the test. Keep the shader file around so we can inspect.
if not keep:
remove_file(json_file)
- sys.exit(1)
+
+ raise RuntimeError('Does not match reference')
else:
remove_file(json_file)
else:
@@ -417,7 +423,7 @@
# Otherwise, fail the test. Keep the shader file around so we can inspect.
if not keep:
remove_file(glsl)
- sys.exit(1)
+ raise RuntimeError('Does not match reference')
else:
remove_file(glsl)
else:
@@ -533,14 +539,18 @@
remove_file(spirv)
def test_shader_file(relpath, stats, shader_dir, update, keep, opt, force_no_external_validation, backend):
- if backend == 'msl':
- test_shader_msl(stats, (shader_dir, relpath), update, keep, opt, force_no_external_validation)
- elif backend == 'hlsl':
- test_shader_hlsl(stats, (shader_dir, relpath), update, keep, opt, force_no_external_validation)
- elif backend == 'reflect':
- test_shader_reflect(stats, (shader_dir, relpath), update, keep, opt)
- else:
- test_shader(stats, (shader_dir, relpath), update, keep, opt)
+ try:
+ if backend == 'msl':
+ test_shader_msl(stats, (shader_dir, relpath), update, keep, opt, force_no_external_validation)
+ elif backend == 'hlsl':
+ test_shader_hlsl(stats, (shader_dir, relpath), update, keep, opt, force_no_external_validation)
+ elif backend == 'reflect':
+ test_shader_reflect(stats, (shader_dir, relpath), update, keep, opt)
+ else:
+ test_shader(stats, (shader_dir, relpath), update, keep, opt)
+ return None
+ except Exception as e:
+ return e
def test_shaders_helper(stats, backend, args):
all_files = []
@@ -555,17 +565,27 @@
# at this point we need to switch to explicit arguments
if args.parallel:
pool = multiprocessing.Pool(multiprocessing.cpu_count())
- pool.map(partial(test_shader_file,
- stats = stats,
- shader_dir = args.folder,
- update = args.update,
- keep = args.keep,
- opt = args.opt,
- force_no_external_validation = args.force_no_external_validation,
- backend = backend), all_files)
+
+ results = []
+ for f in all_files:
+ results.append(pool.apply_async(test_shader_file,
+ args = (f, stats,
+ args.folder, args.update, args.keep, args.opt, args.force_no_external_validation,
+ backend)))
+
+ for res in results:
+ error = res.get()
+ if error is not None:
+ pool.close()
+ pool.join()
+ print('Error:', error)
+ sys.exit(1)
else:
for i in all_files:
- test_shader_file(i, stats, args.folder, args.update, args.keep, args.opt, args.force_no_external_validation, backend)
+ e = test_shader_file(i, stats, args.folder, args.update, args.keep, args.opt, args.force_no_external_validation, backend)
+ if e is not None:
+ print('Error:', e)
+ sys.exit(1)
def test_shaders(backend, args):
if args.malisc: