Merge pull request #1143 from KhronosGroup/interlock

Merge SPV_EXT_fragment_shader_interlock with fixes
diff --git a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 0000000..8923f96
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+    _9.Store(0, uint(0));
+    img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+    img[int2(0, 0)] = img3[int2(0, 0)];
+    uint _39;
+    InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+    _42.Store(0, uint(int(_42.Load(0)) + 42));
+    uint _55;
+    _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+    frag_main();
+}
diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 0000000..f77b8ed
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+    device Buffer3* m_9 [[id(0)]];
+    texture2d<float, access::write> img4 [[id(1)]];
+    texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+    texture2d<float> img3 [[id(3), raster_order_group(0)]];
+    volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+    device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+    (*spvDescriptorSet0.m_9).baz = 0;
+    spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+    (*spvDescriptorSet0.m_34).foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 0000000..803416c
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+    _9.baz = 0;
+    img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+    _34.foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
diff --git a/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/reference/opt/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 0000000..46cca96
--- /dev/null
+++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/reference/opt/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 0000000..d60cd14
--- /dev/null
+++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/sample-interlock-ordered.frag b/reference/opt/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 0000000..67ca556
--- /dev/null
+++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/opt/shaders/frag/sample-interlock-unordered.frag b/reference/opt/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 0000000..ea74397
--- /dev/null
+++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..3268995
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
@@ -0,0 +1,32 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RWByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+    int _31 = int(gl_FragCoord.x);
+    _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+    int _39 = int(gl_FragCoord.x);
+    _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u);
+    callee2();
+}
+
+void frag_main()
+{
+    callee();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..6927712
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
@@ -0,0 +1,42 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RWByteAddressBuffer _13 : register(u2, space0);
+RasterizerOrderedByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+    int _44 = int(gl_FragCoord.x);
+    _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+    int _52 = int(gl_FragCoord.x);
+    _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u);
+    callee2();
+    if (true)
+    {
+    }
+}
+
+void _35()
+{
+    _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u);
+}
+
+void frag_main()
+{
+    callee();
+    _35();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..bd963a7
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
@@ -0,0 +1,42 @@
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0);
+RasterizerOrderedByteAddressBuffer _9 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+    int _37 = int(gl_FragCoord.x);
+    _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+    int _45 = int(gl_FragCoord.x);
+    _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u);
+    callee2();
+}
+
+void _29()
+{
+}
+
+void _31()
+{
+}
+
+void frag_main()
+{
+    callee();
+    _29();
+    _31();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+}
diff --git a/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
new file mode 100644
index 0000000..55b71de
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
@@ -0,0 +1,32 @@
+RasterizerOrderedByteAddressBuffer _14 : register(u1, space0);
+RasterizerOrderedByteAddressBuffer _35 : register(u0, space0);
+
+static float4 gl_FragCoord;
+struct SPIRV_Cross_Input
+{
+    float4 gl_FragCoord : SV_Position;
+};
+
+void callee2()
+{
+    int _25 = int(gl_FragCoord.x);
+    _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u);
+}
+
+void callee()
+{
+    int _38 = int(gl_FragCoord.x);
+    _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u);
+    callee2();
+}
+
+void frag_main()
+{
+    callee();
+}
+
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_FragCoord = stage_input.gl_FragCoord;
+    frag_main();
+}
diff --git a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 0000000..8923f96
--- /dev/null
+++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+    _9.Store(0, uint(0));
+    img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+    img[int2(0, 0)] = img3[int2(0, 0)];
+    uint _39;
+    InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+    _42.Store(0, uint(int(_42.Load(0)) + 42));
+    uint _55;
+    _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+    frag_main();
+}
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
new file mode 100644
index 0000000..1b6af2a
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
@@ -0,0 +1,35 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+    uint values1[1];
+};
+
+struct SSBO0
+{
+    uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+    int _31 = int(gl_FragCoord.x);
+    v_7.values1[_31]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+    int _39 = int(gl_FragCoord.x);
+    v_9.values0[_39]++;
+    callee2(gl_FragCoord, v_7);
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]])
+{
+    callee(gl_FragCoord, v_7, v_9);
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
new file mode 100644
index 0000000..dded6a1
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+    uint values1[1];
+};
+
+struct _12
+{
+    uint _m0[1];
+};
+
+struct SSBO0
+{
+    uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+    int _44 = int(gl_FragCoord.x);
+    v_7.values1[_44]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+    int _52 = int(gl_FragCoord.x);
+    v_9.values0[_52]++;
+    callee2(gl_FragCoord, v_7);
+    if (true)
+    {
+    }
+}
+
+inline void _35(thread float4& gl_FragCoord, device _12& v_13)
+{
+    v_13._m0[int(gl_FragCoord.x)] = 4u;
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+    callee(gl_FragCoord, v_7, v_9);
+    _35(gl_FragCoord, v_13);
+}
+
diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
new file mode 100644
index 0000000..5fe65f3
--- /dev/null
+++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
@@ -0,0 +1,45 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+    uint values1[1];
+};
+
+struct SSBO0
+{
+    uint values0[1];
+};
+
+inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7)
+{
+    int _37 = int(gl_FragCoord.x);
+    v_7.values1[_37]++;
+}
+
+inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9)
+{
+    int _45 = int(gl_FragCoord.x);
+    v_9.values0[_45]++;
+    callee2(gl_FragCoord, v_7);
+}
+
+inline void _29()
+{
+}
+
+inline void _31()
+{
+}
+
+fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+    callee(gl_FragCoord, v_7, v_9);
+    _29();
+    _31();
+}
+
diff --git a/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
new file mode 100644
index 0000000..716ba25
--- /dev/null
+++ b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
@@ -0,0 +1,35 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO1
+{
+    uint values1[1];
+};
+
+struct SSBO0
+{
+    uint values0[1];
+};
+
+inline void callee2(device SSBO1& v_14, thread float4& gl_FragCoord)
+{
+    int _25 = int(gl_FragCoord.x);
+    v_14.values1[_25]++;
+}
+
+inline void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35)
+{
+    int _38 = int(gl_FragCoord.x);
+    v_35.values0[_38]++;
+    callee2(v_14, gl_FragCoord);
+}
+
+fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]])
+{
+    callee(v_14, gl_FragCoord, v_35);
+}
+
diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 0000000..f77b8ed
--- /dev/null
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+    device Buffer3* m_9 [[id(0)]];
+    texture2d<float, access::write> img4 [[id(1)]];
+    texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+    texture2d<float> img3 [[id(3), raster_order_group(0)]];
+    volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+    device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+    (*spvDescriptorSet0.m_9).baz = 0;
+    spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+    (*spvDescriptorSet0.m_34).foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 0000000..803416c
--- /dev/null
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+    _9.baz = 0;
+    img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+    _34.foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
new file mode 100644
index 0000000..948803c
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
@@ -0,0 +1,39 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    uint values1[];
+} _7;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    uint values0[];
+} _9;
+
+void callee2()
+{
+    int _31 = int(gl_FragCoord.x);
+    _7.values1[_31]++;
+}
+
+void callee()
+{
+    int _39 = int(gl_FragCoord.x);
+    _9.values0[_39]++;
+    callee2();
+}
+
+void spvMainInterlockedBody()
+{
+    callee();
+}
+
+void main()
+{
+    // Interlocks were used in a way not compatible with GLSL, this is very slow.
+    beginInvocationInterlockARB();
+    spvMainInterlockedBody();
+    endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
new file mode 100644
index 0000000..72dca0d
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
@@ -0,0 +1,53 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    uint values1[];
+} _7;
+
+layout(binding = 2, std430) buffer _12_13
+{
+    uint _m0[];
+} _13;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    uint values0[];
+} _9;
+
+void callee2()
+{
+    int _44 = int(gl_FragCoord.x);
+    _7.values1[_44]++;
+}
+
+void callee()
+{
+    int _52 = int(gl_FragCoord.x);
+    _9.values0[_52]++;
+    callee2();
+    if (true)
+    {
+    }
+}
+
+void _35()
+{
+    _13._m0[int(gl_FragCoord.x)] = 4u;
+}
+
+void spvMainInterlockedBody()
+{
+    callee();
+    _35();
+}
+
+void main()
+{
+    // Interlocks were used in a way not compatible with GLSL, this is very slow.
+    beginInvocationInterlockARB();
+    spvMainInterlockedBody();
+    endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
new file mode 100644
index 0000000..b09eb66
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
@@ -0,0 +1,49 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    uint values1[];
+} _7;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    uint values0[];
+} _9;
+
+void callee2()
+{
+    int _37 = int(gl_FragCoord.x);
+    _7.values1[_37]++;
+}
+
+void callee()
+{
+    int _45 = int(gl_FragCoord.x);
+    _9.values0[_45]++;
+    callee2();
+}
+
+void _29()
+{
+}
+
+void _31()
+{
+}
+
+void spvMainInterlockedBody()
+{
+    callee();
+    _29();
+    _31();
+}
+
+void main()
+{
+    // Interlocks were used in a way not compatible with GLSL, this is very slow.
+    beginInvocationInterlockARB();
+    spvMainInterlockedBody();
+    endInvocationInterlockARB();
+}
diff --git a/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
new file mode 100644
index 0000000..151ed01
--- /dev/null
+++ b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
@@ -0,0 +1,34 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 1, std430) buffer SSBO1
+{
+    uint values1[];
+} _14;
+
+layout(binding = 0, std430) buffer SSBO0
+{
+    uint values0[];
+} _35;
+
+void callee2()
+{
+    int _25 = int(gl_FragCoord.x);
+    _14.values1[_25]++;
+}
+
+void callee()
+{
+    int _38 = int(gl_FragCoord.x);
+    _35.values0[_38]++;
+    callee2();
+}
+
+void main()
+{
+    beginInvocationInterlockARB();
+    callee();
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/pixel-interlock-ordered.frag b/reference/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 0000000..46cca96
--- /dev/null
+++ b/reference/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/pixel-interlock-unordered.frag b/reference/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 0000000..d60cd14
--- /dev/null
+++ b/reference/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/sample-interlock-ordered.frag b/reference/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 0000000..67ca556
--- /dev/null
+++ b/reference/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+    endInvocationInterlockARB();
+}
+
diff --git a/reference/shaders/frag/sample-interlock-unordered.frag b/reference/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 0000000..ea74397
--- /dev/null
+++ b/reference/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..ebd8d6b
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_
+               OpReturn
+               OpFunctionEnd
+   %callee2_ = OpFunction %void None %3
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+               OpBeginInvocationInterlockEXT
+         %43 = OpFunctionCall %void %callee2_
+               OpEndInvocationInterlockEXT
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..69b8f91
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+
+               OpMemberDecorate %SSBO2 0 Offset 0
+               OpDecorate %SSBO2 BufferBlock
+               OpDecorate %ssbo2 DescriptorSet 0
+               OpDecorate %ssbo2 Binding 2
+
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+      %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+          %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+	  %uint_4 = OpConstant %uint 4
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+	%bool = OpTypeBool
+	%true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; calls callee_, then callee3_
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; callee_ holds the interlock begin/end pair
+		 %callee3_res = OpFunctionCall %void %callee3_ ; callee3_ writes to %ssbo2 and has no interlock instructions
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3 ; callee3_: stores the constant 4 into one uint element of %ssbo2 (SSBO2)
+   	%calle3_block = OpLabel ; NOTE(review): label is spelled "calle3"; nothing in this file references it
+         %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+         %frag_coord_int = OpConvertFToS %int %frag_coord_x ; float -> signed int; used below as the array index
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int ; member 0 of %ssbo2, element %frag_coord_int
+		 OpStore %ssbo_ptr %uint_4 ; plain store, no load of the previous value
+	OpReturn
+	OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: updates %__0 (SSBO0), calls callee2, then reaches an interlock begin/end pair inside a conditional block
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_ ; called before the selection that holds the interlock
+
+		 OpSelectionMerge %merged_block None ; selection construct merging at %merged_block
+		 OpBranchConditional %true %dummy_block %merged_block ; the condition is the constant %true
+		 %dummy_block = OpLabel ; the only block in this function with interlock instructions
+		 	OpBeginInvocationInterlockEXT
+		 	OpEndInvocationInterlockEXT ; follows begin directly; nothing sits between them
+			OpBranch %merged_block
+
+			%merged_block = OpLabel
+               OpReturn
+
+               OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
new file mode 100644
index 0000000..7c0fe9a
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; calls callee_, callee3_, callee4_ in that order
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; buffer updates (callee_ and callee2_) are called before the interlock begins
+		 %call3res = OpFunctionCall %void %callee3_ ; callee3_ contains only the interlock begin
+		 %call4res = OpFunctionCall %void %callee4_ ; callee4_ contains only the interlock end
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3 ; callee3_: does nothing except begin the invocation interlock
+   	      %begin3 = OpLabel
+               OpBeginInvocationInterlockEXT ; the matching end is in callee4_, a different function
+			   OpReturn
+               OpFunctionEnd
+
+   %callee4_ = OpFunction %void None %3 ; callee4_: does nothing except end the invocation interlock
+   	      %begin4 = OpLabel
+               OpEndInvocationInterlockEXT ; the matching begin is in callee3_, a different function
+			   OpReturn
+               OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: adds 1 to one uint element of %__0 (SSBO0), then calls callee2; no interlock instructions here
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_ ; callee2_ performs the matching update on %_
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
new file mode 100644
index 0000000..59079fe
--- /dev/null
+++ b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+	uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+	uint values1[];
+};
+
+void callee2() // Adds 1 to one element of values1 (the SSBO1 array).
+{
+	values1[int(gl_FragCoord.x)] += 1; // index is gl_FragCoord.x converted to int
+}
+
+void callee() // Adds 1 to one element of values0 (the SSBO0 array), then calls callee2().
+{
+	values0[int(gl_FragCoord.x)] += 1; // index is gl_FragCoord.x converted to int
+	callee2(); // performs the same kind of update on values1
+}
+
+void main() // Wraps the whole callee() call chain in the invocation interlock.
+{
+	beginInvocationInterlockARB();
+	callee(); // both buffer updates (values0 here, values1 via callee2) happen inside this call
+	endInvocationInterlockARB();
+}
diff --git a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
new file mode 100644
index 0000000..ceac8cc
--- /dev/null
+++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main() // Touches baz and img4 before the interlock, and img, img3, img2, foo, bar, quux inside it.
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0; // Buffer3 member
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); // copies texel (0, 0) from img3 to img
+	imageAtomicAdd(img2, ivec2(0, 0), 1u); // atomic on the r32ui image
+	foo += 42; // non-atomic read-modify-write on the coherent Buffer block
+	atomicAnd(bar, quux); // atomic on Buffer.bar; the operand is read from Buffer2
+	endInvocationInterlockARB();
+}
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
new file mode 100644
index 0000000..ebd8d6b
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; only calls callee_
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; the interlock begin/end pair lives in callee_, not here
+               OpReturn
+               OpFunctionEnd
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: adds 1 to one uint element of %__0 (SSBO0), then calls callee2 between interlock begin/end
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41 ; this store is issued before the interlock begin below
+               OpBeginInvocationInterlockEXT
+         %43 = OpFunctionCall %void %callee2_ ; the only instruction between begin and end
+               OpEndInvocationInterlockEXT
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
new file mode 100644
index 0000000..69b8f91
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+
+               OpMemberDecorate %SSBO2 0 Offset 0
+               OpDecorate %SSBO2 BufferBlock
+               OpDecorate %ssbo2 DescriptorSet 0
+               OpDecorate %ssbo2 Binding 2
+
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+      %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+          %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+	  %uint_4 = OpConstant %uint 4
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+	%bool = OpTypeBool
+	%true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; calls callee_, then callee3_
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; callee_ holds the interlock begin/end pair
+		 %callee3_res = OpFunctionCall %void %callee3_ ; callee3_ writes to %ssbo2 and has no interlock instructions
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3 ; callee3_: stores the constant 4 into one uint element of %ssbo2 (SSBO2)
+   	%calle3_block = OpLabel ; NOTE(review): label is spelled "calle3"; nothing in this file references it
+         %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+         %frag_coord_int = OpConvertFToS %int %frag_coord_x ; float -> signed int; used below as the array index
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int ; member 0 of %ssbo2, element %frag_coord_int
+		 OpStore %ssbo_ptr %uint_4 ; plain store, no load of the previous value
+	OpReturn
+	OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: updates %__0 (SSBO0), calls callee2, then reaches an interlock begin/end pair inside a conditional block
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_ ; called before the selection that holds the interlock
+
+		 OpSelectionMerge %merged_block None ; selection construct merging at %merged_block
+		 OpBranchConditional %true %dummy_block %merged_block ; the condition is the constant %true
+		 %dummy_block = OpLabel ; the only block in this function with interlock instructions
+		 	OpBeginInvocationInterlockEXT
+		 	OpEndInvocationInterlockEXT ; follows begin directly; nothing sits between them
+			OpBranch %merged_block
+
+			%merged_block = OpLabel
+               OpReturn
+
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
new file mode 100644
index 0000000..7c0fe9a
--- /dev/null
+++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; calls callee_, callee3_, callee4_ in that order
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; buffer updates (callee_ and callee2_) are called before the interlock begins
+		 %call3res = OpFunctionCall %void %callee3_ ; callee3_ contains only the interlock begin
+		 %call4res = OpFunctionCall %void %callee4_ ; callee4_ contains only the interlock end
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3 ; callee3_: does nothing except begin the invocation interlock
+   	      %begin3 = OpLabel
+               OpBeginInvocationInterlockEXT ; the matching end is in callee4_, a different function
+			   OpReturn
+               OpFunctionEnd
+
+   %callee4_ = OpFunction %void None %3 ; callee4_: does nothing except end the invocation interlock
+   	      %begin4 = OpLabel
+               OpEndInvocationInterlockEXT ; the matching begin is in callee3_, a different function
+			   OpReturn
+               OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: adds 1 to one uint element of %__0 (SSBO0), then calls callee2; no interlock instructions here
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_ ; callee2_ performs the matching update on %_
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
new file mode 100644
index 0000000..59079fe
--- /dev/null
+++ b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+	uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+	uint values1[];
+};
+
+void callee2() // Adds 1 to one element of values1 (the SSBO1 array).
+{
+	values1[int(gl_FragCoord.x)] += 1; // index is gl_FragCoord.x converted to int
+}
+
+void callee() // Adds 1 to one element of values0 (the SSBO0 array), then calls callee2().
+{
+	values0[int(gl_FragCoord.x)] += 1; // index is gl_FragCoord.x converted to int
+	callee2(); // performs the same kind of update on values1
+}
+
+void main() // Wraps the whole callee() call chain in the invocation interlock.
+{
+	beginInvocationInterlockARB();
+	callee(); // both buffer updates (values0 here, values1 via callee2) happen inside this call
+	endInvocationInterlockARB();
+}
diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
new file mode 100644
index 0000000..04886a6
--- /dev/null
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main() // Touches baz and img4 before the interlock, and img, img3, foo, bar, quux inside it.
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0; // Buffer3 member
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); // copies texel (0, 0) from img3 to img
+	//imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42; // non-atomic read-modify-write on the coherent Buffer block
+	atomicAnd(bar, quux); // atomic on Buffer.bar; the operand is read from Buffer2
+	endInvocationInterlockARB();
+}
diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
new file mode 100644
index 0000000..04886a6
--- /dev/null
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main() // Touches baz and img4 before the interlock, and img, img3, foo, bar, quux inside it.
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0; // Buffer3 member
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); // copies texel (0, 0) from img3 to img
+	//imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42; // non-atomic read-modify-write on the coherent Buffer block
+	atomicAnd(bar, quux); // atomic on Buffer.bar; the operand is read from Buffer2
+	endInvocationInterlockARB();
+}
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
new file mode 100644
index 0000000..ebd8d6b
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag
@@ -0,0 +1,89 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3 ; main: the Fragment entry point; only calls callee_
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_ ; the interlock begin/end pair lives in callee_, not here
+               OpReturn
+               OpFunctionEnd
+   %callee2_ = OpFunction %void None %3 ; callee2: adds 1 to one uint element of buffer %_ (SSBO1)
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24 ; float -> signed int; used below as the array index
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; member 0 of %_, element %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1 ; loaded value + 1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 ; second chain to the same element, used for the store
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3 ; callee: adds 1 to one uint element of %__0 (SSBO0), then calls callee2 between interlock begin/end
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 ; pointer to component 0 of gl_FragCoord
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37 ; float -> signed int; used below as the array index
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 ; member 0 of %__0, element %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1 ; loaded value + 1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41 ; this store is issued before the interlock begin below
+               OpBeginInvocationInterlockEXT
+         %43 = OpFunctionCall %void %callee2_ ; the only instruction between begin and end
+               OpEndInvocationInterlockEXT
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
new file mode 100644
index 0000000..69b8f91
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag
@@ -0,0 +1,121 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+
+               OpMemberDecorate %SSBO2 0 Offset 0
+               OpDecorate %SSBO2 BufferBlock
+               OpDecorate %ssbo2 DescriptorSet 0
+               OpDecorate %ssbo2 Binding 2
+
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+      %SSBO2 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+          %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+	  %uint_4 = OpConstant %uint 4
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+	%bool = OpTypeBool
+	%true = OpConstantTrue %bool
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_
+		 %callee3_res = OpFunctionCall %void %callee3_
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3
+   	%calle3_block = OpLabel
+         %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %frag_coord_x = OpLoad %float %frag_coord_x_ptr
+         %frag_coord_int = OpConvertFToS %int %frag_coord_x
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int
+		 OpStore %ssbo_ptr %uint_4
+	OpReturn
+	OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_
+
+		 OpSelectionMerge %merged_block None
+		 OpBranchConditional %true %dummy_block %merged_block
+		 %dummy_block = OpLabel
+		 	OpBeginInvocationInterlockEXT
+		 	OpEndInvocationInterlockEXT
+			OpBranch %merged_block
+
+			%merged_block = OpLabel
+               OpReturn
+
+               OpFunctionEnd
diff --git a/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
new file mode 100644
index 0000000..7c0fe9a
--- /dev/null
+++ b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag
@@ -0,0 +1,102 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 45
+; Schema: 0
+               OpCapability Shader
+               OpCapability FragmentShaderPixelInterlockEXT
+               OpExtension "SPV_EXT_fragment_shader_interlock"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %gl_FragCoord
+               OpExecutionMode %main OriginUpperLeft
+               OpExecutionMode %main PixelInterlockOrderedEXT
+               OpSource GLSL 450
+               OpSourceExtension "GL_ARB_fragment_shader_interlock"
+               OpName %main "main"
+               OpName %callee2_ "callee2("
+               OpName %callee_ "callee("
+               OpName %SSBO1 "SSBO1"
+               OpMemberName %SSBO1 0 "values1"
+               OpName %_ ""
+               OpName %gl_FragCoord "gl_FragCoord"
+               OpName %SSBO0 "SSBO0"
+               OpMemberName %SSBO0 0 "values0"
+               OpName %__0 ""
+               OpDecorate %_runtimearr_uint ArrayStride 4
+               OpMemberDecorate %SSBO1 0 Offset 0
+               OpDecorate %SSBO1 BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 1
+               OpDecorate %gl_FragCoord BuiltIn FragCoord
+               OpDecorate %_runtimearr_uint_0 ArrayStride 4
+               OpMemberDecorate %SSBO0 0 Offset 0
+               OpDecorate %SSBO0 BufferBlock
+               OpDecorate %__0 DescriptorSet 0
+               OpDecorate %__0 Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+%_runtimearr_uint = OpTypeRuntimeArray %uint
+      %SSBO1 = OpTypeStruct %_runtimearr_uint
+%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1
+          %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%gl_FragCoord = OpVariable %_ptr_Input_v4float Input
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%_runtimearr_uint_0 = OpTypeRuntimeArray %uint
+      %SSBO0 = OpTypeStruct %_runtimearr_uint_0
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0
+        %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %44 = OpFunctionCall %void %callee_
+		 %call3res = OpFunctionCall %void %callee3_
+		 %call4res = OpFunctionCall %void %callee4_
+               OpReturn
+               OpFunctionEnd
+
+   %callee3_ = OpFunction %void None %3
+   	      %begin3 = OpLabel
+               OpBeginInvocationInterlockEXT
+			   OpReturn
+               OpFunctionEnd
+
+   %callee4_ = OpFunction %void None %3
+   	      %begin4 = OpLabel
+               OpEndInvocationInterlockEXT
+			   OpReturn
+               OpFunctionEnd
+
+   %callee2_ = OpFunction %void None %3
+          %7 = OpLabel
+         %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %24 = OpLoad %float %23
+         %25 = OpConvertFToS %int %24
+         %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+         %29 = OpLoad %uint %28
+         %30 = OpIAdd %uint %29 %uint_1
+         %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25
+               OpStore %31 %30
+               OpReturn
+               OpFunctionEnd
+    %callee_ = OpFunction %void None %3
+          %9 = OpLabel
+         %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0
+         %37 = OpLoad %float %36
+         %38 = OpConvertFToS %int %37
+         %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+         %40 = OpLoad %uint %39
+         %41 = OpIAdd %uint %40 %uint_1
+         %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38
+               OpStore %42 %41
+         %43 = OpFunctionCall %void %callee2_
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
new file mode 100644
index 0000000..59079fe
--- /dev/null
+++ b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(set = 0, binding = 0, std430) buffer SSBO0
+{
+	uint values0[];
+};
+
+layout(set = 0, binding = 1, std430) buffer SSBO1
+{
+	uint values1[];
+};
+
+void callee2()
+{
+	values1[int(gl_FragCoord.x)] += 1;
+}
+
+void callee()
+{
+	values0[int(gl_FragCoord.x)] += 1;
+	callee2();
+}
+
+void main()
+{
+	beginInvocationInterlockARB();
+	callee();
+	endInvocationInterlockARB();
+}
diff --git a/shaders/frag/pixel-interlock-ordered.frag b/shaders/frag/pixel-interlock-ordered.frag
new file mode 100644
index 0000000..4439f06
--- /dev/null
+++ b/shaders/frag/pixel-interlock-ordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
diff --git a/shaders/frag/pixel-interlock-unordered.frag b/shaders/frag/pixel-interlock-unordered.frag
new file mode 100644
index 0000000..f8fd468
--- /dev/null
+++ b/shaders/frag/pixel-interlock-unordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
diff --git a/shaders/frag/sample-interlock-ordered.frag b/shaders/frag/sample-interlock-ordered.frag
new file mode 100644
index 0000000..fa80dc9
--- /dev/null
+++ b/shaders/frag/sample-interlock-ordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, gl_SampleMaskIn[0]);
+	endInvocationInterlockARB();
+}
diff --git a/shaders/frag/sample-interlock-unordered.frag b/shaders/frag/sample-interlock-unordered.frag
new file mode 100644
index 0000000..6fe5437
--- /dev/null
+++ b/shaders/frag/sample-interlock-unordered.frag
@@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index a83e14e..c4913ec 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -1507,6 +1507,7 @@
 bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
 {
 	handler.set_current_block(block);
+	handler.rearm_current_block(block);
 
 	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
 	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
@@ -1530,6 +1531,8 @@
 					return false;
 				if (!handler.end_function_scope(ops, i.length))
 					return false;
+
+				handler.rearm_current_block(block);
 			}
 		}
 	}
@@ -3798,7 +3801,12 @@
 const CFG &Compiler::get_cfg_for_current_function() const
 {
 	assert(current_function);
-	auto cfg_itr = function_cfgs.find(current_function->self);
+	return get_cfg_for_function(current_function->self);
+}
+
+const CFG &Compiler::get_cfg_for_function(uint32_t id) const
+{
+	auto cfg_itr = function_cfgs.find(id);
 	assert(cfg_itr != end(function_cfgs));
 	assert(cfg_itr->second);
 	return *cfg_itr->second;
@@ -4249,6 +4257,317 @@
 	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
 }
 
+bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
+{
+	if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) // The prepass only inspects the interlock markers.
+	{
+		if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
+		{
+			// An interlock op was already seen in a different function. We have no sensible way of
+			// dealing with this other than taking the 100% conservative approach, exit early.
+			split_function_case = true;
+			return false;
+		}
+		else
+		{
+			interlock_function_id = call_stack.back(); // The function on top of the call stack contains this interlock op.
+			// If the interlock op is executed inside control flow we have a problem.
+			auto &cfg = compiler.get_cfg_for_function(interlock_function_id);
+
+			uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
+			bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
+			if (!outside_control_flow)
+				control_flow_interlock = true; // Only recorded here; the traversal keeps going.
+		}
+	}
+	return true;
+}
+
+void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
+{
+	current_block_id = block.self; // Remembered so handle() knows which block an interlock op was found in.
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+	if (length < 3) // Too few operands to contain args[2]; abort the traversal.
+		return false;
+	call_stack.push_back(args[2]); // args[2] is used as the id of the function being entered (presumably the OpFunctionCall callee operand).
+	return true;
+}
+
+bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+	call_stack.pop_back(); // Undo the push made by begin_function_scope() for this call.
+	return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length)
+{
+	if (length < 3) // Too few operands to contain args[2]; abort the traversal.
+		return false;
+
+	if (args[2] == interlock_function_id)
+		call_stack_is_interlocked = true; // NOTE(review): looks unreachable when the interlock function is the entry point itself, which is never entered through a call -- confirm.
+
+	call_stack.push_back(args[2]); // args[2] is used as the id of the function being entered.
+	return true;
+}
+
+bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t)
+{
+	if (call_stack.back() == interlock_function_id)
+		call_stack_is_interlocked = false; // Returning from the function that contains the interlock ops.
+
+	call_stack.pop_back(); // Undo the push made by begin_function_scope() for this call.
+	return true;
+}
+
+void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id)
+{
+	if ((use_critical_section && in_crit_sec) || // Simple case: the access happens after the begin op was seen.
+	    (control_flow_interlock && call_stack_is_interlocked) || // Interlock in control flow: access happens under the interlock function.
+	    split_function_case) // Interlock ops split across functions: every access is recorded.
+	{
+		compiler.interlocked_resources.insert(id);
+	}
+}
+
+bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+	// Only track the begin/end markers if we have the simple case; otherwise access_potential_resource() relies on the other flags.
+	if (use_critical_section)
+	{
+		if (opcode == OpBeginInvocationInterlockEXT)
+		{
+			in_crit_sec = true;
+			return true;
+		}
+
+		if (opcode == OpEndInvocationInterlockEXT)
+		{
+			// End critical section--nothing more to do, so report false to the traversal.
+			return false;
+		}
+	}
+
+	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
+	switch (opcode)
+	{
+	case OpLoad:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2]; // Operands are read as: result type, result id, pointer.
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniformConstant:
+		{
+			// Not recorded as an access here; only register the loaded id so later ops can be traced back to ptr.
+			uint32_t result_type = args[0];
+			uint32_t id = args[1];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+			break;
+		}
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassStorageBuffer:
+			access_potential_resource(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpInBoundsAccessChain:
+	case OpAccessChain:
+	case OpPtrAccessChain:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+
+		auto &type = compiler.get<SPIRType>(result_type);
+		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+		    type.storage == StorageClassStorageBuffer)
+		{
+			// Access chains are not accesses themselves; register the result id so it stays linked to its base pointer.
+			uint32_t id = args[1];
+			uint32_t ptr = args[2];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+			compiler.ir.ids[id].set_allow_type_rewrite();
+		}
+		break;
+	}
+
+	case OpImageTexelPointer:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var)
+			e.loaded_from = var->self; // Link the texel pointer to its image variable.
+		break;
+	}
+
+	case OpStore:
+	case OpImageWrite:
+	case OpAtomicStore:
+	{
+		if (length < 1)
+			return false;
+
+		uint32_t ptr = args[0]; // For these ops the destination is read from the first operand.
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+		{
+			access_potential_resource(var->self);
+		}
+
+		break;
+	}
+
+	case OpCopyMemory:
+	{
+		if (length < 2)
+			return false;
+
+		uint32_t dst = args[0];
+		uint32_t src = args[1];
+		auto *dst_var = compiler.maybe_get_backing_variable(dst);
+		auto *src_var = compiler.maybe_get_backing_variable(src);
+
+		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
+			access_potential_resource(dst_var->self);
+
+		if (src_var)
+		{
+			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
+				break;
+
+			if (src_var->storage == StorageClassUniform && // Uniform-class sources only count when the block is a BufferBlock.
+			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
+			{
+				break;
+			}
+
+			access_potential_resource(src_var->self);
+		}
+
+		break;
+	}
+
+	case OpImageRead:
+	case OpAtomicLoad:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2]; // Operands are read as: result type, result id, image or pointer.
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassUniformConstant:
+		case StorageClassStorageBuffer:
+			access_potential_resource(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpAtomicExchange:
+	case OpAtomicCompareExchange:
+	case OpAtomicIIncrement:
+	case OpAtomicIDecrement:
+	case OpAtomicIAdd:
+	case OpAtomicISub:
+	case OpAtomicSMin:
+	case OpAtomicUMin:
+	case OpAtomicSMax:
+	case OpAtomicUMax:
+	case OpAtomicAnd:
+	case OpAtomicOr:
+	case OpAtomicXor:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2]; // Operands are read as: result type, result id, pointer.
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+		{
+			access_potential_resource(var->self);
+		}
+
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return true;
+}
+
+void Compiler::analyze_interlocked_resource_usage()
+{
+	if (get_execution_model() == ExecutionModelFragment &&
+	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
+	{
+		InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
+		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler); // Pass 1: locate the interlock ops.
+
+		InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
+		handler.interlock_function_id = prepass_handler.interlock_function_id;
+		handler.split_function_case = prepass_handler.split_function_case;
+		handler.control_flow_interlock = prepass_handler.control_flow_interlock;
+		handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; // Precise tracking only in the simple case.
+
+		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); // Pass 2: fill interlocked_resources.
+
+		// For GLSL. If precise tracking is off or the interlock ops are not in the entry point, we have to fall back to conservative approach.
+		interlocked_is_complex = !handler.use_critical_section ||
+		                         handler.interlock_function_id != ir.default_entry_point;
+	}
+}
+
 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
 {
 	if (!type.pointer)
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index 90cc995..28ccba6 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -710,6 +710,13 @@
 		{
 		}
 
+		// Called when a block is entered and again after traversal returns to it from a function call,
+		// so it can be called multiple times per block,
+		// while set_current_block is only called on block entry. The default implementation does nothing.
+		virtual void rearm_current_block(const SPIRBlock &)
+		{
+		}
+
 		virtual bool begin_function_scope(const uint32_t *, uint32_t)
 		{
 			return true;
@@ -884,10 +891,11 @@
 	void build_function_control_flow_graphs_and_analyze();
 	std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs;
 	const CFG &get_cfg_for_current_function() const;
+	const CFG &get_cfg_for_function(uint32_t id) const;
 
 	struct CFGBuilder : OpcodeHandler
 	{
-		CFGBuilder(Compiler &compiler_);
+		explicit CFGBuilder(Compiler &compiler_);
 
 		bool follow_function_call(const SPIRFunction &func) override;
 		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
@@ -932,7 +940,7 @@
 
 	struct PhysicalStorageBufferPointerHandler : OpcodeHandler
 	{
-		PhysicalStorageBufferPointerHandler(Compiler &compiler_);
+		explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_);
 		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
 		Compiler &compiler;
 		std::unordered_set<uint32_t> types;
@@ -945,6 +953,61 @@
 	                              bool single_function);
 	bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
 
+	// Finds all resources that are accessed (read or written) from inside the critical section, if present.
+	// The critical section is delimited by OpBeginInvocationInterlockEXT and
+	// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, such resources
+	// must be placed in a raster order group.
+	struct InterlockedResourceAccessHandler : OpcodeHandler
+	{
+		InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id)
+		    : compiler(compiler_)
+		{
+			call_stack.push_back(entry_point_id); // The entry point is the bottom of the tracked call stack.
+		}
+
+		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+		bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+		bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+		Compiler &compiler;
+		bool in_crit_sec = false; // Set once OpBeginInvocationInterlockEXT is seen while use_critical_section is on.
+
+		uint32_t interlock_function_id = 0; // Copied from the prepass handler by analyze_interlocked_resource_usage().
+		bool split_function_case = false; // Copied from the prepass handler.
+		bool control_flow_interlock = false; // Copied from the prepass handler.
+		bool use_critical_section = false; // True only when neither of the two flags above is set.
+		bool call_stack_is_interlocked = false; // True between entering and leaving interlock_function_id through a call.
+		SmallVector<uint32_t> call_stack;
+
+		void access_potential_resource(uint32_t id); // Inserts id into Compiler::interlocked_resources when the current state qualifies.
+	};
+
+	struct InterlockedResourceAccessPrepassHandler : OpcodeHandler
+	{
+		InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id)
+			: compiler(compiler_)
+		{
+			call_stack.push_back(entry_point_id); // The entry point is the bottom of the tracked call stack.
+		}
+
+		void rearm_current_block(const SPIRBlock &block) override;
+		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+		bool begin_function_scope(const uint32_t *args, uint32_t length) override;
+		bool end_function_scope(const uint32_t *args, uint32_t length) override;
+
+		Compiler &compiler;
+		uint32_t interlock_function_id = 0; // 0 until an interlock op is seen, then the function it was found in.
+		uint32_t current_block_id = 0; // Updated by rearm_current_block().
+		bool split_function_case = false; // Interlock ops were found in more than one function.
+		bool control_flow_interlock = false; // An interlock op was found inside control flow of its function.
+		SmallVector<uint32_t> call_stack;
+	};
+
+	void analyze_interlocked_resource_usage();
+	// The set of all resources accessed (read or written) while inside the critical section, if present.
+	std::unordered_set<uint32_t> interlocked_resources;
+	bool interlocked_is_complex = false; // Set by analyze_interlocked_resource_usage(); selects the conservative GLSL fallback.
+
 	void make_constant_null(uint32_t id, uint32_t type);
 
 	std::unordered_map<uint32_t, std::string> declared_block_names;
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 3619f09..a8f7a86 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -511,6 +511,7 @@
 	fixup_image_load_store_access();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
+	analyze_interlocked_resource_usage();
 
 	// Shaders might cast unrelated data to pointers of non-block types.
 	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
@@ -535,6 +536,25 @@
 		pass_count++;
 	} while (is_forcing_recompilation());
 
+	// Implement the interlocked wrapper function at the end.
+	// The body was implemented in lieu of main().
+	if (interlocked_is_complex)
+	{
+		statement("void main()");
+		begin_scope();
+		statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
+		if (options.es)
+			statement("beginInvocationInterlockNV();");
+		else
+			statement("beginInvocationInterlockARB();");
+		statement("spvMainInterlockedBody();");
+		if (options.es)
+			statement("endInvocationInterlockNV();");
+		else
+			statement("endInvocationInterlockARB();");
+		end_scope();
+	}
+
 	// Entry point in GLSL is always main().
 	get_entry_point().name = "main";
 
@@ -605,6 +625,26 @@
 	if (execution.flags.get(ExecutionModePostDepthCoverage))
 		require_extension_internal("GL_ARB_post_depth_coverage");
 
+	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
+	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+	{
+		if (options.es)
+		{
+			if (options.version < 310)
+				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
+			require_extension_internal("GL_NV_fragment_shader_interlock");
+		}
+		else
+		{
+			if (options.version < 420)
+				require_extension_internal("GL_ARB_shader_image_load_store");
+			require_extension_internal("GL_ARB_fragment_shader_interlock");
+		}
+	}
+
 	for (auto &ext : forced_extensions)
 	{
 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@@ -784,6 +824,15 @@
 		if (execution.flags.get(ExecutionModePostDepthCoverage))
 			inputs.push_back("post_depth_coverage");
 
+		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
+			inputs.push_back("pixel_interlock_ordered");
+		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
+			inputs.push_back("pixel_interlock_unordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
+			inputs.push_back("sample_interlock_ordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+			inputs.push_back("sample_interlock_unordered");
+
 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
 			statement("layout(depth_greater) out float gl_FragDepth;");
 		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
@@ -10109,6 +10158,34 @@
 		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
 		break;
 
+	case OpBeginInvocationInterlockEXT:
+		// If the interlock is complex, we emit this elsewhere.
+		if (!interlocked_is_complex)
+		{
+			if (options.es)
+				statement("beginInvocationInterlockNV();");
+			else
+				statement("beginInvocationInterlockARB();");
+
+			flush_all_active_variables();
+			// Make sure forwarding doesn't propagate outside interlock region.
+		}
+		break;
+
+	case OpEndInvocationInterlockEXT:
+		// If the interlock is complex, we emit this elsewhere.
+		if (!interlocked_is_complex)
+		{
+			if (options.es)
+				statement("endInvocationInterlockNV();");
+			else
+				statement("endInvocationInterlockARB();");
+
+			flush_all_active_variables();
+			// Make sure forwarding doesn't propagate outside interlock region.
+		}
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@@ -11022,7 +11099,13 @@
 
 	if (func.self == ir.default_entry_point)
 	{
-		decl += "main";
+		// If we need complex fallback in GLSL, we just wrap main() in a function
+		// and interlock the entire shader ...
+		if (interlocked_is_complex)
+			decl += "spvMainInterlockedBody";
+		else
+			decl += "main";
+
 		processing_entry_point = true;
 	}
 	else
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 9658d11..0df8906 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -203,7 +203,7 @@
 	}
 }
 
-string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
+string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
 {
 	auto &imagetype = get<SPIRType>(type.image.type);
 	const char *dim = nullptr;
@@ -235,7 +235,12 @@
 		if (type.image.sampled == 1)
 			return join("Buffer<", type_to_glsl(imagetype), components, ">");
 		else if (type.image.sampled == 2)
+		{
+			if (interlocked_resources.count(id))
+				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
+				            ">");
 			return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
+		}
 		else
 			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 	case DimSubpassData:
@@ -248,6 +253,8 @@
 	const char *arrayed = type.image.arrayed ? "Array" : "";
 	const char *ms = type.image.ms ? "MS" : "";
 	const char *rw = typed_load ? "RW" : "";
+	if (typed_load && interlocked_resources.count(id))
+		rw = "RasterizerOrdered";
 	return join(rw, "Texture", dim, ms, arrayed, "<",
 	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
 	                         join(type_to_glsl(imagetype), components),
@@ -1848,9 +1855,13 @@
 		Bitset flags = ir.get_buffer_block_flags(var);
 		bool is_readonly = flags.get(DecorationNonWritable);
 		bool is_coherent = flags.get(DecorationCoherent);
+		bool is_interlocked = interlocked_resources.count(var.self) > 0;
+		const char *type_name = "ByteAddressBuffer ";
+		if (!is_readonly)
+			type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
 		add_resource_name(var.self);
-		statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
-		          to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
+		statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
+		          to_resource_binding(var), ";");
 	}
 	else
 	{
@@ -4673,6 +4684,12 @@
 	case OpIsHelperInvocationEXT:
 		SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");
 
+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (hlsl_options.shader_model < 51)
+			SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
+		break; // Nothing to emit in the body; only the shader model requirement is validated here.
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4850,6 +4867,7 @@
 	validate_shader_model();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
+	analyze_interlocked_resource_usage();
 
 	// Subpass input needs SV_Position.
 	if (need_subpass_input)
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index f4ba6a4..8819952 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -852,6 +852,7 @@
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
 	analyze_sampled_image_usage();
+	analyze_interlocked_resource_usage();
 	preprocess_op_codes();
 	build_implicit_builtins();
 
@@ -5541,6 +5542,12 @@
 		emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
 		break;
 
+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (!msl_options.supports_msl_version(2, 0))
+			SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
+		break; // Nothing to emit in the body; only the MSL version requirement is validated here.
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -7436,8 +7443,15 @@
 	bool is_builtin = is_member_builtin(type, index, &builtin);
 
 	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
-		return join(" [[id(",
-		            get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]");
+	{
+		string quals = join(
+		    " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+		if (interlocked_resources.count(
+		        get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+			quals += ", raster_order_group(0)";
+		quals += "]]";
+		return quals;
+	}
 
 	// Vertex function inputs
 	if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@@ -8239,7 +8253,10 @@
 						ep_args += ", ";
 					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
 					           r.name + "_" + convert_to_string(i);
-					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
+					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
+					if (interlocked_resources.count(var_id))
+						ep_args += ", raster_order_group(0)";
+					ep_args += "]]";
 				}
 			}
 			else
@@ -8248,7 +8265,10 @@
 					ep_args += ", ";
 				ep_args +=
 				    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
-				ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+				ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+				if (interlocked_resources.count(var_id))
+					ep_args += ", raster_order_group(0)";
+				ep_args += "]]";
 			}
 			break;
 		}
@@ -8264,7 +8284,10 @@
 			ep_args += image_type_glsl(type, var_id) + " " + r.name;
 			if (r.plane > 0)
 				ep_args += join(plane_name_suffix, r.plane);
-			ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
+			ep_args += " [[texture(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		default:
 			if (!ep_args.empty())
@@ -8274,7 +8297,10 @@
 				           type_to_glsl(type, var_id) + "& " + r.name;
 			else
 				ep_args += type_to_glsl(type, var_id) + " " + r.name;
-			ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+			ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		}
 	}