AST/SPV: Fix #930: translate uvec4 <-> uint64 for SubgroupGeMask et al.

When reading the built-in variables SubgroupEqMask, SubgroupGeMask, SubgroupGtMask,
SubgroupLeMask, and SubgroupLtMask, the AST expects 64-bit ints, while SPIR-V
defines them as vectors of 32-bit ints.

The declaration type has to be translated in the opposite direction.
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 4f58c4f..16d90b8 100644
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -138,7 +138,7 @@
     spv::LoopControlMask TranslateLoopControl(const glslang::TIntermLoop&, std::vector<unsigned int>& operands) const;
     spv::StorageClass TranslateStorageClass(const glslang::TType&);
     void addIndirectionIndexCapabilities(const glslang::TType& baseType, const glslang::TType& indexType);
-    spv::Id createSpvVariable(const glslang::TIntermSymbol*);
+    spv::Id createSpvVariable(const glslang::TIntermSymbol*, spv::Id forcedType);
     spv::Id getSampledType(const glslang::TSampler&);
     spv::Id getInvertedSwizzleType(const glslang::TIntermTyped&);
     spv::Id createInvertedSwizzle(spv::Decoration precision, const glslang::TIntermTyped&, spv::Id parentResult);
@@ -208,6 +208,8 @@
         if (builder.getSpvVersion() < glslang::EShTargetSpv_1_3)
             builder.addExtension(ext);
     }
+    std::pair<spv::Id, spv::Id> getForcedType(spv::BuiltIn, const glslang::TType&);
+    spv::Id translateForcedType(spv::Id object);
 
     glslang::SpvOptions& options;
     spv::Function* shaderEntry;
@@ -238,6 +240,10 @@
     std::unordered_map<std::string, const glslang::TIntermSymbol*> counterOriginator;
     // Map pointee types for EbtReference to their forward pointers
     std::map<const glslang::TType *, spv::Id> forwardPointers;
+    // Type forcing, for when SPIR-V wants a different type than the AST,
+    // requiring local translation to and from SPIR-V type on every access.
+    // Maps <builtin-variable-id -> AST-required-type-id>
+    std::unordered_map<spv::Id, spv::Id> forceType;
 };
 
 //
@@ -733,27 +739,27 @@
     case glslang::EbvSubGroupEqMask:
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupEqMaskKHR;
+        return spv::BuiltInSubgroupEqMask;
 
     case glslang::EbvSubGroupGeMask:
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupGeMaskKHR;
+        return spv::BuiltInSubgroupGeMask;
 
     case glslang::EbvSubGroupGtMask:
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupGtMaskKHR;
+        return spv::BuiltInSubgroupGtMask;
 
     case glslang::EbvSubGroupLeMask:
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupLeMaskKHR;
+        return spv::BuiltInSubgroupLeMask;
 
     case glslang::EbvSubGroupLtMask:
         builder.addExtension(spv::E_SPV_KHR_shader_ballot);
         builder.addCapability(spv::CapabilitySubgroupBallotKHR);
-        return spv::BuiltInSubgroupLtMaskKHR;
+        return spv::BuiltInSubgroupLtMask;
 
     case glslang::EbvNumSubgroups:
         builder.addCapability(spv::CapabilityGroupNonUniform);
@@ -795,6 +801,7 @@
         builder.addCapability(spv::CapabilityGroupNonUniform);
         builder.addCapability(spv::CapabilityGroupNonUniformBallot);
         return spv::BuiltInSubgroupLtMask;
+
 #ifdef AMD_EXTENSIONS
     case glslang::EbvBaryCoordNoPersp:
         builder.addExtension(spv::E_SPV_AMD_shader_explicit_vertex_parameter);
@@ -1620,8 +1627,8 @@
     // Formal function parameters were mapped during makeFunctions().
     spv::Id id = getSymbolId(symbol);
 
-    // Include all "static use" and "linkage only" interface variables on the OpEntryPoint instruction
     if (builder.isPointer(id)) {
+        // Include all "static use" and "linkage only" interface variables on the OpEntryPoint instruction
         // Consider adding to the OpEntryPoint interface list.
         // Only looking at structures if they have at least one member.
         if (!symbol->getType().isStruct() || symbol->getType().getStruct()->size() > 0) {
@@ -1633,6 +1640,14 @@
                 iOSet.insert(id);
             }
         }
+
+        // If the SPIR-V type is required to be different than the AST type,
+        // translate now from the SPIR-V type to the AST type, for the consuming
+        // operation.
+        // Note this turns it from an l-value to an r-value.
+        // Currently, all symbols needing this are inputs; avoid the map lookup when non-input.
+        if (symbol->getType().getQualifier().storage == glslang::EvqVaryingIn)
+            id = translateForcedType(id);
     }
 
     // Only process non-linkage-only nodes for generating actual static uses
@@ -1650,8 +1665,10 @@
         //    See comments in handleUserFunctionCall().
         // B) Specialization constants (normal constants don't even come in as a variable),
         //    These are also pure R-values.
+        // C) R-Values from type translation, see above call to translateForcedType()
         glslang::TQualifier qualifier = symbol->getQualifier();
-        if (qualifier.isSpecConstant() || rValueParameters.find(symbol->getId()) != rValueParameters.end())
+        if (qualifier.isSpecConstant() || rValueParameters.find(symbol->getId()) != rValueParameters.end() ||
+            !builder.isPointerType(builder.getTypeId(id)))
             builder.setAccessChainRValue(id);
         else
             builder.setAccessChainLValue(id);
@@ -1908,6 +1925,71 @@
     }
 }
 
+// Figure out what, if any, type changes are needed when accessing a specific built-in.
+// Returns <the type SPIR-V requires for declaration, the type to translate to on use>.
+// Also see comment for 'forceType', regarding tracking SPIR-V-required types.
+std::pair<spv::Id, spv::Id> TGlslangToSpvTraverser::getForcedType(spv::BuiltIn builtIn,
+    const glslang::TType& glslangType)
+{
+    switch(builtIn)
+    {
+        case spv::BuiltInSubgroupEqMask:
+        case spv::BuiltInSubgroupGeMask:
+        case spv::BuiltInSubgroupGtMask:
+        case spv::BuiltInSubgroupLeMask:
+        case spv::BuiltInSubgroupLtMask: {
+            // these require changing a 64-bit scalar -> a vector of 32-bit components
+            if (glslangType.isVector())
+                break;
+            std::pair<spv::Id, spv::Id> ret(builder.makeVectorType(builder.makeUintType(32), 4),
+                                            builder.makeUintType(64));
+            return ret;
+        }
+        default:
+            break;
+    }
+
+    std::pair<spv::Id, spv::Id> ret(spv::NoType, spv::NoType);
+    return ret;
+}
+
+// For an object previously identified (see getForcedType() and forceType)
+// as needing type translations, do the translation needed for a load, turning
+// an L-value into an R-value.
+spv::Id TGlslangToSpvTraverser::translateForcedType(spv::Id object)
+{
+    const auto forceIt = forceType.find(object);
+    if (forceIt == forceType.end())
+        return object;
+
+    spv::Id desiredTypeId = forceIt->second;
+    spv::Id objectTypeId = builder.getTypeId(object);
+    assert(builder.isPointerType(objectTypeId));
+    objectTypeId = builder.getContainedTypeId(objectTypeId);
+    if (builder.isVectorType(objectTypeId) &&
+        builder.getScalarTypeWidth(builder.getContainedTypeId(objectTypeId)) == 32) {
+        if (builder.getScalarTypeWidth(desiredTypeId) == 64) {
+            // handle 32-bit v.xy* -> 64-bit
+            builder.clearAccessChain();
+            builder.setAccessChainLValue(object);
+            object = builder.accessChainLoad(spv::NoPrecision, spv::DecorationMax, objectTypeId);
+            std::vector<spv::Id> components;
+            components.push_back(builder.createCompositeExtract(object, builder.getContainedTypeId(objectTypeId), 0));
+            components.push_back(builder.createCompositeExtract(object, builder.getContainedTypeId(objectTypeId), 1));
+
+            spv::Id vecType = builder.makeVectorType(builder.getContainedTypeId(objectTypeId), 2);
+            return builder.createUnaryOp(spv::OpBitcast, desiredTypeId,
+                                         builder.createCompositeConstruct(vecType, components));
+        } else {
+            logger->missingFunctionality("forcing 32-bit vector type to non 64-bit scalar");
+        }
+    } else {
+        logger->missingFunctionality("forcing non 32-bit vector type");
+    }
+
+    return object;
+}
+
 bool TGlslangToSpvTraverser::visitUnary(glslang::TVisit /* visit */, glslang::TIntermUnary* node)
 {
     builder.setLine(node->getLoc().line, node->getLoc().getFilename());
@@ -3037,7 +3119,7 @@
     return false;
 }
 
-spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* node)
+spv::Id TGlslangToSpvTraverser::createSpvVariable(const glslang::TIntermSymbol* node, spv::Id forcedType)
 {
     // First, steer off constants, which are not SPIR-V variables, but
     // can still have a mapping to a SPIR-V Id.
@@ -3050,7 +3132,8 @@
 
     // Now, handle actual variables
     spv::StorageClass storageClass = TranslateStorageClass(node->getType());
-    spv::Id spvType = convertGlslangToSpvType(node->getType());
+    spv::Id spvType = forcedType == spv::NoType ? convertGlslangToSpvType(node->getType())
+                                                : forcedType;
 
     const bool contains16BitType = node->getType().containsBasicType(glslang::EbtFloat16) ||
                                    node->getType().containsBasicType(glslang::EbtInt16)   ||
@@ -7543,8 +7626,12 @@
     }
 
     // it was not found, create it
-    id = createSpvVariable(symbol);
+    spv::BuiltIn builtIn = TranslateBuiltInDecoration(symbol->getQualifier().builtIn, false);
+    auto forcedType = getForcedType(builtIn, symbol->getType());
+    id = createSpvVariable(symbol, forcedType.first);
     symbolValues[symbol->getId()] = id;
+    if (forcedType.second != spv::NoType)
+        forceType[id] = forcedType.second;
 
     if (symbol->getBasicType() != glslang::EbtBlock) {
         builder.addDecoration(id, TranslatePrecisionDecoration(symbol->getType()));
@@ -7604,10 +7691,10 @@
             builder.addDecoration(id, memory[i]);
     }
 
-    // built-in variable decorations
-    spv::BuiltIn builtIn = TranslateBuiltInDecoration(symbol->getQualifier().builtIn, false);
-    if (builtIn != spv::BuiltInMax)
+    // add built-in variable decoration
+    if (builtIn != spv::BuiltInMax) {
         builder.addDecoration(id, spv::DecorationBuiltIn, (int)builtIn);
+    }
 
     // nonuniform
     builder.addDecoration(id, TranslateNonUniformDecoration(symbol->getType().getQualifier()));
diff --git a/Test/baseResults/spv.shaderBallot.comp.out b/Test/baseResults/spv.shaderBallot.comp.out
index d25eec7..1c616ee 100644
--- a/Test/baseResults/spv.shaderBallot.comp.out
+++ b/Test/baseResults/spv.shaderBallot.comp.out
@@ -1,8 +1,7 @@
 spv.shaderBallot.comp
-Validation failed
 // Module Version 10000
 // Generated by (magic number): 80007
-// Id's are bound by 298
+// Id's are bound by 318
 
                               Capability Shader
                               Capability Int64
@@ -10,7 +9,7 @@
                               Extension  "SPV_KHR_shader_ballot"
                1:             ExtInstImport  "GLSL.std.450"
                               MemoryModel Logical GLSL450
-                              EntryPoint GLCompute 4  "main" 10 12 21 23 26 29 32
+                              EntryPoint GLCompute 4  "main" 10 12 22 29 36 43 50
                               ExecutionMode 4 LocalSize 8 8 1
                               Source GLSL 450
                               SourceExtension  "GL_ARB_gpu_shader_int64"
@@ -20,30 +19,30 @@
                               Name 10  "gl_SubGroupInvocationARB"
                               Name 12  "gl_SubGroupSizeARB"
                               Name 19  "relMask"
-                              Name 21  "gl_SubGroupEqMaskARB"
-                              Name 23  "gl_SubGroupGeMaskARB"
-                              Name 26  "gl_SubGroupGtMaskARB"
-                              Name 29  "gl_SubGroupLeMaskARB"
-                              Name 32  "gl_SubGroupLtMaskARB"
-                              Name 52  "Buffers"
-                              MemberName 52(Buffers) 0  "f4"
-                              MemberName 52(Buffers) 1  "i4"
-                              MemberName 52(Buffers) 2  "u4"
-                              Name 55  "data"
+                              Name 22  "gl_SubGroupEqMaskARB"
+                              Name 29  "gl_SubGroupGeMaskARB"
+                              Name 36  "gl_SubGroupGtMaskARB"
+                              Name 43  "gl_SubGroupLeMaskARB"
+                              Name 50  "gl_SubGroupLtMaskARB"
+                              Name 72  "Buffers"
+                              MemberName 72(Buffers) 0  "f4"
+                              MemberName 72(Buffers) 1  "i4"
+                              MemberName 72(Buffers) 2  "u4"
+                              Name 75  "data"
                               Decorate 10(gl_SubGroupInvocationARB) BuiltIn SubgroupLocalInvocationId
                               Decorate 12(gl_SubGroupSizeARB) BuiltIn SubgroupSize
-                              Decorate 21(gl_SubGroupEqMaskARB) BuiltIn SubgroupEqMaskKHR
-                              Decorate 23(gl_SubGroupGeMaskARB) BuiltIn SubgroupGeMaskKHR
-                              Decorate 26(gl_SubGroupGtMaskARB) BuiltIn SubgroupGtMaskKHR
-                              Decorate 29(gl_SubGroupLeMaskARB) BuiltIn SubgroupLeMaskKHR
-                              Decorate 32(gl_SubGroupLtMaskARB) BuiltIn SubgroupLtMaskKHR
-                              MemberDecorate 52(Buffers) 0 Offset 0
-                              MemberDecorate 52(Buffers) 1 Offset 16
-                              MemberDecorate 52(Buffers) 2 Offset 32
-                              Decorate 52(Buffers) BufferBlock
-                              Decorate 55(data) DescriptorSet 0
-                              Decorate 55(data) Binding 0
-                              Decorate 297 BuiltIn WorkgroupSize
+                              Decorate 22(gl_SubGroupEqMaskARB) BuiltIn SubgroupEqMaskKHR
+                              Decorate 29(gl_SubGroupGeMaskARB) BuiltIn SubgroupGeMaskKHR
+                              Decorate 36(gl_SubGroupGtMaskARB) BuiltIn SubgroupGtMaskKHR
+                              Decorate 43(gl_SubGroupLeMaskARB) BuiltIn SubgroupLeMaskKHR
+                              Decorate 50(gl_SubGroupLtMaskARB) BuiltIn SubgroupLtMaskKHR
+                              MemberDecorate 72(Buffers) 0 Offset 0
+                              MemberDecorate 72(Buffers) 1 Offset 16
+                              MemberDecorate 72(Buffers) 2 Offset 32
+                              Decorate 72(Buffers) BufferBlock
+                              Decorate 75(data) DescriptorSet 0
+                              Decorate 75(data) Binding 0
+                              Decorate 317 BuiltIn WorkgroupSize
                2:             TypeVoid
                3:             TypeFunction 2
                6:             TypeInt 32 0
@@ -54,43 +53,43 @@
               15:      6(int) Constant 4
               17:             TypeInt 64 0
               18:             TypePointer Function 17(int64_t)
-              20:             TypePointer Input 17(int64_t)
-21(gl_SubGroupEqMaskARB):     20(ptr) Variable Input
-23(gl_SubGroupGeMaskARB):     20(ptr) Variable Input
-26(gl_SubGroupGtMaskARB):     20(ptr) Variable Input
-29(gl_SubGroupLeMaskARB):     20(ptr) Variable Input
-32(gl_SubGroupLtMaskARB):     20(ptr) Variable Input
-              36:             TypeBool
-              37:    36(bool) ConstantTrue
-              38:             TypeVector 6(int) 4
-              42:             TypeVector 6(int) 2
-              48:             TypeFloat 32
-              49:             TypeVector 48(float) 4
-              50:             TypeInt 32 1
-              51:             TypeVector 50(int) 4
-     52(Buffers):             TypeStruct 49(fvec4) 51(ivec4) 38(ivec4)
-              53:             TypeArray 52(Buffers) 15
-              54:             TypePointer Uniform 53
-        55(data):     54(ptr) Variable Uniform
-              57:     50(int) Constant 0
-              58:      6(int) Constant 0
-              59:             TypePointer Uniform 48(float)
-              66:     50(int) Constant 1
-              67:             TypeVector 48(float) 2
-              68:             TypePointer Uniform 49(fvec4)
-              82:     50(int) Constant 2
-              83:             TypeVector 48(float) 3
-              99:     50(int) Constant 3
-             114:             TypePointer Uniform 50(int)
-             121:             TypeVector 50(int) 2
-             122:             TypePointer Uniform 51(ivec4)
-             136:             TypeVector 50(int) 3
-             166:             TypePointer Uniform 6(int)
-             173:             TypePointer Uniform 38(ivec4)
-             187:             TypeVector 6(int) 3
-             295:      6(int) Constant 8
-             296:      6(int) Constant 1
-             297:  187(ivec3) ConstantComposite 295 295 296
+              20:             TypeVector 6(int) 4
+              21:             TypePointer Input 20(ivec4)
+22(gl_SubGroupEqMaskARB):     21(ptr) Variable Input
+              26:             TypeVector 6(int) 2
+29(gl_SubGroupGeMaskARB):     21(ptr) Variable Input
+36(gl_SubGroupGtMaskARB):     21(ptr) Variable Input
+43(gl_SubGroupLeMaskARB):     21(ptr) Variable Input
+50(gl_SubGroupLtMaskARB):     21(ptr) Variable Input
+              58:             TypeBool
+              59:    58(bool) ConstantTrue
+              68:             TypeFloat 32
+              69:             TypeVector 68(float) 4
+              70:             TypeInt 32 1
+              71:             TypeVector 70(int) 4
+     72(Buffers):             TypeStruct 69(fvec4) 71(ivec4) 20(ivec4)
+              73:             TypeArray 72(Buffers) 15
+              74:             TypePointer Uniform 73
+        75(data):     74(ptr) Variable Uniform
+              77:     70(int) Constant 0
+              78:      6(int) Constant 0
+              79:             TypePointer Uniform 68(float)
+              86:     70(int) Constant 1
+              87:             TypeVector 68(float) 2
+              88:             TypePointer Uniform 69(fvec4)
+             102:     70(int) Constant 2
+             103:             TypeVector 68(float) 3
+             119:     70(int) Constant 3
+             134:             TypePointer Uniform 70(int)
+             141:             TypeVector 70(int) 2
+             142:             TypePointer Uniform 71(ivec4)
+             156:             TypeVector 70(int) 3
+             186:             TypePointer Uniform 6(int)
+             193:             TypePointer Uniform 20(ivec4)
+             207:             TypeVector 6(int) 3
+             315:      6(int) Constant 8
+             316:      6(int) Constant 1
+             317:  207(ivec3) ConstantComposite 315 315 316
          4(main):           2 Function None 3
                5:             Label
    8(invocation):      7(ptr) Variable Function
@@ -100,275 +99,295 @@
               14:      6(int) IAdd 11 13
               16:      6(int) UMod 14 15
                               Store 8(invocation) 16
-              22: 17(int64_t) Load 21(gl_SubGroupEqMaskARB)
-              24: 17(int64_t) Load 23(gl_SubGroupGeMaskARB)
-              25: 17(int64_t) IAdd 22 24
-              27: 17(int64_t) Load 26(gl_SubGroupGtMaskARB)
-              28: 17(int64_t) IAdd 25 27
-              30: 17(int64_t) Load 29(gl_SubGroupLeMaskARB)
-              31: 17(int64_t) IAdd 28 30
-              33: 17(int64_t) Load 32(gl_SubGroupLtMaskARB)
-              34: 17(int64_t) IAdd 31 33
-                              Store 19(relMask) 34
-              35: 17(int64_t) Load 19(relMask)
-              39:   38(ivec4) SubgroupBallotKHR 37
-              40:      6(int) CompositeExtract 39 0
-              41:      6(int) CompositeExtract 39 1
-              43:   42(ivec2) CompositeConstruct 40 41
-              44: 17(int64_t) Bitcast 43
-              45:    36(bool) IEqual 35 44
-                              SelectionMerge 47 None
-                              BranchConditional 45 46 216
-              46:               Label
-              56:      6(int)   Load 8(invocation)
-              60:     59(ptr)   AccessChain 55(data) 57 57 58
-              61:   48(float)   Load 60
-              62:      6(int)   Load 8(invocation)
-              63:   48(float)   SubgroupReadInvocationKHR 61 62
-              64:     59(ptr)   AccessChain 55(data) 56 57 58
-                                Store 64 63
-              65:      6(int)   Load 8(invocation)
-              69:     68(ptr)   AccessChain 55(data) 66 57
-              70:   49(fvec4)   Load 69
-              71:   67(fvec2)   VectorShuffle 70 70 0 1
-              72:      6(int)   Load 8(invocation)
-              73:   48(float)   CompositeExtract 71 0
-              74:   48(float)   SubgroupReadInvocationKHR 73 72
-              75:   48(float)   CompositeExtract 71 1
-              76:   48(float)   SubgroupReadInvocationKHR 75 72
-              77:   67(fvec2)   CompositeConstruct 74 76
-              78:     68(ptr)   AccessChain 55(data) 65 57
-              79:   49(fvec4)   Load 78
-              80:   49(fvec4)   VectorShuffle 79 77 4 5 2 3
-                                Store 78 80
-              81:      6(int)   Load 8(invocation)
-              84:     68(ptr)   AccessChain 55(data) 82 57
-              85:   49(fvec4)   Load 84
-              86:   83(fvec3)   VectorShuffle 85 85 0 1 2
-              87:      6(int)   Load 8(invocation)
-              88:   48(float)   CompositeExtract 86 0
-              89:   48(float)   SubgroupReadInvocationKHR 88 87
-              90:   48(float)   CompositeExtract 86 1
-              91:   48(float)   SubgroupReadInvocationKHR 90 87
-              92:   48(float)   CompositeExtract 86 2
-              93:   48(float)   SubgroupReadInvocationKHR 92 87
-              94:   83(fvec3)   CompositeConstruct 89 91 93
-              95:     68(ptr)   AccessChain 55(data) 81 57
-              96:   49(fvec4)   Load 95
-              97:   49(fvec4)   VectorShuffle 96 94 4 5 6 3
-                                Store 95 97
-              98:      6(int)   Load 8(invocation)
-             100:     68(ptr)   AccessChain 55(data) 99 57
-             101:   49(fvec4)   Load 100
-             102:      6(int)   Load 8(invocation)
-             103:   48(float)   CompositeExtract 101 0
-             104:   48(float)   SubgroupReadInvocationKHR 103 102
-             105:   48(float)   CompositeExtract 101 1
-             106:   48(float)   SubgroupReadInvocationKHR 105 102
-             107:   48(float)   CompositeExtract 101 2
-             108:   48(float)   SubgroupReadInvocationKHR 107 102
-             109:   48(float)   CompositeExtract 101 3
-             110:   48(float)   SubgroupReadInvocationKHR 109 102
-             111:   49(fvec4)   CompositeConstruct 104 106 108 110
-             112:     68(ptr)   AccessChain 55(data) 98 57
-                                Store 112 111
-             113:      6(int)   Load 8(invocation)
-             115:    114(ptr)   AccessChain 55(data) 57 66 58
-             116:     50(int)   Load 115
-             117:      6(int)   Load 8(invocation)
-             118:     50(int)   SubgroupReadInvocationKHR 116 117
-             119:    114(ptr)   AccessChain 55(data) 113 66 58
-                                Store 119 118
-             120:      6(int)   Load 8(invocation)
-             123:    122(ptr)   AccessChain 55(data) 66 66
-             124:   51(ivec4)   Load 123
-             125:  121(ivec2)   VectorShuffle 124 124 0 1
-             126:      6(int)   Load 8(invocation)
-             127:     50(int)   CompositeExtract 125 0
-             128:     50(int)   SubgroupReadInvocationKHR 127 126
-             129:     50(int)   CompositeExtract 125 1
-             130:     50(int)   SubgroupReadInvocationKHR 129 126
-             131:  121(ivec2)   CompositeConstruct 128 130
-             132:    122(ptr)   AccessChain 55(data) 120 66
-             133:   51(ivec4)   Load 132
-             134:   51(ivec4)   VectorShuffle 133 131 4 5 2 3
-                                Store 132 134
-             135:      6(int)   Load 8(invocation)
-             137:    122(ptr)   AccessChain 55(data) 82 66
-             138:   51(ivec4)   Load 137
-             139:  136(ivec3)   VectorShuffle 138 138 0 1 2
+              23:   20(ivec4) Load 22(gl_SubGroupEqMaskARB)
+              24:      6(int) CompositeExtract 23 0
+              25:      6(int) CompositeExtract 23 1
+              27:   26(ivec2) CompositeConstruct 24 25
+              28: 17(int64_t) Bitcast 27
+              30:   20(ivec4) Load 29(gl_SubGroupGeMaskARB)
+              31:      6(int) CompositeExtract 30 0
+              32:      6(int) CompositeExtract 30 1
+              33:   26(ivec2) CompositeConstruct 31 32
+              34: 17(int64_t) Bitcast 33
+              35: 17(int64_t) IAdd 28 34
+              37:   20(ivec4) Load 36(gl_SubGroupGtMaskARB)
+              38:      6(int) CompositeExtract 37 0
+              39:      6(int) CompositeExtract 37 1
+              40:   26(ivec2) CompositeConstruct 38 39
+              41: 17(int64_t) Bitcast 40
+              42: 17(int64_t) IAdd 35 41
+              44:   20(ivec4) Load 43(gl_SubGroupLeMaskARB)
+              45:      6(int) CompositeExtract 44 0
+              46:      6(int) CompositeExtract 44 1
+              47:   26(ivec2) CompositeConstruct 45 46
+              48: 17(int64_t) Bitcast 47
+              49: 17(int64_t) IAdd 42 48
+              51:   20(ivec4) Load 50(gl_SubGroupLtMaskARB)
+              52:      6(int) CompositeExtract 51 0
+              53:      6(int) CompositeExtract 51 1
+              54:   26(ivec2) CompositeConstruct 52 53
+              55: 17(int64_t) Bitcast 54
+              56: 17(int64_t) IAdd 49 55
+                              Store 19(relMask) 56
+              57: 17(int64_t) Load 19(relMask)
+              60:   20(ivec4) SubgroupBallotKHR 59
+              61:      6(int) CompositeExtract 60 0
+              62:      6(int) CompositeExtract 60 1
+              63:   26(ivec2) CompositeConstruct 61 62
+              64: 17(int64_t) Bitcast 63
+              65:    58(bool) IEqual 57 64
+                              SelectionMerge 67 None
+                              BranchConditional 65 66 236
+              66:               Label
+              76:      6(int)   Load 8(invocation)
+              80:     79(ptr)   AccessChain 75(data) 77 77 78
+              81:   68(float)   Load 80
+              82:      6(int)   Load 8(invocation)
+              83:   68(float)   SubgroupReadInvocationKHR 81 82
+              84:     79(ptr)   AccessChain 75(data) 76 77 78
+                                Store 84 83
+              85:      6(int)   Load 8(invocation)
+              89:     88(ptr)   AccessChain 75(data) 86 77
+              90:   69(fvec4)   Load 89
+              91:   87(fvec2)   VectorShuffle 90 90 0 1
+              92:      6(int)   Load 8(invocation)
+              93:   68(float)   CompositeExtract 91 0
+              94:   68(float)   SubgroupReadInvocationKHR 93 92
+              95:   68(float)   CompositeExtract 91 1
+              96:   68(float)   SubgroupReadInvocationKHR 95 92
+              97:   87(fvec2)   CompositeConstruct 94 96
+              98:     88(ptr)   AccessChain 75(data) 85 77
+              99:   69(fvec4)   Load 98
+             100:   69(fvec4)   VectorShuffle 99 97 4 5 2 3
+                                Store 98 100
+             101:      6(int)   Load 8(invocation)
+             104:     88(ptr)   AccessChain 75(data) 102 77
+             105:   69(fvec4)   Load 104
+             106:  103(fvec3)   VectorShuffle 105 105 0 1 2
+             107:      6(int)   Load 8(invocation)
+             108:   68(float)   CompositeExtract 106 0
+             109:   68(float)   SubgroupReadInvocationKHR 108 107
+             110:   68(float)   CompositeExtract 106 1
+             111:   68(float)   SubgroupReadInvocationKHR 110 107
+             112:   68(float)   CompositeExtract 106 2
+             113:   68(float)   SubgroupReadInvocationKHR 112 107
+             114:  103(fvec3)   CompositeConstruct 109 111 113
+             115:     88(ptr)   AccessChain 75(data) 101 77
+             116:   69(fvec4)   Load 115
+             117:   69(fvec4)   VectorShuffle 116 114 4 5 6 3
+                                Store 115 117
+             118:      6(int)   Load 8(invocation)
+             120:     88(ptr)   AccessChain 75(data) 119 77
+             121:   69(fvec4)   Load 120
+             122:      6(int)   Load 8(invocation)
+             123:   68(float)   CompositeExtract 121 0
+             124:   68(float)   SubgroupReadInvocationKHR 123 122
+             125:   68(float)   CompositeExtract 121 1
+             126:   68(float)   SubgroupReadInvocationKHR 125 122
+             127:   68(float)   CompositeExtract 121 2
+             128:   68(float)   SubgroupReadInvocationKHR 127 122
+             129:   68(float)   CompositeExtract 121 3
+             130:   68(float)   SubgroupReadInvocationKHR 129 122
+             131:   69(fvec4)   CompositeConstruct 124 126 128 130
+             132:     88(ptr)   AccessChain 75(data) 118 77
+                                Store 132 131
+             133:      6(int)   Load 8(invocation)
+             135:    134(ptr)   AccessChain 75(data) 77 86 78
+             136:     70(int)   Load 135
+             137:      6(int)   Load 8(invocation)
+             138:     70(int)   SubgroupReadInvocationKHR 136 137
+             139:    134(ptr)   AccessChain 75(data) 133 86 78
+                                Store 139 138
              140:      6(int)   Load 8(invocation)
-             141:     50(int)   CompositeExtract 139 0
-             142:     50(int)   SubgroupReadInvocationKHR 141 140
-             143:     50(int)   CompositeExtract 139 1
-             144:     50(int)   SubgroupReadInvocationKHR 143 140
-             145:     50(int)   CompositeExtract 139 2
-             146:     50(int)   SubgroupReadInvocationKHR 145 140
-             147:  136(ivec3)   CompositeConstruct 142 144 146
-             148:    122(ptr)   AccessChain 55(data) 135 66
-             149:   51(ivec4)   Load 148
-             150:   51(ivec4)   VectorShuffle 149 147 4 5 6 3
-                                Store 148 150
-             151:      6(int)   Load 8(invocation)
-             152:    122(ptr)   AccessChain 55(data) 99 66
-             153:   51(ivec4)   Load 152
-             154:      6(int)   Load 8(invocation)
-             155:     50(int)   CompositeExtract 153 0
-             156:     50(int)   SubgroupReadInvocationKHR 155 154
-             157:     50(int)   CompositeExtract 153 1
-             158:     50(int)   SubgroupReadInvocationKHR 157 154
-             159:     50(int)   CompositeExtract 153 2
-             160:     50(int)   SubgroupReadInvocationKHR 159 154
-             161:     50(int)   CompositeExtract 153 3
-             162:     50(int)   SubgroupReadInvocationKHR 161 154
-             163:   51(ivec4)   CompositeConstruct 156 158 160 162
-             164:    122(ptr)   AccessChain 55(data) 151 66
-                                Store 164 163
-             165:      6(int)   Load 8(invocation)
-             167:    166(ptr)   AccessChain 55(data) 57 82 58
-             168:      6(int)   Load 167
-             169:      6(int)   Load 8(invocation)
-             170:      6(int)   SubgroupReadInvocationKHR 168 169
-             171:    166(ptr)   AccessChain 55(data) 165 82 58
-                                Store 171 170
-             172:      6(int)   Load 8(invocation)
-             174:    173(ptr)   AccessChain 55(data) 66 82
-             175:   38(ivec4)   Load 174
-             176:   42(ivec2)   VectorShuffle 175 175 0 1
-             177:      6(int)   Load 8(invocation)
-             178:      6(int)   CompositeExtract 176 0
-             179:      6(int)   SubgroupReadInvocationKHR 178 177
-             180:      6(int)   CompositeExtract 176 1
-             181:      6(int)   SubgroupReadInvocationKHR 180 177
-             182:   42(ivec2)   CompositeConstruct 179 181
-             183:    173(ptr)   AccessChain 55(data) 172 82
-             184:   38(ivec4)   Load 183
-             185:   38(ivec4)   VectorShuffle 184 182 4 5 2 3
-                                Store 183 185
-             186:      6(int)   Load 8(invocation)
-             188:    173(ptr)   AccessChain 55(data) 82 82
-             189:   38(ivec4)   Load 188
-             190:  187(ivec3)   VectorShuffle 189 189 0 1 2
-             191:      6(int)   Load 8(invocation)
-             192:      6(int)   CompositeExtract 190 0
-             193:      6(int)   SubgroupReadInvocationKHR 192 191
-             194:      6(int)   CompositeExtract 190 1
-             195:      6(int)   SubgroupReadInvocationKHR 194 191
-             196:      6(int)   CompositeExtract 190 2
-             197:      6(int)   SubgroupReadInvocationKHR 196 191
-             198:  187(ivec3)   CompositeConstruct 193 195 197
-             199:    173(ptr)   AccessChain 55(data) 186 82
-             200:   38(ivec4)   Load 199
-             201:   38(ivec4)   VectorShuffle 200 198 4 5 6 3
-                                Store 199 201
-             202:      6(int)   Load 8(invocation)
-             203:    173(ptr)   AccessChain 55(data) 99 82
-             204:   38(ivec4)   Load 203
-             205:      6(int)   Load 8(invocation)
-             206:      6(int)   CompositeExtract 204 0
-             207:      6(int)   SubgroupReadInvocationKHR 206 205
-             208:      6(int)   CompositeExtract 204 1
-             209:      6(int)   SubgroupReadInvocationKHR 208 205
-             210:      6(int)   CompositeExtract 204 2
-             211:      6(int)   SubgroupReadInvocationKHR 210 205
-             212:      6(int)   CompositeExtract 204 3
-             213:      6(int)   SubgroupReadInvocationKHR 212 205
-             214:   38(ivec4)   CompositeConstruct 207 209 211 213
-             215:    173(ptr)   AccessChain 55(data) 202 82
-                                Store 215 214
-                                Branch 47
-             216:               Label
-             217:      6(int)   Load 8(invocation)
-             218:     59(ptr)   AccessChain 55(data) 57 57 58
-             219:   48(float)   Load 218
-             220:   48(float)   SubgroupFirstInvocationKHR 219
-             221:     59(ptr)   AccessChain 55(data) 217 57 58
-                                Store 221 220
+             143:    142(ptr)   AccessChain 75(data) 86 86
+             144:   71(ivec4)   Load 143
+             145:  141(ivec2)   VectorShuffle 144 144 0 1
+             146:      6(int)   Load 8(invocation)
+             147:     70(int)   CompositeExtract 145 0
+             148:     70(int)   SubgroupReadInvocationKHR 147 146
+             149:     70(int)   CompositeExtract 145 1
+             150:     70(int)   SubgroupReadInvocationKHR 149 146
+             151:  141(ivec2)   CompositeConstruct 148 150
+             152:    142(ptr)   AccessChain 75(data) 140 86
+             153:   71(ivec4)   Load 152
+             154:   71(ivec4)   VectorShuffle 153 151 4 5 2 3
+                                Store 152 154
+             155:      6(int)   Load 8(invocation)
+             157:    142(ptr)   AccessChain 75(data) 102 86
+             158:   71(ivec4)   Load 157
+             159:  156(ivec3)   VectorShuffle 158 158 0 1 2
+             160:      6(int)   Load 8(invocation)
+             161:     70(int)   CompositeExtract 159 0
+             162:     70(int)   SubgroupReadInvocationKHR 161 160
+             163:     70(int)   CompositeExtract 159 1
+             164:     70(int)   SubgroupReadInvocationKHR 163 160
+             165:     70(int)   CompositeExtract 159 2
+             166:     70(int)   SubgroupReadInvocationKHR 165 160
+             167:  156(ivec3)   CompositeConstruct 162 164 166
+             168:    142(ptr)   AccessChain 75(data) 155 86
+             169:   71(ivec4)   Load 168
+             170:   71(ivec4)   VectorShuffle 169 167 4 5 6 3
+                                Store 168 170
+             171:      6(int)   Load 8(invocation)
+             172:    142(ptr)   AccessChain 75(data) 119 86
+             173:   71(ivec4)   Load 172
+             174:      6(int)   Load 8(invocation)
+             175:     70(int)   CompositeExtract 173 0
+             176:     70(int)   SubgroupReadInvocationKHR 175 174
+             177:     70(int)   CompositeExtract 173 1
+             178:     70(int)   SubgroupReadInvocationKHR 177 174
+             179:     70(int)   CompositeExtract 173 2
+             180:     70(int)   SubgroupReadInvocationKHR 179 174
+             181:     70(int)   CompositeExtract 173 3
+             182:     70(int)   SubgroupReadInvocationKHR 181 174
+             183:   71(ivec4)   CompositeConstruct 176 178 180 182
+             184:    142(ptr)   AccessChain 75(data) 171 86
+                                Store 184 183
+             185:      6(int)   Load 8(invocation)
+             187:    186(ptr)   AccessChain 75(data) 77 102 78
+             188:      6(int)   Load 187
+             189:      6(int)   Load 8(invocation)
+             190:      6(int)   SubgroupReadInvocationKHR 188 189
+             191:    186(ptr)   AccessChain 75(data) 185 102 78
+                                Store 191 190
+             192:      6(int)   Load 8(invocation)
+             194:    193(ptr)   AccessChain 75(data) 86 102
+             195:   20(ivec4)   Load 194
+             196:   26(ivec2)   VectorShuffle 195 195 0 1
+             197:      6(int)   Load 8(invocation)
+             198:      6(int)   CompositeExtract 196 0
+             199:      6(int)   SubgroupReadInvocationKHR 198 197
+             200:      6(int)   CompositeExtract 196 1
+             201:      6(int)   SubgroupReadInvocationKHR 200 197
+             202:   26(ivec2)   CompositeConstruct 199 201
+             203:    193(ptr)   AccessChain 75(data) 192 102
+             204:   20(ivec4)   Load 203
+             205:   20(ivec4)   VectorShuffle 204 202 4 5 2 3
+                                Store 203 205
+             206:      6(int)   Load 8(invocation)
+             208:    193(ptr)   AccessChain 75(data) 102 102
+             209:   20(ivec4)   Load 208
+             210:  207(ivec3)   VectorShuffle 209 209 0 1 2
+             211:      6(int)   Load 8(invocation)
+             212:      6(int)   CompositeExtract 210 0
+             213:      6(int)   SubgroupReadInvocationKHR 212 211
+             214:      6(int)   CompositeExtract 210 1
+             215:      6(int)   SubgroupReadInvocationKHR 214 211
+             216:      6(int)   CompositeExtract 210 2
+             217:      6(int)   SubgroupReadInvocationKHR 216 211
+             218:  207(ivec3)   CompositeConstruct 213 215 217
+             219:    193(ptr)   AccessChain 75(data) 206 102
+             220:   20(ivec4)   Load 219
+             221:   20(ivec4)   VectorShuffle 220 218 4 5 6 3
+                                Store 219 221
              222:      6(int)   Load 8(invocation)
-             223:     68(ptr)   AccessChain 55(data) 66 57
-             224:   49(fvec4)   Load 223
-             225:   67(fvec2)   VectorShuffle 224 224 0 1
-             226:   67(fvec2)   SubgroupFirstInvocationKHR 225
-             227:     68(ptr)   AccessChain 55(data) 222 57
-             228:   49(fvec4)   Load 227
-             229:   49(fvec4)   VectorShuffle 228 226 4 5 2 3
-                                Store 227 229
-             230:      6(int)   Load 8(invocation)
-             231:     68(ptr)   AccessChain 55(data) 82 57
-             232:   49(fvec4)   Load 231
-             233:   83(fvec3)   VectorShuffle 232 232 0 1 2
-             234:   83(fvec3)   SubgroupFirstInvocationKHR 233
-             235:     68(ptr)   AccessChain 55(data) 230 57
-             236:   49(fvec4)   Load 235
-             237:   49(fvec4)   VectorShuffle 236 234 4 5 6 3
-                                Store 235 237
-             238:      6(int)   Load 8(invocation)
-             239:     68(ptr)   AccessChain 55(data) 99 57
-             240:   49(fvec4)   Load 239
-             241:   49(fvec4)   SubgroupFirstInvocationKHR 240
-             242:     68(ptr)   AccessChain 55(data) 238 57
-                                Store 242 241
-             243:      6(int)   Load 8(invocation)
-             244:    114(ptr)   AccessChain 55(data) 57 66 58
-             245:     50(int)   Load 244
-             246:     50(int)   SubgroupFirstInvocationKHR 245
-             247:    114(ptr)   AccessChain 55(data) 243 66 58
-                                Store 247 246
-             248:      6(int)   Load 8(invocation)
-             249:    122(ptr)   AccessChain 55(data) 66 66
-             250:   51(ivec4)   Load 249
-             251:  121(ivec2)   VectorShuffle 250 250 0 1
-             252:  121(ivec2)   SubgroupFirstInvocationKHR 251
-             253:    122(ptr)   AccessChain 55(data) 248 66
-             254:   51(ivec4)   Load 253
-             255:   51(ivec4)   VectorShuffle 254 252 4 5 2 3
-                                Store 253 255
-             256:      6(int)   Load 8(invocation)
-             257:    122(ptr)   AccessChain 55(data) 82 66
-             258:   51(ivec4)   Load 257
-             259:  136(ivec3)   VectorShuffle 258 258 0 1 2
-             260:  136(ivec3)   SubgroupFirstInvocationKHR 259
-             261:    122(ptr)   AccessChain 55(data) 256 66
-             262:   51(ivec4)   Load 261
-             263:   51(ivec4)   VectorShuffle 262 260 4 5 6 3
-                                Store 261 263
-             264:      6(int)   Load 8(invocation)
-             265:    122(ptr)   AccessChain 55(data) 99 66
-             266:   51(ivec4)   Load 265
-             267:   51(ivec4)   SubgroupFirstInvocationKHR 266
-             268:    122(ptr)   AccessChain 55(data) 264 66
-                                Store 268 267
-             269:      6(int)   Load 8(invocation)
-             270:    166(ptr)   AccessChain 55(data) 57 82 58
-             271:      6(int)   Load 270
-             272:      6(int)   SubgroupFirstInvocationKHR 271
-             273:    166(ptr)   AccessChain 55(data) 269 82 58
-                                Store 273 272
-             274:      6(int)   Load 8(invocation)
-             275:    173(ptr)   AccessChain 55(data) 66 82
-             276:   38(ivec4)   Load 275
-             277:   42(ivec2)   VectorShuffle 276 276 0 1
-             278:   42(ivec2)   SubgroupFirstInvocationKHR 277
-             279:    173(ptr)   AccessChain 55(data) 274 82
-             280:   38(ivec4)   Load 279
-             281:   38(ivec4)   VectorShuffle 280 278 4 5 2 3
-                                Store 279 281
-             282:      6(int)   Load 8(invocation)
-             283:    173(ptr)   AccessChain 55(data) 82 82
-             284:   38(ivec4)   Load 283
-             285:  187(ivec3)   VectorShuffle 284 284 0 1 2
-             286:  187(ivec3)   SubgroupFirstInvocationKHR 285
-             287:    173(ptr)   AccessChain 55(data) 282 82
-             288:   38(ivec4)   Load 287
-             289:   38(ivec4)   VectorShuffle 288 286 4 5 6 3
-                                Store 287 289
-             290:      6(int)   Load 8(invocation)
-             291:    173(ptr)   AccessChain 55(data) 99 82
-             292:   38(ivec4)   Load 291
-             293:   38(ivec4)   SubgroupFirstInvocationKHR 292
-             294:    173(ptr)   AccessChain 55(data) 290 82
-                                Store 294 293
-                                Branch 47
-              47:             Label
+             223:    193(ptr)   AccessChain 75(data) 119 102
+             224:   20(ivec4)   Load 223
+             225:      6(int)   Load 8(invocation)
+             226:      6(int)   CompositeExtract 224 0
+             227:      6(int)   SubgroupReadInvocationKHR 226 225
+             228:      6(int)   CompositeExtract 224 1
+             229:      6(int)   SubgroupReadInvocationKHR 228 225
+             230:      6(int)   CompositeExtract 224 2
+             231:      6(int)   SubgroupReadInvocationKHR 230 225
+             232:      6(int)   CompositeExtract 224 3
+             233:      6(int)   SubgroupReadInvocationKHR 232 225
+             234:   20(ivec4)   CompositeConstruct 227 229 231 233
+             235:    193(ptr)   AccessChain 75(data) 222 102
+                                Store 235 234
+                                Branch 67
+             236:               Label
+             237:      6(int)   Load 8(invocation)
+             238:     79(ptr)   AccessChain 75(data) 77 77 78
+             239:   68(float)   Load 238
+             240:   68(float)   SubgroupFirstInvocationKHR 239
+             241:     79(ptr)   AccessChain 75(data) 237 77 78
+                                Store 241 240
+             242:      6(int)   Load 8(invocation)
+             243:     88(ptr)   AccessChain 75(data) 86 77
+             244:   69(fvec4)   Load 243
+             245:   87(fvec2)   VectorShuffle 244 244 0 1
+             246:   87(fvec2)   SubgroupFirstInvocationKHR 245
+             247:     88(ptr)   AccessChain 75(data) 242 77
+             248:   69(fvec4)   Load 247
+             249:   69(fvec4)   VectorShuffle 248 246 4 5 2 3
+                                Store 247 249
+             250:      6(int)   Load 8(invocation)
+             251:     88(ptr)   AccessChain 75(data) 102 77
+             252:   69(fvec4)   Load 251
+             253:  103(fvec3)   VectorShuffle 252 252 0 1 2
+             254:  103(fvec3)   SubgroupFirstInvocationKHR 253
+             255:     88(ptr)   AccessChain 75(data) 250 77
+             256:   69(fvec4)   Load 255
+             257:   69(fvec4)   VectorShuffle 256 254 4 5 6 3
+                                Store 255 257
+             258:      6(int)   Load 8(invocation)
+             259:     88(ptr)   AccessChain 75(data) 119 77
+             260:   69(fvec4)   Load 259
+             261:   69(fvec4)   SubgroupFirstInvocationKHR 260
+             262:     88(ptr)   AccessChain 75(data) 258 77
+                                Store 262 261
+             263:      6(int)   Load 8(invocation)
+             264:    134(ptr)   AccessChain 75(data) 77 86 78
+             265:     70(int)   Load 264
+             266:     70(int)   SubgroupFirstInvocationKHR 265
+             267:    134(ptr)   AccessChain 75(data) 263 86 78
+                                Store 267 266
+             268:      6(int)   Load 8(invocation)
+             269:    142(ptr)   AccessChain 75(data) 86 86
+             270:   71(ivec4)   Load 269
+             271:  141(ivec2)   VectorShuffle 270 270 0 1
+             272:  141(ivec2)   SubgroupFirstInvocationKHR 271
+             273:    142(ptr)   AccessChain 75(data) 268 86
+             274:   71(ivec4)   Load 273
+             275:   71(ivec4)   VectorShuffle 274 272 4 5 2 3
+                                Store 273 275
+             276:      6(int)   Load 8(invocation)
+             277:    142(ptr)   AccessChain 75(data) 102 86
+             278:   71(ivec4)   Load 277
+             279:  156(ivec3)   VectorShuffle 278 278 0 1 2
+             280:  156(ivec3)   SubgroupFirstInvocationKHR 279
+             281:    142(ptr)   AccessChain 75(data) 276 86
+             282:   71(ivec4)   Load 281
+             283:   71(ivec4)   VectorShuffle 282 280 4 5 6 3
+                                Store 281 283
+             284:      6(int)   Load 8(invocation)
+             285:    142(ptr)   AccessChain 75(data) 119 86
+             286:   71(ivec4)   Load 285
+             287:   71(ivec4)   SubgroupFirstInvocationKHR 286
+             288:    142(ptr)   AccessChain 75(data) 284 86
+                                Store 288 287
+             289:      6(int)   Load 8(invocation)
+             290:    186(ptr)   AccessChain 75(data) 77 102 78
+             291:      6(int)   Load 290
+             292:      6(int)   SubgroupFirstInvocationKHR 291
+             293:    186(ptr)   AccessChain 75(data) 289 102 78
+                                Store 293 292
+             294:      6(int)   Load 8(invocation)
+             295:    193(ptr)   AccessChain 75(data) 86 102
+             296:   20(ivec4)   Load 295
+             297:   26(ivec2)   VectorShuffle 296 296 0 1
+             298:   26(ivec2)   SubgroupFirstInvocationKHR 297
+             299:    193(ptr)   AccessChain 75(data) 294 102
+             300:   20(ivec4)   Load 299
+             301:   20(ivec4)   VectorShuffle 300 298 4 5 2 3
+                                Store 299 301
+             302:      6(int)   Load 8(invocation)
+             303:    193(ptr)   AccessChain 75(data) 102 102
+             304:   20(ivec4)   Load 303
+             305:  207(ivec3)   VectorShuffle 304 304 0 1 2
+             306:  207(ivec3)   SubgroupFirstInvocationKHR 305
+             307:    193(ptr)   AccessChain 75(data) 302 102
+             308:   20(ivec4)   Load 307
+             309:   20(ivec4)   VectorShuffle 308 306 4 5 6 3
+                                Store 307 309
+             310:      6(int)   Load 8(invocation)
+             311:    193(ptr)   AccessChain 75(data) 119 102
+             312:   20(ivec4)   Load 311
+             313:   20(ivec4)   SubgroupFirstInvocationKHR 312
+             314:    193(ptr)   AccessChain 75(data) 310 102
+                                Store 314 313
+                                Branch 67
+              67:             Label
                               Return
                               FunctionEnd