Support Partitioned subgroup builtin functions with extended types

Also refactor the generation of those builtin functions to
programmatically combine names and types.
diff --git a/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out b/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out
new file mode 100644
index 0000000..f384609
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out
@@ -0,0 +1,1836 @@
+spv.subgroupExtendedTypesPartitioned.comp
+Validation failed
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 1558
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability StorageUniformBufferBlock16
+                              Capability CapabilityStorageBuffer8BitAccess
+                              Capability GroupNonUniformPartitionedNV
+                              Extension  "SPV_KHR_8bit_storage"
+                              Extension  "SPV_NV_shader_subgroup_partitioned"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_NV_shader_subgroup_partitioned"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 19  "ballot"
+                              Name 34  "Buffers"
+                              MemberName 34(Buffers) 0  "i8"
+                              MemberName 34(Buffers) 1  "u8"
+                              MemberName 34(Buffers) 2  "i16"
+                              MemberName 34(Buffers) 3  "u16"
+                              MemberName 34(Buffers) 4  "i64"
+                              MemberName 34(Buffers) 5  "u64"
+                              MemberName 34(Buffers) 6  "f16"
+                              Name 37  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 34(Buffers) 0 Offset 0
+                              MemberDecorate 34(Buffers) 1 Offset 4
+                              MemberDecorate 34(Buffers) 2 Offset 8
+                              MemberDecorate 34(Buffers) 3 Offset 16
+                              MemberDecorate 34(Buffers) 4 Offset 32
+                              MemberDecorate 34(Buffers) 5 Offset 64
+                              MemberDecorate 34(Buffers) 6 Offset 96
+                              Decorate 34(Buffers) Block
+                              Decorate 37(data) DescriptorSet 0
+                              Decorate 37(data) Binding 0
+                              Decorate 1557 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeVector 6(int) 4
+              18:             TypePointer Function 17(ivec4)
+              20:             TypeInt 8 1
+              21:             TypeVector 20(int8_t) 4
+              22:             TypeInt 8 0
+              23:             TypeVector 22(int8_t) 4
+              24:             TypeInt 16 1
+              25:             TypeVector 24(int16_t) 4
+              26:             TypeInt 16 0
+              27:             TypeVector 26(int16_t) 4
+              28:             TypeInt 64 1
+              29:             TypeVector 28(int64_t) 4
+              30:             TypeInt 64 0
+              31:             TypeVector 30(int64_t) 4
+              32:             TypeFloat 16
+              33:             TypeVector 32(float16_t) 4
+     34(Buffers):             TypeStruct 21(i8vec4) 23(i8vec4) 25(i16vec4) 27(i16vec4) 29(i64vec4) 31(i64vec4) 33(f16vec4)
+              35:             TypeArray 34(Buffers) 15
+              36:             TypePointer StorageBuffer 35
+        37(data):     36(ptr) Variable StorageBuffer
+              38:             TypeInt 32 1
+              39:     38(int) Constant 0
+              40:      6(int) Constant 0
+              41:             TypePointer StorageBuffer 20(int8_t)
+              45:     38(int) Constant 1
+              46:             TypeVector 20(int8_t) 2
+              47:             TypePointer StorageBuffer 21(i8vec4)
+              52:     38(int) Constant 2
+              53:             TypeVector 20(int8_t) 3
+              58:     38(int) Constant 3
+              62:             TypePointer StorageBuffer 22(int8_t)
+              66:             TypeVector 22(int8_t) 2
+              67:             TypePointer StorageBuffer 23(i8vec4)
+              72:             TypeVector 22(int8_t) 3
+              80:             TypePointer StorageBuffer 24(int16_t)
+              84:             TypeVector 24(int16_t) 2
+              85:             TypePointer StorageBuffer 25(i16vec4)
+              90:             TypeVector 24(int16_t) 3
+              98:             TypePointer StorageBuffer 26(int16_t)
+             102:             TypeVector 26(int16_t) 2
+             103:             TypePointer StorageBuffer 27(i16vec4)
+             108:             TypeVector 26(int16_t) 3
+             116:     38(int) Constant 4
+             117:             TypePointer StorageBuffer 28(int64_t)
+             121:             TypeVector 28(int64_t) 2
+             122:             TypePointer StorageBuffer 29(i64vec4)
+             127:             TypeVector 28(int64_t) 3
+             135:     38(int) Constant 5
+             136:             TypePointer StorageBuffer 30(int64_t)
+             140:             TypeVector 30(int64_t) 2
+             141:             TypePointer StorageBuffer 31(i64vec4)
+             146:             TypeVector 30(int64_t) 3
+             154:     38(int) Constant 6
+             155:             TypePointer StorageBuffer 32(float16_t)
+             159:             TypeVector 32(float16_t) 2
+             160:             TypePointer StorageBuffer 33(f16vec4)
+             165:             TypeVector 32(float16_t) 3
+             177:      6(int) Constant 3
+            1554:             TypeVector 6(int) 3
+            1555:      6(int) Constant 8
+            1556:      6(int) Constant 1
+            1557: 1554(ivec3) ConstantComposite 1555 1556 1556
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+      19(ballot):     18(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              42:     41(ptr) AccessChain 37(data) 39 39 40
+              43:  20(int8_t) Load 42
+              44:   17(ivec4) GroupNonUniformPartitionNV 43
+                              Store 19(ballot) 44
+              48:     47(ptr) AccessChain 37(data) 45 39
+              49:  21(i8vec4) Load 48
+              50:  46(i8vec2) VectorShuffle 49 49 0 1
+              51:   17(ivec4) GroupNonUniformPartitionNV 50
+                              Store 19(ballot) 51
+              54:     47(ptr) AccessChain 37(data) 52 39
+              55:  21(i8vec4) Load 54
+              56:  53(i8vec3) VectorShuffle 55 55 0 1 2
+              57:   17(ivec4) GroupNonUniformPartitionNV 56
+                              Store 19(ballot) 57
+              59:     47(ptr) AccessChain 37(data) 58 39
+              60:  21(i8vec4) Load 59
+              61:   17(ivec4) GroupNonUniformPartitionNV 60
+                              Store 19(ballot) 61
+              63:     62(ptr) AccessChain 37(data) 39 45 40
+              64:  22(int8_t) Load 63
+              65:   17(ivec4) GroupNonUniformPartitionNV 64
+                              Store 19(ballot) 65
+              68:     67(ptr) AccessChain 37(data) 45 45
+              69:  23(i8vec4) Load 68
+              70:  66(i8vec2) VectorShuffle 69 69 0 1
+              71:   17(ivec4) GroupNonUniformPartitionNV 70
+                              Store 19(ballot) 71
+              73:     67(ptr) AccessChain 37(data) 52 45
+              74:  23(i8vec4) Load 73
+              75:  72(i8vec3) VectorShuffle 74 74 0 1 2
+              76:   17(ivec4) GroupNonUniformPartitionNV 75
+                              Store 19(ballot) 76
+              77:     67(ptr) AccessChain 37(data) 58 45
+              78:  23(i8vec4) Load 77
+              79:   17(ivec4) GroupNonUniformPartitionNV 78
+                              Store 19(ballot) 79
+              81:     80(ptr) AccessChain 37(data) 39 52 40
+              82: 24(int16_t) Load 81
+              83:   17(ivec4) GroupNonUniformPartitionNV 82
+                              Store 19(ballot) 83
+              86:     85(ptr) AccessChain 37(data) 45 52
+              87: 25(i16vec4) Load 86
+              88: 84(i16vec2) VectorShuffle 87 87 0 1
+              89:   17(ivec4) GroupNonUniformPartitionNV 88
+                              Store 19(ballot) 89
+              91:     85(ptr) AccessChain 37(data) 52 52
+              92: 25(i16vec4) Load 91
+              93: 90(i16vec3) VectorShuffle 92 92 0 1 2
+              94:   17(ivec4) GroupNonUniformPartitionNV 93
+                              Store 19(ballot) 94
+              95:     85(ptr) AccessChain 37(data) 58 52
+              96: 25(i16vec4) Load 95
+              97:   17(ivec4) GroupNonUniformPartitionNV 96
+                              Store 19(ballot) 97
+              99:     98(ptr) AccessChain 37(data) 39 58 40
+             100: 26(int16_t) Load 99
+             101:   17(ivec4) GroupNonUniformPartitionNV 100
+                              Store 19(ballot) 101
+             104:    103(ptr) AccessChain 37(data) 45 58
+             105: 27(i16vec4) Load 104
+             106:102(i16vec2) VectorShuffle 105 105 0 1
+             107:   17(ivec4) GroupNonUniformPartitionNV 106
+                              Store 19(ballot) 107
+             109:    103(ptr) AccessChain 37(data) 52 58
+             110: 27(i16vec4) Load 109
+             111:108(i16vec3) VectorShuffle 110 110 0 1 2
+             112:   17(ivec4) GroupNonUniformPartitionNV 111
+                              Store 19(ballot) 112
+             113:    103(ptr) AccessChain 37(data) 58 58
+             114: 27(i16vec4) Load 113
+             115:   17(ivec4) GroupNonUniformPartitionNV 114
+                              Store 19(ballot) 115
+             118:    117(ptr) AccessChain 37(data) 39 116 40
+             119: 28(int64_t) Load 118
+             120:   17(ivec4) GroupNonUniformPartitionNV 119
+                              Store 19(ballot) 120
+             123:    122(ptr) AccessChain 37(data) 45 116
+             124: 29(i64vec4) Load 123
+             125:121(i64vec2) VectorShuffle 124 124 0 1
+             126:   17(ivec4) GroupNonUniformPartitionNV 125
+                              Store 19(ballot) 126
+             128:    122(ptr) AccessChain 37(data) 52 116
+             129: 29(i64vec4) Load 128
+             130:127(i64vec3) VectorShuffle 129 129 0 1 2
+             131:   17(ivec4) GroupNonUniformPartitionNV 130
+                              Store 19(ballot) 131
+             132:    122(ptr) AccessChain 37(data) 58 116
+             133: 29(i64vec4) Load 132
+             134:   17(ivec4) GroupNonUniformPartitionNV 133
+                              Store 19(ballot) 134
+             137:    136(ptr) AccessChain 37(data) 39 135 40
+             138: 30(int64_t) Load 137
+             139:   17(ivec4) GroupNonUniformPartitionNV 138
+                              Store 19(ballot) 139
+             142:    141(ptr) AccessChain 37(data) 45 135
+             143: 31(i64vec4) Load 142
+             144:140(i64vec2) VectorShuffle 143 143 0 1
+             145:   17(ivec4) GroupNonUniformPartitionNV 144
+                              Store 19(ballot) 145
+             147:    141(ptr) AccessChain 37(data) 52 135
+             148: 31(i64vec4) Load 147
+             149:146(i64vec3) VectorShuffle 148 148 0 1 2
+             150:   17(ivec4) GroupNonUniformPartitionNV 149
+                              Store 19(ballot) 150
+             151:    141(ptr) AccessChain 37(data) 58 135
+             152: 31(i64vec4) Load 151
+             153:   17(ivec4) GroupNonUniformPartitionNV 152
+                              Store 19(ballot) 153
+             156:    155(ptr) AccessChain 37(data) 39 154 40
+             157:32(float16_t) Load 156
+             158:   17(ivec4) GroupNonUniformPartitionNV 157
+                              Store 19(ballot) 158
+             161:    160(ptr) AccessChain 37(data) 45 154
+             162: 33(f16vec4) Load 161
+             163:159(f16vec2) VectorShuffle 162 162 0 1
+             164:   17(ivec4) GroupNonUniformPartitionNV 163
+                              Store 19(ballot) 164
+             166:    160(ptr) AccessChain 37(data) 52 154
+             167: 33(f16vec4) Load 166
+             168:165(f16vec3) VectorShuffle 167 167 0 1 2
+             169:   17(ivec4) GroupNonUniformPartitionNV 168
+                              Store 19(ballot) 169
+             170:    160(ptr) AccessChain 37(data) 58 154
+             171: 33(f16vec4) Load 170
+             172:   17(ivec4) GroupNonUniformPartitionNV 171
+                              Store 19(ballot) 172
+             173:      6(int) Load 8(invocation)
+             174:     41(ptr) AccessChain 37(data) 39 39 40
+             175:  20(int8_t) Load 174
+             176:   17(ivec4) Load 19(ballot)
+             178:  20(int8_t) GroupNonUniformIAdd 177 PartitionedReduceNV 175 176
+             179:     41(ptr) AccessChain 37(data) 173 39 40
+                              Store 179 178
+             180:      6(int) Load 8(invocation)
+             181:     47(ptr) AccessChain 37(data) 45 39
+             182:  21(i8vec4) Load 181
+             183:  46(i8vec2) VectorShuffle 182 182 0 1
+             184:   17(ivec4) Load 19(ballot)
+             185:  46(i8vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 183 184
+             186:     47(ptr) AccessChain 37(data) 180 39
+             187:  21(i8vec4) Load 186
+             188:  21(i8vec4) VectorShuffle 187 185 4 5 2 3
+                              Store 186 188
+             189:      6(int) Load 8(invocation)
+             190:     47(ptr) AccessChain 37(data) 52 39
+             191:  21(i8vec4) Load 190
+             192:  53(i8vec3) VectorShuffle 191 191 0 1 2
+             193:   17(ivec4) Load 19(ballot)
+             194:  53(i8vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 192 193
+             195:     47(ptr) AccessChain 37(data) 189 39
+             196:  21(i8vec4) Load 195
+             197:  21(i8vec4) VectorShuffle 196 194 4 5 6 3
+                              Store 195 197
+             198:      6(int) Load 8(invocation)
+             199:     47(ptr) AccessChain 37(data) 58 39
+             200:  21(i8vec4) Load 199
+             201:   17(ivec4) Load 19(ballot)
+             202:  21(i8vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 200 201
+             203:     47(ptr) AccessChain 37(data) 198 39
+                              Store 203 202
+             204:      6(int) Load 8(invocation)
+             205:     41(ptr) AccessChain 37(data) 39 39 40
+             206:  20(int8_t) Load 205
+             207:   17(ivec4) Load 19(ballot)
+             208:  20(int8_t) GroupNonUniformIMul 177 PartitionedReduceNV 206 207
+             209:     41(ptr) AccessChain 37(data) 204 39 40
+                              Store 209 208
+             210:      6(int) Load 8(invocation)
+             211:     47(ptr) AccessChain 37(data) 45 39
+             212:  21(i8vec4) Load 211
+             213:  46(i8vec2) VectorShuffle 212 212 0 1
+             214:   17(ivec4) Load 19(ballot)
+             215:  46(i8vec2) GroupNonUniformIMul 177 PartitionedReduceNV 213 214
+             216:     47(ptr) AccessChain 37(data) 210 39
+             217:  21(i8vec4) Load 216
+             218:  21(i8vec4) VectorShuffle 217 215 4 5 2 3
+                              Store 216 218
+             219:      6(int) Load 8(invocation)
+             220:     47(ptr) AccessChain 37(data) 52 39
+             221:  21(i8vec4) Load 220
+             222:  53(i8vec3) VectorShuffle 221 221 0 1 2
+             223:   17(ivec4) Load 19(ballot)
+             224:  53(i8vec3) GroupNonUniformIMul 177 PartitionedReduceNV 222 223
+             225:     47(ptr) AccessChain 37(data) 219 39
+             226:  21(i8vec4) Load 225
+             227:  21(i8vec4) VectorShuffle 226 224 4 5 6 3
+                              Store 225 227
+             228:      6(int) Load 8(invocation)
+             229:     47(ptr) AccessChain 37(data) 58 39
+             230:  21(i8vec4) Load 229
+             231:   17(ivec4) Load 19(ballot)
+             232:  21(i8vec4) GroupNonUniformIMul 177 PartitionedReduceNV 230 231
+             233:     47(ptr) AccessChain 37(data) 228 39
+                              Store 233 232
+             234:      6(int) Load 8(invocation)
+             235:     41(ptr) AccessChain 37(data) 39 39 40
+             236:  20(int8_t) Load 235
+             237:   17(ivec4) Load 19(ballot)
+             238:  20(int8_t) GroupNonUniformSMin 177 PartitionedReduceNV 236 237
+             239:     41(ptr) AccessChain 37(data) 234 39 40
+                              Store 239 238
+             240:      6(int) Load 8(invocation)
+             241:     47(ptr) AccessChain 37(data) 45 39
+             242:  21(i8vec4) Load 241
+             243:  46(i8vec2) VectorShuffle 242 242 0 1
+             244:   17(ivec4) Load 19(ballot)
+             245:  46(i8vec2) GroupNonUniformSMin 177 PartitionedReduceNV 243 244
+             246:     47(ptr) AccessChain 37(data) 240 39
+             247:  21(i8vec4) Load 246
+             248:  21(i8vec4) VectorShuffle 247 245 4 5 2 3
+                              Store 246 248
+             249:      6(int) Load 8(invocation)
+             250:     47(ptr) AccessChain 37(data) 52 39
+             251:  21(i8vec4) Load 250
+             252:  53(i8vec3) VectorShuffle 251 251 0 1 2
+             253:   17(ivec4) Load 19(ballot)
+             254:  53(i8vec3) GroupNonUniformSMin 177 PartitionedReduceNV 252 253
+             255:     47(ptr) AccessChain 37(data) 249 39
+             256:  21(i8vec4) Load 255
+             257:  21(i8vec4) VectorShuffle 256 254 4 5 6 3
+                              Store 255 257
+             258:      6(int) Load 8(invocation)
+             259:     47(ptr) AccessChain 37(data) 58 39
+             260:  21(i8vec4) Load 259
+             261:   17(ivec4) Load 19(ballot)
+             262:  21(i8vec4) GroupNonUniformSMin 177 PartitionedReduceNV 260 261
+             263:     47(ptr) AccessChain 37(data) 258 39
+                              Store 263 262
+             264:      6(int) Load 8(invocation)
+             265:     41(ptr) AccessChain 37(data) 39 39 40
+             266:  20(int8_t) Load 265
+             267:   17(ivec4) Load 19(ballot)
+             268:  20(int8_t) GroupNonUniformSMax 177 PartitionedReduceNV 266 267
+             269:     41(ptr) AccessChain 37(data) 264 39 40
+                              Store 269 268
+             270:      6(int) Load 8(invocation)
+             271:     47(ptr) AccessChain 37(data) 45 39
+             272:  21(i8vec4) Load 271
+             273:  46(i8vec2) VectorShuffle 272 272 0 1
+             274:   17(ivec4) Load 19(ballot)
+             275:  46(i8vec2) GroupNonUniformSMax 177 PartitionedReduceNV 273 274
+             276:     47(ptr) AccessChain 37(data) 270 39
+             277:  21(i8vec4) Load 276
+             278:  21(i8vec4) VectorShuffle 277 275 4 5 2 3
+                              Store 276 278
+             279:      6(int) Load 8(invocation)
+             280:     47(ptr) AccessChain 37(data) 52 39
+             281:  21(i8vec4) Load 280
+             282:  53(i8vec3) VectorShuffle 281 281 0 1 2
+             283:   17(ivec4) Load 19(ballot)
+             284:  53(i8vec3) GroupNonUniformSMax 177 PartitionedReduceNV 282 283
+             285:     47(ptr) AccessChain 37(data) 279 39
+             286:  21(i8vec4) Load 285
+             287:  21(i8vec4) VectorShuffle 286 284 4 5 6 3
+                              Store 285 287
+             288:      6(int) Load 8(invocation)
+             289:     47(ptr) AccessChain 37(data) 58 39
+             290:  21(i8vec4) Load 289
+             291:   17(ivec4) Load 19(ballot)
+             292:  21(i8vec4) GroupNonUniformSMax 177 PartitionedReduceNV 290 291
+             293:     47(ptr) AccessChain 37(data) 288 39
+                              Store 293 292
+             294:      6(int) Load 8(invocation)
+             295:     41(ptr) AccessChain 37(data) 39 39 40
+             296:  20(int8_t) Load 295
+             297:   17(ivec4) Load 19(ballot)
+             298:  20(int8_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 296 297
+             299:     41(ptr) AccessChain 37(data) 294 39 40
+                              Store 299 298
+             300:      6(int) Load 8(invocation)
+             301:     47(ptr) AccessChain 37(data) 45 39
+             302:  21(i8vec4) Load 301
+             303:  46(i8vec2) VectorShuffle 302 302 0 1
+             304:   17(ivec4) Load 19(ballot)
+             305:  46(i8vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 303 304
+             306:     47(ptr) AccessChain 37(data) 300 39
+             307:  21(i8vec4) Load 306
+             308:  21(i8vec4) VectorShuffle 307 305 4 5 2 3
+                              Store 306 308
+             309:      6(int) Load 8(invocation)
+             310:     47(ptr) AccessChain 37(data) 52 39
+             311:  21(i8vec4) Load 310
+             312:  53(i8vec3) VectorShuffle 311 311 0 1 2
+             313:   17(ivec4) Load 19(ballot)
+             314:  53(i8vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 312 313
+             315:     47(ptr) AccessChain 37(data) 309 39
+             316:  21(i8vec4) Load 315
+             317:  21(i8vec4) VectorShuffle 316 314 4 5 6 3
+                              Store 315 317
+             318:      6(int) Load 8(invocation)
+             319:     47(ptr) AccessChain 37(data) 58 39
+             320:  21(i8vec4) Load 319
+             321:   17(ivec4) Load 19(ballot)
+             322:  21(i8vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 320 321
+             323:     47(ptr) AccessChain 37(data) 318 39
+                              Store 323 322
+             324:      6(int) Load 8(invocation)
+             325:     41(ptr) AccessChain 37(data) 39 39 40
+             326:  20(int8_t) Load 325
+             327:   17(ivec4) Load 19(ballot)
+             328:  20(int8_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 326 327
+             329:     41(ptr) AccessChain 37(data) 324 39 40
+                              Store 329 328
+             330:      6(int) Load 8(invocation)
+             331:     47(ptr) AccessChain 37(data) 45 39
+             332:  21(i8vec4) Load 331
+             333:  46(i8vec2) VectorShuffle 332 332 0 1
+             334:   17(ivec4) Load 19(ballot)
+             335:  46(i8vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 333 334
+             336:     47(ptr) AccessChain 37(data) 330 39
+             337:  21(i8vec4) Load 336
+             338:  21(i8vec4) VectorShuffle 337 335 4 5 2 3
+                              Store 336 338
+             339:      6(int) Load 8(invocation)
+             340:     47(ptr) AccessChain 37(data) 52 39
+             341:  21(i8vec4) Load 340
+             342:  53(i8vec3) VectorShuffle 341 341 0 1 2
+             343:   17(ivec4) Load 19(ballot)
+             344:  53(i8vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 342 343
+             345:     47(ptr) AccessChain 37(data) 339 39
+             346:  21(i8vec4) Load 345
+             347:  21(i8vec4) VectorShuffle 346 344 4 5 6 3
+                              Store 345 347
+             348:      6(int) Load 8(invocation)
+             349:     47(ptr) AccessChain 37(data) 58 39
+             350:  21(i8vec4) Load 349
+             351:   17(ivec4) Load 19(ballot)
+             352:  21(i8vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 350 351
+             353:     47(ptr) AccessChain 37(data) 348 39
+                              Store 353 352
+             354:      6(int) Load 8(invocation)
+             355:     41(ptr) AccessChain 37(data) 39 39 40
+             356:  20(int8_t) Load 355
+             357:   17(ivec4) Load 19(ballot)
+             358:  20(int8_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 356 357
+             359:     41(ptr) AccessChain 37(data) 354 39 40
+                              Store 359 358
+             360:      6(int) Load 8(invocation)
+             361:     47(ptr) AccessChain 37(data) 45 39
+             362:  21(i8vec4) Load 361
+             363:  46(i8vec2) VectorShuffle 362 362 0 1
+             364:   17(ivec4) Load 19(ballot)
+             365:  46(i8vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 363 364
+             366:     47(ptr) AccessChain 37(data) 360 39
+             367:  21(i8vec4) Load 366
+             368:  21(i8vec4) VectorShuffle 367 365 4 5 2 3
+                              Store 366 368
+             369:      6(int) Load 8(invocation)
+             370:     47(ptr) AccessChain 37(data) 52 39
+             371:  21(i8vec4) Load 370
+             372:  53(i8vec3) VectorShuffle 371 371 0 1 2
+             373:   17(ivec4) Load 19(ballot)
+             374:  53(i8vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 372 373
+             375:     47(ptr) AccessChain 37(data) 369 39
+             376:  21(i8vec4) Load 375
+             377:  21(i8vec4) VectorShuffle 376 374 4 5 6 3
+                              Store 375 377
+             378:      6(int) Load 8(invocation)
+             379:     47(ptr) AccessChain 37(data) 58 39
+             380:  21(i8vec4) Load 379
+             381:   17(ivec4) Load 19(ballot)
+             382:  21(i8vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 380 381
+             383:     47(ptr) AccessChain 37(data) 378 39
+                              Store 383 382
+             384:      6(int) Load 8(invocation)
+             385:     62(ptr) AccessChain 37(data) 39 45 40
+             386:  22(int8_t) Load 385
+             387:   17(ivec4) Load 19(ballot)
+             388:  22(int8_t) GroupNonUniformIAdd 177 PartitionedReduceNV 386 387
+             389:     62(ptr) AccessChain 37(data) 384 45 40
+                              Store 389 388
+             390:      6(int) Load 8(invocation)
+             391:     67(ptr) AccessChain 37(data) 45 45
+             392:  23(i8vec4) Load 391
+             393:  66(i8vec2) VectorShuffle 392 392 0 1
+             394:   17(ivec4) Load 19(ballot)
+             395:  66(i8vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 393 394
+             396:     67(ptr) AccessChain 37(data) 390 45
+             397:  23(i8vec4) Load 396
+             398:  23(i8vec4) VectorShuffle 397 395 4 5 2 3
+                              Store 396 398
+             399:      6(int) Load 8(invocation)
+             400:     67(ptr) AccessChain 37(data) 52 45
+             401:  23(i8vec4) Load 400
+             402:  72(i8vec3) VectorShuffle 401 401 0 1 2
+             403:   17(ivec4) Load 19(ballot)
+             404:  72(i8vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 402 403
+             405:     67(ptr) AccessChain 37(data) 399 45
+             406:  23(i8vec4) Load 405
+             407:  23(i8vec4) VectorShuffle 406 404 4 5 6 3
+                              Store 405 407
+             408:      6(int) Load 8(invocation)
+             409:     67(ptr) AccessChain 37(data) 58 45
+             410:  23(i8vec4) Load 409
+             411:   17(ivec4) Load 19(ballot)
+             412:  23(i8vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 410 411
+             413:     67(ptr) AccessChain 37(data) 408 45
+                              Store 413 412
+             414:      6(int) Load 8(invocation)
+             415:     62(ptr) AccessChain 37(data) 39 45 40
+             416:  22(int8_t) Load 415
+             417:   17(ivec4) Load 19(ballot)
+             418:  22(int8_t) GroupNonUniformIMul 177 PartitionedReduceNV 416 417
+             419:     62(ptr) AccessChain 37(data) 414 45 40
+                              Store 419 418
+             420:      6(int) Load 8(invocation)
+             421:     67(ptr) AccessChain 37(data) 45 45
+             422:  23(i8vec4) Load 421
+             423:  66(i8vec2) VectorShuffle 422 422 0 1
+             424:   17(ivec4) Load 19(ballot)
+             425:  66(i8vec2) GroupNonUniformIMul 177 PartitionedReduceNV 423 424
+             426:     67(ptr) AccessChain 37(data) 420 45
+             427:  23(i8vec4) Load 426
+             428:  23(i8vec4) VectorShuffle 427 425 4 5 2 3
+                              Store 426 428
+             429:      6(int) Load 8(invocation)
+             430:     67(ptr) AccessChain 37(data) 52 45
+             431:  23(i8vec4) Load 430
+             432:  72(i8vec3) VectorShuffle 431 431 0 1 2
+             433:   17(ivec4) Load 19(ballot)
+             434:  72(i8vec3) GroupNonUniformIMul 177 PartitionedReduceNV 432 433
+             435:     67(ptr) AccessChain 37(data) 429 45
+             436:  23(i8vec4) Load 435
+             437:  23(i8vec4) VectorShuffle 436 434 4 5 6 3
+                              Store 435 437
+             438:      6(int) Load 8(invocation)
+             439:     67(ptr) AccessChain 37(data) 58 45
+             440:  23(i8vec4) Load 439
+             441:   17(ivec4) Load 19(ballot)
+             442:  23(i8vec4) GroupNonUniformIMul 177 PartitionedReduceNV 440 441
+             443:     67(ptr) AccessChain 37(data) 438 45
+                              Store 443 442
+             444:      6(int) Load 8(invocation)
+             445:     62(ptr) AccessChain 37(data) 39 45 40
+             446:  22(int8_t) Load 445
+             447:   17(ivec4) Load 19(ballot)
+             448:  22(int8_t) GroupNonUniformSMin 177 PartitionedReduceNV 446 447
+             449:     62(ptr) AccessChain 37(data) 444 45 40
+                              Store 449 448
+             450:      6(int) Load 8(invocation)
+             451:     67(ptr) AccessChain 37(data) 45 45
+             452:  23(i8vec4) Load 451
+             453:  66(i8vec2) VectorShuffle 452 452 0 1
+             454:   17(ivec4) Load 19(ballot)
+             455:  66(i8vec2) GroupNonUniformSMin 177 PartitionedReduceNV 453 454
+             456:     67(ptr) AccessChain 37(data) 450 45
+             457:  23(i8vec4) Load 456
+             458:  23(i8vec4) VectorShuffle 457 455 4 5 2 3
+                              Store 456 458
+             459:      6(int) Load 8(invocation)
+             460:     67(ptr) AccessChain 37(data) 52 45
+             461:  23(i8vec4) Load 460
+             462:  72(i8vec3) VectorShuffle 461 461 0 1 2
+             463:   17(ivec4) Load 19(ballot)
+             464:  72(i8vec3) GroupNonUniformSMin 177 PartitionedReduceNV 462 463
+             465:     67(ptr) AccessChain 37(data) 459 45
+             466:  23(i8vec4) Load 465
+             467:  23(i8vec4) VectorShuffle 466 464 4 5 6 3
+                              Store 465 467
+             468:      6(int) Load 8(invocation)
+             469:     67(ptr) AccessChain 37(data) 58 45
+             470:  23(i8vec4) Load 469
+             471:   17(ivec4) Load 19(ballot)
+             472:  23(i8vec4) GroupNonUniformSMin 177 PartitionedReduceNV 470 471
+             473:     67(ptr) AccessChain 37(data) 468 45
+                              Store 473 472
+             474:      6(int) Load 8(invocation)
+             475:     62(ptr) AccessChain 37(data) 39 45 40
+             476:  22(int8_t) Load 475
+             477:   17(ivec4) Load 19(ballot)
+             478:  22(int8_t) GroupNonUniformSMax 177 PartitionedReduceNV 476 477
+             479:     62(ptr) AccessChain 37(data) 474 45 40
+                              Store 479 478
+             480:      6(int) Load 8(invocation)
+             481:     67(ptr) AccessChain 37(data) 45 45
+             482:  23(i8vec4) Load 481
+             483:  66(i8vec2) VectorShuffle 482 482 0 1
+             484:   17(ivec4) Load 19(ballot)
+             485:  66(i8vec2) GroupNonUniformSMax 177 PartitionedReduceNV 483 484
+             486:     67(ptr) AccessChain 37(data) 480 45
+             487:  23(i8vec4) Load 486
+             488:  23(i8vec4) VectorShuffle 487 485 4 5 2 3
+                              Store 486 488
+             489:      6(int) Load 8(invocation)
+             490:     67(ptr) AccessChain 37(data) 52 45
+             491:  23(i8vec4) Load 490
+             492:  72(i8vec3) VectorShuffle 491 491 0 1 2
+             493:   17(ivec4) Load 19(ballot)
+             494:  72(i8vec3) GroupNonUniformSMax 177 PartitionedReduceNV 492 493
+             495:     67(ptr) AccessChain 37(data) 489 45
+             496:  23(i8vec4) Load 495
+             497:  23(i8vec4) VectorShuffle 496 494 4 5 6 3
+                              Store 495 497
+             498:      6(int) Load 8(invocation)
+             499:     67(ptr) AccessChain 37(data) 58 45
+             500:  23(i8vec4) Load 499
+             501:   17(ivec4) Load 19(ballot)
+             502:  23(i8vec4) GroupNonUniformSMax 177 PartitionedReduceNV 500 501
+             503:     67(ptr) AccessChain 37(data) 498 45
+                              Store 503 502
+             504:      6(int) Load 8(invocation)
+             505:     62(ptr) AccessChain 37(data) 39 45 40
+             506:  22(int8_t) Load 505
+             507:   17(ivec4) Load 19(ballot)
+             508:  22(int8_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 506 507
+             509:     62(ptr) AccessChain 37(data) 504 45 40
+                              Store 509 508
+             510:      6(int) Load 8(invocation)
+             511:     67(ptr) AccessChain 37(data) 45 45
+             512:  23(i8vec4) Load 511
+             513:  66(i8vec2) VectorShuffle 512 512 0 1
+             514:   17(ivec4) Load 19(ballot)
+             515:  66(i8vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 513 514
+             516:     67(ptr) AccessChain 37(data) 510 45
+             517:  23(i8vec4) Load 516
+             518:  23(i8vec4) VectorShuffle 517 515 4 5 2 3
+                              Store 516 518
+             519:      6(int) Load 8(invocation)
+             520:     67(ptr) AccessChain 37(data) 52 45
+             521:  23(i8vec4) Load 520
+             522:  72(i8vec3) VectorShuffle 521 521 0 1 2
+             523:   17(ivec4) Load 19(ballot)
+             524:  72(i8vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 522 523
+             525:     67(ptr) AccessChain 37(data) 519 45
+             526:  23(i8vec4) Load 525
+             527:  23(i8vec4) VectorShuffle 526 524 4 5 6 3
+                              Store 525 527
+             528:      6(int) Load 8(invocation)
+             529:     67(ptr) AccessChain 37(data) 58 45
+             530:  23(i8vec4) Load 529
+             531:   17(ivec4) Load 19(ballot)
+             532:  23(i8vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 530 531
+             533:     67(ptr) AccessChain 37(data) 528 45
+                              Store 533 532
+             534:      6(int) Load 8(invocation)
+             535:     62(ptr) AccessChain 37(data) 39 45 40
+             536:  22(int8_t) Load 535
+             537:   17(ivec4) Load 19(ballot)
+             538:  22(int8_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 536 537
+             539:     62(ptr) AccessChain 37(data) 534 45 40
+                              Store 539 538
+             540:      6(int) Load 8(invocation)
+             541:     67(ptr) AccessChain 37(data) 45 45
+             542:  23(i8vec4) Load 541
+             543:  66(i8vec2) VectorShuffle 542 542 0 1
+             544:   17(ivec4) Load 19(ballot)
+             545:  66(i8vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 543 544
+             546:     67(ptr) AccessChain 37(data) 540 45
+             547:  23(i8vec4) Load 546
+             548:  23(i8vec4) VectorShuffle 547 545 4 5 2 3
+                              Store 546 548
+             549:      6(int) Load 8(invocation)
+             550:     67(ptr) AccessChain 37(data) 52 45
+             551:  23(i8vec4) Load 550
+             552:  72(i8vec3) VectorShuffle 551 551 0 1 2
+             553:   17(ivec4) Load 19(ballot)
+             554:  72(i8vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 552 553
+             555:     67(ptr) AccessChain 37(data) 549 45
+             556:  23(i8vec4) Load 555
+             557:  23(i8vec4) VectorShuffle 556 554 4 5 6 3
+                              Store 555 557
+             558:      6(int) Load 8(invocation)
+             559:     67(ptr) AccessChain 37(data) 58 45
+             560:  23(i8vec4) Load 559
+             561:   17(ivec4) Load 19(ballot)
+             562:  23(i8vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 560 561
+             563:     67(ptr) AccessChain 37(data) 558 45
+                              Store 563 562
+             564:      6(int) Load 8(invocation)
+             565:     62(ptr) AccessChain 37(data) 39 45 40
+             566:  22(int8_t) Load 565
+             567:   17(ivec4) Load 19(ballot)
+             568:  22(int8_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 566 567
+             569:     62(ptr) AccessChain 37(data) 564 45 40
+                              Store 569 568
+             570:      6(int) Load 8(invocation)
+             571:     67(ptr) AccessChain 37(data) 45 45
+             572:  23(i8vec4) Load 571
+             573:  66(i8vec2) VectorShuffle 572 572 0 1
+             574:   17(ivec4) Load 19(ballot)
+             575:  66(i8vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 573 574
+             576:     67(ptr) AccessChain 37(data) 570 45
+             577:  23(i8vec4) Load 576
+             578:  23(i8vec4) VectorShuffle 577 575 4 5 2 3
+                              Store 576 578
+             579:      6(int) Load 8(invocation)
+             580:     67(ptr) AccessChain 37(data) 52 45
+             581:  23(i8vec4) Load 580
+             582:  72(i8vec3) VectorShuffle 581 581 0 1 2
+             583:   17(ivec4) Load 19(ballot)
+             584:  72(i8vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 582 583
+             585:     67(ptr) AccessChain 37(data) 579 45
+             586:  23(i8vec4) Load 585
+             587:  23(i8vec4) VectorShuffle 586 584 4 5 6 3
+                              Store 585 587
+             588:      6(int) Load 8(invocation)
+             589:     67(ptr) AccessChain 37(data) 58 45
+             590:  23(i8vec4) Load 589
+             591:   17(ivec4) Load 19(ballot)
+             592:  23(i8vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 590 591
+             593:     67(ptr) AccessChain 37(data) 588 45
+                              Store 593 592
+             594:      6(int) Load 8(invocation)
+             595:     80(ptr) AccessChain 37(data) 39 52 40
+             596: 24(int16_t) Load 595
+             597:   17(ivec4) Load 19(ballot)
+             598: 24(int16_t) GroupNonUniformIAdd 177 PartitionedReduceNV 596 597
+             599:     80(ptr) AccessChain 37(data) 594 52 40
+                              Store 599 598
+             600:      6(int) Load 8(invocation)
+             601:     85(ptr) AccessChain 37(data) 45 52
+             602: 25(i16vec4) Load 601
+             603: 84(i16vec2) VectorShuffle 602 602 0 1
+             604:   17(ivec4) Load 19(ballot)
+             605: 84(i16vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 603 604
+             606:     85(ptr) AccessChain 37(data) 600 52
+             607: 25(i16vec4) Load 606
+             608: 25(i16vec4) VectorShuffle 607 605 4 5 2 3
+                              Store 606 608
+             609:      6(int) Load 8(invocation)
+             610:     85(ptr) AccessChain 37(data) 52 52
+             611: 25(i16vec4) Load 610
+             612: 90(i16vec3) VectorShuffle 611 611 0 1 2
+             613:   17(ivec4) Load 19(ballot)
+             614: 90(i16vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 612 613
+             615:     85(ptr) AccessChain 37(data) 609 52
+             616: 25(i16vec4) Load 615
+             617: 25(i16vec4) VectorShuffle 616 614 4 5 6 3
+                              Store 615 617
+             618:      6(int) Load 8(invocation)
+             619:     85(ptr) AccessChain 37(data) 58 52
+             620: 25(i16vec4) Load 619
+             621:   17(ivec4) Load 19(ballot)
+             622: 25(i16vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 620 621
+             623:     85(ptr) AccessChain 37(data) 618 52
+                              Store 623 622
+             624:      6(int) Load 8(invocation)
+             625:     80(ptr) AccessChain 37(data) 39 52 40
+             626: 24(int16_t) Load 625
+             627:   17(ivec4) Load 19(ballot)
+             628: 24(int16_t) GroupNonUniformIMul 177 PartitionedReduceNV 626 627
+             629:     80(ptr) AccessChain 37(data) 624 52 40
+                              Store 629 628
+             630:      6(int) Load 8(invocation)
+             631:     85(ptr) AccessChain 37(data) 45 52
+             632: 25(i16vec4) Load 631
+             633: 84(i16vec2) VectorShuffle 632 632 0 1
+             634:   17(ivec4) Load 19(ballot)
+             635: 84(i16vec2) GroupNonUniformIMul 177 PartitionedReduceNV 633 634
+             636:     85(ptr) AccessChain 37(data) 630 52
+             637: 25(i16vec4) Load 636
+             638: 25(i16vec4) VectorShuffle 637 635 4 5 2 3
+                              Store 636 638
+             639:      6(int) Load 8(invocation)
+             640:     85(ptr) AccessChain 37(data) 52 52
+             641: 25(i16vec4) Load 640
+             642: 90(i16vec3) VectorShuffle 641 641 0 1 2
+             643:   17(ivec4) Load 19(ballot)
+             644: 90(i16vec3) GroupNonUniformIMul 177 PartitionedReduceNV 642 643
+             645:     85(ptr) AccessChain 37(data) 639 52
+             646: 25(i16vec4) Load 645
+             647: 25(i16vec4) VectorShuffle 646 644 4 5 6 3
+                              Store 645 647
+             648:      6(int) Load 8(invocation)
+             649:     85(ptr) AccessChain 37(data) 58 52
+             650: 25(i16vec4) Load 649
+             651:   17(ivec4) Load 19(ballot)
+             652: 25(i16vec4) GroupNonUniformIMul 177 PartitionedReduceNV 650 651
+             653:     85(ptr) AccessChain 37(data) 648 52
+                              Store 653 652
+             654:      6(int) Load 8(invocation)
+             655:     80(ptr) AccessChain 37(data) 39 52 40
+             656: 24(int16_t) Load 655
+             657:   17(ivec4) Load 19(ballot)
+             658: 24(int16_t) GroupNonUniformSMin 177 PartitionedReduceNV 656 657
+             659:     80(ptr) AccessChain 37(data) 654 52 40
+                              Store 659 658
+             660:      6(int) Load 8(invocation)
+             661:     85(ptr) AccessChain 37(data) 45 52
+             662: 25(i16vec4) Load 661
+             663: 84(i16vec2) VectorShuffle 662 662 0 1
+             664:   17(ivec4) Load 19(ballot)
+             665: 84(i16vec2) GroupNonUniformSMin 177 PartitionedReduceNV 663 664
+             666:     85(ptr) AccessChain 37(data) 660 52
+             667: 25(i16vec4) Load 666
+             668: 25(i16vec4) VectorShuffle 667 665 4 5 2 3
+                              Store 666 668
+             669:      6(int) Load 8(invocation)
+             670:     85(ptr) AccessChain 37(data) 52 52
+             671: 25(i16vec4) Load 670
+             672: 90(i16vec3) VectorShuffle 671 671 0 1 2
+             673:   17(ivec4) Load 19(ballot)
+             674: 90(i16vec3) GroupNonUniformSMin 177 PartitionedReduceNV 672 673
+             675:     85(ptr) AccessChain 37(data) 669 52
+             676: 25(i16vec4) Load 675
+             677: 25(i16vec4) VectorShuffle 676 674 4 5 6 3
+                              Store 675 677
+             678:      6(int) Load 8(invocation)
+             679:     85(ptr) AccessChain 37(data) 58 52
+             680: 25(i16vec4) Load 679
+             681:   17(ivec4) Load 19(ballot)
+             682: 25(i16vec4) GroupNonUniformSMin 177 PartitionedReduceNV 680 681
+             683:     85(ptr) AccessChain 37(data) 678 52
+                              Store 683 682
+             684:      6(int) Load 8(invocation)
+             685:     80(ptr) AccessChain 37(data) 39 52 40
+             686: 24(int16_t) Load 685
+             687:   17(ivec4) Load 19(ballot)
+             688: 24(int16_t) GroupNonUniformSMax 177 PartitionedReduceNV 686 687
+             689:     80(ptr) AccessChain 37(data) 684 52 40
+                              Store 689 688
+             690:      6(int) Load 8(invocation)
+             691:     85(ptr) AccessChain 37(data) 45 52
+             692: 25(i16vec4) Load 691
+             693: 84(i16vec2) VectorShuffle 692 692 0 1
+             694:   17(ivec4) Load 19(ballot)
+             695: 84(i16vec2) GroupNonUniformSMax 177 PartitionedReduceNV 693 694
+             696:     85(ptr) AccessChain 37(data) 690 52
+             697: 25(i16vec4) Load 696
+             698: 25(i16vec4) VectorShuffle 697 695 4 5 2 3
+                              Store 696 698
+             699:      6(int) Load 8(invocation)
+             700:     85(ptr) AccessChain 37(data) 52 52
+             701: 25(i16vec4) Load 700
+             702: 90(i16vec3) VectorShuffle 701 701 0 1 2
+             703:   17(ivec4) Load 19(ballot)
+             704: 90(i16vec3) GroupNonUniformSMax 177 PartitionedReduceNV 702 703
+             705:     85(ptr) AccessChain 37(data) 699 52
+             706: 25(i16vec4) Load 705
+             707: 25(i16vec4) VectorShuffle 706 704 4 5 6 3
+                              Store 705 707
+             708:      6(int) Load 8(invocation)
+             709:     85(ptr) AccessChain 37(data) 58 52
+             710: 25(i16vec4) Load 709
+             711:   17(ivec4) Load 19(ballot)
+             712: 25(i16vec4) GroupNonUniformSMax 177 PartitionedReduceNV 710 711
+             713:     85(ptr) AccessChain 37(data) 708 52
+                              Store 713 712
+             714:      6(int) Load 8(invocation)
+             715:     80(ptr) AccessChain 37(data) 39 52 40
+             716: 24(int16_t) Load 715
+             717:   17(ivec4) Load 19(ballot)
+             718: 24(int16_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 716 717
+             719:     80(ptr) AccessChain 37(data) 714 52 40
+                              Store 719 718
+             720:      6(int) Load 8(invocation)
+             721:     85(ptr) AccessChain 37(data) 45 52
+             722: 25(i16vec4) Load 721
+             723: 84(i16vec2) VectorShuffle 722 722 0 1
+             724:   17(ivec4) Load 19(ballot)
+             725: 84(i16vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 723 724
+             726:     85(ptr) AccessChain 37(data) 720 52
+             727: 25(i16vec4) Load 726
+             728: 25(i16vec4) VectorShuffle 727 725 4 5 2 3
+                              Store 726 728
+             729:      6(int) Load 8(invocation)
+             730:     85(ptr) AccessChain 37(data) 52 52
+             731: 25(i16vec4) Load 730
+             732: 90(i16vec3) VectorShuffle 731 731 0 1 2
+             733:   17(ivec4) Load 19(ballot)
+             734: 90(i16vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 732 733
+             735:     85(ptr) AccessChain 37(data) 729 52
+             736: 25(i16vec4) Load 735
+             737: 25(i16vec4) VectorShuffle 736 734 4 5 6 3
+                              Store 735 737
+             738:      6(int) Load 8(invocation)
+             739:     85(ptr) AccessChain 37(data) 58 52
+             740: 25(i16vec4) Load 739
+             741:   17(ivec4) Load 19(ballot)
+             742: 25(i16vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 740 741
+             743:     85(ptr) AccessChain 37(data) 738 52
+                              Store 743 742
+             744:      6(int) Load 8(invocation)
+             745:     80(ptr) AccessChain 37(data) 39 52 40
+             746: 24(int16_t) Load 745
+             747:   17(ivec4) Load 19(ballot)
+             748: 24(int16_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 746 747
+             749:     80(ptr) AccessChain 37(data) 744 52 40
+                              Store 749 748
+             750:      6(int) Load 8(invocation)
+             751:     85(ptr) AccessChain 37(data) 45 52
+             752: 25(i16vec4) Load 751
+             753: 84(i16vec2) VectorShuffle 752 752 0 1
+             754:   17(ivec4) Load 19(ballot)
+             755: 84(i16vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 753 754
+             756:     85(ptr) AccessChain 37(data) 750 52
+             757: 25(i16vec4) Load 756
+             758: 25(i16vec4) VectorShuffle 757 755 4 5 2 3
+                              Store 756 758
+             759:      6(int) Load 8(invocation)
+             760:     85(ptr) AccessChain 37(data) 52 52
+             761: 25(i16vec4) Load 760
+             762: 90(i16vec3) VectorShuffle 761 761 0 1 2
+             763:   17(ivec4) Load 19(ballot)
+             764: 90(i16vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 762 763
+             765:     85(ptr) AccessChain 37(data) 759 52
+             766: 25(i16vec4) Load 765
+             767: 25(i16vec4) VectorShuffle 766 764 4 5 6 3
+                              Store 765 767
+             768:      6(int) Load 8(invocation)
+             769:     85(ptr) AccessChain 37(data) 58 52
+             770: 25(i16vec4) Load 769
+             771:   17(ivec4) Load 19(ballot)
+             772: 25(i16vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 770 771
+             773:     85(ptr) AccessChain 37(data) 768 52
+                              Store 773 772
+             774:      6(int) Load 8(invocation)
+             775:     80(ptr) AccessChain 37(data) 39 52 40
+             776: 24(int16_t) Load 775
+             777:   17(ivec4) Load 19(ballot)
+             778: 24(int16_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 776 777
+             779:     80(ptr) AccessChain 37(data) 774 52 40
+                              Store 779 778
+             780:      6(int) Load 8(invocation)
+             781:     85(ptr) AccessChain 37(data) 45 52
+             782: 25(i16vec4) Load 781
+             783: 84(i16vec2) VectorShuffle 782 782 0 1
+             784:   17(ivec4) Load 19(ballot)
+             785: 84(i16vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 783 784
+             786:     85(ptr) AccessChain 37(data) 780 52
+             787: 25(i16vec4) Load 786
+             788: 25(i16vec4) VectorShuffle 787 785 4 5 2 3
+                              Store 786 788
+             789:      6(int) Load 8(invocation)
+             790:     85(ptr) AccessChain 37(data) 52 52
+             791: 25(i16vec4) Load 790
+             792: 90(i16vec3) VectorShuffle 791 791 0 1 2
+             793:   17(ivec4) Load 19(ballot)
+             794: 90(i16vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 792 793
+             795:     85(ptr) AccessChain 37(data) 789 52
+             796: 25(i16vec4) Load 795
+             797: 25(i16vec4) VectorShuffle 796 794 4 5 6 3
+                              Store 795 797
+             798:      6(int) Load 8(invocation)
+             799:     85(ptr) AccessChain 37(data) 58 52
+             800: 25(i16vec4) Load 799
+             801:   17(ivec4) Load 19(ballot)
+             802: 25(i16vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 800 801
+             803:     85(ptr) AccessChain 37(data) 798 52
+                              Store 803 802
+             804:      6(int) Load 8(invocation)
+             805:     98(ptr) AccessChain 37(data) 39 58 40
+             806: 26(int16_t) Load 805
+             807:   17(ivec4) Load 19(ballot)
+             808: 26(int16_t) GroupNonUniformIAdd 177 PartitionedReduceNV 806 807
+             809:     98(ptr) AccessChain 37(data) 804 58 40
+                              Store 809 808
+             810:      6(int) Load 8(invocation)
+             811:    103(ptr) AccessChain 37(data) 45 58
+             812: 27(i16vec4) Load 811
+             813:102(i16vec2) VectorShuffle 812 812 0 1
+             814:   17(ivec4) Load 19(ballot)
+             815:102(i16vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 813 814
+             816:    103(ptr) AccessChain 37(data) 810 58
+             817: 27(i16vec4) Load 816
+             818: 27(i16vec4) VectorShuffle 817 815 4 5 2 3
+                              Store 816 818
+             819:      6(int) Load 8(invocation)
+             820:    103(ptr) AccessChain 37(data) 52 58
+             821: 27(i16vec4) Load 820
+             822:108(i16vec3) VectorShuffle 821 821 0 1 2
+             823:   17(ivec4) Load 19(ballot)
+             824:108(i16vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 822 823
+             825:    103(ptr) AccessChain 37(data) 819 58
+             826: 27(i16vec4) Load 825
+             827: 27(i16vec4) VectorShuffle 826 824 4 5 6 3
+                              Store 825 827
+             828:      6(int) Load 8(invocation)
+             829:    103(ptr) AccessChain 37(data) 58 58
+             830: 27(i16vec4) Load 829
+             831:   17(ivec4) Load 19(ballot)
+             832: 27(i16vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 830 831
+             833:    103(ptr) AccessChain 37(data) 828 58
+                              Store 833 832
+             834:      6(int) Load 8(invocation)
+             835:     98(ptr) AccessChain 37(data) 39 58 40
+             836: 26(int16_t) Load 835
+             837:   17(ivec4) Load 19(ballot)
+             838: 26(int16_t) GroupNonUniformIMul 177 PartitionedReduceNV 836 837
+             839:     98(ptr) AccessChain 37(data) 834 58 40
+                              Store 839 838
+             840:      6(int) Load 8(invocation)
+             841:    103(ptr) AccessChain 37(data) 45 58
+             842: 27(i16vec4) Load 841
+             843:102(i16vec2) VectorShuffle 842 842 0 1
+             844:   17(ivec4) Load 19(ballot)
+             845:102(i16vec2) GroupNonUniformIMul 177 PartitionedReduceNV 843 844
+             846:    103(ptr) AccessChain 37(data) 840 58
+             847: 27(i16vec4) Load 846
+             848: 27(i16vec4) VectorShuffle 847 845 4 5 2 3
+                              Store 846 848
+             849:      6(int) Load 8(invocation)
+             850:    103(ptr) AccessChain 37(data) 52 58
+             851: 27(i16vec4) Load 850
+             852:108(i16vec3) VectorShuffle 851 851 0 1 2
+             853:   17(ivec4) Load 19(ballot)
+             854:108(i16vec3) GroupNonUniformIMul 177 PartitionedReduceNV 852 853
+             855:    103(ptr) AccessChain 37(data) 849 58
+             856: 27(i16vec4) Load 855
+             857: 27(i16vec4) VectorShuffle 856 854 4 5 6 3
+                              Store 855 857
+             858:      6(int) Load 8(invocation)
+             859:    103(ptr) AccessChain 37(data) 58 58
+             860: 27(i16vec4) Load 859
+             861:   17(ivec4) Load 19(ballot)
+             862: 27(i16vec4) GroupNonUniformIMul 177 PartitionedReduceNV 860 861
+             863:    103(ptr) AccessChain 37(data) 858 58
+                              Store 863 862
+             864:      6(int) Load 8(invocation)
+             865:     98(ptr) AccessChain 37(data) 39 58 40
+             866: 26(int16_t) Load 865
+             867:   17(ivec4) Load 19(ballot)
+             868: 26(int16_t) GroupNonUniformSMin 177 PartitionedReduceNV 866 867
+             869:     98(ptr) AccessChain 37(data) 864 58 40
+                              Store 869 868
+             870:      6(int) Load 8(invocation)
+             871:    103(ptr) AccessChain 37(data) 45 58
+             872: 27(i16vec4) Load 871
+             873:102(i16vec2) VectorShuffle 872 872 0 1
+             874:   17(ivec4) Load 19(ballot)
+             875:102(i16vec2) GroupNonUniformSMin 177 PartitionedReduceNV 873 874
+             876:    103(ptr) AccessChain 37(data) 870 58
+             877: 27(i16vec4) Load 876
+             878: 27(i16vec4) VectorShuffle 877 875 4 5 2 3
+                              Store 876 878
+             879:      6(int) Load 8(invocation)
+             880:    103(ptr) AccessChain 37(data) 52 58
+             881: 27(i16vec4) Load 880
+             882:108(i16vec3) VectorShuffle 881 881 0 1 2
+             883:   17(ivec4) Load 19(ballot)
+             884:108(i16vec3) GroupNonUniformSMin 177 PartitionedReduceNV 882 883
+             885:    103(ptr) AccessChain 37(data) 879 58
+             886: 27(i16vec4) Load 885
+             887: 27(i16vec4) VectorShuffle 886 884 4 5 6 3
+                              Store 885 887
+             888:      6(int) Load 8(invocation)
+             889:    103(ptr) AccessChain 37(data) 58 58
+             890: 27(i16vec4) Load 889
+             891:   17(ivec4) Load 19(ballot)
+             892: 27(i16vec4) GroupNonUniformSMin 177 PartitionedReduceNV 890 891
+             893:    103(ptr) AccessChain 37(data) 888 58
+                              Store 893 892
+             894:      6(int) Load 8(invocation)
+             895:     98(ptr) AccessChain 37(data) 39 58 40
+             896: 26(int16_t) Load 895
+             897:   17(ivec4) Load 19(ballot)
+             898: 26(int16_t) GroupNonUniformSMax 177 PartitionedReduceNV 896 897
+             899:     98(ptr) AccessChain 37(data) 894 58 40
+                              Store 899 898
+             900:      6(int) Load 8(invocation)
+             901:    103(ptr) AccessChain 37(data) 45 58
+             902: 27(i16vec4) Load 901
+             903:102(i16vec2) VectorShuffle 902 902 0 1
+             904:   17(ivec4) Load 19(ballot)
+             905:102(i16vec2) GroupNonUniformSMax 177 PartitionedReduceNV 903 904
+             906:    103(ptr) AccessChain 37(data) 900 58
+             907: 27(i16vec4) Load 906
+             908: 27(i16vec4) VectorShuffle 907 905 4 5 2 3
+                              Store 906 908
+             909:      6(int) Load 8(invocation)
+             910:    103(ptr) AccessChain 37(data) 52 58
+             911: 27(i16vec4) Load 910
+             912:108(i16vec3) VectorShuffle 911 911 0 1 2
+             913:   17(ivec4) Load 19(ballot)
+             914:108(i16vec3) GroupNonUniformSMax 177 PartitionedReduceNV 912 913
+             915:    103(ptr) AccessChain 37(data) 909 58
+             916: 27(i16vec4) Load 915
+             917: 27(i16vec4) VectorShuffle 916 914 4 5 6 3
+                              Store 915 917
+             918:      6(int) Load 8(invocation)
+             919:    103(ptr) AccessChain 37(data) 58 58
+             920: 27(i16vec4) Load 919
+             921:   17(ivec4) Load 19(ballot)
+             922: 27(i16vec4) GroupNonUniformSMax 177 PartitionedReduceNV 920 921
+             923:    103(ptr) AccessChain 37(data) 918 58
+                              Store 923 922
+             924:      6(int) Load 8(invocation)
+             925:     98(ptr) AccessChain 37(data) 39 58 40
+             926: 26(int16_t) Load 925
+             927:   17(ivec4) Load 19(ballot)
+             928: 26(int16_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 926 927
+             929:     98(ptr) AccessChain 37(data) 924 58 40
+                              Store 929 928
+             930:      6(int) Load 8(invocation)
+             931:    103(ptr) AccessChain 37(data) 45 58
+             932: 27(i16vec4) Load 931
+             933:102(i16vec2) VectorShuffle 932 932 0 1
+             934:   17(ivec4) Load 19(ballot)
+             935:102(i16vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 933 934
+             936:    103(ptr) AccessChain 37(data) 930 58
+             937: 27(i16vec4) Load 936
+             938: 27(i16vec4) VectorShuffle 937 935 4 5 2 3
+                              Store 936 938
+             939:      6(int) Load 8(invocation)
+             940:    103(ptr) AccessChain 37(data) 52 58
+             941: 27(i16vec4) Load 940
+             942:108(i16vec3) VectorShuffle 941 941 0 1 2
+             943:   17(ivec4) Load 19(ballot)
+             944:108(i16vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 942 943
+             945:    103(ptr) AccessChain 37(data) 939 58
+             946: 27(i16vec4) Load 945
+             947: 27(i16vec4) VectorShuffle 946 944 4 5 6 3
+                              Store 945 947
+             948:      6(int) Load 8(invocation)
+             949:    103(ptr) AccessChain 37(data) 58 58
+             950: 27(i16vec4) Load 949
+             951:   17(ivec4) Load 19(ballot)
+             952: 27(i16vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 950 951
+             953:    103(ptr) AccessChain 37(data) 948 58
+                              Store 953 952
+             954:      6(int) Load 8(invocation)
+             955:     98(ptr) AccessChain 37(data) 39 58 40
+             956: 26(int16_t) Load 955
+             957:   17(ivec4) Load 19(ballot)
+             958: 26(int16_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 956 957
+             959:     98(ptr) AccessChain 37(data) 954 58 40
+                              Store 959 958
+             960:      6(int) Load 8(invocation)
+             961:    103(ptr) AccessChain 37(data) 45 58
+             962: 27(i16vec4) Load 961
+             963:102(i16vec2) VectorShuffle 962 962 0 1
+             964:   17(ivec4) Load 19(ballot)
+             965:102(i16vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 963 964
+             966:    103(ptr) AccessChain 37(data) 960 58
+             967: 27(i16vec4) Load 966
+             968: 27(i16vec4) VectorShuffle 967 965 4 5 2 3
+                              Store 966 968
+             969:      6(int) Load 8(invocation)
+             970:    103(ptr) AccessChain 37(data) 52 58
+             971: 27(i16vec4) Load 970
+             972:108(i16vec3) VectorShuffle 971 971 0 1 2
+             973:   17(ivec4) Load 19(ballot)
+             974:108(i16vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 972 973
+             975:    103(ptr) AccessChain 37(data) 969 58
+             976: 27(i16vec4) Load 975
+             977: 27(i16vec4) VectorShuffle 976 974 4 5 6 3
+                              Store 975 977
+             978:      6(int) Load 8(invocation)
+             979:    103(ptr) AccessChain 37(data) 58 58
+             980: 27(i16vec4) Load 979
+             981:   17(ivec4) Load 19(ballot)
+             982: 27(i16vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 980 981
+             983:    103(ptr) AccessChain 37(data) 978 58
+                              Store 983 982
+             984:      6(int) Load 8(invocation)
+             985:     98(ptr) AccessChain 37(data) 39 58 40
+             986: 26(int16_t) Load 985
+             987:   17(ivec4) Load 19(ballot)
+             988: 26(int16_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 986 987
+             989:     98(ptr) AccessChain 37(data) 984 58 40
+                              Store 989 988
+             990:      6(int) Load 8(invocation)
+             991:    103(ptr) AccessChain 37(data) 45 58
+             992: 27(i16vec4) Load 991
+             993:102(i16vec2) VectorShuffle 992 992 0 1
+             994:   17(ivec4) Load 19(ballot)
+             995:102(i16vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 993 994
+             996:    103(ptr) AccessChain 37(data) 990 58
+             997: 27(i16vec4) Load 996
+             998: 27(i16vec4) VectorShuffle 997 995 4 5 2 3
+                              Store 996 998
+             999:      6(int) Load 8(invocation)
+            1000:    103(ptr) AccessChain 37(data) 52 58
+            1001: 27(i16vec4) Load 1000
+            1002:108(i16vec3) VectorShuffle 1001 1001 0 1 2
+            1003:   17(ivec4) Load 19(ballot)
+            1004:108(i16vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1002 1003
+            1005:    103(ptr) AccessChain 37(data) 999 58
+            1006: 27(i16vec4) Load 1005
+            1007: 27(i16vec4) VectorShuffle 1006 1004 4 5 6 3
+                              Store 1005 1007
+            1008:      6(int) Load 8(invocation)
+            1009:    103(ptr) AccessChain 37(data) 58 58
+            1010: 27(i16vec4) Load 1009
+            1011:   17(ivec4) Load 19(ballot)
+            1012: 27(i16vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1010 1011
+            1013:    103(ptr) AccessChain 37(data) 1008 58
+                              Store 1013 1012
+            1014:      6(int) Load 8(invocation)
+            1015:    117(ptr) AccessChain 37(data) 39 116 40
+            1016: 28(int64_t) Load 1015
+            1017:   17(ivec4) Load 19(ballot)
+            1018: 28(int64_t) GroupNonUniformIAdd 177 PartitionedReduceNV 1016 1017
+            1019:    117(ptr) AccessChain 37(data) 1014 116 40
+                              Store 1019 1018
+            1020:      6(int) Load 8(invocation)
+            1021:    122(ptr) AccessChain 37(data) 45 116
+            1022: 29(i64vec4) Load 1021
+            1023:121(i64vec2) VectorShuffle 1022 1022 0 1
+            1024:   17(ivec4) Load 19(ballot)
+            1025:121(i64vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 1023 1024
+            1026:    122(ptr) AccessChain 37(data) 1020 116
+            1027: 29(i64vec4) Load 1026
+            1028: 29(i64vec4) VectorShuffle 1027 1025 4 5 2 3
+                              Store 1026 1028
+            1029:      6(int) Load 8(invocation)
+            1030:    122(ptr) AccessChain 37(data) 52 116
+            1031: 29(i64vec4) Load 1030
+            1032:127(i64vec3) VectorShuffle 1031 1031 0 1 2
+            1033:   17(ivec4) Load 19(ballot)
+            1034:127(i64vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 1032 1033
+            1035:    122(ptr) AccessChain 37(data) 1029 116
+            1036: 29(i64vec4) Load 1035
+            1037: 29(i64vec4) VectorShuffle 1036 1034 4 5 6 3
+                              Store 1035 1037
+            1038:      6(int) Load 8(invocation)
+            1039:    122(ptr) AccessChain 37(data) 58 116
+            1040: 29(i64vec4) Load 1039
+            1041:   17(ivec4) Load 19(ballot)
+            1042: 29(i64vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 1040 1041
+            1043:    122(ptr) AccessChain 37(data) 1038 116
+                              Store 1043 1042
+            1044:      6(int) Load 8(invocation)
+            1045:    117(ptr) AccessChain 37(data) 39 116 40
+            1046: 28(int64_t) Load 1045
+            1047:   17(ivec4) Load 19(ballot)
+            1048: 28(int64_t) GroupNonUniformIMul 177 PartitionedReduceNV 1046 1047
+            1049:    117(ptr) AccessChain 37(data) 1044 116 40
+                              Store 1049 1048
+            1050:      6(int) Load 8(invocation)
+            1051:    122(ptr) AccessChain 37(data) 45 116
+            1052: 29(i64vec4) Load 1051
+            1053:121(i64vec2) VectorShuffle 1052 1052 0 1
+            1054:   17(ivec4) Load 19(ballot)
+            1055:121(i64vec2) GroupNonUniformIMul 177 PartitionedReduceNV 1053 1054
+            1056:    122(ptr) AccessChain 37(data) 1050 116
+            1057: 29(i64vec4) Load 1056
+            1058: 29(i64vec4) VectorShuffle 1057 1055 4 5 2 3
+                              Store 1056 1058
+            1059:      6(int) Load 8(invocation)
+            1060:    122(ptr) AccessChain 37(data) 52 116
+            1061: 29(i64vec4) Load 1060
+            1062:127(i64vec3) VectorShuffle 1061 1061 0 1 2
+            1063:   17(ivec4) Load 19(ballot)
+            1064:127(i64vec3) GroupNonUniformIMul 177 PartitionedReduceNV 1062 1063
+            1065:    122(ptr) AccessChain 37(data) 1059 116
+            1066: 29(i64vec4) Load 1065
+            1067: 29(i64vec4) VectorShuffle 1066 1064 4 5 6 3
+                              Store 1065 1067
+            1068:      6(int) Load 8(invocation)
+            1069:    122(ptr) AccessChain 37(data) 58 116
+            1070: 29(i64vec4) Load 1069
+            1071:   17(ivec4) Load 19(ballot)
+            1072: 29(i64vec4) GroupNonUniformIMul 177 PartitionedReduceNV 1070 1071
+            1073:    122(ptr) AccessChain 37(data) 1068 116
+                              Store 1073 1072
+            1074:      6(int) Load 8(invocation)
+            1075:    117(ptr) AccessChain 37(data) 39 116 40
+            1076: 28(int64_t) Load 1075
+            1077:   17(ivec4) Load 19(ballot)
+            1078: 28(int64_t) GroupNonUniformSMin 177 PartitionedReduceNV 1076 1077
+            1079:    117(ptr) AccessChain 37(data) 1074 116 40
+                              Store 1079 1078
+            1080:      6(int) Load 8(invocation)
+            1081:    122(ptr) AccessChain 37(data) 45 116
+            1082: 29(i64vec4) Load 1081
+            1083:121(i64vec2) VectorShuffle 1082 1082 0 1
+            1084:   17(ivec4) Load 19(ballot)
+            1085:121(i64vec2) GroupNonUniformSMin 177 PartitionedReduceNV 1083 1084
+            1086:    122(ptr) AccessChain 37(data) 1080 116
+            1087: 29(i64vec4) Load 1086
+            1088: 29(i64vec4) VectorShuffle 1087 1085 4 5 2 3
+                              Store 1086 1088
+            1089:      6(int) Load 8(invocation)
+            1090:    122(ptr) AccessChain 37(data) 52 116
+            1091: 29(i64vec4) Load 1090
+            1092:127(i64vec3) VectorShuffle 1091 1091 0 1 2
+            1093:   17(ivec4) Load 19(ballot)
+            1094:127(i64vec3) GroupNonUniformSMin 177 PartitionedReduceNV 1092 1093
+            1095:    122(ptr) AccessChain 37(data) 1089 116
+            1096: 29(i64vec4) Load 1095
+            1097: 29(i64vec4) VectorShuffle 1096 1094 4 5 6 3
+                              Store 1095 1097
+            1098:      6(int) Load 8(invocation)
+            1099:    122(ptr) AccessChain 37(data) 58 116
+            1100: 29(i64vec4) Load 1099
+            1101:   17(ivec4) Load 19(ballot)
+            1102: 29(i64vec4) GroupNonUniformSMin 177 PartitionedReduceNV 1100 1101
+            1103:    122(ptr) AccessChain 37(data) 1098 116
+                              Store 1103 1102
+            1104:      6(int) Load 8(invocation)
+            1105:    117(ptr) AccessChain 37(data) 39 116 40
+            1106: 28(int64_t) Load 1105
+            1107:   17(ivec4) Load 19(ballot)
+            1108: 28(int64_t) GroupNonUniformSMax 177 PartitionedReduceNV 1106 1107
+            1109:    117(ptr) AccessChain 37(data) 1104 116 40
+                              Store 1109 1108
+            1110:      6(int) Load 8(invocation)
+            1111:    122(ptr) AccessChain 37(data) 45 116
+            1112: 29(i64vec4) Load 1111
+            1113:121(i64vec2) VectorShuffle 1112 1112 0 1
+            1114:   17(ivec4) Load 19(ballot)
+            1115:121(i64vec2) GroupNonUniformSMax 177 PartitionedReduceNV 1113 1114
+            1116:    122(ptr) AccessChain 37(data) 1110 116
+            1117: 29(i64vec4) Load 1116
+            1118: 29(i64vec4) VectorShuffle 1117 1115 4 5 2 3
+                              Store 1116 1118
+            1119:      6(int) Load 8(invocation)
+            1120:    122(ptr) AccessChain 37(data) 52 116
+            1121: 29(i64vec4) Load 1120
+            1122:127(i64vec3) VectorShuffle 1121 1121 0 1 2
+            1123:   17(ivec4) Load 19(ballot)
+            1124:127(i64vec3) GroupNonUniformSMax 177 PartitionedReduceNV 1122 1123
+            1125:    122(ptr) AccessChain 37(data) 1119 116
+            1126: 29(i64vec4) Load 1125
+            1127: 29(i64vec4) VectorShuffle 1126 1124 4 5 6 3
+                              Store 1125 1127
+            1128:      6(int) Load 8(invocation)
+            1129:    122(ptr) AccessChain 37(data) 58 116
+            1130: 29(i64vec4) Load 1129
+            1131:   17(ivec4) Load 19(ballot)
+            1132: 29(i64vec4) GroupNonUniformSMax 177 PartitionedReduceNV 1130 1131
+            1133:    122(ptr) AccessChain 37(data) 1128 116
+                              Store 1133 1132
+            1134:      6(int) Load 8(invocation)
+            1135:    117(ptr) AccessChain 37(data) 39 116 40
+            1136: 28(int64_t) Load 1135
+            1137:   17(ivec4) Load 19(ballot)
+            1138: 28(int64_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1136 1137
+            1139:    117(ptr) AccessChain 37(data) 1134 116 40
+                              Store 1139 1138
+            1140:      6(int) Load 8(invocation)
+            1141:    122(ptr) AccessChain 37(data) 45 116
+            1142: 29(i64vec4) Load 1141
+            1143:121(i64vec2) VectorShuffle 1142 1142 0 1
+            1144:   17(ivec4) Load 19(ballot)
+            1145:121(i64vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1143 1144
+            1146:    122(ptr) AccessChain 37(data) 1140 116
+            1147: 29(i64vec4) Load 1146
+            1148: 29(i64vec4) VectorShuffle 1147 1145 4 5 2 3
+                              Store 1146 1148
+            1149:      6(int) Load 8(invocation)
+            1150:    122(ptr) AccessChain 37(data) 52 116
+            1151: 29(i64vec4) Load 1150
+            1152:127(i64vec3) VectorShuffle 1151 1151 0 1 2
+            1153:   17(ivec4) Load 19(ballot)
+            1154:127(i64vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1152 1153
+            1155:    122(ptr) AccessChain 37(data) 1149 116
+            1156: 29(i64vec4) Load 1155
+            1157: 29(i64vec4) VectorShuffle 1156 1154 4 5 6 3
+                              Store 1155 1157
+            1158:      6(int) Load 8(invocation)
+            1159:    122(ptr) AccessChain 37(data) 58 116
+            1160: 29(i64vec4) Load 1159
+            1161:   17(ivec4) Load 19(ballot)
+            1162: 29(i64vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1160 1161
+            1163:    122(ptr) AccessChain 37(data) 1158 116
+                              Store 1163 1162
+            1164:      6(int) Load 8(invocation)
+            1165:    117(ptr) AccessChain 37(data) 39 116 40
+            1166: 28(int64_t) Load 1165
+            1167:   17(ivec4) Load 19(ballot)
+            1168: 28(int64_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1166 1167
+            1169:    117(ptr) AccessChain 37(data) 1164 116 40
+                              Store 1169 1168
+            1170:      6(int) Load 8(invocation)
+            1171:    122(ptr) AccessChain 37(data) 45 116
+            1172: 29(i64vec4) Load 1171
+            1173:121(i64vec2) VectorShuffle 1172 1172 0 1
+            1174:   17(ivec4) Load 19(ballot)
+            1175:121(i64vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1173 1174
+            1176:    122(ptr) AccessChain 37(data) 1170 116
+            1177: 29(i64vec4) Load 1176
+            1178: 29(i64vec4) VectorShuffle 1177 1175 4 5 2 3
+                              Store 1176 1178
+            1179:      6(int) Load 8(invocation)
+            1180:    122(ptr) AccessChain 37(data) 52 116
+            1181: 29(i64vec4) Load 1180
+            1182:127(i64vec3) VectorShuffle 1181 1181 0 1 2
+            1183:   17(ivec4) Load 19(ballot)
+            1184:127(i64vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1182 1183
+            1185:    122(ptr) AccessChain 37(data) 1179 116
+            1186: 29(i64vec4) Load 1185
+            1187: 29(i64vec4) VectorShuffle 1186 1184 4 5 6 3
+                              Store 1185 1187
+            1188:      6(int) Load 8(invocation)
+            1189:    122(ptr) AccessChain 37(data) 58 116
+            1190: 29(i64vec4) Load 1189
+            1191:   17(ivec4) Load 19(ballot)
+            1192: 29(i64vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1190 1191
+            1193:    122(ptr) AccessChain 37(data) 1188 116
+                              Store 1193 1192
+            1194:      6(int) Load 8(invocation)
+            1195:    117(ptr) AccessChain 37(data) 39 116 40
+            1196: 28(int64_t) Load 1195
+            1197:   17(ivec4) Load 19(ballot)
+            1198: 28(int64_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1196 1197
+            1199:    117(ptr) AccessChain 37(data) 1194 116 40
+                              Store 1199 1198
+            1200:      6(int) Load 8(invocation)
+            1201:    122(ptr) AccessChain 37(data) 45 116
+            1202: 29(i64vec4) Load 1201
+            1203:121(i64vec2) VectorShuffle 1202 1202 0 1
+            1204:   17(ivec4) Load 19(ballot)
+            1205:121(i64vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1203 1204
+            1206:    122(ptr) AccessChain 37(data) 1200 116
+            1207: 29(i64vec4) Load 1206
+            1208: 29(i64vec4) VectorShuffle 1207 1205 4 5 2 3
+                              Store 1206 1208
+            1209:      6(int) Load 8(invocation)
+            1210:    122(ptr) AccessChain 37(data) 52 116
+            1211: 29(i64vec4) Load 1210
+            1212:127(i64vec3) VectorShuffle 1211 1211 0 1 2
+            1213:   17(ivec4) Load 19(ballot)
+            1214:127(i64vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1212 1213
+            1215:    122(ptr) AccessChain 37(data) 1209 116
+            1216: 29(i64vec4) Load 1215
+            1217: 29(i64vec4) VectorShuffle 1216 1214 4 5 6 3
+                              Store 1215 1217
+            1218:      6(int) Load 8(invocation)
+            1219:    122(ptr) AccessChain 37(data) 58 116
+            1220: 29(i64vec4) Load 1219
+            1221:   17(ivec4) Load 19(ballot)
+            1222: 29(i64vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1220 1221
+            1223:    122(ptr) AccessChain 37(data) 1218 116
+                              Store 1223 1222
+            1224:      6(int) Load 8(invocation)
+            1225:    136(ptr) AccessChain 37(data) 39 135 40
+            1226: 30(int64_t) Load 1225
+            1227:   17(ivec4) Load 19(ballot)
+            1228: 30(int64_t) GroupNonUniformIAdd 177 PartitionedReduceNV 1226 1227
+            1229:    136(ptr) AccessChain 37(data) 1224 135 40
+                              Store 1229 1228
+            1230:      6(int) Load 8(invocation)
+            1231:    141(ptr) AccessChain 37(data) 45 135
+            1232: 31(i64vec4) Load 1231
+            1233:140(i64vec2) VectorShuffle 1232 1232 0 1
+            1234:   17(ivec4) Load 19(ballot)
+            1235:140(i64vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 1233 1234
+            1236:    141(ptr) AccessChain 37(data) 1230 135
+            1237: 31(i64vec4) Load 1236
+            1238: 31(i64vec4) VectorShuffle 1237 1235 4 5 2 3
+                              Store 1236 1238
+            1239:      6(int) Load 8(invocation)
+            1240:    141(ptr) AccessChain 37(data) 52 135
+            1241: 31(i64vec4) Load 1240
+            1242:146(i64vec3) VectorShuffle 1241 1241 0 1 2
+            1243:   17(ivec4) Load 19(ballot)
+            1244:146(i64vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 1242 1243
+            1245:    141(ptr) AccessChain 37(data) 1239 135
+            1246: 31(i64vec4) Load 1245
+            1247: 31(i64vec4) VectorShuffle 1246 1244 4 5 6 3
+                              Store 1245 1247
+            1248:      6(int) Load 8(invocation)
+            1249:    141(ptr) AccessChain 37(data) 58 135
+            1250: 31(i64vec4) Load 1249
+            1251:   17(ivec4) Load 19(ballot)
+            1252: 31(i64vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 1250 1251
+            1253:    141(ptr) AccessChain 37(data) 1248 135
+                              Store 1253 1252
+            1254:      6(int) Load 8(invocation)
+            1255:    136(ptr) AccessChain 37(data) 39 135 40
+            1256: 30(int64_t) Load 1255
+            1257:   17(ivec4) Load 19(ballot)
+            1258: 30(int64_t) GroupNonUniformIMul 177 PartitionedReduceNV 1256 1257
+            1259:    136(ptr) AccessChain 37(data) 1254 135 40
+                              Store 1259 1258
+            1260:      6(int) Load 8(invocation)
+            1261:    141(ptr) AccessChain 37(data) 45 135
+            1262: 31(i64vec4) Load 1261
+            1263:140(i64vec2) VectorShuffle 1262 1262 0 1
+            1264:   17(ivec4) Load 19(ballot)
+            1265:140(i64vec2) GroupNonUniformIMul 177 PartitionedReduceNV 1263 1264
+            1266:    141(ptr) AccessChain 37(data) 1260 135
+            1267: 31(i64vec4) Load 1266
+            1268: 31(i64vec4) VectorShuffle 1267 1265 4 5 2 3
+                              Store 1266 1268
+            1269:      6(int) Load 8(invocation)
+            1270:    141(ptr) AccessChain 37(data) 52 135
+            1271: 31(i64vec4) Load 1270
+            1272:146(i64vec3) VectorShuffle 1271 1271 0 1 2
+            1273:   17(ivec4) Load 19(ballot)
+            1274:146(i64vec3) GroupNonUniformIMul 177 PartitionedReduceNV 1272 1273
+            1275:    141(ptr) AccessChain 37(data) 1269 135
+            1276: 31(i64vec4) Load 1275
+            1277: 31(i64vec4) VectorShuffle 1276 1274 4 5 6 3
+                              Store 1275 1277
+            1278:      6(int) Load 8(invocation)
+            1279:    141(ptr) AccessChain 37(data) 58 135
+            1280: 31(i64vec4) Load 1279
+            1281:   17(ivec4) Load 19(ballot)
+            1282: 31(i64vec4) GroupNonUniformIMul 177 PartitionedReduceNV 1280 1281
+            1283:    141(ptr) AccessChain 37(data) 1278 135
+                              Store 1283 1282
+            1284:      6(int) Load 8(invocation)
+            1285:    136(ptr) AccessChain 37(data) 39 135 40
+            1286: 30(int64_t) Load 1285
+            1287:   17(ivec4) Load 19(ballot)
+            1288: 30(int64_t) GroupNonUniformUMin 177 PartitionedReduceNV 1286 1287
+            1289:    136(ptr) AccessChain 37(data) 1284 135 40
+                              Store 1289 1288
+            1290:      6(int) Load 8(invocation)
+            1291:    141(ptr) AccessChain 37(data) 45 135
+            1292: 31(i64vec4) Load 1291
+            1293:140(i64vec2) VectorShuffle 1292 1292 0 1
+            1294:   17(ivec4) Load 19(ballot)
+            1295:140(i64vec2) GroupNonUniformUMin 177 PartitionedReduceNV 1293 1294
+            1296:    141(ptr) AccessChain 37(data) 1290 135
+            1297: 31(i64vec4) Load 1296
+            1298: 31(i64vec4) VectorShuffle 1297 1295 4 5 2 3
+                              Store 1296 1298
+            1299:      6(int) Load 8(invocation)
+            1300:    141(ptr) AccessChain 37(data) 52 135
+            1301: 31(i64vec4) Load 1300
+            1302:146(i64vec3) VectorShuffle 1301 1301 0 1 2
+            1303:   17(ivec4) Load 19(ballot)
+            1304:146(i64vec3) GroupNonUniformUMin 177 PartitionedReduceNV 1302 1303
+            1305:    141(ptr) AccessChain 37(data) 1299 135
+            1306: 31(i64vec4) Load 1305
+            1307: 31(i64vec4) VectorShuffle 1306 1304 4 5 6 3
+                              Store 1305 1307
+            1308:      6(int) Load 8(invocation)
+            1309:    141(ptr) AccessChain 37(data) 58 135
+            1310: 31(i64vec4) Load 1309
+            1311:   17(ivec4) Load 19(ballot)
+            1312: 31(i64vec4) GroupNonUniformUMin 177 PartitionedReduceNV 1310 1311
+            1313:    141(ptr) AccessChain 37(data) 1308 135
+                              Store 1313 1312
+            1314:      6(int) Load 8(invocation)
+            1315:    136(ptr) AccessChain 37(data) 39 135 40
+            1316: 30(int64_t) Load 1315
+            1317:   17(ivec4) Load 19(ballot)
+            1318: 30(int64_t) GroupNonUniformUMax 177 PartitionedReduceNV 1316 1317
+            1319:    136(ptr) AccessChain 37(data) 1314 135 40
+                              Store 1319 1318
+            1320:      6(int) Load 8(invocation)
+            1321:    141(ptr) AccessChain 37(data) 45 135
+            1322: 31(i64vec4) Load 1321
+            1323:140(i64vec2) VectorShuffle 1322 1322 0 1
+            1324:   17(ivec4) Load 19(ballot)
+            1325:140(i64vec2) GroupNonUniformUMax 177 PartitionedReduceNV 1323 1324
+            1326:    141(ptr) AccessChain 37(data) 1320 135
+            1327: 31(i64vec4) Load 1326
+            1328: 31(i64vec4) VectorShuffle 1327 1325 4 5 2 3
+                              Store 1326 1328
+            1329:      6(int) Load 8(invocation)
+            1330:    141(ptr) AccessChain 37(data) 52 135
+            1331: 31(i64vec4) Load 1330
+            1332:146(i64vec3) VectorShuffle 1331 1331 0 1 2
+            1333:   17(ivec4) Load 19(ballot)
+            1334:146(i64vec3) GroupNonUniformUMax 177 PartitionedReduceNV 1332 1333
+            1335:    141(ptr) AccessChain 37(data) 1329 135
+            1336: 31(i64vec4) Load 1335
+            1337: 31(i64vec4) VectorShuffle 1336 1334 4 5 6 3
+                              Store 1335 1337
+            1338:      6(int) Load 8(invocation)
+            1339:    141(ptr) AccessChain 37(data) 58 135
+            1340: 31(i64vec4) Load 1339
+            1341:   17(ivec4) Load 19(ballot)
+            1342: 31(i64vec4) GroupNonUniformUMax 177 PartitionedReduceNV 1340 1341
+            1343:    141(ptr) AccessChain 37(data) 1338 135
+                              Store 1343 1342
+            1344:      6(int) Load 8(invocation)
+            1345:    136(ptr) AccessChain 37(data) 39 135 40
+            1346: 30(int64_t) Load 1345
+            1347:   17(ivec4) Load 19(ballot)
+            1348: 30(int64_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1346 1347
+            1349:    136(ptr) AccessChain 37(data) 1344 135 40
+                              Store 1349 1348
+            1350:      6(int) Load 8(invocation)
+            1351:    141(ptr) AccessChain 37(data) 45 135
+            1352: 31(i64vec4) Load 1351
+            1353:140(i64vec2) VectorShuffle 1352 1352 0 1
+            1354:   17(ivec4) Load 19(ballot)
+            1355:140(i64vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1353 1354
+            1356:    141(ptr) AccessChain 37(data) 1350 135
+            1357: 31(i64vec4) Load 1356
+            1358: 31(i64vec4) VectorShuffle 1357 1355 4 5 2 3
+                              Store 1356 1358
+            1359:      6(int) Load 8(invocation)
+            1360:    141(ptr) AccessChain 37(data) 52 135
+            1361: 31(i64vec4) Load 1360
+            1362:146(i64vec3) VectorShuffle 1361 1361 0 1 2
+            1363:   17(ivec4) Load 19(ballot)
+            1364:146(i64vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1362 1363
+            1365:    141(ptr) AccessChain 37(data) 1359 135
+            1366: 31(i64vec4) Load 1365
+            1367: 31(i64vec4) VectorShuffle 1366 1364 4 5 6 3
+                              Store 1365 1367
+            1368:      6(int) Load 8(invocation)
+            1369:    141(ptr) AccessChain 37(data) 58 135
+            1370: 31(i64vec4) Load 1369
+            1371:   17(ivec4) Load 19(ballot)
+            1372: 31(i64vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1370 1371
+            1373:    141(ptr) AccessChain 37(data) 1368 135
+                              Store 1373 1372
+            1374:      6(int) Load 8(invocation)
+            1375:    136(ptr) AccessChain 37(data) 39 135 40
+            1376: 30(int64_t) Load 1375
+            1377:   17(ivec4) Load 19(ballot)
+            1378: 30(int64_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1376 1377
+            1379:    136(ptr) AccessChain 37(data) 1374 135 40
+                              Store 1379 1378
+            1380:      6(int) Load 8(invocation)
+            1381:    141(ptr) AccessChain 37(data) 45 135
+            1382: 31(i64vec4) Load 1381
+            1383:140(i64vec2) VectorShuffle 1382 1382 0 1
+            1384:   17(ivec4) Load 19(ballot)
+            1385:140(i64vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1383 1384
+            1386:    141(ptr) AccessChain 37(data) 1380 135
+            1387: 31(i64vec4) Load 1386
+            1388: 31(i64vec4) VectorShuffle 1387 1385 4 5 2 3
+                              Store 1386 1388
+            1389:      6(int) Load 8(invocation)
+            1390:    141(ptr) AccessChain 37(data) 52 135
+            1391: 31(i64vec4) Load 1390
+            1392:146(i64vec3) VectorShuffle 1391 1391 0 1 2
+            1393:   17(ivec4) Load 19(ballot)
+            1394:146(i64vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1392 1393
+            1395:    141(ptr) AccessChain 37(data) 1389 135
+            1396: 31(i64vec4) Load 1395
+            1397: 31(i64vec4) VectorShuffle 1396 1394 4 5 6 3
+                              Store 1395 1397
+            1398:      6(int) Load 8(invocation)
+            1399:    141(ptr) AccessChain 37(data) 58 135
+            1400: 31(i64vec4) Load 1399
+            1401:   17(ivec4) Load 19(ballot)
+            1402: 31(i64vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1400 1401
+            1403:    141(ptr) AccessChain 37(data) 1398 135
+                              Store 1403 1402
+            1404:      6(int) Load 8(invocation)
+            1405:    136(ptr) AccessChain 37(data) 39 135 40
+            1406: 30(int64_t) Load 1405
+            1407:   17(ivec4) Load 19(ballot)
+            1408: 30(int64_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1406 1407
+            1409:    136(ptr) AccessChain 37(data) 1404 135 40
+                              Store 1409 1408
+            1410:      6(int) Load 8(invocation)
+            1411:    141(ptr) AccessChain 37(data) 45 135
+            1412: 31(i64vec4) Load 1411
+            1413:140(i64vec2) VectorShuffle 1412 1412 0 1
+            1414:   17(ivec4) Load 19(ballot)
+            1415:140(i64vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1413 1414
+            1416:    141(ptr) AccessChain 37(data) 1410 135
+            1417: 31(i64vec4) Load 1416
+            1418: 31(i64vec4) VectorShuffle 1417 1415 4 5 2 3
+                              Store 1416 1418
+            1419:      6(int) Load 8(invocation)
+            1420:    141(ptr) AccessChain 37(data) 52 135
+            1421: 31(i64vec4) Load 1420
+            1422:146(i64vec3) VectorShuffle 1421 1421 0 1 2
+            1423:   17(ivec4) Load 19(ballot)
+            1424:146(i64vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1422 1423
+            1425:    141(ptr) AccessChain 37(data) 1419 135
+            1426: 31(i64vec4) Load 1425
+            1427: 31(i64vec4) VectorShuffle 1426 1424 4 5 6 3
+                              Store 1425 1427
+            1428:      6(int) Load 8(invocation)
+            1429:    141(ptr) AccessChain 37(data) 58 135
+            1430: 31(i64vec4) Load 1429
+            1431:   17(ivec4) Load 19(ballot)
+            1432: 31(i64vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1430 1431
+            1433:    141(ptr) AccessChain 37(data) 1428 135
+                              Store 1433 1432
+            1434:      6(int) Load 8(invocation)
+            1435:    155(ptr) AccessChain 37(data) 39 154 40
+            1436:32(float16_t) Load 1435
+            1437:   17(ivec4) Load 19(ballot)
+            1438:32(float16_t) GroupNonUniformIAdd 177 PartitionedReduceNV 1436 1437
+            1439:    155(ptr) AccessChain 37(data) 1434 154 40
+                              Store 1439 1438
+            1440:      6(int) Load 8(invocation)
+            1441:    160(ptr) AccessChain 37(data) 45 154
+            1442: 33(f16vec4) Load 1441
+            1443:159(f16vec2) VectorShuffle 1442 1442 0 1
+            1444:   17(ivec4) Load 19(ballot)
+            1445:159(f16vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 1443 1444
+            1446:    160(ptr) AccessChain 37(data) 1440 154
+            1447: 33(f16vec4) Load 1446
+            1448: 33(f16vec4) VectorShuffle 1447 1445 4 5 2 3
+                              Store 1446 1448
+            1449:      6(int) Load 8(invocation)
+            1450:    160(ptr) AccessChain 37(data) 52 154
+            1451: 33(f16vec4) Load 1450
+            1452:165(f16vec3) VectorShuffle 1451 1451 0 1 2
+            1453:   17(ivec4) Load 19(ballot)
+            1454:165(f16vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 1452 1453
+            1455:    160(ptr) AccessChain 37(data) 1449 154
+            1456: 33(f16vec4) Load 1455
+            1457: 33(f16vec4) VectorShuffle 1456 1454 4 5 6 3
+                              Store 1455 1457
+            1458:      6(int) Load 8(invocation)
+            1459:    160(ptr) AccessChain 37(data) 58 154
+            1460: 33(f16vec4) Load 1459
+            1461:   17(ivec4) Load 19(ballot)
+            1462: 33(f16vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 1460 1461
+            1463:    160(ptr) AccessChain 37(data) 1458 154
+                              Store 1463 1462
+            1464:      6(int) Load 8(invocation)
+            1465:    155(ptr) AccessChain 37(data) 39 154 40
+            1466:32(float16_t) Load 1465
+            1467:   17(ivec4) Load 19(ballot)
+            1468:32(float16_t) GroupNonUniformIMul 177 PartitionedReduceNV 1466 1467
+            1469:    155(ptr) AccessChain 37(data) 1464 154 40
+                              Store 1469 1468
+            1470:      6(int) Load 8(invocation)
+            1471:    160(ptr) AccessChain 37(data) 45 154
+            1472: 33(f16vec4) Load 1471
+            1473:159(f16vec2) VectorShuffle 1472 1472 0 1
+            1474:   17(ivec4) Load 19(ballot)
+            1475:159(f16vec2) GroupNonUniformIMul 177 PartitionedReduceNV 1473 1474
+            1476:    160(ptr) AccessChain 37(data) 1470 154
+            1477: 33(f16vec4) Load 1476
+            1478: 33(f16vec4) VectorShuffle 1477 1475 4 5 2 3
+                              Store 1476 1478
+            1479:      6(int) Load 8(invocation)
+            1480:    160(ptr) AccessChain 37(data) 52 154
+            1481: 33(f16vec4) Load 1480
+            1482:165(f16vec3) VectorShuffle 1481 1481 0 1 2
+            1483:   17(ivec4) Load 19(ballot)
+            1484:165(f16vec3) GroupNonUniformIMul 177 PartitionedReduceNV 1482 1483
+            1485:    160(ptr) AccessChain 37(data) 1479 154
+            1486: 33(f16vec4) Load 1485
+            1487: 33(f16vec4) VectorShuffle 1486 1484 4 5 6 3
+                              Store 1485 1487
+            1488:      6(int) Load 8(invocation)
+            1489:    160(ptr) AccessChain 37(data) 58 154
+            1490: 33(f16vec4) Load 1489
+            1491:   17(ivec4) Load 19(ballot)
+            1492: 33(f16vec4) GroupNonUniformIMul 177 PartitionedReduceNV 1490 1491
+            1493:    160(ptr) AccessChain 37(data) 1488 154
+                              Store 1493 1492
+            1494:      6(int) Load 8(invocation)
+            1495:    155(ptr) AccessChain 37(data) 39 154 40
+            1496:32(float16_t) Load 1495
+            1497:   17(ivec4) Load 19(ballot)
+            1498:32(float16_t) GroupNonUniformSMin 177 PartitionedReduceNV 1496 1497
+            1499:    155(ptr) AccessChain 37(data) 1494 154 40
+                              Store 1499 1498
+            1500:      6(int) Load 8(invocation)
+            1501:    160(ptr) AccessChain 37(data) 45 154
+            1502: 33(f16vec4) Load 1501
+            1503:159(f16vec2) VectorShuffle 1502 1502 0 1
+            1504:   17(ivec4) Load 19(ballot)
+            1505:159(f16vec2) GroupNonUniformSMin 177 PartitionedReduceNV 1503 1504
+            1506:    160(ptr) AccessChain 37(data) 1500 154
+            1507: 33(f16vec4) Load 1506
+            1508: 33(f16vec4) VectorShuffle 1507 1505 4 5 2 3
+                              Store 1506 1508
+            1509:      6(int) Load 8(invocation)
+            1510:    160(ptr) AccessChain 37(data) 52 154
+            1511: 33(f16vec4) Load 1510
+            1512:165(f16vec3) VectorShuffle 1511 1511 0 1 2
+            1513:   17(ivec4) Load 19(ballot)
+            1514:165(f16vec3) GroupNonUniformSMin 177 PartitionedReduceNV 1512 1513
+            1515:    160(ptr) AccessChain 37(data) 1509 154
+            1516: 33(f16vec4) Load 1515
+            1517: 33(f16vec4) VectorShuffle 1516 1514 4 5 6 3
+                              Store 1515 1517
+            1518:      6(int) Load 8(invocation)
+            1519:    160(ptr) AccessChain 37(data) 58 154
+            1520: 33(f16vec4) Load 1519
+            1521:   17(ivec4) Load 19(ballot)
+            1522: 33(f16vec4) GroupNonUniformSMin 177 PartitionedReduceNV 1520 1521
+            1523:    160(ptr) AccessChain 37(data) 1518 154
+                              Store 1523 1522
+            1524:      6(int) Load 8(invocation)
+            1525:    155(ptr) AccessChain 37(data) 39 154 40
+            1526:32(float16_t) Load 1525
+            1527:   17(ivec4) Load 19(ballot)
+            1528:32(float16_t) GroupNonUniformSMax 177 PartitionedReduceNV 1526 1527
+            1529:    155(ptr) AccessChain 37(data) 1524 154 40
+                              Store 1529 1528
+            1530:      6(int) Load 8(invocation)
+            1531:    160(ptr) AccessChain 37(data) 45 154
+            1532: 33(f16vec4) Load 1531
+            1533:159(f16vec2) VectorShuffle 1532 1532 0 1
+            1534:   17(ivec4) Load 19(ballot)
+            1535:159(f16vec2) GroupNonUniformSMax 177 PartitionedReduceNV 1533 1534
+            1536:    160(ptr) AccessChain 37(data) 1530 154
+            1537: 33(f16vec4) Load 1536
+            1538: 33(f16vec4) VectorShuffle 1537 1535 4 5 2 3
+                              Store 1536 1538
+            1539:      6(int) Load 8(invocation)
+            1540:    160(ptr) AccessChain 37(data) 52 154
+            1541: 33(f16vec4) Load 1540
+            1542:165(f16vec3) VectorShuffle 1541 1541 0 1 2
+            1543:   17(ivec4) Load 19(ballot)
+            1544:165(f16vec3) GroupNonUniformSMax 177 PartitionedReduceNV 1542 1543
+            1545:    160(ptr) AccessChain 37(data) 1539 154
+            1546: 33(f16vec4) Load 1545
+            1547: 33(f16vec4) VectorShuffle 1546 1544 4 5 6 3
+                              Store 1545 1547
+            1548:      6(int) Load 8(invocation)
+            1549:    160(ptr) AccessChain 37(data) 58 154
+            1550: 33(f16vec4) Load 1549
+            1551:   17(ivec4) Load 19(ballot)
+            1552: 33(f16vec4) GroupNonUniformSMax 177 PartitionedReduceNV 1550 1551
+            1553:    160(ptr) AccessChain 37(data) 1548 154
+                              Store 1553 1552
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out
new file mode 100644
index 0000000..c029617
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out
@@ -0,0 +1,217 @@
+spv.subgroupExtendedTypesPartitionedNeg.comp
+ERROR: 0:27: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:30: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:35: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:38: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:39: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:40: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:42: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:43: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:44: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:45: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:48: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:49: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:50: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:52: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:53: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:54: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:55: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:57: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:58: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:59: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:60: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:62: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:63: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:64: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:65: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:67: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:68: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:69: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:70: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:72: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:73: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:74: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:75: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:77: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:78: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:79: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:80: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:82: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:83: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:84: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:85: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:87: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:88: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:89: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:90: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:92: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:93: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:94: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:95: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:97: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:98: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:99: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:100: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:102: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:103: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:104: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:105: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:107: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:108: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:109: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:110: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:112: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:113: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:114: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:115: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:117: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:118: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:119: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:120: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:122: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:123: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:124: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:125: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:127: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:128: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:129: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:130: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:132: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:133: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:134: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:135: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:137: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:138: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:139: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:140: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:142: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:143: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:144: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:145: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:147: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:148: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:149: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:150: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:152: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:153: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:154: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:155: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:157: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:158: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:159: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:160: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:162: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:163: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:164: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:165: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:167: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:168: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:169: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:170: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:172: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:173: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:174: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:175: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:177: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:178: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:179: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:180: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:182: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:183: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:184: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:185: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:187: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:188: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:189: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:190: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:192: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:193: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:194: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:195: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:197: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:198: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:199: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:200: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:202: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:203: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:204: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:205: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:207: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:208: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:209: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:210: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:212: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:213: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:214: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:215: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:217: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:218: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:219: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:220: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:222: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:223: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:224: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:225: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:227: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:228: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:229: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:230: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:232: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:233: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:234: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:235: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:237: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:238: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:239: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:240: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:242: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:243: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:244: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:245: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:247: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:248: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:249: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:250: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:252: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:253: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:254: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:255: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:257: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:258: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:259: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:260: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:262: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:263: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:264: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:265: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:267: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:268: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:269: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:270: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:272: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:273: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:274: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:275: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:277: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:278: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:279: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:280: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:282: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:283: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:284: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:285: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:287: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:288: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:289: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:290: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 212 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/spv.subgroupExtendedTypesPartitioned.comp b/Test/spv.subgroupExtendedTypesPartitioned.comp
new file mode 100644
index 0000000..382a5b3
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesPartitioned.comp
@@ -0,0 +1,291 @@
+#version 450

+

+#extension GL_NV_shader_subgroup_partitioned: enable

+#extension GL_EXT_shader_subgroup_extended_types_int8: enable

+#extension GL_EXT_shader_subgroup_extended_types_int16: enable

+#extension GL_EXT_shader_subgroup_extended_types_int64: enable

+#extension GL_EXT_shader_subgroup_extended_types_float16: enable

+

+layout (local_size_x = 8) in;

+// Buffers: one vec4 member per extended type (i8, u8, i16, u16, i64, u64, f16); main() uses them as subgroup operands.

+layout(binding = 0) buffer Buffers

+{

+    i8vec4 i8;

+    u8vec4 u8;

+    i16vec4 i16;

+    u16vec4 u16;

+    i64vec4 i64;

+    u64vec4 u64;

+    f16vec4 f16;

+} data[4]; // array of 4 blocks: main() reads data[0..3] and writes data[invocation]

+// main: calls subgroupPartitionNV, then subgroupPartitioned{Add,Mul,Min,Max,And,Or,Xor}NV, on scalar/vec2/vec3/vec4 operands of each Buffers member type (f16: Add/Mul/Min/Max only).

+void main()

+{

+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // always in [0, 3], so a valid index into data[4]

+    // Partition on each member type at every width; each call overwrites ballot, so only the final (f16 vec4) result is used by the calls further down.

+    uvec4 ballot;

+    ballot = subgroupPartitionNV(data[0].i8.x);

+    ballot = subgroupPartitionNV(data[1].i8.xy);

+    ballot = subgroupPartitionNV(data[2].i8.xyz);

+    ballot = subgroupPartitionNV(data[3].i8);

+

+    ballot = subgroupPartitionNV(data[0].u8.x);

+    ballot = subgroupPartitionNV(data[1].u8.xy);

+    ballot = subgroupPartitionNV(data[2].u8.xyz);

+    ballot = subgroupPartitionNV(data[3].u8);

+

+    ballot = subgroupPartitionNV(data[0].i16.x);

+    ballot = subgroupPartitionNV(data[1].i16.xy);

+    ballot = subgroupPartitionNV(data[2].i16.xyz);

+    ballot = subgroupPartitionNV(data[3].i16);

+

+    ballot = subgroupPartitionNV(data[0].u16.x);

+    ballot = subgroupPartitionNV(data[1].u16.xy);

+    ballot = subgroupPartitionNV(data[2].u16.xyz);

+    ballot = subgroupPartitionNV(data[3].u16);

+

+    ballot = subgroupPartitionNV(data[0].i64.x);

+    ballot = subgroupPartitionNV(data[1].i64.xy);

+    ballot = subgroupPartitionNV(data[2].i64.xyz);

+    ballot = subgroupPartitionNV(data[3].i64);

+

+    ballot = subgroupPartitionNV(data[0].u64.x);

+    ballot = subgroupPartitionNV(data[1].u64.xy);

+    ballot = subgroupPartitionNV(data[2].u64.xyz);

+    ballot = subgroupPartitionNV(data[3].u64);

+

+    ballot = subgroupPartitionNV(data[0].f16.x);

+    ballot = subgroupPartitionNV(data[1].f16.xy);

+    ballot = subgroupPartitionNV(data[2].f16.xyz);

+    ballot = subgroupPartitionNV(data[3].f16);

+    // i8: Add, Mul, Min, Max, And, Or, Xor at each width; operands come from data[0..3], results are stored to data[invocation].

+    data[invocation].i8.x   = subgroupPartitionedAddNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedAddNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedAddNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedAddNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMulNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMulNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMulNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMulNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMinNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMinNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMinNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMinNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMaxNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMaxNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMaxNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMaxNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedAndNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedAndNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedAndNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedAndNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedOrNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedOrNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedOrNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedOrNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedXorNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedXorNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedXorNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedXorNV(data[3].i8, ballot);

+    // u8: the same seven operations as for i8.

+    data[invocation].u8.x   = subgroupPartitionedAddNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedAddNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedAddNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedAddNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMulNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMulNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMulNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMulNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMinNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMinNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMinNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMinNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMaxNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMaxNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMaxNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMaxNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedAndNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedAndNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedAndNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedAndNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedOrNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedOrNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedOrNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedOrNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedXorNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedXorNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedXorNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedXorNV(data[3].u8, ballot);

+    // i16: the same seven operations.

+    data[invocation].i16.x   = subgroupPartitionedAddNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedAddNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedAddNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedAddNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMulNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMulNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMulNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMulNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMinNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMinNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMinNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMinNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMaxNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMaxNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMaxNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMaxNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedAndNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedAndNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedAndNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedAndNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedOrNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedOrNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedOrNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedOrNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedXorNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedXorNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedXorNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedXorNV(data[3].i16, ballot);

+    // u16: the same seven operations.

+    data[invocation].u16.x   = subgroupPartitionedAddNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedAddNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedAddNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedAddNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMulNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMulNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMulNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMulNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMinNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMinNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMinNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMinNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMaxNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMaxNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMaxNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMaxNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedAndNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedAndNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedAndNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedAndNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedOrNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedOrNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedOrNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedOrNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedXorNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedXorNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedXorNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedXorNV(data[3].u16, ballot);

+    // i64: the same seven operations.

+    data[invocation].i64.x   = subgroupPartitionedAddNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedAddNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedAddNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedAddNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMulNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMulNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMulNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMulNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMinNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMinNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMinNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMinNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMaxNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMaxNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMaxNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMaxNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedAndNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedAndNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedAndNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedAndNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedOrNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedOrNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedOrNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedOrNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedXorNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedXorNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedXorNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedXorNV(data[3].i64, ballot);

+    // u64: the same seven operations.

+    data[invocation].u64.x   = subgroupPartitionedAddNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedAddNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedAddNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedAddNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMulNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMulNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMulNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMulNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMinNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMinNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMinNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMinNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMaxNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMaxNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMaxNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMaxNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedAndNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedAndNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedAndNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedAndNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedOrNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedOrNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedOrNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedOrNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedXorNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedXorNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedXorNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedXorNV(data[3].u64, ballot);

+    // f16: only Add, Mul, Min and Max are called (no And/Or/Xor).

+    data[invocation].f16.x   = subgroupPartitionedAddNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedAddNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedAddNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedAddNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMulNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMulNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMulNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMulNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMinNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMinNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMinNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMinNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMaxNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMaxNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMaxNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMaxNV(data[3].f16, ballot);

+}

diff --git a/Test/spv.subgroupExtendedTypesPartitionedNeg.comp b/Test/spv.subgroupExtendedTypesPartitionedNeg.comp
new file mode 100644
index 0000000..ad94e35
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesPartitionedNeg.comp
@@ -0,0 +1,291 @@
+#version 450

+

+#extension GL_NV_shader_subgroup_partitioned: enable

+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable

+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable

+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable

+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable

+

+layout (local_size_x = 8) in;

+// Buffers: one vec4 member per extended type (i8, u8, i16, u16, i64, u64, f16); main() passes them to the subgroup built-ins.

+layout(binding = 0) buffer Buffers

+{

+    i8vec4 i8;

+    u8vec4 u8;

+    i16vec4 i16;

+    u16vec4 u16;

+    i64vec4 i64;

+    u64vec4 u64;

+    f16vec4 f16;

+} data[4]; // array of 4 blocks: main() reads data[0..3] and writes data[invocation]

+// main: same call sequence as Test/spv.subgroupExtendedTypesPartitioned.comp, but this file enables GL_EXT_shader_explicit_arithmetic_types_* rather than GL_EXT_shader_subgroup_extended_types_*.

+void main()

+{

+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // always in [0, 3], so a valid index into data[4]

+    // NOTE(review): presumably each subgroup call below is expected to be rejected with "required extension not requested" - confirm against the golden output.

+    uvec4 ballot; // overwritten by every subgroupPartitionNV call below

+    ballot = subgroupPartitionNV(data[0].i8.x);

+    ballot = subgroupPartitionNV(data[1].i8.xy);

+    ballot = subgroupPartitionNV(data[2].i8.xyz);

+    ballot = subgroupPartitionNV(data[3].i8);

+

+    ballot = subgroupPartitionNV(data[0].u8.x);

+    ballot = subgroupPartitionNV(data[1].u8.xy);

+    ballot = subgroupPartitionNV(data[2].u8.xyz);

+    ballot = subgroupPartitionNV(data[3].u8);

+

+    ballot = subgroupPartitionNV(data[0].i16.x);

+    ballot = subgroupPartitionNV(data[1].i16.xy);

+    ballot = subgroupPartitionNV(data[2].i16.xyz);

+    ballot = subgroupPartitionNV(data[3].i16);

+

+    ballot = subgroupPartitionNV(data[0].u16.x);

+    ballot = subgroupPartitionNV(data[1].u16.xy);

+    ballot = subgroupPartitionNV(data[2].u16.xyz);

+    ballot = subgroupPartitionNV(data[3].u16);

+

+    ballot = subgroupPartitionNV(data[0].i64.x);

+    ballot = subgroupPartitionNV(data[1].i64.xy);

+    ballot = subgroupPartitionNV(data[2].i64.xyz);

+    ballot = subgroupPartitionNV(data[3].i64);

+

+    ballot = subgroupPartitionNV(data[0].u64.x);

+    ballot = subgroupPartitionNV(data[1].u64.xy);

+    ballot = subgroupPartitionNV(data[2].u64.xyz);

+    ballot = subgroupPartitionNV(data[3].u64);

+

+    ballot = subgroupPartitionNV(data[0].f16.x);

+    ballot = subgroupPartitionNV(data[1].f16.xy);

+    ballot = subgroupPartitionNV(data[2].f16.xyz);

+    ballot = subgroupPartitionNV(data[3].f16);

+    // i8: Add, Mul, Min, Max, And, Or, Xor at each width; operands come from data[0..3], results are stored to data[invocation].

+    data[invocation].i8.x   = subgroupPartitionedAddNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedAddNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedAddNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedAddNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMulNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMulNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMulNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMulNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMinNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMinNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMinNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMinNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedMaxNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedMaxNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedMaxNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedMaxNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedAndNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedAndNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedAndNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedAndNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedOrNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedOrNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedOrNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedOrNV(data[3].i8, ballot);

+

+    data[invocation].i8.x   = subgroupPartitionedXorNV(data[0].i8.x, ballot);

+    data[invocation].i8.xy  = subgroupPartitionedXorNV(data[1].i8.xy, ballot);

+    data[invocation].i8.xyz = subgroupPartitionedXorNV(data[2].i8.xyz, ballot);

+    data[invocation].i8     = subgroupPartitionedXorNV(data[3].i8, ballot);

+    // u8: the same seven operations as for i8.

+    data[invocation].u8.x   = subgroupPartitionedAddNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedAddNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedAddNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedAddNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMulNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMulNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMulNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMulNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMinNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMinNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMinNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMinNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedMaxNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedMaxNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedMaxNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedMaxNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedAndNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedAndNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedAndNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedAndNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedOrNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedOrNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedOrNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedOrNV(data[3].u8, ballot);

+

+    data[invocation].u8.x   = subgroupPartitionedXorNV(data[0].u8.x, ballot);

+    data[invocation].u8.xy  = subgroupPartitionedXorNV(data[1].u8.xy, ballot);

+    data[invocation].u8.xyz = subgroupPartitionedXorNV(data[2].u8.xyz, ballot);

+    data[invocation].u8     = subgroupPartitionedXorNV(data[3].u8, ballot);

+    // i16: the same seven operations.

+    data[invocation].i16.x   = subgroupPartitionedAddNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedAddNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedAddNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedAddNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMulNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMulNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMulNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMulNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMinNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMinNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMinNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMinNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedMaxNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedMaxNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedMaxNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedMaxNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedAndNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedAndNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedAndNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedAndNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedOrNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedOrNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedOrNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedOrNV(data[3].i16, ballot);

+

+    data[invocation].i16.x   = subgroupPartitionedXorNV(data[0].i16.x, ballot);

+    data[invocation].i16.xy  = subgroupPartitionedXorNV(data[1].i16.xy, ballot);

+    data[invocation].i16.xyz = subgroupPartitionedXorNV(data[2].i16.xyz, ballot);

+    data[invocation].i16     = subgroupPartitionedXorNV(data[3].i16, ballot);

+    // u16: the same seven operations.

+    data[invocation].u16.x   = subgroupPartitionedAddNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedAddNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedAddNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedAddNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMulNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMulNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMulNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMulNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMinNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMinNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMinNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMinNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedMaxNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedMaxNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedMaxNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedMaxNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedAndNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedAndNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedAndNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedAndNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedOrNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedOrNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedOrNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedOrNV(data[3].u16, ballot);

+

+    data[invocation].u16.x   = subgroupPartitionedXorNV(data[0].u16.x, ballot);

+    data[invocation].u16.xy  = subgroupPartitionedXorNV(data[1].u16.xy, ballot);

+    data[invocation].u16.xyz = subgroupPartitionedXorNV(data[2].u16.xyz, ballot);

+    data[invocation].u16     = subgroupPartitionedXorNV(data[3].u16, ballot);

+    // i64: the same seven operations.

+    data[invocation].i64.x   = subgroupPartitionedAddNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedAddNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedAddNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedAddNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMulNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMulNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMulNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMulNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMinNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMinNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMinNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMinNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedMaxNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedMaxNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedMaxNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedMaxNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedAndNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedAndNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedAndNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedAndNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedOrNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedOrNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedOrNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedOrNV(data[3].i64, ballot);

+

+    data[invocation].i64.x   = subgroupPartitionedXorNV(data[0].i64.x, ballot);

+    data[invocation].i64.xy  = subgroupPartitionedXorNV(data[1].i64.xy, ballot);

+    data[invocation].i64.xyz = subgroupPartitionedXorNV(data[2].i64.xyz, ballot);

+    data[invocation].i64     = subgroupPartitionedXorNV(data[3].i64, ballot);

+    // u64: the same seven operations.

+    data[invocation].u64.x   = subgroupPartitionedAddNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedAddNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedAddNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedAddNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMulNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMulNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMulNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMulNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMinNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMinNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMinNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMinNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedMaxNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedMaxNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedMaxNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedMaxNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedAndNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedAndNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedAndNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedAndNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedOrNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedOrNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedOrNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedOrNV(data[3].u64, ballot);

+

+    data[invocation].u64.x   = subgroupPartitionedXorNV(data[0].u64.x, ballot);

+    data[invocation].u64.xy  = subgroupPartitionedXorNV(data[1].u64.xy, ballot);

+    data[invocation].u64.xyz = subgroupPartitionedXorNV(data[2].u64.xyz, ballot);

+    data[invocation].u64     = subgroupPartitionedXorNV(data[3].u64, ballot);

+    // f16: only Add, Mul, Min and Max are called (no And/Or/Xor).

+    data[invocation].f16.x   = subgroupPartitionedAddNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedAddNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedAddNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedAddNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMulNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMulNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMulNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMulNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMinNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMinNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMinNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMinNV(data[3].f16, ballot);

+

+    data[invocation].f16.x   = subgroupPartitionedMaxNV(data[0].f16.x, ballot);

+    data[invocation].f16.xy  = subgroupPartitionedMaxNV(data[1].f16.xy, ballot);

+    data[invocation].f16.xyz = subgroupPartitionedMaxNV(data[2].f16.xyz, ballot);

+    data[invocation].f16     = subgroupPartitionedMaxNV(data[3].f16, ballot);

+}

diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp
index 6fe57ca..a49a560 100644
--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -1884,58 +1884,6 @@
 
             "bool   subgroupAll(bool);\n"
             "bool   subgroupAny(bool);\n"
-
-            "bool   subgroupAllEqual(float);\n"
-            "bool   subgroupAllEqual(vec2);\n"
-            "bool   subgroupAllEqual(vec3);\n"
-            "bool   subgroupAllEqual(vec4);\n"
-            "bool   subgroupAllEqual(int);\n"
-            "bool   subgroupAllEqual(ivec2);\n"
-            "bool   subgroupAllEqual(ivec3);\n"
-            "bool   subgroupAllEqual(ivec4);\n"
-            "bool   subgroupAllEqual(uint);\n"
-            "bool   subgroupAllEqual(uvec2);\n"
-            "bool   subgroupAllEqual(uvec3);\n"
-            "bool   subgroupAllEqual(uvec4);\n"
-            "bool   subgroupAllEqual(bool);\n"
-            "bool   subgroupAllEqual(bvec2);\n"
-            "bool   subgroupAllEqual(bvec3);\n"
-            "bool   subgroupAllEqual(bvec4);\n"
-
-            "float  subgroupBroadcast(float, uint);\n"
-            "vec2   subgroupBroadcast(vec2, uint);\n"
-            "vec3   subgroupBroadcast(vec3, uint);\n"
-            "vec4   subgroupBroadcast(vec4, uint);\n"
-            "int    subgroupBroadcast(int, uint);\n"
-            "ivec2  subgroupBroadcast(ivec2, uint);\n"
-            "ivec3  subgroupBroadcast(ivec3, uint);\n"
-            "ivec4  subgroupBroadcast(ivec4, uint);\n"
-            "uint   subgroupBroadcast(uint, uint);\n"
-            "uvec2  subgroupBroadcast(uvec2, uint);\n"
-            "uvec3  subgroupBroadcast(uvec3, uint);\n"
-            "uvec4  subgroupBroadcast(uvec4, uint);\n"
-            "bool   subgroupBroadcast(bool, uint);\n"
-            "bvec2  subgroupBroadcast(bvec2, uint);\n"
-            "bvec3  subgroupBroadcast(bvec3, uint);\n"
-            "bvec4  subgroupBroadcast(bvec4, uint);\n"
-
-            "float  subgroupBroadcastFirst(float);\n"
-            "vec2   subgroupBroadcastFirst(vec2);\n"
-            "vec3   subgroupBroadcastFirst(vec3);\n"
-            "vec4   subgroupBroadcastFirst(vec4);\n"
-            "int    subgroupBroadcastFirst(int);\n"
-            "ivec2  subgroupBroadcastFirst(ivec2);\n"
-            "ivec3  subgroupBroadcastFirst(ivec3);\n"
-            "ivec4  subgroupBroadcastFirst(ivec4);\n"
-            "uint   subgroupBroadcastFirst(uint);\n"
-            "uvec2  subgroupBroadcastFirst(uvec2);\n"
-            "uvec3  subgroupBroadcastFirst(uvec3);\n"
-            "uvec4  subgroupBroadcastFirst(uvec4);\n"
-            "bool   subgroupBroadcastFirst(bool);\n"
-            "bvec2  subgroupBroadcastFirst(bvec2);\n"
-            "bvec3  subgroupBroadcastFirst(bvec3);\n"
-            "bvec4  subgroupBroadcastFirst(bvec4);\n"
-
             "uvec4  subgroupBallot(bool);\n"
             "bool   subgroupInverseBallot(uvec4);\n"
             "bool   subgroupBallotBitExtract(uvec4, uint);\n"
@@ -1945,2196 +1893,132 @@
             "uint   subgroupBallotFindLSB(uvec4);\n"
             "uint   subgroupBallotFindMSB(uvec4);\n"
 
-            "float  subgroupShuffle(float, uint);\n"
-            "vec2   subgroupShuffle(vec2, uint);\n"
-            "vec3   subgroupShuffle(vec3, uint);\n"
-            "vec4   subgroupShuffle(vec4, uint);\n"
-            "int    subgroupShuffle(int, uint);\n"
-            "ivec2  subgroupShuffle(ivec2, uint);\n"
-            "ivec3  subgroupShuffle(ivec3, uint);\n"
-            "ivec4  subgroupShuffle(ivec4, uint);\n"
-            "uint   subgroupShuffle(uint, uint);\n"
-            "uvec2  subgroupShuffle(uvec2, uint);\n"
-            "uvec3  subgroupShuffle(uvec3, uint);\n"
-            "uvec4  subgroupShuffle(uvec4, uint);\n"
-            "bool   subgroupShuffle(bool, uint);\n"
-            "bvec2  subgroupShuffle(bvec2, uint);\n"
-            "bvec3  subgroupShuffle(bvec3, uint);\n"
-            "bvec4  subgroupShuffle(bvec4, uint);\n"
+            );
 
-            "float  subgroupShuffleXor(float, uint);\n"
-            "vec2   subgroupShuffleXor(vec2, uint);\n"
-            "vec3   subgroupShuffleXor(vec3, uint);\n"
-            "vec4   subgroupShuffleXor(vec4, uint);\n"
-            "int    subgroupShuffleXor(int, uint);\n"
-            "ivec2  subgroupShuffleXor(ivec2, uint);\n"
-            "ivec3  subgroupShuffleXor(ivec3, uint);\n"
-            "ivec4  subgroupShuffleXor(ivec4, uint);\n"
-            "uint   subgroupShuffleXor(uint, uint);\n"
-            "uvec2  subgroupShuffleXor(uvec2, uint);\n"
-            "uvec3  subgroupShuffleXor(uvec3, uint);\n"
-            "uvec4  subgroupShuffleXor(uvec4, uint);\n"
-            "bool   subgroupShuffleXor(bool, uint);\n"
-            "bvec2  subgroupShuffleXor(bvec2, uint);\n"
-            "bvec3  subgroupShuffleXor(bvec3, uint);\n"
-            "bvec4  subgroupShuffleXor(bvec4, uint);\n"
-
-            "float  subgroupShuffleUp(float, uint delta);\n"
-            "vec2   subgroupShuffleUp(vec2, uint delta);\n"
-            "vec3   subgroupShuffleUp(vec3, uint delta);\n"
-            "vec4   subgroupShuffleUp(vec4, uint delta);\n"
-            "int    subgroupShuffleUp(int, uint delta);\n"
-            "ivec2  subgroupShuffleUp(ivec2, uint delta);\n"
-            "ivec3  subgroupShuffleUp(ivec3, uint delta);\n"
-            "ivec4  subgroupShuffleUp(ivec4, uint delta);\n"
-            "uint   subgroupShuffleUp(uint, uint delta);\n"
-            "uvec2  subgroupShuffleUp(uvec2, uint delta);\n"
-            "uvec3  subgroupShuffleUp(uvec3, uint delta);\n"
-            "uvec4  subgroupShuffleUp(uvec4, uint delta);\n"
-            "bool   subgroupShuffleUp(bool, uint delta);\n"
-            "bvec2  subgroupShuffleUp(bvec2, uint delta);\n"
-            "bvec3  subgroupShuffleUp(bvec3, uint delta);\n"
-            "bvec4  subgroupShuffleUp(bvec4, uint delta);\n"
-
-            "float  subgroupShuffleDown(float, uint delta);\n"
-            "vec2   subgroupShuffleDown(vec2, uint delta);\n"
-            "vec3   subgroupShuffleDown(vec3, uint delta);\n"
-            "vec4   subgroupShuffleDown(vec4, uint delta);\n"
-            "int    subgroupShuffleDown(int, uint delta);\n"
-            "ivec2  subgroupShuffleDown(ivec2, uint delta);\n"
-            "ivec3  subgroupShuffleDown(ivec3, uint delta);\n"
-            "ivec4  subgroupShuffleDown(ivec4, uint delta);\n"
-            "uint   subgroupShuffleDown(uint, uint delta);\n"
-            "uvec2  subgroupShuffleDown(uvec2, uint delta);\n"
-            "uvec3  subgroupShuffleDown(uvec3, uint delta);\n"
-            "uvec4  subgroupShuffleDown(uvec4, uint delta);\n"
-            "bool   subgroupShuffleDown(bool, uint delta);\n"
-            "bvec2  subgroupShuffleDown(bvec2, uint delta);\n"
-            "bvec3  subgroupShuffleDown(bvec3, uint delta);\n"
-            "bvec4  subgroupShuffleDown(bvec4, uint delta);\n"
-
-            "float  subgroupAdd(float);\n"
-            "vec2   subgroupAdd(vec2);\n"
-            "vec3   subgroupAdd(vec3);\n"
-            "vec4   subgroupAdd(vec4);\n"
-            "int    subgroupAdd(int);\n"
-            "ivec2  subgroupAdd(ivec2);\n"
-            "ivec3  subgroupAdd(ivec3);\n"
-            "ivec4  subgroupAdd(ivec4);\n"
-            "uint   subgroupAdd(uint);\n"
-            "uvec2  subgroupAdd(uvec2);\n"
-            "uvec3  subgroupAdd(uvec3);\n"
-            "uvec4  subgroupAdd(uvec4);\n"
-
-            "float  subgroupMul(float);\n"
-            "vec2   subgroupMul(vec2);\n"
-            "vec3   subgroupMul(vec3);\n"
-            "vec4   subgroupMul(vec4);\n"
-            "int    subgroupMul(int);\n"
-            "ivec2  subgroupMul(ivec2);\n"
-            "ivec3  subgroupMul(ivec3);\n"
-            "ivec4  subgroupMul(ivec4);\n"
-            "uint   subgroupMul(uint);\n"
-            "uvec2  subgroupMul(uvec2);\n"
-            "uvec3  subgroupMul(uvec3);\n"
-            "uvec4  subgroupMul(uvec4);\n"
-
-            "float  subgroupMin(float);\n"
-            "vec2   subgroupMin(vec2);\n"
-            "vec3   subgroupMin(vec3);\n"
-            "vec4   subgroupMin(vec4);\n"
-            "int    subgroupMin(int);\n"
-            "ivec2  subgroupMin(ivec2);\n"
-            "ivec3  subgroupMin(ivec3);\n"
-            "ivec4  subgroupMin(ivec4);\n"
-            "uint   subgroupMin(uint);\n"
-            "uvec2  subgroupMin(uvec2);\n"
-            "uvec3  subgroupMin(uvec3);\n"
-            "uvec4  subgroupMin(uvec4);\n"
-
-            "float  subgroupMax(float);\n"
-            "vec2   subgroupMax(vec2);\n"
-            "vec3   subgroupMax(vec3);\n"
-            "vec4   subgroupMax(vec4);\n"
-            "int    subgroupMax(int);\n"
-            "ivec2  subgroupMax(ivec2);\n"
-            "ivec3  subgroupMax(ivec3);\n"
-            "ivec4  subgroupMax(ivec4);\n"
-            "uint   subgroupMax(uint);\n"
-            "uvec2  subgroupMax(uvec2);\n"
-            "uvec3  subgroupMax(uvec3);\n"
-            "uvec4  subgroupMax(uvec4);\n"
-
-            "int    subgroupAnd(int);\n"
-            "ivec2  subgroupAnd(ivec2);\n"
-            "ivec3  subgroupAnd(ivec3);\n"
-            "ivec4  subgroupAnd(ivec4);\n"
-            "uint   subgroupAnd(uint);\n"
-            "uvec2  subgroupAnd(uvec2);\n"
-            "uvec3  subgroupAnd(uvec3);\n"
-            "uvec4  subgroupAnd(uvec4);\n"
-            "bool   subgroupAnd(bool);\n"
-            "bvec2  subgroupAnd(bvec2);\n"
-            "bvec3  subgroupAnd(bvec3);\n"
-            "bvec4  subgroupAnd(bvec4);\n"
-
-            "int    subgroupOr(int);\n"
-            "ivec2  subgroupOr(ivec2);\n"
-            "ivec3  subgroupOr(ivec3);\n"
-            "ivec4  subgroupOr(ivec4);\n"
-            "uint   subgroupOr(uint);\n"
-            "uvec2  subgroupOr(uvec2);\n"
-            "uvec3  subgroupOr(uvec3);\n"
-            "uvec4  subgroupOr(uvec4);\n"
-            "bool   subgroupOr(bool);\n"
-            "bvec2  subgroupOr(bvec2);\n"
-            "bvec3  subgroupOr(bvec3);\n"
-            "bvec4  subgroupOr(bvec4);\n"
-
-            "int    subgroupXor(int);\n"
-            "ivec2  subgroupXor(ivec2);\n"
-            "ivec3  subgroupXor(ivec3);\n"
-            "ivec4  subgroupXor(ivec4);\n"
-            "uint   subgroupXor(uint);\n"
-            "uvec2  subgroupXor(uvec2);\n"
-            "uvec3  subgroupXor(uvec3);\n"
-            "uvec4  subgroupXor(uvec4);\n"
-            "bool   subgroupXor(bool);\n"
-            "bvec2  subgroupXor(bvec2);\n"
-            "bvec3  subgroupXor(bvec3);\n"
-            "bvec4  subgroupXor(bvec4);\n"
-
-            "float  subgroupInclusiveAdd(float);\n"
-            "vec2   subgroupInclusiveAdd(vec2);\n"
-            "vec3   subgroupInclusiveAdd(vec3);\n"
-            "vec4   subgroupInclusiveAdd(vec4);\n"
-            "int    subgroupInclusiveAdd(int);\n"
-            "ivec2  subgroupInclusiveAdd(ivec2);\n"
-            "ivec3  subgroupInclusiveAdd(ivec3);\n"
-            "ivec4  subgroupInclusiveAdd(ivec4);\n"
-            "uint   subgroupInclusiveAdd(uint);\n"
-            "uvec2  subgroupInclusiveAdd(uvec2);\n"
-            "uvec3  subgroupInclusiveAdd(uvec3);\n"
-            "uvec4  subgroupInclusiveAdd(uvec4);\n"
-
-            "float  subgroupInclusiveMul(float);\n"
-            "vec2   subgroupInclusiveMul(vec2);\n"
-            "vec3   subgroupInclusiveMul(vec3);\n"
-            "vec4   subgroupInclusiveMul(vec4);\n"
-            "int    subgroupInclusiveMul(int);\n"
-            "ivec2  subgroupInclusiveMul(ivec2);\n"
-            "ivec3  subgroupInclusiveMul(ivec3);\n"
-            "ivec4  subgroupInclusiveMul(ivec4);\n"
-            "uint   subgroupInclusiveMul(uint);\n"
-            "uvec2  subgroupInclusiveMul(uvec2);\n"
-            "uvec3  subgroupInclusiveMul(uvec3);\n"
-            "uvec4  subgroupInclusiveMul(uvec4);\n"
-
-            "float  subgroupInclusiveMin(float);\n"
-            "vec2   subgroupInclusiveMin(vec2);\n"
-            "vec3   subgroupInclusiveMin(vec3);\n"
-            "vec4   subgroupInclusiveMin(vec4);\n"
-            "int    subgroupInclusiveMin(int);\n"
-            "ivec2  subgroupInclusiveMin(ivec2);\n"
-            "ivec3  subgroupInclusiveMin(ivec3);\n"
-            "ivec4  subgroupInclusiveMin(ivec4);\n"
-            "uint   subgroupInclusiveMin(uint);\n"
-            "uvec2  subgroupInclusiveMin(uvec2);\n"
-            "uvec3  subgroupInclusiveMin(uvec3);\n"
-            "uvec4  subgroupInclusiveMin(uvec4);\n"
-
-            "float  subgroupInclusiveMax(float);\n"
-            "vec2   subgroupInclusiveMax(vec2);\n"
-            "vec3   subgroupInclusiveMax(vec3);\n"
-            "vec4   subgroupInclusiveMax(vec4);\n"
-            "int    subgroupInclusiveMax(int);\n"
-            "ivec2  subgroupInclusiveMax(ivec2);\n"
-            "ivec3  subgroupInclusiveMax(ivec3);\n"
-            "ivec4  subgroupInclusiveMax(ivec4);\n"
-            "uint   subgroupInclusiveMax(uint);\n"
-            "uvec2  subgroupInclusiveMax(uvec2);\n"
-            "uvec3  subgroupInclusiveMax(uvec3);\n"
-            "uvec4  subgroupInclusiveMax(uvec4);\n"
-
-            "int    subgroupInclusiveAnd(int);\n"
-            "ivec2  subgroupInclusiveAnd(ivec2);\n"
-            "ivec3  subgroupInclusiveAnd(ivec3);\n"
-            "ivec4  subgroupInclusiveAnd(ivec4);\n"
-            "uint   subgroupInclusiveAnd(uint);\n"
-            "uvec2  subgroupInclusiveAnd(uvec2);\n"
-            "uvec3  subgroupInclusiveAnd(uvec3);\n"
-            "uvec4  subgroupInclusiveAnd(uvec4);\n"
-            "bool   subgroupInclusiveAnd(bool);\n"
-            "bvec2  subgroupInclusiveAnd(bvec2);\n"
-            "bvec3  subgroupInclusiveAnd(bvec3);\n"
-            "bvec4  subgroupInclusiveAnd(bvec4);\n"
-
-            "int    subgroupInclusiveOr(int);\n"
-            "ivec2  subgroupInclusiveOr(ivec2);\n"
-            "ivec3  subgroupInclusiveOr(ivec3);\n"
-            "ivec4  subgroupInclusiveOr(ivec4);\n"
-            "uint   subgroupInclusiveOr(uint);\n"
-            "uvec2  subgroupInclusiveOr(uvec2);\n"
-            "uvec3  subgroupInclusiveOr(uvec3);\n"
-            "uvec4  subgroupInclusiveOr(uvec4);\n"
-            "bool   subgroupInclusiveOr(bool);\n"
-            "bvec2  subgroupInclusiveOr(bvec2);\n"
-            "bvec3  subgroupInclusiveOr(bvec3);\n"
-            "bvec4  subgroupInclusiveOr(bvec4);\n"
-
-            "int    subgroupInclusiveXor(int);\n"
-            "ivec2  subgroupInclusiveXor(ivec2);\n"
-            "ivec3  subgroupInclusiveXor(ivec3);\n"
-            "ivec4  subgroupInclusiveXor(ivec4);\n"
-            "uint   subgroupInclusiveXor(uint);\n"
-            "uvec2  subgroupInclusiveXor(uvec2);\n"
-            "uvec3  subgroupInclusiveXor(uvec3);\n"
-            "uvec4  subgroupInclusiveXor(uvec4);\n"
-            "bool   subgroupInclusiveXor(bool);\n"
-            "bvec2  subgroupInclusiveXor(bvec2);\n"
-            "bvec3  subgroupInclusiveXor(bvec3);\n"
-            "bvec4  subgroupInclusiveXor(bvec4);\n"
-
-            "float  subgroupExclusiveAdd(float);\n"
-            "vec2   subgroupExclusiveAdd(vec2);\n"
-            "vec3   subgroupExclusiveAdd(vec3);\n"
-            "vec4   subgroupExclusiveAdd(vec4);\n"
-            "int    subgroupExclusiveAdd(int);\n"
-            "ivec2  subgroupExclusiveAdd(ivec2);\n"
-            "ivec3  subgroupExclusiveAdd(ivec3);\n"
-            "ivec4  subgroupExclusiveAdd(ivec4);\n"
-            "uint   subgroupExclusiveAdd(uint);\n"
-            "uvec2  subgroupExclusiveAdd(uvec2);\n"
-            "uvec3  subgroupExclusiveAdd(uvec3);\n"
-            "uvec4  subgroupExclusiveAdd(uvec4);\n"
-
-            "float  subgroupExclusiveMul(float);\n"
-            "vec2   subgroupExclusiveMul(vec2);\n"
-            "vec3   subgroupExclusiveMul(vec3);\n"
-            "vec4   subgroupExclusiveMul(vec4);\n"
-            "int    subgroupExclusiveMul(int);\n"
-            "ivec2  subgroupExclusiveMul(ivec2);\n"
-            "ivec3  subgroupExclusiveMul(ivec3);\n"
-            "ivec4  subgroupExclusiveMul(ivec4);\n"
-            "uint   subgroupExclusiveMul(uint);\n"
-            "uvec2  subgroupExclusiveMul(uvec2);\n"
-            "uvec3  subgroupExclusiveMul(uvec3);\n"
-            "uvec4  subgroupExclusiveMul(uvec4);\n"
-
-            "float  subgroupExclusiveMin(float);\n"
-            "vec2   subgroupExclusiveMin(vec2);\n"
-            "vec3   subgroupExclusiveMin(vec3);\n"
-            "vec4   subgroupExclusiveMin(vec4);\n"
-            "int    subgroupExclusiveMin(int);\n"
-            "ivec2  subgroupExclusiveMin(ivec2);\n"
-            "ivec3  subgroupExclusiveMin(ivec3);\n"
-            "ivec4  subgroupExclusiveMin(ivec4);\n"
-            "uint   subgroupExclusiveMin(uint);\n"
-            "uvec2  subgroupExclusiveMin(uvec2);\n"
-            "uvec3  subgroupExclusiveMin(uvec3);\n"
-            "uvec4  subgroupExclusiveMin(uvec4);\n"
-
-            "float  subgroupExclusiveMax(float);\n"
-            "vec2   subgroupExclusiveMax(vec2);\n"
-            "vec3   subgroupExclusiveMax(vec3);\n"
-            "vec4   subgroupExclusiveMax(vec4);\n"
-            "int    subgroupExclusiveMax(int);\n"
-            "ivec2  subgroupExclusiveMax(ivec2);\n"
-            "ivec3  subgroupExclusiveMax(ivec3);\n"
-            "ivec4  subgroupExclusiveMax(ivec4);\n"
-            "uint   subgroupExclusiveMax(uint);\n"
-            "uvec2  subgroupExclusiveMax(uvec2);\n"
-            "uvec3  subgroupExclusiveMax(uvec3);\n"
-            "uvec4  subgroupExclusiveMax(uvec4);\n"
-
-            "int    subgroupExclusiveAnd(int);\n"
-            "ivec2  subgroupExclusiveAnd(ivec2);\n"
-            "ivec3  subgroupExclusiveAnd(ivec3);\n"
-            "ivec4  subgroupExclusiveAnd(ivec4);\n"
-            "uint   subgroupExclusiveAnd(uint);\n"
-            "uvec2  subgroupExclusiveAnd(uvec2);\n"
-            "uvec3  subgroupExclusiveAnd(uvec3);\n"
-            "uvec4  subgroupExclusiveAnd(uvec4);\n"
-            "bool   subgroupExclusiveAnd(bool);\n"
-            "bvec2  subgroupExclusiveAnd(bvec2);\n"
-            "bvec3  subgroupExclusiveAnd(bvec3);\n"
-            "bvec4  subgroupExclusiveAnd(bvec4);\n"
-
-            "int    subgroupExclusiveOr(int);\n"
-            "ivec2  subgroupExclusiveOr(ivec2);\n"
-            "ivec3  subgroupExclusiveOr(ivec3);\n"
-            "ivec4  subgroupExclusiveOr(ivec4);\n"
-            "uint   subgroupExclusiveOr(uint);\n"
-            "uvec2  subgroupExclusiveOr(uvec2);\n"
-            "uvec3  subgroupExclusiveOr(uvec3);\n"
-            "uvec4  subgroupExclusiveOr(uvec4);\n"
-            "bool   subgroupExclusiveOr(bool);\n"
-            "bvec2  subgroupExclusiveOr(bvec2);\n"
-            "bvec3  subgroupExclusiveOr(bvec3);\n"
-            "bvec4  subgroupExclusiveOr(bvec4);\n"
-
-            "int    subgroupExclusiveXor(int);\n"
-            "ivec2  subgroupExclusiveXor(ivec2);\n"
-            "ivec3  subgroupExclusiveXor(ivec3);\n"
-            "ivec4  subgroupExclusiveXor(ivec4);\n"
-            "uint   subgroupExclusiveXor(uint);\n"
-            "uvec2  subgroupExclusiveXor(uvec2);\n"
-            "uvec3  subgroupExclusiveXor(uvec3);\n"
-            "uvec4  subgroupExclusiveXor(uvec4);\n"
-            "bool   subgroupExclusiveXor(bool);\n"
-            "bvec2  subgroupExclusiveXor(bvec2);\n"
-            "bvec3  subgroupExclusiveXor(bvec3);\n"
-            "bvec4  subgroupExclusiveXor(bvec4);\n"
-
-            "float  subgroupClusteredAdd(float, uint);\n"
-            "vec2   subgroupClusteredAdd(vec2, uint);\n"
-            "vec3   subgroupClusteredAdd(vec3, uint);\n"
-            "vec4   subgroupClusteredAdd(vec4, uint);\n"
-            "int    subgroupClusteredAdd(int, uint);\n"
-            "ivec2  subgroupClusteredAdd(ivec2, uint);\n"
-            "ivec3  subgroupClusteredAdd(ivec3, uint);\n"
-            "ivec4  subgroupClusteredAdd(ivec4, uint);\n"
-            "uint   subgroupClusteredAdd(uint, uint);\n"
-            "uvec2  subgroupClusteredAdd(uvec2, uint);\n"
-            "uvec3  subgroupClusteredAdd(uvec3, uint);\n"
-            "uvec4  subgroupClusteredAdd(uvec4, uint);\n"
-
-            "float  subgroupClusteredMul(float, uint);\n"
-            "vec2   subgroupClusteredMul(vec2, uint);\n"
-            "vec3   subgroupClusteredMul(vec3, uint);\n"
-            "vec4   subgroupClusteredMul(vec4, uint);\n"
-            "int    subgroupClusteredMul(int, uint);\n"
-            "ivec2  subgroupClusteredMul(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMul(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMul(ivec4, uint);\n"
-            "uint   subgroupClusteredMul(uint, uint);\n"
-            "uvec2  subgroupClusteredMul(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMul(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMul(uvec4, uint);\n"
-
-            "float  subgroupClusteredMin(float, uint);\n"
-            "vec2   subgroupClusteredMin(vec2, uint);\n"
-            "vec3   subgroupClusteredMin(vec3, uint);\n"
-            "vec4   subgroupClusteredMin(vec4, uint);\n"
-            "int    subgroupClusteredMin(int, uint);\n"
-            "ivec2  subgroupClusteredMin(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMin(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMin(ivec4, uint);\n"
-            "uint   subgroupClusteredMin(uint, uint);\n"
-            "uvec2  subgroupClusteredMin(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMin(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMin(uvec4, uint);\n"
-
-            "float  subgroupClusteredMax(float, uint);\n"
-            "vec2   subgroupClusteredMax(vec2, uint);\n"
-            "vec3   subgroupClusteredMax(vec3, uint);\n"
-            "vec4   subgroupClusteredMax(vec4, uint);\n"
-            "int    subgroupClusteredMax(int, uint);\n"
-            "ivec2  subgroupClusteredMax(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMax(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMax(ivec4, uint);\n"
-            "uint   subgroupClusteredMax(uint, uint);\n"
-            "uvec2  subgroupClusteredMax(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMax(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMax(uvec4, uint);\n"
-
-            "int    subgroupClusteredAnd(int, uint);\n"
-            "ivec2  subgroupClusteredAnd(ivec2, uint);\n"
-            "ivec3  subgroupClusteredAnd(ivec3, uint);\n"
-            "ivec4  subgroupClusteredAnd(ivec4, uint);\n"
-            "uint   subgroupClusteredAnd(uint, uint);\n"
-            "uvec2  subgroupClusteredAnd(uvec2, uint);\n"
-            "uvec3  subgroupClusteredAnd(uvec3, uint);\n"
-            "uvec4  subgroupClusteredAnd(uvec4, uint);\n"
-            "bool   subgroupClusteredAnd(bool, uint);\n"
-            "bvec2  subgroupClusteredAnd(bvec2, uint);\n"
-            "bvec3  subgroupClusteredAnd(bvec3, uint);\n"
-            "bvec4  subgroupClusteredAnd(bvec4, uint);\n"
-
-            "int    subgroupClusteredOr(int, uint);\n"
-            "ivec2  subgroupClusteredOr(ivec2, uint);\n"
-            "ivec3  subgroupClusteredOr(ivec3, uint);\n"
-            "ivec4  subgroupClusteredOr(ivec4, uint);\n"
-            "uint   subgroupClusteredOr(uint, uint);\n"
-            "uvec2  subgroupClusteredOr(uvec2, uint);\n"
-            "uvec3  subgroupClusteredOr(uvec3, uint);\n"
-            "uvec4  subgroupClusteredOr(uvec4, uint);\n"
-            "bool   subgroupClusteredOr(bool, uint);\n"
-            "bvec2  subgroupClusteredOr(bvec2, uint);\n"
-            "bvec3  subgroupClusteredOr(bvec3, uint);\n"
-            "bvec4  subgroupClusteredOr(bvec4, uint);\n"
-
-            "int    subgroupClusteredXor(int, uint);\n"
-            "ivec2  subgroupClusteredXor(ivec2, uint);\n"
-            "ivec3  subgroupClusteredXor(ivec3, uint);\n"
-            "ivec4  subgroupClusteredXor(ivec4, uint);\n"
-            "uint   subgroupClusteredXor(uint, uint);\n"
-            "uvec2  subgroupClusteredXor(uvec2, uint);\n"
-            "uvec3  subgroupClusteredXor(uvec3, uint);\n"
-            "uvec4  subgroupClusteredXor(uvec4, uint);\n"
-            "bool   subgroupClusteredXor(bool, uint);\n"
-            "bvec2  subgroupClusteredXor(bvec2, uint);\n"
-            "bvec3  subgroupClusteredXor(bvec3, uint);\n"
-            "bvec4  subgroupClusteredXor(bvec4, uint);\n"
-
-            "float  subgroupQuadBroadcast(float, uint);\n"
-            "vec2   subgroupQuadBroadcast(vec2, uint);\n"
-            "vec3   subgroupQuadBroadcast(vec3, uint);\n"
-            "vec4   subgroupQuadBroadcast(vec4, uint);\n"
-            "int    subgroupQuadBroadcast(int, uint);\n"
-            "ivec2  subgroupQuadBroadcast(ivec2, uint);\n"
-            "ivec3  subgroupQuadBroadcast(ivec3, uint);\n"
-            "ivec4  subgroupQuadBroadcast(ivec4, uint);\n"
-            "uint   subgroupQuadBroadcast(uint, uint);\n"
-            "uvec2  subgroupQuadBroadcast(uvec2, uint);\n"
-            "uvec3  subgroupQuadBroadcast(uvec3, uint);\n"
-            "uvec4  subgroupQuadBroadcast(uvec4, uint);\n"
-            "bool   subgroupQuadBroadcast(bool, uint);\n"
-            "bvec2  subgroupQuadBroadcast(bvec2, uint);\n"
-            "bvec3  subgroupQuadBroadcast(bvec3, uint);\n"
-            "bvec4  subgroupQuadBroadcast(bvec4, uint);\n"
-
-            "float  subgroupQuadSwapHorizontal(float);\n"
-            "vec2   subgroupQuadSwapHorizontal(vec2);\n"
-            "vec3   subgroupQuadSwapHorizontal(vec3);\n"
-            "vec4   subgroupQuadSwapHorizontal(vec4);\n"
-            "int    subgroupQuadSwapHorizontal(int);\n"
-            "ivec2  subgroupQuadSwapHorizontal(ivec2);\n"
-            "ivec3  subgroupQuadSwapHorizontal(ivec3);\n"
-            "ivec4  subgroupQuadSwapHorizontal(ivec4);\n"
-            "uint   subgroupQuadSwapHorizontal(uint);\n"
-            "uvec2  subgroupQuadSwapHorizontal(uvec2);\n"
-            "uvec3  subgroupQuadSwapHorizontal(uvec3);\n"
-            "uvec4  subgroupQuadSwapHorizontal(uvec4);\n"
-            "bool   subgroupQuadSwapHorizontal(bool);\n"
-            "bvec2  subgroupQuadSwapHorizontal(bvec2);\n"
-            "bvec3  subgroupQuadSwapHorizontal(bvec3);\n"
-            "bvec4  subgroupQuadSwapHorizontal(bvec4);\n"
-
-            "float  subgroupQuadSwapVertical(float);\n"
-            "vec2   subgroupQuadSwapVertical(vec2);\n"
-            "vec3   subgroupQuadSwapVertical(vec3);\n"
-            "vec4   subgroupQuadSwapVertical(vec4);\n"
-            "int    subgroupQuadSwapVertical(int);\n"
-            "ivec2  subgroupQuadSwapVertical(ivec2);\n"
-            "ivec3  subgroupQuadSwapVertical(ivec3);\n"
-            "ivec4  subgroupQuadSwapVertical(ivec4);\n"
-            "uint   subgroupQuadSwapVertical(uint);\n"
-            "uvec2  subgroupQuadSwapVertical(uvec2);\n"
-            "uvec3  subgroupQuadSwapVertical(uvec3);\n"
-            "uvec4  subgroupQuadSwapVertical(uvec4);\n"
-            "bool   subgroupQuadSwapVertical(bool);\n"
-            "bvec2  subgroupQuadSwapVertical(bvec2);\n"
-            "bvec3  subgroupQuadSwapVertical(bvec3);\n"
-            "bvec4  subgroupQuadSwapVertical(bvec4);\n"
-
-            "float  subgroupQuadSwapDiagonal(float);\n"
-            "vec2   subgroupQuadSwapDiagonal(vec2);\n"
-            "vec3   subgroupQuadSwapDiagonal(vec3);\n"
-            "vec4   subgroupQuadSwapDiagonal(vec4);\n"
-            "int    subgroupQuadSwapDiagonal(int);\n"
-            "ivec2  subgroupQuadSwapDiagonal(ivec2);\n"
-            "ivec3  subgroupQuadSwapDiagonal(ivec3);\n"
-            "ivec4  subgroupQuadSwapDiagonal(ivec4);\n"
-            "uint   subgroupQuadSwapDiagonal(uint);\n"
-            "uvec2  subgroupQuadSwapDiagonal(uvec2);\n"
-            "uvec3  subgroupQuadSwapDiagonal(uvec3);\n"
-            "uvec4  subgroupQuadSwapDiagonal(uvec4);\n"
-            "bool   subgroupQuadSwapDiagonal(bool);\n"
-            "bvec2  subgroupQuadSwapDiagonal(bvec2);\n"
-            "bvec3  subgroupQuadSwapDiagonal(bvec3);\n"
-            "bvec4  subgroupQuadSwapDiagonal(bvec4);\n");
-
-            // GL_EXT_shader_subgroup_extended_types
-        commonBuiltins.append(
-            "bool      subgroupAllEqual(int8_t);\n"
-            "bool      subgroupAllEqual(i8vec2);\n"
-            "bool      subgroupAllEqual(i8vec3);\n"
-            "bool      subgroupAllEqual(i8vec4);\n"
-            "bool      subgroupAllEqual(uint8_t);\n"
-            "bool      subgroupAllEqual(u8vec2);\n"
-            "bool      subgroupAllEqual(u8vec3);\n"
-            "bool      subgroupAllEqual(u8vec4);\n"
-
-            "int8_t    subgroupBroadcast(int8_t, uint);\n"
-            "i8vec2    subgroupBroadcast(i8vec2, uint);\n"
-            "i8vec3    subgroupBroadcast(i8vec3, uint);\n"
-            "i8vec4    subgroupBroadcast(i8vec4, uint);\n"
-            "uint8_t   subgroupBroadcast(uint8_t, uint);\n"
-            "u8vec2    subgroupBroadcast(u8vec2, uint);\n"
-            "u8vec3    subgroupBroadcast(u8vec3, uint);\n"
-            "u8vec4    subgroupBroadcast(u8vec4, uint);\n"
-
-            "int8_t    subgroupBroadcastFirst(int8_t);\n"
-            "i8vec2    subgroupBroadcastFirst(i8vec2);\n"
-            "i8vec3    subgroupBroadcastFirst(i8vec3);\n"
-            "i8vec4    subgroupBroadcastFirst(i8vec4);\n"
-            "uint8_t   subgroupBroadcastFirst(uint8_t);\n"
-            "u8vec2    subgroupBroadcastFirst(u8vec2);\n"
-            "u8vec3    subgroupBroadcastFirst(u8vec3);\n"
-            "u8vec4    subgroupBroadcastFirst(u8vec4);\n"
-
-            "int8_t    subgroupShuffle(int8_t, uint);\n"
-            "i8vec2    subgroupShuffle(i8vec2, uint);\n"
-            "i8vec3    subgroupShuffle(i8vec3, uint);\n"
-            "i8vec4    subgroupShuffle(i8vec4, uint);\n"
-            "uint8_t   subgroupShuffle(uint8_t, uint);\n"
-            "u8vec2    subgroupShuffle(u8vec2, uint);\n"
-            "u8vec3    subgroupShuffle(u8vec3, uint);\n"
-            "u8vec4    subgroupShuffle(u8vec4, uint);\n"
-
-            "int8_t    subgroupShuffleXor(int8_t, uint);\n"
-            "i8vec2    subgroupShuffleXor(i8vec2, uint);\n"
-            "i8vec3    subgroupShuffleXor(i8vec3, uint);\n"
-            "i8vec4    subgroupShuffleXor(i8vec4, uint);\n"
-            "uint8_t   subgroupShuffleXor(uint8_t, uint);\n"
-            "u8vec2    subgroupShuffleXor(u8vec2, uint);\n"
-            "u8vec3    subgroupShuffleXor(u8vec3, uint);\n"
-            "u8vec4    subgroupShuffleXor(u8vec4, uint);\n"
-
-            "int8_t    subgroupShuffleUp(int8_t, uint delta);\n"
-            "i8vec2    subgroupShuffleUp(i8vec2, uint delta);\n"
-            "i8vec3    subgroupShuffleUp(i8vec3, uint delta);\n"
-            "i8vec4    subgroupShuffleUp(i8vec4, uint delta);\n"
-            "uint8_t   subgroupShuffleUp(uint8_t, uint delta);\n"
-            "u8vec2    subgroupShuffleUp(u8vec2, uint delta);\n"
-            "u8vec3    subgroupShuffleUp(u8vec3, uint delta);\n"
-            "u8vec4    subgroupShuffleUp(u8vec4, uint delta);\n"
-
-            "int8_t    subgroupShuffleDown(int8_t, uint delta);\n"
-            "i8vec2    subgroupShuffleDown(i8vec2, uint delta);\n"
-            "i8vec3    subgroupShuffleDown(i8vec3, uint delta);\n"
-            "i8vec4    subgroupShuffleDown(i8vec4, uint delta);\n"
-            "uint8_t   subgroupShuffleDown(uint8_t, uint delta);\n"
-            "u8vec2    subgroupShuffleDown(u8vec2, uint delta);\n"
-            "u8vec3    subgroupShuffleDown(u8vec3, uint delta);\n"
-            "u8vec4    subgroupShuffleDown(u8vec4, uint delta);\n"
-
-            "int8_t    subgroupAdd(int8_t);\n"
-            "i8vec2    subgroupAdd(i8vec2);\n"
-            "i8vec3    subgroupAdd(i8vec3);\n"
-            "i8vec4    subgroupAdd(i8vec4);\n"
-            "uint8_t   subgroupAdd(uint8_t);\n"
-            "u8vec2    subgroupAdd(u8vec2);\n"
-            "u8vec3    subgroupAdd(u8vec3);\n"
-            "u8vec4    subgroupAdd(u8vec4);\n"
-
-            "int8_t    subgroupMul(int8_t);\n"
-            "i8vec2    subgroupMul(i8vec2);\n"
-            "i8vec3    subgroupMul(i8vec3);\n"
-            "i8vec4    subgroupMul(i8vec4);\n"
-            "uint8_t   subgroupMul(uint8_t);\n"
-            "u8vec2    subgroupMul(u8vec2);\n"
-            "u8vec3    subgroupMul(u8vec3);\n"
-            "u8vec4    subgroupMul(u8vec4);\n"
-
-            "int8_t    subgroupMin(int8_t);\n"
-            "i8vec2    subgroupMin(i8vec2);\n"
-            "i8vec3    subgroupMin(i8vec3);\n"
-            "i8vec4    subgroupMin(i8vec4);\n"
-            "uint8_t   subgroupMin(uint8_t);\n"
-            "u8vec2    subgroupMin(u8vec2);\n"
-            "u8vec3    subgroupMin(u8vec3);\n"
-            "u8vec4    subgroupMin(u8vec4);\n"
-
-            "int8_t    subgroupMax(int8_t);\n"
-            "i8vec2    subgroupMax(i8vec2);\n"
-            "i8vec3    subgroupMax(i8vec3);\n"
-            "i8vec4    subgroupMax(i8vec4);\n"
-            "uint8_t   subgroupMax(uint8_t);\n"
-            "u8vec2    subgroupMax(u8vec2);\n"
-            "u8vec3    subgroupMax(u8vec3);\n"
-            "u8vec4    subgroupMax(u8vec4);\n"
-
-            "int8_t    subgroupAnd(int8_t);\n"
-            "i8vec2    subgroupAnd(i8vec2);\n"
-            "i8vec3    subgroupAnd(i8vec3);\n"
-            "i8vec4    subgroupAnd(i8vec4);\n"
-            "uint8_t   subgroupAnd(uint8_t);\n"
-            "u8vec2    subgroupAnd(u8vec2);\n"
-            "u8vec3    subgroupAnd(u8vec3);\n"
-            "u8vec4    subgroupAnd(u8vec4);\n"
-
-            "int8_t    subgroupOr(int8_t);\n"
-            "i8vec2    subgroupOr(i8vec2);\n"
-            "i8vec3    subgroupOr(i8vec3);\n"
-            "i8vec4    subgroupOr(i8vec4);\n"
-            "uint8_t   subgroupOr(uint8_t);\n"
-            "u8vec2    subgroupOr(u8vec2);\n"
-            "u8vec3    subgroupOr(u8vec3);\n"
-            "u8vec4    subgroupOr(u8vec4);\n"
-
-            "int8_t    subgroupXor(int8_t);\n"
-            "i8vec2    subgroupXor(i8vec2);\n"
-            "i8vec3    subgroupXor(i8vec3);\n"
-            "i8vec4    subgroupXor(i8vec4);\n"
-            "uint8_t   subgroupXor(uint8_t);\n"
-            "u8vec2    subgroupXor(u8vec2);\n"
-            "u8vec3    subgroupXor(u8vec3);\n"
-            "u8vec4    subgroupXor(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveAdd(int8_t);\n"
-            "i8vec2    subgroupInclusiveAdd(i8vec2);\n"
-            "i8vec3    subgroupInclusiveAdd(i8vec3);\n"
-            "i8vec4    subgroupInclusiveAdd(i8vec4);\n"
-            "uint8_t   subgroupInclusiveAdd(uint8_t);\n"
-            "u8vec2    subgroupInclusiveAdd(u8vec2);\n"
-            "u8vec3    subgroupInclusiveAdd(u8vec3);\n"
-            "u8vec4    subgroupInclusiveAdd(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveMul(int8_t);\n"
-            "i8vec2    subgroupInclusiveMul(i8vec2);\n"
-            "i8vec3    subgroupInclusiveMul(i8vec3);\n"
-            "i8vec4    subgroupInclusiveMul(i8vec4);\n"
-            "uint8_t   subgroupInclusiveMul(uint8_t);\n"
-            "u8vec2    subgroupInclusiveMul(u8vec2);\n"
-            "u8vec3    subgroupInclusiveMul(u8vec3);\n"
-            "u8vec4    subgroupInclusiveMul(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveMin(int8_t);\n"
-            "i8vec2    subgroupInclusiveMin(i8vec2);\n"
-            "i8vec3    subgroupInclusiveMin(i8vec3);\n"
-            "i8vec4    subgroupInclusiveMin(i8vec4);\n"
-            "uint8_t   subgroupInclusiveMin(uint8_t);\n"
-            "u8vec2    subgroupInclusiveMin(u8vec2);\n"
-            "u8vec3    subgroupInclusiveMin(u8vec3);\n"
-            "u8vec4    subgroupInclusiveMin(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveMax(int8_t);\n"
-            "i8vec2    subgroupInclusiveMax(i8vec2);\n"
-            "i8vec3    subgroupInclusiveMax(i8vec3);\n"
-            "i8vec4    subgroupInclusiveMax(i8vec4);\n"
-            "uint8_t   subgroupInclusiveMax(uint8_t);\n"
-            "u8vec2    subgroupInclusiveMax(u8vec2);\n"
-            "u8vec3    subgroupInclusiveMax(u8vec3);\n"
-            "u8vec4    subgroupInclusiveMax(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveAnd(int8_t);\n"
-            "i8vec2    subgroupInclusiveAnd(i8vec2);\n"
-            "i8vec3    subgroupInclusiveAnd(i8vec3);\n"
-            "i8vec4    subgroupInclusiveAnd(i8vec4);\n"
-            "uint8_t   subgroupInclusiveAnd(uint8_t);\n"
-            "u8vec2    subgroupInclusiveAnd(u8vec2);\n"
-            "u8vec3    subgroupInclusiveAnd(u8vec3);\n"
-            "u8vec4    subgroupInclusiveAnd(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveOr(int8_t);\n"
-            "i8vec2    subgroupInclusiveOr(i8vec2);\n"
-            "i8vec3    subgroupInclusiveOr(i8vec3);\n"
-            "i8vec4    subgroupInclusiveOr(i8vec4);\n"
-            "uint8_t   subgroupInclusiveOr(uint8_t);\n"
-            "u8vec2    subgroupInclusiveOr(u8vec2);\n"
-            "u8vec3    subgroupInclusiveOr(u8vec3);\n"
-            "u8vec4    subgroupInclusiveOr(u8vec4);\n"
-
-            "int8_t    subgroupInclusiveXor(int8_t);\n"
-            "i8vec2    subgroupInclusiveXor(i8vec2);\n"
-            "i8vec3    subgroupInclusiveXor(i8vec3);\n"
-            "i8vec4    subgroupInclusiveXor(i8vec4);\n"
-            "uint8_t   subgroupInclusiveXor(uint8_t);\n"
-            "u8vec2    subgroupInclusiveXor(u8vec2);\n"
-            "u8vec3    subgroupInclusiveXor(u8vec3);\n"
-            "u8vec4    subgroupInclusiveXor(u8vec4);\n"
-
-            "int8_t    subgroupExclusiveAdd(int8_t);\n"
-            "i8vec2    subgroupExclusiveAdd(i8vec2);\n"
-            "i8vec3    subgroupExclusiveAdd(i8vec3);\n"
-            "i8vec4    subgroupExclusiveAdd(i8vec4);\n"
-            "uint8_t   subgroupExclusiveAdd(uint8_t);\n"
-            "u8vec2    subgroupExclusiveAdd(u8vec2);\n"
-            "u8vec3    subgroupExclusiveAdd(u8vec3);\n"
-            "u8vec4    subgroupExclusiveAdd(u8vec4);\n"
-
-            "int8_t    subgroupExclusiveMul(int8_t);\n"
-            "i8vec2    subgroupExclusiveMul(i8vec2);\n"
-            "i8vec3    subgroupExclusiveMul(i8vec3);\n"
-            "i8vec4    subgroupExclusiveMul(i8vec4);\n"
-            "uint8_t   subgroupExclusiveMul(uint8_t);\n"
-            "u8vec2    subgroupExclusiveMul(u8vec2);\n"
-            "u8vec3    subgroupExclusiveMul(u8vec3);\n"
-            "u8vec4    subgroupExclusiveMul(u8vec4);\n"
-
-            "int8_t    subgroupExclusiveMin(int8_t);\n"
-            "i8vec2    subgroupExclusiveMin(i8vec2);\n"
-            "i8vec3    subgroupExclusiveMin(i8vec3);\n"
-            "i8vec4    subgroupExclusiveMin(i8vec4);\n"
-            "uint8_t   subgroupExclusiveMin(uint8_t);\n"
-            "u8vec2    subgroupExclusiveMin(u8vec2);\n"
-            "u8vec3    subgroupExclusiveMin(u8vec3);\n"
-            "u8vec4    subgroupExclusiveMin(u8vec4);\n"
-
-            "int8_t     subgroupExclusiveMax(int8_t);\n"
-            "i8vec2     subgroupExclusiveMax(i8vec2);\n"
-            "i8vec3     subgroupExclusiveMax(i8vec3);\n"
-            "i8vec4     subgroupExclusiveMax(i8vec4);\n"
-            "uint8_t    subgroupExclusiveMax(uint8_t);\n"
-            "u8vec2     subgroupExclusiveMax(u8vec2);\n"
-            "u8vec3     subgroupExclusiveMax(u8vec3);\n"
-            "u8vec4     subgroupExclusiveMax(u8vec4);\n"
-
-            "int8_t     subgroupExclusiveAnd(int8_t);\n"
-            "i8vec2     subgroupExclusiveAnd(i8vec2);\n"
-            "i8vec3     subgroupExclusiveAnd(i8vec3);\n"
-            "i8vec4     subgroupExclusiveAnd(i8vec4);\n"
-            "uint8_t    subgroupExclusiveAnd(uint8_t);\n"
-            "u8vec2     subgroupExclusiveAnd(u8vec2);\n"
-            "u8vec3     subgroupExclusiveAnd(u8vec3);\n"
-            "u8vec4     subgroupExclusiveAnd(u8vec4);\n"
-
-            "int8_t     subgroupExclusiveOr(int8_t);\n"
-            "i8vec2     subgroupExclusiveOr(i8vec2);\n"
-            "i8vec3     subgroupExclusiveOr(i8vec3);\n"
-            "i8vec4     subgroupExclusiveOr(i8vec4);\n"
-            "uint8_t    subgroupExclusiveOr(uint8_t);\n"
-            "u8vec2     subgroupExclusiveOr(u8vec2);\n"
-            "u8vec3     subgroupExclusiveOr(u8vec3);\n"
-            "u8vec4     subgroupExclusiveOr(u8vec4);\n"
-
-            "int8_t     subgroupExclusiveXor(int8_t);\n"
-            "i8vec2     subgroupExclusiveXor(i8vec2);\n"
-            "i8vec3     subgroupExclusiveXor(i8vec3);\n"
-            "i8vec4     subgroupExclusiveXor(i8vec4);\n"
-            "uint8_t    subgroupExclusiveXor(uint8_t);\n"
-            "u8vec2     subgroupExclusiveXor(u8vec2);\n"
-            "u8vec3     subgroupExclusiveXor(u8vec3);\n"
-            "u8vec4     subgroupExclusiveXor(u8vec4);\n"
-
-            "int8_t     subgroupClusteredAdd(int8_t, uint);\n"
-            "i8vec2     subgroupClusteredAdd(i8vec2, uint);\n"
-            "i8vec3     subgroupClusteredAdd(i8vec3, uint);\n"
-            "i8vec4     subgroupClusteredAdd(i8vec4, uint);\n"
-            "uint8_t    subgroupClusteredAdd(uint8_t, uint);\n"
-            "u8vec2     subgroupClusteredAdd(u8vec2, uint);\n"
-            "u8vec3     subgroupClusteredAdd(u8vec3, uint);\n"
-            "u8vec4     subgroupClusteredAdd(u8vec4, uint);\n"
-
-            "int8_t     subgroupClusteredMul(int8_t, uint);\n"
-            "i8vec2     subgroupClusteredMul(i8vec2, uint);\n"
-            "i8vec3     subgroupClusteredMul(i8vec3, uint);\n"
-            "i8vec4     subgroupClusteredMul(i8vec4, uint);\n"
-            "uint8_t    subgroupClusteredMul(uint8_t, uint);\n"
-            "u8vec2     subgroupClusteredMul(u8vec2, uint);\n"
-            "u8vec3     subgroupClusteredMul(u8vec3, uint);\n"
-            "u8vec4     subgroupClusteredMul(u8vec4, uint);\n"
-
-            "int8_t     subgroupClusteredMin(int8_t, uint);\n"
-            "i8vec2     subgroupClusteredMin(i8vec2, uint);\n"
-            "i8vec3     subgroupClusteredMin(i8vec3, uint);\n"
-            "i8vec4     subgroupClusteredMin(i8vec4, uint);\n"
-            "uint8_t    subgroupClusteredMin(uint8_t, uint);\n"
-            "u8vec2     subgroupClusteredMin(u8vec2, uint);\n"
-            "u8vec3     subgroupClusteredMin(u8vec3, uint);\n"
-            "u8vec4     subgroupClusteredMin(u8vec4, uint);\n"
-
-            "int8_t     subgroupClusteredMax(int8_t, uint);\n"
-            "i8vec2     subgroupClusteredMax(i8vec2, uint);\n"
-            "i8vec3     subgroupClusteredMax(i8vec3, uint);\n"
-            "i8vec4     subgroupClusteredMax(i8vec4, uint);\n"
-            "uint8_t    subgroupClusteredMax(uint8_t, uint);\n"
-            "u8vec2     subgroupClusteredMax(u8vec2, uint);\n"
-            "u8vec3     subgroupClusteredMax(u8vec3, uint);\n"
-            "u8vec4     subgroupClusteredMax(u8vec4, uint);\n"
-
-            "int8_t    subgroupClusteredAnd(int8_t, uint);\n"
-            "i8vec2    subgroupClusteredAnd(i8vec2, uint);\n"
-            "i8vec3    subgroupClusteredAnd(i8vec3, uint);\n"
-            "i8vec4    subgroupClusteredAnd(i8vec4, uint);\n"
-            "uint8_t   subgroupClusteredAnd(uint8_t, uint);\n"
-            "u8vec2    subgroupClusteredAnd(u8vec2, uint);\n"
-            "u8vec3    subgroupClusteredAnd(u8vec3, uint);\n"
-            "u8vec4    subgroupClusteredAnd(u8vec4, uint);\n"
-
-            "int8_t    subgroupClusteredOr(int8_t, uint);\n"
-            "i8vec2    subgroupClusteredOr(i8vec2, uint);\n"
-            "i8vec3    subgroupClusteredOr(i8vec3, uint);\n"
-            "i8vec4    subgroupClusteredOr(i8vec4, uint);\n"
-            "uint8_t   subgroupClusteredOr(uint8_t, uint);\n"
-            "u8vec2    subgroupClusteredOr(u8vec2, uint);\n"
-            "u8vec3    subgroupClusteredOr(u8vec3, uint);\n"
-            "u8vec4    subgroupClusteredOr(u8vec4, uint);\n"
-
-            "int8_t    subgroupClusteredXor(int8_t, uint);\n"
-            "i8vec2    subgroupClusteredXor(i8vec2, uint);\n"
-            "i8vec3    subgroupClusteredXor(i8vec3, uint);\n"
-            "i8vec4    subgroupClusteredXor(i8vec4, uint);\n"
-            "uint8_t   subgroupClusteredXor(uint8_t, uint);\n"
-            "u8vec2    subgroupClusteredXor(u8vec2, uint);\n"
-            "u8vec3    subgroupClusteredXor(u8vec3, uint);\n"
-            "u8vec4    subgroupClusteredXor(u8vec4, uint);\n"
-
-            "int8_t    subgroupQuadBroadcast(int8_t, uint);\n"
-            "i8vec2    subgroupQuadBroadcast(i8vec2, uint);\n"
-            "i8vec3    subgroupQuadBroadcast(i8vec3, uint);\n"
-            "i8vec4    subgroupQuadBroadcast(i8vec4, uint);\n"
-            "uint8_t   subgroupQuadBroadcast(uint8_t, uint);\n"
-            "u8vec2    subgroupQuadBroadcast(u8vec2, uint);\n"
-            "u8vec3    subgroupQuadBroadcast(u8vec3, uint);\n"
-            "u8vec4    subgroupQuadBroadcast(u8vec4, uint);\n"
-
-            "int8_t    subgroupQuadSwapHorizontal(int8_t);\n"
-            "i8vec2    subgroupQuadSwapHorizontal(i8vec2);\n"
-            "i8vec3    subgroupQuadSwapHorizontal(i8vec3);\n"
-            "i8vec4    subgroupQuadSwapHorizontal(i8vec4);\n"
-            "uint8_t   subgroupQuadSwapHorizontal(uint8_t);\n"
-            "u8vec2    subgroupQuadSwapHorizontal(u8vec2);\n"
-            "u8vec3    subgroupQuadSwapHorizontal(u8vec3);\n"
-            "u8vec4    subgroupQuadSwapHorizontal(u8vec4);\n"
-
-            "int8_t    subgroupQuadSwapVertical(int8_t);\n"
-            "i8vec2    subgroupQuadSwapVertical(i8vec2);\n"
-            "i8vec3    subgroupQuadSwapVertical(i8vec3);\n"
-            "i8vec4    subgroupQuadSwapVertical(i8vec4);\n"
-            "uint8_t   subgroupQuadSwapVertical(uint8_t);\n"
-            "u8vec2    subgroupQuadSwapVertical(u8vec2);\n"
-            "u8vec3    subgroupQuadSwapVertical(u8vec3);\n"
-            "u8vec4    subgroupQuadSwapVertical(u8vec4);\n"
-
-            "int8_t    subgroupQuadSwapDiagonal(int8_t);\n"
-            "i8vec2    subgroupQuadSwapDiagonal(i8vec2);\n"
-            "i8vec3    subgroupQuadSwapDiagonal(i8vec3);\n"
-            "i8vec4    subgroupQuadSwapDiagonal(i8vec4);\n"
-            "uint8_t   subgroupQuadSwapDiagonal(uint8_t);\n"
-            "u8vec2    subgroupQuadSwapDiagonal(u8vec2);\n"
-            "u8vec3    subgroupQuadSwapDiagonal(u8vec3);\n"
-            "u8vec4    subgroupQuadSwapDiagonal(u8vec4);\n"
-
-            "bool      subgroupAllEqual(int16_t);\n"
-            "bool      subgroupAllEqual(i16vec2);\n"
-            "bool      subgroupAllEqual(i16vec3);\n"
-            "bool      subgroupAllEqual(i16vec4);\n"
-            "bool      subgroupAllEqual(uint16_t);\n"
-            "bool      subgroupAllEqual(u16vec2);\n"
-            "bool      subgroupAllEqual(u16vec3);\n"
-            "bool      subgroupAllEqual(u16vec4);\n"
-
-            "int16_t   subgroupBroadcast(int16_t, uint);\n"
-            "i16vec2   subgroupBroadcast(i16vec2, uint);\n"
-            "i16vec3   subgroupBroadcast(i16vec3, uint);\n"
-            "i16vec4   subgroupBroadcast(i16vec4, uint);\n"
-            "uint16_t  subgroupBroadcast(uint16_t, uint);\n"
-            "u16vec2   subgroupBroadcast(u16vec2, uint);\n"
-            "u16vec3   subgroupBroadcast(u16vec3, uint);\n"
-            "u16vec4   subgroupBroadcast(u16vec4, uint);\n"
-
-            "int16_t   subgroupBroadcastFirst(int16_t);\n"
-            "i16vec2   subgroupBroadcastFirst(i16vec2);\n"
-            "i16vec3   subgroupBroadcastFirst(i16vec3);\n"
-            "i16vec4   subgroupBroadcastFirst(i16vec4);\n"
-            "uint16_t  subgroupBroadcastFirst(uint16_t);\n"
-            "u16vec2   subgroupBroadcastFirst(u16vec2);\n"
-            "u16vec3   subgroupBroadcastFirst(u16vec3);\n"
-            "u16vec4   subgroupBroadcastFirst(u16vec4);\n"
-
-            "int16_t   subgroupShuffle(int16_t, uint);\n"
-            "i16vec2   subgroupShuffle(i16vec2, uint);\n"
-            "i16vec3   subgroupShuffle(i16vec3, uint);\n"
-            "i16vec4   subgroupShuffle(i16vec4, uint);\n"
-            "uint16_t  subgroupShuffle(uint16_t, uint);\n"
-            "u16vec2   subgroupShuffle(u16vec2, uint);\n"
-            "u16vec3   subgroupShuffle(u16vec3, uint);\n"
-            "u16vec4   subgroupShuffle(u16vec4, uint);\n"
-
-            "int16_t   subgroupShuffleXor(int16_t, uint);\n"
-            "i16vec2   subgroupShuffleXor(i16vec2, uint);\n"
-            "i16vec3   subgroupShuffleXor(i16vec3, uint);\n"
-            "i16vec4   subgroupShuffleXor(i16vec4, uint);\n"
-            "uint16_t  subgroupShuffleXor(uint16_t, uint);\n"
-            "u16vec2   subgroupShuffleXor(u16vec2, uint);\n"
-            "u16vec3   subgroupShuffleXor(u16vec3, uint);\n"
-            "u16vec4   subgroupShuffleXor(u16vec4, uint);\n"
-
-            "int16_t   subgroupShuffleUp(int16_t, uint delta);\n"
-            "i16vec2   subgroupShuffleUp(i16vec2, uint delta);\n"
-            "i16vec3   subgroupShuffleUp(i16vec3, uint delta);\n"
-            "i16vec4   subgroupShuffleUp(i16vec4, uint delta);\n"
-            "uint16_t  subgroupShuffleUp(uint16_t, uint delta);\n"
-            "u16vec2   subgroupShuffleUp(u16vec2, uint delta);\n"
-            "u16vec3   subgroupShuffleUp(u16vec3, uint delta);\n"
-            "u16vec4   subgroupShuffleUp(u16vec4, uint delta);\n"
-
-            "int16_t   subgroupShuffleDown(int16_t, uint delta);\n"
-            "i16vec2   subgroupShuffleDown(i16vec2, uint delta);\n"
-            "i16vec3   subgroupShuffleDown(i16vec3, uint delta);\n"
-            "i16vec4   subgroupShuffleDown(i16vec4, uint delta);\n"
-            "uint16_t  subgroupShuffleDown(uint16_t, uint delta);\n"
-            "u16vec2   subgroupShuffleDown(u16vec2, uint delta);\n"
-            "u16vec3   subgroupShuffleDown(u16vec3, uint delta);\n"
-            "u16vec4   subgroupShuffleDown(u16vec4, uint delta);\n"
-
-            "int16_t   subgroupAdd(int16_t);\n"
-            "i16vec2   subgroupAdd(i16vec2);\n"
-            "i16vec3   subgroupAdd(i16vec3);\n"
-            "i16vec4   subgroupAdd(i16vec4);\n"
-            "uint16_t  subgroupAdd(uint16_t);\n"
-            "u16vec2   subgroupAdd(u16vec2);\n"
-            "u16vec3   subgroupAdd(u16vec3);\n"
-            "u16vec4   subgroupAdd(u16vec4);\n"
-
-            "int16_t   subgroupMul(int16_t);\n"
-            "i16vec2   subgroupMul(i16vec2);\n"
-            "i16vec3   subgroupMul(i16vec3);\n"
-            "i16vec4   subgroupMul(i16vec4);\n"
-            "uint16_t  subgroupMul(uint16_t);\n"
-            "u16vec2   subgroupMul(u16vec2);\n"
-            "u16vec3   subgroupMul(u16vec3);\n"
-            "u16vec4   subgroupMul(u16vec4);\n"
-
-            "int16_t   subgroupMin(int16_t);\n"
-            "i16vec2   subgroupMin(i16vec2);\n"
-            "i16vec3   subgroupMin(i16vec3);\n"
-            "i16vec4   subgroupMin(i16vec4);\n"
-            "uint16_t  subgroupMin(uint16_t);\n"
-            "u16vec2   subgroupMin(u16vec2);\n"
-            "u16vec3   subgroupMin(u16vec3);\n"
-            "u16vec4   subgroupMin(u16vec4);\n"
-
-            "int16_t   subgroupMax(int16_t);\n"
-            "i16vec2   subgroupMax(i16vec2);\n"
-            "i16vec3   subgroupMax(i16vec3);\n"
-            "i16vec4   subgroupMax(i16vec4);\n"
-            "uint16_t  subgroupMax(uint16_t);\n"
-            "u16vec2   subgroupMax(u16vec2);\n"
-            "u16vec3   subgroupMax(u16vec3);\n"
-            "u16vec4   subgroupMax(u16vec4);\n"
-
-            "int16_t   subgroupAnd(int16_t);\n"
-            "i16vec2   subgroupAnd(i16vec2);\n"
-            "i16vec3   subgroupAnd(i16vec3);\n"
-            "i16vec4   subgroupAnd(i16vec4);\n"
-            "uint16_t  subgroupAnd(uint16_t);\n"
-            "u16vec2   subgroupAnd(u16vec2);\n"
-            "u16vec3   subgroupAnd(u16vec3);\n"
-            "u16vec4   subgroupAnd(u16vec4);\n"
-
-            "int16_t   subgroupOr(int16_t);\n"
-            "i16vec2   subgroupOr(i16vec2);\n"
-            "i16vec3   subgroupOr(i16vec3);\n"
-            "i16vec4   subgroupOr(i16vec4);\n"
-            "uint16_t  subgroupOr(uint16_t);\n"
-            "u16vec2   subgroupOr(u16vec2);\n"
-            "u16vec3   subgroupOr(u16vec3);\n"
-            "u16vec4   subgroupOr(u16vec4);\n"
-
-            "int16_t   subgroupXor(int16_t);\n"
-            "i16vec2   subgroupXor(i16vec2);\n"
-            "i16vec3   subgroupXor(i16vec3);\n"
-            "i16vec4   subgroupXor(i16vec4);\n"
-            "uint16_t  subgroupXor(uint16_t);\n"
-            "u16vec2   subgroupXor(u16vec2);\n"
-            "u16vec3   subgroupXor(u16vec3);\n"
-            "u16vec4   subgroupXor(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveAdd(int16_t);\n"
-            "i16vec2   subgroupInclusiveAdd(i16vec2);\n"
-            "i16vec3   subgroupInclusiveAdd(i16vec3);\n"
-            "i16vec4   subgroupInclusiveAdd(i16vec4);\n"
-            "uint16_t  subgroupInclusiveAdd(uint16_t);\n"
-            "u16vec2   subgroupInclusiveAdd(u16vec2);\n"
-            "u16vec3   subgroupInclusiveAdd(u16vec3);\n"
-            "u16vec4   subgroupInclusiveAdd(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveMul(int16_t);\n"
-            "i16vec2   subgroupInclusiveMul(i16vec2);\n"
-            "i16vec3   subgroupInclusiveMul(i16vec3);\n"
-            "i16vec4   subgroupInclusiveMul(i16vec4);\n"
-            "uint16_t  subgroupInclusiveMul(uint16_t);\n"
-            "u16vec2   subgroupInclusiveMul(u16vec2);\n"
-            "u16vec3   subgroupInclusiveMul(u16vec3);\n"
-            "u16vec4   subgroupInclusiveMul(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveMin(int16_t);\n"
-            "i16vec2   subgroupInclusiveMin(i16vec2);\n"
-            "i16vec3   subgroupInclusiveMin(i16vec3);\n"
-            "i16vec4   subgroupInclusiveMin(i16vec4);\n"
-            "uint16_t  subgroupInclusiveMin(uint16_t);\n"
-            "u16vec2   subgroupInclusiveMin(u16vec2);\n"
-            "u16vec3   subgroupInclusiveMin(u16vec3);\n"
-            "u16vec4   subgroupInclusiveMin(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveMax(int16_t);\n"
-            "i16vec2   subgroupInclusiveMax(i16vec2);\n"
-            "i16vec3   subgroupInclusiveMax(i16vec3);\n"
-            "i16vec4   subgroupInclusiveMax(i16vec4);\n"
-            "uint16_t  subgroupInclusiveMax(uint16_t);\n"
-            "u16vec2   subgroupInclusiveMax(u16vec2);\n"
-            "u16vec3   subgroupInclusiveMax(u16vec3);\n"
-            "u16vec4   subgroupInclusiveMax(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveAnd(int16_t);\n"
-            "i16vec2   subgroupInclusiveAnd(i16vec2);\n"
-            "i16vec3   subgroupInclusiveAnd(i16vec3);\n"
-            "i16vec4   subgroupInclusiveAnd(i16vec4);\n"
-            "uint16_t  subgroupInclusiveAnd(uint16_t);\n"
-            "u16vec2   subgroupInclusiveAnd(u16vec2);\n"
-            "u16vec3   subgroupInclusiveAnd(u16vec3);\n"
-            "u16vec4   subgroupInclusiveAnd(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveOr(int16_t);\n"
-            "i16vec2   subgroupInclusiveOr(i16vec2);\n"
-            "i16vec3   subgroupInclusiveOr(i16vec3);\n"
-            "i16vec4   subgroupInclusiveOr(i16vec4);\n"
-            "uint16_t  subgroupInclusiveOr(uint16_t);\n"
-            "u16vec2   subgroupInclusiveOr(u16vec2);\n"
-            "u16vec3   subgroupInclusiveOr(u16vec3);\n"
-            "u16vec4   subgroupInclusiveOr(u16vec4);\n"
-
-            "int16_t   subgroupInclusiveXor(int16_t);\n"
-            "i16vec2   subgroupInclusiveXor(i16vec2);\n"
-            "i16vec3   subgroupInclusiveXor(i16vec3);\n"
-            "i16vec4   subgroupInclusiveXor(i16vec4);\n"
-            "uint16_t  subgroupInclusiveXor(uint16_t);\n"
-            "u16vec2   subgroupInclusiveXor(u16vec2);\n"
-            "u16vec3   subgroupInclusiveXor(u16vec3);\n"
-            "u16vec4   subgroupInclusiveXor(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveAdd(int16_t);\n"
-            "i16vec2   subgroupExclusiveAdd(i16vec2);\n"
-            "i16vec3   subgroupExclusiveAdd(i16vec3);\n"
-            "i16vec4   subgroupExclusiveAdd(i16vec4);\n"
-            "uint16_t  subgroupExclusiveAdd(uint16_t);\n"
-            "u16vec2   subgroupExclusiveAdd(u16vec2);\n"
-            "u16vec3   subgroupExclusiveAdd(u16vec3);\n"
-            "u16vec4   subgroupExclusiveAdd(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveMul(int16_t);\n"
-            "i16vec2   subgroupExclusiveMul(i16vec2);\n"
-            "i16vec3   subgroupExclusiveMul(i16vec3);\n"
-            "i16vec4   subgroupExclusiveMul(i16vec4);\n"
-            "uint16_t  subgroupExclusiveMul(uint16_t);\n"
-            "u16vec2   subgroupExclusiveMul(u16vec2);\n"
-            "u16vec3   subgroupExclusiveMul(u16vec3);\n"
-            "u16vec4   subgroupExclusiveMul(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveMin(int16_t);\n"
-            "i16vec2   subgroupExclusiveMin(i16vec2);\n"
-            "i16vec3   subgroupExclusiveMin(i16vec3);\n"
-            "i16vec4   subgroupExclusiveMin(i16vec4);\n"
-            "uint16_t  subgroupExclusiveMin(uint16_t);\n"
-            "u16vec2   subgroupExclusiveMin(u16vec2);\n"
-            "u16vec3   subgroupExclusiveMin(u16vec3);\n"
-            "u16vec4   subgroupExclusiveMin(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveMax(int16_t);\n"
-            "i16vec2   subgroupExclusiveMax(i16vec2);\n"
-            "i16vec3   subgroupExclusiveMax(i16vec3);\n"
-            "i16vec4   subgroupExclusiveMax(i16vec4);\n"
-            "uint16_t  subgroupExclusiveMax(uint16_t);\n"
-            "u16vec2   subgroupExclusiveMax(u16vec2);\n"
-            "u16vec3   subgroupExclusiveMax(u16vec3);\n"
-            "u16vec4   subgroupExclusiveMax(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveAnd(int16_t);\n"
-            "i16vec2   subgroupExclusiveAnd(i16vec2);\n"
-            "i16vec3   subgroupExclusiveAnd(i16vec3);\n"
-            "i16vec4   subgroupExclusiveAnd(i16vec4);\n"
-            "uint16_t  subgroupExclusiveAnd(uint16_t);\n"
-            "u16vec2   subgroupExclusiveAnd(u16vec2);\n"
-            "u16vec3   subgroupExclusiveAnd(u16vec3);\n"
-            "u16vec4   subgroupExclusiveAnd(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveOr(int16_t);\n"
-            "i16vec2   subgroupExclusiveOr(i16vec2);\n"
-            "i16vec3   subgroupExclusiveOr(i16vec3);\n"
-            "i16vec4   subgroupExclusiveOr(i16vec4);\n"
-            "uint16_t  subgroupExclusiveOr(uint16_t);\n"
-            "u16vec2   subgroupExclusiveOr(u16vec2);\n"
-            "u16vec3   subgroupExclusiveOr(u16vec3);\n"
-            "u16vec4   subgroupExclusiveOr(u16vec4);\n"
-
-            "int16_t   subgroupExclusiveXor(int16_t);\n"
-            "i16vec2   subgroupExclusiveXor(i16vec2);\n"
-            "i16vec3   subgroupExclusiveXor(i16vec3);\n"
-            "i16vec4   subgroupExclusiveXor(i16vec4);\n"
-            "uint16_t  subgroupExclusiveXor(uint16_t);\n"
-            "u16vec2   subgroupExclusiveXor(u16vec2);\n"
-            "u16vec3   subgroupExclusiveXor(u16vec3);\n"
-            "u16vec4   subgroupExclusiveXor(u16vec4);\n"
-
-            "int16_t   subgroupClusteredAdd(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredAdd(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredAdd(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredAdd(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredAdd(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredAdd(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredAdd(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredAdd(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredMul(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredMul(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredMul(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredMul(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredMul(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredMul(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredMul(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredMul(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredMin(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredMin(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredMin(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredMin(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredMin(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredMin(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredMin(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredMin(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredMax(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredMax(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredMax(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredMax(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredMax(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredMax(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredMax(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredMax(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredAnd(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredAnd(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredAnd(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredAnd(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredAnd(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredAnd(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredAnd(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredAnd(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredOr(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredOr(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredOr(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredOr(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredOr(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredOr(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredOr(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredOr(u16vec4, uint);\n"
-
-            "int16_t   subgroupClusteredXor(int16_t, uint);\n"
-            "i16vec2   subgroupClusteredXor(i16vec2, uint);\n"
-            "i16vec3   subgroupClusteredXor(i16vec3, uint);\n"
-            "i16vec4   subgroupClusteredXor(i16vec4, uint);\n"
-            "uint16_t  subgroupClusteredXor(uint16_t, uint);\n"
-            "u16vec2   subgroupClusteredXor(u16vec2, uint);\n"
-            "u16vec3   subgroupClusteredXor(u16vec3, uint);\n"
-            "u16vec4   subgroupClusteredXor(u16vec4, uint);\n"
-
-            "int16_t   subgroupQuadBroadcast(int16_t, uint);\n"
-            "i16vec2   subgroupQuadBroadcast(i16vec2, uint);\n"
-            "i16vec3   subgroupQuadBroadcast(i16vec3, uint);\n"
-            "i16vec4   subgroupQuadBroadcast(i16vec4, uint);\n"
-            "uint16_t  subgroupQuadBroadcast(uint16_t, uint);\n"
-            "u16vec2   subgroupQuadBroadcast(u16vec2, uint);\n"
-            "u16vec3   subgroupQuadBroadcast(u16vec3, uint);\n"
-            "u16vec4   subgroupQuadBroadcast(u16vec4, uint);\n"
-
-            "int16_t   subgroupQuadSwapHorizontal(int16_t);\n"
-            "i16vec2   subgroupQuadSwapHorizontal(i16vec2);\n"
-            "i16vec3   subgroupQuadSwapHorizontal(i16vec3);\n"
-            "i16vec4   subgroupQuadSwapHorizontal(i16vec4);\n"
-            "uint16_t  subgroupQuadSwapHorizontal(uint16_t);\n"
-            "u16vec2   subgroupQuadSwapHorizontal(u16vec2);\n"
-            "u16vec3   subgroupQuadSwapHorizontal(u16vec3);\n"
-            "u16vec4   subgroupQuadSwapHorizontal(u16vec4);\n"
-
-            "int16_t   subgroupQuadSwapVertical(int16_t);\n"
-            "i16vec2   subgroupQuadSwapVertical(i16vec2);\n"
-            "i16vec3   subgroupQuadSwapVertical(i16vec3);\n"
-            "i16vec4   subgroupQuadSwapVertical(i16vec4);\n"
-            "uint16_t  subgroupQuadSwapVertical(uint16_t);\n"
-            "u16vec2   subgroupQuadSwapVertical(u16vec2);\n"
-            "u16vec3   subgroupQuadSwapVertical(u16vec3);\n"
-            "u16vec4   subgroupQuadSwapVertical(u16vec4);\n"
-
-            "int16_t   subgroupQuadSwapDiagonal(int16_t);\n"
-            "i16vec2   subgroupQuadSwapDiagonal(i16vec2);\n"
-            "i16vec3   subgroupQuadSwapDiagonal(i16vec3);\n"
-            "i16vec4   subgroupQuadSwapDiagonal(i16vec4);\n"
-            "uint16_t  subgroupQuadSwapDiagonal(uint16_t);\n"
-            "u16vec2   subgroupQuadSwapDiagonal(u16vec2);\n"
-            "u16vec3   subgroupQuadSwapDiagonal(u16vec3);\n"
-            "u16vec4   subgroupQuadSwapDiagonal(u16vec4);\n"
-
-            "bool      subgroupAllEqual(int64_t);\n"
-            "bool      subgroupAllEqual(i64vec2);\n"
-            "bool      subgroupAllEqual(i64vec3);\n"
-            "bool      subgroupAllEqual(i64vec4);\n"
-            "bool      subgroupAllEqual(uint64_t);\n"
-            "bool      subgroupAllEqual(u64vec2);\n"
-            "bool      subgroupAllEqual(u64vec3);\n"
-            "bool      subgroupAllEqual(u64vec4);\n"
-
-            "int64_t   subgroupBroadcast(int64_t, uint);\n"
-            "i64vec2   subgroupBroadcast(i64vec2, uint);\n"
-            "i64vec3   subgroupBroadcast(i64vec3, uint);\n"
-            "i64vec4   subgroupBroadcast(i64vec4, uint);\n"
-            "uint64_t  subgroupBroadcast(uint64_t, uint);\n"
-            "u64vec2   subgroupBroadcast(u64vec2, uint);\n"
-            "u64vec3   subgroupBroadcast(u64vec3, uint);\n"
-            "u64vec4   subgroupBroadcast(u64vec4, uint);\n"
-
-            "int64_t   subgroupBroadcastFirst(int64_t);\n"
-            "i64vec2   subgroupBroadcastFirst(i64vec2);\n"
-            "i64vec3   subgroupBroadcastFirst(i64vec3);\n"
-            "i64vec4   subgroupBroadcastFirst(i64vec4);\n"
-            "uint64_t  subgroupBroadcastFirst(uint64_t);\n"
-            "u64vec2   subgroupBroadcastFirst(u64vec2);\n"
-            "u64vec3   subgroupBroadcastFirst(u64vec3);\n"
-            "u64vec4   subgroupBroadcastFirst(u64vec4);\n"
-
-            "int64_t   subgroupShuffle(int64_t, uint);\n"
-            "i64vec2   subgroupShuffle(i64vec2, uint);\n"
-            "i64vec3   subgroupShuffle(i64vec3, uint);\n"
-            "i64vec4   subgroupShuffle(i64vec4, uint);\n"
-            "uint64_t  subgroupShuffle(uint64_t, uint);\n"
-            "u64vec2   subgroupShuffle(u64vec2, uint);\n"
-            "u64vec3   subgroupShuffle(u64vec3, uint);\n"
-            "u64vec4   subgroupShuffle(u64vec4, uint);\n"
-
-            "int64_t   subgroupShuffleXor(int64_t, uint);\n"
-            "i64vec2   subgroupShuffleXor(i64vec2, uint);\n"
-            "i64vec3   subgroupShuffleXor(i64vec3, uint);\n"
-            "i64vec4   subgroupShuffleXor(i64vec4, uint);\n"
-            "uint64_t  subgroupShuffleXor(uint64_t, uint);\n"
-            "u64vec2   subgroupShuffleXor(u64vec2, uint);\n"
-            "u64vec3   subgroupShuffleXor(u64vec3, uint);\n"
-            "u64vec4   subgroupShuffleXor(u64vec4, uint);\n"
-
-            "int64_t   subgroupShuffleUp(int64_t, uint delta);\n"
-            "i64vec2   subgroupShuffleUp(i64vec2, uint delta);\n"
-            "i64vec3   subgroupShuffleUp(i64vec3, uint delta);\n"
-            "i64vec4   subgroupShuffleUp(i64vec4, uint delta);\n"
-            "uint64_t  subgroupShuffleUp(uint64_t, uint delta);\n"
-            "u64vec2   subgroupShuffleUp(u64vec2, uint delta);\n"
-            "u64vec3   subgroupShuffleUp(u64vec3, uint delta);\n"
-            "u64vec4   subgroupShuffleUp(u64vec4, uint delta);\n"
-
-            "int64_t   subgroupShuffleDown(int64_t, uint delta);\n"
-            "i64vec2   subgroupShuffleDown(i64vec2, uint delta);\n"
-            "i64vec3   subgroupShuffleDown(i64vec3, uint delta);\n"
-            "i64vec4   subgroupShuffleDown(i64vec4, uint delta);\n"
-            "uint64_t  subgroupShuffleDown(uint64_t, uint delta);\n"
-            "u64vec2   subgroupShuffleDown(u64vec2, uint delta);\n"
-            "u64vec3   subgroupShuffleDown(u64vec3, uint delta);\n"
-            "u64vec4   subgroupShuffleDown(u64vec4, uint delta);\n"
-
-            "int64_t   subgroupAdd(int64_t);\n"
-            "i64vec2   subgroupAdd(i64vec2);\n"
-            "i64vec3   subgroupAdd(i64vec3);\n"
-            "i64vec4   subgroupAdd(i64vec4);\n"
-            "uint64_t  subgroupAdd(uint64_t);\n"
-            "u64vec2   subgroupAdd(u64vec2);\n"
-            "u64vec3   subgroupAdd(u64vec3);\n"
-            "u64vec4   subgroupAdd(u64vec4);\n"
-
-            "int64_t   subgroupMul(int64_t);\n"
-            "i64vec2   subgroupMul(i64vec2);\n"
-            "i64vec3   subgroupMul(i64vec3);\n"
-            "i64vec4   subgroupMul(i64vec4);\n"
-            "uint64_t  subgroupMul(uint64_t);\n"
-            "u64vec2   subgroupMul(u64vec2);\n"
-            "u64vec3   subgroupMul(u64vec3);\n"
-            "u64vec4   subgroupMul(u64vec4);\n"
-
-            "int64_t   subgroupMin(int64_t);\n"
-            "i64vec2   subgroupMin(i64vec2);\n"
-            "i64vec3   subgroupMin(i64vec3);\n"
-            "i64vec4   subgroupMin(i64vec4);\n"
-            "uint64_t  subgroupMin(uint64_t);\n"
-            "u64vec2   subgroupMin(u64vec2);\n"
-            "u64vec3   subgroupMin(u64vec3);\n"
-            "u64vec4   subgroupMin(u64vec4);\n"
-
-            "int64_t   subgroupMax(int64_t);\n"
-            "i64vec2   subgroupMax(i64vec2);\n"
-            "i64vec3   subgroupMax(i64vec3);\n"
-            "i64vec4   subgroupMax(i64vec4);\n"
-            "uint64_t  subgroupMax(uint64_t);\n"
-            "u64vec2   subgroupMax(u64vec2);\n"
-            "u64vec3   subgroupMax(u64vec3);\n"
-            "u64vec4   subgroupMax(u64vec4);\n"
-
-            "int64_t   subgroupAnd(int64_t);\n"
-            "i64vec2   subgroupAnd(i64vec2);\n"
-            "i64vec3   subgroupAnd(i64vec3);\n"
-            "i64vec4   subgroupAnd(i64vec4);\n"
-            "uint64_t  subgroupAnd(uint64_t);\n"
-            "u64vec2   subgroupAnd(u64vec2);\n"
-            "u64vec3   subgroupAnd(u64vec3);\n"
-            "u64vec4   subgroupAnd(u64vec4);\n"
-
-            "int64_t   subgroupOr(int64_t);\n"
-            "i64vec2   subgroupOr(i64vec2);\n"
-            "i64vec3   subgroupOr(i64vec3);\n"
-            "i64vec4   subgroupOr(i64vec4);\n"
-            "uint64_t  subgroupOr(uint64_t);\n"
-            "u64vec2   subgroupOr(u64vec2);\n"
-            "u64vec3   subgroupOr(u64vec3);\n"
-            "u64vec4   subgroupOr(u64vec4);\n"
-
-            "int64_t   subgroupXor(int64_t);\n"
-            "i64vec2   subgroupXor(i64vec2);\n"
-            "i64vec3   subgroupXor(i64vec3);\n"
-            "i64vec4   subgroupXor(i64vec4);\n"
-            "uint64_t  subgroupXor(uint64_t);\n"
-            "u64vec2   subgroupXor(u64vec2);\n"
-            "u64vec3   subgroupXor(u64vec3);\n"
-            "u64vec4   subgroupXor(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveAdd(int64_t);\n"
-            "i64vec2   subgroupInclusiveAdd(i64vec2);\n"
-            "i64vec3   subgroupInclusiveAdd(i64vec3);\n"
-            "i64vec4   subgroupInclusiveAdd(i64vec4);\n"
-            "uint64_t  subgroupInclusiveAdd(uint64_t);\n"
-            "u64vec2   subgroupInclusiveAdd(u64vec2);\n"
-            "u64vec3   subgroupInclusiveAdd(u64vec3);\n"
-            "u64vec4   subgroupInclusiveAdd(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveMul(int64_t);\n"
-            "i64vec2   subgroupInclusiveMul(i64vec2);\n"
-            "i64vec3   subgroupInclusiveMul(i64vec3);\n"
-            "i64vec4   subgroupInclusiveMul(i64vec4);\n"
-            "uint64_t  subgroupInclusiveMul(uint64_t);\n"
-            "u64vec2   subgroupInclusiveMul(u64vec2);\n"
-            "u64vec3   subgroupInclusiveMul(u64vec3);\n"
-            "u64vec4   subgroupInclusiveMul(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveMin(int64_t);\n"
-            "i64vec2   subgroupInclusiveMin(i64vec2);\n"
-            "i64vec3   subgroupInclusiveMin(i64vec3);\n"
-            "i64vec4   subgroupInclusiveMin(i64vec4);\n"
-            "uint64_t  subgroupInclusiveMin(uint64_t);\n"
-            "u64vec2   subgroupInclusiveMin(u64vec2);\n"
-            "u64vec3   subgroupInclusiveMin(u64vec3);\n"
-            "u64vec4   subgroupInclusiveMin(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveMax(int64_t);\n"
-            "i64vec2   subgroupInclusiveMax(i64vec2);\n"
-            "i64vec3   subgroupInclusiveMax(i64vec3);\n"
-            "i64vec4   subgroupInclusiveMax(i64vec4);\n"
-            "uint64_t  subgroupInclusiveMax(uint64_t);\n"
-            "u64vec2   subgroupInclusiveMax(u64vec2);\n"
-            "u64vec3   subgroupInclusiveMax(u64vec3);\n"
-            "u64vec4   subgroupInclusiveMax(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveAnd(int64_t);\n"
-            "i64vec2   subgroupInclusiveAnd(i64vec2);\n"
-            "i64vec3   subgroupInclusiveAnd(i64vec3);\n"
-            "i64vec4   subgroupInclusiveAnd(i64vec4);\n"
-            "uint64_t  subgroupInclusiveAnd(uint64_t);\n"
-            "u64vec2   subgroupInclusiveAnd(u64vec2);\n"
-            "u64vec3   subgroupInclusiveAnd(u64vec3);\n"
-            "u64vec4   subgroupInclusiveAnd(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveOr(int64_t);\n"
-            "i64vec2   subgroupInclusiveOr(i64vec2);\n"
-            "i64vec3   subgroupInclusiveOr(i64vec3);\n"
-            "i64vec4   subgroupInclusiveOr(i64vec4);\n"
-            "uint64_t  subgroupInclusiveOr(uint64_t);\n"
-            "u64vec2   subgroupInclusiveOr(u64vec2);\n"
-            "u64vec3   subgroupInclusiveOr(u64vec3);\n"
-            "u64vec4   subgroupInclusiveOr(u64vec4);\n"
-
-            "int64_t   subgroupInclusiveXor(int64_t);\n"
-            "i64vec2   subgroupInclusiveXor(i64vec2);\n"
-            "i64vec3   subgroupInclusiveXor(i64vec3);\n"
-            "i64vec4   subgroupInclusiveXor(i64vec4);\n"
-            "uint64_t  subgroupInclusiveXor(uint64_t);\n"
-            "u64vec2   subgroupInclusiveXor(u64vec2);\n"
-            "u64vec3   subgroupInclusiveXor(u64vec3);\n"
-            "u64vec4   subgroupInclusiveXor(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveAdd(int64_t);\n"
-            "i64vec2   subgroupExclusiveAdd(i64vec2);\n"
-            "i64vec3   subgroupExclusiveAdd(i64vec3);\n"
-            "i64vec4   subgroupExclusiveAdd(i64vec4);\n"
-            "uint64_t  subgroupExclusiveAdd(uint64_t);\n"
-            "u64vec2   subgroupExclusiveAdd(u64vec2);\n"
-            "u64vec3   subgroupExclusiveAdd(u64vec3);\n"
-            "u64vec4   subgroupExclusiveAdd(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveMul(int64_t);\n"
-            "i64vec2   subgroupExclusiveMul(i64vec2);\n"
-            "i64vec3   subgroupExclusiveMul(i64vec3);\n"
-            "i64vec4   subgroupExclusiveMul(i64vec4);\n"
-            "uint64_t  subgroupExclusiveMul(uint64_t);\n"
-            "u64vec2   subgroupExclusiveMul(u64vec2);\n"
-            "u64vec3   subgroupExclusiveMul(u64vec3);\n"
-            "u64vec4   subgroupExclusiveMul(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveMin(int64_t);\n"
-            "i64vec2   subgroupExclusiveMin(i64vec2);\n"
-            "i64vec3   subgroupExclusiveMin(i64vec3);\n"
-            "i64vec4   subgroupExclusiveMin(i64vec4);\n"
-            "uint64_t  subgroupExclusiveMin(uint64_t);\n"
-            "u64vec2   subgroupExclusiveMin(u64vec2);\n"
-            "u64vec3   subgroupExclusiveMin(u64vec3);\n"
-            "u64vec4   subgroupExclusiveMin(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveMax(int64_t);\n"
-            "i64vec2   subgroupExclusiveMax(i64vec2);\n"
-            "i64vec3   subgroupExclusiveMax(i64vec3);\n"
-            "i64vec4   subgroupExclusiveMax(i64vec4);\n"
-            "uint64_t  subgroupExclusiveMax(uint64_t);\n"
-            "u64vec2   subgroupExclusiveMax(u64vec2);\n"
-            "u64vec3   subgroupExclusiveMax(u64vec3);\n"
-            "u64vec4   subgroupExclusiveMax(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveAnd(int64_t);\n"
-            "i64vec2   subgroupExclusiveAnd(i64vec2);\n"
-            "i64vec3   subgroupExclusiveAnd(i64vec3);\n"
-            "i64vec4   subgroupExclusiveAnd(i64vec4);\n"
-            "uint64_t  subgroupExclusiveAnd(uint64_t);\n"
-            "u64vec2   subgroupExclusiveAnd(u64vec2);\n"
-            "u64vec3   subgroupExclusiveAnd(u64vec3);\n"
-            "u64vec4   subgroupExclusiveAnd(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveOr(int64_t);\n"
-            "i64vec2   subgroupExclusiveOr(i64vec2);\n"
-            "i64vec3   subgroupExclusiveOr(i64vec3);\n"
-            "i64vec4   subgroupExclusiveOr(i64vec4);\n"
-            "uint64_t  subgroupExclusiveOr(uint64_t);\n"
-            "u64vec2   subgroupExclusiveOr(u64vec2);\n"
-            "u64vec3   subgroupExclusiveOr(u64vec3);\n"
-            "u64vec4   subgroupExclusiveOr(u64vec4);\n"
-
-            "int64_t   subgroupExclusiveXor(int64_t);\n"
-            "i64vec2   subgroupExclusiveXor(i64vec2);\n"
-            "i64vec3   subgroupExclusiveXor(i64vec3);\n"
-            "i64vec4   subgroupExclusiveXor(i64vec4);\n"
-            "uint64_t  subgroupExclusiveXor(uint64_t);\n"
-            "u64vec2   subgroupExclusiveXor(u64vec2);\n"
-            "u64vec3   subgroupExclusiveXor(u64vec3);\n"
-            "u64vec4   subgroupExclusiveXor(u64vec4);\n"
-
-            "int64_t   subgroupClusteredAdd(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredAdd(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredAdd(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredAdd(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredAdd(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredAdd(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredAdd(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredAdd(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredMul(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredMul(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredMul(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredMul(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredMul(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredMul(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredMul(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredMul(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredMin(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredMin(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredMin(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredMin(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredMin(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredMin(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredMin(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredMin(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredMax(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredMax(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredMax(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredMax(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredMax(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredMax(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredMax(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredMax(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredAnd(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredAnd(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredAnd(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredAnd(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredAnd(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredAnd(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredAnd(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredAnd(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredOr(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredOr(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredOr(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredOr(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredOr(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredOr(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredOr(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredOr(u64vec4, uint);\n"
-
-            "int64_t   subgroupClusteredXor(int64_t, uint);\n"
-            "i64vec2   subgroupClusteredXor(i64vec2, uint);\n"
-            "i64vec3   subgroupClusteredXor(i64vec3, uint);\n"
-            "i64vec4   subgroupClusteredXor(i64vec4, uint);\n"
-            "uint64_t  subgroupClusteredXor(uint64_t, uint);\n"
-            "u64vec2   subgroupClusteredXor(u64vec2, uint);\n"
-            "u64vec3   subgroupClusteredXor(u64vec3, uint);\n"
-            "u64vec4   subgroupClusteredXor(u64vec4, uint);\n"
-
-            "int64_t   subgroupQuadBroadcast(int64_t, uint);\n"
-            "i64vec2   subgroupQuadBroadcast(i64vec2, uint);\n"
-            "i64vec3   subgroupQuadBroadcast(i64vec3, uint);\n"
-            "i64vec4   subgroupQuadBroadcast(i64vec4, uint);\n"
-            "uint64_t  subgroupQuadBroadcast(uint64_t, uint);\n"
-            "u64vec2   subgroupQuadBroadcast(u64vec2, uint);\n"
-            "u64vec3   subgroupQuadBroadcast(u64vec3, uint);\n"
-            "u64vec4   subgroupQuadBroadcast(u64vec4, uint);\n"
-
-            "int64_t   subgroupQuadSwapHorizontal(int64_t);\n"
-            "i64vec2   subgroupQuadSwapHorizontal(i64vec2);\n"
-            "i64vec3   subgroupQuadSwapHorizontal(i64vec3);\n"
-            "i64vec4   subgroupQuadSwapHorizontal(i64vec4);\n"
-            "uint64_t  subgroupQuadSwapHorizontal(uint64_t);\n"
-            "u64vec2   subgroupQuadSwapHorizontal(u64vec2);\n"
-            "u64vec3   subgroupQuadSwapHorizontal(u64vec3);\n"
-            "u64vec4   subgroupQuadSwapHorizontal(u64vec4);\n"
-
-            "int64_t   subgroupQuadSwapVertical(int64_t);\n"
-            "i64vec2   subgroupQuadSwapVertical(i64vec2);\n"
-            "i64vec3   subgroupQuadSwapVertical(i64vec3);\n"
-            "i64vec4   subgroupQuadSwapVertical(i64vec4);\n"
-            "uint64_t  subgroupQuadSwapVertical(uint64_t);\n"
-            "u64vec2   subgroupQuadSwapVertical(u64vec2);\n"
-            "u64vec3   subgroupQuadSwapVertical(u64vec3);\n"
-            "u64vec4   subgroupQuadSwapVertical(u64vec4);\n"
-
-            "int64_t   subgroupQuadSwapDiagonal(int64_t);\n"
-            "i64vec2   subgroupQuadSwapDiagonal(i64vec2);\n"
-            "i64vec3   subgroupQuadSwapDiagonal(i64vec3);\n"
-            "i64vec4   subgroupQuadSwapDiagonal(i64vec4);\n"
-            "uint64_t  subgroupQuadSwapDiagonal(uint64_t);\n"
-            "u64vec2   subgroupQuadSwapDiagonal(u64vec2);\n"
-            "u64vec3   subgroupQuadSwapDiagonal(u64vec3);\n"
-            "u64vec4   subgroupQuadSwapDiagonal(u64vec4);\n"
-
-            "bool      subgroupAllEqual(float16_t);\n"
-            "bool      subgroupAllEqual(f16vec2);\n"
-            "bool      subgroupAllEqual(f16vec3);\n"
-            "bool      subgroupAllEqual(f16vec4);\n"
-
-            "float16_t subgroupBroadcast(float16_t, uint);\n"
-            "f16vec2   subgroupBroadcast(f16vec2, uint);\n"
-            "f16vec3   subgroupBroadcast(f16vec3, uint);\n"
-            "f16vec4   subgroupBroadcast(f16vec4, uint);\n"
-
-            "float16_t subgroupBroadcastFirst(float16_t);\n"
-            "f16vec2   subgroupBroadcastFirst(f16vec2);\n"
-            "f16vec3   subgroupBroadcastFirst(f16vec3);\n"
-            "f16vec4   subgroupBroadcastFirst(f16vec4);\n"
-
-            "float16_t subgroupShuffle(float16_t, uint);\n"
-            "f16vec2   subgroupShuffle(f16vec2, uint);\n"
-            "f16vec3   subgroupShuffle(f16vec3, uint);\n"
-            "f16vec4   subgroupShuffle(f16vec4, uint);\n"
-
-            "float16_t subgroupShuffleXor(float16_t, uint);\n"
-            "f16vec2   subgroupShuffleXor(f16vec2, uint);\n"
-            "f16vec3   subgroupShuffleXor(f16vec3, uint);\n"
-            "f16vec4   subgroupShuffleXor(f16vec4, uint);\n"
-
-            "float16_t subgroupShuffleUp(float16_t, uint delta);\n"
-            "f16vec2   subgroupShuffleUp(f16vec2, uint delta);\n"
-            "f16vec3   subgroupShuffleUp(f16vec3, uint delta);\n"
-            "f16vec4   subgroupShuffleUp(f16vec4, uint delta);\n"
-
-            "float16_t subgroupShuffleDown(float16_t, uint delta);\n"
-            "f16vec2   subgroupShuffleDown(f16vec2, uint delta);\n"
-            "f16vec3   subgroupShuffleDown(f16vec3, uint delta);\n"
-            "f16vec4   subgroupShuffleDown(f16vec4, uint delta);\n"
-
-            "float16_t subgroupAdd(float16_t);\n"
-            "f16vec2   subgroupAdd(f16vec2);\n"
-            "f16vec3   subgroupAdd(f16vec3);\n"
-            "f16vec4   subgroupAdd(f16vec4);\n"
-
-            "float16_t subgroupMul(float16_t);\n"
-            "f16vec2   subgroupMul(f16vec2);\n"
-            "f16vec3   subgroupMul(f16vec3);\n"
-            "f16vec4   subgroupMul(f16vec4);\n"
-
-            "float16_t subgroupMin(float16_t);\n"
-            "f16vec2   subgroupMin(f16vec2);\n"
-            "f16vec3   subgroupMin(f16vec3);\n"
-            "f16vec4   subgroupMin(f16vec4);\n"
-
-            "float16_t subgroupMax(float16_t);\n"
-            "f16vec2   subgroupMax(f16vec2);\n"
-            "f16vec3   subgroupMax(f16vec3);\n"
-            "f16vec4   subgroupMax(f16vec4);\n"
-
-            "float16_t subgroupInclusiveAdd(float16_t);\n"
-            "f16vec2   subgroupInclusiveAdd(f16vec2);\n"
-            "f16vec3   subgroupInclusiveAdd(f16vec3);\n"
-            "f16vec4   subgroupInclusiveAdd(f16vec4);\n"
-
-            "float16_t subgroupInclusiveMul(float16_t);\n"
-            "f16vec2   subgroupInclusiveMul(f16vec2);\n"
-            "f16vec3   subgroupInclusiveMul(f16vec3);\n"
-            "f16vec4   subgroupInclusiveMul(f16vec4);\n"
-
-            "float16_t subgroupInclusiveMin(float16_t);\n"
-            "f16vec2   subgroupInclusiveMin(f16vec2);\n"
-            "f16vec3   subgroupInclusiveMin(f16vec3);\n"
-            "f16vec4   subgroupInclusiveMin(f16vec4);\n"
-
-            "float16_t subgroupInclusiveMax(float16_t);\n"
-            "f16vec2   subgroupInclusiveMax(f16vec2);\n"
-            "f16vec3   subgroupInclusiveMax(f16vec3);\n"
-            "f16vec4   subgroupInclusiveMax(f16vec4);\n"
-
-            "float16_t subgroupExclusiveAdd(float16_t);\n"
-            "f16vec2   subgroupExclusiveAdd(f16vec2);\n"
-            "f16vec3   subgroupExclusiveAdd(f16vec3);\n"
-            "f16vec4   subgroupExclusiveAdd(f16vec4);\n"
-
-            "float16_t subgroupExclusiveMul(float16_t);\n"
-            "f16vec2   subgroupExclusiveMul(f16vec2);\n"
-            "f16vec3   subgroupExclusiveMul(f16vec3);\n"
-            "f16vec4   subgroupExclusiveMul(f16vec4);\n"
-
-            "float16_t subgroupExclusiveMin(float16_t);\n"
-            "f16vec2   subgroupExclusiveMin(f16vec2);\n"
-            "f16vec3   subgroupExclusiveMin(f16vec3);\n"
-            "f16vec4   subgroupExclusiveMin(f16vec4);\n"
-
-            "float16_t subgroupExclusiveMax(float16_t);\n"
-            "f16vec2   subgroupExclusiveMax(f16vec2);\n"
-            "f16vec3   subgroupExclusiveMax(f16vec3);\n"
-            "f16vec4   subgroupExclusiveMax(f16vec4);\n"
-
-            "float16_t subgroupClusteredAdd(float16_t, uint);\n"
-            "f16vec2   subgroupClusteredAdd(f16vec2, uint);\n"
-            "f16vec3   subgroupClusteredAdd(f16vec3, uint);\n"
-            "f16vec4   subgroupClusteredAdd(f16vec4, uint);\n"
-
-            "float16_t subgroupClusteredMul(float16_t, uint);\n"
-            "f16vec2   subgroupClusteredMul(f16vec2, uint);\n"
-            "f16vec3   subgroupClusteredMul(f16vec3, uint);\n"
-            "f16vec4   subgroupClusteredMul(f16vec4, uint);\n"
-
-            "float16_t subgroupClusteredMin(float16_t, uint);\n"
-            "f16vec2   subgroupClusteredMin(f16vec2, uint);\n"
-            "f16vec3   subgroupClusteredMin(f16vec3, uint);\n"
-            "f16vec4   subgroupClusteredMin(f16vec4, uint);\n"
-
-            "float16_t subgroupClusteredMax(float16_t, uint);\n"
-            "f16vec2   subgroupClusteredMax(f16vec2, uint);\n"
-            "f16vec3   subgroupClusteredMax(f16vec3, uint);\n"
-            "f16vec4   subgroupClusteredMax(f16vec4, uint);\n"
-
-            "float16_t subgroupQuadBroadcast(float16_t, uint);\n"
-            "f16vec2   subgroupQuadBroadcast(f16vec2, uint);\n"
-            "f16vec3   subgroupQuadBroadcast(f16vec3, uint);\n"
-            "f16vec4   subgroupQuadBroadcast(f16vec4, uint);\n"
-
-            "float16_t subgroupQuadSwapHorizontal(float16_t);\n"
-            "f16vec2   subgroupQuadSwapHorizontal(f16vec2);\n"
-            "f16vec3   subgroupQuadSwapHorizontal(f16vec3);\n"
-            "f16vec4   subgroupQuadSwapHorizontal(f16vec4);\n"
-
-            "float16_t subgroupQuadSwapVertical(float16_t);\n"
-            "f16vec2   subgroupQuadSwapVertical(f16vec2);\n"
-            "f16vec3   subgroupQuadSwapVertical(f16vec3);\n"
-            "f16vec4   subgroupQuadSwapVertical(f16vec4);\n"
-
-            "float16_t subgroupQuadSwapDiagonal(float16_t);\n"
-            "f16vec2   subgroupQuadSwapDiagonal(f16vec2);\n"
-            "f16vec3   subgroupQuadSwapDiagonal(f16vec3);\n"
-            "f16vec4   subgroupQuadSwapDiagonal(f16vec4);\n"
-
+        // Generate all flavors of subgroup ops.
+        static const char *subgroupOps[] = 
+        {
+            "bool   subgroupAllEqual(%s);\n",
+            "%s     subgroupBroadcast(%s, uint);\n",
+            "%s     subgroupBroadcastFirst(%s);\n",
+            "%s     subgroupShuffle(%s, uint);\n",
+            "%s     subgroupShuffleXor(%s, uint);\n",
+            "%s     subgroupShuffleUp(%s, uint delta);\n",
+            "%s     subgroupShuffleDown(%s, uint delta);\n",
+            "%s     subgroupAdd(%s);\n",
+            "%s     subgroupMul(%s);\n",
+            "%s     subgroupMin(%s);\n",
+            "%s     subgroupMax(%s);\n",
+            "%s     subgroupAnd(%s);\n",
+            "%s     subgroupOr(%s);\n",
+            "%s     subgroupXor(%s);\n",
+            "%s     subgroupInclusiveAdd(%s);\n",
+            "%s     subgroupInclusiveMul(%s);\n",
+            "%s     subgroupInclusiveMin(%s);\n",
+            "%s     subgroupInclusiveMax(%s);\n",
+            "%s     subgroupInclusiveAnd(%s);\n",
+            "%s     subgroupInclusiveOr(%s);\n",
+            "%s     subgroupInclusiveXor(%s);\n",
+            "%s     subgroupExclusiveAdd(%s);\n",
+            "%s     subgroupExclusiveMul(%s);\n",
+            "%s     subgroupExclusiveMin(%s);\n",
+            "%s     subgroupExclusiveMax(%s);\n",
+            "%s     subgroupExclusiveAnd(%s);\n",
+            "%s     subgroupExclusiveOr(%s);\n",
+            "%s     subgroupExclusiveXor(%s);\n",
+            "%s     subgroupClusteredAdd(%s, uint);\n",
+            "%s     subgroupClusteredMul(%s, uint);\n",
+            "%s     subgroupClusteredMin(%s, uint);\n",
+            "%s     subgroupClusteredMax(%s, uint);\n",
+            "%s     subgroupClusteredAnd(%s, uint);\n",
+            "%s     subgroupClusteredOr(%s, uint);\n",
+            "%s     subgroupClusteredXor(%s, uint);\n",
+            "%s     subgroupQuadBroadcast(%s, uint);\n",
+            "%s     subgroupQuadSwapHorizontal(%s);\n",
+            "%s     subgroupQuadSwapVertical(%s);\n",
+            "%s     subgroupQuadSwapDiagonal(%s);\n",
 #ifdef NV_EXTENSIONS
-            "uvec4  subgroupPartitionNV(float);\n"
-            "uvec4  subgroupPartitionNV(vec2);\n"
-            "uvec4  subgroupPartitionNV(vec3);\n"
-            "uvec4  subgroupPartitionNV(vec4);\n"
-            "uvec4  subgroupPartitionNV(int);\n"
-            "uvec4  subgroupPartitionNV(ivec2);\n"
-            "uvec4  subgroupPartitionNV(ivec3);\n"
-            "uvec4  subgroupPartitionNV(ivec4);\n"
-            "uvec4  subgroupPartitionNV(uint);\n"
-            "uvec4  subgroupPartitionNV(uvec2);\n"
-            "uvec4  subgroupPartitionNV(uvec3);\n"
-            "uvec4  subgroupPartitionNV(uvec4);\n"
-            "uvec4  subgroupPartitionNV(bool);\n"
-            "uvec4  subgroupPartitionNV(bvec2);\n"
-            "uvec4  subgroupPartitionNV(bvec3);\n"
-            "uvec4  subgroupPartitionNV(bvec4);\n"
-
-            "float  subgroupPartitionedAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedXorNV(bvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveXorNV(bvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveXorNV(bvec4, uvec4 ballot);\n"
+            "uvec4  subgroupPartitionNV(%s);\n",
+            "%s     subgroupPartitionedAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedXorNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveXorNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveXorNV(%s, uvec4 ballot);\n",
 #endif
+        };
 
-            "\n");
+        static const char *floatTypes[] = { // type names substituted into the %s slots of each op template; emitted unless the op is classified as logical
+            "float", "vec2", "vec3", "vec4", 
+            "float16_t", "f16vec2", "f16vec3", "f16vec4", 
+        };
+        static const char *doubleTypes[] = { // emitted unless the op is logical, and only for non-ES profiles with version >= 400
+            "double", "dvec2", "dvec3", "dvec4", 
+        };
+        static const char *intTypes[] = { // signed, then unsigned, integer scalar/vector type names; emitted for every op
+            "int8_t", "i8vec2", "i8vec3", "i8vec4", 
+            "int16_t", "i16vec2", "i16vec3", "i16vec4", 
+            "int", "ivec2", "ivec3", "ivec4", 
+            "int64_t", "i64vec2", "i64vec3", "i64vec4", 
+            "uint8_t", "u8vec2", "u8vec3", "u8vec4", 
+            "uint16_t", "u16vec2", "u16vec3", "u16vec4", 
+            "uint", "uvec2", "uvec3", "uvec4", 
+            "uint64_t", "u64vec2", "u64vec3", "u64vec4", 
+        };
+        static const char *boolTypes[] = { // emitted unless the op is classified as math (Add/Mul/Min/Max)
+            "bool", "bvec2", "bvec3", "bvec4", 
+        };
 
-        if (profile != EEsProfile && version >= 400) {
-            commonBuiltins.append(
-                "bool   subgroupAllEqual(double);\n"
-                "bool   subgroupAllEqual(dvec2);\n"
-                "bool   subgroupAllEqual(dvec3);\n"
-                "bool   subgroupAllEqual(dvec4);\n"
+        for (unsigned int i = 0; i < sizeof(subgroupOps)/sizeof(subgroupOps[0]); ++i) { // each entry is a printf-style prototype template
+            const char *op = subgroupOps[i];
 
-                "double subgroupBroadcast(double, uint);\n"
-                "dvec2  subgroupBroadcast(dvec2, uint);\n"
-                "dvec3  subgroupBroadcast(dvec3, uint);\n"
-                "dvec4  subgroupBroadcast(dvec4, uint);\n"
+            // Logical ops (template text contains "Or", "And" or "Xor") get no float/double overloads; "ShuffleXor" is explicitly not treated as logical
+            bool logicalOp = strstr(op, "Or") || strstr(op, "And") ||
+                             (strstr(op, "Xor") && !strstr(op, "ShuffleXor"));
+            // Math ops (template text contains "Add", "Mul", "Min" or "Max") get no bool overloads
+            bool mathOp = strstr(op, "Add") || strstr(op, "Mul") || strstr(op, "Min") || strstr(op, "Max");
 
-                "double subgroupBroadcastFirst(double);\n"
-                "dvec2  subgroupBroadcastFirst(dvec2);\n"
-                "dvec3  subgroupBroadcastFirst(dvec3);\n"
-                "dvec4  subgroupBroadcastFirst(dvec4);\n"
+            const int bufSize = 256; // capacity of the scratch buffer that holds one formatted prototype
+            char buf[bufSize];
 
-                "double subgroupShuffle(double, uint);\n"
-                "dvec2  subgroupShuffle(dvec2, uint);\n"
-                "dvec3  subgroupShuffle(dvec3, uint);\n"
-                "dvec4  subgroupShuffle(dvec4, uint);\n"
-
-                "double subgroupShuffleXor(double, uint);\n"
-                "dvec2  subgroupShuffleXor(dvec2, uint);\n"
-                "dvec3  subgroupShuffleXor(dvec3, uint);\n"
-                "dvec4  subgroupShuffleXor(dvec4, uint);\n"
-
-                "double subgroupShuffleUp(double, uint delta);\n"
-                "dvec2  subgroupShuffleUp(dvec2, uint delta);\n"
-                "dvec3  subgroupShuffleUp(dvec3, uint delta);\n"
-                "dvec4  subgroupShuffleUp(dvec4, uint delta);\n"
-
-                "double subgroupShuffleDown(double, uint delta);\n"
-                "dvec2  subgroupShuffleDown(dvec2, uint delta);\n"
-                "dvec3  subgroupShuffleDown(dvec3, uint delta);\n"
-                "dvec4  subgroupShuffleDown(dvec4, uint delta);\n"
-
-                "double subgroupAdd(double);\n"
-                "dvec2  subgroupAdd(dvec2);\n"
-                "dvec3  subgroupAdd(dvec3);\n"
-                "dvec4  subgroupAdd(dvec4);\n"
-
-                "double subgroupMul(double);\n"
-                "dvec2  subgroupMul(dvec2);\n"
-                "dvec3  subgroupMul(dvec3);\n"
-                "dvec4  subgroupMul(dvec4);\n"
-
-                "double subgroupMin(double);\n"
-                "dvec2  subgroupMin(dvec2);\n"
-                "dvec3  subgroupMin(dvec3);\n"
-                "dvec4  subgroupMin(dvec4);\n"
-
-                "double subgroupMax(double);\n"
-                "dvec2  subgroupMax(dvec2);\n"
-                "dvec3  subgroupMax(dvec3);\n"
-                "dvec4  subgroupMax(dvec4);\n"
-
-                "double subgroupInclusiveAdd(double);\n"
-                "dvec2  subgroupInclusiveAdd(dvec2);\n"
-                "dvec3  subgroupInclusiveAdd(dvec3);\n"
-                "dvec4  subgroupInclusiveAdd(dvec4);\n"
-
-                "double subgroupInclusiveMul(double);\n"
-                "dvec2  subgroupInclusiveMul(dvec2);\n"
-                "dvec3  subgroupInclusiveMul(dvec3);\n"
-                "dvec4  subgroupInclusiveMul(dvec4);\n"
-
-                "double subgroupInclusiveMin(double);\n"
-                "dvec2  subgroupInclusiveMin(dvec2);\n"
-                "dvec3  subgroupInclusiveMin(dvec3);\n"
-                "dvec4  subgroupInclusiveMin(dvec4);\n"
-
-                "double subgroupInclusiveMax(double);\n"
-                "dvec2  subgroupInclusiveMax(dvec2);\n"
-                "dvec3  subgroupInclusiveMax(dvec3);\n"
-                "dvec4  subgroupInclusiveMax(dvec4);\n"
-
-                "double subgroupExclusiveAdd(double);\n"
-                "dvec2  subgroupExclusiveAdd(dvec2);\n"
-                "dvec3  subgroupExclusiveAdd(dvec3);\n"
-                "dvec4  subgroupExclusiveAdd(dvec4);\n"
-
-                "double subgroupExclusiveMul(double);\n"
-                "dvec2  subgroupExclusiveMul(dvec2);\n"
-                "dvec3  subgroupExclusiveMul(dvec3);\n"
-                "dvec4  subgroupExclusiveMul(dvec4);\n"
-
-                "double subgroupExclusiveMin(double);\n"
-                "dvec2  subgroupExclusiveMin(dvec2);\n"
-                "dvec3  subgroupExclusiveMin(dvec3);\n"
-                "dvec4  subgroupExclusiveMin(dvec4);\n"
-
-                "double subgroupExclusiveMax(double);\n"
-                "dvec2  subgroupExclusiveMax(dvec2);\n"
-                "dvec3  subgroupExclusiveMax(dvec3);\n"
-                "dvec4  subgroupExclusiveMax(dvec4);\n"
-
-                "double subgroupClusteredAdd(double, uint);\n"
-                "dvec2  subgroupClusteredAdd(dvec2, uint);\n"
-                "dvec3  subgroupClusteredAdd(dvec3, uint);\n"
-                "dvec4  subgroupClusteredAdd(dvec4, uint);\n"
-
-                "double subgroupClusteredMul(double, uint);\n"
-                "dvec2  subgroupClusteredMul(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMul(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMul(dvec4, uint);\n"
-
-                "double subgroupClusteredMin(double, uint);\n"
-                "dvec2  subgroupClusteredMin(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMin(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMin(dvec4, uint);\n"
-
-                "double subgroupClusteredMax(double, uint);\n"
-                "dvec2  subgroupClusteredMax(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMax(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMax(dvec4, uint);\n"
-
-                "double subgroupQuadBroadcast(double, uint);\n"
-                "dvec2  subgroupQuadBroadcast(dvec2, uint);\n"
-                "dvec3  subgroupQuadBroadcast(dvec3, uint);\n"
-                "dvec4  subgroupQuadBroadcast(dvec4, uint);\n"
-
-                "double subgroupQuadSwapHorizontal(double);\n"
-                "dvec2  subgroupQuadSwapHorizontal(dvec2);\n"
-                "dvec3  subgroupQuadSwapHorizontal(dvec3);\n"
-                "dvec4  subgroupQuadSwapHorizontal(dvec4);\n"
-
-                "double subgroupQuadSwapVertical(double);\n"
-                "dvec2  subgroupQuadSwapVertical(dvec2);\n"
-                "dvec3  subgroupQuadSwapVertical(dvec3);\n"
-                "dvec4  subgroupQuadSwapVertical(dvec4);\n"
-
-                "double subgroupQuadSwapDiagonal(double);\n"
-                "dvec2  subgroupQuadSwapDiagonal(dvec2);\n"
-                "dvec3  subgroupQuadSwapDiagonal(dvec3);\n"
-                "dvec4  subgroupQuadSwapDiagonal(dvec4);\n"
-
-#ifdef NV_EXTENSIONS
-                "uvec4  subgroupPartitionNV(double);\n"
-                "uvec4  subgroupPartitionNV(dvec2);\n"
-                "uvec4  subgroupPartitionNV(dvec3);\n"
-                "uvec4  subgroupPartitionNV(dvec4);\n"
-
-                "double subgroupPartitionedAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMaxNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMaxNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMaxNV(dvec4, uvec4 ballot);\n"
-#endif
-
-                "\n");
+            if (!logicalOp) { // floating-point overloads are skipped for logical ops
+                for (unsigned int j = 0; j < sizeof(floatTypes)/sizeof(floatTypes[0]); ++j) {
+                    snprintf(buf, bufSize, op, floatTypes[j], floatTypes[j]); // type passed twice (return %s, parameter %s); a template with one %s ignores the extra argument
+                    commonBuiltins.append(buf);
+                }
+                if (profile != EEsProfile && version >= 400) { // double overloads only for non-ES profiles at version 400 or later
+                    for (unsigned int j = 0; j < sizeof(doubleTypes)/sizeof(doubleTypes[0]); ++j) {
+                        snprintf(buf, bufSize, op, doubleTypes[j], doubleTypes[j]);
+                        commonBuiltins.append(buf);
+                    }
+                }
             }
+            if (!mathOp) { // bool overloads are skipped for math ops
+                for (unsigned int j = 0; j < sizeof(boolTypes)/sizeof(boolTypes[0]); ++j) {
+                    snprintf(buf, bufSize, op, boolTypes[j], boolTypes[j]);
+                    commonBuiltins.append(buf);
+                }
+            }
+            for (unsigned int j = 0; j < sizeof(intTypes)/sizeof(intTypes[0]); ++j) { // integer overloads are generated for every op
+                snprintf(buf, bufSize, op, intTypes[j], intTypes[j]);
+                commonBuiltins.append(buf);
+            }
+        }
 
         stageBuiltins[EShLangCompute].append(
             "void subgroupMemoryBarrierShared();"
diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp
index e6ee32b..0566c8f 100644
--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -451,6 +451,8 @@
         "spv.subgroupExtendedTypesBallotNeg.comp",
         "spv.subgroupExtendedTypesClustered.comp",
         "spv.subgroupExtendedTypesClusteredNeg.comp",
+        "spv.subgroupExtendedTypesPartitioned.comp",
+        "spv.subgroupExtendedTypesPartitionedNeg.comp",
         "spv.subgroupExtendedTypesShuffle.comp",
         "spv.subgroupExtendedTypesShuffleNeg.comp",
         "spv.subgroupExtendedTypesShuffleRelative.comp",