Merge pull request #831 from cdavis5e/force-recompile-hooks

MSL: Hoist fixup hooks in entry_point_args() out of the compile loop.
diff --git a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp
index ddf9582..ab375a3 100644
--- a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp
@@ -18,6 +18,6 @@
 
 kernel void main0(constant cb& _6 [[buffer(7)]], texture2d<float, access::write> _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
 {
-    _buffer.write(_6.value, spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex)));
+    _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex)));
 }
 
diff --git a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
new file mode 100644
index 0000000..fb97d0d
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
@@ -0,0 +1,10 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+kernel void main0(texture2d<float, access::read_write> TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
+{
+    TargetTexture.write((TargetTexture.read(uint2(gl_WorkGroupID.xy)).xy + float2(1.0)).xyyy, uint2((gl_WorkGroupID.xy + uint2(1u))));
+}
+
diff --git a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp
new file mode 100644
index 0000000..905222d
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp
@@ -0,0 +1,48 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Sub
+{
+    float4 f[2];
+    float4 f2[2];
+    float3 f3[2];
+    float4 f4[2];
+};
+
+struct SSBO
+{
+    Sub sub[2];
+};
+
+kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    float _153[2];
+    _153[0] = _27.sub[gl_WorkGroupID.x].f[0].x;
+    _153[1] = _27.sub[gl_WorkGroupID.x].f[1].x;
+    float2 _154[2];
+    _154[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy;
+    _154[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy;
+    float3 _155[2];
+    _155[0] = _27.sub[gl_WorkGroupID.x].f3[0];
+    _155[1] = _27.sub[gl_WorkGroupID.x].f3[1];
+    float4 _156[2];
+    _156[0] = _27.sub[gl_WorkGroupID.x].f4[0];
+    _156[1] = _27.sub[gl_WorkGroupID.x].f4[1];
+    _153[gl_GlobalInvocationID.x] += 1.0;
+    _154[gl_GlobalInvocationID.x] += float2(2.0);
+    _155[gl_GlobalInvocationID.x] += float3(3.0);
+    _156[gl_GlobalInvocationID.x] += float4(4.0);
+    _27.sub[gl_WorkGroupID.x].f[0].x = _153[0];
+    _27.sub[gl_WorkGroupID.x].f[1].x = _153[1];
+    _27.sub[gl_WorkGroupID.x].f2[0].xy = _154[0];
+    _27.sub[gl_WorkGroupID.x].f2[1].xy = _154[1];
+    _27.sub[gl_WorkGroupID.x].f3[0] = _155[0];
+    _27.sub[gl_WorkGroupID.x].f3[1] = _155[1];
+    _27.sub[gl_WorkGroupID.x].f4[0] = _156[0];
+    _27.sub[gl_WorkGroupID.x].f4[1] = _156[1];
+    _27.sub[0].f[0].x += 5.0;
+    _27.sub[0].f2[1].xy += float2(5.0);
+}
+
diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp
index 2b37844..468eb7e 100644
--- a/reference/opt/shaders-msl/comp/struct-packing.comp
+++ b/reference/opt/shaders-msl/comp/struct-packing.comp
@@ -69,7 +69,7 @@
 
 struct S0_1
 {
-    float2 a[1];
+    float4 a[1];
     float b;
 };
 
@@ -115,21 +115,21 @@
     Content_1 content;
     Content_1 content1[2];
     Content_1 content2;
-    float array[1];
+    float4 array[1];
 };
 
 kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
 {
     Content_1 _60 = ssbo_140.content;
-    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
+    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
     ssbo_430.content.m0s[0].b = _60.m0s[0].b;
-    ssbo_430.content.m1s[0].a = _60.m1s[0].a;
+    ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
     ssbo_430.content.m1s[0].b = _60.m1s[0].b;
     ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
     ssbo_430.content.m2s[0].b = _60.m2s[0].b;
-    ssbo_430.content.m0.a[0] = _60.m0.a[0];
+    ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
     ssbo_430.content.m0.b = _60.m0.b;
-    ssbo_430.content.m1.a = _60.m1.a;
+    ssbo_430.content.m1.a = float3(_60.m1.a);
     ssbo_430.content.m1.b = _60.m1.b;
     ssbo_430.content.m2.a[0] = _60.m2.a[0];
     ssbo_430.content.m2.b = _60.m2.b;
diff --git a/reference/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/shaders-msl/asm/comp/buffer-write.asm.comp
index ddf9582..ab375a3 100644
--- a/reference/shaders-msl/asm/comp/buffer-write.asm.comp
+++ b/reference/shaders-msl/asm/comp/buffer-write.asm.comp
@@ -18,6 +18,6 @@
 
 kernel void main0(constant cb& _6 [[buffer(7)]], texture2d<float, access::write> _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
 {
-    _buffer.write(_6.value, spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex)));
+    _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex)));
 }
 
diff --git a/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
new file mode 100644
index 0000000..c90faf9
--- /dev/null
+++ b/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
@@ -0,0 +1,21 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+void _main(thread const uint3& id, thread texture2d<float, access::read_write> TargetTexture)
+{
+    float2 loaded = TargetTexture.read(uint2(id.xy)).xy;
+    float2 storeTemp = loaded + float2(1.0);
+    TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u))));
+}
+
+kernel void main0(texture2d<float, access::read_write> TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
+{
+    uint3 id = gl_WorkGroupID;
+    uint3 param = id;
+    _main(param, TargetTexture);
+}
+
diff --git a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag
index 7d51b15..957b77f 100644
--- a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag
+++ b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag
@@ -32,7 +32,7 @@
 {
     int _46 = int(pos.x) % 16;
     Foo_1 foo;
-    foo.a = v_11.results[_46].a;
+    foo.a = float3(v_11.results[_46].a);
     foo.b = v_11.results[_46].b;
     return float4(dot(foo.a, v_11.bar.xyz), foo.b, 0.0, 0.0);
 }
diff --git a/reference/shaders-msl/comp/packing-test-1.comp b/reference/shaders-msl/comp/packing-test-1.comp
index 92351c6..06160eb 100644
--- a/reference/shaders-msl/comp/packing-test-1.comp
+++ b/reference/shaders-msl/comp/packing-test-1.comp
@@ -30,7 +30,7 @@
 kernel void main0(device Buffer0& _15 [[buffer(1)]], device Buffer1& _34 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
     T1_1 v;
-    v.a = _15.buf0[0].a;
+    v.a = float3(_15.buf0[0].a);
     v.b = _15.buf0[0].b;
     float x = v.b;
     _34.buf1[gl_GlobalInvocationID.x] = x;
diff --git a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp
new file mode 100644
index 0000000..a5a1cf9
--- /dev/null
+++ b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp
@@ -0,0 +1,53 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Sub
+{
+    float4 f[2];
+    float4 f2[2];
+    float3 f3[2];
+    float4 f4[2];
+};
+
+struct Sub_1
+{
+    float f[2];
+    float2 f2[2];
+    float3 f3[2];
+    float4 f4[2];
+};
+
+struct SSBO
+{
+    Sub sub[2];
+};
+
+kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    Sub_1 foo;
+    foo.f[0] = _27.sub[gl_WorkGroupID.x].f[0].x;
+    foo.f[1] = _27.sub[gl_WorkGroupID.x].f[1].x;
+    foo.f2[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy;
+    foo.f2[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy;
+    foo.f3[0] = _27.sub[gl_WorkGroupID.x].f3[0];
+    foo.f3[1] = _27.sub[gl_WorkGroupID.x].f3[1];
+    foo.f4[0] = _27.sub[gl_WorkGroupID.x].f4[0];
+    foo.f4[1] = _27.sub[gl_WorkGroupID.x].f4[1];
+    foo.f[gl_GlobalInvocationID.x] += 1.0;
+    foo.f2[gl_GlobalInvocationID.x] += float2(2.0);
+    foo.f3[gl_GlobalInvocationID.x] += float3(3.0);
+    foo.f4[gl_GlobalInvocationID.x] += float4(4.0);
+    _27.sub[gl_WorkGroupID.x].f[0].x = foo.f[0];
+    _27.sub[gl_WorkGroupID.x].f[1].x = foo.f[1];
+    _27.sub[gl_WorkGroupID.x].f2[0].xy = foo.f2[0];
+    _27.sub[gl_WorkGroupID.x].f2[1].xy = foo.f2[1];
+    _27.sub[gl_WorkGroupID.x].f3[0] = foo.f3[0];
+    _27.sub[gl_WorkGroupID.x].f3[1] = foo.f3[1];
+    _27.sub[gl_WorkGroupID.x].f4[0] = foo.f4[0];
+    _27.sub[gl_WorkGroupID.x].f4[1] = foo.f4[1];
+    _27.sub[0].f[0].x += 5.0;
+    _27.sub[0].f2[1].xy += float2(5.0);
+}
+
diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp
index 2b37844..468eb7e 100644
--- a/reference/shaders-msl/comp/struct-packing.comp
+++ b/reference/shaders-msl/comp/struct-packing.comp
@@ -69,7 +69,7 @@
 
 struct S0_1
 {
-    float2 a[1];
+    float4 a[1];
     float b;
 };
 
@@ -115,21 +115,21 @@
     Content_1 content;
     Content_1 content1[2];
     Content_1 content2;
-    float array[1];
+    float4 array[1];
 };
 
 kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
 {
     Content_1 _60 = ssbo_140.content;
-    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
+    ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
     ssbo_430.content.m0s[0].b = _60.m0s[0].b;
-    ssbo_430.content.m1s[0].a = _60.m1s[0].a;
+    ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
     ssbo_430.content.m1s[0].b = _60.m1s[0].b;
     ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
     ssbo_430.content.m2s[0].b = _60.m2s[0].b;
-    ssbo_430.content.m0.a[0] = _60.m0.a[0];
+    ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
     ssbo_430.content.m0.b = _60.m0.b;
-    ssbo_430.content.m1.a = _60.m1.a;
+    ssbo_430.content.m1.a = float3(_60.m1.a);
     ssbo_430.content.m1.b = _60.m1.b;
     ssbo_430.content.m2.a[0] = _60.m2.a[0];
     ssbo_430.content.m2.b = _60.m2.b;
diff --git a/reference/shaders-msl/frag/packing-test-3.frag b/reference/shaders-msl/frag/packing-test-3.frag
index a02884e..916ab87 100644
--- a/reference/shaders-msl/frag/packing-test-3.frag
+++ b/reference/shaders-msl/frag/packing-test-3.frag
@@ -35,7 +35,7 @@
 float4 _main(thread const VertexOutput& IN, constant CB0& v_26)
 {
     TestStruct_1 st;
-    st.position = v_26.CB0[1].position;
+    st.position = float3(v_26.CB0[1].position);
     st.radius = v_26.CB0[1].radius;
     float4 col = float4(st.position, st.radius);
     return col;
diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert
index a87b447..3230e4f 100644
--- a/reference/shaders-msl/vert/copy.flatten.vert
+++ b/reference/shaders-msl/vert/copy.flatten.vert
@@ -43,7 +43,7 @@
     for (int i = 0; i < 4; i++)
     {
         Light_1 light;
-        light.Position = _21.lights[i].Position;
+        light.Position = float3(_21.lights[i].Position);
         light.Radius = _21.lights[i].Radius;
         light.Color = _21.lights[i].Color;
         float3 L = in.aVertex.xyz - light.Position;
diff --git a/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
new file mode 100644
index 0000000..8f75929
--- /dev/null
+++ b/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp
@@ -0,0 +1,75 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 44
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageImageExtendedFormats
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main" %id_1
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource HLSL 500
+               OpName %main "main"
+               OpName %_main_vu3_ "@main(vu3;"
+               OpName %id "id"
+               OpName %loaded "loaded"
+               OpName %TargetTexture "TargetTexture"
+               OpName %storeTemp "storeTemp"
+               OpName %id_0 "id"
+               OpName %id_1 "id"
+               OpName %param "param"
+               OpDecorate %TargetTexture DescriptorSet 0
+               OpDecorate %TargetTexture Binding 0
+               OpDecorate %id_1 BuiltIn WorkgroupId
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_Function_v3uint = OpTypePointer Function %v3uint
+          %9 = OpTypeFunction %void %_ptr_Function_v3uint
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+         %17 = OpTypeImage %float 2D 0 0 0 2 Rg32f
+%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17
+%TargetTexture = OpVariable %_ptr_UniformConstant_17 UniformConstant
+     %v2uint = OpTypeVector %uint 2
+    %float_1 = OpConstant %float 1
+     %uint_1 = OpConstant %uint 1
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+       %id_1 = OpVariable %_ptr_Input_v3uint Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+       %id_0 = OpVariable %_ptr_Function_v3uint Function
+      %param = OpVariable %_ptr_Function_v3uint Function
+         %40 = OpLoad %v3uint %id_1
+               OpStore %id_0 %40
+         %42 = OpLoad %v3uint %id_0
+               OpStore %param %42
+         %43 = OpFunctionCall %void %_main_vu3_ %param
+               OpReturn
+               OpFunctionEnd
+ %_main_vu3_ = OpFunction %void None %9
+         %id = OpFunctionParameter %_ptr_Function_v3uint
+         %12 = OpLabel
+     %loaded = OpVariable %_ptr_Function_v2float Function
+  %storeTemp = OpVariable %_ptr_Function_v2float Function
+         %20 = OpLoad %17 %TargetTexture
+         %22 = OpLoad %v3uint %id
+         %23 = OpVectorShuffle %v2uint %22 %22 0 1
+         %24 = OpImageRead %v2float %20 %23
+               OpStore %loaded %24
+         %26 = OpLoad %v2float %loaded
+         %28 = OpCompositeConstruct %v2float %float_1 %float_1
+         %29 = OpFAdd %v2float %26 %28
+               OpStore %storeTemp %29
+         %30 = OpLoad %17 %TargetTexture
+         %31 = OpLoad %v3uint %id
+         %32 = OpVectorShuffle %v2uint %31 %31 0 1
+         %34 = OpCompositeConstruct %v2uint %uint_1 %uint_1
+         %35 = OpIAdd %v2uint %32 %34
+         %36 = OpLoad %v2float %storeTemp
+               OpImageWrite %30 %35 %36
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/shaders-msl/comp/storage-buffer-std140-vector-array.comp
new file mode 100644
index 0000000..7e786ec
--- /dev/null
+++ b/shaders-msl/comp/storage-buffer-std140-vector-array.comp
@@ -0,0 +1,30 @@
+#version 450
+layout(local_size_x = 1) in;
+
+struct Sub
+{
+	float f[2];
+	vec2 f2[2];
+	vec3 f3[2];
+	vec4 f4[2];
+};
+
+layout(std140, binding = 0) buffer SSBO
+{
+	Sub sub[2];
+};
+
+void main()
+{
+	Sub foo = sub[gl_WorkGroupID.x];
+
+	foo.f[gl_GlobalInvocationID.x] += 1.0;
+	foo.f2[gl_GlobalInvocationID.x] += 2.0;
+	foo.f3[gl_GlobalInvocationID.x] += 3.0;
+	foo.f4[gl_GlobalInvocationID.x] += 4.0;
+	sub[gl_WorkGroupID.x] = foo;
+
+	sub[0].f[0] += 5.0;
+	sub[0].f2[1] += 5.0;
+}
+
diff --git a/spirv_common.hpp b/spirv_common.hpp
index 57820d0..6852542 100644
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@@ -1339,6 +1339,7 @@
 
 struct AccessChainMeta
 {
+	uint32_t storage_packed_type = 0;
 	bool need_transpose = false;
 	bool storage_is_packed = false;
 	bool storage_is_invariant = false;
@@ -1365,6 +1366,12 @@
 		uint32_t index = 0;
 		spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
 		bool builtin = false;
+
+		struct
+		{
+			uint32_t packed_type = 0;
+			bool packed = false;
+		} extended;
 	};
 
 	Decoration decoration;
diff --git a/spirv_cross.cpp b/spirv_cross.cpp
index ecddc05..ef3ec24 100644
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@@ -169,7 +169,7 @@
 		{
 			// If the alias master has been specially packed, we will have emitted a clean variant as well,
 			// so skip the name aliasing here.
-			if (!has_decoration(type.type_alias, DecorationCPacked))
+			if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
 				return to_name(type.type_alias);
 		}
 	}
@@ -515,7 +515,7 @@
 
 bool Compiler::is_scalar(const SPIRType &type) const
 {
-	return type.vecsize == 1 && type.columns == 1;
+	return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1;
 }
 
 bool Compiler::is_vector(const SPIRType &type) const
@@ -868,7 +868,7 @@
 	for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
 	{
 		auto &type = get<SPIRType>(*alias_itr);
-		if (type.type_alias != 0 && !has_decoration(type.type_alias, DecorationCPacked))
+		if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
 		{
 			// We will skip declaring this type, so make sure the type_alias type comes before.
 			auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias);
@@ -1165,6 +1165,153 @@
 	ir.set_decoration(id, decoration, argument);
 }
 
+void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value)
+{
+	auto &dec = ir.meta[id].decoration;
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		dec.extended.packed = true;
+		break;
+
+	case SPIRVCrossDecorationPackedType:
+		dec.extended.packed_type = value;
+		break;
+	}
+}
+
+void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
+                                              uint32_t value)
+{
+	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
+	auto &dec = ir.meta[type].members[index];
+
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		dec.extended.packed = true;
+		break;
+
+	case SPIRVCrossDecorationPackedType:
+		dec.extended.packed_type = value;
+		break;
+	}
+}
+
+uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
+{
+	auto *m = ir.find_meta(id);
+	if (!m)
+		return 0;
+
+	auto &dec = m->decoration;
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		return uint32_t(dec.extended.packed);
+
+	case SPIRVCrossDecorationPackedType:
+		return dec.extended.packed_type;
+	}
+
+	return 0;
+}
+
+uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
+{
+	auto *m = ir.find_meta(type);
+	if (!m)
+		return 0;
+
+	if (index >= m->members.size())
+		return 0;
+
+	auto &dec = m->members[index];
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		return uint32_t(dec.extended.packed);
+
+	case SPIRVCrossDecorationPackedType:
+		return dec.extended.packed_type;
+	}
+
+	return 0;
+}
+
+bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const
+{
+	auto *m = ir.find_meta(id);
+	if (!m)
+		return false;
+
+	auto &dec = m->decoration;
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		return dec.extended.packed;
+
+	case SPIRVCrossDecorationPackedType:
+		return dec.extended.packed_type != 0;
+	}
+
+	return false;
+}
+
+bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const
+{
+	auto *m = ir.find_meta(type);
+	if (!m)
+		return false;
+
+	if (index >= m->members.size())
+		return false;
+
+	auto &dec = m->members[index];
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		return dec.extended.packed;
+
+	case SPIRVCrossDecorationPackedType:
+		return dec.extended.packed_type != 0;
+	}
+
+	return false;
+}
+
+void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration)
+{
+	auto &dec = ir.meta[id].decoration;
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		dec.extended.packed = false;
+		break;
+
+	case SPIRVCrossDecorationPackedType:
+		dec.extended.packed_type = 0;
+		break;
+	}
+}
+
+void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration)
+{
+	ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1));
+	auto &dec = ir.meta[type].members[index];
+
+	switch (decoration)
+	{
+	case SPIRVCrossDecorationPacked:
+		dec.extended.packed = false;
+		break;
+
+	case SPIRVCrossDecorationPackedType:
+		dec.extended.packed_type = 0;
+		break;
+	}
+}
+
 StorageClass Compiler::get_storage_class(uint32_t id) const
 {
 	return get<SPIRVariable>(id).storage;
diff --git a/spirv_cross.hpp b/spirv_cross.hpp
index 82e2e33..d587e50 100644
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@@ -114,6 +114,12 @@
 	spv::ExecutionModel execution_model;
 };
 
+enum ExtendedDecorations
+{
+	SPIRVCrossDecorationPacked,
+	SPIRVCrossDecorationPackedType
+};
+
 class Compiler
 {
 public:
@@ -946,6 +952,17 @@
 
 	bool image_is_comparison(const SPIRType &type, uint32_t id) const;
 
+	void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0);
+	uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const;
+	bool has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const;
+	void unset_extended_decoration(uint32_t id, ExtendedDecorations decoration);
+
+	void set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration,
+	                                    uint32_t value = 0);
+	uint32_t get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const;
+	bool has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const;
+	void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration);
+
 private:
 	// Used only to implement the old deprecated get_entry_point() interface.
 	const SPIREntryPoint &get_first_entry_point(const std::string &name) const;
diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp
index 1a3f08c..131bc80 100644
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@@ -696,7 +696,7 @@
 	// Type-punning with these types is legal, which complicates things
 	// when we are storing struct and array types in an SSBO for example.
 	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
-	if (type.type_alias != 0 && !has_decoration(type.type_alias, DecorationCPacked))
+	if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked))
 		return;
 
 	add_resource_name(type.self);
@@ -810,9 +810,9 @@
 			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
 	}
 
-	// DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
+	// SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
 	// This is only done selectively in GLSL as needed.
-	if (has_decoration(type.self, DecorationCPacked) && dec.decoration_flags.get(DecorationOffset))
+	if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset))
 		attr.push_back(join("offset = ", dec.offset));
 
 	if (attr.empty())
@@ -1370,7 +1370,7 @@
 			// layout_for_variable() will be called before the actual buffer emit.
 			// The alternative is a full pass before codegen where we deduce this decoration,
 			// but then we are just doing the exact same work twice, and more complexity.
-			set_decoration(type.self, DecorationCPacked);
+			set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 		}
 		else
 		{
@@ -1398,7 +1398,7 @@
 			if (!options.es && !options.vulkan_semantics && options.version < 440)
 				require_extension_internal("GL_ARB_enhanced_layouts");
 
-			set_decoration(type.self, DecorationCPacked);
+			set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 		}
 		else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
 		{
@@ -1409,7 +1409,7 @@
 			if (!options.es && !options.vulkan_semantics && options.version < 440)
 				require_extension_internal("GL_ARB_enhanced_layouts");
 
-			set_decoration(type.self, DecorationCPacked);
+			set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 		}
 		else
 		{
@@ -2384,7 +2384,7 @@
 // by wrapping the expression in a constructor of the appropriate type.
 // GLSL does not support packed formats, so simply return the expression.
 // Subclasses that do will override
-string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &)
+string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t)
 {
 	return expr_str;
 }
@@ -2486,26 +2486,28 @@
 	return enclose_expression(to_expression(id, register_expression_read));
 }
 
-string CompilerGLSL::to_unpacked_expression(uint32_t id)
+string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
 {
 	// If we need to transpose, it will also take care of unpacking rules.
 	auto *e = maybe_get<SPIRExpression>(id);
 	bool need_transpose = e && e->need_transpose;
-	if (!need_transpose && has_decoration(id, DecorationCPacked))
-		return unpack_expression_type(to_expression(id), expression_type(id));
+	if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
+		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
+		                              get_extended_decoration(id, SPIRVCrossDecorationPackedType));
 	else
-		return to_expression(id);
+		return to_expression(id, register_expression_read);
 }
 
-string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id)
+string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
 {
 	// If we need to transpose, it will also take care of unpacking rules.
 	auto *e = maybe_get<SPIRExpression>(id);
 	bool need_transpose = e && e->need_transpose;
-	if (!need_transpose && has_decoration(id, DecorationCPacked))
-		return unpack_expression_type(to_expression(id), expression_type(id));
+	if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked))
+		return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id),
+		                              get_extended_decoration(id, SPIRVCrossDecorationPackedType));
 	else
-		return to_enclosed_expression(id);
+		return to_enclosed_expression(id, register_expression_read);
 }
 
 string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
@@ -2517,28 +2519,28 @@
 		return to_expression(id, register_expression_read);
 }
 
-string CompilerGLSL::to_pointer_expression(uint32_t id)
+string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
 {
 	auto &type = expression_type(id);
 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
-		return address_of_expression(to_enclosed_expression(id));
+		return address_of_expression(to_enclosed_expression(id, register_expression_read));
 	else
-		return to_expression(id);
+		return to_unpacked_expression(id, register_expression_read);
 }
 
-string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id)
+string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
 {
 	auto &type = expression_type(id);
 	if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
-		return address_of_expression(to_enclosed_expression(id));
+		return address_of_expression(to_enclosed_expression(id, register_expression_read));
 	else
-		return to_enclosed_expression(id);
+		return to_enclosed_unpacked_expression(id, register_expression_read);
 }
 
 string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
 {
 	auto expr = to_enclosed_expression(id);
-	if (has_decoration(id, DecorationCPacked))
+	if (has_extended_decoration(id, SPIRVCrossDecorationPacked))
 		return join(expr, "[", index, "]");
 	else
 		return join(expr, ".", index_to_swizzle(index));
@@ -2581,7 +2583,7 @@
 			return to_enclosed_expression(e.base_expression) + e.expression;
 		else if (e.need_transpose)
 		{
-			bool is_packed = has_decoration(id, DecorationCPacked);
+			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked);
 			return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
 		}
 		else
@@ -2776,8 +2778,8 @@
 
 	case OpCompositeExtract:
 	{
-		auto expr =
-		    access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), true, false);
+		auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
+		                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
 		return expr;
 	}
 
@@ -4210,6 +4212,10 @@
 	if (is_legacy() && image_is_comparison(imgtype, img))
 		expr += ".r";
 
+	// Deals with reads from MSL. We might need to downconvert to fewer components.
+	if (op == OpImageRead)
+		expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+
 	emit_op(result_type, id, expr, forward);
 	for (auto &inherit : inherited_expressions)
 		inherit_expression_dependencies(id, inherit);
@@ -5482,10 +5488,15 @@
 }
 
 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
-                                           bool index_is_literal, bool chain_only, bool ptr_chain,
-                                           AccessChainMeta *meta, bool register_expression_read)
+                                           AccessChainFlags flags, AccessChainMeta *meta)
 {
 	string expr;
+
+	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
+	bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
+	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
+	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
+
 	if (!chain_only)
 		expr = to_enclosed_expression(base, register_expression_read);
 
@@ -5496,7 +5507,8 @@
 
 	bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
 	bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
-	bool is_packed = has_decoration(base, DecorationCPacked);
+	bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked);
+	uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType);
 	bool is_invariant = has_decoration(base, DecorationInvariant);
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;
@@ -5672,6 +5684,11 @@
 				is_invariant = true;
 
 			is_packed = member_is_packed_type(*type, index);
+			if (is_packed)
+				packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType);
+			else
+				packed_type = 0;
+
 			row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
 			type = &get<SPIRType>(type->member_types[index]);
 		}
@@ -5683,6 +5700,7 @@
 				expr = convert_row_major_matrix(expr, *type, is_packed);
 				row_major_matrix_needs_conversion = false;
 				is_packed = false;
+				packed_type = 0;
 			}
 
 			expr += "[";
@@ -5722,6 +5740,7 @@
 			}
 
 			is_packed = false;
+			packed_type = 0;
 			type_id = type->parent_type;
 			type = &get<SPIRType>(type_id);
 		}
@@ -5741,6 +5760,7 @@
 		meta->need_transpose = row_major_matrix_needs_conversion;
 		meta->storage_is_packed = is_packed;
 		meta->storage_is_invariant = is_invariant;
+		meta->storage_packed_type = packed_type;
 	}
 
 	return expr;
@@ -5772,7 +5792,11 @@
 	}
 	else if (flattened_structs.count(base) && count > 0)
 	{
-		auto chain = access_chain_internal(base, indices, count, false, true, ptr_chain, nullptr, false).substr(1);
+		AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
+		if (ptr_chain)
+			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
+
+		auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
 		if (meta)
 		{
 			meta->need_transpose = false;
@@ -5782,7 +5806,10 @@
 	}
 	else
 	{
-		return access_chain_internal(base, indices, count, false, false, ptr_chain, meta, false);
+		AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
+		if (ptr_chain)
+			flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
+		return access_chain_internal(base, indices, count, flags, meta);
 	}
 }
 
@@ -6603,6 +6630,30 @@
 	disallow_forwarding_in_expression_chain(*expr);
 }
 
+void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
+{
+	auto rhs = to_pointer_expression(rhs_expression);
+
+	// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
+	if (!rhs.empty())
+	{
+		handle_store_to_invariant_variable(lhs_expression, rhs_expression);
+
+		auto lhs = to_dereferenced_expression(lhs_expression);
+
+		// We might need to bitcast in order to store to a builtin.
+		bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
+
+		// Tries to optimize assignments like "<lhs> = <lhs> op expr".
+		// While this is purely cosmetic, this is important for legacy ESSL where loop
+		// variable increments must be in either i++ or i += const-expr.
+		// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
+		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+			statement(lhs, " = ", rhs, ";");
+		register_write(lhs_expression);
+	}
+}
+
 void CompilerGLSL::emit_instruction(const Instruction &instruction)
 {
 	auto ops = stream(instruction);
@@ -6674,8 +6725,12 @@
 		register_read(id, ptr, forward);
 
 		// Pass through whether the result is of a packed type.
-		if (has_decoration(ptr, DecorationCPacked))
-			set_decoration(id, DecorationCPacked);
+		if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked))
+		{
+			set_extended_decoration(id, SPIRVCrossDecorationPacked);
+			set_extended_decoration(id, SPIRVCrossDecorationPackedType,
+			                        get_extended_decoration(ptr, SPIRVCrossDecorationPackedType));
+		}
 
 		inherit_expression_dependencies(id, ptr);
 		if (forward)
@@ -6706,14 +6761,11 @@
 
 		// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
 		if (meta.storage_is_packed)
-			set_decoration(ops[1], DecorationCPacked);
-		else
-			unset_decoration(ops[1], DecorationCPacked);
-
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPacked);
+		if (meta.storage_packed_type != 0)
+			set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type);
 		if (meta.storage_is_invariant)
 			set_decoration(ops[1], DecorationInvariant);
-		else
-			unset_decoration(ops[1], DecorationInvariant);
 
 		for (uint32_t i = 2; i < length; i++)
 		{
@@ -6743,27 +6795,9 @@
 		}
 		else
 		{
-			auto rhs = to_pointer_expression(ops[1]);
-
-			// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
-			if (!rhs.empty())
-			{
-				handle_store_to_invariant_variable(ops[0], ops[1]);
-
-				auto lhs = to_dereferenced_expression(ops[0]);
-
-				// We might need to bitcast in order to store to a builtin.
-				bitcast_to_builtin_store(ops[0], rhs, expression_type(ops[1]));
-
-				// Tries to optimize assignments like "<lhs> = <lhs> op expr".
-				// While this is purely cosmetic, this is important for legacy ESSL where loop
-				// variable increments must be in either i++ or i += const-expr.
-				// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
-				if (!optimize_read_modify_write(expression_type(ops[1]), lhs, rhs))
-					statement(lhs, " = ", rhs, ";");
-				register_write(ops[0]);
-			}
+			emit_store_statement(ops[0], ops[1]);
 		}
+
 		// Storing a pointer results in a variable pointer, so we must conservatively assume
 		// we can write through it.
 		if (expression_type(ops[1]).pointer)
@@ -6775,7 +6809,7 @@
 	{
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
-		auto e = access_chain_internal(ops[2], &ops[3], length - 3, true);
+		auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
 		set<SPIRExpression>(id, e + ".length()", result_type, true);
 		break;
 	}
@@ -7007,7 +7041,7 @@
 		// Make a copy, then use access chain to store the variable.
 		statement(declare_temporary(result_type, id), to_expression(vec), ";");
 		set<SPIRExpression>(id, to_name(id), result_type, true);
-		auto chain = access_chain_internal(id, &index, 1, false);
+		auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
 		statement(chain, " = ", to_expression(comp), ";");
 		break;
 	}
@@ -7017,7 +7051,7 @@
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 
-		auto expr = access_chain_internal(ops[2], &ops[3], 1, false);
+		auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
 		emit_op(result_type, id, expr, should_forward(ops[2]));
 		inherit_expression_dependencies(id, ops[2]);
 		inherit_expression_dependencies(id, ops[3]);
@@ -7041,7 +7075,7 @@
 			allow_base_expression = false;
 
 		// Packed expressions cannot be split up.
-		if (has_decoration(ops[2], DecorationCPacked))
+		if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked))
 			allow_base_expression = false;
 
 		AccessChainMeta meta;
@@ -7062,14 +7096,15 @@
 			//
 			// Including the base will prevent this and would trigger multiple reads
 			// from expression causing it to be forced to an actual temporary in GLSL.
-			auto expr = access_chain_internal(ops[2], &ops[3], length, true, true, false, &meta);
+			auto expr = access_chain_internal(ops[2], &ops[3], length,
+			                                  ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta);
 			e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 			e->base_expression = ops[2];
 		}
 		else
 		{
-			auto expr = access_chain_internal(ops[2], &ops[3], length, true, false, false, &meta);
+			auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
 			e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
 			inherit_expression_dependencies(id, ops[2]);
 		}
@@ -7079,7 +7114,9 @@
 		// instead of loading everything through an access chain.
 		e->need_transpose = meta.need_transpose;
 		if (meta.storage_is_packed)
-			set_decoration(id, DecorationCPacked);
+			set_extended_decoration(id, SPIRVCrossDecorationPacked);
+		if (meta.storage_packed_type != 0)
+			set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type);
 		if (meta.storage_is_invariant)
 			set_decoration(id, DecorationInvariant);
 
@@ -7100,7 +7137,7 @@
 		// Make a copy, then use access chain to store the variable.
 		statement(declare_temporary(result_type, id), to_expression(composite), ";");
 		set<SPIRExpression>(id, to_name(id), result_type, true);
-		auto chain = access_chain_internal(id, elems, length, true);
+		auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
 		statement(chain, " = ", to_expression(obj), ";");
 
 		break;
@@ -7167,7 +7204,7 @@
 				shuffle = true;
 
 		// Cannot use swizzles with packed expressions, force shuffle path.
-		if (!shuffle && has_decoration(vec0, DecorationCPacked))
+		if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked))
 			shuffle = true;
 
 		string expr;
@@ -8796,7 +8833,7 @@
 // GLSL does not define packed data types, but certain subclasses do.
 bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
 {
-	return has_member_decoration(type.self, index, DecorationCPacked);
+	return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked);
 }
 
 // Wraps the expression string in a function call that converts the
diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp
index 6192e7a..e19228c 100644
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@@ -51,6 +51,15 @@
 	PlsFormat format;
 };
 
+enum AccessChainFlagBits
+{
+	ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0,
+	ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1,
+	ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2,
+	ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3
+};
+typedef uint32_t AccessChainFlags;
+
 class CompilerGLSL : public Compiler
 {
 public:
@@ -261,7 +270,7 @@
 	virtual void emit_buffer_block(const SPIRVariable &type);
 	virtual void emit_push_constant_block(const SPIRVariable &var);
 	virtual void emit_uniform(const SPIRVariable &var);
-	virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type);
+	virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id);
 
 	std::unique_ptr<std::ostringstream> buffer;
 
@@ -447,9 +456,10 @@
 	bool expression_is_forwarded(uint32_t id);
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
 	                        bool suppress_usage_tracking = false);
-	std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
-	                                  bool chain_only = false, bool ptr_chain = false, AccessChainMeta *meta = nullptr,
-	                                  bool register_expression_read = true);
+
+	std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags,
+	                                  AccessChainMeta *meta);
+
 	std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
 	                         AccessChainMeta *meta = nullptr, bool ptr_chain = false);
 
@@ -476,11 +486,11 @@
 	void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
-	std::string to_unpacked_expression(uint32_t id);
-	std::string to_enclosed_unpacked_expression(uint32_t id);
+	std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
+	std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true);
-	std::string to_pointer_expression(uint32_t id);
-	std::string to_enclosed_pointer_expression(uint32_t id);
+	std::string to_pointer_expression(uint32_t id, bool register_expression_read = true);
+	std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_extract_component_expression(uint32_t id, uint32_t index);
 	std::string enclose_expression(const std::string &expr);
 	std::string dereference_expression(const std::string &expr);
@@ -624,6 +634,7 @@
 	void disallow_forwarding_in_expression_chain(const SPIRExpression &expr);
 
 	bool expression_is_constant_null(uint32_t id) const;
+	virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression);
 
 private:
 	void init()
diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index dc2e030..4b470a1 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -1835,7 +1835,7 @@
 	string packing_offset;
 	bool is_push_constant = type.storage == StorageClassPushConstant;
 
-	if ((has_decoration(type.self, DecorationCPacked) || is_push_constant) &&
+	if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) &&
 	    has_member_decoration(type.self, index, DecorationOffset))
 	{
 		uint32_t offset = memb[index].offset - base_offset;
@@ -1870,7 +1870,7 @@
 		if (type.array.empty())
 		{
 			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset))
-				set_decoration(type.self, DecorationCPacked);
+				set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 			else
 				SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
 
@@ -1952,7 +1952,7 @@
 			auto &type = get<SPIRType>(var.basetype);
 
 			if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end))
-				set_decoration(type.self, DecorationCPacked);
+				set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 			else
 				SPIRV_CROSS_THROW(
 				    "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset.");
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index 071c9de..976722a 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -749,7 +749,7 @@
 }
 
 // If the specified type is a struct, it and any nested structs
-// are marked as packable with the DecorationCPacked decoration,
+// are marked as packable with the SPIRVCrossDecorationPacked decoration,
 void CompilerMSL::mark_as_packable(SPIRType &type)
 {
 	// If this is not the base type (eg. it's a pointer or array), tunnel down
@@ -761,7 +761,7 @@
 
 	if (type.basetype == SPIRType::Struct)
 	{
-		set_decoration(type.self, DecorationCPacked);
+		set_extended_decoration(type.self, SPIRVCrossDecorationPacked);
 
 		// Recurse
 		size_t mbr_cnt = type.member_types.size();
@@ -1483,6 +1483,7 @@
 		auto &base_type = set<SPIRType>(base_type_id);
 		base_type = type;
 		base_type.basetype = type.basetype == SPIRType::Short ? SPIRType::UShort : SPIRType::UInt;
+		base_type.pointer = false;
 
 		if (!type.pointer)
 			return base_type_id;
@@ -1512,6 +1513,7 @@
 		auto &base_type = set<SPIRType>(base_type_id);
 		base_type = type;
 		base_type.basetype = SPIRType::UInt;
+		base_type.pointer = false;
 
 		if (!type.pointer)
 			return base_type_id;
@@ -1555,7 +1557,11 @@
 	for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
 	{
 		if (is_member_packable(ib_type, mbr_idx))
-			set_member_decoration(ib_type_id, mbr_idx, DecorationCPacked);
+		{
+			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked);
+			set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType,
+			                               ib_type.member_types[mbr_idx]);
+		}
 
 		// Align current offset to the current member's default alignment.
 		size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1;
@@ -1584,11 +1590,30 @@
 bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
 {
 	// We've already marked it as packable
-	if (has_member_decoration(ib_type.self, index, DecorationCPacked))
+	if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
 		return true;
 
 	auto &mbr_type = get<SPIRType>(ib_type.member_types[index]);
 
+	uint32_t component_size = mbr_type.width / 8;
+	uint32_t unpacked_mbr_size;
+	if (mbr_type.vecsize == 3)
+		unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
+	else
+		unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
+
+	// Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
+	// but we will use the same mechanism.
+	if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 &&
+	    type_struct_member_array_stride(ib_type, index) == 4 * component_size)
+	{
+		return true;
+	}
+
+	// TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
+	//if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16)
+	//	SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout.");
+
 	// Only vectors or 3-row matrices need to be packed.
 	if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
 		return false;
@@ -1597,12 +1622,6 @@
 	if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
 		return false;
 
-	uint32_t component_size = mbr_type.width / 8;
-	uint32_t unpacked_mbr_size;
-	if (mbr_type.vecsize == 3)
-		unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns;
-	else
-		unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
 	if (is_array(mbr_type))
 	{
 		// If member is an array, and the array stride is larger than the type needs, don't pack it.
@@ -1644,11 +1663,49 @@
 	return k;
 }
 
+void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
+{
+	if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) ||
+	    get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0)
+	{
+		CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression);
+	}
+	else
+	{
+		// Special handling when storing to a float[] or float2[] in std140 layout.
+
+		auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
+		string lhs = to_dereferenced_expression(lhs_expression);
+		string rhs = to_pointer_expression(rhs_expression);
+
+		// Unpack the expression so we can store to it with a float or float2.
+		// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
+		if (is_scalar(type) && is_array(type))
+			lhs = enclose_expression(lhs) + ".x";
+		else if (is_vector(type) && type.vecsize == 2 && is_array(type))
+			lhs = enclose_expression(lhs) + ".xy";
+
+		if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
+			statement(lhs, " = ", rhs, ";");
+		register_write(lhs_expression);
+	}
+}
+
 // Converts the format of the current expression from packed to unpacked,
 // by wrapping the expression in a constructor of the appropriate type.
-string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type)
+string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
 {
-	return join(type_to_glsl(type), "(", expr_str, ")");
+	const SPIRType *packed_type = nullptr;
+	if (packed_type_id)
+		packed_type = &get<SPIRType>(packed_type_id);
+
+	// float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
+	if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
+		return enclose_expression(expr_str) + ".x";
+	else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
+		return enclose_expression(expr_str) + ".xy";
+	else
+		return join(type_to_glsl(type), "(", expr_str, ")");
 }
 
 // Emits the file header info
@@ -2113,8 +2170,9 @@
 			statement("");
 			statement("// Wrapper function that swizzles texture gathers.");
 			statement("template<typename T, typename Tex, typename... Ts>");
-			statement("inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) "
-			          "METAL_CONST_ARG(c)");
+			statement(
+			    "inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) "
+			    "METAL_CONST_ARG(c)");
 			begin_scope();
 			statement("if (sw)");
 			begin_scope();
@@ -2153,7 +2211,8 @@
 			statement("");
 			statement("// Wrapper function that swizzles depth texture gathers.");
 			statement("template<typename T, typename Tex, typename... Ts>");
-			statement("inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) ");
+			statement(
+			    "inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) ");
 			begin_scope();
 			statement("if (sw)");
 			begin_scope();
@@ -2334,7 +2393,7 @@
 
 				declared_structs.insert(type_id);
 
-				if (has_decoration(type_id, DecorationCPacked))
+				if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked))
 					align_struct(type);
 
 				// Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc.
@@ -2656,8 +2715,13 @@
 		test(bias, ImageOperandsBiasMask);
 		test(lod, ImageOperandsLodMask);
 
+		auto &texel_type = expression_type(texel_id);
+		auto store_type = texel_type;
+		store_type.vecsize = 4;
+
 		statement(join(
-		    to_expression(img_id), ".write(", to_expression(texel_id), ", ",
+		    to_expression(img_id), ".write(", remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)),
+		    ", ",
 		    to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, &forward),
 		    ");"));
 
@@ -2817,7 +2881,7 @@
 		uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3];
 		auto *e = maybe_get<SPIRExpression>(mtx_id);
 		auto &t = expression_type(mtx_id);
-		bool is_packed = has_decoration(mtx_id, DecorationCPacked);
+		bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
 		if (e && e->need_transpose && (t.columns == t.vecsize || is_packed))
 		{
 			e->need_transpose = false;
@@ -2826,11 +2890,11 @@
 			// are generally transposed, so unpacking using a constructor argument
 			// will result in an error.
 			// The simplest solution for now is to just avoid unpacking the matrix in this operation.
-			unset_decoration(mtx_id, DecorationCPacked);
+			unset_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
 
 			emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
 			if (is_packed)
-				set_decoration(mtx_id, DecorationCPacked);
+				set_extended_decoration(mtx_id, SPIRVCrossDecorationPacked);
 			e->need_transpose = true;
 		}
 		else
@@ -3863,7 +3927,7 @@
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
 	// Packed row-matrix should just use transpose() function.
-	if (!has_decoration(id, DecorationCPacked))
+	if (!has_extended_decoration(id, SPIRVCrossDecorationPacked))
 	{
 		const auto type = expression_type(id);
 		add_convert_row_major_matrix_function(type.columns, type.vecsize);
@@ -3885,7 +3949,7 @@
 
 	// Generate a function that will swap matrix elements from row-major to column-major.
 	// Packed row-matrix should just use transpose() function.
-	if (!has_member_decoration(type.self, index, DecorationCPacked))
+	if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked))
 	{
 		const auto mbr_type = get<SPIRType>(type.member_types[index]);
 		add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize);
@@ -3968,13 +4032,16 @@
 
 	// If this member is packed, mark it as so.
 	string pack_pfx = "";
+
+	const SPIRType *effective_membertype = &membertype;
+	SPIRType override_type;
+
 	if (member_is_packed_type(type, index))
 	{
-		pack_pfx = "packed_";
-
 		// If we're packing a matrix, output an appropriate typedef
 		if (membertype.vecsize > 1 && membertype.columns > 1)
 		{
+			pack_pfx = "packed_";
 			string base_type = membertype.width == 16 ? "half" : "float";
 			string td_line = "typedef ";
 			td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns);
@@ -3983,10 +4050,19 @@
 			td_line += ";";
 			add_typedef_line(td_line);
 		}
+		else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
+		{
+			// A "packed" float array, but we pad here instead to 4-vector.
+			override_type = membertype;
+			override_type.vecsize = 4;
+			effective_membertype = &override_type;
+		}
+		else
+			pack_pfx = "packed_";
 	}
 
-	statement(pack_pfx, type_to_glsl(membertype), " ", qualifier, to_member_name(type, index),
-	          member_attribute_qualifier(type, index), type_to_array_glsl(membertype), ";");
+	statement(pack_pfx, type_to_glsl(*effective_membertype), " ", qualifier, to_member_name(type, index),
+	          member_attribute_qualifier(type, index), type_to_array_glsl(*effective_membertype), ";");
 }
 
 // Return a MSL qualifier for the specified function attribute member
@@ -5607,7 +5683,7 @@
 		uint32_t columns = type.columns;
 
 		// An unpacked 3-element vector or matrix column is the same memory size as a 4-element.
-		if (vecsize == 3 && !has_member_decoration(struct_type.self, index, DecorationCPacked))
+		if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked))
 			vecsize = 4;
 
 		return component_size * vecsize * columns;
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index 7c60e9d..3ea5b4d 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -346,7 +346,7 @@
 	                             uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias,
 	                             uint32_t comp, uint32_t sample, bool *p_forward) override;
 	std::string to_initializer_expression(const SPIRVariable &var) override;
-	std::string unpack_expression_type(std::string expr_str, const SPIRType &type) override;
+	std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override;
 	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
 	bool skip_argument(uint32_t id) const override;
 	std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override;
@@ -431,6 +431,7 @@
 
 	void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
 	void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
+	void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;
 
 	void analyze_sampled_image_usage();