Merge pull request #1161 from LoopDawg/matmul-truncate

WIP: HLSL: matrix and vector truncations for m*v, v*m, m*m
diff --git a/Test/baseResults/hlsl.mul-truncate.frag.out b/Test/baseResults/hlsl.mul-truncate.frag.out
new file mode 100644
index 0000000..a1c3c03
--- /dev/null
+++ b/Test/baseResults/hlsl.mul-truncate.frag.out
@@ -0,0 +1,413 @@
+hlsl.mul-truncate.frag
+Shader version: 500
+gl_FragCoord origin is upper left
+0:? Sequence
+0:17  Function Definition: @main( ( temp 4-component vector of float)
+0:17    Function Parameters: 
+0:?     Sequence
+0:19      Sequence
+0:19        move second child to first child ( temp float)
+0:19          'r00' ( temp float)
+0:19          dot-product ( temp float)
+0:19            v2: direct index for structure (layout( row_major std140) uniform 2-component vector of float)
+0:19              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:19              Constant:
+0:19                7 (const uint)
+0:19            Construct vec2 ( in 2-component vector of float)
+0:19              v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:19                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:19                Constant:
+0:19                  6 (const uint)
+0:20      Sequence
+0:20        move second child to first child ( temp float)
+0:20          'r01' ( temp float)
+0:20          dot-product ( temp float)
+0:20            Construct vec2 ( in 2-component vector of float)
+0:20              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:20                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:20                Constant:
+0:20                  5 (const uint)
+0:20            v2: direct index for structure (layout( row_major std140) uniform 2-component vector of float)
+0:20              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:20              Constant:
+0:20                7 (const uint)
+0:23      Sequence
+0:23        move second child to first child ( temp 4-component vector of float)
+0:23          'r10' ( temp 4-component vector of float)
+0:23          matrix-times-vector ( temp 4-component vector of float)
+0:23            Construct mat3x4 ( uniform 3X4 matrix of float)
+0:23              m44: direct index for structure (layout( row_major std140) uniform 4X4 matrix of float)
+0:23                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:23                Constant:
+0:23                  0 (const uint)
+0:23            v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:23              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:23              Constant:
+0:23                6 (const uint)
+0:24      Sequence
+0:24        move second child to first child ( temp 4-component vector of float)
+0:24          'r11' ( temp 4-component vector of float)
+0:24          matrix-times-vector ( temp 4-component vector of float)
+0:24            m34: direct index for structure (layout( row_major std140) uniform 3X4 matrix of float)
+0:24              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:24              Constant:
+0:24                2 (const uint)
+0:24            Construct vec3 ( uniform 3-component vector of float)
+0:24              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:24                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:24                Constant:
+0:24                  5 (const uint)
+0:27      Sequence
+0:27        move second child to first child ( temp 4-component vector of float)
+0:27          'r20' ( temp 4-component vector of float)
+0:27          vector-times-matrix ( temp 4-component vector of float)
+0:27            v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:27              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:27              Constant:
+0:27                6 (const uint)
+0:27            Construct mat4x3 ( uniform 4X3 matrix of float)
+0:27              m44: direct index for structure (layout( row_major std140) uniform 4X4 matrix of float)
+0:27                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:27                Constant:
+0:27                  0 (const uint)
+0:28      Sequence
+0:28        move second child to first child ( temp 4-component vector of float)
+0:28          'r21' ( temp 4-component vector of float)
+0:28          vector-times-matrix ( temp 4-component vector of float)
+0:28            Construct vec3 ( uniform 3-component vector of float)
+0:28              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:28                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:28                Constant:
+0:28                  5 (const uint)
+0:28            m43: direct index for structure (layout( row_major std140) uniform 4X3 matrix of float)
+0:28              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:28              Constant:
+0:28                1 (const uint)
+0:36      Branch: Return with expression
+0:36        add ( temp 4-component vector of float)
+0:36          add ( temp 4-component vector of float)
+0:36            add ( temp 4-component vector of float)
+0:36              add ( temp 4-component vector of float)
+0:36                add ( temp 4-component vector of float)
+0:36                  'r10' ( temp 4-component vector of float)
+0:36                  'r11' ( temp 4-component vector of float)
+0:36                'r20' ( temp 4-component vector of float)
+0:36              'r21' ( temp 4-component vector of float)
+0:36            'r00' ( temp float)
+0:36          'r01' ( temp float)
+0:17  Function Definition: main( ( temp void)
+0:17    Function Parameters: 
+0:?     Sequence
+0:17      move second child to first child ( temp 4-component vector of float)
+0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:17        Function Call: @main( ( temp 4-component vector of float)
+0:?   Linker Objects
+0:?     'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+
+
+Linked fragment stage:
+
+
+Shader version: 500
+gl_FragCoord origin is upper left
+0:? Sequence
+0:17  Function Definition: @main( ( temp 4-component vector of float)
+0:17    Function Parameters: 
+0:?     Sequence
+0:19      Sequence
+0:19        move second child to first child ( temp float)
+0:19          'r00' ( temp float)
+0:19          dot-product ( temp float)
+0:19            v2: direct index for structure (layout( row_major std140) uniform 2-component vector of float)
+0:19              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:19              Constant:
+0:19                7 (const uint)
+0:19            Construct vec2 ( in 2-component vector of float)
+0:19              v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:19                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:19                Constant:
+0:19                  6 (const uint)
+0:20      Sequence
+0:20        move second child to first child ( temp float)
+0:20          'r01' ( temp float)
+0:20          dot-product ( temp float)
+0:20            Construct vec2 ( in 2-component vector of float)
+0:20              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:20                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:20                Constant:
+0:20                  5 (const uint)
+0:20            v2: direct index for structure (layout( row_major std140) uniform 2-component vector of float)
+0:20              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:20              Constant:
+0:20                7 (const uint)
+0:23      Sequence
+0:23        move second child to first child ( temp 4-component vector of float)
+0:23          'r10' ( temp 4-component vector of float)
+0:23          matrix-times-vector ( temp 4-component vector of float)
+0:23            Construct mat3x4 ( uniform 3X4 matrix of float)
+0:23              m44: direct index for structure (layout( row_major std140) uniform 4X4 matrix of float)
+0:23                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:23                Constant:
+0:23                  0 (const uint)
+0:23            v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:23              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:23              Constant:
+0:23                6 (const uint)
+0:24      Sequence
+0:24        move second child to first child ( temp 4-component vector of float)
+0:24          'r11' ( temp 4-component vector of float)
+0:24          matrix-times-vector ( temp 4-component vector of float)
+0:24            m34: direct index for structure (layout( row_major std140) uniform 3X4 matrix of float)
+0:24              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:24              Constant:
+0:24                2 (const uint)
+0:24            Construct vec3 ( uniform 3-component vector of float)
+0:24              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:24                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:24                Constant:
+0:24                  5 (const uint)
+0:27      Sequence
+0:27        move second child to first child ( temp 4-component vector of float)
+0:27          'r20' ( temp 4-component vector of float)
+0:27          vector-times-matrix ( temp 4-component vector of float)
+0:27            v3: direct index for structure (layout( row_major std140) uniform 3-component vector of float)
+0:27              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:27              Constant:
+0:27                6 (const uint)
+0:27            Construct mat4x3 ( uniform 4X3 matrix of float)
+0:27              m44: direct index for structure (layout( row_major std140) uniform 4X4 matrix of float)
+0:27                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:27                Constant:
+0:27                  0 (const uint)
+0:28      Sequence
+0:28        move second child to first child ( temp 4-component vector of float)
+0:28          'r21' ( temp 4-component vector of float)
+0:28          vector-times-matrix ( temp 4-component vector of float)
+0:28            Construct vec3 ( uniform 3-component vector of float)
+0:28              v4: direct index for structure (layout( row_major std140) uniform 4-component vector of float)
+0:28                'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:28                Constant:
+0:28                  5 (const uint)
+0:28            m43: direct index for structure (layout( row_major std140) uniform 4X3 matrix of float)
+0:28              'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:28              Constant:
+0:28                1 (const uint)
+0:36      Branch: Return with expression
+0:36        add ( temp 4-component vector of float)
+0:36          add ( temp 4-component vector of float)
+0:36            add ( temp 4-component vector of float)
+0:36              add ( temp 4-component vector of float)
+0:36                add ( temp 4-component vector of float)
+0:36                  'r10' ( temp 4-component vector of float)
+0:36                  'r11' ( temp 4-component vector of float)
+0:36                'r20' ( temp 4-component vector of float)
+0:36              'r21' ( temp 4-component vector of float)
+0:36            'r00' ( temp float)
+0:36          'r01' ( temp float)
+0:17  Function Definition: main( ( temp void)
+0:17    Function Parameters: 
+0:?     Sequence
+0:17      move second child to first child ( temp 4-component vector of float)
+0:?         '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+0:17        Function Call: @main( ( temp 4-component vector of float)
+0:?   Linker Objects
+0:?     'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform 4X4 matrix of float m44, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform 2X4 matrix of float m24, layout( row_major std140) uniform 4X2 matrix of float m42, layout( row_major std140) uniform 4-component vector of float v4, layout( row_major std140) uniform 3-component vector of float v3, layout( row_major std140) uniform 2-component vector of float v2})
+0:?     '@entryPointOutput' (layout( location=0) out 4-component vector of float)
+
+// Module Version 10000
+// Generated by (magic number): 80002
+// Id's are bound by 139
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 137
+                              ExecutionMode 4 OriginUpperLeft
+                              Source HLSL 500
+                              Name 4  "main"
+                              Name 9  "@main("
+                              Name 12  "r00"
+                              Name 20  "Matrix"
+                              MemberName 20(Matrix) 0  "m44"
+                              MemberName 20(Matrix) 1  "m43"
+                              MemberName 20(Matrix) 2  "m34"
+                              MemberName 20(Matrix) 3  "m24"
+                              MemberName 20(Matrix) 4  "m42"
+                              MemberName 20(Matrix) 5  "v4"
+                              MemberName 20(Matrix) 6  "v3"
+                              MemberName 20(Matrix) 7  "v2"
+                              Name 22  ""
+                              Name 36  "r01"
+                              Name 48  "r10"
+                              Name 74  "r11"
+                              Name 86  "r20"
+                              Name 109  "r21"
+                              Name 137  "@entryPointOutput"
+                              MemberDecorate 20(Matrix) 0 RowMajor
+                              MemberDecorate 20(Matrix) 0 Offset 0
+                              MemberDecorate 20(Matrix) 0 MatrixStride 16
+                              MemberDecorate 20(Matrix) 1 RowMajor
+                              MemberDecorate 20(Matrix) 1 Offset 64
+                              MemberDecorate 20(Matrix) 1 MatrixStride 16
+                              MemberDecorate 20(Matrix) 2 RowMajor
+                              MemberDecorate 20(Matrix) 2 Offset 112
+                              MemberDecorate 20(Matrix) 2 MatrixStride 16
+                              MemberDecorate 20(Matrix) 3 RowMajor
+                              MemberDecorate 20(Matrix) 3 Offset 176
+                              MemberDecorate 20(Matrix) 3 MatrixStride 16
+                              MemberDecorate 20(Matrix) 4 RowMajor
+                              MemberDecorate 20(Matrix) 4 Offset 240
+                              MemberDecorate 20(Matrix) 4 MatrixStride 16
+                              MemberDecorate 20(Matrix) 5 Offset 272
+                              MemberDecorate 20(Matrix) 6 Offset 288
+                              MemberDecorate 20(Matrix) 7 Offset 304
+                              Decorate 20(Matrix) Block
+                              Decorate 22 DescriptorSet 0
+                              Decorate 137(@entryPointOutput) Location 0
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypeFunction 7(fvec4)
+              11:             TypePointer Function 6(float)
+              13:             TypeMatrix 7(fvec4) 4
+              14:             TypeVector 6(float) 3
+              15:             TypeMatrix 14(fvec3) 4
+              16:             TypeMatrix 7(fvec4) 3
+              17:             TypeMatrix 7(fvec4) 2
+              18:             TypeVector 6(float) 2
+              19:             TypeMatrix 18(fvec2) 4
+      20(Matrix):             TypeStruct 13 15 16 17 19 7(fvec4) 14(fvec3) 18(fvec2)
+              21:             TypePointer Uniform 20(Matrix)
+              22:     21(ptr) Variable Uniform
+              23:             TypeInt 32 1
+              24:     23(int) Constant 7
+              25:             TypePointer Uniform 18(fvec2)
+              28:     23(int) Constant 6
+              29:             TypePointer Uniform 14(fvec3)
+              37:     23(int) Constant 5
+              38:             TypePointer Uniform 7(fvec4)
+              47:             TypePointer Function 7(fvec4)
+              49:     23(int) Constant 0
+              50:             TypePointer Uniform 13
+              53:    6(float) Constant 1065353216
+              54:    6(float) Constant 0
+              75:     23(int) Constant 2
+              76:             TypePointer Uniform 16
+             116:     23(int) Constant 1
+             117:             TypePointer Uniform 15
+             136:             TypePointer Output 7(fvec4)
+137(@entryPointOutput):    136(ptr) Variable Output
+         4(main):           2 Function None 3
+               5:             Label
+             138:    7(fvec4) FunctionCall 9(@main()
+                              Store 137(@entryPointOutput) 138
+                              Return
+                              FunctionEnd
+       9(@main():    7(fvec4) Function None 8
+              10:             Label
+         12(r00):     11(ptr) Variable Function
+         36(r01):     11(ptr) Variable Function
+         48(r10):     47(ptr) Variable Function
+         74(r11):     47(ptr) Variable Function
+         86(r20):     47(ptr) Variable Function
+        109(r21):     47(ptr) Variable Function
+              26:     25(ptr) AccessChain 22 24
+              27:   18(fvec2) Load 26
+              30:     29(ptr) AccessChain 22 28
+              31:   14(fvec3) Load 30
+              32:    6(float) CompositeExtract 31 0
+              33:    6(float) CompositeExtract 31 1
+              34:   18(fvec2) CompositeConstruct 32 33
+              35:    6(float) Dot 27 34
+                              Store 12(r00) 35
+              39:     38(ptr) AccessChain 22 37
+              40:    7(fvec4) Load 39
+              41:    6(float) CompositeExtract 40 0
+              42:    6(float) CompositeExtract 40 1
+              43:   18(fvec2) CompositeConstruct 41 42
+              44:     25(ptr) AccessChain 22 24
+              45:   18(fvec2) Load 44
+              46:    6(float) Dot 43 45
+                              Store 36(r01) 46
+              51:     50(ptr) AccessChain 22 49
+              52:          13 Load 51
+              55:    6(float) CompositeExtract 52 0 0
+              56:    6(float) CompositeExtract 52 0 1
+              57:    6(float) CompositeExtract 52 0 2
+              58:    6(float) CompositeExtract 52 0 3
+              59:    6(float) CompositeExtract 52 1 0
+              60:    6(float) CompositeExtract 52 1 1
+              61:    6(float) CompositeExtract 52 1 2
+              62:    6(float) CompositeExtract 52 1 3
+              63:    6(float) CompositeExtract 52 2 0
+              64:    6(float) CompositeExtract 52 2 1
+              65:    6(float) CompositeExtract 52 2 2
+              66:    6(float) CompositeExtract 52 2 3
+              67:    7(fvec4) CompositeConstruct 55 56 57 58
+              68:    7(fvec4) CompositeConstruct 59 60 61 62
+              69:    7(fvec4) CompositeConstruct 63 64 65 66
+              70:          16 CompositeConstruct 67 68 69
+              71:     29(ptr) AccessChain 22 28
+              72:   14(fvec3) Load 71
+              73:    7(fvec4) MatrixTimesVector 70 72
+                              Store 48(r10) 73
+              77:     76(ptr) AccessChain 22 75
+              78:          16 Load 77
+              79:     38(ptr) AccessChain 22 37
+              80:    7(fvec4) Load 79
+              81:    6(float) CompositeExtract 80 0
+              82:    6(float) CompositeExtract 80 1
+              83:    6(float) CompositeExtract 80 2
+              84:   14(fvec3) CompositeConstruct 81 82 83
+              85:    7(fvec4) MatrixTimesVector 78 84
+                              Store 74(r11) 85
+              87:     29(ptr) AccessChain 22 28
+              88:   14(fvec3) Load 87
+              89:     50(ptr) AccessChain 22 49
+              90:          13 Load 89
+              91:    6(float) CompositeExtract 90 0 0
+              92:    6(float) CompositeExtract 90 0 1
+              93:    6(float) CompositeExtract 90 0 2
+              94:    6(float) CompositeExtract 90 1 0
+              95:    6(float) CompositeExtract 90 1 1
+              96:    6(float) CompositeExtract 90 1 2
+              97:    6(float) CompositeExtract 90 2 0
+              98:    6(float) CompositeExtract 90 2 1
+              99:    6(float) CompositeExtract 90 2 2
+             100:    6(float) CompositeExtract 90 3 0
+             101:    6(float) CompositeExtract 90 3 1
+             102:    6(float) CompositeExtract 90 3 2
+             103:   14(fvec3) CompositeConstruct 91 92 93
+             104:   14(fvec3) CompositeConstruct 94 95 96
+             105:   14(fvec3) CompositeConstruct 97 98 99
+             106:   14(fvec3) CompositeConstruct 100 101 102
+             107:          15 CompositeConstruct 103 104 105 106
+             108:    7(fvec4) VectorTimesMatrix 88 107
+                              Store 86(r20) 108
+             110:     38(ptr) AccessChain 22 37
+             111:    7(fvec4) Load 110
+             112:    6(float) CompositeExtract 111 0
+             113:    6(float) CompositeExtract 111 1
+             114:    6(float) CompositeExtract 111 2
+             115:   14(fvec3) CompositeConstruct 112 113 114
+             118:    117(ptr) AccessChain 22 116
+             119:          15 Load 118
+             120:    7(fvec4) VectorTimesMatrix 115 119
+                              Store 109(r21) 120
+             121:    7(fvec4) Load 48(r10)
+             122:    7(fvec4) Load 74(r11)
+             123:    7(fvec4) FAdd 121 122
+             124:    7(fvec4) Load 86(r20)
+             125:    7(fvec4) FAdd 123 124
+             126:    7(fvec4) Load 109(r21)
+             127:    7(fvec4) FAdd 125 126
+             128:    6(float) Load 12(r00)
+             129:    7(fvec4) CompositeConstruct 128 128 128 128
+             130:    7(fvec4) FAdd 127 129
+             131:    6(float) Load 36(r01)
+             132:    7(fvec4) CompositeConstruct 131 131 131 131
+             133:    7(fvec4) FAdd 130 132
+                              ReturnValue 133
+                              FunctionEnd
diff --git a/Test/hlsl.mul-truncate.frag b/Test/hlsl.mul-truncate.frag
new file mode 100644
index 0000000..6baf2ac
--- /dev/null
+++ b/Test/hlsl.mul-truncate.frag
@@ -0,0 +1,37 @@
+
+// Test v*v, v*m, m*v, and m*m argument clamping.
+
+cbuffer Matrix
+{
+    float4x4  m44;  // clamped to match v3 in r10 and r20
+    float4x3  m43;  // forces truncation of v4 in r21
+    float3x4  m34;  // forces truncation of v4 in r11
+    float2x4  m24;  // only referenced by the disabled m*m cases
+    float4x2  m42;  // only referenced by the disabled m*m cases
+    float4    v4;
+    float3    v3;
+    float2    v2;
+}
+
+float4 main() : SV_Target0
+{
+    // v*v:
+    float  r00 = mul(v2, v3);  // float = float2*float3; // clamp to float2 dot product
+    float  r01 = mul(v4, v2);  // float = float4*float2; // clamp to float2 dot product
+
+    // v*m
+    float4 r10 = mul(v3, m44); // float4 = float3 * float4x4;  // clamp mat to float3x4;
+    float4 r11 = mul(v4, m34); // float4 = float4 * float3x4;  // truncate vector to float3
+
+    // m*v
+    float4 r20 = mul(m44, v3); // float4 = float4x4 * float3;  // clamp mat to float4x3;
+    float4 r21 = mul(m43, v4); // float4 = float4x3 * float4;  // truncate vector to float3
+
+    // // m*m (disabled; note that m33 is not declared in the Matrix cbuffer)
+    // float2x3 r30 = mul(m24, m33);  // float2x3 = float2x4 * float3x3;
+    // float3x4 r31 = mul(m33, m24);  // float3x4 = float3x3 * float2x4;
+    // float3x2 r32 = mul(m33, m42);  // float3x2 = float3x3 * float4x2;
+    // float4x3 r33 = mul(m42, m33);  // float4x3 = float4x2 * float3x3;
+
+    return r10 + r11 + r20 + r21 + r00 + r01; // + r30[0].x + r31[0] + r32[0].x + transpose(r33)[0];
+}
diff --git a/glslang/Include/Types.h b/glslang/Include/Types.h
index b5b91f5..d23b615 100644
--- a/glslang/Include/Types.h
+++ b/glslang/Include/Types.h
@@ -1152,6 +1152,7 @@
                                 sampler.clear();
                                 qualifier.clear();
                                 qualifier.storage = q;
+                                assert(!(isMatrix() && vectorSize != 0));  // prevent vectorSize != 0 on matrices
                             }
     // for explicit precision qualifier
     TType(TBasicType t, TStorageQualifier q, TPrecisionQualifier p, int vs = 1, int mc = 0, int mr = 0,
@@ -1164,6 +1165,7 @@
                                 qualifier.storage = q;
                                 qualifier.precision = p;
                                 assert(p >= EpqNone && p <= EpqHigh);
+                                assert(!(isMatrix() && vectorSize != 0));  // prevent vectorSize != 0 on matrices
                             }
     // for turning a TPublicType into a TType, using a shallow copy
     explicit TType(const TPublicType& p) :
diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp
index 2e4b146..b409d81 100644
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -238,6 +238,7 @@
         {"hlsl.matrixSwizzle.vert", "ShaderFunction"},
         {"hlsl.memberFunCall.frag", "main"},
         {"hlsl.mintypes.frag", "main"},
+        {"hlsl.mul-truncate.frag", "main"},
         {"hlsl.multiEntry.vert", "RealEntrypoint"},
         {"hlsl.multiReturn.frag", "main"},
         {"hlsl.matrixindex.frag", "main"},
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp
index adb955f..ce9afc7 100755
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -5008,6 +5008,12 @@
         bool builtIn = false;
         int thisDepth = 0;
 
+        // For mat mul, the situation is unusual: we have to compare vector sizes to mat row or col sizes,
+        // and clamp the opposite arg.  Since that's complex, we farm it off to a separate method.
+        // It doesn't naturally fall out of processing an argument at a time in isolation.
+        if (function->getName() == "mul")
+            addGenMulArgumentConversion(loc, *function, arguments);
+
         TIntermAggregate* aggregate = arguments ? arguments->getAsAggregate() : nullptr;
 
         // TODO: this needs improvement: there's no way at present to look up a signature in
@@ -5170,6 +5176,68 @@
 }
 
 //
+// HLSL allows mismatched dimensions on vec*mat, mat*vec, vec*vec, and mat*mat.  This is a
+// situation not well suited to resolution in intrinsic selection, but we can do so here, since we
+// can look at both arguments and insert explicit shape changes here, if required.
+//
+void HlslParseContext::addGenMulArgumentConversion(const TSourceLoc& loc, TFunction& call, TIntermTyped*& args)
+{
+    TIntermAggregate* argAggregate = args ? args->getAsAggregate() : nullptr;
+
+    if (argAggregate == nullptr || argAggregate->getSequence().size() != 2) {
+        // mul() really ought to have exactly two arguments: report it and leave the call untouched.
+        error(loc, "expected: mul arguments", "", "");
+        return;
+    }
+
+    TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
+    TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
+
+    if (arg0->isVector() && arg1->isVector()) {
+        // For:
+        //    vec * vec: it's handled during intrinsic selection, so while we could do it here,
+        //               we can also ignore it, which is easier.
+    } else if (arg0->isVector() && arg1->isMatrix()) {
+        // vec * mat: we clamp the vec if the mat col is smaller, else clamp the mat col.
+        if (arg0->getVectorSize() < arg1->getMatrixCols()) {
+            // vec is smaller, so truncate the mat's column count to the vec size (vector size 0 => matrix type)
+            const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
+                                  0, arg0->getVectorSize(), arg1->getMatrixRows());
+            arg1 = addConstructor(loc, arg1, truncType);
+        } else if (arg0->getVectorSize() > arg1->getMatrixCols()) {
+            // vec is larger, so truncate the vec to the mat's column count
+            const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
+                                  arg1->getMatrixCols());
+            arg0 = addConstructor(loc, arg0, truncType);
+        }
+    } else if (arg0->isMatrix() && arg1->isVector()) {
+        // mat * vec: we clamp the vec if the mat row is smaller, else clamp the mat row.
+        if (arg1->getVectorSize() < arg0->getMatrixRows()) {
+            // vec is smaller, so truncate the mat's row count to the vec size (vector size 0 => matrix type)
+            const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
+                                  0, arg0->getMatrixCols(), arg1->getVectorSize());
+            arg0 = addConstructor(loc, arg0, truncType);
+        } else if (arg1->getVectorSize() > arg0->getMatrixRows()) {
+            // vec is larger, so truncate the vec to the mat's row count
+            const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
+                                  arg0->getMatrixRows());
+            arg1 = addConstructor(loc, arg1, truncType);
+        }
+    } else if (arg0->isMatrix() && arg1->isMatrix()) {
+        // mat * mat: no shape conversion is inserted here; both arguments are left as-is.
+    } else {
+        // It's something with scalars: we'll just leave it alone.
+    }
+
+    // Put arguments back (a no-op if neither was converted), then point the call's parameter types at them.
+    argAggregate->getSequence()[0] = arg0;
+    argAggregate->getSequence()[1] = arg1;
+
+    call[0].type = &arg0->getWritableType();
+    call[1].type = &arg1->getWritableType();
+}
+
+//
 // Add any needed implicit conversions for function-call arguments to input parameters.
 //
 void HlslParseContext::addInputArgumentConversions(const TFunction& function, TIntermTyped*& arguments)
@@ -7015,6 +7083,7 @@
     }
 }
 
+
 //
 // Look up a function name in the symbol table, and make sure it is a function.
 //
diff --git a/hlsl/hlslParseHelper.h b/hlsl/hlslParseHelper.h
index c1752f1..67d192d 100755
--- a/hlsl/hlslParseHelper.h
+++ b/hlsl/hlslParseHelper.h
@@ -141,6 +141,7 @@
     void checkNoShaderLayouts(const TSourceLoc&, const TShaderQualifiers&);
 
     const TFunction* findFunction(const TSourceLoc& loc, TFunction& call, bool& builtIn, int& thisDepth, TIntermTyped*& args);
+    void addGenMulArgumentConversion(const TSourceLoc& loc, TFunction& call, TIntermTyped*& args);
     void declareTypedef(const TSourceLoc&, const TString& identifier, const TType&);
     void declareStruct(const TSourceLoc&, TString& structName, TType&);
     TSymbol* lookupUserType(const TString&, TType&);