|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | 
|  | ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKLE | 
|  | ; RUN: llc < %s -mtriple=thumbebv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKBE | 
|  |  | 
|  |  | 
|  | ; Loads an <8 x i8> from %p+1, shifts it up by one lane and inserts the byte | 
|  | ; loaded from %p into lane 0, so lane k of the result holds the byte at %p+k. | 
|  | ; The expected output is a single widening vldrb.u16 from [r0]; the big-endian | 
|  | ; run additionally expects a vrev64.16 on the loaded register. | 
|  | define <8 x i8> @inserti8_first(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_first: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.u16 q0, [r0] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_first: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u16 q1, [r0] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 1 | 
|  | %l1 = load <8 x i8>, ptr %q | 
|  | %l2 = load i8, ptr %p | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..7 take %l1 lanes 0..6. | 
|  | %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> | 
|  | ; Fill the undefined lane 0 with the scalar loaded from %p. | 
|  | %ins = insertelement <8 x i8> %s, i8 %l2, i32 0 | 
|  | ret <8 x i8> %ins | 
|  | } | 
|  |  | 
|  | ; Loads an <8 x i8> from %p, shifts it down by one lane and inserts the byte | 
|  | ; loaded from %p+8 into lane 7, so lane k of the result holds the byte at %p+1+k. | 
|  | ; The expected output is a single widening vldrb.u16 from [r0, #1]; the | 
|  | ; big-endian run additionally expects a vrev64.16 on the loaded register. | 
|  | define <8 x i8> @inserti8_last(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_last: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_last: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 8 | 
|  | %l1 = load <8 x i8>, ptr %p | 
|  | %l2 = load i8, ptr %q | 
|  | ; Lanes 0..6 take %l1 lanes 1..7; lane 7 is left undefined by the shuffle. | 
|  | %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef> | 
|  | ; Fill the undefined lane 7 with the scalar loaded from %p+8. | 
|  | %ins = insertelement <8 x i8> %s, i8 %l2, i32 7 | 
|  | ret <8 x i8> %ins | 
|  | } | 
|  |  | 
|  | ; Sign-extending variant of the "insert first" pattern: both the <8 x i8> loaded | 
|  | ; from %p+1 and the i8 loaded from %p are sign-extended to i16 before the | 
|  | ; shift-up shuffle and the insert into lane 0. | 
|  | ; The expected output is a single sign-extending vldrb.s16 from [r0]; the | 
|  | ; big-endian run additionally expects a vrev64.16. | 
|  | define <8 x i16> @inserti8_first_sext(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_first_sext: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.s16 q0, [r0] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_first_sext: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.s16 q1, [r0] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 1 | 
|  | %l1 = load <8 x i8>, ptr %q | 
|  | %s1 = sext <8 x i8> %l1 to <8 x i16> | 
|  | %l2 = load i8, ptr %p | 
|  | %s2 = sext i8 %l2 to i16 | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..7 take %s1 lanes 0..6. | 
|  | %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> | 
|  | ; Fill the undefined lane 0 with the sign-extended scalar. | 
|  | %ins = insertelement <8 x i16> %s, i16 %s2, i32 0 | 
|  | ret <8 x i16> %ins | 
|  | } | 
|  |  | 
|  | ; Sign-extending variant of the "insert last" pattern: both the <8 x i8> loaded | 
|  | ; from %p and the i8 loaded from %p+8 are sign-extended to i16 before the | 
|  | ; shift-down shuffle and the insert into lane 7. | 
|  | ; The expected output is a single sign-extending vldrb.s16 from [r0, #1]; the | 
|  | ; big-endian run additionally expects a vrev64.16. | 
|  | define <8 x i16> @inserti8_last_sext(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_last_sext: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.s16 q0, [r0, #1] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_last_sext: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.s16 q1, [r0, #1] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 8 | 
|  | %l1 = load <8 x i8>, ptr %p | 
|  | %s1 = sext <8 x i8> %l1 to <8 x i16> | 
|  | %l2 = load i8, ptr %q | 
|  | %s2 = sext i8 %l2 to i16 | 
|  | ; Lanes 0..6 take %s1 lanes 1..7; lane 7 is left undefined by the shuffle. | 
|  | %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef> | 
|  | ; Fill the undefined lane 7 with the sign-extended scalar. | 
|  | %ins = insertelement <8 x i16> %s, i16 %s2, i32 7 | 
|  | ret <8 x i16> %ins | 
|  | } | 
|  |  | 
|  | ; Zero-extending variant of the "insert first" pattern: both the <8 x i8> loaded | 
|  | ; from %p+1 and the i8 loaded from %p are zero-extended to i16 before the | 
|  | ; shift-up shuffle and the insert into lane 0. | 
|  | ; The expected output is a single zero-extending vldrb.u16 from [r0]; the | 
|  | ; big-endian run additionally expects a vrev64.16. | 
|  | define <8 x i16> @inserti8_first_zext(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_first_zext: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.u16 q0, [r0] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_first_zext: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u16 q1, [r0] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 1 | 
|  | %l1 = load <8 x i8>, ptr %q | 
|  | %s1 = zext <8 x i8> %l1 to <8 x i16> | 
|  | %l2 = load i8, ptr %p | 
|  | %s2 = zext i8 %l2 to i16 | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..7 take %s1 lanes 0..6. | 
|  | %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> | 
|  | ; Fill the undefined lane 0 with the zero-extended scalar. | 
|  | %ins = insertelement <8 x i16> %s, i16 %s2, i32 0 | 
|  | ret <8 x i16> %ins | 
|  | } | 
|  |  | 
|  | ; Zero-extending variant of the "insert last" pattern: both the <8 x i8> loaded | 
|  | ; from %p and the i8 loaded from %p+8 are zero-extended to i16 before the | 
|  | ; shift-down shuffle and the insert into lane 7. | 
|  | ; The expected output is a single zero-extending vldrb.u16 from [r0, #1]; the | 
|  | ; big-endian run additionally expects a vrev64.16. | 
|  | define <8 x i16> @inserti8_last_zext(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti8_last_zext: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti8_last_zext: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1] | 
|  | ; CHECKBE-NEXT:    vrev64.16 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 8 | 
|  | %l1 = load <8 x i8>, ptr %p | 
|  | %s1 = zext <8 x i8> %l1 to <8 x i16> | 
|  | %l2 = load i8, ptr %q | 
|  | %s2 = zext i8 %l2 to i16 | 
|  | ; Lanes 0..6 take %s1 lanes 1..7; lane 7 is left undefined by the shuffle. | 
|  | %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef> | 
|  | ; Fill the undefined lane 7 with the zero-extended scalar. | 
|  | %ins = insertelement <8 x i16> %s, i16 %s2, i32 7 | 
|  | ret <8 x i16> %ins | 
|  | } | 
|  |  | 
|  | ; "Insert first" pattern on an <8 x i32>: loads the vector from %p+4, shifts it | 
|  | ; up by one lane and inserts the i32 loaded from %p into lane 0, so lane k of | 
|  | ; the result holds the word at %p+4*k. | 
|  | ; Per the recorded little-endian output, one half of the result comes from a | 
|  | ; single vldrw.u32 at [r0], while the other half is still assembled from a | 
|  | ; vector load at [r0, #20], a scalar vldr at [r0, #16] and three vmov.f32 | 
|  | ; lane moves. The big-endian output has the same shape plus vrev64 instructions. | 
|  | define <8 x i32> @inserti32_first(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti32_first: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20] | 
|  | ; CHECKLE-NEXT:    vldr s4, [r0, #16] | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKLE-NEXT:    vmov.f32 s5, s8 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s6, s9 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s7, s10 | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti32_first: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q3, [r0, #20] | 
|  | ; CHECKBE-NEXT:    vldrb.u8 q1, [r0] | 
|  | ; CHECKBE-NEXT:    vldr s8, [r0, #16] | 
|  | ; CHECKBE-NEXT:    vmov.f32 s9, s12 | 
|  | ; CHECKBE-NEXT:    vrev64.8 q0, q1 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s10, s13 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s11, s14 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q1, q2 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 4 | 
|  | %l1 = load <8 x i32>, ptr %q | 
|  | %l2 = load i32, ptr %p | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..7 take %l1 lanes 0..6. | 
|  | %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> | 
|  | ; Fill the undefined lane 0 with the scalar loaded from %p. | 
|  | %ins = insertelement <8 x i32> %s, i32 %l2, i32 0 | 
|  | ret <8 x i32> %ins | 
|  | } | 
|  |  | 
|  | ; "Insert last" pattern on an <8 x i32>: loads the vector from %p, shifts it | 
|  | ; down by one lane and inserts the i32 loaded from %p+32 into lane 7, so lane k | 
|  | ; of the result holds the word at %p+4+4*k. | 
|  | ; Per the recorded little-endian output, one half of the result comes from a | 
|  | ; single vldrw.u32 at [r0, #20], while the other half is still assembled from a | 
|  | ; vector load at [r0], a scalar vldr at [r0, #16] and three vmov.f32 lane | 
|  | ; moves. The big-endian output has the same shape plus vrev64 instructions. | 
|  | define <8 x i32> @inserti32_last(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti32_last: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q2, [r0] | 
|  | ; CHECKLE-NEXT:    vldr s3, [r0, #16] | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #20] | 
|  | ; CHECKLE-NEXT:    vmov.f32 s0, s9 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s1, s10 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s2, s11 | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti32_last: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q3, [r0] | 
|  | ; CHECKBE-NEXT:    vldrb.u8 q0, [r0, #20] | 
|  | ; CHECKBE-NEXT:    vldr s11, [r0, #16] | 
|  | ; CHECKBE-NEXT:    vmov.f32 s8, s13 | 
|  | ; CHECKBE-NEXT:    vrev64.8 q1, q0 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s9, s14 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s10, s15 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q0, q2 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 32 | 
|  | %l1 = load <8 x i32>, ptr %p | 
|  | %l2 = load i32, ptr %q | 
|  | ; Lanes 0..6 take %l1 lanes 1..7; lane 7 is left undefined by the shuffle. | 
|  | %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef> | 
|  | ; Fill the undefined lane 7 with the scalar loaded from %p+32. | 
|  | %ins = insertelement <8 x i32> %s, i32 %l2, i32 7 | 
|  | ret <8 x i32> %ins | 
|  | } | 
|  |  | 
|  | ; Same shuffle-and-insert shape as inserti32_first, but the wide load %l1 has a | 
|  | ; second user: it is also an operand of the final add. The original vector | 
|  | ; therefore has to stay available alongside the shifted/inserted one. | 
|  | ; The recorded output keeps both vldrw.u32 loads of %l1 ([r0, #4] and | 
|  | ; [r0, #20]), builds one half of %ins with vmov.f32 lane moves, and loads the | 
|  | ; other half directly with a vldrw.u32 from [r0]. | 
|  | define <8 x i32> @inserti32_first_multiuse(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti32_first_multiuse: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #20] | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #4] | 
|  | ; CHECKLE-NEXT:    vmov.f32 s4, s11 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s5, s0 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s6, s1 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s7, s2 | 
|  | ; CHECKLE-NEXT:    vadd.i32 q1, q0, q1 | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKLE-NEXT:    vadd.i32 q0, q2, q0 | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti32_first_multiuse: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q0, [r0, #20] | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #4] | 
|  | ; CHECKBE-NEXT:    vmov.f32 s4, s11 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s5, s0 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s6, s1 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s7, s2 | 
|  | ; CHECKBE-NEXT:    vadd.i32 q0, q0, q1 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q1, q0 | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKBE-NEXT:    vadd.i32 q2, q2, q0 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q0, q2 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 4 | 
|  | %l1 = load <8 x i32>, ptr %q | 
|  | %l2 = load i32, ptr %p | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..7 take %l1 lanes 0..6. | 
|  | %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6> | 
|  | %ins = insertelement <8 x i32> %s, i32 %l2, i32 0 | 
|  | ; Second use of %l1: the unshifted vector is added to the shifted/inserted one. | 
|  | %a = add <8 x i32> %l1, %ins | 
|  | ret <8 x i32> %a | 
|  | } | 
|  |  | 
|  | ; Same shuffle-and-insert shape as inserti32_last, but the wide load %l1 has a | 
|  | ; second user: it is also an operand of the final add. The original vector | 
|  | ; therefore has to stay available alongside the shifted/inserted one. | 
|  | ; The recorded output keeps both vldrw.u32 loads of %l1 ([r0] and [r0, #16]), | 
|  | ; builds one half of %ins with vmov.f32 lane moves, and loads the other half | 
|  | ; directly with a vldrw.u32 from [r0, #20]. | 
|  | define <8 x i32> @inserti32_last_multiuse(ptr %p) { | 
|  | ; CHECKLE-LABEL: inserti32_last_multiuse: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #16] | 
|  | ; CHECKLE-NEXT:    vmov.f32 s8, s1 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s9, s2 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s10, s3 | 
|  | ; CHECKLE-NEXT:    vmov.f32 s11, s4 | 
|  | ; CHECKLE-NEXT:    vadd.i32 q0, q0, q2 | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20] | 
|  | ; CHECKLE-NEXT:    vadd.i32 q1, q1, q2 | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: inserti32_last_multiuse: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q1, [r0, #16] | 
|  | ; CHECKBE-NEXT:    vmov.f32 s8, s1 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s9, s2 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s10, s3 | 
|  | ; CHECKBE-NEXT:    vmov.f32 s11, s4 | 
|  | ; CHECKBE-NEXT:    vadd.i32 q2, q0, q2 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q0, q2 | 
|  | ; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #20] | 
|  | ; CHECKBE-NEXT:    vadd.i32 q2, q1, q2 | 
|  | ; CHECKBE-NEXT:    vrev64.32 q1, q2 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 32 | 
|  | %l1 = load <8 x i32>, ptr %p | 
|  | %l2 = load i32, ptr %q | 
|  | ; Lanes 0..6 take %l1 lanes 1..7; lane 7 is left undefined by the shuffle. | 
|  | %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef> | 
|  | %ins = insertelement <8 x i32> %s, i32 %l2, i32 7 | 
|  | ; Second use of %l1: the unshifted vector is added to the shifted/inserted one. | 
|  | %a = add <8 x i32> %l1, %ins | 
|  | ret <8 x i32> %a | 
|  | } | 
|  |  | 
|  | ; Floating-point "insert first" pattern: loads a <4 x float> from %p+4, shifts | 
|  | ; it up by one lane and inserts the float loaded from %p into lane 0, so lane k | 
|  | ; of the result holds the float at %p+4*k. | 
|  | ; The expected little-endian output is a single vldrw.u32 from [r0]; the | 
|  | ; big-endian run expects a vldrb.u8 from [r0] followed by vrev64.8. | 
|  | define <4 x float> @insertf32_first(ptr %p) { | 
|  | ; CHECKLE-LABEL: insertf32_first: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: insertf32_first: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u8 q1, [r0] | 
|  | ; CHECKBE-NEXT:    vrev64.8 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 4 | 
|  | %l1 = load <4 x float>, ptr %q | 
|  | %l2 = load float, ptr %p | 
|  | ; Lane 0 is left undefined by the shuffle; lanes 1..3 take %l1 lanes 0..2. | 
|  | %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2> | 
|  | ; Fill the undefined lane 0 with the scalar loaded from %p. | 
|  | %ins = insertelement <4 x float> %s, float %l2, i32 0 | 
|  | ret <4 x float> %ins | 
|  | } | 
|  |  | 
|  | ; Floating-point "insert last" pattern: loads a <4 x float> from %p, shifts it | 
|  | ; down by one lane and inserts the float loaded from %p+16 into lane 3, so lane | 
|  | ; k of the result holds the float at %p+4+4*k. | 
|  | ; The expected little-endian output is a single vldrw.u32 from [r0, #4]; the | 
|  | ; big-endian run expects a vldrb.u8 from [r0, #4] followed by vrev64.8. | 
|  | define <4 x float> @insertf32_last(ptr %p) { | 
|  | ; CHECKLE-LABEL: insertf32_last: | 
|  | ; CHECKLE:       @ %bb.0: | 
|  | ; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #4] | 
|  | ; CHECKLE-NEXT:    bx lr | 
|  | ; | 
|  | ; CHECKBE-LABEL: insertf32_last: | 
|  | ; CHECKBE:       @ %bb.0: | 
|  | ; CHECKBE-NEXT:    vldrb.u8 q1, [r0, #4] | 
|  | ; CHECKBE-NEXT:    vrev64.8 q0, q1 | 
|  | ; CHECKBE-NEXT:    bx lr | 
|  | %q = getelementptr inbounds i8, ptr %p, i32 16 | 
|  | %l1 = load <4 x float>, ptr %p | 
|  | %l2 = load float, ptr %q | 
|  | ; Lanes 0..2 take %l1 lanes 1..3; lane 3 is left undefined by the shuffle. | 
|  | %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef> | 
|  | ; Fill the undefined lane 3 with the scalar loaded from %p+16. | 
|  | %ins = insertelement <4 x float> %s, float %l2, i32 3 | 
|  | ret <4 x float> %ins | 
|  | } | 