|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s | 
|  | ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-postra-bias-addi=false < %s |\ | 
|  | ; RUN:   FileCheck -check-prefix=CHECK-P9-NO-HEURISTIC %s | 
|  |  | 
; Packed layout of the @scalars global: 16 bytes, one double, then 152 more
; bytes (176 bytes in total). Being packed, field 1 (the double) sits at byte
; offset 16; @test loads that field.
|  | %_type_of_scalars = type <{ [16 x i8], double, [152 x i8] }> | 
; Packed single-double wrappers; @test uses them as the element types of its
; getelementptr address computations on %.x and %.a respectively.
|  | %_elem_type_of_x = type <{ double }> | 
|  | %_elem_type_of_a = type <{ double }> | 
|  |  | 
; Zero-initialised, 16-byte-aligned common global; @test reads its field 1 as
; the scalar multiplier.
|  | @scalars = common dso_local local_unnamed_addr global %_type_of_scalars zeroinitializer, align 16 | 
|  |  | 
; @test: unrolled vector loop that multiplies doubles read through %.a by a
; scalar loaded from @scalars and writes some of the products through %.x.
;
;   %.x  (noalias)  destination base pointer
;   %.a             source base pointer
;   %.n  (noalias)  points to the i64 element count
;
; Each iteration advances 32 doubles (256 bytes). The IR performs eight
; <4 x double> loads and eight fast-math multiplies per iteration but stores
; only three of the products (chunks 0, 1 and 7). The expected assembly below
; accordingly contains just six 16-byte lxv/xvmuldp/stxv triples, at byte
; offsets 0, 16, 32, 48, 224 and 240, followed by a 256-byte pointer bump.
;
; The assertion lines are produced by utils/update_llc_test_checks.py for the
; two llc invocations at the top of the file; regenerate them with that script
; rather than editing them by hand. Both invocations currently expect the same
; instruction sequence.
|  | define dso_local void @test(ptr noalias %.x, ptr %.a, ptr noalias %.n) { | 
|  | ; CHECK-P9-LABEL: test: | 
|  | ; CHECK-P9:       # %bb.0: # %entry | 
|  | ; CHECK-P9-NEXT:    ld 5, 0(5) | 
|  | ; CHECK-P9-NEXT:    addis 6, 2, scalars@toc@ha | 
|  | ; CHECK-P9-NEXT:    addi 6, 6, scalars@toc@l | 
|  | ; CHECK-P9-NEXT:    rldicr 5, 5, 0, 58 | 
|  | ; CHECK-P9-NEXT:    addi 6, 6, 16 | 
|  | ; CHECK-P9-NEXT:    addi 5, 5, -32 | 
|  | ; CHECK-P9-NEXT:    lxvdsx 0, 0, 6 | 
|  | ; CHECK-P9-NEXT:    rldicl 5, 5, 59, 5 | 
|  | ; CHECK-P9-NEXT:    addi 5, 5, 1 | 
|  | ; CHECK-P9-NEXT:    mtctr 5 | 
|  | ; CHECK-P9-NEXT:    .p2align 4 | 
|  | ; CHECK-P9-NEXT:  .LBB0_1: # %vector.body | 
|  | ; CHECK-P9-NEXT:    # | 
|  | ; CHECK-P9-NEXT:    lxv 1, 16(4) | 
|  | ; CHECK-P9-NEXT:    lxv 2, 0(4) | 
|  | ; CHECK-P9-NEXT:    lxv 3, 48(4) | 
|  | ; CHECK-P9-NEXT:    lxv 4, 32(4) | 
|  | ; CHECK-P9-NEXT:    xvmuldp 2, 2, 0 | 
|  | ; CHECK-P9-NEXT:    lxv 5, 240(4) | 
|  | ; CHECK-P9-NEXT:    lxv 6, 224(4) | 
|  | ; CHECK-P9-NEXT:    xvmuldp 1, 1, 0 | 
|  | ; CHECK-P9-NEXT:    xvmuldp 4, 4, 0 | 
|  | ; CHECK-P9-NEXT:    xvmuldp 3, 3, 0 | 
|  | ; CHECK-P9-NEXT:    xvmuldp 6, 6, 0 | 
|  | ; CHECK-P9-NEXT:    xvmuldp 5, 5, 0 | 
|  | ; CHECK-P9-NEXT:    addi 4, 4, 256 | 
|  | ; CHECK-P9-NEXT:    stxv 1, 16(3) | 
|  | ; CHECK-P9-NEXT:    stxv 2, 0(3) | 
|  | ; CHECK-P9-NEXT:    stxv 3, 48(3) | 
|  | ; CHECK-P9-NEXT:    stxv 4, 32(3) | 
|  | ; CHECK-P9-NEXT:    stxv 5, 240(3) | 
|  | ; CHECK-P9-NEXT:    stxv 6, 224(3) | 
|  | ; CHECK-P9-NEXT:    addi 3, 3, 256 | 
|  | ; CHECK-P9-NEXT:    bdnz .LBB0_1 | 
|  | ; CHECK-P9-NEXT:  # %bb.2: # %return.block | 
|  | ; CHECK-P9-NEXT:    blr | 
|  | ; | 
|  | ; CHECK-P9-NO-HEURISTIC-LABEL: test: | 
|  | ; CHECK-P9-NO-HEURISTIC:       # %bb.0: # %entry | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    ld 5, 0(5) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addis 6, 2, scalars@toc@ha | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 6, 6, scalars@toc@l | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    rldicr 5, 5, 0, 58 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 6, 6, 16 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 5, 5, -32 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxvdsx 0, 0, 6 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    rldicl 5, 5, 59, 5 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 5, 5, 1 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    mtctr 5 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    .p2align 4 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:  .LBB0_1: # %vector.body | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    # | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 1, 16(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 2, 0(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 3, 48(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 4, 32(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 2, 2, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 5, 240(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 6, 224(4) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 1, 1, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 4, 4, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 3, 3, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 6, 6, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 5, 5, 0 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 4, 4, 256 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 1, 16(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 2, 0(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 3, 48(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 4, 32(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 5, 240(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 6, 224(3) | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    addi 3, 3, 256 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    bdnz .LBB0_1 | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:  # %bb.2: # %return.block | 
|  | ; CHECK-P9-NO-HEURISTIC-NEXT:    blr | 
; Loop preheader: set up biased base pointers, the loop bound and the splat.
|  | entry: | 
; Bias both base pointers back by one element (index -1). The loop adds the
; element back through its "index | 1" offset.
|  | %x_rvo_based_addr_3 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1 | 
|  | %a_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_a], ptr %.a, i64 0, i64 -1 | 
; Load the element count and the scalar multiplier (field 1 of @scalars).
|  | %_val_n_ = load i64, ptr %.n, align 8 | 
|  | %_val_c1_ = load double, ptr getelementptr inbounds (%_type_of_scalars, ptr @scalars, i64 0, i32 1), align 16 | 
; Clear the low five bits of the count: %n.vec is a multiple of 32 and is the
; value the loop index is compared against.
|  | %n.vec = and i64 %_val_n_, -32 | 
; Broadcast the multiplier into all four lanes (insert into lane 0, then
; shuffle with an all-zero mask).
|  | %broadcast.splatinsert26 = insertelement <4 x double> undef, double %_val_c1_, i32 0 | 
|  | %broadcast.splat27 = shufflevector <4 x double> %broadcast.splatinsert26, <4 x double> undef, <4 x i32> zeroinitializer | 
; Unconditional entry into the loop: there is no check of %n.vec before the
; first iteration, so the body always executes at least once.
|  | br label %vector.body | 
|  |  | 
; Loop body: one iteration handles 32 consecutive doubles.
|  | vector.body: | 
; %index starts at 0 and grows by 32 each iteration, so its low bit is always
; clear and "or disjoint ..., 1" yields %index + 1. Combined with the -1 bias
; applied in entry, %0 and %1 address element %index of %.x and %.a.
|  | %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] | 
|  | %offset.idx = or disjoint i64 %index, 1 | 
|  | %0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0 | 
|  | %1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0 | 
; Eight back-to-back <4 x double> loads from the source, at element offsets
; 0, 4, 8, ..., 28 from %1 (only 8-byte alignment is promised).
|  | %wide.load = load <4 x double>, ptr %1, align 8 | 
|  | %2 = getelementptr double, ptr %1, i64 4 | 
|  | %wide.load19 = load <4 x double>, ptr %2, align 8 | 
|  | %3 = getelementptr double, ptr %1, i64 8 | 
|  | %wide.load20 = load <4 x double>, ptr %3, align 8 | 
|  | %4 = getelementptr double, ptr %1, i64 12 | 
|  | %wide.load21 = load <4 x double>, ptr %4, align 8 | 
|  | %5 = getelementptr double, ptr %1, i64 16 | 
|  | %wide.load22 = load <4 x double>, ptr %5, align 8 | 
|  | %6 = getelementptr double, ptr %1, i64 20 | 
|  | %wide.load23 = load <4 x double>, ptr %6, align 8 | 
|  | %7 = getelementptr double, ptr %1, i64 24 | 
|  | %wide.load24 = load <4 x double>, ptr %7, align 8 | 
|  | %8 = getelementptr double, ptr %1, i64 28 | 
|  | %wide.load25 = load <4 x double>, ptr %8, align 8 | 
; Scale every loaded chunk by the broadcast multiplier (fast-math flags set).
|  | %9 = fmul fast <4 x double> %wide.load, %broadcast.splat27 | 
|  | %10 = fmul fast <4 x double> %wide.load19, %broadcast.splat27 | 
|  | %11 = fmul fast <4 x double> %wide.load20, %broadcast.splat27 | 
|  | %12 = fmul fast <4 x double> %wide.load21, %broadcast.splat27 | 
|  | %13 = fmul fast <4 x double> %wide.load22, %broadcast.splat27 | 
|  | %14 = fmul fast <4 x double> %wide.load23, %broadcast.splat27 | 
|  | %15 = fmul fast <4 x double> %wide.load24, %broadcast.splat27 | 
|  | %16 = fmul fast <4 x double> %wide.load25, %broadcast.splat27 | 
; Only products %9, %10 and %16 are written back (destination element offsets
; 0, 4 and 28). Products %11-%15 and addresses %18-%22 have no users in this
; function, which matches the six loads/stores in the expected assembly.
|  | store <4 x double> %9, ptr %0, align 8 | 
|  | %17 = getelementptr double, ptr %0, i64 4 | 
|  | store <4 x double> %10, ptr %17, align 8 | 
|  | %18 = getelementptr double, ptr %0, i64 8 | 
|  | %19 = getelementptr double, ptr %0, i64 12 | 
|  | %20 = getelementptr double, ptr %0, i64 16 | 
|  | %21 = getelementptr double, ptr %0, i64 20 | 
|  | %22 = getelementptr double, ptr %0, i64 24 | 
|  | %23 = getelementptr double, ptr %0, i64 28 | 
|  | store <4 x double> %16, ptr %23, align 8 | 
; Loop latch: advance by 32 elements and leave once the index equals %n.vec.
|  | %index.next = add i64 %index, 32 | 
|  | %cm = icmp eq i64 %index.next, %n.vec | 
|  | br i1 %cm, label %return.block, label %vector.body | 
|  |  | 
; Single exit; the function returns nothing.
|  | return.block: | 
|  | ret void | 
|  | } | 
|  |  |