| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs -O0 \ |
| ; RUN: | %ptxas-verify -arch=sm_90 \ |
| ; RUN: %} |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ |
| ; RUN: -verify-machineinstrs \ |
| ; RUN: | %ptxas-verify -arch=sm_90 \ |
| ; RUN: %} |
| |
| ; Module-level target settings shared by every function in this test. |
| target triple = "nvptx64-nvidia-cuda" |
| ; NOTE(review): per the LLVM LangRef the `m:o` component requests Mach-O name |
| ; mangling, which looks unusual for an NVPTX module - confirm it is intentional. |
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
| |
| ; Bitcast of a <2 x i8> argument to i16, returned directly. |
| ; At -O0 the checks expect the two bytes to be loaded as a v2.b8 pair and |
| ; recombined with shl/or before the 32-bit return store; the run without an |
| ; explicit -O flag expects a single 16-bit parameter load feeding that store. |
| define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) { |
| ; O0-LABEL: test_bitcast_2xi8_i16( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<5>; |
| ; O0-NEXT: .reg .b32 %r<3>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; |
| ; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; |
| ; O0-NEXT: shl.b16 %rs3, %rs2, 8; |
| ; O0-NEXT: or.b16 %rs4, %rs1, %rs3; |
| ; O0-NEXT: cvt.u32.u16 %r2, %rs4; |
| ; O0-NEXT: st.param.b32 [func_retval0], %r2; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_bitcast_2xi8_i16( |
| ; O3: { |
| ; O3-NEXT: .reg .b32 %r<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0]; |
| ; O3-NEXT: st.param.b32 [func_retval0], %r1; |
| ; O3-NEXT: ret; |
| %res = bitcast <2 x i8> %a to i16 |
| ret i16 %res |
| } |
| |
| ; Bitcast of an i16 argument to <2 x i8>, returned directly. |
| ; Both runs expect the same code: one b16 parameter load and one b16 store to |
| ; the return slot, with no per-byte splitting. |
| define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) { |
| ; O0-LABEL: test_bitcast_i16_2xi8( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; |
| ; O0-NEXT: st.param.b16 [func_retval0], %rs1; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_bitcast_i16_2xi8( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; |
| ; O3-NEXT: st.param.b16 [func_retval0], %rs1; |
| ; O3-NEXT: ret; |
| %res = bitcast i16 %a to <2 x i8> |
| ret <2 x i8> %res |
| } |
| |
| ; Passes a <2 x i8> argument to a call (the function calls itself) and returns |
| ; the call's <2 x i8> result. |
| ; The checks expect both the outgoing argument and the return value to use |
| ; 2-byte, 2-aligned .param arrays accessed with v2.b8 stores and loads. The -O0 |
| ; run additionally expects a mov.b32 that packs the two loaded bytes. |
| define <2 x i8> @test_call_2xi8(<2 x i8> %a) { |
| ; O0-LABEL: test_call_2xi8( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<5>; |
| ; O0-NEXT: .reg .b32 %r<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; |
| ; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; |
| ; O0-NEXT: { // callseq 0, 0 |
| ; O0-NEXT: .param .align 2 .b8 param0[2]; |
| ; O0-NEXT: .param .align 2 .b8 retval0[2]; |
| ; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; |
| ; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0); |
| ; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; |
| ; O0-NEXT: } // callseq 0 |
| ; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_call_2xi8( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<5>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; |
| ; O3-NEXT: { // callseq 0, 0 |
| ; O3-NEXT: .param .align 2 .b8 param0[2]; |
| ; O3-NEXT: .param .align 2 .b8 retval0[2]; |
| ; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; |
| ; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0); |
| ; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; |
| ; O3-NEXT: } // callseq 0 |
| ; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; |
| ; O3-NEXT: ret; |
| %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a) |
| ret <2 x i8> %res |
| } |
| |
| ; Unsigned integer-to-float conversion of a <2 x i8> argument to <2 x float>. |
| ; The checks expect each lane to be converted on its own with cvt.rn.f32.u16 |
| ; from the 16-bit register holding that byte, and the two results to be |
| ; returned with a single v2.b32 store. |
| define <2 x float> @test_uitofp_2xi8(<2 x i8> %a) { |
| ; O0-LABEL: test_uitofp_2xi8( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<3>; |
| ; O0-NEXT: .reg .b32 %r<4>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_uitofp_2xi8_param_0]; |
| ; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; |
| ; O0-NEXT: cvt.rn.f32.u16 %r2, %rs2; |
| ; O0-NEXT: cvt.rn.f32.u16 %r3, %rs1; |
| ; O0-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_uitofp_2xi8( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<3>; |
| ; O3-NEXT: .reg .b32 %r<3>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_uitofp_2xi8_param_0]; |
| ; O3-NEXT: cvt.rn.f32.u16 %r1, %rs2; |
| ; O3-NEXT: cvt.rn.f32.u16 %r2, %rs1; |
| ; O3-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; |
| ; O3-NEXT: ret; |
| %1 = uitofp <2 x i8> %a to <2 x float> |
| ret <2 x float> %1 |
| } |
| |
| ; Stores a <2 x i8> argument through a pointer with only 1-byte alignment. |
| ; The checks expect the store to be emitted as two separate st.b8 instructions |
| ; (offset 1 first, then offset 0) rather than one 16-bit store. |
| define void @test_store_i8x2_unaligned(ptr %ptr, <2 x i8> %a) { |
| ; O0-LABEL: test_store_i8x2_unaligned( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<3>; |
| ; O0-NEXT: .reg .b32 %r<2>; |
| ; O0-NEXT: .reg .b64 %rd<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0]; |
| ; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1]; |
| ; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; |
| ; O0-NEXT: st.b8 [%rd1+1], %rs2; |
| ; O0-NEXT: st.b8 [%rd1], %rs1; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_store_i8x2_unaligned( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<3>; |
| ; O3-NEXT: .reg .b64 %rd<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0]; |
| ; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1]; |
| ; O3-NEXT: st.b8 [%rd1+1], %rs2; |
| ; O3-NEXT: st.b8 [%rd1], %rs1; |
| ; O3-NEXT: ret; |
| store <2 x i8> %a, ptr %ptr, align 1 |
| ret void |
| } |
| |
| ; Stores the constant vector <i8 1, i8 2> through a pointer with only 1-byte |
| ; alignment. |
| ; The checks expect two st.b8 instructions whose stored values are the |
| ; immediates 2 (at offset 1) and 1 (at offset 0), with no register holding the |
| ; constant. |
| define void @test_store_i8x2_unaligned_immediate(ptr %ptr) { |
| ; O0-LABEL: test_store_i8x2_unaligned_immediate( |
| ; O0: { |
| ; O0-NEXT: .reg .b64 %rd<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0]; |
| ; O0-NEXT: st.b8 [%rd1+1], 2; |
| ; O0-NEXT: st.b8 [%rd1], 1; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_store_i8x2_unaligned_immediate( |
| ; O3: { |
| ; O3-NEXT: .reg .b64 %rd<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0]; |
| ; O3-NEXT: st.b8 [%rd1+1], 2; |
| ; O3-NEXT: st.b8 [%rd1], 1; |
| ; O3-NEXT: ret; |
| store <2 x i8> <i8 1, i8 2>, ptr %ptr, align 1 |
| ret void |
| } |
| |
| ; Loads a <2 x i8> with only 1-byte alignment, zero-extends it to <2 x i16>, |
| ; and returns the result reinterpreted as an i32. |
| ; The checks expect two separate ld.b8 byte loads (offset 1, then offset 0) and |
| ; the two 16-bit values written to the return slot with one v2.b16 store. |
| define i32 @test_zext_load_i8x2_unaligned(ptr %ptr) { |
| ; O0-LABEL: test_zext_load_i8x2_unaligned( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<3>; |
| ; O0-NEXT: .reg .b64 %rd<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0]; |
| ; O0-NEXT: ld.b8 %rs1, [%rd1+1]; |
| ; O0-NEXT: ld.b8 %rs2, [%rd1]; |
| ; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_zext_load_i8x2_unaligned( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<3>; |
| ; O3-NEXT: .reg .b64 %rd<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0]; |
| ; O3-NEXT: ld.b8 %rs1, [%rd1+1]; |
| ; O3-NEXT: ld.b8 %rs2, [%rd1]; |
| ; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; |
| ; O3-NEXT: ret; |
| %a = load <2 x i8>, ptr %ptr, align 1 |
| %b = zext <2 x i8> %a to <2 x i16> |
| %c = bitcast <2 x i16> %b to i32 |
| ret i32 %c |
| } |
| |
| ; Loads a <2 x i8> with only 1-byte alignment, sign-extends it to <2 x i16>, |
| ; and returns the result reinterpreted as an i32. |
| ; The checks expect two separate ld.s8 byte loads (offset 1, then offset 0) and |
| ; the two 16-bit values written to the return slot with one v2.b16 store. |
| define i32 @test_sext_load_i8x2_unaligned(ptr %ptr) { |
| ; O0-LABEL: test_sext_load_i8x2_unaligned( |
| ; O0: { |
| ; O0-NEXT: .reg .b16 %rs<3>; |
| ; O0-NEXT: .reg .b64 %rd<2>; |
| ; O0-EMPTY: |
| ; O0-NEXT: // %bb.0: |
| ; O0-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0]; |
| ; O0-NEXT: ld.s8 %rs1, [%rd1+1]; |
| ; O0-NEXT: ld.s8 %rs2, [%rd1]; |
| ; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; |
| ; O0-NEXT: ret; |
| ; |
| ; O3-LABEL: test_sext_load_i8x2_unaligned( |
| ; O3: { |
| ; O3-NEXT: .reg .b16 %rs<3>; |
| ; O3-NEXT: .reg .b64 %rd<2>; |
| ; O3-EMPTY: |
| ; O3-NEXT: // %bb.0: |
| ; O3-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0]; |
| ; O3-NEXT: ld.s8 %rs1, [%rd1+1]; |
| ; O3-NEXT: ld.s8 %rs2, [%rd1]; |
| ; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; |
| ; O3-NEXT: ret; |
| %a = load <2 x i8>, ptr %ptr, align 1 |
| %b = sext <2 x i8> %a to <2 x i16> |
| %c = bitcast <2 x i16> %b to i32 |
| ret i32 %c |
| } |
| |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; COMMON: {{.*}} |