diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b9d8d8f48c5..725bc9e7d0c 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -138,10 +138,12 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   setOperationAction(ISD::BR_CC, MVT::i16, Expand);
   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
   setOperationAction(ISD::BR_CC, MVT::i64, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+  // Some SIGN_EXTEND_INREG can be done using the cvt instruction.
+  // For others we will expand to a SHL/SRA pair.
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
   if (nvptxSubtarget.hasROT64()) {
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 013e24c92b8..e6335a0d8e0 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -298,6 +298,7 @@ multiclass F2 {
 // General Type Conversion
 //-----------------------------------
 
+let neverHasSideEffects = 1 in {
 // Generate a cvt to the given type from all possible types.
 // Each instance takes a CvtMode immediate that defines the conversion mode to
 // use. It can be CvtNONE to omit a conversion mode.
@@ -360,6 +361,23 @@ defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
 defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
 defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
 
+// This set of cvt is different from the above. The types of the source
+// and target are the same.
+//
+def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+                                 "cvt.s16.s8 \t$dst, $src;", []>;
+def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+                                 "cvt.s32.s8 \t$dst, $src;", []>;
+def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+                                  "cvt.s32.s16 \t$dst, $src;", []>;
+def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+                                 "cvt.s64.s8 \t$dst, $src;", []>;
+def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+                                  "cvt.s64.s16 \t$dst, $src;", []>;
+def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+                                  "cvt.s64.s32 \t$dst, $src;", []>;
+}
+
 //-----------------------------------
 // Integer Arithmetic
 //-----------------------------------
@@ -2349,6 +2367,14 @@ def : Pat<(i1 (trunc Int32Regs:$a)),
 def : Pat<(i1 (trunc Int16Regs:$a)),
           (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
 
+// sext_inreg
+def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>;
+def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>;
+def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>;
+def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>;
+def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>;
+def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
+
 // Select instructions with 32-bit predicates
 
 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
diff --git a/test/CodeGen/NVPTX/sext-in-reg.ll b/test/CodeGen/NVPTX/sext-in-reg.ll
new file mode 100644
index 00000000000..4761fb59013
--- /dev/null
+++ b/test/CodeGen/NVPTX/sext-in-reg.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+define void @one(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+; CHECK: cvt.s64.s8
+; CHECK: cvt.s64.s8
+entry:
+  %sext = shl i64 %a, 56
+  %conv1 = ashr exact i64 %sext, 56
+  %sext1 = shl i64 %b, 56
+  %conv4 = ashr exact i64 %sext1, 56
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @two(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+entry:
+; CHECK: cvt.s64.s32
+; CHECK: cvt.s64.s32
+  %sext = shl i64 %a, 32
+  %conv1 = ashr exact i64 %sext, 32
+  %sext1 = shl i64 %b, 32
+  %conv4 = ashr exact i64 %sext1, 32
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @three(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+entry:
+; CHECK: cvt.s64.s16
+; CHECK: cvt.s64.s16
+  %sext = shl i64 %a, 48
+  %conv1 = ashr exact i64 %sext, 48
+  %sext1 = shl i64 %b, 48
+  %conv4 = ashr exact i64 %sext1, 48
+  %shr = ashr i64 %a, 16
+  %shr9 = ashr i64 %b, 16
+  %add = add nsw i64 %conv4, %conv1
+  store i64 %add, i64* %p1, align 8
+  %add17 = add nsw i64 %shr9, %shr
+  store i64 %add17, i64* %p2, align 8
+  ret void
+}
+
+
+define void @four(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+entry:
+; CHECK: cvt.s32.s8
+; CHECK: cvt.s32.s8
+  %sext = shl i32 %a, 24
+  %conv1 = ashr exact i32 %sext, 24
+  %sext1 = shl i32 %b, 24
+  %conv4 = ashr exact i32 %sext1, 24
+  %shr = ashr i32 %a, 16
+  %shr9 = ashr i32 %b, 16
+  %add = add nsw i32 %conv4, %conv1
+  store i32 %add, i32* %p1, align 4
+  %add17 = add nsw i32 %shr9, %shr
+  store i32 %add17, i32* %p2, align 4
+  ret void
+}
+
+
+define void @five(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+entry:
+; CHECK: cvt.s32.s16
+; CHECK: cvt.s32.s16
+  %sext = shl i32 %a, 16
+  %conv1 = ashr exact i32 %sext, 16
+  %sext1 = shl i32 %b, 16
+  %conv4 = ashr exact i32 %sext1, 16
+  %shr = ashr i32 %a, 16
+  %shr9 = ashr i32 %b, 16
+  %add = add nsw i32 %conv4, %conv1
+  store i32 %add, i32* %p1, align 4
+  %add17 = add nsw i32 %shr9, %shr
+  store i32 %add17, i32* %p2, align 4
+  ret void
+}
+
+
+define void @six(i16 %a, i16 %b, i16* %p1, i16* %p2) {
+entry:
+; CHECK: cvt.s16.s8
+; CHECK: cvt.s16.s8
+  %sext = shl i16 %a, 8
+  %conv1 = ashr exact i16 %sext, 8
+  %sext1 = shl i16 %b, 8
+  %conv4 = ashr exact i16 %sext1, 8
+  %shr = ashr i16 %a, 8
+  %shr9 = ashr i16 %b, 8
+  %add = add nsw i16 %conv4, %conv1
+  store i16 %add, i16* %p1, align 4
+  %add17 = add nsw i16 %shr9, %shr
+  store i16 %add17, i16* %p2, align 4
+  ret void
+}
\ No newline at end of file