From 4ce8b9c7fba75ea9703088b1853fdc22cf5bd211 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Mon, 13 Apr 2015 16:26:00 +0000 Subject: [PATCH] R600: Add carry and borrow instructions. Use them to implement UADDO/USUBO v2: tighten the sub64 tests v3: rename to CARRY/BORROW v4: fixup test cmdline add known bits computation use sign extend instead of sub 0,x better add test v5: remove redundant break move lowering to separate functions fix comments Signed-off-by: Jan Vesely Reviewers: arsenm git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234759 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 10 ++++++++ lib/Target/R600/AMDGPUISelLowering.h | 2 ++ lib/Target/R600/AMDGPUInstrInfo.td | 7 ++++++ lib/Target/R600/AMDGPUSubtarget.h | 8 ++++++ lib/Target/R600/EvergreenInstructions.td | 3 +++ lib/Target/R600/R600ISelLowering.cpp | 31 ++++++++++++++++++++++-- lib/Target/R600/R600ISelLowering.h | 2 ++ test/CodeGen/R600/add.ll | 26 ++++++++++++++++++++ test/CodeGen/R600/sub.ll | 18 +++++++++----- test/CodeGen/R600/uaddo.ll | 17 ++++++++++++- test/CodeGen/R600/usubo.ll | 21 +++++++++++++++- 11 files changed, 135 insertions(+), 10 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 7c5235d1507..41ab6c29b1e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -2763,6 +2763,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( KnownZero, KnownOne, DAG, Depth); break; + case AMDGPUISD::CARRY: + case AMDGPUISD::BORROW: { + KnownZero = APInt::getHighBitsSet(32, 31); + break; + } + case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { ConstantSDNode *CWidth = dyn_cast(Op.getOperand(2)); @@ -2805,6 +2811,10 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; } + case AMDGPUISD::CARRY: + case AMDGPUISD::BORROW: + return 31; + default: return 1; } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 6bc6ca5bbc5..d231bcac2ab 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -250,6 +250,8 @@ enum { LDEXP, FP_CLASS, DOT4, + CARRY, + BORROW, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 901eb5110f2..d72cb1d7f8c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -136,6 +136,13 @@ def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] >; +// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0 +def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>; + +// out = (src1 > src0) ? 1 : 0 +def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; + + def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []>; def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index aeb0817553d..2b7f6f22110 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -183,6 +183,14 @@ public: return (getGeneration() >= EVERGREEN); } + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + bool IsIRStructurizerEnabled() const { return EnableIRStructurizer; } diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 55601469009..7adcd46fe19 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -335,6 +335,9 @@ defm CUBE_eg : CUBE_Common<0xC0>; def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; +def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>; +def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>; + def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>; def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b6b7067f7e1..8d466b9c891 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -91,6 +91,15 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v2i32, Expand); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + // ADD, SUB overflow. These need to be Custom because + // SelectionDAGLegalize::LegalizeOp (LegalizeDAG.cpp) + // turns Legal into expand + if (Subtarget->hasCARRY()) + setOperationAction(ISD::UADDO, MVT::i32, Custom); + + if (Subtarget->hasBORROW()) + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // Expand sign extension of vectors if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -163,8 +172,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setOperationAction(ISD::SUB, MVT::i64, Expand); - // These should be replaced by UDVIREM, but it does not happen automatically // during Type Legalization setOperationAction(ISD::UDIV, MVT::i64, Custom); @@ -585,6 +592,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); + case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); + case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -1076,6 +1085,24 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); } +SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, + unsigned mainop, unsigned ovf) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + + SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi); + // Extend sign. + OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, + DAG.getValueType(MVT::i1)); + + SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index c54719574f9..5eb63e389e1 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -63,6 +63,8 @@ private: SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, + unsigned mainop, unsigned ovf) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index 70271616796..655e75dbc1a 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -62,6 +62,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT + ; SI: s_add_i32 ; SI: s_add_i32 ; SI: s_add_i32 @@ -94,6 +95,7 @@ entry: ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT + ; SI: s_add_i32 ; SI: s_add_i32 ; SI: s_add_i32 @@ -120,6 +122,14 @@ entry: ; FUNC-LABEL: {{^}}add64: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = add i64 %a, %b @@ -134,6 +144,14 @@ entry: ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: ; SI-NOT: v_addc_u32_e32 s + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64, i64 addrspace(1)* %in @@ -146,6 +164,14 @@ entry: ; FUNC-LABEL: {{^}}add64_in_branch: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 7216a312ebc..b7fba0efa5b 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -58,10 +58,13 @@ define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1) ; SI: s_sub_u32 ; SI: s_subb_u32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{[* ]*}}[[LO]] +; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT -; EG-DAG: SETGT_UINT -; EG-DAG: SUB_INT -; EG-DAG: ADD_INT +; EG-DAG: SUB_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b store i64 %result, i64 addrspace(1)* %out, align 8 @@ -72,10 +75,13 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{[* ]*}}[[LO]] +; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT -; EG-DAG: SETGT_UINT -; EG-DAG: SUB_INT -; EG-DAG: ADD_INT +; EG-DAG: SUB_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() readnone %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll index 9f383654b70..11438f267ad 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone @@ -9,6 +9,9 @@ declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone ; SI: add ; SI: addc ; SI: addc + +; EG: ADDC_UINT +; EG: ADDC_UINT define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %uadd, 0 @@ -21,6 +24,9 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { ; FUNC-LABEL: {{^}}s_uaddo_i32: ; SI: s_add_i32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %uadd, 0 @@ -32,6 +38,9 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}v_uaddo_i32: ; SI: v_add_i32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load i32, i32 addrspace(1)* %aptr, align 4 %b = load i32, i32 addrspace(1)* %bptr, align 4 @@ -46,6 +55,9 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}s_uaddo_i64: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %uadd, 0 @@ -58,6 +70,9 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 ; FUNC-LABEL: {{^}}v_uaddo_i64: ; SI: v_add_i32 ; SI: v_addc_u32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4 diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll index a753ca4e979..3c9b1622a07 100644 --- a/test/CodeGen/R600/usubo.ll +++ b/test/CodeGen/R600/usubo.ll @@ -1,11 +1,14 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone ; FUNC-LABEL: {{^}}usubo_i64_zext: + +; EG: SUBB_UINT +; EG: ADDC_UINT define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %usub, 0 @@ -18,6 +21,9 @@ define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { ; FUNC-LABEL: {{^}}s_usubo_i32: ; SI: s_sub_i32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %usub, 0 @@ -29,6 +35,9 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}v_usubo_i32: ; SI: v_subrev_i32_e32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load i32, i32 addrspace(1)* %aptr, align 4 %b = load i32, i32 addrspace(1)* %bptr, align 4 @@ -43,6 +52,11 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}s_usubo_i64: ; SI: s_sub_u32 ; SI: s_subb_u32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG: SUB_INT define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %usub, 0 @@ -55,6 +69,11 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 ; FUNC-LABEL: {{^}}v_usubo_i64: ; SI: v_sub_i32 ; SI: v_subb_u32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG: SUB_INT define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4