R600: Try to use narrower types for 64-bit division if possible

v2: add and enable tests for SI

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Matt Arsenault <Matthew.Arsenault@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226881 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jan Vesely 2015-01-22 23:42:43 +00:00
parent 3ff611bb38
commit 1d07592ec7
4 changed files with 393 additions and 15 deletions

View File

@ -1661,6 +1661,20 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
if (VT == MVT::i64 &&
DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
LHS_Lo, RHS_Lo);
SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
Results.push_back(DIV);
Results.push_back(REM);
return;
}
// Get Speculative values
SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
@ -1722,8 +1736,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Den = Op.getOperand(1);
if (VT == MVT::i32) {
if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
// TODO: We technically could do this for i64, but shouldn't that just be
// handled by something generally reducing 64-bit division on 32-bit
// values to 32-bit?
@ -1835,19 +1849,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
if (VT == MVT::i32) {
if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
// TODO: We technically could do this for i64, but shouldn't that just be
// handled by something generally reducing 64-bit division on 32-bit
// values to 32-bit?
return LowerDIVREM24(Op, DAG, true);
}
}
SDValue Zero = DAG.getConstant(0, VT);
SDValue NegOne = DAG.getConstant(-1, VT);
if (VT == MVT::i32 &&
DAG.ComputeNumSignBits(LHS) > 8 &&
DAG.ComputeNumSignBits(RHS) > 8) {
return LowerDIVREM24(Op, DAG, true);
}
if (VT == MVT::i64 &&
DAG.ComputeNumSignBits(LHS) > 32 &&
DAG.ComputeNumSignBits(RHS) > 32) {
EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
//HiLo split
SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
LHS_Lo, RHS_Lo);
SDValue Res[2] = {
DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
};
return DAG.getMergeValues(Res, DL);
}
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);

View File

@ -43,7 +43,6 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
@ -90,6 +89,7 @@ protected:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;

View File

@ -0,0 +1,218 @@
; Codegen checks for 64-bit signed division and remainder. The first
; invocation targets the SI processor, which is selected through the amdgcn
; architecture (as in the unsigned companion test); the second targets redwood
; through the r600 architecture.
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test_sdiv:
;EG: RECIP_UINT
;EG: LSHL {{.*}}, 1,
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI: v_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
;FUNC-LABEL: {{^}}test_srem:
;EG: RECIP_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
; Narrow-operand case: both sdiv operands come from `ashr i64 ..., 33`, so each
; is a sign-extended 31-bit value held in an i64.
; Expected output still contains RECIP_UINT on redwood but no BFE_UINT after
; it; on SI none of s_bfe_u32, v_mad_f32 or v_lshr_64 may appear before
; s_endpgm.
; NOTE(review): the full-width test_sdiv in this file matches v_bfe_u32 on SI,
; so forbidding only s_bfe_u32 here may not catch a fallback to the 64-bit
; expansion - consider also forbidding v_bfe_u32 after checking llc output.
;FUNC-LABEL: {{^}}test_sdiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = sdiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; Narrow-operand case: both srem operands come from `ashr i64 ..., 33`, so each
; is a sign-extended 31-bit value held in an i64.
; Expected output still contains RECIP_UINT on redwood but no BFE_UINT after
; it; on SI none of s_bfe_u32, v_mad_f32 or v_lshr_64 may appear before
; s_endpgm.
; NOTE(review): the full-width signed test in this file (test_sdiv) matches
; v_bfe_u32 on SI, so forbidding only s_bfe_u32 here may not catch a fallback
; to the 64-bit expansion - consider also forbidding v_bfe_u32 after checking
; llc output.
;FUNC-LABEL: {{^}}test_srem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = srem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; 24-bit operand case: both sdiv operands come from `ashr i64 ..., 40`, so each
; is a sign-extended 24-bit value held in an i64.
; On redwood the checks expect two INT_TO_FLT and one FLT_TO_INT, with no
; RECIP_UINT or BFE_UINT afterwards. On SI they expect v_mad_f32 with no
; s_bfe_u32 before it and no v_lshr_64 between it and s_endpgm.
; NOTE(review): the int/float conversions suggest the float-based 24-bit
; division path is selected - confirm against the lowering code.
;FUNC-LABEL: {{^}}test_sdiv2464:
;EG: INT_TO_FLT
;EG: INT_TO_FLT
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = sdiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; 24-bit operand case: both srem operands come from `ashr i64 ..., 40`, so each
; is a sign-extended 24-bit value held in an i64.
; On redwood the checks expect two INT_TO_FLT and one FLT_TO_INT, with no
; RECIP_UINT or BFE_UINT afterwards. On SI they expect v_mad_f32 with no
; s_bfe_u32 before it and no v_lshr_64 between it and s_endpgm.
; NOTE(review): the int/float conversions suggest the float-based 24-bit
; division path is selected - confirm against the lowering code.
;FUNC-LABEL: {{^}}test_srem2464:
;EG: INT_TO_FLT
;EG: INT_TO_FLT
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = srem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}

View File

@ -1,5 +1,5 @@
;XUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test_udiv:
;EG: RECIP_UINT
@ -34,6 +34,39 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
@ -74,9 +107,110 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
; Full-width case: urem on two unconstrained i64 kernel arguments, with the
; remainder stored to %out.
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
; Narrow-operand case: both udiv operands come from `lshr i64 ..., 33`, so the
; upper 33 bits of each are zero and the values fit in 31 bits.
; Expected output still contains RECIP_UINT on redwood but no BFE_UINT after
; it; on SI none of s_bfe_u32, v_mad_f32 or v_lshr_64 may appear before
; s_endpgm.
;FUNC-LABEL: {{^}}test_udiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = udiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; Narrow-operand case: both urem operands come from `lshr i64 ..., 33`, so the
; upper 33 bits of each are zero and the values fit in 31 bits.
; Expected output still contains RECIP_UINT on redwood but no BFE_UINT after
; it; on SI none of s_bfe_u32, v_mad_f32 or v_lshr_64 may appear before
; s_endpgm.
;FUNC-LABEL: {{^}}test_urem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: s_bfe_u32
;SI-NOT: v_mad_f32
;SI-NOT: v_lshr_64
;SI: s_endpgm
define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = urem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; 24-bit operand case: both udiv operands come from `lshr i64 ..., 40`, so the
; upper 40 bits of each are zero and the values fit in 24 bits.
; On redwood the checks expect two UINT_TO_FLT and one FLT_TO_UINT, with no
; RECIP_UINT or BFE_UINT afterwards. On SI they expect v_mad_f32 with no
; v_lshr_64 before it, followed by s_endpgm.
; NOTE(review): the int/float conversions suggest the float-based 24-bit
; division path is selected - confirm against the lowering code.
;FUNC-LABEL: {{^}}test_udiv2464:
;EG: UINT_TO_FLT
;EG: UINT_TO_FLT
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: v_lshr_64
;SI: v_mad_f32
;SI: s_endpgm
define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 40
%2 = lshr i64 %y, 40
%result = udiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}
; 24-bit operand case: both urem operands come from `lshr i64 ..., 40`, so the
; upper 40 bits of each are zero and the values fit in 24 bits.
; On redwood the checks expect two UINT_TO_FLT and one FLT_TO_UINT, with no
; RECIP_UINT or BFE_UINT afterwards. On SI they expect v_mad_f32 with no
; v_lshr_64 before it, followed by s_endpgm.
; NOTE(review): the int/float conversions suggest the float-based 24-bit
; division path is selected - confirm against the lowering code.
;FUNC-LABEL: {{^}}test_urem2464:
;EG: UINT_TO_FLT
;EG: UINT_TO_FLT
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
;SI-NOT: v_lshr_64
;SI: v_mad_f32
;SI: s_endpgm
define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 40
%2 = lshr i64 %y, 40
%result = urem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
ret void
}