mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-26 10:29:36 +00:00
R600/SI: Custom select 64-bit ADD
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202194 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
495e40121b
commit
d8c31046a9
@ -200,6 +200,54 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
// We are selecting i64 ADD here instead of custom lower it during
|
||||
// DAG legalization, so we can fold some i64 ADDs used for address
|
||||
// calculation into the LOAD and STORE instructions.
|
||||
case ISD::ADD: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (N->getValueType(0) != MVT::i64 ||
|
||||
ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
break;
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
|
||||
SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
|
||||
SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
|
||||
|
||||
SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
|
||||
DL, MVT::i32, LHS, Sub0);
|
||||
SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
|
||||
DL, MVT::i32, LHS, Sub1);
|
||||
|
||||
SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
|
||||
DL, MVT::i32, RHS, Sub0);
|
||||
SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
|
||||
DL, MVT::i32, RHS, Sub1);
|
||||
|
||||
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
|
||||
|
||||
SmallVector<SDValue, 8> AddLoArgs;
|
||||
AddLoArgs.push_back(SDValue(Lo0, 0));
|
||||
AddLoArgs.push_back(SDValue(Lo1, 0));
|
||||
|
||||
SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL,
|
||||
VTList, AddLoArgs);
|
||||
SDValue Carry = SDValue(AddLo, 1);
|
||||
SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL,
|
||||
MVT::i32, SDValue(Hi0, 0),
|
||||
SDValue(Hi1, 0), Carry);
|
||||
|
||||
SDValue Args[5] = {
|
||||
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
|
||||
SDValue(AddLo,0),
|
||||
Sub0,
|
||||
SDValue(AddHi,0),
|
||||
Sub1,
|
||||
};
|
||||
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5);
|
||||
}
|
||||
case ISD::BUILD_VECTOR: {
|
||||
unsigned RegClassID;
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
|
@ -76,7 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
||||
setOperationAction(ISD::ADDC, MVT::i32, Legal);
|
||||
setOperationAction(ISD::ADDE, MVT::i32, Legal);
|
||||
@ -488,7 +487,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::ADD: return LowerADD(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
case ISD::LOAD: {
|
||||
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
|
||||
@ -628,33 +626,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerADD(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
if (Op.getValueType() != MVT::i64)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
|
||||
SDValue Zero = DAG.getConstant(0, MVT::i32);
|
||||
SDValue One = DAG.getConstant(1, MVT::i32);
|
||||
|
||||
SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
|
||||
SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
|
||||
|
||||
SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
|
||||
SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
|
||||
|
||||
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
|
||||
|
||||
SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
|
||||
SDValue Carry = AddLo.getValue(1);
|
||||
SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);
|
||||
|
||||
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
|
||||
}
|
||||
|
||||
/// \brief Helper function for LowerBRCOND
|
||||
static SDNode *findUser(SDValue Value, unsigned Opcode) {
|
||||
|
||||
|
@ -31,7 +31,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -75,3 +75,13 @@ entry:
|
||||
store <8 x i32> %0, <8 x i32> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @add64
|
||||
; SI-CHECK: S_ADD_I32
|
||||
; SI-CHECK: S_ADDC_U32
|
||||
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
entry:
|
||||
%0 = add i64 %a, %b
|
||||
store i64 %0, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -1,14 +1,13 @@
|
||||
; XFAIL: *
|
||||
; This will fail until i64 add is enabled
|
||||
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
|
||||
|
||||
|
||||
declare i32 @llvm.SI.tid() readnone
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
|
||||
; SI-LABEL: @test_i64_vreg:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.SI.tid() readnone
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
|
||||
%b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
|
||||
%a = load i64 addrspace(1)* %a_ptr
|
||||
@ -20,6 +19,8 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
|
||||
|
||||
; Check that the SGPR add operand is correctly moved to a VGPR.
|
||||
; SI-LABEL: @sgpr_operand:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
|
||||
%foo = load i64 addrspace(1)* %in, align 8
|
||||
%result = add i64 %foo, %a
|
||||
@ -31,6 +32,8 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
|
||||
; SGPR as other operand.
|
||||
;
|
||||
; SI-LABEL: @sgpr_operand_reversed:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
|
||||
%foo = load i64 addrspace(1)* %in, align 8
|
||||
%result = add i64 %a, %foo
|
||||
@ -40,6 +43,10 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
|
||||
|
||||
|
||||
; SI-LABEL: @test_v2i64_sreg:
|
||||
; SI: S_ADD_I32
|
||||
; SI: S_ADDC_U32
|
||||
; SI: S_ADD_I32
|
||||
; SI: S_ADDC_U32
|
||||
define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
|
||||
%result = add <2 x i64> %a, %b
|
||||
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
|
||||
@ -47,8 +54,12 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
|
||||
}
|
||||
|
||||
; SI-LABEL: @test_v2i64_vreg:
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
; SI: V_ADD_I32
|
||||
; SI: V_ADDC_U32
|
||||
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
|
||||
%tid = call i32 @llvm.SI.tid() readnone
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
|
||||
%b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
|
||||
%a = load <2 x i64> addrspace(1)* %a_ptr
|
||||
|
Loading…
x
Reference in New Issue
Block a user