R600/SI: nuke SReg_1 v3

It's completely unnecessary and can be replaced with proper
SReg_64 handling instead.

This actually fixes a piglit test on SI.

v2: use correct register class in addRegisterClass,
    set special classes as not allocatable
v3: revert setting special classes as not allocatable

This is a candidate for the stable branch.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175355 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Christian Konig
2013-02-16 11:28:30 +00:00
parent e25e490793
commit e9ba1830df
7 changed files with 23 additions and 118 deletions

View File

@@ -135,16 +135,6 @@ enum {
} // End namespace AMDGPUISD } // End namespace AMDGPUISD
namespace SIISD {
enum {
SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
VCC_AND,
VCC_BITCAST
};
} // End namespace SIISD
} // End namespace llvm } // End namespace llvm
#endif // AMDGPUISELLOWERING_H #endif // AMDGPUISELLOWERING_H

View File

@@ -31,8 +31,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
@@ -42,8 +41,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties(); computeRegisterProperties();
setOperationAction(ISD::AND, MVT::i1, Custom);
setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal);
@@ -202,7 +199,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
case ISD::INTRINSIC_WO_CHAIN: { case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID = unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -219,30 +215,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue(); return SDValue();
} }
/// \brief The function is for lowering i1 operations on the
/// VCC register.
///
/// In the VALU context, VCC is a one bit register, but in the
/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
/// the SALU can perform operations on the VCC register, we need to promote
/// the operand types from i1 to i64 in order for tablegen to be able to match
/// this operation to the correct SALU instruction. We do this promotion by
/// wrapping the operands in a CopyToReg node.
///
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
SelectionDAG &DAG,
unsigned VCCNode) const {
DebugLoc DL = Op.getDebugLoc();
SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(0)),
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
Op.getOperand(1)));
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
}
/// \brief Helper function for LowerBRCOND /// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) { static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -446,13 +418,3 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
} }
return SDValue(); return SDValue();
} }
#define NODE_NAME_CASE(node) case SIISD::node: return #node;
const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
NODE_NAME_CASE(VCC_AND)
NODE_NAME_CASE(VCC_BITCAST)
}
}

View File

@@ -32,8 +32,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
unsigned VCCNode) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -45,7 +43,6 @@ public:
virtual EVT getSetCCResultType(EVT VT) const; virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
}; };
} // End namespace llvm } // End namespace llvm

View File

@@ -39,9 +39,6 @@ class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
class SOP2_64 <bits<7> op, string opName, list<dag> pattern> class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), opName, pattern>; : SOP2 <op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), opName, pattern>;
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
: SOP2 <op, (outs SReg_1:$vcc), (ins SSrc_64:$src0, SSrc_64:$src1), opName, pattern>;
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern> : string opName, list<dag> pattern> :
VOP1 < VOP1 <
@@ -101,7 +98,7 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
def _e32 : VOPC <op, (ins arc:$src0, vrc:$src1), opName, pattern>; def _e32 : VOPC <op, (ins arc:$src0, vrc:$src1), opName, pattern>;
def _e64 : VOP3 < def _e64 : VOP3 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs SReg_1:$dst), (outs SReg_64:$dst),
(ins arc:$src0, vrc:$src1, (ins arc:$src0, vrc:$src1,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$abs, InstFlag:$clamp,
InstFlag:$omod, InstFlag:$neg), InstFlag:$omod, InstFlag:$neg),

View File

@@ -7,37 +7,10 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SI DAG Profiles
//===----------------------------------------------------------------------===//
def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// SI DAG Nodes // SI DAG Nodes
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// and operation on 64-bit wide vcc
def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
[SDNPCommutative, SDNPAssociative]
>;
// Special bitcast node for sharing VCC register between VALU and SALU
def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
// and operation on 64-bit wide vcc
def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
[SDNPCommutative, SDNPAssociative]
>;
// Special bitcast node for sharing VCC register between VALU and SALU
def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
// SMRD takes a 64bit memory address and can only add an 32bit offset // SMRD takes a 64bit memory address and can only add an 32bit offset
def SIadd64bit32bit : SDNode<"ISD::ADD", def SIadd64bit32bit : SDNode<"ISD::ADD",
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>

View File

@@ -781,15 +781,15 @@ def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
} }
def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), (ins VReg_32:$src0, VReg_32:$src1, SReg_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
"V_CNDMASK_B32_e64", "V_CNDMASK_B32_e64",
[(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))] [(set (i32 VReg_32:$dst), (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0))]
>; >;
//f32 pattern for V_CNDMASK_B32_e64 //f32 pattern for V_CNDMASK_B32_e64
def : Pat < def : Pat <
(f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)), (f32 (select (i1 SReg_64:$src2), VReg_32:$src1, VReg_32:$src0)),
(V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2) (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_64:$src2)
>; >;
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
@@ -983,11 +983,14 @@ def : Pat <
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set SReg_64:$dst, (and SSrc_64:$src0, SSrc_64:$src1))] [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
>; >;
def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
[(set SReg_1:$vcc, (SIvcc_and SSrc_64:$src0, SSrc_64:$src1))] def : Pat <
(i1 (and SSrc_64:$src0, SSrc_64:$src1)),
(S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
>; >;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
@@ -1069,9 +1072,9 @@ let isBranch = 1, isTerminator = 1 in {
def SI_IF : InstSI < def SI_IF : InstSI <
(outs SReg_64:$dst), (outs SReg_64:$dst),
(ins SReg_1:$vcc, brtarget:$target), (ins SReg_64:$vcc, brtarget:$target),
"SI_IF", "SI_IF",
[(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))] [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
>; >;
def SI_ELSE : InstSI < def SI_ELSE : InstSI <
@@ -1101,9 +1104,9 @@ def SI_BREAK : InstSI <
def SI_IF_BREAK : InstSI < def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst), (outs SReg_64:$dst),
(ins SReg_1:$vcc, SReg_64:$src), (ins SReg_64:$vcc, SReg_64:$src),
"SI_IF_BREAK", "SI_IF_BREAK",
[(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))] [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
>; >;
def SI_ELSE_BREAK : InstSI < def SI_ELSE_BREAK : InstSI <
@@ -1260,30 +1263,15 @@ def : BitConvert <i32, f32, VReg_32>;
def : BitConvert <f32, i32, SReg_32>; def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <f32, i32, VReg_32>; def : BitConvert <f32, i32, VReg_32>;
def : Pat <
(i64 (SIsreg1_bitcast SReg_1:$vcc)),
(S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
>;
def : Pat <
(i1 (SIsreg1_bitcast SReg_64:$vcc)),
(COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
>;
def : Pat <
(i64 (SIvcc_bitcast VCCReg:$vcc)),
(S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
>;
def : Pat <
(i1 (SIvcc_bitcast SReg_64:$vcc)),
(COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
>;
/********** ================== **********/ /********** ================== **********/
/********** Immediate Patterns **********/ /********** Immediate Patterns **********/
/********** ================== **********/ /********** ================== **********/
def : Pat <
(i1 imm:$imm),
(S_MOV_B64 imm:$imm)
>;
def : Pat < def : Pat <
(i32 imm:$imm), (i32 imm:$imm),
(V_MOV_B32_e32 imm:$imm) (V_MOV_B32_e32 imm:$imm)

View File

@@ -137,9 +137,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, M0, EXEC_LO, EXEC_HI) (add SGPR_32, M0, EXEC_LO, EXEC_HI)
>; >;
def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; def SReg_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SGPR_64, VCC, EXEC)>;
def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>;
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
@@ -178,7 +176,7 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
// [SV]Src_* operands can have either an immediate or an register // [SV]Src_* operands can have either an immediate or an register
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add SReg_64)>; def SSrc_64 : RegisterClass<"AMDGPU", [i1, i64], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;