From 1cd1b0b283079b5a8c54759983e9e70845971b2c Mon Sep 17 00:00:00 2001 From: Kalle Raiskila Date: Thu, 16 Sep 2010 12:29:33 +0000 Subject: [PATCH] Change SPU register re-interpretations from OR to COPY_TO_REGCLASS instruction. This cleans up after the mess r108567 left in the CellSPU backend. ORCvt instructions were used to reinterpret registers, and the ORs were then removed by isMoveInstr(). This patch now removes 350 instructions of format: or $3, $3, $3 (from the 52 testcases in CodeGen/CellSPU). One case of a nonexistent 'or' is checked for. Some moves of the form 'ori $., $., 0' and 'ai $., $., 0' still remain. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@114074 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPU64InstrInfo.td | 79 ++++--- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 64 ++++-- lib/Target/CellSPU/SPUInstrInfo.td | 272 ++++++++----------------- test/CodeGen/CellSPU/v2i32.ll | 5 +- 4 files changed, 179 insertions(+), 241 deletions(-) diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 069a182c26d..5ef5716bd8c 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -54,8 +54,8 @@ class I64SETCCNegCond: // The i64 seteq fragment that does the scalar->vector conversion and // comparison: def CEQr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB))), 0xb)>; + CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>; // The i64 seteq fragment that does the vector comparison def CEQv2i64compare: @@ -67,12 +67,14 @@ def CEQv2i64compare: // v2i64 seteq (equality): the setcc result is v4i32 multiclass CompareEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>; - def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS 
CEQr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQv2i64compare.Fragment), R32C))>; } defm I64EQ: CompareEqual64; @@ -89,10 +91,12 @@ def : I64SELECTNegCond; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def CLGTr64ugt: - CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CLGTr64eq: - CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CLGTr64compare: CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment, @@ -112,12 +116,14 @@ def CLGTv2i64compare: multiclass CompareLogicalGreaterThan64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>; def v2i64: CodeFrag; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>; } defm I64LGT: CompareLogicalGreaterThan64; @@ -144,12 +150,14 @@ def CLGEv2i64compare: multiclass CompareLogicalGreaterEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>; + def r64: CodeFrag<(i32 
(COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>; def v2i64: CodeFrag; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>; } defm I64LGE: CompareLogicalGreaterEqual64; @@ -168,10 +176,12 @@ def : I64SELECTNegCond; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def CGTr64sgt: - CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CGTr64eq: - CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CGTr64compare: CodeFrag<(SELBv2i64 CGTr64sgt.Fragment, @@ -191,12 +201,14 @@ def CGTv2i64compare: multiclass CompareGreaterThan64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>; def v2i64: CodeFrag; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTv2i64compare.Fragment), R32C))>; } defm I64GT: CompareLogicalGreaterThan64; @@ -223,12 +235,12 @@ def CGEv2i64compare: multiclass CompareGreaterEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>; def 
v2i64: CodeFrag; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>; } defm I64GE: CompareGreaterEqual64; @@ -255,9 +267,9 @@ class v2i64_add: v2i64_add_1.Fragment, cg_mask>; def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), @@ -275,11 +287,12 @@ class v2i64_sub: CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - v2i64_sub_bg<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS + v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), @@ -374,9 +387,9 @@ class v2i64_mul: rCGmask>; def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUmul64 (v2i64 
VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 371c25b1e83..68001e5bc2f 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -357,6 +357,9 @@ namespace { assert(II && "No InstrInfo?"); return new SPUHazardRecognizer(*II); } + + private: + SDValue getRC( MVT ); // Include the pieces autogenerated from the target description. #include "SPUGenDAGISel.inc" @@ -619,6 +622,29 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, return false; } +/*! + Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue + to be used as the last parameter of a +CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call + \arg VT the value type for which we want a register class +*/ +SDValue SPUDAGToDAGISel::getRC( MVT VT ) { + switch( VT.SimpleTy ) { + case MVT::i32: + return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); + break; + case MVT::i64: + return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); + break; + case MVT::v2i64: + return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); + break; + default: + assert( false && "add a new case here" ); + } + return SDValue(); +} + //! Convert the operand from a target-independent to a target-specific node /*! 
*/ @@ -773,8 +799,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { if (shift_amt >= 32) { SDNode *hi32 = - CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT, - Op0.getOperand(0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + Op0.getOperand(0), getRC(MVT::i32)); shift_amt -= 32; if (shift_amt > 0) { @@ -941,7 +967,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { SDValue SelMaskVal; DebugLoc dl = N->getDebugLoc(); - VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, @@ -985,7 +1012,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! @@ -1006,7 +1034,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { SDNode *VecOp0, *Shift = 0; DebugLoc dl = N->getDebugLoc(); - VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { unsigned bytes = unsigned(CN->getZExtValue()) >> 3; @@ -1052,7 +1081,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! 
@@ -1073,14 +1103,16 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { DebugLoc dl = N->getDebugLoc(); SDNode *VecOp0 = - CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + VecVT, N->getOperand(0), getRC(MVT::v2i64)); SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); SDNode *SignRot = CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, SDValue(VecOp0, 0), SignRotAmt); SDNode *UpperHalfSign = - CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32)); SDNode *UpperHalfSignMask = CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); @@ -1127,7 +1159,8 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(NegShift, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! 
@@ -1154,8 +1187,9 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDValue Op0 = i64vec.getOperand(0); ReplaceUses(i64vec, Op0); - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(Op0.getNode()), 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(Op0.getNode()), 0), + getRC(MVT::i64)); } else if (i64vec.getOpcode() == SPUISD::SHUFB) { SDValue lhs = i64vec.getOperand(0); SDValue rhs = i64vec.getOperand(1); @@ -1196,10 +1230,12 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, SDNode *SN = SelectCode(Dummy.getValue().getNode()); if (SN == 0) SN = Dummy.getValue().getNode(); - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(SN, 0), getRC(MVT::i64)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(i64vec.getNode()), 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(i64vec.getNode()), 0), + getRC(MVT::i64)); } else { report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index ca0fe00e37f..f671a3cfe46 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1385,59 +1385,6 @@ class ORRegInst: ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; -// ORCvtForm: OR conversion form -// -// This is used to "convert" the preferred slot to its vector equivalent, as -// well as convert a vector back to its preferred slot. -// -// These are effectively no-ops, but need to exist for proper type conversion -// and type coercion. 
- -class ORCvtForm pattern = [/* no pattern */]> - : SPUInstr { - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = 0b10000010000; - let Inst{11-17} = RA; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -class ORPromoteScalar: - ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>; - -class ORExtractElt: - ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>; - -/* class ORCvtRegGPRC: - ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */ - -/* class ORCvtGPRCReg: - ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */ - -class ORCvtFormR32Reg pattern = [ ]>: - ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>; - -class ORCvtFormRegR32 pattern = [ ]>: - ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>; - -class ORCvtFormR64Reg pattern = [ ]>: - ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>; - -class ORCvtFormRegR64 pattern = [ ]>: - ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>; - -class ORCvtGPRCVec: - ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; - -class ORCvtVecGPRC: - ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; - -class ORCvtVecVec: - ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; multiclass BitwiseOr { @@ -1468,119 +1415,48 @@ multiclass BitwiseOr def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), [/* no pattern */]>; - - // scalar->vector promotion, prefslot2vec: - def v16i8_i8: ORPromoteScalar; - def v8i16_i16: ORPromoteScalar; - def v4i32_i32: ORPromoteScalar; - def v2i64_i64: ORPromoteScalar; - def v4f32_f32: ORPromoteScalar; - def v2f64_f64: ORPromoteScalar; - - // vector->scalar demotion, vec2prefslot: - def i8_v16i8: ORExtractElt; - def i16_v8i16: ORExtractElt; - def i32_v4i32: ORExtractElt; - def i64_v2i64: ORExtractElt; - def f32_v4f32: ORExtractElt; - def f64_v2f64: ORExtractElt; - - // Conversion from vector to GPRC - def i128_vec: ORCvtVecGPRC; - - // Conversion from GPRC to vector - def vec_i128: ORCvtGPRCVec; - -/* - // Conversion from register to GPRC - def i128_r64: ORCvtRegGPRC; - def i128_f64: 
ORCvtRegGPRC; - def i128_r32: ORCvtRegGPRC; - def i128_f32: ORCvtRegGPRC; - def i128_r16: ORCvtRegGPRC; - def i128_r8: ORCvtRegGPRC; - - // Conversion from GPRC to register - def r64_i128: ORCvtGPRCReg; - def f64_i128: ORCvtGPRCReg; - def r32_i128: ORCvtGPRCReg; - def f32_i128: ORCvtGPRCReg; - def r16_i128: ORCvtGPRCReg; - def r8_i128: ORCvtGPRCReg; -*/ -/* - // Conversion from register to R32C: - def r32_r16: ORCvtFormRegR32; - def r32_r8: ORCvtFormRegR32; - - // Conversion from R32C to register - def r32_r16: ORCvtFormR32Reg; - def r32_r8: ORCvtFormR32Reg; -*/ - - // Conversion from R64C to register: - def r32_r64: ORCvtFormR64Reg; - // def r16_r64: ORCvtFormR64Reg; - // def r8_r64: ORCvtFormR64Reg; - - // Conversion to R64C from register: - def r64_r32: ORCvtFormRegR64; - // def r64_r16: ORCvtFormRegR64; - // def r64_r8: ORCvtFormRegR64; - - // bitconvert patterns: - def r32_f32: ORCvtFormR32Reg; - def f32_r32: ORCvtFormRegR32; - - def r64_f64: ORCvtFormR64Reg; - def f64_r64: ORCvtFormRegR64; } defm OR : BitwiseOr; -// scalar->vector promotion patterns (preferred slot to vector): +//===----------------------------------------------------------------------===// +// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers +//===----------------------------------------------------------------------===// def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), - (ORv16i8_i8 R8C:$rA)>; + (COPY_TO_REGCLASS R8C:$rA, VECREG)>; def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), - (ORv8i16_i16 R16C:$rA)>; + (COPY_TO_REGCLASS R16C:$rA, VECREG)>; def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), - (ORv4i32_i32 R32C:$rA)>; + (COPY_TO_REGCLASS R32C:$rA, VECREG)>; def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), - (ORv2i64_i64 R64C:$rA)>; + (COPY_TO_REGCLASS R64C:$rA, VECREG)>; def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), - (ORv4f32_f32 R32FP:$rA)>; + (COPY_TO_REGCLASS R32FP:$rA, VECREG)>; def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), - (ORv2f64_f64 R64FP:$rA)>; + (COPY_TO_REGCLASS 
R64FP:$rA, VECREG)>; + +def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))), + (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>; -// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise -// known as converting the vector back to its preferred slot +def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))), + (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>; -def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)), - (ORi8_v16i8 VECREG:$rA)>; +def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>; -def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)), - (ORi16_v8i16 VECREG:$rA)>; +def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>; -def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)), - (ORi32_v4i32 VECREG:$rA)>; +def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>; -def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)), - (ORi64_v2i64 VECREG:$rA)>; - -def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)), - (ORf32_v4f32 VECREG:$rA)>; - -def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)), - (ORf64_v2f64 VECREG:$rA)>; +def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>; // Load Register: This is an assembler alias for a bitwise OR of a register // against itself. 
It's here because it brings some clarity to assembly @@ -4379,30 +4255,43 @@ def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), - (v16i8 (ORvec_i128 GPRC:$src))>; + (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), - (v8i16 (ORvec_i128 GPRC:$src))>; + (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), - (v4i32 (ORvec_i128 GPRC:$src))>; + (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), - (v2i64 (ORvec_i128 GPRC:$src))>; + (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), - (v4f32 (ORvec_i128 GPRC:$src))>; + (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), - (v2f64 (ORvec_i128 GPRC:$src))>; + (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; + +def : Pat<(i32 (bitconvert R32FP:$rA)), + (COPY_TO_REGCLASS R32FP:$rA, R32C)>; + +def : Pat<(f32 (bitconvert R32C:$rA)), + (COPY_TO_REGCLASS R32C:$rA, R32FP)>; + +def : Pat<(i64 (bitconvert R64FP:$rA)), + (COPY_TO_REGCLASS R64FP:$rA, R64C)>; + 
+def : Pat<(f64 (bitconvert R64C:$rA)), + (COPY_TO_REGCLASS R64C:$rA, R64FP)>; + //===----------------------------------------------------------------------===// // Instruction patterns: @@ -4453,11 +4342,12 @@ def : Pat<(i32 (zext R8C:$rSrc)), // zext 8->64: Zero extend bytes to double words def : Pat<(i64 (zext R8C:$rSrc)), - (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32 - (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)), + (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32 + (COPY_TO_REGCLASS + (ANDIi8i32 R8C:$rSrc,0xff), VECREG), 0x4), (ILv4i32 0x0), - (FSMBIv4i32 0x0f0f)))>; + (FSMBIv4i32 0x0f0f)), R64C)>; // anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits def : Pat<(i16 (anyext R8C:$rSrc)), @@ -4498,61 +4388,61 @@ def : Pat<(i32 (anyext R16C:$rSrc)), //===----------------------------------------------------------------------===// def : Pat<(i8 (trunc GPRC:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>; def : Pat<(i8 (trunc R64C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>; def : Pat<(i8 (trunc R32C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv4i32_i32 R32C:$src), - (ORv4i32_i32 R32C:$src), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; def : Pat<(i8 (trunc R16C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv8i16_i16 R16C:$src), - (ORv8i16_i16 R16C:$src), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + (COPY_TO_REGCLASS R16C:$src, VECREG), + (COPY_TO_REGCLASS R16C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; def : 
Pat<(i16 (trunc GPRC:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>; def : Pat<(i16 (trunc R64C:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>; def : Pat<(i16 (trunc R32C:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv4i32_i32 R32C:$src), - (ORv4i32_i32 R32C:$src), - (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>; + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>; def : Pat<(i32 (trunc GPRC:$src)), - (ORi32_v4i32 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>; def : Pat<(i32 (trunc R64C:$src)), - (ORi32_v4i32 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>; //===----------------------------------------------------------------------===// // Address generation: SPU, like PPC, has to split addresses into high and diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll index dd51be5a71d..8cfc490e02d 100644 --- a/test/CodeGen/CellSPU/v2i32.ll +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -37,9 +37,8 @@ define %vec @test_mul(%vec %param) } define <2 x i32> @test_splat(i32 %param ) { -;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the -; somewhat redundant: -;CHECK-NOT or $3, $3, $3 +;see svn log for why this is here... 
+;CHECK-NOT: or $3, $3, $3 ;CHECK: lqa ;CHECK: shufb %sv = insertelement <1 x i32> undef, i32 %param, i32 0