- Start moving target-dependent nodes that could be represented by an
  instruction sequence and cannot ordinarily be simplified by DAGcombine
  into the various target description files or SPUDAGToDAGISel.cpp.

  This makes some 64-bit operations legal.

- Eliminate target-dependent ISD enums.

- Update tests.
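In sketch form (paraphrasing the diff below, with SHL as the example; the
same dispatch is added for SRL and SRA):

    // SPUISelLowering.cpp constructor: was Custom, now Legal
    setOperationAction(ISD::SHL, MVT::i64, Legal);

    // SPUISelDAGToDAG.cpp, in SPUDAGToDAGISel::Select():
    if (Opc == ISD::SHL && OpVT == MVT::i64)
      return SelectSHLi64(Op, OpVT);   // emits the quadword shift sequence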


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8
Scott Michel 2008-12-30 23:28:25 +00:00
parent 998dee96d3
commit 02d711b93e
15 changed files with 736 additions and 634 deletions

View File

@ -15,6 +15,13 @@
//
include "llvm/Target/Target.td"
// Holder of code fragments (you'd think this'd already be in
// a td file somewhere... :-)
class CodeFrag<dag frag> {
dag Fragment = frag;
}
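// Illustration (not in the original file): later patterns reuse a fragment
// by name, e.g. SPU64InstrInfo.td expands compare.Fragment and
// SPUMathInstr.td expands Interpf32.Fragment inside larger Pat<> results.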
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

View File

@ -1,8 +1,17 @@
//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
//
// Cell SPU 64-bit operations
//
// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for vector vice scalar differ by a
// constant.
// constant. In the scalar case, we're only interested in the
// top two 32-bit slots, whereas we're interested in an exact
// all-four-slot match in the vector case.
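//
//    (Illustration, not in the original comment: the scalar i64 lives in
//    the preferred slot, i.e. word slots 0 and 1, so a == b exactly when
//    (a >> 32) == (b >> 32) and lo32(a) == lo32(b); only the top two of
//    the four CEQ word results are significant.)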
//
// 2. There are no "immediate" forms, since loading 64-bit constants
// could be a constant pool load.
@ -10,10 +19,10 @@
// 3. i64 setcc results are i32, which are subsequently converted to a FSM
// mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
// (TODO)
// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
// [Note: this may be moot, since gb produces v4i32 or r32.]
//
// M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for i64. Note that the selection mask is
@ -22,17 +31,15 @@ def SELBr64_cond:
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
[/* no pattern */]>;
class CodeFrag<dag frag> {
dag Fragment = frag;
}
class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
// select the negative condition:
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
// setcc the negative condition:
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
Pat<(cond R64C:$rA, R64C:$rB),
(XORIr32 cmpare.Fragment, -1)>;
(XORIr32 compare.Fragment, -1)>;
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
@ -64,14 +71,13 @@ multiclass CompareEqual64 {
defm I64EQ: CompareEqual64;
def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
I64EQv2i64.Fragment>;
def I64Select:
Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
(SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
(SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
// i64 setugt:

View File

@ -149,7 +149,7 @@ namespace {
}
bool
isHighLow(const SDValue &Op)
isHighLow(const SDValue &Op)
{
return (Op.getOpcode() == SPUISD::IndirectAddr
&& ((Op.getOperand(0).getOpcode() == SPUISD::Hi
@ -229,14 +229,14 @@ public:
TM(tm),
SPUtli(*tm.getTargetLowering())
{}
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnFunction(Fn);
return true;
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(uint32_t Imm) {
@ -248,7 +248,7 @@ public:
inline SDValue getI64Imm(uint64_t Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i64);
}
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
@ -258,6 +258,15 @@ public:
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
//! Emit the instruction sequence for i64 shl
SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
//! Emit the instruction sequence for i64 srl
SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
//! Emit the instruction sequence for i64 sra
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
@ -287,7 +296,7 @@ public:
switch (ConstraintCode) {
default: return true;
case 'm': // memory
if (!SelectDFormAddr(Op, Op, Op0, Op1)
if (!SelectDFormAddr(Op, Op, Op0, Op1)
&& !SelectAFormAddr(Op, Op, Op0, Op1))
SelectXFormAddr(Op, Op, Op0, Op1);
break;
@ -306,7 +315,7 @@ public:
#endif
break;
}
OutOps.push_back(Op0);
OutOps.push_back(Op1);
return false;
@ -318,14 +327,14 @@ public:
virtual const char *getPassName() const {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
virtual HazardRecognizer *CreateTargetHazardRecognizer() {
const TargetInstrInfo *II = TM.getInstrInfo();
assert(II && "No InstrInfo?");
return new SPUHazardRecognizer(*II);
return new SPUHazardRecognizer(*II);
}
// Include the pieces autogenerated from the target description.
@ -375,7 +384,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
abort();
/*NOTREACHED*/
case SPUISD::AFormAddr:
case SPUISD::AFormAddr:
// Just load from memory if there's only a single use of the location,
// otherwise, this will get handled below with D-form offset addresses
if (N.hasOneUse()) {
@ -404,7 +413,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
return false;
}
bool
bool
SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base) {
const int minDForm2Offset = -(1 << 7);
@ -527,7 +536,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
offset = int32_t(CN->getSExtValue());
idxOp = Op1;
}
}
if (offset >= minOffset && offset <= maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
@ -622,27 +631,20 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (N->isMachineOpcode()) {
return NULL; // Already selected.
} else if (Opc == ISD::FrameIndex) {
// Selects to (add $sp, FI * stackSlotSize)
int FI =
SPUFrameInfo::FItoStackOffset(cast<FrameIndexSDNode>(N)->getIndex());
MVT PtrVT = SPUtli.getPointerTy();
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
// Adjust stack slot to actual offset in frame:
if (isS10Constant(FI)) {
DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, "
<< FI
<< "\n");
if (FI < 128) {
NewOpc = SPU::AIr32;
Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
Ops[1] = CurDAG->getTargetConstant(FI, PtrVT);
Ops[0] = TFI;
Ops[1] = Imm0;
n_ops = 2;
} else {
DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, "
<< FI
<< "\n");
NewOpc = SPU::Ar32;
Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
Ops[1] = CurDAG->getConstant(FI, PtrVT);
Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(),
TFI, Imm0), 0);
n_ops = 2;
}
} else if (Opc == ISD::ZERO_EXTEND) {
@ -661,6 +663,18 @@ SPUDAGToDAGISel::Select(SDValue Op) {
n_ops = 2;
}
}
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
return SelectSHLi64(Op, OpVT);
}
} else if (Opc == ISD::SRL) {
if (OpVT == MVT::i64) {
return SelectSRLi64(Op, OpVT);
}
} else if (Opc == ISD::SRA) {
if (OpVT == MVT::i64) {
return SelectSRAi64(Op, OpVT);
}
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
MVT VT = N->getValueType(0);
@ -713,7 +727,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
n_ops = 2;
}
}
if (n_ops > 0) {
if (N->hasOneUse())
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
@ -723,7 +737,213 @@ SPUDAGToDAGISel::Select(SDValue Op) {
return SelectCode(Op);
}
/// createPPCISelDag - This pass converts a legalized DAG into a
/*!
* Emit the instruction sequence for i64 left shifts. The basic algorithm
* is to fill the bottom two word slots with zeros so that zeros are shifted
* in as the entire quadword is shifted left.
*
* \note This code could also be used to implement v2i64 shl.
*
* @param Op The shl operand
 * @param OpVT Op's machine value type (doesn't need to be passed, but
* makes life easier.)
* @return The SDNode with the entire instruction sequence
*/
SDNode *
SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
SDValue Op0 = Op.getOperand(0);
MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
SDValue SelMaskVal;
VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal);
ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT,
CurDAG->getTargetConstant(0, OpVT));
VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
SDValue(ZeroFill, 0),
SDValue(VecOp0, 0),
SDValue(SelMask, 0));
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
unsigned bits = unsigned(CN->getZExtValue()) & 7;
if (bytes > 0) {
Shift =
CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT,
SDValue(VecOp0, 0),
CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
Shift =
CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT,
SDValue((Shift != 0 ? Shift : VecOp0), 0),
CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *Bytes =
CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
ShiftAmt,
CurDAG->getTargetConstant(3, ShiftAmtVT));
SDNode *Bits =
CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
ShiftAmt,
CurDAG->getTargetConstant(7, ShiftAmtVT));
Shift =
CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT,
SDValue(VecOp0, 0), SDValue(Bytes, 0));
Shift =
CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT,
SDValue(Shift, 0), SDValue(Bits, 0));
}
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
}
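// Illustration, not part of this commit: a scalar C++ model of the
// sequence above (assumes <cstdint> and a compiler with __int128). The
// i64 occupies the upper 8 bytes of the quadword, and the FSMBI/SELB
// zero-fill below it guarantees that zeros shift in:
uint64_t shl_i64_model(uint64_t x, unsigned amt) {
  unsigned __int128 q = (unsigned __int128)x << 64;  // value in preferred slot
  q <<= (amt >> 3) * 8;                              // SHLQBY(I): byte part
  q <<= (amt & 7);                                   // SHLQBI(I): bit part
  return (uint64_t)(q >> 64);                        // ORi64_v2i64 extract
}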
/*!
* Emit the instruction sequence for i64 logical right shifts.
*
 * @param Op The srl operand
 * @param OpVT Op's machine value type (doesn't need to be passed, but
* makes life easier.)
* @return The SDNode with the entire instruction sequence
*/
SDNode *
SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
SDValue Op0 = Op.getOperand(0);
MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0, *Shift = 0;
VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
unsigned bits = unsigned(CN->getZExtValue()) & 7;
if (bytes > 0) {
Shift =
CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT,
SDValue(VecOp0, 0),
CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
Shift =
CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT,
SDValue((Shift != 0 ? Shift : VecOp0), 0),
CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *Bytes =
CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
ShiftAmt,
CurDAG->getTargetConstant(3, ShiftAmtVT));
SDNode *Bits =
CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
ShiftAmt,
CurDAG->getTargetConstant(7, ShiftAmtVT));
// Ensure that the shift amounts are negated!
Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
SDValue(Bytes, 0),
CurDAG->getTargetConstant(0, ShiftAmtVT));
Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
SDValue(Bits, 0),
CurDAG->getTargetConstant(0, ShiftAmtVT));
Shift =
CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT,
SDValue(VecOp0, 0), SDValue(Bytes, 0));
Shift =
CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT,
SDValue(Shift, 0), SDValue(Bits, 0));
}
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
}
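// Illustration, not part of this commit: the matching srl model (same
// assumptions as the shl model above). The SFIr32 nodes exist because
// rotqmby/rotqmbi interpret their operands as negative shift counts:
uint64_t srl_i64_model(uint64_t x, unsigned amt) {
  unsigned __int128 q = (unsigned __int128)x << 64;  // low 64 bits are zero
  q >>= (amt >> 3) * 8;                              // ROTQMBY(I), count -bytes
  q >>= (amt & 7);                                   // ROTQMBI(I), count -bits
  return (uint64_t)(q >> 64);
}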
/*!
* Emit the instruction sequence for i64 arithmetic right shifts.
*
 * @param Op The sra operand
 * @param OpVT Op's machine value type (doesn't need to be passed, but
* makes life easier.)
* @return The SDNode with the entire instruction sequence
*/
SDNode *
SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
// Promote Op0 to vector
MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
SDNode *VecOp0 =
CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0));
SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
SDNode *SignRot =
CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64,
SDValue(VecOp0, 0), SignRotAmt);
SDNode *UpperHalfSign =
CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0));
SDNode *UpperHalfSignMask =
CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0));
SDNode *UpperLowerMask =
CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT,
CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
SDNode *UpperLowerSelect =
CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
SDValue(UpperHalfSignMask, 0),
SDValue(VecOp0, 0),
SDValue(UpperLowerMask, 0));
SDNode *Shift = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
unsigned bits = unsigned(CN->getZExtValue()) & 7;
if (bytes > 0) {
bytes = 31 - bytes;
Shift =
CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT,
SDValue(UpperLowerSelect, 0),
CurDAG->getTargetConstant(bytes, ShiftAmtVT));
}
if (bits > 0) {
bits = 8 - bits;
Shift =
CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT,
SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
CurDAG->getTargetConstant(bits, ShiftAmtVT));
}
} else {
SDNode *NegShift =
CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
Shift =
CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT,
SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
Shift =
CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT,
SDValue(Shift, 0), SDValue(NegShift, 0));
}
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
}
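// Illustration, not part of this commit (same assumptions as the shl
// model): the quad's low half is filled with the FSM-replicated sign word,
// and the rotate counts (31 - bytes, 8 - bits) are chosen so the combined
// left rotate amounts to 128 - amt, i.e. a right rotate that wraps sign
// bits into the vacated top bits:
int64_t sra_i64_model(int64_t x, unsigned amt) {
  if (amt == 0) return x;
  unsigned __int128 sign = (uint64_t)(x >> 63);      // 0 or all-ones
  unsigned __int128 q = ((unsigned __int128)(uint64_t)x << 64) | sign;
  q = (q >> amt) | (q << (128 - amt));               // rotate right by amt
  return (int64_t)(uint64_t)(q >> 64);
}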
/// createSPUISelDag - This pass converts a legalized DAG into a
/// SPU-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {

View File

@ -204,10 +204,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
// SPU needs custom lowering for shift left/right for i64
setOperationAction(ISD::SHL, MVT::i64, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
// Make these operations legal and handle them during instruction selection:
setOperationAction(ISD::SHL, MVT::i64, Legal);
setOperationAction(ISD::SRL, MVT::i64, Legal);
setOperationAction(ISD::SRA, MVT::i64, Legal);
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
@ -215,6 +215,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
@ -249,7 +250,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Zero extension and sign extension for i64 have to be
// custom legalized
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
// Custom lower i128 -> i64 truncates
@ -262,7 +262,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
// SPU has [U|S]INT_TO_FP
@ -340,7 +339,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
// mul has to be custom lowered.
setOperationAction(ISD::MUL , VT, Custom);
// TODO: v2i64 vector multiply
setOperationAction(ISD::MUL , VT, Legal);
setOperationAction(ISD::AND , VT, Legal);
setOperationAction(ISD::OR , VT, Legal);
@ -354,7 +354,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Custom);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
@ -371,9 +370,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
// FIXME: This is only temporary until I put all vector multiplications in
// SPUInstrInfo.td:
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@ -411,10 +408,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
@ -422,21 +415,12 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
"SPUISD::ROTQUAD_RZ_BYTES";
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
"SPUISD::ROTQUAD_RZ_BITS";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
}
@ -1922,182 +1906,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
switch (Op.getValueType().getSimpleVT()) {
default:
cerr << "CellSPU: Unknown vector multiplication, got "
<< Op.getValueType().getMVTString()
<< "\n";
abort();
/*NOTREACHED*/
case MVT::v4i32:
break;
// Multiply two v8i16 vectors (pipeline friendly version):
// a) multiply lower halves, mask off upper 16-bit of 32-bit product
// b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
// c) Use SELB to select upper and lower halves from the intermediate results
//
// NOTE: We really want to move the SELECT_MASK to earlier to actually get the
// dual-issue. This code does manage to do this, even if it's a little on
// the wacky side
case MVT::v8i16: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SDValue Chain = Op.getOperand(0);
SDValue rA = Op.getOperand(0);
SDValue rB = Op.getOperand(1);
unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
SDValue FSMBOp =
DAG.getCopyToReg(Chain, FSMBIreg,
DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0xcccc, MVT::i16)));
SDValue HHProd =
DAG.getCopyToReg(FSMBOp, HiProdReg,
DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
SDValue HHProd_v4i32 =
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
return DAG.getNode(SPUISD::SELB, MVT::v8i16,
DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
HHProd_v4i32,
DAG.getConstant(16, MVT::i16))),
DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
}
// This M00sE is N@stI! (apologies to Monty Python)
//
// SPU doesn't know how to do any 8-bit multiplication, so the solution
// is to break it all apart, sign extend, and reassemble the various
// intermediate products.
case MVT::v16i8: {
SDValue rA = Op.getOperand(0);
SDValue rB = Op.getOperand(1);
SDValue c8 = DAG.getConstant(8, MVT::i32);
SDValue c16 = DAG.getConstant(16, MVT::i32);
SDValue LLProd =
DAG.getNode(SPUISD::MPY, MVT::v8i16,
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
SDValue LHProd =
DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0x2222, MVT::i16));
SDValue LoProdParts =
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getNode(SPUISD::SELB, MVT::v8i16,
LLProd, LHProd, FSMBmask));
SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
SDValue LoProd =
DAG.getNode(ISD::AND, MVT::v4i32,
LoProdParts,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
LoProdMask, LoProdMask,
LoProdMask, LoProdMask));
SDValue rAH =
DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
SDValue rBH =
DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
SDValue HLProd =
DAG.getNode(SPUISD::MPY, MVT::v8i16,
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
SDValue HHProd_1 =
DAG.getNode(SPUISD::MPY, MVT::v8i16,
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
DAG.getNode(SPUISD::VEC_SRA,
MVT::v4i32, rAH, c8)),
DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
DAG.getNode(SPUISD::VEC_SRA,
MVT::v4i32, rBH, c8)));
SDValue HHProd =
DAG.getNode(SPUISD::SELB, MVT::v8i16,
HLProd,
DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
FSMBmask);
SDValue HiProd =
DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
DAG.getNode(ISD::OR, MVT::v4i32,
LoProd, HiProd));
}
}
return SDValue();
}
static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
MVT VT = Op.getValueType();
unsigned VRegBR, VRegC;
if (VT == MVT::f32) {
VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
} else {
VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
}
// TODO: make sure we're feeding FPInterp the right arguments
// Right now: fi B, frest(B)
// Computes BRcpl =
// (Floating Interpolate (FP Reciprocal Estimate B))
SDValue BRcpl =
DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
DAG.getNode(SPUISD::FPInterp, VT, B,
DAG.getNode(SPUISD::FPRecipEst, VT, B)));
// Computes A * BRcpl and stores in a temporary register
SDValue AxBRcpl =
DAG.getCopyToReg(BRcpl, VRegC,
DAG.getNode(ISD::FMUL, VT, A,
DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
// What's the Chain variable do? It's magic!
// TODO: set Chain = Op(0).getEntryNode()
return DAG.getNode(ISD::FADD, VT,
DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
DAG.getNode(ISD::FMUL, VT,
DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
DAG.getNode(ISD::FSUB, VT, A,
DAG.getNode(ISD::FMUL, VT, B,
DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
SDValue N = Op.getOperand(0);
@ -2296,18 +2104,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
assert(0 && "Unhandled i8 math operator");
/*NOTREACHED*/
break;
case ISD::ADD: {
// 8-bit addition: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
case ISD::SUB: {
// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
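// Illustration, not part of this commit: the scalar effect of the
// promote-and-truncate lowering. The wider op agrees with a native i8
// op modulo 2^8, so truncation recovers the exact result:
//
//   int8_t add8(int8_t a, int8_t b) {
//     return (int8_t)((int16_t)a + (int16_t)b);  // == (int8_t)(a + b)
//   }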
@ -2397,7 +2210,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
switch (Opc) {
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
MVT Op0VT = Op0.getValueType();
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
@ -2410,39 +2222,16 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
SDValue PromoteScalar =
DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
if (Opc != ISD::SIGN_EXTEND) {
// Use a shuffle to zero extend the i32 to i64 directly:
SDValue shufMask =
DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
DAG.getConstant(0x80808080, MVT::i32),
DAG.getConstant(0x00010203, MVT::i32),
DAG.getConstant(0x80808080, MVT::i32),
DAG.getConstant(0x08090a0b, MVT::i32));
SDValue zextShuffle =
DAG.getNode(SPUISD::SHUFB, Op0VecVT,
PromoteScalar, PromoteScalar, shufMask);
// Use a shuffle to zero extend the i32 to i64 directly:
SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
0x08090a0b, MVT::i32));
SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
PromoteScalar, shufMask);
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
} else {
// SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
// right and propagate the sign bit) instruction.
SDValue RotQuad =
DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
PromoteScalar, DAG.getConstant(4, MVT::i32));
SDValue SignQuad =
DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
PromoteScalar, DAG.getConstant(32, MVT::i32));
SDValue SelMask =
DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
DAG.getConstant(0xf0f0, MVT::i16));
SDValue CombineQuad =
DAG.getNode(SPUISD::SELB, Op0VecVT,
SignQuad, RotQuad, SelMask);
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
}
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
VecVT, zextShuffle));
}
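// Illustration, not part of this commit (assumed CBEA shufb semantics):
// control bytes 0x00-0x1f select a byte of the concatenated inputs, while
// 0x80-0xbf produce 0x00. The mask words (0x80808080, 0x00010203,
// 0x80808080, 0x08090a0b) therefore assemble a zero word followed by a
// source word in each doubleword, zero-extending the preferred-slot i32
// to i64 with no separate AND mask.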
case ISD::ADD: {
@ -2502,88 +2291,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedBorrow));
}
case ISD::SHL: {
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
SDValue MaskLower =
DAG.getNode(SPUISD::SELB, VecVT,
Op0Vec,
DAG.getConstant(0, VecVT),
DAG.getNode(SPUISD::SELECT_MASK, VecVT,
DAG.getConstant(0xff00ULL, MVT::i16)));
SDValue ShiftAmtBytes =
DAG.getNode(ISD::SRL, ShiftAmtVT,
ShiftAmt,
DAG.getConstant(3, ShiftAmtVT));
SDValue ShiftAmtBits =
DAG.getNode(ISD::AND, ShiftAmtVT,
ShiftAmt,
DAG.getConstant(7, ShiftAmtVT));
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
MaskLower, ShiftAmtBytes),
ShiftAmtBits));
}
case ISD::SRL: {
MVT VT = Op.getValueType();
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
SDValue ShiftAmtBytes =
DAG.getNode(ISD::SRL, ShiftAmtVT,
ShiftAmt,
DAG.getConstant(3, ShiftAmtVT));
SDValue ShiftAmtBits =
DAG.getNode(ISD::AND, ShiftAmtVT,
ShiftAmt,
DAG.getConstant(7, ShiftAmtVT));
return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
Op0, ShiftAmtBytes),
ShiftAmtBits);
}
case ISD::SRA: {
// Promote Op0 to vector
SDValue Op0 =
DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftVT = ShiftAmt.getValueType();
// Negate variable shift amounts
if (!isa<ConstantSDNode>(ShiftAmt)) {
ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
DAG.getConstant(0, ShiftVT), ShiftAmt);
}
SDValue UpperHalfSign =
DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
Op0, DAG.getConstant(31, MVT::i32))));
SDValue UpperHalfSignMask =
DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
SDValue UpperLowerMask =
DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
DAG.getConstant(0xff00, MVT::i16));
SDValue UpperLowerSelect =
DAG.getNode(SPUISD::SELB, MVT::v2i64,
UpperHalfSignMask, Op0, UpperLowerMask);
SDValue RotateLeftBytes =
DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
UpperLowerSelect, ShiftAmt);
SDValue RotateLeftBits =
DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
RotateLeftBytes, ShiftAmt);
return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
RotateLeftBits);
}
}
return SDValue();
@ -2890,10 +2597,11 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerRET(Op, DAG, getTargetMachine());
// i8, i64 math ops:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
return LowerI64Math(Op, DAG, Opc);
// i8, i64 math ops:
case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
@ -2928,22 +2636,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
// Vector and i8 multiply:
case ISD::MUL:
if (VT.isVector())
return LowerVectorMUL(Op, DAG);
else if (VT == MVT::i8)
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
case ISD::FDIV:
if (VT == MVT::f32 || VT == MVT::v4f32)
return LowerFDIVf32(Op, DAG);
#if 0
// This is probably a libcall
else if (Op.getValueType() == MVT::f64)
return LowerFDIVf64(Op, DAG);
#endif
else
assert(0 && "Calling FDIV on unsupported MVT");
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
@ -3119,8 +2814,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case SPUISD::VEC_SHL:
case SPUISD::VEC_SRL:
case SPUISD::VEC_SRA:
case SPUISD::ROTQUAD_RZ_BYTES:
case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
@ -3268,10 +2961,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
}
#if 0
case MPY:
case MPYU:
case MPYH:
case MPYHH:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
case SPUISD::VEC_SHL:
@ -3279,18 +2968,14 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
case SPUISD::ROTQUAD_RZ_BYTES:
case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_LEFT:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
case SPUISD::FPInterp:
case SPUISD::FPRecipEst:
case SPUISD::SEXT32TO64:
#endif
}
}
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
unsigned Depth) const {

View File

@ -24,10 +24,10 @@ namespace llvm {
enum NodeType {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
// Pseudo instructions:
RET_FLAG, ///< Return with flag, matched by bi instruction
Hi, ///< High address component (upper 16)
Lo, ///< Low address component (lower 16)
PCRelAddr, ///< Program counter relative address
@ -41,10 +41,6 @@ namespace llvm {
CNTB, ///< Count leading ones in bytes
PREFSLOT2VEC, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
MPYU, ///< Multiply Unsigned
MPYH, ///< Multiply High
MPYHH, ///< Multiply High-High
SHLQUAD_L_BITS, ///< Rotate quad left, by bits
SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
VEC_SHL, ///< Vector shift left
@ -52,8 +48,6 @@ namespace llvm {
VEC_SRA, ///< Vector shift right (arithmetic)
VEC_ROTL, ///< Vector rotate left
VEC_ROTR, ///< Vector rotate right
ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
@ -63,8 +57,6 @@ namespace llvm {
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
SUB_EXTENDED, ///< Subtract extended, with borrow
BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
FPInterp, ///< Floating point interpolate
FPRecipEst, ///< Floating point reciprocal estimate
SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits
LAST_SPUISD ///< Last user-defined instruction
};
@ -87,7 +79,7 @@ namespace llvm {
}
class SPUTargetMachine; // forward dec'l.
class SPUTargetLowering :
public TargetLowering
{
@ -97,14 +89,14 @@ namespace llvm {
public:
SPUTargetLowering(SPUTargetMachine &TM);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ValueType for ISD::SETCC
virtual MVT getSetCCResultType(const SDValue &) const;
//! Custom lowering hooks
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@ -116,7 +108,7 @@ namespace llvm {
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
APInt &KnownZero,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
@ -126,12 +118,12 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
std::pair<unsigned, const TargetRegisterClass*>
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
bool hasMemory,
bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;

View File

@ -82,7 +82,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORIi8i32:
case SPU::AHIvec:
case SPU::AHIr16:
case SPU::AIvec:
case SPU::AIv4i32:
assert(MI.getNumOperands() == 3 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
@ -98,8 +98,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
assert(MI.getNumOperands() == 3 &&
"wrong number of operands to AIr32");
if (MI.getOperand(0).isReg() &&
(MI.getOperand(1).isReg() ||
MI.getOperand(1).isFI()) &&
MI.getOperand(1).isReg() &&
(MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0)) {
sourceReg = MI.getOperand(1).getReg();
@ -265,7 +264,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
if (DestRC == SPU::R8CRegisterClass) {
BuildMI(MBB, MI, get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0);
} else if (DestRC == SPU::R16CRegisterClass) {
@ -291,7 +290,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
// Attempt to copy unknown/unsupported register class!
return false;
}
return true;
}
@ -464,7 +463,7 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned OpNum = Ops[0];
unsigned Opc = MI->getOpcode();
MachineInstr *NewMI = 0;
if ((Opc == SPU::ORr32
|| Opc == SPU::ORv4i32)
&& MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
@ -508,7 +507,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Get the last instruction in the block.
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranch(LastInst)) {
@ -524,7 +523,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Otherwise, don't know what this is.
return true;
}
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = I;
@ -532,7 +531,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (SecondLastInst && I != MBB.begin() &&
isUnpredicatedTerminator(--I))
return true;
// If the block ends with a conditional and unconditional branch, handle it.
if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
TBB = SecondLastInst->getOperand(1).getMBB();
@ -541,7 +540,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
@ -554,7 +553,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Otherwise, can't handle this.
return true;
}
unsigned
SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
@ -578,16 +577,16 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I->eraseFromParent();
return 2;
}
unsigned
SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
assert((Cond.size() == 2 || Cond.size() == 0) &&
"SPU branch conditions have two components!");
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
@ -600,7 +599,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
return 1;
}
// Two-way Conditional Branch.
#if 0
BuildMI(&MBB, get(SPU::BRNZ))

View File

@ -583,7 +583,9 @@ def AHIvec:
def AHIr16:
RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
"ahi\t$rT, $rA, $val", IntegerOp,
[(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
[(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
// v4i32, i32 add instruction:
class AInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00000011000, OOL, IOL,
@ -604,21 +606,42 @@ multiclass AddInstruction {
def v16i8: AVecInst<v16i8>;
def r32: ARegInst<R32C>;
def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;
}
defm A : AddInstruction;
def AIvec:
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
"ai\t$rT, $rA, $val", IntegerOp,
[(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA),
v4i32SExt10Imm:$val))]>;
class AIInst<dag OOL, dag IOL, list<dag> pattern>:
RI10Form<0b00111000, OOL, IOL,
"ai\t$rT, $rA, $val", IntegerOp,
pattern>;
def AIr32:
RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
"ai\t$rT, $rA, $val", IntegerOp,
[(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;
class AIVecInst<ValueType vectype, PatLeaf immpred>:
AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
[(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
[/* no pattern */]>;
class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
[(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
// This is used to add epsilons to floating point numbers in the f32 fdiv code:
class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
[/* no pattern */]>;
multiclass AddImmediate {
def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
def r32: AIRegInst<R32C, i32ImmSExt10>;
def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
def f32: AIFPInst<R32FP, i32ImmSExt10>;
}
defm AI : AddImmediate;
def SFHvec:
RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
@ -795,8 +818,7 @@ def BGXvec:
def MPYv8i16:
RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
[(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA),
(v8i16 VECREG:$rB)))]>;
[/* no pattern */]>;
def MPYr16:
RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
@ -812,8 +834,7 @@ class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
def MPYUv4i32:
MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (v4i32 VECREG:$rT),
(SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
[/* no pattern */]>;
def MPYUr16:
MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
@ -821,7 +842,7 @@ def MPYUr16:
def MPYUr32:
MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
[(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>;
[/* no pattern */]>;
// mpyi: multiply 16 x s10imm -> 32 result.
@ -892,87 +913,78 @@ class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
def MPYHv4i32:
MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (v4i32 VECREG:$rT),
(SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
[/* no pattern */]>;
def MPYHr32:
MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
[(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>;
[/* no pattern */]>;
// mpys: multiply high and shift right (returns the top half of
// a 16-bit multiply, sign extended to 32 bits.)
def MPYSvec:
RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
def MPYSr16:
RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
class MPYSInst<dag OOL, dag IOL>:
RRForm<0b11100011110, OOL, IOL,
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
[/* no pattern */]>;
def MPYSvec:
MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYSr16:
MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
// mpyhh: multiply high-high (returns the 32-bit result from multiplying
// the top 16 bits of the $rA, $rB)
class MPYHHInst<dag OOL, dag IOL>:
RRForm<0b01100011110, OOL, IOL,
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
def MPYHHv8i16:
RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
[(set (v8i16 VECREG:$rT),
(SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHr32:
RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhha: Multiply high-high, add to $rT:
def MPYHHAvec:
RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
def MPYHHAr32:
RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
class MPYHHAInst<dag OOL, dag IOL>:
RRForm<0b01100010110, OOL, IOL,
"mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
[/* no pattern */]>;
def MPYHHAvec:
MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHAr32:
MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhhu: Multiply high-high, unsigned
def MPYHHUvec:
RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
def MPYHHUr32:
RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
class MPYHHUInst<dag OOL, dag IOL>:
RRForm<0b01110011110, OOL, IOL,
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
[/* no pattern */]>;
def MPYHHUvec:
MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHUr32:
MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhhau: Multiply high-high, unsigned
class MPYHHAUInst<dag OOL, dag IOL>:
RRForm<0b01110010110, OOL, IOL,
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
def MPYHHAUvec:
RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHAUr32:
RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def MPYv4i32:
Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
(Av4i32
(Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
(MPYHv4i32 VECREG:$rB, VECREG:$rA)),
(MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
def MPYi32:
Pat<(mul R32C:$rA, R32C:$rB),
(Ar32
(Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
(MPYHr32 R32C:$rB, R32C:$rA)),
(MPYUr32 R32C:$rA, R32C:$rB))>;
MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// clz: Count leading zeroes
@ -983,7 +995,7 @@ class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
class CLZRegInst<RegisterClass rclass>:
CLZInst<(outs rclass:$rT), (ins rclass:$rA),
[(set rclass:$rT, (ctlz rclass:$rA))]>;
[(set rclass:$rT, (ctlz rclass:$rA))]>;
class CLZVecInst<ValueType vectype>:
CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
@ -1424,7 +1436,7 @@ multiclass BitwiseOr
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
// scalar->vector promotion:
// scalar->vector promotion, prefslot2vec:
def v16i8_i8: ORPromoteScalar<R8C>;
def v8i16_i16: ORPromoteScalar<R16C>;
def v4i32_i32: ORPromoteScalar<R32C>;
@ -1432,7 +1444,7 @@ multiclass BitwiseOr
def v4f32_f32: ORPromoteScalar<R32FP>;
def v2f64_f64: ORPromoteScalar<R64FP>;
// extract element 0:
// vector->scalar demotion, vec2prefslot:
def i8_v16i8: ORExtractElt<R8C>;
def i16_v8i16: ORExtractElt<R16C>;
def i32_v4i32: ORExtractElt<R32C>;
@ -1831,6 +1843,13 @@ class SELBVecInst<ValueType vectype>:
(and (vnot (vectype VECREG:$rC)),
(vectype VECREG:$rA))))]>;
class SELBVecVCondInst<ValueType vectype>:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (vectype VECREG:$rT),
(select (vectype VECREG:$rC),
(vectype VECREG:$rB),
(vectype VECREG:$rA)))]>;
class SELBVecCondInst<ValueType vectype>:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
[(set (vectype VECREG:$rT),
@ -1867,8 +1886,21 @@ multiclass SelectBits
def v4i32_cond: SELBVecCondInst<v4i32>;
def v2i64_cond: SELBVecCondInst<v2i64>;
def v16i8_vcond: SELBVecCondInst<v16i8>;
def v8i16_vcond: SELBVecCondInst<v8i16>;
def v4i32_vcond: SELBVecCondInst<v4i32>;
def v2i64_vcond: SELBVecCondInst<v2i64>;
def v4f32_cond:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (v4f32 VECREG:$rT),
(select (v4i32 VECREG:$rC),
(v4f32 VECREG:$rB),
(v4f32 VECREG:$rA)))]>;
// SELBr64_cond is defined further down, look for i64 comparisons
def r32_cond: SELBRegCondInst<R32C, R32C>;
def f32_cond: SELBRegCondInst<R32C, R32FP>;
def r16_cond: SELBRegCondInst<R16C, R16C>;
def r8_cond: SELBRegCondInst<R8C, R8C>;
}
@ -2454,11 +2486,11 @@ class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RotateShift, pattern>;
class ROTQBIVecInst<ValueType vectype>:
ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
[/* no pattern yet */]>;
class ROTQBIRegInst<RegisterClass rclass>:
ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
[/* no pattern yet */]>;
multiclass RotateQuadByBitCount
@ -2645,9 +2677,6 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)),
// ROTQMBYvec: This is a vector form merely so that when used in an
// instruction pattern, type checking will succeed. This instruction assumes
// that the user knew to negate $rB.
//
// Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns
// ensure that $rB is negated.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
@ -2660,8 +2689,7 @@ class ROTQMBYVecInst<ValueType vectype>:
class ROTQMBYRegInst<RegisterClass rclass>:
ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
[(set rclass:$rT,
(SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>;
[/* no pattern */]>;
multiclass RotateQuadBytes
{
@ -2676,32 +2704,17 @@ multiclass RotateQuadBytes
defm ROTQMBY : RotateQuadBytes;
def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB),
(ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB),
(ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB),
(ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB),
(ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB),
(ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB),
(ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
RotateShift, pattern>;
class ROTQMBYIVecInst<ValueType vectype>:
ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
[(set (vectype VECREG:$rT),
(SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
[/* no pattern */]>;
class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
[(set rclass:$rT,
(SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>;
[/* no pattern */]>;
multiclass RotateQuadBytesImm
{
@ -2725,8 +2738,8 @@ class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RotateShift, pattern>;
class ROTQMBYBIVecInst<ValueType vectype>:
ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern, intrinsic? */]>;
ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
[/* no pattern, */]>;
multiclass RotateMaskQuadByBitCount
{
@ -2768,19 +2781,6 @@ multiclass RotateMaskQuadByBits
defm ROTQMBI: RotateMaskQuadByBits;
def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB),
(ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB),
(ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB),
(ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB),
(ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB),
(ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB),
(ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate quad and mask by bits, immediate
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@ -2791,13 +2791,11 @@ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
class ROTQMBIIVecInst<ValueType vectype>:
ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
[(set (vectype VECREG:$rT),
(SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
[/* no pattern */]>;
class ROTQMBIIRegInst<RegisterClass rclass>:
ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
[(set rclass:$rT,
(SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>;
[/* no pattern */]>;
multiclass RotateMaskQuadByBitsImm
{
@ -3142,6 +3140,15 @@ multiclass CmpGtrWordImm
def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
[(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
// CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
[(set (v4i32 VECREG:$rT),
(setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
(v4i32 v4i32SExt16Imm:$val)))]>;
def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
[/* no pattern */]>;
}
class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
@ -3750,62 +3757,63 @@ let isTerminator = 1, isBarrier = 1 in {
class FAInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
SPrecFP, pattern>;
SPrecFP, pattern>;
class FAVecInst<ValueType vectype>:
FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
(fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
(fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
multiclass SFPAdd
{
def v4f32: FAVecInst<v4f32>;
def r32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
[(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
[(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
}
defm FA : SFPAdd;
class FSInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
SPrecFP, pattern>;
SPrecFP, pattern>;
class FSVecInst<ValueType vectype>:
FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
(fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
[(set (vectype VECREG:$rT),
(fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
multiclass SFPSub
{
def v4f32: FSVecInst<v4f32>;
def r32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
[(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
[(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
}
defm FS : SFPSub;
// Floating point reciprocal estimate
def FREv4f32 :
RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA),
"frest\t$rT, $rA", SPrecFP,
[(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>;
def FREf32 :
RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA),
"frest\t$rT, $rA", SPrecFP,
[(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>;
class FRESTInst<dag OOL, dag IOL>:
RRForm_1<0b00110111000, OOL, IOL,
"frest\t$rT, $rA", SPrecFP,
[/* no pattern */]>;
def FRESTv4f32 :
FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
def FRESTf32 :
FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
// Floating point interpolate (used in conjunction with reciprocal estimate)
def FIv4f32 :
RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"fi\t$rT, $rA, $rB", SPrecFP,
[(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA),
(v4f32 VECREG:$rB)))]>;
[/* no pattern */]>;
def FIf32 :
RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
"fi\t$rT, $rA, $rB", SPrecFP,
[(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>;
[/* no pattern */]>;
//--------------------------------------------------------------------------
// Basic single precision floating point comparisons:
@ -4445,12 +4453,14 @@ def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
(SPUlo tconstpool:$in, 0)),
(IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
/*
def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm),
(AIr32 R32C:$sp, i32ImmSExt10:$imm)>;
def : Pat<(SPUindirect R32C:$sp, imm:$imm),
(Ar32 R32C:$sp,
(IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>;
*/
def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
(IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
@ -4466,5 +4476,7 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
// Intrinsics:
include "CellSDKIntrinsics.td"
// Various math operator instruction sequences
include "SPUMathInstr.td"
// 64-bit "instructions"/support
include "SPU64InstrInfo.td"


@ -0,0 +1,99 @@
//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, and i8 types and the corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//
// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
(ORv4i32
(ANDv4i32
(SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
(SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
(ROTMAHIv8i16 VECREG:$rB, 8)), 8),
(FSMBIv8i16 0x2222)),
(ILAv4i32 0x0000ffff)),
(SHLIv4i32
(SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
(ROTMAIv4i32_i32 VECREG:$rB, 16)),
(SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
(ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
(FSMBIv8i16 0x2222)), 16))>;
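Since the SPU has only 16-bit multipliers, the pattern above builds the sixteen byte products out of halfword multiplies on the original and byte-rotated operands, then merges the partial results under fsmbi-generated masks. A simplified scalar C analogue of the core even/odd-byte trick, for a single 16-bit lane (helper name is illustrative, not part of the backend):

#include <stdint.h>

/* Per-byte multiply inside one 16-bit lane using only 16-bit multiplies:
 * the low-byte product and the repositioned high-byte product are merged
 * with masks, much as selb merges lanes under an fsmbi-generated mask. */
uint16_t mul_bytes_in_halfword(uint16_t a, uint16_t b) {
    uint16_t lo = (uint16_t)((a & 0x00ff) * (b & 0x00ff));  /* low-byte product */
    uint16_t hi = (uint16_t)(((a >> 8) * (b >> 8)) << 8);   /* high-byte product, shifted back into place */
    return (uint16_t)((hi & 0xff00) | (lo & 0x00ff));       /* mask-and-merge */
}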
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
(SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
(SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
(FSMBIv8i16 0xcccc))>;
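Here the two halfword products within each word are computed separately (mpy for one, mpyhh plus shli for the other) and merged under the constant fsmbi 0xcccc mask. The same merge in scalar C, for one 32-bit word (hypothetical helper):

#include <stdint.h>

/* Per-halfword multiply inside one 32-bit word: low-halfword product
 * (mpy), high-halfword product shifted back up (mpyhh + shli 16),
 * merged with a constant mask (selb + fsmbi 0xcccc). */
uint32_t mul_halfwords_in_word(uint32_t a, uint32_t b) {
    uint32_t even = (a & 0xffffu) * (b & 0xffffu);       /* low 16x16 -> 32 */
    uint32_t odd  = ((a >> 16) * (b >> 16)) << 16;       /* high 16x16, low half kept */
    return (odd & 0xffff0000u) | (even & 0x0000ffffu);
}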
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def MPYv4i32:
Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
(Av4i32
(Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
(MPYHv4i32 VECREG:$rB, VECREG:$rA)),
(MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
def MPYi32:
Pat<(mul R32C:$rA, R32C:$rB),
(Ar32
(Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
(MPYHr32 R32C:$rB, R32C:$rA)),
(MPYUr32 R32C:$rA, R32C:$rB))>;
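A full 32-bit multiply is assembled from three 16x16 partial products; the hi(a)*hi(b) term falls out because it lands entirely above bit 31. A C sketch of the identity the mpyh/mpyh/mpyu sum relies on (function name is illustrative):

#include <stdint.h>

/* 32-bit multiply from 16-bit multiplies, mirroring
 *   a * b == mpyh(a,b) + mpyh(b,a) + mpyu(a,b)   (mod 2^32)
 * where mpyh(x,y) = (hi16(x) * lo16(y)) << 16 and mpyu multiplies
 * the low halfwords unsigned. */
uint32_t mul32_from_16(uint32_t a, uint32_t b) {
    uint32_t mpyh_ab = ((a >> 16) * (b & 0xffffu)) << 16; /* hi(a)*lo(b) << 16 */
    uint32_t mpyh_ba = ((b >> 16) * (a & 0xffffu)) << 16; /* hi(b)*lo(a) << 16 */
    uint32_t mpyu_ab = (a & 0xffffu) * (b & 0xffffu);     /* lo(a)*lo(b)       */
    return mpyh_ab + mpyh_ba + mpyu_ab;  /* hi(a)*hi(b) << 32 vanishes mod 2^32 */
}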
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Reciprocal estimate and interpolation
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
Interpf32.Fragment,
DivEstf32.Fragment)>;
// Epsilon addition
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
(SELBf32_cond NRaphf32.Fragment,
Epsilonf32.Fragment,
(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
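Read bottom-up, the fragments implement estimate-and-refine division: reciprocal estimate (frest + fi), a division estimate (fm), one Newton-Raphson step (fnms + fma), a one-ulp bump, and a final cgti + selb that keeps the bumped quotient only while the remainder a - q*b stays non-negative. A scalar C sketch of the same flow, with the exact reciprocal standing in for the hardware estimate and illustrative helper names (the sign test approximates the bit-pattern compare cgti performs):

#include <stdint.h>
#include <string.h>

/* Bump a float by one ulp by adding 1 to its bit pattern; this is what
 * the add-immediate (ai ..., 1) on the float register does above. */
static float ulp_up(float x) {
    uint32_t bits;
    memcpy(&bits, &x, sizeof bits);
    bits += 1;
    memcpy(&x, &bits, sizeof x);
    return x;
}

float fdiv_nr(float a, float b) {
    float y0 = 1.0f / b;               /* stands in for frest + fi       */
    float q0 = a * y0;                 /* division estimate (fm)         */
    float q1 = (a - q0 * b) * y0 + q0; /* Newton-Raphson step (fnms+fma) */
    float q2 = ulp_up(q1);             /* epsilon addition               */
    return (a - q2 * b >= 0.0f) ? q2 : q1; /* cgti + selb                */
}

The v4f32 fragments below are the identical sequence applied lane-wise.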
// Reciprocal estimate and interpolation
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
(v4f32 VECREG:$rB),
(v4f32 VECREG:$rA)),
Interpv4f32.Fragment,
DivEstv4f32.Fragment)>;
// Epsilon addition
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
(SELBv4f32_cond NRaphv4f32.Fragment,
Epsilonv4f32.Fragment,
(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
Epsilonv4f32.Fragment,
(v4f32 VECREG:$rA)), -1))>;


@ -87,24 +87,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPUISelLowering.h):
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
// SPU 16-bit multiply
def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>;
// SPU multiply unsigned, used in instruction lowering for v4i32
// multiplies:
def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>;
def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
// SPU 16-bit multiply high x low, shift result 16-bits
// Used to compute intermediate products for 32-bit multiplies
def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>;
def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
// SPU 16-bit multiply high x high, 32-bit product
// Used to compute intermediate products for 16-bit multiplies
def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>;
def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>;
// Shift left quadword by bits and bytes
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
@ -117,11 +99,6 @@ def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES",
SPUvecshift_type, []>;
def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
SPUvecshift_type, []>;
// Vector rotate left, bits shifted out of the left are rotated in on the right
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>;
@ -141,12 +118,6 @@ def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
// SPU gather bits instruction:
def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>;
// SPU floating point interpolate
def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>;
// SPU floating point reciprocal estimate (used for fdiv)
def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>;
def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;


@ -238,7 +238,7 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
SPU::R0, /* link register */
0 /* end */
};
return SPU_CalleeSaveRegs;
}
@ -268,7 +268,7 @@ SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
&SPU::GPRCRegClass, /* link register */
0 /* end */
};
return SPU_CalleeSaveRegClasses;
}
@ -339,10 +339,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
  // Most instructions, except for generated FrameIndex additions using AIr32
  // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
  // immediate in operand 2.
  unsigned OpNo = 1;
  if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
    OpNo = 2;
MachineOperand &MO = MI.getOperand(OpNo);
// Offset is biased by $lr's slot at the bottom.
@ -355,7 +358,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
if (Offset > SPUFrameInfo::maxFrameOffset()
|| Offset < SPUFrameInfo::minFrameOffset()) {
cerr << "Large stack adjustment ("
         << Offset
<< ") in SPURegisterInfo::eliminateFrameIndex.";
} else {
MO.ChangeToImmediate(Offset);
@ -371,7 +374,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize = MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
@ -381,7 +384,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI->hasVarSizedObjects())
@ -389,7 +392,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Update maximum call frame size.
MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@ -418,18 +421,18 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
// Prepare for debug frame info.
bool hasDebugInfo = MMI && MMI->hasDebugInfo();
unsigned FrameLabelId = 0;
// Move MBBI back to the beginning of the function.
MBBI = MBB.begin();
// Work out frame sizes.
determineFrameLayout(MF);
int FrameSize = MFI->getStackSize();
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
@ -440,7 +443,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
FrameLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
// for the ABI
BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
@ -476,15 +479,15 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
cerr << "Unhandled frame size: " << FrameSize << "\n";
abort();
}
if (hasDebugInfo) {
std::vector<MachineMove> &Moves = MMI->getFrameMoves();
// Show update of SP.
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
@ -495,11 +498,11 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
MachineLocation CSSrc(Reg);
Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
}
// Mark effective beginning of when frame pointer is ready.
unsigned ReadyLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));


@ -1,9 +1,11 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep frest %t1.s | count 2
; RUN: grep -w fi %t1.s | count 2
; RUN: grep -w fm %t1.s | count 2
; RUN: grep fma %t1.s | count 2
; RUN: grep fnms %t1.s | count 4
; RUN: grep cgti %t1.s | count 2
; RUN: grep selb %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"


@ -1,8 +1,5 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep {fsmbi.*61680} %t1.s | count 1
; RUN: grep rotqmbyi %t1.s | count 1
; RUN: grep rotmai %t1.s | count 1
; RUN: grep selb %t1.s | count 1
; RUN: grep xswd %t1.s | count 1
; RUN: grep shufb %t1.s | count 2
; RUN: grep cg %t1.s | count 1
; RUN: grep addx %t1.s | count 1


@ -8,7 +8,7 @@
; RUN: grep and %t1.s | count 2
; RUN: grep selb %t1.s | count 6
; RUN: grep fsmbi %t1.s | count 4
; RUN: grep shli %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
; RUN: grep ila %t1.s | count 2
; RUN: grep xsbh %t1.s | count 4


@ -1,10 +1,21 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep -w shlh %t1.s | count 9
; RUN: grep -w shlhi %t1.s | count 3
; RUN: grep -w shl %t1.s | count 9
; RUN: grep -w shli %t1.s | count 3
; RUN: grep -w xshw %t1.s | count 5
; RUN: grep -w and %t1.s | count 5
; RUN: grep -w andi %t1.s | count 2
; RUN: grep -w rotmi %t1.s | count 2
; RUN: grep -w rotqmbyi %t1.s | count 1
; RUN: grep -w rotqmbii %t1.s | count 2
; RUN: grep -w rotqmby %t1.s | count 1
; RUN: grep -w rotqmbi %t1.s | count 1
; RUN: grep -w rotqbyi %t1.s | count 1
; RUN: grep -w rotqbii %t1.s | count 2
; RUN: grep -w rotqbybi %t1.s | count 1
; RUN: grep -w sfi %t1.s | count 3
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@ -210,3 +221,57 @@ define i32 @shli_i32_12(i32 zeroext %arg1) zeroext {
%A = shl i32 0, %arg1
ret i32 %A
}
;; i64 shift left
define i64 @shl_i64_1(i64 %arg1) {
%A = shl i64 %arg1, 9
ret i64 %A
}
define i64 @shl_i64_2(i64 %arg1) {
%A = shl i64 %arg1, 3
ret i64 %A
}
define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
%1 = zext i32 %shift to i64
%2 = shl i64 %arg1, %1
ret i64 %2
}
;; i64 shift right logical (zeros fill in from the left)
define i64 @lshr_i64_1(i64 %arg1) {
%1 = lshr i64 %arg1, 9
ret i64 %1
}
define i64 @lshr_i64_2(i64 %arg1) {
%1 = lshr i64 %arg1, 3
ret i64 %1
}
define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
%1 = zext i32 %shift to i64
%2 = lshr i64 %arg1, %1
ret i64 %2
}
;; i64 shift right arithmetic (sign bit fills in from the left)
define i64 @ashr_i64_1(i64 %arg) {
%1 = ashr i64 %arg, 9
ret i64 %1
}
define i64 @ashr_i64_2(i64 %arg) {
%1 = ashr i64 %arg, 3
ret i64 %1
}
define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
%1 = zext i32 %shift to i64
%2 = ashr i64 %arg1, %1
ret i64 %2
}


@ -34,19 +34,45 @@ struct pred_s preds[] = {
{ "neq", i64_neq, i64_neq_select }
};
uint64_t i64_shl_const(uint64_t a) {
return a << 10;
}
uint64_t i64_shl(uint64_t a, int amt) {
return a << amt;
}
uint64_t i64_srl_const(uint64_t a) {
return a >> 10;
}
uint64_t i64_srl(uint64_t a, int amt) {
return a >> amt;
}
int64_t i64_sra_const(int64_t a) {
return a >> 10;
}
int64_t i64_sra(int64_t a, int amt) {
return a >> amt;
}
int main(void) {
int i;
int64_t a = 1234567890003LL;
int64_t b = 2345678901235LL;
int64_t c = 1234567890001LL;
int64_t d = 10001LL;
int64_t e = 10000LL;
int64_t f = -1068103409991LL;
printf("a = %16lld (0x%016llx)\n", a, a);
printf("b = %16lld (0x%016llx)\n", b, b);
printf("c = %16lld (0x%016llx)\n", c, c);
printf("d = %16lld (0x%016llx)\n", d, d);
printf("e = %16lld (0x%016llx)\n", e, e);
printf("f = %16lld (0x%016llx)\n", f, f);
printf("----------------------------------------\n");
for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
@ -64,5 +90,23 @@ int main(void) {
printf("----------------------------------------\n");
}
printf("a = 0x%016llx\n", a);
printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a));
printf("i64_shl(a) = 0x%016llx\n", i64_shl(a, 5));
printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a));
printf("i64_srl(a) = 0x%016llx\n", i64_srl(a, 5));
printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a));
printf("i64_sra(a) = 0x%016llx\n", i64_sra(a, 5));
printf("----------------------------------------\n");
printf("f = 0x%016llx\n", f);
printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f));
printf("i64_shl(f) = 0x%016llx\n", i64_shl(f, 10));
printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f));
printf("i64_srl(f) = 0x%016llx\n", i64_srl(f, 10));
printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f));
printf("i64_sra(f) = 0x%016llx\n", i64_sra(f, 10));
printf("----------------------------------------\n");
return 0;
}