- Fix fabs, fneg for f32 and f64.
- Use BuildVectorSDNode.isConstantSplat, now that the functionality exists
- Continue to improve i64 constant lowering. Lower certain special constants
  to the constant pool when they correspond to SPU's shufb instruction's
  special mask values. This avoids the overhead of performing a shuffle on a
  zero-filled vector just to get the special constant when the memory load
  suffices.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67067 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Scott Michel 2009-03-17 01:15:45 +00:00
parent d17d74bb80
commit 7ea02ffe91
7 changed files with 424 additions and 497 deletions

View File

@ -200,182 +200,212 @@ namespace {
return retval;
}
}
namespace {
//! Generate the carry-generate shuffle mask.
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
SmallVector<SDValue, 16 > ShufBytes;
//===--------------------------------------------------------------------===//
/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
/// instructions for SelectionDAG operations.
///
class SPUDAGToDAGISel :
public SelectionDAGISel
{
SPUTargetMachine &TM;
SPUTargetLowering &SPUtli;
unsigned GlobalBaseReg;
// Create the shuffle mask for "rotating" the carry up one register slot
// once the carry is generated.
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
public:
explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
SelectionDAGISel(tm),
TM(tm),
SPUtli(*tm.getTargetLowering())
{ }
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnFunction(Fn);
return true;
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
&ShufBytes[0], ShufBytes.size());
}
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(uint32_t Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
//! Generate the borrow-generate shuffle mask
/*!
  Builds a v4i32 BUILD_VECTOR from four i32 constants, in order 0x04050607,
  0xc0c0c0c0, 0x0c0d0e0f and 0xc0c0c0c0. This is the shuffle mask for
  "rotating" the borrow up one register slot once the borrow is generated.

  \param DAG The selection DAG in which the constants and vector are created
  \param dl  Debug location attached to the BUILD_VECTOR node
 */
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
  static const unsigned MaskWords[4] = {
    0x04050607, 0xc0c0c0c0, 0x0c0d0e0f, 0xc0c0c0c0
  };

  SDValue MaskOps[4];
  for (unsigned i = 0; i != 4; ++i)
    MaskOps[i] = DAG.getConstant(MaskWords[i], MVT::i32);

  return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, MaskOps, 4);
}
/// getI64Imm - Wrap \p Imm in a target constant node of type MVT::i64,
/// created through CurDAG.
inline SDValue getI64Imm(uint64_t Imm) {
  SDValue TargetImm = CurDAG->getTargetConstant(Imm, MVT::i64);
  return TargetImm;
}
//===------------------------------------------------------------------===//
/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
/// instructions for SelectionDAG operations.
///
class SPUDAGToDAGISel :
public SelectionDAGISel
{
SPUTargetMachine &TM;
SPUTargetLowering &SPUtli;
unsigned GlobalBaseReg;
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
public:
explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
SelectionDAGISel(tm),
TM(tm),
SPUtli(*tm.getTargetLowering())
{ }
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnFunction(Fn);
return true;
}
SDNode *emitBuildVector(SDValue build_vec) {
MVT vecVT = build_vec.getValueType();
SDNode *bvNode = build_vec.getNode();
DebugLoc dl = bvNode->getDebugLoc();
// Check to see if this vector can be represented as a CellSPU immediate
// constant by invoking all of the instruction selection predicates:
if (((vecVT == MVT::v8i16) &&
(SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
((vecVT == MVT::v4i32) &&
((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
(SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
(SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
(SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
((vecVT == MVT::v2i64) &&
((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
return Select(build_vec);
// No, need to emit a constant pool spill:
std::vector<Constant*> CV;
for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
/// getI32Imm - Wrap \p Imm in a target constant node of type MVT::i32,
/// created through CurDAG.
inline SDValue getI32Imm(uint32_t Imm) {
  SDValue TargetImm = CurDAG->getTargetConstant(Imm, MVT::i32);
  return TargetImm;
}
Constant *CP = ConstantVector::get(CV);
SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
CurDAG->getEntryNode(), CGPoolOffset,
PseudoSourceValue::getConstantPool(), 0,
false, Alignment));
}
/// getI64Imm - Wrap \p Imm in a target constant node of type MVT::i64,
/// created through CurDAG.
inline SDValue getI64Imm(uint64_t Imm) {
  SDValue TargetImm = CurDAG->getTargetConstant(Imm, MVT::i64);
  return TargetImm;
}
/// Select - Convert the specified operand from a target-independent to a
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
//! Emit the instruction sequence for i64 shl
SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
//! Emit the instruction sequence for i64 srl
SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
//! Emit the instruction sequence for i64 sra
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
//! Emit the necessary sequence for loading i64 constants:
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
//! D-form address predicate
bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
/// Alternate D-form address using i7 offset predicate
bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base);
/// D-form address selection workhorse
bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base, int minOffset, int maxOffset);
//! Address predicate if N can be expressed as an indexed [r+r] operation.
bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
default: return true;
case 'm': // memory
if (!SelectDFormAddr(Op, Op, Op0, Op1)
&& !SelectAFormAddr(Op, Op, Op0, Op1))
SelectXFormAddr(Op, Op, Op0, Op1);
break;
case 'o': // offsetable
if (!SelectDFormAddr(Op, Op, Op0, Op1)
&& !SelectAFormAddr(Op, Op, Op0, Op1)) {
Op0 = Op;
Op1 = getSmallIPtrImm(0);
/// getSmallIPtrImm - Wrap \p Imm in a target constant node whose type is the
/// pointer type reported by SPUtli.getPointerTy().
inline SDValue getSmallIPtrImm(unsigned Imm) {
  SDValue PtrImm = CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
  return PtrImm;
}
break;
case 'v': // not offsetable
#if 1
assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
#else
SelectAddrIdxOnly(Op, Op, Op0, Op1);
#endif
break;
/// emitBuildVector - Select a constant BUILD_VECTOR node, either as an SPU
/// immediate or as a load from the constant pool.
///
/// If one of the SPU immediate predicates accepts the node for its vector
/// type (v8i16, v4i32 or v2i64), the node is handed to Select() unchanged.
/// Otherwise every operand is collected into a ConstantVector, the vector is
/// placed in the constant pool, and a load of that pool entry is selected
/// via SelectCode().
///
/// \param build_vec  The BUILD_VECTOR value; on the constant-pool path every
///                   operand must be a ConstantSDNode.
/// \return The selected node.
SDNode *emitBuildVector(SDValue build_vec) {
  MVT vecVT = build_vec.getValueType();
  SDNode *bvNode = build_vec.getNode();
  DebugLoc dl = bvNode->getDebugLoc();

  // Check to see if this vector can be represented as a CellSPU immediate
  // constant by invoking all of the instruction selection predicates:
  if (((vecVT == MVT::v8i16) &&
       (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
      ((vecVT == MVT::v4i32) &&
       ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
        (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
        (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
        (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
      ((vecVT == MVT::v2i64) &&
       ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
        (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
        (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
    return Select(build_vec);

  // No, need to emit a constant pool spill:
  std::vector<Constant*> CV;
  const size_t NumElts = build_vec.getNumOperands();
  CV.reserve(NumElts);
  for (size_t i = 0; i != NumElts; ++i) {
    // Use cast<> rather than dyn_cast<>: the result is dereferenced
    // unconditionally, so a non-constant operand must trip cast<>'s check
    // instead of silently dereferencing a null pointer.
    ConstantSDNode *V = cast<ConstantSDNode>(build_vec.getOperand(i));
    CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
  }

  Constant *CP = ConstantVector::get(CV);
  SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue CGPoolOffset =
    SPU::LowerConstantPool(CPIdx, *CurDAG,
                           SPUtli.getSPUTargetMachine());

  // Load the vector back from the constant pool slot and select that load.
  return SelectCode(CurDAG->getLoad(vecVT, dl,
                                    CurDAG->getEntryNode(), CGPoolOffset,
                                    PseudoSourceValue::getConstantPool(), 0,
                                    false, Alignment));
}
OutOps.push_back(Op0);
OutOps.push_back(Op1);
return false;
}
/// Select - Convert the specified operand from a target-independent to a
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelect();
//! Emit the instruction sequence for i64 shl
SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
virtual const char *getPassName() const {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
//! Emit the instruction sequence for i64 srl
SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
const TargetInstrInfo *II = TM.getInstrInfo();
assert(II && "No InstrInfo?");
return new SPUHazardRecognizer(*II);
}
//! Emit the instruction sequence for i64 sra
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
// Include the pieces autogenerated from the target description.
//! Emit the necessary sequence for loading i64 constants:
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
//! Alternate instruction emit sequence for loading i64 constants
SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
//! D-form address predicate
bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
/// Alternate D-form address using i7 offset predicate
bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base);
/// D-form address selection workhorse
bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base, int minOffset, int maxOffset);
//! Address predicate if N can be expressed as an indexed [r+r] operation.
bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
/// SelectInlineAsmMemoryOperand - Choose an addressing mode for an inline asm
/// memory operand.
///
/// For the handled constraint codes ('m', 'o', 'v') the two address operands
/// are appended to \p OutOps and false is returned. Any other constraint code
/// returns true and leaves \p OutOps untouched.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                          char ConstraintCode,
                                          std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;

  switch (ConstraintCode) {
  case 'm': {   // memory
    // Try D-form first, then A-form; fall back to X-form if neither matches.
    const bool Matched = SelectDFormAddr(Op, Op, Op0, Op1)
                         || SelectAFormAddr(Op, Op, Op0, Op1);
    if (!Matched)
      SelectXFormAddr(Op, Op, Op0, Op1);
    break;
  }
  case 'o': {   // offsetable
    const bool Matched = SelectDFormAddr(Op, Op, Op0, Op1)
                         || SelectAFormAddr(Op, Op, Op0, Op1);
    if (!Matched) {
      // Neither form matched: use the operand itself with a zero offset.
      Op0 = Op;
      Op1 = getSmallIPtrImm(0);
    }
    break;
  }
  case 'v':   // not offsetable
#if 1
    assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
#else
    SelectAddrIdxOnly(Op, Op, Op0, Op1);
#endif
    break;
  default:
    return true;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  return false;
}
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
virtual void InstructionSelect();
/// getPassName - Return the fixed, human-readable name of this pass.
virtual const char *getPassName() const {
  const char *PassName = "Cell SPU DAG->DAG Pattern Instruction Selection";
  return PassName;
}
/// CreateTargetHazardRecognizer - Allocate the hazard recognizer to use for
/// this target when scheduling the DAG. The recognizer is a new
/// SPUHazardRecognizer built from the target machine's instruction info.
virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
  const TargetInstrInfo *II = TM.getInstrInfo();
  assert(II && "No InstrInfo?");

  ScheduleHazardRecognizer *Recognizer = new SPUHazardRecognizer(*II);
  return Recognizer;
}
// Include the pieces autogenerated from the target description.
#include "SPUGenDAGISel.inc"
};
};
}
/// InstructionSelect - This callback is invoked by
@ -689,7 +719,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
// Catch the i64 constants that end up here. Note: The backend doesn't
// attempt to legalize the constant (it's useless because DAGCombiner
// will insert 64-bit constants and we can't stop it).
return SelectI64Constant(Op, OpVT);
return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
SDValue Op0 = Op.getOperand(0);
@ -747,21 +777,21 @@ SPUDAGToDAGISel::Select(SDValue Op) {
zextShuffle));
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG, dl));
emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
@ -813,6 +843,54 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (OpVT == MVT::i64) {
return SelectSRAi64(Op, OpVT);
}
} else if (Opc == ISD::FNEG
&& (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
DebugLoc dl = Op.getDebugLoc();
// Check if the pattern is a special form of DFNMS:
// (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
SDValue Op0 = Op.getOperand(0);
if (Op0.getOpcode() == ISD::FSUB) {
SDValue Op00 = Op0.getOperand(0);
if (Op00.getOpcode() == ISD::FMUL) {
unsigned Opc = SPU::DFNMSf64;
if (OpVT == MVT::v2f64)
Opc = SPU::DFNMSv2f64;
return CurDAG->getTargetNode(Opc, dl, OpVT,
Op00.getOperand(0),
Op00.getOperand(1),
Op0.getOperand(1));
}
}
SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
SDNode *signMask = 0;
unsigned Opc = SPU::ORfneg64;
if (OpVT == MVT::f64) {
signMask = SelectI64Constant(negConst, MVT::i64, dl);
} else if (OpVT == MVT::v2f64) {
Opc = SPU::ORfnegvec;
signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
MVT::v2i64,
negConst, negConst));
}
return CurDAG->getTargetNode(Opc, dl, OpVT,
Op.getOperand(0), SDValue(signMask, 0));
} else if (Opc == ISD::FABS) {
if (OpVT == MVT::f64) {
SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
Op.getOperand(0), SDValue(signMask, 0));
} else if (OpVT == MVT::v2f64) {
SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
absConst, absConst);
SDNode *signMask = emitBuildVector(absVec);
return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
Op.getOperand(0), SDValue(signMask, 0));
}
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
MVT VT = N->getValueType(0);
@ -1087,13 +1165,17 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
/*!
Do the magic necessary to load an i64 constant
*/
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
DebugLoc dl) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
// Currently there's no DL on the input, but won't hurt to pretend.
DebugLoc dl = Op.getDebugLoc();
return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
}
SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
DebugLoc dl) {
MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
SDValue i64vec =
SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue(), dl);
SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
// Here's where it gets interesting, because we have to parse out the
// subtree handed back in i64vec:
@ -1145,6 +1227,9 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(shufNode, 0));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(emitBuildVector(i64vec), 0));
} else {
cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
abort();

View File

@ -1,5 +1,5 @@
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
@ -1353,7 +1353,7 @@ getVecImm(SDNode *N) {
}
}
return 0; // All UNDEF: use implicit def.; not Constant node
return 0;
}
/// get_vec_i18imm - Test if this vector is a vector filled with the same value
@ -1480,131 +1480,30 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
// Collect the bits of a BUILD_VECTOR of constants and undefs into two 64-bit
// words.
//
// Both VectorBits and UndefBits are cleared first. Then, for each operand:
//  - an ISD::UNDEF operand sets that element's bits in UndefBits and leaves
//    the corresponding VectorBits bits zero;
//  - a ConstantSDNode or ConstantFPSDNode operand ORs its bit pattern into
//    VectorBits;
//  - any other operand aborts the scan.
//
// Returns true if a non-constant, non-undef operand was found (the outputs
// then hold only the elements visited so far), false if every operand was a
// constant or undef.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
uint64_t UndefBits[2]) {
// Start with zero'd results.
VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// The element width is taken from operand 0 and used for every element.
unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue OpVal = BV->getOperand(i);
unsigned PartNo = i >= e/2; // 0 for the first half of the operands, 1 for the second half.
unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t; lower i gets the higher slot (assumes e/2 is a power of two).
uint64_t EltBits = 0;
if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark all bits of this element as undefined; VectorBits stays zero here.
uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
continue;
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
// Integer constant: keep only the low EltBitSize bits.
EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constant: f32 goes through FloatToBits, anything else through
// DoubleToBits.
const APFloat &apf = CN->getValueAPF();
EltBits = (CN->getValueType(0) == MVT::f32
? FloatToBits(apf.convertToFloat())
: DoubleToBits(apf.convertToDouble()));
} else {
// Nonconstant element.
return true;
}
// Merge this element's bits into its slot of the selected 64-bit word.
VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
}
//printf("%llx %llx %llx %llx\n",
// VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
return false;
}
/// Test whether a 128-bit constant is a splat of a smaller value.
///
/// The 128-bit value is repeatedly halved (128 -> 64 -> 32 -> 16 -> 8 bits).
/// At each width the two halves must agree, ignoring bits flagged as undef.
/// Halving stops at the first width that is not above \p MinSplatBits, and
/// the splat is reported at that width. For example, "0x01010101010101..."
/// with MinSplatBits below 16 yields SplatBits = 0x01 and SplatSize = 1 byte.
///
/// \param Bits128       The two 64-bit words of constant bits.
/// \param Undef128      The two 64-bit words of undef flags.
/// \param MinSplatBits  Width at which halving stops.
/// \param SplatBits     [out] The splatted value.
/// \param SplatUndef    [out] Undef bits of the splatted value.
/// \param SplatSize     [out] Splat width in bytes (8, 4, 2 or 1).
/// \return true if a splat was found. On a false return the three output
///         parameters are left untouched.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Can't be a splat if the two 64-bit pieces don't match (ignoring undefs).
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;

  if (!(MinSplatBits < 64)) {
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];
    SplatSize = 8;
    return true;
  }

  // Fold to 64 bits; undefs must not prevent a match, so defined bits are
  // OR-ed together and undef flags AND-ed together.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];

  // The top 32 bits must equal the lower 32 bits, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (!(MinSplatBits < 32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits must equal the lower 16 bits, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (!(MinSplatBits < 16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits must equal the lower 8 bits, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8))
      != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
//! Lower a BUILD_VECTOR instruction creatively:
SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero.
uint64_t VectorBits[2];
uint64_t UndefBits[2];
uint64_t SplatBits, SplatUndef;
int SplatSize;
if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
|| !isConstantSplat(VectorBits, UndefBits,
VT.getVectorElementType().getSizeInBits(),
SplatBits, SplatUndef, SplatSize))
return SDValue(); // Not a constant vector, not a splat.
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
unsigned minSplatBits = EltVT.getSizeInBits();
if (minSplatBits < 16)
minSplatBits = 16;
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, minSplatBits)
|| minSplatBits < SplatBitSize)
return SDValue(); // Wasn't a constant vector or splat exceeded min
uint64_t SplatBits = APSplatBits.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
switch (VT.getSimpleVT()) {
default:
@ -1620,8 +1519,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(Value32, MVT::i32);
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v4i32, T, T, T, T));
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T));
break;
}
case MVT::v2f64: {
@ -1636,45 +1534,42 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
unsigned short Value16 = SplatBits | (SplatBits << 8);
SDValue Ops[8];
for (int i = 0; i < 8; ++i)
Ops[i] = DAG.getConstant(Value16, MVT::i16);
unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
SmallVector<SDValue, 8> Ops;
Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8));
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
case MVT::v8i16: {
unsigned short Value16;
if (SplatSize == 2)
Value16 = (unsigned short) (SplatBits & 0xffff);
else
Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
SDValue Ops[8];
for (int i = 0; i < 8; ++i) Ops[i] = T;
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8);
unsigned short Value16 = SplatBits;
SDValue T = DAG.getConstant(Value16, EltVT);
SmallVector<SDValue, 8> Ops;
Ops.assign(8, T);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
}
case MVT::v4i32: {
unsigned int Value = SplatBits;
SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
case MVT::v2i32: {
unsigned int Value = SplatBits;
SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
}
case MVT::v2i64: {
return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl);
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
}
return SDValue();
}
/*!
*/
SDValue
SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
DebugLoc dl) {
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
DebugLoc dl) {
uint32_t upper = uint32_t(SplatVal >> 32);
uint32_t lower = uint32_t(SplatVal);
@ -1685,10 +1580,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Val, Val, Val, Val));
} else {
SDValue LO32;
SDValue HI32;
SmallVector<SDValue, 16> ShufBytes;
SDValue Result;
bool upper_special, lower_special;
// NOTE: This code creates common-case shuffle masks that can be easily
@ -1699,6 +1590,18 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
// Both upper and lower are special, lower to a constant pool load:
if (lower_special && upper_special) {
SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
SplatValCN, SplatValCN);
}
SDValue LO32;
SDValue HI32;
SmallVector<SDValue, 16> ShufBytes;
SDValue Result;
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
@ -1721,13 +1624,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
LO32 = HI32;
if (upper_special)
HI32 = LO32;
if (lower_special && upper_special) {
// Unhappy situation... both upper and lower are special, so punt with
// a target constant:
SDValue Zero = DAG.getConstant(0, MVT::i32);
HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero,
Zero, Zero);
}
for (int i = 0; i < 4; ++i) {
uint64_t val = 0;
@ -2022,9 +1918,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
ShufMask[i] = DAG.getConstant(bits, MVT::i32);
}
SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
&ShufMask[0],
sizeof(ShufMask) / sizeof(ShufMask[0]));
SDValue ShufMaskVec =
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
&ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
@ -2067,21 +1963,21 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
/*NOTREACHED*/
case MVT::i8: {
SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
factor, factor);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
case MVT::i16: {
SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
factor, factor);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
case MVT::i32:
case MVT::f32: {
SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
factor, factor);
replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
factor, factor, factor, factor);
break;
}
case MVT::i64:
@ -2164,71 +2060,65 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
case ISD::ROTR:
case ISD::ROTL: {
SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
MVT::i16));
N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
TLI.getShiftAmountTy()));
MVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
}
// Replicate lower 8-bits into upper 8:
SDValue ExpandArg =
DAG.getNode(ISD::OR, dl, MVT::i16, N0,
DAG.getNode(ISD::SHL, dl, MVT::i16,
N0, DAG.getConstant(8, MVT::i32)));
// Truncate back down to i8
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
}
case ISD::SRL:
case ISD::SHL: {
SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
MVT::i32));
N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
MVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = ISD::ZERO_EXTEND;
if (N1.getValueType().bitsGT(ShiftVT))
N1Opc = ISD::TRUNCATE;
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
}
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::SRA: {
SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::SIGN_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, dl, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
ShiftVT));
MVT N1VT = N1.getValueType();
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
if (!N1VT.bitsEq(ShiftVT)) {
unsigned N1Opc = ISD::SIGN_EXTEND;
if (N1VT.bitsGT(ShiftVT))
N1Opc = ISD::TRUNCATE;
N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
}
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::MUL: {
SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
MVT::i16));
N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, dl, MVT::i16, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
break;
@ -2238,36 +2128,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
return SDValue();
}
//! Build the shuffle mask used after a carry has been generated.
/*!
  Produces a v4i32 BUILD_VECTOR whose four i32 elements are the shuffle
  control words listed in the table below, in element order.

  \param DAG The selection DAG in which the constants and the vector node
             are created.
  \param dl  Debug location attached to the BUILD_VECTOR node.
  \return The BUILD_VECTOR node holding the mask.
 */
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
  // Control words for "rotating" the carry up one register slot once the
  // carry is generated. Words 0 and 2 name source bytes 4..7 and 12..15.
  // NOTE(review): the 0x80 control bytes presumably select a constant fill
  // byte per the shufb special-value encoding -- confirm against the SPU ISA.
  static const uint64_t MaskWords[] = {
    0x04050607, 0x80808080, 0x0c0d0e0f, 0x80808080
  };
  const unsigned NumMaskWords = sizeof(MaskWords) / sizeof(MaskWords[0]);

  SmallVector<SDValue, 16> MaskElts;
  for (unsigned i = 0; i != NumMaskWords; ++i)
    MaskElts.push_back(DAG.getConstant(MaskWords[i], MVT::i32));

  return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                     &MaskElts[0], MaskElts.size());
}
//! Build the shuffle mask used after a borrow has been generated.
/*!
  Produces a v4i32 BUILD_VECTOR whose four i32 elements are the shuffle
  control words listed in the table below, in element order.

  \param DAG The selection DAG in which the constants and the vector node
             are created.
  \param dl  Debug location attached to the BUILD_VECTOR node.
  \return The BUILD_VECTOR node holding the mask.
 */
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
  // Control words for "rotating" the borrow up one register slot once the
  // borrow is generated. Words 0 and 2 name source bytes 4..7 and 12..15.
  // NOTE(review): the 0xc0 control bytes presumably select a constant fill
  // byte per the shufb special-value encoding -- confirm against the SPU ISA.
  static const uint64_t MaskWords[] = {
    0x04050607, 0xc0c0c0c0, 0x0c0d0e0f, 0xc0c0c0c0
  };
  const unsigned NumMaskWords = sizeof(MaskWords) / sizeof(MaskWords[0]);

  SmallVector<SDValue, 16> MaskElts;
  for (unsigned i = 0; i != NumMaskWords; ++i)
    MaskElts.push_back(DAG.getConstant(MaskWords[i], MVT::i32));

  return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                     &MaskElts[0], MaskElts.size());
}
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
@ -2291,26 +2151,24 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
}
if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
uint64_t VectorBits[2];
uint64_t UndefBits[2];
uint64_t SplatBits, SplatUndef;
int SplatSize;
BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
&& isConstantSplat(VectorBits, UndefBits,
VT.getVectorElementType().getSizeInBits(),
SplatBits, SplatUndef, SplatSize)) {
SDValue tcVec[16];
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, minSplatBits)
&& minSplatBits <= SplatBitSize) {
uint64_t SplatBits = APSplatBits.getZExtValue();
SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
// Turn the BUILD_VECTOR into a set of target constants:
for (size_t i = 0; i < tcVecSize; ++i)
tcVec[i] = tc;
SmallVector<SDValue, 16> tcVec;
tcVec.assign(16, tc);
return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
tcVec, tcVecSize));
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
}
}
@ -2452,7 +2310,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
return Op; // return unmolested, legalized op
return SDValue();
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
@ -2478,7 +2336,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
return Op; // return unmolested, legalized
return SDValue();
}
//! Lower ISD::SETCC

View File

@ -78,11 +78,9 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
//! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
DebugLoc dl);
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
}
class SPUTargetMachine; // forward dec'l.

View File

@ -60,9 +60,6 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& SrcSR, unsigned& DstSR) const {
SrcSR = DstSR = 0; // No sub-registers.
// Primarily, ORI and OR are generated by copyRegToReg. But, there are other
// cases where we can safely say that what's being done is really a move
// (see how PowerPC does this -- it's the model for this code too.)
switch (MI.getOpcode()) {
default:
break;
@ -167,7 +164,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
MI.getOperand(1).isReg() &&
"invalid SPU OR<type>_<vec> or LR instruction!");
if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
sourceReg = MI.getOperand(0).getReg();
sourceReg = MI.getOperand(1).getReg();
destReg = MI.getOperand(0).getReg();
return true;
}

View File

@ -1258,10 +1258,9 @@ multiclass BitwiseAnd
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* Intentionally does not match a pattern */]>;
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
[/* Intentionally does not match a pattern */]>;
// Could use v4i32, but won't for clarity
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
@ -1288,10 +1287,11 @@ class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
IntegerOp, pattern>;
class ANDCVecInst<ValueType vectype>:
class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
(vnot (vectype VECREG:$rB))))]>;
[(set (vectype VECREG:$rT),
(and (vectype VECREG:$rA),
(vnot_frag (vectype VECREG:$rB))))]>;
class ANDCRegInst<RegisterClass rclass>:
ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
@ -1309,6 +1309,9 @@ multiclass AndComplement
def r32: ANDCRegInst<R32C>;
def r16: ANDCRegInst<R16C>;
def r8: ANDCRegInst<R8C>;
// Sometimes, the xor pattern has a bitcast constant:
def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
}
defm ANDC : AndComplement;
@ -1480,6 +1483,17 @@ multiclass BitwiseOr
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
// OR instructions used to negate f32 and f64 quantities.
def fneg32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* no pattern */]>;
def fneg64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
[/* no pattern */]>;
def fnegvec: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern, see fneg{32,64} */]>;
// scalar->vector promotion, prefslot2vec:
def v16i8_i8: ORPromoteScalar<R8C>;
def v8i16_i16: ORPromoteScalar<R16C>;
@ -1783,18 +1797,6 @@ multiclass BitwiseExclusiveOr
def r32: XORRegInst<R32C>;
def r16: XORRegInst<R16C>;
def r8: XORRegInst<R8C>;
// Special forms for floating point instructions.
// fneg and fabs require bitwise logical ops to manipulate the sign bit.
def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* no pattern */]>;
def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
[/* no pattern */]>;
def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern, see fneg{32,64} */]>;
}
defm XOR : BitwiseExclusiveOr;
@ -4239,33 +4241,36 @@ def FMSv2f64 :
(fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
(v2f64 VECREG:$rC)))]>;
// FNMS: - (a * b - c)
// DFNMS: - (a * b - c)
// - (a * b) + c => c - (a * b)
def FNMSf64 :
RRForm<0b01111010110, (outs R64FP:$rT),
(ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
"dfnms\t$rT, $rA, $rB", DPrecFP,
[(set R64FP:$rT, (fsub R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
DPrecFP, pattern>,
RegConstraint<"$rC = $rT">,
NoEncode<"$rC">;
def : Pat<(fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)),
(FNMSf64 R64FP:$rA, R64FP:$rB, R64FP:$rC)>;
class DFNMSVecInst<list<dag> pattern>:
DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
pattern>;
def FNMSv2f64 :
RRForm<0b01111010110, (outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
"dfnms\t$rT, $rA, $rB", DPrecFP,
[(set (v2f64 VECREG:$rT),
(fsub (v2f64 VECREG:$rC),
(fmul (v2f64 VECREG:$rA),
(v2f64 VECREG:$rB))))]>,
RegConstraint<"$rC = $rT">,
NoEncode<"$rC">;
class DFNMSRegInst<list<dag> pattern>:
DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
pattern>;
def : Pat<(fneg (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
(v2f64 VECREG:$rC))),
(FNMSv2f64 VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
multiclass DFMultiplySubtract
{
def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
(fsub (v2f64 VECREG:$rC),
(fmul (v2f64 VECREG:$rA),
(v2f64 VECREG:$rB))))]>;
def f64 : DFNMSRegInst<[(set R64FP:$rT,
(fsub R64FP:$rC,
(fmul R64FP:$rA, R64FP:$rB)))]>;
}
defm DFNMS : DFMultiplySubtract;
// - (a * b + c)
// - (a * b) - c
@ -4293,35 +4298,21 @@ def FNMAv2f64 :
//===----------------------------------------------------------------------==//
def : Pat<(fneg (v4f32 VECREG:$rA)),
(XORfnegvec (v4f32 VECREG:$rA),
(v4f32 (ILHUv4i32 0x8000)))>;
(ORfnegvec (v4f32 VECREG:$rA),
(v4f32 (ILHUv4i32 0x8000)))>;
def : Pat<(fneg R32FP:$rA),
(XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
def : Pat<(fneg (v2f64 VECREG:$rA)),
(XORfnegvec (v2f64 VECREG:$rA),
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80)))>;
def : Pat<(fneg R64FP:$rA),
(XORfneg64 R64FP:$rA,
(ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80))>;
(ORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
// Floating point absolute value
// Note: f64 fabs is custom-selected.
def : Pat<(fabs R32FP:$rA),
(ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
def : Pat<(fabs (v4f32 VECREG:$rA)),
(ANDfabsvec (v4f32 VECREG:$rA),
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
def : Pat<(fabs R64FP:$rA),
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
def : Pat<(fabs (v2f64 VECREG:$rA)),
(ANDfabsvec (v2f64 VECREG:$rA),
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
(IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
//===----------------------------------------------------------------------===//
// Hint for branch instructions:

View File

@ -8,11 +8,11 @@ target triple = "spu"
define double @__floatunsidf(i32 %arg_a) nounwind {
entry:
%in = alloca %struct.fp_number_type, align 8 ; <%struct.fp_number_type*> [#uses=5]
%0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 ; <i32*> [#uses=1]
%in = alloca %struct.fp_number_type, align 16
%0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
store i32 0, i32* %0, align 4
%1 = icmp eq i32 %arg_a, 0 ; <i1> [#uses=1]
%2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 ; <i32*> [#uses=2]
%1 = icmp eq i32 %arg_a, 0
%2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
br i1 %1, label %bb, label %bb1
bb: ; preds = %entry
@ -26,6 +26,6 @@ bb7: ; preds = %bb5, %bb1, %bb
ret double 1.0
}
declare i32 @llvm.ctlz.i32(i32) nounwind readnone
; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
declare double @__pack_d(%struct.fp_number_type*)

View File

@ -1,9 +1,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fsmbi %t1.s | count 3
; RUN: grep 32768 %t1.s | count 2
; RUN: grep xor %t1.s | count 4
; RUN: grep and %t1.s | count 5
; RUN: grep andbi %t1.s | count 3
; RUN: grep or %t1.s | count 4
; RUN: grep and %t1.s | count 2
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"