mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
CellSPU:
- First patch from Nehal Desai, a new contributor at Aerospace. Nehal's patch fixes sign/zero/any-extending loads for integers and floating point. Example code, compiled without debugging or optimization, where he first noticed the bug: int main(void) { float a = 99.0; printf("%d\n", a); return 0; } Verified that this code actually works on a Cell SPU. Changes by Scott Michel: - Fix bug in the value type list constructed by SPUISD::LDRESULT to include both the load's result and its chain, not just the chain alone. - Simplify LowerLOAD and remove extraneous and unnecessary chains. - Remove unused SPUISD pseudo instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60526 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
80e201b8db
commit
30ee7df71c
@ -676,7 +676,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
|||||||
|
|
||||||
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Zero, Chain);
|
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Zero, Chain);
|
||||||
} else {
|
} else {
|
||||||
Result = CurDAG->getTargetNode(Opc, MVT::Other, Arg, Arg, Chain);
|
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
|
||||||
}
|
}
|
||||||
|
|
||||||
Chain = SDValue(Result, 1);
|
Chain = SDValue(Result, 1);
|
||||||
|
@ -436,12 +436,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
||||||
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
|
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
|
||||||
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
||||||
node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
|
|
||||||
= "SPUISD::VEC2PREFSLOT_CHAINED";
|
|
||||||
node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
|
|
||||||
node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
|
|
||||||
node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
|
|
||||||
node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
|
|
||||||
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
|
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
|
||||||
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
|
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
|
||||||
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
|
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
|
||||||
@ -458,8 +452,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
|
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
|
||||||
"SPUISD::ROTQUAD_RZ_BITS";
|
"SPUISD::ROTQUAD_RZ_BITS";
|
||||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
||||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
|
|
||||||
"SPUISD::ROTBYTES_LEFT_CHAINED";
|
|
||||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
|
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
|
||||||
"SPUISD::ROTBYTES_LEFT_BITS";
|
"SPUISD::ROTBYTES_LEFT_BITS";
|
||||||
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
|
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
|
||||||
@ -597,13 +589,24 @@ AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
|
|||||||
/*!
|
/*!
|
||||||
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
|
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
|
||||||
within a 16-byte block, we have to rotate to extract the requested element.
|
within a 16-byte block, we have to rotate to extract the requested element.
|
||||||
*/
|
|
||||||
|
For extending loads, we also want to ensure that the following sequence is
|
||||||
|
emitted, e.g. for MVT::f32 extending load to MVT::f64:
|
||||||
|
|
||||||
|
\verbatim
|
||||||
|
%1 v16i8,ch = load
|
||||||
|
%2 v16i8 = rotate %1
|
||||||
|
%3 v4f32 = bitconvert %2
|
||||||
|
%4 f32 = vec2prefslot %3
|
||||||
|
%5 f64 = fp_extend %4
|
||||||
|
\endverbatim
|
||||||
|
*/
|
||||||
static SDValue
|
static SDValue
|
||||||
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||||
LoadSDNode *LN = cast<LoadSDNode>(Op);
|
LoadSDNode *LN = cast<LoadSDNode>(Op);
|
||||||
SDValue the_chain = LN->getChain();
|
SDValue the_chain = LN->getChain();
|
||||||
MVT VT = LN->getMemoryVT();
|
MVT InVT = LN->getMemoryVT();
|
||||||
MVT OpVT = Op.getNode()->getValueType(0);
|
MVT OutVT = Op.getValueType();
|
||||||
ISD::LoadExtType ExtType = LN->getExtensionType();
|
ISD::LoadExtType ExtType = LN->getExtensionType();
|
||||||
unsigned alignment = LN->getAlignment();
|
unsigned alignment = LN->getAlignment();
|
||||||
SDValue Ops[8];
|
SDValue Ops[8];
|
||||||
@ -613,7 +616,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
int offset, rotamt;
|
int offset, rotamt;
|
||||||
bool was16aligned;
|
bool was16aligned;
|
||||||
SDValue result =
|
SDValue result =
|
||||||
AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
|
AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
|
||||||
|
was16aligned);
|
||||||
|
|
||||||
if (result.getNode() == 0)
|
if (result.getNode() == 0)
|
||||||
return result;
|
return result;
|
||||||
@ -625,57 +629,40 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
if (rotamt != 0 || !was16aligned) {
|
if (rotamt != 0 || !was16aligned) {
|
||||||
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
|
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
|
||||||
|
|
||||||
Ops[0] = the_chain;
|
Ops[0] = result;
|
||||||
Ops[1] = result;
|
|
||||||
if (was16aligned) {
|
if (was16aligned) {
|
||||||
Ops[2] = DAG.getConstant(rotamt, MVT::i16);
|
Ops[1] = DAG.getConstant(rotamt, MVT::i16);
|
||||||
} else {
|
} else {
|
||||||
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||||
LoadSDNode *LN1 = cast<LoadSDNode>(result);
|
LoadSDNode *LN1 = cast<LoadSDNode>(result);
|
||||||
Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
|
Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
|
||||||
DAG.getConstant(rotamt, PtrVT));
|
DAG.getConstant(rotamt, PtrVT));
|
||||||
}
|
}
|
||||||
|
|
||||||
result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
|
result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
|
||||||
the_chain = result.getValue(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VT == OpVT || ExtType == ISD::EXTLOAD) {
|
|
||||||
SDVTList scalarvts;
|
|
||||||
MVT vecVT = MVT::v16i8;
|
|
||||||
|
|
||||||
// Convert the loaded v16i8 vector to the appropriate vector type
|
// Convert the loaded v16i8 vector to the appropriate vector type
|
||||||
// specified by the operand:
|
// specified by the operand:
|
||||||
if (OpVT == VT) {
|
MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
|
||||||
if (VT != MVT::i1)
|
result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
|
||||||
vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
|
DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
|
||||||
} else
|
|
||||||
vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
|
|
||||||
|
|
||||||
Ops[0] = the_chain;
|
|
||||||
Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
|
|
||||||
scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
|
|
||||||
result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
|
|
||||||
the_chain = result.getValue(1);
|
|
||||||
} else {
|
|
||||||
// Handle the sign and zero-extending loads for i1 and i8:
|
|
||||||
unsigned NewOpC;
|
|
||||||
|
|
||||||
|
// Handle extending loads by extending the scalar result:
|
||||||
if (ExtType == ISD::SEXTLOAD) {
|
if (ExtType == ISD::SEXTLOAD) {
|
||||||
NewOpC = (OpVT == MVT::i1
|
result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
|
||||||
? SPUISD::EXTRACT_I1_SEXT
|
} else if (ExtType == ISD::ZEXTLOAD) {
|
||||||
: SPUISD::EXTRACT_I8_SEXT);
|
result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
|
||||||
} else {
|
} else if (ExtType == ISD::EXTLOAD) {
|
||||||
assert(ExtType == ISD::ZEXTLOAD);
|
unsigned NewOpc = ISD::ANY_EXTEND;
|
||||||
NewOpC = (OpVT == MVT::i1
|
|
||||||
? SPUISD::EXTRACT_I1_ZEXT
|
if (OutVT.isFloatingPoint())
|
||||||
: SPUISD::EXTRACT_I8_ZEXT);
|
NewOpc = ISD::FP_EXTEND;
|
||||||
|
|
||||||
|
result = DAG.getNode(NewOpc, OutVT, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
result = DAG.getNode(NewOpC, OpVT, result);
|
SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
|
||||||
}
|
|
||||||
|
|
||||||
SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
|
|
||||||
SDValue retops[2] = {
|
SDValue retops[2] = {
|
||||||
result,
|
result,
|
||||||
the_chain
|
the_chain
|
||||||
@ -3034,10 +3021,16 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
SDValue combinedConst =
|
SDValue combinedConst =
|
||||||
DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
|
DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
|
||||||
|
|
||||||
DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
|
#if defined(NDEBUG)
|
||||||
<< "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
|
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
|
||||||
DEBUG(cerr << "With: (SPUindirect <arg>, "
|
cerr << "\n"
|
||||||
<< CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
|
<< "Replace: (add " << CN0->getZExtValue() << ", "
|
||||||
|
<< "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
|
||||||
|
<< "With: (SPUindirect <arg>, "
|
||||||
|
<< CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
|
return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
|
||||||
Op0.getOperand(0), combinedConst);
|
Op0.getOperand(0), combinedConst);
|
||||||
}
|
}
|
||||||
@ -3071,11 +3064,14 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
// (any_extend (SPUextract_elt0 <arg>)) ->
|
// (any_extend (SPUextract_elt0 <arg>)) ->
|
||||||
// (SPUextract_elt0 <arg>)
|
// (SPUextract_elt0 <arg>)
|
||||||
// Types must match, however...
|
// Types must match, however...
|
||||||
DEBUG(cerr << "Replace: ");
|
#if defined(NDEBUG)
|
||||||
DEBUG(N->dump(&DAG));
|
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
|
||||||
DEBUG(cerr << "\nWith: ");
|
cerr << "\nReplace: ";
|
||||||
DEBUG(Op0.getNode()->dump(&DAG));
|
N->dump(&DAG);
|
||||||
DEBUG(cerr << "\n");
|
cerr << "\nWith: ";
|
||||||
|
Op0.getNode()->dump(&DAG);
|
||||||
|
cerr << "\n";
|
||||||
|
#endif
|
||||||
|
|
||||||
return Op0;
|
return Op0;
|
||||||
}
|
}
|
||||||
@ -3243,8 +3239,7 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
|||||||
}
|
}
|
||||||
|
|
||||||
case SPUISD::LDRESULT:
|
case SPUISD::LDRESULT:
|
||||||
case SPUISD::VEC2PREFSLOT:
|
case SPUISD::VEC2PREFSLOT: {
|
||||||
case SPUISD::VEC2PREFSLOT_CHAINED: {
|
|
||||||
MVT OpVT = Op.getValueType();
|
MVT OpVT = Op.getValueType();
|
||||||
unsigned OpVTBits = OpVT.getSizeInBits();
|
unsigned OpVTBits = OpVT.getSizeInBits();
|
||||||
uint64_t InMask = OpVT.getIntegerVTBitMask();
|
uint64_t InMask = OpVT.getIntegerVTBitMask();
|
||||||
@ -3254,10 +3249,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
case EXTRACT_I1_ZEXT:
|
|
||||||
case EXTRACT_I1_SEXT:
|
|
||||||
case EXTRACT_I8_ZEXT:
|
|
||||||
case EXTRACT_I8_SEXT:
|
|
||||||
case MPY:
|
case MPY:
|
||||||
case MPYU:
|
case MPYU:
|
||||||
case MPYH:
|
case MPYH:
|
||||||
@ -3272,7 +3263,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
|
|||||||
case SPUISD::ROTQUAD_RZ_BYTES:
|
case SPUISD::ROTQUAD_RZ_BYTES:
|
||||||
case SPUISD::ROTQUAD_RZ_BITS:
|
case SPUISD::ROTQUAD_RZ_BITS:
|
||||||
case SPUISD::ROTBYTES_LEFT:
|
case SPUISD::ROTBYTES_LEFT:
|
||||||
case SPUISD::ROTBYTES_LEFT_CHAINED:
|
|
||||||
case SPUISD::SELECT_MASK:
|
case SPUISD::SELECT_MASK:
|
||||||
case SPUISD::SELB:
|
case SPUISD::SELB:
|
||||||
case SPUISD::FPInterp:
|
case SPUISD::FPInterp:
|
||||||
|
@ -41,11 +41,6 @@ namespace llvm {
|
|||||||
CNTB, ///< Count leading ones in bytes
|
CNTB, ///< Count leading ones in bytes
|
||||||
PROMOTE_SCALAR, ///< Promote scalar->vector
|
PROMOTE_SCALAR, ///< Promote scalar->vector
|
||||||
VEC2PREFSLOT, ///< Extract element 0
|
VEC2PREFSLOT, ///< Extract element 0
|
||||||
VEC2PREFSLOT_CHAINED, ///< Extract element 0, with chain
|
|
||||||
EXTRACT_I1_ZEXT, ///< Extract element 0 as i1, zero extend
|
|
||||||
EXTRACT_I1_SEXT, ///< Extract element 0 as i1, sign extend
|
|
||||||
EXTRACT_I8_ZEXT, ///< Extract element 0 as i8, zero extend
|
|
||||||
EXTRACT_I8_SEXT, ///< Extract element 0 as i8, sign extend
|
|
||||||
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
|
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
|
||||||
MPYU, ///< Multiply Unsigned
|
MPYU, ///< Multiply Unsigned
|
||||||
MPYH, ///< Multiply High
|
MPYH, ///< Multiply High
|
||||||
@ -60,7 +55,6 @@ namespace llvm {
|
|||||||
ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
|
ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
|
||||||
ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
|
ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
|
||||||
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
|
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
|
||||||
ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
|
|
||||||
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
|
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
|
||||||
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
|
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
|
||||||
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
|
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
|
||||||
|
@ -1288,39 +1288,21 @@ def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)),
|
|||||||
def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
|
||||||
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v16i8 VECREG:$rA)),
|
|
||||||
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
|
||||||
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
|
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v8i16 VECREG:$rA)),
|
|
||||||
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
|
||||||
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
|
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v4i32 VECREG:$rA)),
|
|
||||||
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
|
||||||
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
|
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v2i64 VECREG:$rA)),
|
|
||||||
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
|
||||||
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
|
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v4f32 VECREG:$rA)),
|
|
||||||
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
|
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
|
||||||
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
|
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
|
||||||
|
|
||||||
def : Pat<(SPUvec2prefslot_chained (v2f64 VECREG:$rA)),
|
|
||||||
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
|
|
||||||
|
|
||||||
// ORC: Bitwise "or" with complement (c = a | ~b)
|
// ORC: Bitwise "or" with complement (c = a | ~b)
|
||||||
|
|
||||||
class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
|
class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
@ -2147,15 +2129,6 @@ multiclass RotateQuadLeftByBytes
|
|||||||
|
|
||||||
defm ROTQBY: RotateQuadLeftByBytes;
|
defm ROTQBY: RotateQuadLeftByBytes;
|
||||||
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB),
|
|
||||||
(ROTQBYv16i8 VECREG:$rA, R32C:$rB)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v8i16 VECREG:$rA), R32C:$rB),
|
|
||||||
(ROTQBYv8i16 VECREG:$rA, R32C:$rB)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v4i32 VECREG:$rA), R32C:$rB),
|
|
||||||
(ROTQBYv4i32 VECREG:$rA, R32C:$rB)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), R32C:$rB),
|
|
||||||
(ROTQBYv2i64 VECREG:$rA, R32C:$rB)>;
|
|
||||||
|
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
// Rotate quad by byte (count), immediate
|
// Rotate quad by byte (count), immediate
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
@ -2179,15 +2152,6 @@ multiclass RotateQuadByBytesImm
|
|||||||
|
|
||||||
defm ROTQBYI: RotateQuadByBytesImm;
|
defm ROTQBYI: RotateQuadByBytesImm;
|
||||||
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), (i16 uimm7:$val)),
|
|
||||||
(ROTQBYIv16i8 VECREG:$rA, uimm7:$val)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v8i16 VECREG:$rA), (i16 uimm7:$val)),
|
|
||||||
(ROTQBYIv8i16 VECREG:$rA, uimm7:$val)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v4i32 VECREG:$rA), (i16 uimm7:$val)),
|
|
||||||
(ROTQBYIv4i32 VECREG:$rA, uimm7:$val)>;
|
|
||||||
def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), (i16 uimm7:$val)),
|
|
||||||
(ROTQBYIv2i64 VECREG:$rA, uimm7:$val)>;
|
|
||||||
|
|
||||||
// See ROTQBY note above.
|
// See ROTQBY note above.
|
||||||
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
|
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RI7Form<0b00110011100, OOL, IOL,
|
RI7Form<0b00110011100, OOL, IOL,
|
||||||
@ -3972,10 +3936,6 @@ def : Pat<(ret),
|
|||||||
// Zero/Any/Sign extensions
|
// Zero/Any/Sign extensions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// zext 1->32: Zero extend i1 to i32
|
|
||||||
def : Pat<(SPUextract_i1_zext R32C:$rSrc),
|
|
||||||
(ANDIr32 R32C:$rSrc, 0x1)>;
|
|
||||||
|
|
||||||
// sext 8->32: Sign extend bytes to words
|
// sext 8->32: Sign extend bytes to words
|
||||||
def : Pat<(sext_inreg R32C:$rSrc, i8),
|
def : Pat<(sext_inreg R32C:$rSrc, i8),
|
||||||
(XSHWr32 (XSBHr32 R32C:$rSrc))>;
|
(XSHWr32 (XSBHr32 R32C:$rSrc))>;
|
||||||
@ -3983,19 +3943,10 @@ def : Pat<(sext_inreg R32C:$rSrc, i8),
|
|||||||
def : Pat<(i32 (sext R8C:$rSrc)),
|
def : Pat<(i32 (sext R8C:$rSrc)),
|
||||||
(XSHWr16 (XSBHr8 R8C:$rSrc))>;
|
(XSHWr16 (XSBHr8 R8C:$rSrc))>;
|
||||||
|
|
||||||
def : Pat<(SPUextract_i8_sext VECREG:$rSrc),
|
|
||||||
(XSHWr32 (XSBHr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc),
|
|
||||||
(v4i32 VECREG:$rSrc))))>;
|
|
||||||
|
|
||||||
// zext 8->16: Zero extend bytes to halfwords
|
// zext 8->16: Zero extend bytes to halfwords
|
||||||
def : Pat<(i16 (zext R8C:$rSrc)),
|
def : Pat<(i16 (zext R8C:$rSrc)),
|
||||||
(ANDHIi8i16 R8C:$rSrc, 0xff)>;
|
(ANDHIi8i16 R8C:$rSrc, 0xff)>;
|
||||||
|
|
||||||
// zext 8->32 from preferred slot in load/store
|
|
||||||
def : Pat<(SPUextract_i8_zext VECREG:$rSrc),
|
|
||||||
(ANDIr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc), (v4i32 VECREG:$rSrc)),
|
|
||||||
0xff)>;
|
|
||||||
|
|
||||||
// zext 8->32: Zero extend bytes to words
|
// zext 8->32: Zero extend bytes to words
|
||||||
def : Pat<(i32 (zext R8C:$rSrc)),
|
def : Pat<(i32 (zext R8C:$rSrc)),
|
||||||
(ANDIi8i32 R8C:$rSrc, 0xff)>;
|
(ANDIi8i32 R8C:$rSrc, 0xff)>;
|
||||||
|
@ -125,11 +125,6 @@ def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
|
|||||||
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
|
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
|
||||||
SPUvecshift_type, []>;
|
SPUvecshift_type, []>;
|
||||||
|
|
||||||
// Same as above, but the node also has a chain associated (used in loads and
|
|
||||||
// stores)
|
|
||||||
def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED",
|
|
||||||
SPUvecshift_type, [SDNPHasChain]>;
|
|
||||||
|
|
||||||
// Vector rotate left by bytes, but the count is given in bits and the SPU
|
// Vector rotate left by bytes, but the count is given in bits and the SPU
|
||||||
// internally converts it to bytes (saves an instruction to mask off lower
|
// internally converts it to bytes (saves an instruction to mask off lower
|
||||||
// three bits)
|
// three bits)
|
||||||
@ -153,13 +148,6 @@ def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>;
|
|||||||
|
|
||||||
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
|
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
|
||||||
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
|
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
|
||||||
def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>;
|
|
||||||
def SPUvec2prefslot_chained: SDNode<"SPUISD::VEC2PREFSLOT_CHAINED",
|
|
||||||
SPU_vec_demote_chained, [SDNPHasChain]>;
|
|
||||||
def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>;
|
|
||||||
def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>;
|
|
||||||
def SPUextract_i8_sext: SDNode<"SPUISD::EXTRACT_I8_SEXT", SPU_vec_demote, []>;
|
|
||||||
def SPUextract_i8_zext: SDNode<"SPUISD::EXTRACT_I8_ZEXT", SPU_vec_demote, []>;
|
|
||||||
|
|
||||||
// Address high and low components, used for [r+r] type addressing
|
// Address high and low components, used for [r+r] type addressing
|
||||||
def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
|
def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user