mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 18:31:23 +00:00
- Various '#if 0' cleanups.
- Move v4i32, i32 mul into SPUInstrInfo.td, with a few more instruction cleanups there as well. - Make SMUL_LOHI, UMUL_LOHI completely illegal for Cell SPU, to better assist Chris to see the problem in bug 3101. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61464 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
845145f8b5
commit
1df30c4061
@ -124,6 +124,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
|
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
|
||||||
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
|
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
|
||||||
|
|
||||||
|
// SMUL_LOHI, UMUL_LOHI are not legal for Cell:
|
||||||
|
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
||||||
|
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
||||||
|
|
||||||
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
|
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
|
||||||
MVT StoreVT = (MVT::SimpleValueType) stype;
|
MVT StoreVT = (MVT::SimpleValueType) stype;
|
||||||
setTruncStoreAction(VT, StoreVT, Expand);
|
setTruncStoreAction(VT, StoreVT, Expand);
|
||||||
@ -207,7 +211,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
|
|
||||||
// Custom lower i8, i32 and i64 multiplications
|
// Custom lower i8, i32 and i64 multiplications
|
||||||
setOperationAction(ISD::MUL, MVT::i8, Custom);
|
setOperationAction(ISD::MUL, MVT::i8, Custom);
|
||||||
setOperationAction(ISD::MUL, MVT::i32, Custom);
|
setOperationAction(ISD::MUL, MVT::i32, Legal);
|
||||||
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
|
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
|
||||||
|
|
||||||
// Need to custom handle (some) common i8, i64 math ops
|
// Need to custom handle (some) common i8, i64 math ops
|
||||||
@ -239,8 +243,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
|
|
||||||
setOperationAction(ISD::SETCC, MVT::i8, Legal);
|
setOperationAction(ISD::SETCC, MVT::i8, Legal);
|
||||||
setOperationAction(ISD::SETCC, MVT::i16, Legal);
|
setOperationAction(ISD::SETCC, MVT::i16, Legal);
|
||||||
setOperationAction(ISD::SETCC, MVT::i32, Custom);
|
setOperationAction(ISD::SETCC, MVT::i32, Legal);
|
||||||
setOperationAction(ISD::SETCC, MVT::i64, Custom);
|
setOperationAction(ISD::SETCC, MVT::i64, Legal);
|
||||||
|
|
||||||
// Zero extension and sign extension for i64 have to be
|
// Zero extension and sign extension for i64 have to be
|
||||||
// custom legalized
|
// custom legalized
|
||||||
@ -362,12 +366,15 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
|
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
|
|
||||||
setOperationAction(ISD::AND, MVT::v16i8, Custom);
|
setOperationAction(ISD::AND, MVT::v16i8, Custom);
|
||||||
setOperationAction(ISD::OR, MVT::v16i8, Custom);
|
setOperationAction(ISD::OR, MVT::v16i8, Custom);
|
||||||
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
|
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
|
||||||
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
|
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
|
||||||
|
|
||||||
|
// FIXME: This is only temporary until I put all vector multiplications in
|
||||||
|
// SPUInstrInfo.td:
|
||||||
|
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||||
|
|
||||||
setShiftAmountType(MVT::i32);
|
setShiftAmountType(MVT::i32);
|
||||||
setBooleanContents(ZeroOrNegativeOneBooleanContent);
|
setBooleanContents(ZeroOrNegativeOneBooleanContent);
|
||||||
|
|
||||||
@ -402,7 +409,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
|
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
|
||||||
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
|
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
|
||||||
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
||||||
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR";
|
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
|
||||||
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
||||||
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
|
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
|
||||||
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
|
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
|
||||||
@ -1197,9 +1204,18 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
// address pairs:
|
// address pairs:
|
||||||
Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
|
Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
|
||||||
}
|
}
|
||||||
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
|
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
|
||||||
Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
|
MVT CalleeVT = Callee.getValueType();
|
||||||
else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
|
SDValue Zero = DAG.getConstant(0, PtrVT);
|
||||||
|
SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
|
||||||
|
Callee.getValueType());
|
||||||
|
|
||||||
|
if (!ST->usingLargeMem()) {
|
||||||
|
Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
|
||||||
|
} else {
|
||||||
|
Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
|
||||||
|
}
|
||||||
|
} else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
|
||||||
// If this is an absolute destination address that appears to be a legal
|
// If this is an absolute destination address that appears to be a legal
|
||||||
// local store address, use the munged value.
|
// local store address, use the munged value.
|
||||||
Callee = SDValue(Dest, 0);
|
Callee = SDValue(Dest, 0);
|
||||||
@ -1915,17 +1931,8 @@ static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
|
|||||||
abort();
|
abort();
|
||||||
/*NOTREACHED*/
|
/*NOTREACHED*/
|
||||||
|
|
||||||
case MVT::v4i32: {
|
case MVT::v4i32:
|
||||||
SDValue rA = Op.getOperand(0);
|
|
||||||
SDValue rB = Op.getOperand(1);
|
|
||||||
SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
|
|
||||||
SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
|
|
||||||
SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
|
|
||||||
SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
|
|
||||||
|
|
||||||
return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
// Multiply two v8i16 vectors (pipeline friendly version):
|
// Multiply two v8i16 vectors (pipeline friendly version):
|
||||||
// a) multiply lower halves, mask off upper 16-bit of 32-bit product
|
// a) multiply lower halves, mask off upper 16-bit of 32-bit product
|
||||||
@ -2630,32 +2637,6 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
|
|||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Lower i32 multiplication
|
|
||||||
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
|
|
||||||
unsigned Opc) {
|
|
||||||
switch (VT.getSimpleVT()) {
|
|
||||||
default:
|
|
||||||
cerr << "CellSPU: Unknown LowerMUL value type, got "
|
|
||||||
<< Op.getValueType().getMVTString()
|
|
||||||
<< "\n";
|
|
||||||
abort();
|
|
||||||
/*NOTREACHED*/
|
|
||||||
|
|
||||||
case MVT::i32: {
|
|
||||||
SDValue rA = Op.getOperand(0);
|
|
||||||
SDValue rB = Op.getOperand(1);
|
|
||||||
|
|
||||||
return DAG.getNode(ISD::ADD, MVT::i32,
|
|
||||||
DAG.getNode(ISD::ADD, MVT::i32,
|
|
||||||
DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
|
|
||||||
DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
|
|
||||||
DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Custom lowering for CTPOP (count population)
|
//! Custom lowering for CTPOP (count population)
|
||||||
/*!
|
/*!
|
||||||
Custom lowering code that counts the number ones in the input
|
Custom lowering code that counts the number ones in the input
|
||||||
@ -2951,8 +2932,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||||||
return LowerVectorMUL(Op, DAG);
|
return LowerVectorMUL(Op, DAG);
|
||||||
else if (VT == MVT::i8)
|
else if (VT == MVT::i8)
|
||||||
return LowerI8Math(Op, DAG, Opc, *this);
|
return LowerI8Math(Op, DAG, Opc, *this);
|
||||||
else
|
|
||||||
return LowerMUL(Op, DAG, VT, Opc);
|
|
||||||
|
|
||||||
case ISD::FDIV:
|
case ISD::FDIV:
|
||||||
if (VT == MVT::f32 || VT == MVT::v4f32)
|
if (VT == MVT::f32 || VT == MVT::v4f32)
|
||||||
@ -3160,9 +3139,9 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
case ISD::ANY_EXTEND:
|
case ISD::ANY_EXTEND:
|
||||||
case ISD::ZERO_EXTEND:
|
case ISD::ZERO_EXTEND:
|
||||||
case ISD::SIGN_EXTEND: {
|
case ISD::SIGN_EXTEND: {
|
||||||
// (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
|
// (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
|
||||||
// <arg>
|
// <arg>
|
||||||
// but only if the SPUpromote_scalar and <arg> types match.
|
// but only if the SPUprefslot2vec and <arg> types match.
|
||||||
SDValue Op00 = Op0.getOperand(0);
|
SDValue Op00 = Op0.getOperand(0);
|
||||||
if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
|
if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
|
||||||
SDValue Op000 = Op00.getOperand(0);
|
SDValue Op000 = Op00.getOperand(0);
|
||||||
@ -3173,7 +3152,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SPUISD::VEC2PREFSLOT: {
|
case SPUISD::VEC2PREFSLOT: {
|
||||||
// (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
|
// (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
|
||||||
// <arg>
|
// <arg>
|
||||||
Result = Op0.getOperand(0);
|
Result = Op0.getOperand(0);
|
||||||
break;
|
break;
|
||||||
|
@ -585,23 +585,29 @@ def AHIr16:
|
|||||||
"ahi\t$rT, $rA, $val", IntegerOp,
|
"ahi\t$rT, $rA, $val", IntegerOp,
|
||||||
[(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
|
[(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
|
||||||
|
|
||||||
def Avec:
|
class AInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RRForm<0b00000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
RRForm<0b00000011000, OOL, IOL,
|
||||||
"a\t$rT, $rA, $rB", IntegerOp,
|
"a\t$rT, $rA, $rB", IntegerOp,
|
||||||
[(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
|
pattern>;
|
||||||
|
|
||||||
def : Pat<(add (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
|
class AVecInst<ValueType vectype>:
|
||||||
(Avec VECREG:$rA, VECREG:$rB)>;
|
AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||||
|
[(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
|
||||||
|
(vectype VECREG:$rB)))]>;
|
||||||
|
|
||||||
def Ar32:
|
class ARegInst<RegisterClass rclass>:
|
||||||
RRForm<0b00000011000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
|
||||||
"a\t$rT, $rA, $rB", IntegerOp,
|
[(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
|
||||||
[(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>;
|
|
||||||
|
|
||||||
def Ar8:
|
multiclass AddInstruction {
|
||||||
RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
|
def v4i32: AVecInst<v4i32>;
|
||||||
"a\t$rT, $rA, $rB", IntegerOp,
|
def v16i8: AVecInst<v16i8>;
|
||||||
[/* no pattern */]>;
|
|
||||||
|
def r32: ARegInst<R32C>;
|
||||||
|
def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
defm A : AddInstruction;
|
||||||
|
|
||||||
def AIvec:
|
def AIvec:
|
||||||
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
||||||
@ -789,7 +795,7 @@ def BGXvec:
|
|||||||
def MPYv8i16:
|
def MPYv8i16:
|
||||||
RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||||
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[(set (v8i16 VECREG:$rT), (SPUmpy_v8i16 (v8i16 VECREG:$rA),
|
[(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA),
|
||||||
(v8i16 VECREG:$rB)))]>;
|
(v8i16 VECREG:$rB)))]>;
|
||||||
|
|
||||||
def MPYr16:
|
def MPYr16:
|
||||||
@ -797,88 +803,101 @@ def MPYr16:
|
|||||||
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
|
[(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
|
||||||
|
|
||||||
def MPYUv4i32:
|
// Unsigned 16-bit multiply:
|
||||||
RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
|
||||||
|
class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
|
RRForm<0b00110011110, OOL, IOL,
|
||||||
"mpyu\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpyu\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
|
pattern>;
|
||||||
|
|
||||||
|
def MPYUv4i32:
|
||||||
|
MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||||
[(set (v4i32 VECREG:$rT),
|
[(set (v4i32 VECREG:$rT),
|
||||||
(SPUmpyu_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
|
(SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
|
||||||
|
|
||||||
def MPYUr16:
|
def MPYUr16:
|
||||||
RRForm<0b00110011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
|
MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
|
||||||
"mpyu\t$rT, $rA, $rB", IntegerMulDiv,
|
[(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
|
||||||
[(set R32C:$rT, (mul (zext R16C:$rA),
|
|
||||||
(zext R16C:$rB)))]>;
|
|
||||||
|
|
||||||
def MPYUr32:
|
def MPYUr32:
|
||||||
RRForm<0b00110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
||||||
"mpyu\t$rT, $rA, $rB", IntegerMulDiv,
|
[(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>;
|
||||||
[(set R32C:$rT, (SPUmpyu_i32 R32C:$rA, R32C:$rB))]>;
|
|
||||||
|
|
||||||
// mpyi: multiply 16 x s10imm -> 32 result (custom lowering for 32 bit result,
|
// mpyi: multiply 16 x s10imm -> 32 result.
|
||||||
// this only produces the lower 16 bits)
|
|
||||||
def MPYIvec:
|
class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
RI10Form<0b00101110, OOL, IOL,
|
||||||
"mpyi\t$rT, $rA, $val", IntegerMulDiv,
|
"mpyi\t$rT, $rA, $val", IntegerMulDiv,
|
||||||
[(set (v8i16 VECREG:$rT), (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
|
pattern>;
|
||||||
|
|
||||||
|
def MPYIvec:
|
||||||
|
MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
||||||
|
[(set (v8i16 VECREG:$rT),
|
||||||
|
(mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
|
||||||
|
|
||||||
def MPYIr16:
|
def MPYIr16:
|
||||||
RI10Form<0b00101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
|
MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
|
||||||
"mpyi\t$rT, $rA, $val", IntegerMulDiv,
|
|
||||||
[(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
|
[(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
|
||||||
|
|
||||||
// mpyui: same issues as other multiplies, plus, this doesn't match a
|
// mpyui: same issues as other multiplies, plus, this doesn't match a
|
||||||
// pattern... but may be used during target DAG selection or lowering
|
// pattern... but may be used during target DAG selection or lowering
|
||||||
def MPYUIvec:
|
|
||||||
RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
|
RI10Form<0b10101110, OOL, IOL,
|
||||||
"mpyui\t$rT, $rA, $val", IntegerMulDiv,
|
"mpyui\t$rT, $rA, $val", IntegerMulDiv,
|
||||||
|
pattern>;
|
||||||
|
|
||||||
|
def MPYUIvec:
|
||||||
|
MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
|
||||||
[]>;
|
[]>;
|
||||||
|
|
||||||
def MPYUIr16:
|
def MPYUIr16:
|
||||||
RI10Form<0b10101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
|
MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
|
||||||
"mpyui\t$rT, $rA, $val", IntegerMulDiv,
|
|
||||||
[]>;
|
[]>;
|
||||||
|
|
||||||
// mpya: 16 x 16 + 16 -> 32 bit result
|
// mpya: 16 x 16 + 16 -> 32 bit result
|
||||||
def MPYAvec:
|
class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
RRRForm<0b0011, OOL, IOL,
|
||||||
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
||||||
[(set (v4i32 VECREG:$rT), (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
|
pattern>;
|
||||||
|
|
||||||
|
def MPYAvec:
|
||||||
|
MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
||||||
|
[(set (v4i32 VECREG:$rT),
|
||||||
|
(add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
|
||||||
(v8i16 VECREG:$rB)))),
|
(v8i16 VECREG:$rB)))),
|
||||||
(v4i32 VECREG:$rC)))]>;
|
(v4i32 VECREG:$rC)))]>;
|
||||||
|
|
||||||
def MPYAr32:
|
def MPYAr32:
|
||||||
RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
|
MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
|
||||||
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
|
||||||
[(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
|
[(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
|
||||||
R32C:$rC))]>;
|
R32C:$rC))]>;
|
||||||
|
|
||||||
def : Pat<(add (mul (sext R16C:$rA), (sext R16C:$rB)), R32C:$rC),
|
def MPYAr32_sext:
|
||||||
(MPYAr32 R16C:$rA, R16C:$rB, R32C:$rC)>;
|
MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
|
||||||
|
[(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
|
||||||
|
R32C:$rC))]>;
|
||||||
|
|
||||||
def MPYAr32_sextinreg:
|
def MPYAr32_sextinreg:
|
||||||
RRRForm<0b0011, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
|
MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
|
||||||
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
|
||||||
[(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
|
[(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
|
||||||
(sext_inreg R32C:$rB, i16)),
|
(sext_inreg R32C:$rB, i16)),
|
||||||
R32C:$rC))]>;
|
R32C:$rC))]>;
|
||||||
|
|
||||||
//def MPYAr32:
|
|
||||||
// RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
|
|
||||||
// "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
|
||||||
// [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
|
|
||||||
// R32C:$rC))]>;
|
|
||||||
|
|
||||||
// mpyh: multiply high, used to synthesize 32-bit multiplies
|
// mpyh: multiply high, used to synthesize 32-bit multiplies
|
||||||
def MPYHv4i32:
|
class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
RRForm<0b10100011110, OOL, IOL,
|
||||||
"mpyh\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpyh\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
|
pattern>;
|
||||||
|
|
||||||
|
def MPYHv4i32:
|
||||||
|
MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||||
[(set (v4i32 VECREG:$rT),
|
[(set (v4i32 VECREG:$rT),
|
||||||
(SPUmpyh_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
|
(SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
|
||||||
|
|
||||||
def MPYHr32:
|
def MPYHr32:
|
||||||
RRForm<0b10100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
||||||
"mpyh\t$rT, $rA, $rB", IntegerMulDiv,
|
[(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>;
|
||||||
[(set R32C:$rT, (SPUmpyh_i32 R32C:$rA, R32C:$rB))]>;
|
|
||||||
|
|
||||||
// mpys: multiply high and shift right (returns the top half of
|
// mpys: multiply high and shift right (returns the top half of
|
||||||
// a 16-bit multiply, sign extended to 32 bits.)
|
// a 16-bit multiply, sign extended to 32 bits.)
|
||||||
@ -898,7 +917,7 @@ def MPYHHv8i16:
|
|||||||
RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||||
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[(set (v8i16 VECREG:$rT),
|
[(set (v8i16 VECREG:$rT),
|
||||||
(SPUmpyhh_v8i16 (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
|
(SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
|
||||||
|
|
||||||
def MPYHHr32:
|
def MPYHHr32:
|
||||||
RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
|
||||||
@ -938,7 +957,26 @@ def MPYHHAUr32:
|
|||||||
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[]>;
|
[]>;
|
||||||
|
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
// v4i32, i32 multiply instruction sequence:
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
def MPYv4i32:
|
||||||
|
Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
|
||||||
|
(Av4i32
|
||||||
|
(Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
|
||||||
|
(MPYHv4i32 VECREG:$rB, VECREG:$rA)),
|
||||||
|
(MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
|
||||||
|
|
||||||
|
def MPYi32:
|
||||||
|
Pat<(mul R32C:$rA, R32C:$rB),
|
||||||
|
(Ar32
|
||||||
|
(Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
|
||||||
|
(MPYHr32 R32C:$rB, R32C:$rA)),
|
||||||
|
(MPYUr32 R32C:$rA, R32C:$rB))>;
|
||||||
|
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
// clz: Count leading zeroes
|
// clz: Count leading zeroes
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
|
class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
|
RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
|
||||||
IntegerOp, pattern>;
|
IntegerOp, pattern>;
|
||||||
@ -1803,8 +1841,8 @@ class SELBVecCondInst<ValueType vectype>:
|
|||||||
class SELBRegInst<RegisterClass rclass>:
|
class SELBRegInst<RegisterClass rclass>:
|
||||||
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
|
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
|
||||||
[(set rclass:$rT,
|
[(set rclass:$rT,
|
||||||
(or (and rclass:$rA, rclass:$rC),
|
(or (and rclass:$rB, rclass:$rC),
|
||||||
(and rclass:$rB, (not rclass:$rC))))]>;
|
(and rclass:$rA, (not rclass:$rC))))]>;
|
||||||
|
|
||||||
class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
|
class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
|
||||||
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
|
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
|
||||||
@ -3442,6 +3480,13 @@ let isCall = 1,
|
|||||||
BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
|
BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Support calls to external symbols:
|
||||||
|
def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
|
||||||
|
(BRSL texternalsym:$func)>;
|
||||||
|
|
||||||
|
def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
|
||||||
|
(BRASL texternalsym:$func)>;
|
||||||
|
|
||||||
// Unconditional branches:
|
// Unconditional branches:
|
||||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
||||||
def BR :
|
def BR :
|
||||||
|
@ -35,17 +35,12 @@ def SDT_SPUshuffle : SDTypeProfile<1, 3, [
|
|||||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
// Unary, binary v16i8 operator type constraints:
|
// Vector binary operator type constraints (needs a further constraint to
|
||||||
def SPUv16i8_binop: SDTypeProfile<1, 2, [
|
// ensure that operand 0 is a vector...):
|
||||||
SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
|
||||||
|
|
||||||
// Binary v8i16 operator type constraints:
|
def SPUVecBinop: SDTypeProfile<1, 2, [
|
||||||
def SPUv8i16_binop: SDTypeProfile<1, 2, [
|
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
||||||
SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
]>;
|
||||||
|
|
||||||
// Binary v4i32 operator type constraints:
|
|
||||||
def SPUv4i32_binop: SDTypeProfile<1, 2, [
|
|
||||||
SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
|
||||||
|
|
||||||
// Trinary operators, e.g., addx, carry generate
|
// Trinary operators, e.g., addx, carry generate
|
||||||
def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
|
def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
|
||||||
@ -93,23 +88,22 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
|
|||||||
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
|
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
|
||||||
|
|
||||||
// SPU 16-bit multiply
|
// SPU 16-bit multiply
|
||||||
def SPUmpy_v16i8: SDNode<"SPUISD::MPY", SPUv16i8_binop, []>;
|
def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>;
|
||||||
def SPUmpy_v8i16: SDNode<"SPUISD::MPY", SPUv8i16_binop, []>;
|
|
||||||
def SPUmpy_v4i32: SDNode<"SPUISD::MPY", SPUv4i32_binop, []>;
|
|
||||||
|
|
||||||
// SPU multiply unsigned, used in instruction lowering for v4i32
|
// SPU multiply unsigned, used in instruction lowering for v4i32
|
||||||
// multiplies:
|
// multiplies:
|
||||||
def SPUmpyu_v4i32: SDNode<"SPUISD::MPYU", SPUv4i32_binop, []>;
|
def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>;
|
||||||
def SPUmpyu_i32: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
|
def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
|
||||||
|
|
||||||
// SPU 16-bit multiply high x low, shift result 16-bits
|
// SPU 16-bit multiply high x low, shift result 16-bits
|
||||||
// Used to compute intermediate products for 32-bit multiplies
|
// Used to compute intermediate products for 32-bit multiplies
|
||||||
def SPUmpyh_v4i32: SDNode<"SPUISD::MPYH", SPUv4i32_binop, []>;
|
def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>;
|
||||||
def SPUmpyh_i32: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
|
def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
|
||||||
|
|
||||||
// SPU 16-bit multiply high x high, 32-bit product
|
// SPU 16-bit multiply high x high, 32-bit product
|
||||||
// Used to compute intermediate products for 16-bit multiplies
|
// Used to compute intermediate products for 16-bit multiplies
|
||||||
def SPUmpyhh_v8i16: SDNode<"SPUISD::MPYHH", SPUv8i16_binop, []>;
|
def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>;
|
||||||
|
def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>;
|
||||||
|
|
||||||
// Shift left quadword by bits and bytes
|
// Shift left quadword by bits and bytes
|
||||||
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
|
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
|
||||||
|
Loading…
Reference in New Issue
Block a user