diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e975d0d039c..0822181d3b7 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -124,6 +124,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, Custom); + // SMUL_LOHI, UMUL_LOHI are not legal for Cell: + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { MVT StoreVT = (MVT::SimpleValueType) stype; setTruncStoreAction(VT, StoreVT, Expand); @@ -207,7 +211,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Custom); + setOperationAction(ISD::MUL, MVT::i32, Legal); setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall // Need to custom handle (some) common i8, i64 math ops @@ -239,8 +243,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i8, Legal); setOperationAction(ISD::SETCC, MVT::i16, Legal); - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SETCC, MVT::i64, Legal); // Zero extension and sign extension for i64 have to be // custom legalized @@ -289,9 +293,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) ++sctype) { MVT VT = (MVT::SimpleValueType)sctype; - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); } // RET must be custom lowered, to meet ABI requirements @@ -362,12 +366,15 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::AND, MVT::v16i8, Custom); setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + // FIXME: This is only temporary until I put all vector multiplications in + // SPUInstrInfo.td: + setOperationAction(ISD::MUL, MVT::v4i32, Legal); + setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -402,7 +409,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR"; + node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; @@ -467,9 +474,9 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { emitted, e.g. for MVT::f32 extending load to MVT::f64: \verbatim -%1 v16i8,ch = load +%1 v16i8,ch = load %2 v16i8,ch = rotate %1 -%3 v4f8, ch = bitconvert %2 +%3 v4f8, ch = bitconvert %2 %4 f32 = vec2perfslot %3 %5 f64 = fp_extend %4 \endverbatim @@ -902,7 +909,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) { assert((FP != 0) && "LowerConstantFP: Node is not ConstantFPSDNode"); - + uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); SDValue T = DAG.getConstant(dbits, MVT::i64); SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T); @@ -936,7 +943,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { return DAG.getNode(ISD::BRCOND, Op.getValueType(), Op.getOperand(0), Cond, Op.getOperand(2)); } - + return SDValue(); // Unchanged } @@ -1197,9 +1204,18 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // address pairs: Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + MVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + + if (!ST->usingLargeMem()) { + Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero); + } else { + Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero); + } + } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. Callee = SDValue(Dest, 0); @@ -1831,7 +1847,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); } else if (rotate) { int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; - + return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(), V1, DAG.getConstant(rotamt, MVT::i16)); } else { @@ -1915,17 +1931,8 @@ static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { abort(); /*NOTREACHED*/ - case MVT::v4i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); - SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); - SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); - SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); - - return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2); - break; - } + case MVT::v4i32: + break; // Multiply two v8i16 vectors (pipeline friendly version): // a) multiply lower halves, mask off upper 16-bit of 32-bit product @@ -2271,7 +2278,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue result = DAG.getNode(SPUISD::SHUFB, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), - VecOp, + VecOp, DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask)); return result; @@ -2630,32 +2637,6 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { return Op; } -//! Lower i32 multiplication -static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT, - unsigned Opc) { - switch (VT.getSimpleVT()) { - default: - cerr << "CellSPU: Unknown LowerMUL value type, got " - << Op.getValueType().getMVTString() - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::i32: { - SDValue rA = Op.getOperand(0); - SDValue rB = Op.getOperand(1); - - return DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB), - DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)), - DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB)); - } - } - - return SDValue(); -} - //! Custom lowering for CTPOP (count population) /*! Custom lowering code that counts the number ones in the input @@ -2951,8 +2932,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return LowerVectorMUL(Op, DAG); else if (VT == MVT::i8) return LowerI8Math(Op, DAG, Opc, *this); - else - return LowerMUL(Op, DAG, VT, Opc); case ISD::FDIV: if (VT == MVT::f32 || VT == MVT::v4f32) @@ -3030,7 +3009,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const || Op1.getOpcode() == SPUISD::IndirectAddr) { // Normalize the operands to reduce repeated code SDValue IndirectArg = Op0, AddArg = Op1; - + if (Op1.getOpcode() == SPUISD::IndirectAddr) { IndirectArg = Op1; AddArg = Op0; @@ -3160,9 +3139,9 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: { - // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot ))) -> + // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot ))) -> // - // but only if the SPUpromote_scalar and types match. + // but only if the SPUprefslot2vec and types match. SDValue Op00 = Op0.getOperand(0); if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { SDValue Op000 = Op00.getOperand(0); @@ -3173,7 +3152,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const break; } case SPUISD::VEC2PREFSLOT: { - // (SPUpromote_scalar (SPUvec2prefslot )) -> + // (SPUprefslot2vec (SPUvec2prefslot )) -> // Result = Op0.getOperand(0); break; @@ -3329,7 +3308,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, } } } - + // LowerAsmOperandForConstraint void SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 08d767684af..1abbc0a5c04 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -585,23 +585,29 @@ def AHIr16: "ahi\t$rT, $rA, $val", IntegerOp, [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>; -def Avec: - RRForm<0b00000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "a\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; +class AInst pattern>: + RRForm<0b00000011000, OOL, IOL, + "a\t$rT, $rA, $rB", IntegerOp, + pattern>; -def : Pat<(add (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), - (Avec VECREG:$rA, VECREG:$rB)>; +class AVecInst: + AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; -def Ar32: - RRForm<0b00000011000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "a\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>; +class ARegInst: + AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>; + +multiclass AddInstruction { + def v4i32: AVecInst; + def v16i8: AVecInst; + + def r32: ARegInst; + def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>; +} -def Ar8: - RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - "a\t$rT, $rA, $rB", IntegerOp, - [/* no pattern */]>; +defm A : AddInstruction; def AIvec: RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -789,96 +795,109 @@ def BGXvec: def MPYv8i16: RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "mpy\t$rT, $rA, $rB", IntegerMulDiv, - [(set (v8i16 VECREG:$rT), (SPUmpy_v8i16 (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; + [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; def MPYr16: RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), "mpy\t$rT, $rA, $rB", IntegerMulDiv, [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>; +// Unsigned 16-bit multiply: + +class MPYUInst pattern>: + RRForm<0b00110011110, OOL, IOL, + "mpyu\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + def MPYUv4i32: - RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyu\t$rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), - (SPUmpyu_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def MPYUr16: - RRForm<0b00110011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB), - "mpyu\t$rT, $rA, $rB", IntegerMulDiv, - [(set R32C:$rT, (mul (zext R16C:$rA), - (zext R16C:$rB)))]>; + MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; def MPYUr32: - RRForm<0b00110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "mpyu\t$rT, $rA, $rB", IntegerMulDiv, - [(set R32C:$rT, (SPUmpyu_i32 R32C:$rA, R32C:$rB))]>; + MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>; -// mpyi: multiply 16 x s10imm -> 32 result (custom lowering for 32 bit result, -// this only produces the lower 16 bits) -def MPYIvec: - RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), +// mpyi: multiply 16 x s10imm -> 32 result. + +class MPYIInst pattern>: + RI10Form<0b00101110, OOL, IOL, "mpyi\t$rT, $rA, $val", IntegerMulDiv, - [(set (v8i16 VECREG:$rT), (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; + pattern>; + +def MPYIvec: + MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; def MPYIr16: - RI10Form<0b00101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "mpyi\t$rT, $rA, $val", IntegerMulDiv, - [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; + MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; // mpyui: same issues as other multiplies, plus, this doesn't match a // pattern... but may be used during target DAG selection or lowering + +class MPYUIInst pattern>: + RI10Form<0b10101110, OOL, IOL, + "mpyui\t$rT, $rA, $val", IntegerMulDiv, + pattern>; + def MPYUIvec: - RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "mpyui\t$rT, $rA, $val", IntegerMulDiv, - []>; + MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + []>; def MPYUIr16: - RI10Form<0b10101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "mpyui\t$rT, $rA, $val", IntegerMulDiv, - []>; + MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + []>; // mpya: 16 x 16 + 16 -> 32 bit result +class MPYAInst pattern>: + RRRForm<0b0011, OOL, IOL, + "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, + pattern>; + def MPYAvec: - RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))), - (v4i32 VECREG:$rC)))]>; + MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (v4i32 VECREG:$rT), + (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))), + (v4i32 VECREG:$rC)))]>; def MPYAr32: - RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), - "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, - [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), - R32C:$rC))]>; - -def : Pat<(add (mul (sext R16C:$rA), (sext R16C:$rB)), R32C:$rC), - (MPYAr32 R16C:$rA, R16C:$rB, R32C:$rC)>; + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), + R32C:$rC))]>; + +def MPYAr32_sext: + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)), + R32C:$rC))]>; def MPYAr32_sextinreg: - RRRForm<0b0011, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), - "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, - [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), - (sext_inreg R32C:$rB, i16)), - R32C:$rC))]>; - -//def MPYAr32: -// RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), -// "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, -// [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), -// R32C:$rC))]>; + MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), + (sext_inreg R32C:$rB, i16)), + R32C:$rC))]>; // mpyh: multiply high, used to synthesize 32-bit multiplies +class MPYHInst pattern>: + RRForm<0b10100011110, OOL, IOL, + "mpyh\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + def MPYHv4i32: - RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyh\t$rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), - (SPUmpyh_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def MPYHr32: - RRForm<0b10100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "mpyh\t$rT, $rA, $rB", IntegerMulDiv, - [(set R32C:$rT, (SPUmpyh_i32 R32C:$rA, R32C:$rB))]>; + MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>; // mpys: multiply high and shift right (returns the top half of // a 16-bit multiply, sign extended to 32 bits.) @@ -898,7 +917,7 @@ def MPYHHv8i16: RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, [(set (v8i16 VECREG:$rT), - (SPUmpyhh_v8i16 (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; def MPYHHr32: RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -938,7 +957,26 @@ def MPYHHAUr32: "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, []>; +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v4i32, i32 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +def MPYv4i32: + Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (Av4i32 + (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB), + (MPYHv4i32 VECREG:$rB, VECREG:$rA)), + (MPYUv4i32 VECREG:$rA, VECREG:$rB))>; + +def MPYi32: + Pat<(mul R32C:$rA, R32C:$rB), + (Ar32 + (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), + (MPYHr32 R32C:$rB, R32C:$rA)), + (MPYUr32 R32C:$rA, R32C:$rB))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // clz: Count leading zeroes +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class CLZInst pattern>: RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", IntegerOp, pattern>; @@ -1803,8 +1841,8 @@ class SELBVecCondInst: class SELBRegInst: SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), [(set rclass:$rT, - (or (and rclass:$rA, rclass:$rC), - (and rclass:$rB, (not rclass:$rC))))]>; + (or (and rclass:$rB, rclass:$rC), + (and rclass:$rA, (not rclass:$rC))))]>; class SELBRegCondInst: SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), @@ -3442,6 +3480,13 @@ let isCall = 1, BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>; } +// Support calls to external symbols: +def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)), + (BRSL texternalsym:$func)>; + +def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), + (BRASL texternalsym:$func)>; + // Unconditional branches: let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { def BR : diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index b22c6b5d9fe..5cf229e4b78 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -35,17 +35,12 @@ def SDT_SPUshuffle : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; -// Unary, binary v16i8 operator type constraints: -def SPUv16i8_binop: SDTypeProfile<1, 2, [ - SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +// Vector binary operator type constraints (needs a further constraint to +// ensure that operand 0 is a vector...): -// Binary v8i16 operator type constraints: -def SPUv8i16_binop: SDTypeProfile<1, 2, [ - SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; - -// Binary v4i32 operator type constraints: -def SPUv4i32_binop: SDTypeProfile<1, 2, [ - SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SPUVecBinop: SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; // Trinary operators, e.g., addx, carry generate def SPUIntTrinaryOp : SDTypeProfile<1, 3, [ @@ -93,23 +88,22 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; // SPU 16-bit multiply -def SPUmpy_v16i8: SDNode<"SPUISD::MPY", SPUv16i8_binop, []>; -def SPUmpy_v8i16: SDNode<"SPUISD::MPY", SPUv8i16_binop, []>; -def SPUmpy_v4i32: SDNode<"SPUISD::MPY", SPUv4i32_binop, []>; +def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>; // SPU multiply unsigned, used in instruction lowering for v4i32 // multiplies: -def SPUmpyu_v4i32: SDNode<"SPUISD::MPYU", SPUv4i32_binop, []>; -def SPUmpyu_i32: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; +def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>; +def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>; // SPU 16-bit multiply high x low, shift result 16-bits // Used to compute intermediate products for 32-bit multiplies -def SPUmpyh_v4i32: SDNode<"SPUISD::MPYH", SPUv4i32_binop, []>; -def SPUmpyh_i32: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; +def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>; +def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>; // SPU 16-bit multiply high x high, 32-bit product // Used to compute intermediate products for 16-bit multiplies -def SPUmpyhh_v8i16: SDNode<"SPUISD::MPYHH", SPUv8i16_binop, []>; +def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>; +def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>; // Shift left quadword by bits and bytes def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;