diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a6268e73400..3ec4181a2d4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4441,11 +4441,20 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
     AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
     break;
   }
-  case ISD::SELECT:
+  case ISD::SELECT: {
     Tmp2 = PromoteOp(Node->getOperand(1));   // Legalize the op0
     Tmp3 = PromoteOp(Node->getOperand(2));   // Legalize the op1
+
+    unsigned VT2 = Tmp2.getValueType();
+    assert(VT2 == Tmp3.getValueType()
+           && "PromoteOp: Operands 2 and 3 ValueTypes don't match");
+    // Ensure that NVT is the same as the operands' value types, because we
+    // cannot assume that TLI.getSetCCValueType() is constant.
+    if (NVT != VT2)
+      NVT = VT2;
     Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
     break;
+  }
   case ISD::SELECT_CC:
     Tmp2 = PromoteOp(Node->getOperand(2));   // True
     Tmp3 = PromoteOp(Node->getOperand(3));   // False
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ad797dda8ee..0a736d72035 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -219,8 +219,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   // Custom lower i32 multiplications
   setOperationAction(ISD::MUL, MVT::i32, Custom);
 
-  // Need to custom handle (some) common i8 math ops
+  // Need to custom handle (some) common i8, i64 math ops
+  setOperationAction(ISD::ADD, MVT::i64, Custom);
   setOperationAction(ISD::SUB, MVT::i8,  Custom);
+  setOperationAction(ISD::SUB, MVT::i64, Custom);
   setOperationAction(ISD::MUL, MVT::i8,  Custom);
 
   // SPU does not have BSWAP. It does have i32 support CTLZ.
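The two new i64 setOperationAction lines are what route 64-bit adds and
subtracts into the LowerI64Math cases added later in this patch. As a rough
scalar model of the cg/addx scheme those cases emit (plain C++; the function
names are illustrative, not LLVM or SPU API):

    #include <cstdint>
    #include <cassert>

    // cg (carry generate): per 32-bit slot, bit 0 of the result is the
    // carry out of a + b.
    static uint32_t carry_generate(uint32_t a, uint32_t b) {
      return (a + b < a) ? 1 : 0;
    }

    // addx (add extended): a + b + (bit 0 of the carry register).
    static uint32_t add_extended(uint32_t a, uint32_t b, uint32_t cy) {
      return a + b + (cy & 1);
    }

    static uint64_t add64(uint64_t x, uint64_t y) {
      uint32_t xl = uint32_t(x), xh = uint32_t(x >> 32);
      uint32_t yl = uint32_t(y), yh = uint32_t(y >> 32);
      // The SHUFB in the ISD::ADD lowering below moves the low word's carry
      // into the high word's slot and fills the low slot with zero (0x80
      // shuffle bytes); the scalar equivalent:
      uint32_t lo = add_extended(xl, yl, 0);
      uint32_t hi = add_extended(xh, yh, carry_generate(xl, yl));
      return (uint64_t(hi) << 32) | lo;
    }

    int main() {
      assert(add64(0xffffffffULL, 1) == 0x100000000ULL);
      return 0;
    }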
@@ -238,7 +240,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 
   setOperationAction(ISD::CTLZ , MVT::i32, Legal);
 
-  // SPU has a version of select that implements (a&~c)|(b|c), just like
+  // SPU has a version of select that implements (a&~c)|(b&c), just like
   // select ought to work:
   setOperationAction(ISD::SELECT, MVT::i1, Promote);
   setOperationAction(ISD::SELECT, MVT::i8, Legal);
@@ -427,8 +429,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
       "SPUISD::ROTBYTES_LEFT_CHAINED";
-    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+      "SPUISD::ROTBYTES_LEFT_BITS";
+    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
+    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
+    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
+    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
@@ -1706,33 +1714,33 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
     }
 
     for (int i = 0; i < 4; ++i) {
+      uint64_t val = 0;
       for (int j = 0; j < 4; ++j) {
         SDOperand V;
         bool process_upper, process_lower;
-        uint64_t val = 0;
-
+        val <<= 8;
         process_upper = (upper_special && (i & 1) == 0);
         process_lower = (lower_special && (i & 1) == 1);
 
         if (process_upper || process_lower) {
           if ((process_upper && upper == 0)
               || (process_lower && lower == 0))
-            val = 0x80;
+            val |= 0x80;
           else if ((process_upper && upper == 0xffffffff)
                    || (process_lower && lower == 0xffffffff))
-            val = 0xc0;
+            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                    || (process_lower && lower == 0x80000000))
-            val = (j == 0 ? 0xe0 : 0x80);
+            val |= (j == 0 ? 0xe0 : 0x80);
         } else
-          val = i * 4 + j + ((i & 1) * 16);
-
-        ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
+          val |= i * 4 + j + ((i & 1) * 16);
       }
+
+      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
     }
 
     return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
-                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                    &ShufBytes[0], ShufBytes.size()));
   }
 }
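The rewritten inner loop above now accumulates the four per-byte shuffle codes
of each word into a single i32 constant (val <<= 8; val |= code), so the
shuffle mask is built as v4i32 rather than v16i8. The packing itself is just
(plain C++ sketch, not part of the patch):

    #include <cstdint>
    #include <cassert>

    // Pack four shufb byte codes into one 32-bit mask word, mirroring the
    // val <<= 8; val |= ... accumulation in the loop above.
    static uint32_t pack4(const uint8_t code[4]) {
      uint32_t val = 0;
      for (int j = 0; j < 4; ++j) {
        val <<= 8;
        val |= code[j];
      }
      return val;
    }

    int main() {
      const uint8_t ident[4] = { 0x00, 0x01, 0x02, 0x03 }; // select bytes 0-3
      const uint8_t zeros[4] = { 0x80, 0x80, 0x80, 0x80 }; // 0x80 = fill with 0
      assert(pack4(ident) == 0x00010203u);
      assert(pack4(zeros) == 0x80808080u);
      return 0;
    }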
@@ -1904,7 +1912,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
   // c) Use SELB to select upper and lower halves from the intermediate results
   //
-  // NOTE: We really want to move the FSMBI to earlier to actually get the
+  // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
   // dual-issue. This code does manage to do this, even if it's a little on
   // the wacky side
   case MVT::v8i16: {
@@ -1918,7 +1926,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
 
     SDOperand FSMBOp =
       DAG.getCopyToReg(Chain, FSMBIreg,
-                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
+                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                    DAG.getConstant(0xcccc, MVT::i16)));
 
     SDOperand HHProd =
@@ -1962,7 +1970,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
 
-    SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
+    SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                      DAG.getConstant(0x2222, MVT::i16));
 
     SDOperand LoProdParts =
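The FSMBI/SELECT_MASK constants above (0xcccc, 0x2222) are halfword select
masks: the hardware expands each of the 16 immediate bits into a 0x00 or 0xff
byte, and SELB then performs a bitwise (a & ~mask) | (b & mask) merge, so the
two intermediate products are interleaved halfword by halfword. A scalar model
(illustrative only, not the SPU API):

    #include <cstdint>

    // fsmbi-style expansion: each of 16 immediate bits becomes one mask
    // byte, 0xff for 1 and 0x00 for 0 (bit 15 maps to byte 0).
    static void select_mask(uint16_t imm, uint8_t mask[16]) {
      for (int i = 0; i < 16; ++i)
        mask[i] = (imm & (1u << (15 - i))) ? 0xff : 0x00;
    }

    // selb: bitwise select between two quadwords under the mask.
    static void selb(const uint8_t a[16], const uint8_t b[16],
                     const uint8_t mask[16], uint8_t out[16]) {
      for (int i = 0; i < 16; ++i)
        out[i] = uint8_t((a[i] & ~mask[i]) | (b[i] & mask[i]));
    }

With imm = 0xcccc the mask bytes repeat ff,ff,00,00, i.e. even-numbered
halfwords come from b and odd-numbered ones from a.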
@@ -2293,6 +2301,64 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
                                        DAG.getConstant(4, MVT::i32))));
   }
 
+  case ISD::ADD: {
+    // Turn operands into vectors to satisfy type checking (shufb works on
+    // vectors)
+    SDOperand Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDOperand Op1 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+    SmallVector<SDOperand, 16> ShufBytes;
+
+    // Create the shuffle mask for "rotating" the carry up one register slot
+    // once the carry is generated.
+    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+    SDOperand CarryGen =
+      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
+    SDOperand ShiftedCarry =
+      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+                  CarryGen, CarryGen,
+                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                              &ShufBytes[0], ShufBytes.size()));
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
+                                   Op0, Op1, ShiftedCarry));
+  }
+
+  case ISD::SUB: {
+    // Turn operands into vectors to satisfy type checking (shufb works on
+    // vectors)
+    SDOperand Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDOperand Op1 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+    SmallVector<SDOperand, 16> ShufBytes;
+
+    // Create the shuffle mask for "rotating" the borrow up one register slot
+    // once the borrow is generated.
+    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+    SDOperand BorrowGen =
+      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
+    SDOperand ShiftedBorrow =
+      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+                  BorrowGen, BorrowGen,
+                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                              &ShufBytes[0], ShufBytes.size()));
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
+                                   Op0, Op1, ShiftedBorrow));
+  }
+
   case ISD::SHL: {
     SDOperand ShiftAmt = Op.getOperand(1);
     unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
@@ -2301,7 +2367,7 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
       DAG.getNode(SPUISD::SELB, VecVT,
                   Op0Vec,
                   DAG.getConstant(0, VecVT),
-                  DAG.getNode(SPUISD::FSMBI, VecVT,
+                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                               DAG.getConstant(0xff00ULL, MVT::i16)));
     SDOperand ShiftAmtBytes =
       DAG.getNode(ISD::SRL, ShiftAmtVT,
@@ -2337,6 +2403,43 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
                                  Op0, ShiftAmtBytes),
                      ShiftAmtBits);
   }
+
+  case ISD::SRA: {
+    // Promote Op0 to vector
+    SDOperand Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDOperand ShiftAmt = Op.getOperand(1);
+    unsigned ShiftVT = ShiftAmt.getValueType();
+
+    // Negate variable shift amounts
+    if (!isa<ConstantSDNode>(ShiftAmt)) {
+      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
+                             DAG.getConstant(0, ShiftVT), ShiftAmt);
+    }
+
+    SDOperand UpperHalfSign =
+      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
+                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
+                                          Op0, DAG.getConstant(31, MVT::i32))));
+    SDOperand UpperHalfSignMask =
+      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
+    SDOperand UpperLowerMask =
+      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
+                  DAG.getConstant(0xff00, MVT::i16));
+    SDOperand UpperLowerSelect =
+      DAG.getNode(SPUISD::SELB, MVT::v2i64,
+                  UpperHalfSignMask, Op0, UpperLowerMask);
+    SDOperand RotateLeftBytes =
+      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
+                  UpperLowerSelect, ShiftAmt);
+    SDOperand RotateLeftBits =
+      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
+                  RotateLeftBytes, ShiftAmt);
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       RotateLeftBits);
+  }
   }
 
   return SDOperand();
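The new ISD::SRA case is the least obvious of the three: it builds a v2i64
whose upper 64 bits are the sign fill (SELECT_MASK of the upper word's sign,
merged by SELB under the 0xff00 upper/lower mask) and whose lower 64 bits are
the operand, then rotates that quadword left by the negated shift amount
(ROTBYTES_LEFT_BITS for whole bytes, ROTBYTES_LEFT for the remaining bits) and
reads back element 0. A scalar sketch of why that yields an arithmetic shift,
as I read the code above (plain C++):

    #include <cstdint>
    #include <cassert>

    static uint64_t sra64(uint64_t x, unsigned s) {  // 0 <= s <= 63
      // Upper half of the conceptual quadword [sign | x]:
      uint64_t sign = (int64_t(x) < 0) ? ~0ULL : 0ULL;
      if (s == 0)
        return x;
      // Rotating [sign | x] right by s bits and keeping the upper 64 bits
      // leaves s sign bits on top of x >> s, which is exactly sra:
      return (sign << (64 - s)) | (x >> s);
    }

    int main() {
      assert(sra64(0x8000000000000000ULL, 4) == 0xf800000000000000ULL);
      assert(int64_t(sra64(uint64_t(-256), 4)) == -16);
      return 0;
    }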
@@ -2567,17 +2670,19 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
   case ISD::ZERO_EXTEND:
   case ISD::SIGN_EXTEND:
   case ISD::ANY_EXTEND:
+  case ISD::ADD:
   case ISD::SUB:
   case ISD::ROTR:
   case ISD::ROTL:
   case ISD::SRL:
   case ISD::SHL:
-  case ISD::SRA:
+  case ISD::SRA: {
     if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
     else if (VT == MVT::i64)
       return LowerI64Math(Op, DAG, Opc);
     break;
+  }
 
   // Vector-related lowering.
   case ISD::BUILD_VECTOR:
@@ -2641,9 +2746,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
   case ISD::ADD: {
     SDOperand Op1 = N->getOperand(1);
 
-    if ((Op1.getOpcode() == ISD::Constant
-         || Op1.getOpcode() == ISD::TargetConstant)
-        && Op0.getOpcode() == SPUISD::IndirectAddr) {
+    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
       SDOperand Op01 = Op0.getOperand(1);
       if (Op01.getOpcode() == ISD::Constant
           || Op01.getOpcode() == ISD::TargetConstant) {
@@ -2662,8 +2765,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
                            Op0.getOperand(0), combinedConst);
       }
-    } else if ((Op0.getOpcode() == ISD::Constant
-                || Op0.getOpcode() == ISD::TargetConstant)
+    } else if (isa<ConstantSDNode>(Op0)
                && Op1.getOpcode() == SPUISD::IndirectAddr) {
       SDOperand Op11 = Op1.getOperand(1);
       if (Op11.getOpcode() == ISD::Constant
@@ -2899,11 +3001,11 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
   case SPUISD::ROTBYTES_RIGHT_S:
   case SPUISD::ROTBYTES_LEFT:
   case SPUISD::ROTBYTES_LEFT_CHAINED:
-  case FSMBI:
-  case SELB:
-  case FPInterp:
-  case FPRecipEst:
-  case SEXT32TO64:
+  case SPUISD::SELECT_MASK:
+  case SPUISD::SELB:
+  case SPUISD::FPInterp:
+  case SPUISD::FPRecipEst:
+  case SPUISD::SEXT32TO64:
 #endif
   }
 }
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 3c73aa51c06..5632ee3152c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -62,8 +62,13 @@ namespace llvm {
       ROTBYTES_RIGHT_S,     ///< Vector rotate right, by bytes, sign fill
       ROTBYTES_LEFT,        ///< Rotate bytes (loads -> ROTQBYI)
       ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
-      FSMBI,                ///< Form Select Mask for Bytes, Immediate
+      ROTBYTES_LEFT_BITS,   ///< Rotate bytes left by bit shift count
+      SELECT_MASK,          ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
       SELB,                 ///< Select bits -> (b & mask) | (a & ~mask)
+      ADD_EXTENDED,         ///< Add extended, with carry
+      CARRY_GENERATE,       ///< Carry generate for ADD_EXTENDED
+      SUB_EXTENDED,         ///< Subtract extended, with borrow
+      BORROW_GENERATE,      ///< Borrow generate for SUB_EXTENDED
       FPInterp,             ///< Floating point interpolate
       FPRecipEst,           ///< Floating point reciprocal estimate
       SEXT32TO64,           ///< Sign-extended 32-bit const -> 64-bits
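The PerformDAGCombine hunks above, besides switching to isa<ConstantSDNode>,
fold a constant added to an indirect address into the address node's own
offset rather than keeping a separate add. Conceptually (hypothetical scalar
stand-ins for the DAG nodes; only the arithmetic of the combine is modeled):

    #include <cstdint>

    struct IndirectAddr { uint64_t base; int64_t offset; };

    // (add (SPUISD::IndirectAddr base, c1), c2)
    //   -> (SPUISD::IndirectAddr base, c1 + c2)
    static IndirectAddr fold_constant_add(IndirectAddr ia, int64_t c) {
      ia.offset += c;
      return ia;
    }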
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 8e1933c8ba0..d00fe71ae86 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -469,7 +469,7 @@ class FSMBIVec<ValueType vectype>:
     RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
       "fsmbi\t$rT, $val", SelectOp,
-      [(set (vectype VECREG:$rT), (SPUfsmbi (i16 immU16:$val)))]>;
+      [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
 
 multiclass FormSelectMaskBytesImm
 {
@@ -485,21 +485,37 @@ defm FSMBI : FormSelectMaskBytesImm;
 
 def FSMB:
     RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
       "fsmb\t$rT, $rA", SelectOp,
-      [(set (v16i8 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+      [(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;
 
 // fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
 // only 8-bits wide (even though it's input as 16-bits here)
 def FSMH:
     RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
       "fsmh\t$rT, $rA", SelectOp,
-      [(set (v8i16 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+      [(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
 
 // fsm: Form select mask for words. Like the other fsm* instructions,
 // only the lower 4 bits of $rA are significant.
-def FSM:
-    RRForm_1<0b00101101100, (outs VECREG:$rT), (ins R16C:$rA),
-      "fsm\t$rT, $rA", SelectOp,
-      [(set (v4i32 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+class FSMInst<ValueType vectype, RegisterClass rclass>:
+    RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA),
+      "fsm\t$rT, $rA",
+      SelectOp,
+      [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+multiclass FormSelectMaskWord {
+  def r32 : FSMInst<v4i32, R32C>;
+  def r16 : FSMInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+  def r32 : FSMInst<v2i64, R32C>;
+  def r16 : FSMInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
 
 //===----------------------------------------------------------------------===//
 // Integer and Logical Operations:
@@ -545,7 +561,7 @@ def Ar32:
 def Ar8:
     RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
       "a\t$rT, $rA, $rB", IntegerOp,
-      [(set R8C:$rT, (add R8C:$rA, R8C:$rB))]>;
+      [/* no pattern */]>;
 
 def AIvec:
     RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -600,42 +616,125 @@ def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
                                    [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
 
 // ADDX: only available in vector form, doesn't match a pattern.
-def ADDXvec:
-    RRForm<0b00000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
-                                              VECREG:$rCarry),
-      "addx\t$rT, $rA, $rB", IntegerOp,
-      []>,
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b00000010110, OOL, IOL,
+      "addx\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+    ADDXInst<(outs VECREG:$rT),
+             (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+             [(set (vectype VECREG:$rT),
+                   (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
+                            (vectype VECREG:$rCarry)))]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
-// CG: only available in vector form, doesn't match a pattern.
-def CGvec:
-    RRForm<0b01000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
-                                              VECREG:$rCarry),
-      "cg\t$rT, $rA, $rB", IntegerOp,
-      []>,
+class ADDXRegInst<RegisterClass rclass>:
+    ADDXInst<(outs rclass:$rT),
+             (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+             [(set rclass:$rT,
+                   (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
-// SFX: only available in vector form, doesn't match a pattern
-def SFXvec:
-    RRForm<0b10000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
-                                              VECREG:$rCarry),
-      "sfx\t$rT, $rA, $rB", IntegerOp,
-      []>,
+multiclass AddExtended {
+  def v2i64 : ADDXVecInst<v2i64>;
+  def v4i32 : ADDXVecInst<v4i32>;
+  def r64 : ADDXRegInst<R64C>;
+  def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01000011000, OOL, IOL,
+      "cg\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+    CGInst<(outs VECREG:$rT),
+           (ins VECREG:$rA, VECREG:$rB),
+           [(set (vectype VECREG:$rT),
+                 (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+class CGRegInst<RegisterClass rclass>:
+    CGInst<(outs rclass:$rT),
+           (ins rclass:$rA, rclass:$rB),
+           [(set rclass:$rT,
+                 (SPUcarry_gen rclass:$rA, rclass:$rB))]>;
+
+multiclass CarryGenerate {
+  def v2i64 : CGVecInst<v2i64>;
+  def v4i32 : CGVecInst<v4i32>;
+  def r64 : CGRegInst<R64C>;
+  def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b10000010110, OOL, IOL,
+      "sfx\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+    SFXInst<(outs VECREG:$rT),
+            (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+            [(set (vectype VECREG:$rT),
+                  (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
+                           (vectype VECREG:$rCarry)))]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
+class SFXRegInst<RegisterClass rclass>:
+    SFXInst<(outs rclass:$rT),
+            (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+            [(set rclass:$rT,
+                  (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+    RegConstraint<"$rCarry = $rT">,
+    NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+  def v2i64 : SFXVecInst<v2i64>;
+  def v4i32 : SFXVecInst<v4i32>;
+  def r64 : SFXRegInst<R64C>;
+  def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
 // BG: only available in vector form, doesn't match a pattern.
-def BGvec:
-    RRForm<0b01000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
-                                              VECREG:$rCarry),
-      "bg\t$rT, $rA, $rB", IntegerOp,
-      []>,
-    RegConstraint<"$rCarry = $rT">,
-    NoEncode<"$rCarry">;
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01000010000, OOL, IOL,
+      "bg\t$rT, $rA, $rB",
+      IntegerOp, pattern>;
 
-// BGX: only available in vector form, doesn't match a pattern.
+class BGVecInst<ValueType vectype>:
+    BGInst<(outs VECREG:$rT),
+           (ins VECREG:$rA, VECREG:$rB),
+           [(set (vectype VECREG:$rT),
+                 (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+class BGRegInst<RegisterClass rclass>:
+    BGInst<(outs rclass:$rT),
+           (ins rclass:$rA, rclass:$rB),
+           [(set rclass:$rT,
+                 (SPUborrow_gen rclass:$rA, rclass:$rB))]>;
+
+multiclass BorrowGenerate {
+  def v4i32 : BGVecInst<v4i32>;
+  def v2i64 : BGVecInst<v2i64>;
+  def r64 : BGRegInst<R64C>;
+  def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
 def BGXvec:
     RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
                                               VECREG:$rCarry),
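BG and SFX mirror CG and ADDX for subtraction. One wrinkle worth noting: as I
read the SPU ISA, the generated borrow is active-low (bg produces 1 when no
borrow is needed), which is presumably why the ISD::SUB lowering earlier fills
the low slot's borrow word with ones (0xc0c0c0c0 shuffle bytes) where the ADD
lowering used zeros. A rough scalar model (illustrative names, not the SPU
API):

    #include <cstdint>
    #include <cassert>

    // bg (borrow generate): 1 when minuend >= subtrahend, i.e. no borrow.
    static uint32_t borrow_generate(uint32_t minuend, uint32_t subtrahend) {
      return (minuend >= subtrahend) ? 1 : 0;
    }

    // sfx (subtract extended): minuend + ~subtrahend + (bit 0 of the borrow
    // register), i.e. minuend - subtrahend - (1 - borrow_in).
    static uint32_t sub_extended(uint32_t minuend, uint32_t subtrahend,
                                 uint32_t bw) {
      return minuend + ~subtrahend + (bw & 1);
    }

    static uint64_t sub64(uint64_t x, uint64_t y) {   // computes x - y
      uint32_t xl = uint32_t(x), xh = uint32_t(x >> 32);
      uint32_t yl = uint32_t(y), yh = uint32_t(y >> 32);
      uint32_t lo = sub_extended(xl, yl, 1);          // borrow-in 1 = plain sub
      uint32_t hi = sub_extended(xh, yh, borrow_generate(xl, yl));
      return (uint64_t(hi) << 32) | lo;
    }

    int main() {
      assert(sub64(0x100000000ULL, 1) == 0xffffffffULL);
      assert(sub64(5, 7) == uint64_t(-2));
      return 0;
    }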
@@ -817,17 +916,17 @@ def CLZr32:
 def CNTBv16i8:
     RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
       "cntb\t$rT, $rA", IntegerOp,
-      [(set (v16i8 VECREG:$rT), (SPUcntb_v16i8 (v16i8 VECREG:$rA)))]>;
+      [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
 
 def CNTBv8i16 :
     RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
       "cntb\t$rT, $rA", IntegerOp,
-      [(set (v8i16 VECREG:$rT), (SPUcntb_v8i16 (v8i16 VECREG:$rA)))]>;
+      [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
 
 def CNTBv4i32 :
     RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
       "cntb\t$rT, $rA", IntegerOp,
-      [(set (v4i32 VECREG:$rT), (SPUcntb_v4i32 (v4i32 VECREG:$rA)))]>;
+      [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
 
 // gbb: Gather all low order bits from each byte in $rA into a single 16-bit
 // quantity stored into $rT
@@ -869,31 +968,38 @@ def SUMB:
       []>;
 
 // Sign extension operations:
-def XSBHvec:
-    RRForm_1<0b01101101010, (outs VECREG:$rDst), (ins VECREG:$rSrc),
-      "xsbh\t$rDst, $rSrc", IntegerOp,
-      [(set (v8i16 VECREG:$rDst), (sext (v16i8 VECREG:$rSrc)))]>;
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm_1<0b01101101010, OOL, IOL,
+      "xsbh\t$rDst, $rSrc",
+      IntegerOp, pattern>;
 
-// Ordinary form for XSBH
-def XSBHr16:
-    RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R16C:$rSrc),
-      "xsbh\t$rDst, $rSrc", IntegerOp,
-      [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+class XSBHVecInst<ValueType vectype>:
+    XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+      [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
 
+class XSBHRegInst<RegisterClass rclass>:
+    XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+      [(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
+
+multiclass ExtendByteHalfword {
+  def v16i8: XSBHVecInst<v16i8>;
+  def r16: XSBHRegInst<R16C>;
+
+  // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
+  // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
+  // pattern below). Intentionally doesn't match a pattern because we want the
+  // sext 8->32 pattern to do the work for us, namely because we need the extra
+  // XSHWr32.
+  def r32: XSBHRegInst<R32C>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
+// sext_inreg)
 def XSBHr8:
-    RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R8C:$rSrc),
-      "xsbh\t$rDst, $rSrc", IntegerOp,
-      [(set R16C:$rDst, (sext R8C:$rSrc))]>;
-
-// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
-// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
-// pattern below). Intentionally doesn't match a pattern because we want the
-// sext 8->32 pattern to do the work for us, namely because we need the extra
-// XSHWr32.
-def XSBHr32:
-    RRForm_1<0b01101101010, (outs R32C:$rDst), (ins R32C:$rSrc),
-      "xsbh\t$rDst, $rSrc", IntegerOp,
-      [(set R32C:$rDst, (sext_inreg R32C:$rSrc, i8))]>;
+    XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+      [(set R16C:$rDst, (sext R8C:$rSrc))]>;
 
 // Sign extend halfwords to words:
 def XSHWvec:
@@ -1658,9 +1764,9 @@ class SHUFBVecInst<ValueType vectype>:
 
 // It's this pattern that's probably the most useful, since SPUISelLowering
 // methods create a v16i8 vector for $rC:
-class SHUFBVecPat1<ValueType vectype, SPUInstr inst>:
+class SHUFBVecPat1<ValueType vectype, ValueType masktype, SPUInstr inst>:
     Pat<(SPUshuffle (vectype VECREG:$rA), (vectype VECREG:$rB),
-                    (v16i8 VECREG:$rC)),
+                    (masktype VECREG:$rC)),
         (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
 
 multiclass ShuffleBytes
@@ -1676,11 +1782,19 @@ multiclass ShuffleBytes
 
 defm SHUFB : ShuffleBytes;
 
-def : SHUFBVecPat1<v8i16, SHUFBv8i16>;
-def : SHUFBVecPat1<v4i32, SHUFBv4i32>;
-def : SHUFBVecPat1<v2i64, SHUFBv2i64>;
-def : SHUFBVecPat1<v4f32, SHUFBv4f32>;
-def : SHUFBVecPat1<v2f64, SHUFBv2f64>;
+// Shuffle mask is a v16i8 vector
+def : SHUFBVecPat1<v8i16, v16i8, SHUFBv8i16>;
+def : SHUFBVecPat1<v4i32, v16i8, SHUFBv4i32>;
+def : SHUFBVecPat1<v2i64, v16i8, SHUFBv2i64>;
+def : SHUFBVecPat1<v4f32, v16i8, SHUFBv4f32>;
+def : SHUFBVecPat1<v2f64, v16i8, SHUFBv2f64>;
+
+// Shuffle mask is a v4i32 vector:
+def : SHUFBVecPat1<v8i16, v4i32, SHUFBv8i16>;
+def : SHUFBVecPat1<v4i32, v4i32, SHUFBv4i32>;
+def : SHUFBVecPat1<v2i64, v4i32, SHUFBv2i64>;
+def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4f32>;
+def : SHUFBVecPat1<v2f64, v4i32, SHUFBv2f64>;
 
 //===----------------------------------------------------------------------===//
 // Shift and rotate group:
@@ -2079,10 +2193,24 @@ def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), (i16 uimm7:$val)),
           (ROTQBYIv2i64 VECREG:$rA, uimm7:$val)>;
 
 // See ROTQBY note above.
-def ROTQBYBIvec:
-    RI7Form<0b00110011100, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
-      "rotqbybi\t$rT, $rA, $val", RotateShift,
-      [/* intrinsic */]>;
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+    RI7Form<0b00110011100, OOL, IOL,
+      "rotqbybi\t$rT, $rA, $shift",
+      RotateShift, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+    ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+      [(set (vectype VECREG:$rT),
+            (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+  def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+  def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+  def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+  def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
 
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // See ROTQBY note above.
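ROTQBYBI's niche is exactly what the doc comment on SPUrotbytes_left_bits
below says: it rotates by whole bytes but takes the count in bits, so a full
128-bit rotate is a rotqbybi/rotqbi pair with no extra masking of the low
three bits. A byte-array model of that decomposition (plain C++, not the SPU
API):

    #include <cstdint>

    // 128-bit left rotate split the way the i64 SRA lowering uses it:
    // rotate whole bytes first (rotqbybi), then the remaining 0..7 bits
    // (rotqbi).
    static void rotl128(uint8_t q[16], unsigned bits) {
      uint8_t tmp[16];
      unsigned bytes = (bits / 8) % 16;   // byte-granular rotate
      for (int i = 0; i < 16; ++i)
        tmp[i] = q[(i + bytes) % 16];
      unsigned r = bits % 8;              // bit-granular rotate
      for (int i = 0; i < 16; ++i)        // promotion to int makes >> 8 safe
        q[i] = uint8_t((tmp[i] << r) | (tmp[(i + 1) % 16] >> (8 - r)));
    }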
@@ -2358,7 +2486,6 @@ multiclass RotateQuadBytesImm
 
 defm ROTQMBYI : RotateQuadBytesImm;
 
-
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // Rotate right and mask by bit count
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2545,25 +2672,28 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
           (ROTMAr32 R32C:$rA,
                     (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
 
-def ROTMAIv4i32:
-    RRForm<0b01011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
-      "rotmai\t$rT, $rA, $val", RotateShift,
-      [(set (v4i32 VECREG:$rT),
-            (SPUvec_sra VECREG:$rA, (i32 uimm7:$val)))]>;
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b01011110000, OOL, IOL,
+      "rotmai\t$rT, $rA, $val",
+      RotateShift, pattern>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, (i16 uimm7:$val)),
-          (ROTMAIv4i32 VECREG:$rA, uimm7:$val)>;
+class ROTMAIVecInst<ValueType vectype, ValueType inttype, Operand intop>:
+    ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+      [(set (vectype VECREG:$rT),
+            (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
 
-def ROTMAIr32:
-    RRForm<0b01011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
-      "rotmai\t$rT, $rA, $val", RotateShift,
-      [(set R32C:$rT, (sra R32C:$rA, (i32 uimm7:$val)))]>;
+class ROTMAIRegInst<RegisterClass rclass, ValueType inttype, Operand intop>:
+    ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+      [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
 
-def : Pat<(sra R32C:$rA, (i16 uimm7:$val)),
-          (ROTMAIr32 R32C:$rA, uimm7:$val)>;
+multiclass RotateMaskAlgebraicImm {
+  def v2i64_i32 : ROTMAIVecInst<v2i64, i32, rotNeg7imm>;
+  def v4i32_i32 : ROTMAIVecInst<v4i32, i32, rotNeg7imm>;
+  def r64_i32 : ROTMAIRegInst<R64C, i32, rotNeg7imm>;
+  def r32_i32 : ROTMAIRegInst<R32C, i32, rotNeg7imm>;
+}
 
-def : Pat<(sra R32C:$rA, (i8 uimm7:$val)),
-          (ROTMAIr32 R32C:$rA, uimm7:$val)>;
+defm ROTMAI : RotateMaskAlgebraicImm;
 
 //===----------------------------------------------------------------------===//
 // Branch and conditionals:
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 00d0f94212a..63b852f2542 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -36,29 +36,25 @@ def SDT_SPUshuffle   : SDTypeProfile<1, 3, [
 ]>;
 
 // Unary, binary v16i8 operator type constraints:
-def SPUv16i8_unop: SDTypeProfile<1, 1, [
-  SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>]>;
-
 def SPUv16i8_binop: SDTypeProfile<1, 2, [
   SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 
 // Binary v8i16 operator type constraints:
-def SPUv8i16_unop: SDTypeProfile<1, 1, [
-  SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>]>;
-
 def SPUv8i16_binop: SDTypeProfile<1, 2, [
   SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 
 // Binary v4i32 operator type constraints:
-def SPUv4i32_unop: SDTypeProfile<1, 1, [
-  SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>]>;
-
 def SPUv4i32_binop: SDTypeProfile<1, 2, [
   SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 
-// FSMBI type constraints: There are several variations for the various
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
 // vector types (this avoids having to bit_convert all over the place.)
-def SPUfsmbi_type: SDTypeProfile<1, 1, [
+def SPUselmask_type: SDTypeProfile<1, 1, [
   SDTCisInt<1>
 ]>;
 
@@ -74,10 +70,16 @@ def SPUvecshift_type: SDTypeProfile<1, 2, [
 // Synthetic/pseudo-instructions
 //===----------------------------------------------------------------------===//
 
+/// Add extended, carry generate:
+def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
+def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
+
+// Subtract extended, borrow generate
+def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
+def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
+
 // SPU CNTB:
-def SPUcntb_v16i8: SDNode<"SPUISD::CNTB", SPUv16i8_unop, []>;
-def SPUcntb_v8i16: SDNode<"SPUISD::CNTB", SPUv8i16_unop, []>;
-def SPUcntb_v4i32: SDNode<"SPUISD::CNTB", SPUv4i32_unop, []>;
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
 
 // SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
 // SPUISelLowering.h):
@@ -122,14 +124,23 @@ def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
 def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S",
                                     SPUvecshift_type, []>;
 
+// Vector rotate left, bits shifted out of the left are rotated in on the right
 def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
                              SPUvecshift_type, []>;
 
+// Same as above, but the node also has a chain associated (used in loads and
+// stores)
 def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED",
                                       SPUvecshift_type, [SDNPHasChain]>;
 
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+                                   SPUvecshift_type>;
+
 // SPU form select mask for bytes, immediate
-def SPUfsmbi: SDNode<"SPUISD::FSMBI", SPUfsmbi_type, []>;
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
 
 // SPU select bits instruction
 def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index aae79b509a1..d17faac861f 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -559,6 +559,10 @@ def rotNeg7imm_i16 : Operand<i16> {
   let PrintMethod = "printROTNeg7Imm";
 }
 
+def rotNeg7imm_i8 : Operand<i8> {
+  let PrintMethod = "printROTNeg7Imm";
+}
+
 def target : Operand<OtherVT> {
   let PrintMethod = "printBranchOperand";
 }
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index 7ef3d766bb5..d93322202ad 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -1,16 +1,16 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep lqa %t1.s | count 13
-; RUN: grep il %t1.s | count 22
-; RUN: grep shufb %t1.s | count 13
-; RUN: grep 65520 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 13702 %t1.s | count 1
-; RUN: grep 81 %t1.s | count 2
-; RUN: grep 28225 %t1.s | count 1
-; RUN: grep 30720 %t1.s | count 1
-; RUN: grep 192 %t1.s | count 32
-; RUN: grep 128 %t1.s | count 30
-; RUN: grep 224 %t1.s | count 2
+; RUN: grep lqa        %t1.s | count 13
+; RUN: grep il         %t1.s | count 22
+; RUN: grep shufb      %t1.s | count 13
+; RUN: grep 65520      %t1.s | count 1
+; RUN: grep 43981      %t1.s | count 1
+; RUN: grep 13702      %t1.s | count 1
+; RUN: grep 28225      %t1.s | count 1
+; RUN: grep 30720      %t1.s | count 1
+; RUN: grep 3233857728 %t1.s | count 8
+; RUN: grep 2155905152 %t1.s | count 6
+; RUN: grep 66051      %t1.s | count 7
+; RUN: grep 471670303  %t1.s | count 11
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
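Aside on the updated grep patterns: they are the decimal renderings of the
shuffle-mask words introduced above -- 0x80808080 (fill with zeros) and
0xc0c0c0c0 (fill with ones) from the carry/borrow shuffles, plus the
byte-selection words LowerBUILD_VECTOR now emits. A compile-time sanity check
of the correspondences (plain C++):

    int main() {
      static_assert(2155905152u == 0x80808080u, "shufb fill-with-zeros word");
      static_assert(3233857728u == 0xc0c0c0c0u, "shufb fill-with-ones word");
      static_assert(66051u == 0x00010203u, "select bytes 0-3 of operand A");
      static_assert(471670303u == 0x1c1d1e1fu, "select bytes 12-15 of operand B");
      return 0;
    }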