diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 74c0ecad7f8..cb8b48bc1fc 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -2,7 +2,6 @@
 //
 // Cell SPU 64-bit operations
 //
-// Primary author: Scott Michel (scottm@aero.org)
 //===----------------------------------------------------------------------===//
 
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -240,3 +239,145 @@ def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
 // i64 setult:
 def : I64SETCCNegCond;
 def : I64SELECTNegCond;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+    CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+          (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+                                 (ORv2i64_i64 R64C:$rB),
+                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+          v2i64_add<(v2i64 VECREG:$rA),
+                    (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+          (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+                                 (ORv2i64_i64 R64C:$rB),
+                                 v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+                                              (ORv2i64_i64 R64C:$rB)>.Fragment,
+                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+          v2i64_sub<(v2i64 VECREG:$rA),
+                    (v2i64 VECREG:$rB),
+                    v2i64_sub_bg<(v2i64 VECREG:$rA),
+                                 (v2i64 VECREG:$rB)>.Fragment,
+                    (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+    CodeFrag<(Av4i32
+                (Av4i32
+                  (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
+                             v2i64_mul_ahi64<rA>.Fragment),
+                  (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
+                             v2i64_mul_bshlq4<rB>.Fragment)),
+                (Av4i32
+                  (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+                             v2i64_mul_ashlq4<rA>.Fragment),
+                  (Av4i32
+                    (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+                               v2i64_mul_bhi64<rB>.Fragment),
+                    (Av4i32
+                      (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+                                 v2i64_mul_bhi64<rB>.Fragment),
+                      (Av4i32
+                        (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+                                   v2i64_mul_bshlq2<rB>.Fragment),
+                        (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+                                   v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+                        v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+                        (ILv4i32 0),
+                        (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+                        (ILv4i32 0),
+                        (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+                         (ILv4i32 0),
+                         (FSMBIv4i32 0x0f0f)),
+              rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+                                 (ORv2i64_i64 R64C:$rB),
+                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)),
+          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+                    (v4i32 VECREG:$rCGmask)>.Fragment>;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 76b22843696..1f00bacb5e6 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -18,11 +18,13 @@
 #include "SPUHazardRecognizers.h"
 #include "SPUFrameInfo.h"
 #include "SPURegisterNames.h"
+#include "SPUTargetMachine.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
@@ -254,6 +256,26 @@ public:
     return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
   }
 
+  SDNode *emitBuildVector(SDValue build_vec) {
+    std::vector<Constant*> CV;
+
+    for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
+      CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+    }
+
+    Constant *CP = ConstantVector::get(CV);
+    SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+    unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    SDValue CGPoolOffset =
+      SPU::LowerConstantPool(CPIdx, *CurDAG,
+                             SPUtli.getSPUTargetMachine());
+    return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
+                                      CurDAG->getEntryNode(), CGPoolOffset,
+                                      PseudoSourceValue::getConstantPool(), 0,
+                                      false, Alignment));
+  }
+
   /// Select - Convert the specified operand from a target-independent to a
   /// target-specific node if it hasn't already been changed.
   SDNode *Select(SDValue Op);
@@ -647,22 +669,82 @@ SPUDAGToDAGISel::Select(SDValue Op) {
                                            TFI, Imm0), 0);
       n_ops = 2;
     }
-  } else if (Opc == ISD::ZERO_EXTEND) {
-    // (zero_extend:i16 (and:i8 <arg>, <arg>))
-    const SDValue &Op1 = N->getOperand(0);
+  } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+             && OpVT == MVT::i64) {
+    SDValue Op0 = Op.getOperand(0);
+    MVT Op0VT = Op0.getValueType();
+    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+    MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+    SDValue shufMask;
 
-    if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) {
-      if (Op1.getOpcode() == ISD::AND) {
-        // Fold this into a single ANDHI. This is often seen in expansions of i1
-        // to i8, then i8 to i16 in logical/branching operations.
-        DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 "
-                      "<arg>, <arg>))\n");
-        NewOpc = SPU::ANDHIi8i16;
-        Ops[0] = Op1.getOperand(0);
-        Ops[1] = Op1.getOperand(1);
-        n_ops = 2;
-      }
+    switch (Op0VT.getSimpleVT()) {
+    default:
+      cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+      abort();
+      /*NOTREACHED*/
+      break;
+    case MVT::i32:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x00010203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x08090a0b, MVT::i32));
+      break;
+
+    case MVT::i16:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800a0b, MVT::i32));
+      break;
+
+    case MVT::i8:
+      shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80808003, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x8080800b, MVT::i32));
+      break;
     }
+
+    SDNode *shufMaskLoad = emitBuildVector(shufMask);
+    SDNode *PromoteScalar =
+      SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0));
+
+    SDValue zextShuffle =
+      CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
+                      SDValue(PromoteScalar, 0),
+                      SDValue(PromoteScalar, 0),
+                      SDValue(shufMaskLoad, 0));
+
+    // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+    // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+    // call SelectCode (it's already done for us.)
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, OpVecVT, zextShuffle)); + return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, OpVT, + zextShuffle)); + } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { return SelectSHLi64(Op, OpVT); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 9dd98558509..92bd92886c3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -78,6 +78,7 @@ namespace { return retval; } + } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -208,13 +209,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); setOperationAction(ISD::MUL, MVT::i32, Legal); - setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall + setOperationAction(ISD::MUL, MVT::i64, Legal); // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); // SPU does not have BSWAP. It does have i32 support CTLZ. // CTPOP has to be custom lowered. 
@@ -243,11 +244,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::SETCC, MVT::i32, Legal);
   setOperationAction(ISD::SETCC, MVT::i64, Legal);
 
-  // Zero extension and sign extension for i64 have to be
-  // custom legalized
-  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
-  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
-
   // Custom lower i128 -> i64 truncates
   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 
@@ -416,10 +412,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
-    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
-    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
-    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
-    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
+    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
   }
 
   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
@@ -778,8 +773,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   return SDValue();
 }
 
-/// Generate the address of a constant pool entry.
-static SDValue
+//! Generate the address of a constant pool entry.
+SDValue
 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   MVT PtrVT = Op.getValueType();
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -805,6 +800,12 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   return SDValue();
 }
 
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
 static SDValue
 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   MVT PtrVT = Op.getValueType();
@@ -2185,123 +2186,34 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
   return SDValue();
 }
 
-static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
-{
-  MVT VT = Op.getValueType();
-  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+//! Generate the carry-generate shuffle mask.
+SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
+  SmallVector<SDValue, 4> ShufBytes;
 
-  SDValue Op0 = Op.getOperand(0);
+  // Create the shuffle mask for "rotating" the carry up one register slot
+  // once the carry is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
 
-  switch (Opc) {
-  case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND: {
-    MVT Op0VT = Op0.getValueType();
-    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
+}
 
-    SDValue PromoteScalar =
-      DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
+//! Generate the borrow-generate shuffle mask
+SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
+  SmallVector<SDValue, 4> ShufBytes;
 
-    // Use a shuffle to zero extend the i32 to i64 directly:
-    SDValue shufMask;
+  // Create the shuffle mask for "rotating" the borrow up one register slot
+  // once the borrow is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
 
-    switch (Op0VT.getSimpleVT()) {
-    default:
-      cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
-      abort();
-      /*NOTREACHED*/
-      break;
-    case MVT::i32:
-      shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x00010203, MVT::i32),
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x08090a0b, MVT::i32));
-      break;
-
-    case MVT::i16:
-      shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x80800203, MVT::i32),
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x80800a0b, MVT::i32));
-      break;
-
-    case MVT::i8:
-      shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x80808003, MVT::i32),
-                             DAG.getConstant(0x80808080, MVT::i32),
-                             DAG.getConstant(0x8080800b, MVT::i32));
-      break;
-    }
-
-    SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
-                                      PromoteScalar, PromoteScalar, shufMask);
-
-    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                       DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
-  }
-
-  case ISD::ADD: {
-    // Turn operands into vectors to satisfy type checking (shufb works on
-    // vectors)
-    SDValue Op0 =
-      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
-    SDValue Op1 =
-      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
-    SmallVector<SDValue, 4> ShufBytes;
-
-    // Create the shuffle mask for "rotating" the borrow up one register slot
-    // once the borrow is generated.
-    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
-    SDValue CarryGen =
-      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
-    SDValue ShiftedCarry =
-      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
-                  CarryGen, CarryGen,
-                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                              &ShufBytes[0], ShufBytes.size()));
-
-    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
-                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
-                                   Op0, Op1, ShiftedCarry));
-  }
-
-  case ISD::SUB: {
-    // Turn operands into vectors to satisfy type checking (shufb works on
-    // vectors)
-    SDValue Op0 =
-      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
-    SDValue Op1 =
-      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
-    SmallVector<SDValue, 4> ShufBytes;
-
-    // Create the shuffle mask for "rotating" the borrow up one register slot
-    // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - SDValue BorrowGen = - DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedBorrow = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - BorrowGen, BorrowGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedBorrow)); - } - } - - return SDValue(); +return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } //! Lower byte immediate operations for v16i8 vectors: @@ -2576,11 +2488,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); - - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return LowerI64Math(Op, DAG, Opc); - // i8, i64 math ops: case ISD::ADD: case ISD::SUB: @@ -2591,8 +2498,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::SRA: { if (VT == MVT::i8) return LowerI8Math(Op, DAG, Opc, *this); - else if (VT == MVT::i64) - return LowerI64Math(Op, DAG, Opc); break; } @@ -2831,6 +2736,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const break; } } + // Otherwise, return unchanged. #ifndef NDEBUG if (Result.getNode()) { diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 24b8f82ecbe..a98a8f6bbef 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -52,10 +52,11 @@ namespace llvm { ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) SELB, ///< Select bits -> (b & mask) | (a & ~mask) - ADD_EXTENDED, ///< Add extended, with carry - CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED - SUB_EXTENDED, ///< Subtract extended, with borrow - BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED + // Markers: These aren't used to generate target-dependent nodes, but + // are used during instruction selection. + ADD64_MARKER, ///< i64 addition marker + SUB64_MARKER, ///< i64 subtraction marker + MUL64_MARKER, ///< i64 multiply marker LAST_SPUISD ///< Last user-defined instruction }; } @@ -74,6 +75,12 @@ namespace llvm { MVT ValueType); SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); + + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, + const SPUTargetMachine &TM); + + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG); + SDValue getCarryGenerateShufMask(SelectionDAG &DAG); } class SPUTargetMachine; // forward dec'l. @@ -86,8 +93,18 @@ namespace llvm { SPUTargetMachine &SPUTM; public: + //! The venerable constructor + /*! + This is where the CellSPU backend sets operation handling (i.e., legal, + custom, expand or promote.) + */ SPUTargetLowering(SPUTargetMachine &TM); + //! Get the target machine + SPUTargetMachine &getSPUTargetMachine() { + return SPUTM; + } + /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. 
   virtual const char *getTargetNodeName(unsigned Opcode) const;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index b9956402d95..b639ec254a9 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -705,17 +705,14 @@ class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
 
 class ADDXVecInst<ValueType vectype>:
     ADDXInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
-             [(set (vectype VECREG:$rT),
-                   (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
-                            (vectype VECREG:$rCarry)))]>,
+             [/* no pattern */]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
 class ADDXRegInst<RegisterClass rclass>:
     ADDXInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
-             [(set rclass:$rT,
-                   (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+             [/* no pattern */]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
@@ -737,14 +734,12 @@ class CGInst<dag OOL, dag IOL, list<dag> pattern>:
 
 class CGVecInst<ValueType vectype>:
     CGInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-           [(set (vectype VECREG:$rT),
-                 (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+           [/* no pattern */]>;
 
 class CGRegInst<RegisterClass rclass>:
     CGInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
-           [(set rclass:$rT,
-                 (SPUcarry_gen rclass:$rA, rclass:$rB))]>;
+           [/* no pattern */]>;
 
 multiclass CarryGenerate {
   def v2i64 : CGVecInst<v2i64>;
@@ -765,17 +760,14 @@ class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
 
 class SFXVecInst<ValueType vectype>:
     SFXInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
-            [(set (vectype VECREG:$rT),
-                  (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
-                           (vectype VECREG:$rCarry)))]>,
+            [/* no pattern */]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
 class SFXRegInst<RegisterClass rclass>:
     SFXInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
-            [(set rclass:$rT,
-                  (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+            [/* no pattern */]>,
     RegConstraint<"$rCarry = $rT">,
     NoEncode<"$rCarry">;
 
@@ -797,14 +789,12 @@ class BGInst<dag OOL, dag IOL, list<dag> pattern>:
 
 class BGVecInst<ValueType vectype>:
     BGInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-           [(set (vectype VECREG:$rT),
-                 (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+           [/* no pattern */]>;
 
 class BGRegInst<RegisterClass rclass>:
     BGInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
-           [(set rclass:$rT,
-                 (SPUborrow_gen rclass:$rA, rclass:$rB))]>;
+           [/* no pattern */]>;
 
 multiclass BorrowGenerate {
   def v4i32 : BGVecInst<v4i32>;
@@ -894,7 +884,7 @@ class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
     "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
     pattern>;
 
-def MPYAvec:
+def MPYAv4i32:
     MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
       [(set (v4i32 VECREG:$rT),
             (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
@@ -939,7 +929,7 @@ class MPYSInst<dag OOL, dag IOL>:
     "mpys\t$rT, $rA, $rB", IntegerMulDiv,
     [/* no pattern */]>;
 
-def MPYSvec:
+def MPYSv4i32:
     MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
 
 def MPYSr16:
@@ -972,14 +962,20 @@ def MPYHHAvec:
 def MPYHHAr32:
     MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
 
-// mpyhhu: Multiply high-high, unsigned
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+//   +-------+-------+   +-------+-------+   +---------+
+//   | a0 .  a1      | x | b0 .  b1      | = | a0 x b0 |
+//   +-------+-------+   +-------+-------+   +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
 class MPYHHUInst<dag OOL, dag IOL>:
     RRForm<0b01110011110, OOL, IOL,
       "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
       [/* no pattern */]>;
 
-def MPYHHUvec:
+def MPYHHUv4i32:
     MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
 
 def MPYHHUr32:
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index 38279a0a9f8..64548fd8c08 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -8,8 +8,6 @@
 //
 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
 // purely and completely coincidental.
-//
-// Primary author: Scott Michel (scottm@aero.org)
 //===----------------------------------------------------------------------===//
 
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index cae6023cd45..87c4115d1b1 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -61,18 +61,20 @@ def SPUselb_type: SDTypeProfile<1, 3, [
 def SPUvecshift_type: SDTypeProfile<1, 2, [
   SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
 
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., ones that use cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
 //===----------------------------------------------------------------------===//
 // Synthetic/pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-/// Add extended, carry generate:
-def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
-
-// Subtract extended, borrow generate
-def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
-
 // SPU CNTB:
 def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
 
@@ -127,6 +129,12 @@ def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
 // Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
 def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
 
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
 //===----------------------------------------------------------------------===//
 // Constraints: (taken from PPCInstrInfo.td)
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index d118c5f88c5..dd6782772a5 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -2,9 +2,15 @@
 ; RUN: grep xswd %t1.s | count 3
 ; RUN: grep xsbh %t1.s | count 1
 ; RUN: grep xshw %t1.s | count 2
-; RUN: grep shufb %t1.s | count 4
-; RUN: grep cg %t1.s | count 1
-; RUN: grep addx %t1.s | count 1
+; RUN: grep shufb %t1.s | count 7
+; RUN: grep cg %t1.s | count 4
+; RUN: grep addx %t1.s | count 4
+; RUN: grep fsmbi %t1.s | count 3
+; RUN: grep il %t1.s | count 2
+; RUN: grep mpy %t1.s | count 10
+; RUN: grep mpyh %t1.s | count 6
+; RUN: grep mpyhhu %t1.s | count 2
+; RUN: grep mpyu %t1.s | count 4
 
 ; ModuleID = 'stores.bc'
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -44,3 +50,8 @@ define i64 @add_i64(i64 %a, i64 %b) nounwind {
   %1 = add i64 %a, %b
   ret i64 %1
 }
+
+define i64 @mul_i64(i64 %a, i64 %b) nounwind {
+  %1 = mul i64 %a, %b
+  ret i64 %1
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
index 7a4bf1ab0d2..b613bd872e2 100644
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -7,6 +7,7 @@ int64_t tval_c = 1234567890001LL;
 int64_t tval_d = 10001LL;
 int64_t tval_e = 10000LL;
 uint64_t tval_f = 0xffffff0750135eb9;
+int64_t tval_g = -1;
 
 /* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
 
@@ -546,6 +547,12 @@ test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), in
 
 /* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
 
+int64_t i64_mul(int64_t a, int64_t b) {
+  return a * b;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
 int
 main(void)
 {
@@ -553,12 +560,13 @@ main(void)
   const char *something_failed = "  %d tests failed.\n";
   const char *all_tests_passed = "  All tests passed.\n";
 
-  printf("tval_a = %20lld (0x%020llx)\n", tval_a, tval_a);
-  printf("tval_b = %20lld (0x%020llx)\n", tval_b, tval_b);
-  printf("tval_c = %20lld (0x%020llx)\n", tval_c, tval_c);
-  printf("tval_d = %20lld (0x%020llx)\n", tval_d, tval_d);
-  printf("tval_e = %20lld (0x%020llx)\n", tval_e, tval_e);
-  printf("tval_f = %20llu (0x%020llx)\n", tval_f, tval_f);
+  printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
+  printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
+  printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
+  printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
+  printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
+  printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
+  printf("tval_g = %20lld (0x%016llx)\n", tval_g, tval_g);
 
   printf("----------------------------------------\n");
 
   for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
@@ -649,5 +657,17 @@ main(void)
 
   printf("----------------------------------------\n");
 
+  int64_t result;
+
+  result = i64_mul(tval_g, tval_g);
+  printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
+  result = i64_mul(tval_d, tval_e);
+  printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
+  /* 0xba7a664f13077c9 */
+  result = i64_mul(tval_a, tval_b);
+  printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
+
+  printf("----------------------------------------\n");
+
   return 0;
 }
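
Two notes for readers checking the algorithms behind the patterns above. First, the add/sub patterns lean on the SPU carry idiom: cg computes a per-word carry, shufb (with the mask from SPU::getCarryGenerateShufMask, loaded by emitBuildVector) rotates that carry up into the next word's slot, and addx folds it in. Below is a minimal scalar C sketch of that idiom, with the two 32-bit halves of a uint64_t standing in for vector word slots; add64_via_cg is an illustrative name, not code from this patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar model of the SPU 64-bit add sequence: cg generates a per-word
 * carry, shufb rotates it into the next-higher word's slot, and addx
 * adds it in.  The 32-bit halves of a uint64_t model the word slots. */
static uint64_t add64_via_cg(uint64_t a, uint64_t b) {
  uint32_t a_hi = (uint32_t)(a >> 32), a_lo = (uint32_t)a;
  uint32_t b_hi = (uint32_t)(b >> 32), b_lo = (uint32_t)b;

  /* cg: the carry out of the low-word addition (0 or 1). */
  uint32_t carry = (uint32_t)(((uint64_t)a_lo + b_lo) >> 32);

  /* shufb + addx: the carry moves up one slot and is folded in. */
  uint32_t lo = a_lo + b_lo;
  uint32_t hi = a_hi + b_hi + carry;

  return ((uint64_t)hi << 32) | lo;
}

int main(void) {
  assert(add64_via_cg(0xffffffffULL, 1) == 0x100000000ULL);
  assert(add64_via_cg(0x123456789abcdef0ULL, 0x0fedcba987654321ULL) ==
         0x123456789abcdef0ULL + 0x0fedcba987654321ULL);
  printf("add64_via_cg: ok\n");
  return 0;
}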
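Second, v2i64_mul assembles a 64-bit product out of the SPU's 16x16->32 multipliers (mpy/mpyu/mpyh/mpyhhu). The identity it relies on: split a and b into four 16-bit digits each; a partial product a_i * b_j lands at bit 16*(i+j), and every term with i+j >= 4 falls entirely above bit 63, matching the mod-2^64 truncation of i64 multiply. A scalar C sketch of that decomposition follows (again illustrative, not the patch's code); it yields exactly ten partial products, in line with the "grep mpy ... count 10" check added to i64ops.ll.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar model of the v2i64_mul decomposition: a and b as four 16-bit
 * digits each (the SPU multiplier width).  Terms with i + j >= 4 shift
 * entirely past bit 63 and are dropped, so accumulating the remaining
 * ten partial products mod 2^64 reproduces the i64 product. */
static uint64_t mul64_via_halfwords(uint64_t a, uint64_t b) {
  uint64_t result = 0;
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; i + j < 4; ++j) {
      uint64_t ai = (a >> (16 * i)) & 0xffff;
      uint64_t bj = (b >> (16 * j)) & 0xffff;
      result += (ai * bj) << (16 * (i + j));  /* one of 10 partial products */
    }
  }
  return result;
}

int main(void) {
  /* Mirrors the i64operations.c harness cases: tval_d * tval_e, and
   * tval_g * tval_g (i.e., -1 * -1 in two's complement). */
  assert(mul64_via_halfwords(10001ULL, 10000ULL) == 100010000ULL);
  assert(mul64_via_halfwords(0xffffffffffffffffULL,
                             0xffffffffffffffffULL) == 1ULL);
  printf("mul64_via_halfwords: ok\n");
  return 0;
}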