From 94bd57e154088f2d45c465e73f896f64f6da4ade Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Thu, 15 Jan 2009 04:41:47 +0000 Subject: [PATCH] - Convert remaining i64 custom lowering into custom instruction emission sequences in SPUDAGToDAGISel.cpp and SPU64InstrInfo.td, killing custom DAG node types as needed. - i64 mul is now a legal instruction, but emits an instruction sequence that stretches tblgen and the imagination, as well as violating laws of several small countries and most southern US states (just kidding, but looking at a function with 80+ parameters is really weird and just plain wrong.) - Update tests as needed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62254 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPU64InstrInfo.td | 143 ++++++++++++++- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 110 +++++++++-- lib/Target/CellSPU/SPUISelLowering.cpp | 172 ++++-------------- lib/Target/CellSPU/SPUISelLowering.h | 25 ++- lib/Target/CellSPU/SPUInstrInfo.td | 40 ++-- lib/Target/CellSPU/SPUMathInstr.td | 2 - lib/Target/CellSPU/SPUNodes.td | 24 ++- test/CodeGen/CellSPU/i64ops.ll | 17 +- .../CellSPU/useful-harnesses/i64operations.c | 32 +++- 9 files changed, 372 insertions(+), 193 deletions(-) diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 74c0ecad7f8..cb8b48bc1fc 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -2,7 +2,6 @@ // // Cell SPU 64-bit operations // -// Primary author: Scott Michel (scottm@aero.org) //===----------------------------------------------------------------------===// //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ @@ -240,3 +239,145 @@ def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), // i64 setult: def : I64SETCCNegCond; def : I64SELECTNegCond; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 add +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_add_cg: + CodeFrag<(CGv4i32 lhs, rhs)>; + +class v2i64_add_1: + CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>; + +class v2i64_add: + v2i64_add_1.Fragment, cg_mask>; + +def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA), + (ORv2i64_i64 R64C:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment)>; + +def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_add<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 subtraction +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_sub_bg: CodeFrag<(BGv4i32 lhs, rhs)>; + +class v2i64_sub: + CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; + +def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA), + (ORv2i64_i64 R64C:$rB), + v2i64_sub_bg<(ORv2i64_i64 R64C:$rA), + (ORv2i64_i64 R64C:$rB)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment)>; + +def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_sub<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB), + v2i64_sub_bg<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 multiply +// +// Note: i64 multiply is simply the vector->scalar conversion of the +// full-on v2i64 multiply, since the entire vector has to be manipulated +// anyway. +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_mul_ahi64 : + CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; + +class v2i64_mul_bhi64 : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; + +class v2i64_mul_alo64 : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; + +class v2i64_mul_blo64 : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; + +class v2i64_mul_ashlq2: + CodeFrag<(SHLQBYIv4i32 rA, 0x2)>; + +class v2i64_mul_ashlq4: + CodeFrag<(SHLQBYIv4i32 rA, 0x4)>; + +class v2i64_mul_bshlq2 : + CodeFrag<(SHLQBYIv4i32 rB, 0x2)>; + +class v2i64_mul_bshlq4 : + CodeFrag<(SHLQBYIv4i32 rB, 0x4)>; + +class v2i64_highprod: + CodeFrag<(Av4i32 + (Av4i32 + (MPYUv4i32 v2i64_mul_bshlq4.Fragment, // a1 x b3 + v2i64_mul_ahi64.Fragment), + (MPYHv4i32 v2i64_mul_ahi64.Fragment, // a0 x b3 + v2i64_mul_bshlq4.Fragment)), + (Av4i32 + (MPYHv4i32 v2i64_mul_bhi64.Fragment, + v2i64_mul_ashlq4.Fragment), + (Av4i32 + (MPYHv4i32 v2i64_mul_ashlq4.Fragment, + v2i64_mul_bhi64.Fragment), + (Av4i32 + (MPYUv4i32 v2i64_mul_ashlq4.Fragment, + v2i64_mul_bhi64.Fragment), + (Av4i32 + (MPYHv4i32 v2i64_mul_ashlq2.Fragment, + v2i64_mul_bshlq2.Fragment), + (MPYUv4i32 v2i64_mul_ashlq2.Fragment, + v2i64_mul_bshlq2.Fragment))))))>; + +class v2i64_mul_a3_b3: + CodeFrag<(MPYUv4i32 v2i64_mul_alo64.Fragment, + v2i64_mul_blo64.Fragment)>; + +class v2i64_mul_a2_b3: + CodeFrag<(SELBv4i32 (SHLQBYIv4i32 + (MPYHHUv4i32 v2i64_mul_alo64.Fragment, + v2i64_mul_bshlq2.Fragment), 0x2), + (ILv4i32 0), + (FSMBIv4i32 0xc3c3))>; + +class v2i64_mul_a3_b2: + CodeFrag<(SELBv4i32 (SHLQBYIv4i32 + (MPYHHUv4i32 v2i64_mul_blo64.Fragment, + v2i64_mul_ashlq2.Fragment), 0x2), + (ILv4i32 0), + (FSMBIv4i32 0xc3c3))>; + +class v2i64_lowsum: + v2i64_add.Fragment, + v2i64_mul_a2_b3.Fragment, rCGmask>.Fragment, + v2i64_mul_a3_b2.Fragment, rCGmask>; + +class v2i64_mul: + v2i64_add.Fragment, + (SELBv4i32 v2i64_highprod.Fragment, + (ILv4i32 0), + (FSMBIv4i32 0x0f0f)), + rCGmask>; + +def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA), + (ORv2i64_i64 R64C:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment)>; + +def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 76b22843696..1f00bacb5e6 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -18,11 +18,13 @@ #include "SPUHazardRecognizers.h" #include "SPUFrameInfo.h" #include "SPURegisterNames.h" +#include "SPUTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" @@ -254,6 +256,26 @@ public: return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); } + SDNode *emitBuildVector(SDValue build_vec) { + std::vector CV; + + for (size_t i = 0; i < build_vec.getNumOperands(); ++i) { + ConstantSDNode *V = dyn_cast(build_vec.getOperand(i)); + CV.push_back(const_cast(V->getConstantIntValue())); + } + + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); + unsigned Alignment = 1 << cast(CPIdx)->getAlignment(); + SDValue CGPoolOffset = + SPU::LowerConstantPool(CPIdx, *CurDAG, + SPUtli.getSPUTargetMachine()); + return SelectCode(CurDAG->getLoad(build_vec.getValueType(), + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(), 0, + false, Alignment)); + } + /// Select - Convert the specified operand from a target-independent to a /// target-specific node if it hasn't already been changed. SDNode *Select(SDValue Op); @@ -647,22 +669,82 @@ SPUDAGToDAGISel::Select(SDValue Op) { TFI, Imm0), 0); n_ops = 2; } - } else if (Opc == ISD::ZERO_EXTEND) { - // (zero_extend:i16 (and:i8 , )) - const SDValue &Op1 = N->getOperand(0); + } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) + && OpVT == MVT::i64) { + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType(); + MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); + MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + SDValue shufMask; - if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) { - if (Op1.getOpcode() == ISD::AND) { - // Fold this into a single ANDHI. This is often seen in expansions of i1 - // to i8, then i8 to i16 in logical/branching operations. - DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 " - ", ))\n"); - NewOpc = SPU::ANDHIi8i16; - Ops[0] = Op1.getOperand(0); - Ops[1] = Op1.getOperand(1); - n_ops = 2; - } + switch (Op0VT.getSimpleVT()) { + default: + cerr << "CellSPU Select: Unhandled zero/any extend MVT\n"; + abort(); + /*NOTREACHED*/ + break; + case MVT::i32: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x00010203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x08090a0b, MVT::i32)); + break; + + case MVT::i16: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800a0b, MVT::i32)); + break; + + case MVT::i8: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80808003, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x8080800b, MVT::i32)); + break; } + + SDNode *shufMaskLoad = emitBuildVector(shufMask); + SDNode *PromoteScalar = + SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0)); + + SDValue zextShuffle = + CurDAG->getNode(SPUISD::SHUFB, OpVecVT, + SDValue(PromoteScalar, 0), + SDValue(PromoteScalar, 0), + SDValue(shufMaskLoad, 0)); + + // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we + // re-use it in the VEC2PREFSLOT selection without needing to explicitly + // call SelectCode (it's already done for us.) + SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, OpVecVT, zextShuffle)); + return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, OpVT, + zextShuffle)); + } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); + } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG)); + + return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT, + Op.getOperand(0), Op.getOperand(1), + SDValue(CGLoad, 0))); } else if (Opc == ISD::SHL) { if (OpVT == MVT::i64) { return SelectSHLi64(Op, OpVT); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 9dd98558509..92bd92886c3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -78,6 +78,7 @@ namespace { return retval; } + } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) @@ -208,13 +209,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); setOperationAction(ISD::MUL, MVT::i32, Legal); - setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall + setOperationAction(ISD::MUL, MVT::i64, Legal); // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); // SPU does not have BSWAP. It does have i32 support CTLZ. // CTPOP has to be custom lowered. @@ -243,11 +244,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i32, Legal); setOperationAction(ISD::SETCC, MVT::i64, Legal); - // Zero extension and sign extension for i64 have to be - // custom legalized - setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); - // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); @@ -416,10 +412,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; - node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; - node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; - node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; + node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; + node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; + node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; } std::map::iterator i = node_names.find(Opcode); @@ -778,8 +773,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return SDValue(); } -/// Generate the address of a constant pool entry. -static SDValue +//! Generate the address of a constant pool entry. +SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); @@ -805,6 +800,12 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return SDValue(); } +//! Alternate entry point for generating the address of a constant pool entry +SDValue +SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { + return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); +} + static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { MVT PtrVT = Op.getValueType(); @@ -2185,123 +2186,34 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, return SDValue(); } -static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) -{ - MVT VT = Op.getValueType(); - MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); +//! Generate the carry-generate shuffle mask. +SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) { +SmallVector ShufBytes; - SDValue Op0 = Op.getOperand(0); +// Create the shuffle mask for "rotating" the borrow up one register slot +// once the borrow is generated. +ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: { - MVT Op0VT = Op0.getValueType(); - MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); +return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); +} - SDValue PromoteScalar = - DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); +//! Generate the borrow-generate shuffle mask +SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) { +SmallVector ShufBytes; - // Use a shuffle to zero extend the i32 to i64 directly: - SDValue shufMask; +// Create the shuffle mask for "rotating" the borrow up one register slot +// once the borrow is generated. +ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); +ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - switch (Op0VT.getSimpleVT()) { - default: - cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n"; - abort(); - /*NOTREACHED*/ - break; - case MVT::i32: - shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x00010203, MVT::i32), - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x08090a0b, MVT::i32)); - break; - - case MVT::i16: - shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x80800203, MVT::i32), - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x80800a0b, MVT::i32)); - break; - - case MVT::i8: - shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x80808003, MVT::i32), - DAG.getConstant(0x80808080, MVT::i32), - DAG.getConstant(0x8080800b, MVT::i32)); - break; - } - - SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, - PromoteScalar, PromoteScalar, shufMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, - DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); - } - - case ISD::ADD: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - SDValue CarryGen = - DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedCarry = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - CarryGen, CarryGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedCarry)); - } - - case ISD::SUB: { - // Turn operands into vectors to satisfy type checking (shufb works on - // vectors) - SDValue Op0 = - DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); - SDValue Op1 = - DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - SDValue BorrowGen = - DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1); - SDValue ShiftedBorrow = - DAG.getNode(SPUISD::SHUFB, MVT::v2i64, - BorrowGen, BorrowGen, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, - DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, - Op0, Op1, ShiftedBorrow)); - } - } - - return SDValue(); +return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); } //! Lower byte immediate operations for v16i8 vectors: @@ -2576,11 +2488,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); - - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return LowerI64Math(Op, DAG, Opc); - // i8, i64 math ops: case ISD::ADD: case ISD::SUB: @@ -2591,8 +2498,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::SRA: { if (VT == MVT::i8) return LowerI8Math(Op, DAG, Opc, *this); - else if (VT == MVT::i64) - return LowerI64Math(Op, DAG, Opc); break; } @@ -2831,6 +2736,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const break; } } + // Otherwise, return unchanged. #ifndef NDEBUG if (Result.getNode()) { diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 24b8f82ecbe..a98a8f6bbef 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -52,10 +52,11 @@ namespace llvm { ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) SELB, ///< Select bits -> (b & mask) | (a & ~mask) - ADD_EXTENDED, ///< Add extended, with carry - CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED - SUB_EXTENDED, ///< Subtract extended, with borrow - BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED + // Markers: These aren't used to generate target-dependent nodes, but + // are used during instruction selection. + ADD64_MARKER, ///< i64 addition marker + SUB64_MARKER, ///< i64 subtraction marker + MUL64_MARKER, ///< i64 multiply marker LAST_SPUISD ///< Last user-defined instruction }; } @@ -74,6 +75,12 @@ namespace llvm { MVT ValueType); SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); + + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, + const SPUTargetMachine &TM); + + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG); + SDValue getCarryGenerateShufMask(SelectionDAG &DAG); } class SPUTargetMachine; // forward dec'l. @@ -86,8 +93,18 @@ namespace llvm { SPUTargetMachine &SPUTM; public: + //! The venerable constructor + /*! + This is where the CellSPU backend sets operation handling (i.e., legal, + custom, expand or promote.) + */ SPUTargetLowering(SPUTargetMachine &TM); + //! Get the target machine + SPUTargetMachine &getSPUTargetMachine() { + return SPUTM; + } + /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index b9956402d95..b639ec254a9 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -705,17 +705,14 @@ class ADDXInst pattern>: class ADDXVecInst: ADDXInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [(set (vectype VECREG:$rT), - (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB), - (vectype VECREG:$rCarry)))]>, + [/* no pattern */]>, RegConstraint<"$rCarry = $rT">, NoEncode<"$rCarry">; class ADDXRegInst: ADDXInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [(set rclass:$rT, - (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>, + [/* no pattern */]>, RegConstraint<"$rCarry = $rT">, NoEncode<"$rCarry">; @@ -737,14 +734,12 @@ class CGInst pattern>: class CGVecInst: CGInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + [/* no pattern */]>; class CGRegInst: CGInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, - (SPUcarry_gen rclass:$rA, rclass:$rB))]>; + [/* no pattern */]>; multiclass CarryGenerate { def v2i64 : CGVecInst; @@ -765,17 +760,14 @@ class SFXInst pattern>: class SFXVecInst: SFXInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [(set (vectype VECREG:$rT), - (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB), - (vectype VECREG:$rCarry)))]>, + [/* no pattern */]>, RegConstraint<"$rCarry = $rT">, NoEncode<"$rCarry">; class SFXRegInst: SFXInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [(set rclass:$rT, - (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>, + [/* no pattern */]>, RegConstraint<"$rCarry = $rT">, NoEncode<"$rCarry">; @@ -797,14 +789,12 @@ class BGInst pattern>: class BGVecInst: BGInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + [/* no pattern */]>; class BGRegInst: BGInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, - (SPUborrow_gen rclass:$rA, rclass:$rB))]>; + [/* no pattern */]>; multiclass BorrowGenerate { def v4i32 : BGVecInst; @@ -894,7 +884,7 @@ class MPYAInst pattern>: "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, pattern>; -def MPYAvec: +def MPYAv4i32: MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), [(set (v4i32 VECREG:$rT), (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), @@ -939,7 +929,7 @@ class MPYSInst: "mpys\t$rT, $rA, $rB", IntegerMulDiv, [/* no pattern */]>; -def MPYSvec: +def MPYSv4i32: MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; def MPYSr16: @@ -972,14 +962,20 @@ def MPYHHAvec: def MPYHHAr32: MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; -// mpyhhu: Multiply high-high, unsigned +// mpyhhu: Multiply high-high, unsigned, e.g.: +// +// +-------+-------+ +-------+-------+ +---------+ +// | a0 . a1 | x | b0 . b1 | = | a0 x b0 | +// +-------+-------+ +-------+-------+ +---------+ +// +// where a0, b0 are the upper 16 bits of the 32-bit word class MPYHHUInst: RRForm<0b01110011110, OOL, IOL, "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, [/* no pattern */]>; -def MPYHHUvec: +def MPYHHUv4i32: MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; def MPYHHUr32: diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td index 38279a0a9f8..64548fd8c08 100644 --- a/lib/Target/CellSPU/SPUMathInstr.td +++ b/lib/Target/CellSPU/SPUMathInstr.td @@ -8,8 +8,6 @@ // // Any resemblance to libsimdmath or the Cell SDK simdmath library is // purely and completely coincidental. -// -// Primary author: Scott Michel (scottm@aero.org) //===----------------------------------------------------------------------===// //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index cae6023cd45..87c4115d1b1 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -61,18 +61,20 @@ def SPUselb_type: SDTypeProfile<1, 3, [ def SPUvecshift_type: SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisInt<2>]>; +// "marker" type for i64 operators that need a shuffle mask +// (i.e., uses cg or bg or another instruction that needs to +// use shufb to get things in the right place.) +// Op0: The result +// Op1, 2: LHS, RHS +// Op3: Carry-generate shuffle mask + +def SPUmarker_type : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; + //===----------------------------------------------------------------------===// // Synthetic/pseudo-instructions //===----------------------------------------------------------------------===// -/// Add extended, carry generate: -def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>; -def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>; - -// Subtract extended, borrow generate -def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>; -def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>; - // SPU CNTB: def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; @@ -127,6 +129,12 @@ def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; // Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; +// i64 markers: supplies extra operands used to generate the i64 operator +// instruction sequences +def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>; +def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>; +def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>; + //===----------------------------------------------------------------------===// // Constraints: (taken from PPCInstrInfo.td) //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll index d118c5f88c5..dd6782772a5 100644 --- a/test/CodeGen/CellSPU/i64ops.ll +++ b/test/CodeGen/CellSPU/i64ops.ll @@ -2,9 +2,15 @@ ; RUN: grep xswd %t1.s | count 3 ; RUN: grep xsbh %t1.s | count 1 ; RUN: grep xshw %t1.s | count 2 -; RUN: grep shufb %t1.s | count 4 -; RUN: grep cg %t1.s | count 1 -; RUN: grep addx %t1.s | count 1 +; RUN: grep shufb %t1.s | count 7 +; RUN: grep cg %t1.s | count 4 +; RUN: grep addx %t1.s | count 4 +; RUN: grep fsmbi %t1.s | count 3 +; RUN: grep il %t1.s | count 2 +; RUN: grep mpy %t1.s | count 10 +; RUN: grep mpyh %t1.s | count 6 +; RUN: grep mpyhhu %t1.s | count 2 +; RUN: grep mpyu %t1.s | count 4 ; ModuleID = 'stores.bc' target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" @@ -44,3 +50,8 @@ define i64 @add_i64(i64 %a, i64 %b) nounwind { %1 = add i64 %a, %b ret i64 %1 } + +define i64 @mul_i64(i64 %a, i64 %b) nounwind { + %1 = mul i64 %a, %b + ret i64 %1 +} diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c index 7a4bf1ab0d2..b613bd872e2 100644 --- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c +++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c @@ -7,6 +7,7 @@ int64_t tval_c = 1234567890001LL; int64_t tval_d = 10001LL; int64_t tval_e = 10000LL; uint64_t tval_f = 0xffffff0750135eb9; +int64_t tval_g = -1; /* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ @@ -546,6 +547,12 @@ test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), in /* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ +int64_t i64_mul(int64_t a, int64_t b) { + return a * b; +} + +/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */ + int main(void) { @@ -553,12 +560,13 @@ main(void) const char *something_failed = " %d tests failed.\n"; const char *all_tests_passed = " All tests passed.\n"; - printf("tval_a = %20lld (0x%020llx)\n", tval_a, tval_a); - printf("tval_b = %20lld (0x%020llx)\n", tval_b, tval_b); - printf("tval_c = %20lld (0x%020llx)\n", tval_c, tval_c); - printf("tval_d = %20lld (0x%020llx)\n", tval_d, tval_d); - printf("tval_e = %20lld (0x%020llx)\n", tval_e, tval_e); - printf("tval_f = %20llu (0x%020llx)\n", tval_f, tval_f); + printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a); + printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b); + printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c); + printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d); + printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e); + printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f); + printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g); printf("----------------------------------------\n"); for (i = 0; i < ARR_SIZE(int64_preds); ++i) { @@ -649,5 +657,17 @@ main(void) printf("----------------------------------------\n"); + int64_t result; + + result = i64_mul(tval_g, tval_g); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result); + result = i64_mul(tval_d, tval_e); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result); + /* 0xba7a664f13077c9 */ + result = i64_mul(tval_a, tval_b); + printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result); + + printf("----------------------------------------\n"); + return 0; }