From 9c0c6b2e4a403454c7c5105e18d9ffe1eef2f498 Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Fri, 21 Nov 2008 02:56:16 +0000 Subject: [PATCH] CellSPU: (a) Fix bugs 3052, 3057 (b) Incorporate Duncan's suggestions re: i1 promotion (c) Indentation updates. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59790 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 58 +++++++++++++++++----- lib/Target/CellSPU/SPUISelLowering.cpp | 66 +++++++++++++------------- lib/Target/CellSPU/SPUInstrInfo.cpp | 4 +- lib/Target/CellSPU/SPUInstrInfo.td | 46 ++++++++++++++++-- test/CodeGen/CellSPU/loads.ll | 20 ++++++++ test/CodeGen/CellSPU/stores.ll | 22 +++++++++ 6 files changed, 165 insertions(+), 51 deletions(-) create mode 100644 test/CodeGen/CellSPU/loads.ll create mode 100644 test/CodeGen/CellSPU/stores.ll diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 144f5781cda..109cd5ee1ee 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -430,8 +430,8 @@ bool SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Index) { return DFormAddressPredicate(Op, N, Base, Index, - SPUFrameInfo::minFrameOffset(), - SPUFrameInfo::maxFrameOffset()); + SPUFrameInfo::minFrameOffset(), + SPUFrameInfo::maxFrameOffset()); } bool @@ -544,7 +544,35 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; + } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { + unsigned OpOpc = Op.getOpcode(); + + if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { + // Direct load/store without getelementptr + SDValue Addr, Offs; + + // Get the register from CopyFromReg + if (Opc == ISD::CopyFromReg) + Addr = N.getOperand(1); + else + Addr = N; // Register + + if (OpOpc == ISD::STORE) + Offs = Op.getOperand(3); + else + Offs = Op.getOperand(2); // LOAD + + if
(Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { + if (Offs.getOpcode() == ISD::UNDEF) + Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); + + Base = Offs; + Index = Addr; + return true; + } + } } + return false; } @@ -554,21 +582,27 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, \arg Base The base pointer operand \arg Index The offset/index operand - If the address \a N can be expressed as a [r + s10imm] address, returns false. - Otherwise, creates two operands, Base and Index that will become the [r+r] - address. + If the address \a N can be expressed as an A-form or D-form address, returns + false. Otherwise, creates two operands, Base and Index that will become the + (r)(r) X-form address. */ bool SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Index) { - if (SelectAFormAddr(Op, N, Base, Index) - || SelectDFormAddr(Op, N, Base, Index)) - return false; + if (!SelectAFormAddr(Op, N, Base, Index) + && !SelectDFormAddr(Op, N, Base, Index)) { + // default form of a X-form address is r(r) in operands 0 and 1: + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); - // All else fails, punt and use an X-form address: - Base = N.getOperand(0); - Index = N.getOperand(1); - return true; + if (Op0.getOpcode() == ISD::Register && Op1.getOpcode() == ISD::Register) { + Base = Op0; + Index = Op1; + return true; + } + } + + return false; } //! Convert the operand from a target-independent to a target-specific node diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 1cd00978eef..9f828b410b6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -165,8 +165,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::STORE, VT, Custom); } - // Custom lower BRCOND for i1, i8 to "promote" the result to - // i32 and i16, respectively. 
+ // Custom lower BRCOND for i8 to "promote" the result to i16 setOperationAction(ISD::BRCOND, MVT::Other, Custom); // Expand the jumptable branches @@ -215,7 +214,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SHL, MVT::i8, Custom); setOperationAction(ISD::SRL, MVT::i8, Custom); setOperationAction(ISD::SRA, MVT::i8, Custom); - // And SPU needs custom lowering for shift left/right for i64 + + // SPU needs custom lowering for shift left/right for i64 setOperationAction(ISD::SHL, MVT::i64, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); @@ -223,7 +223,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); setOperationAction(ISD::MUL, MVT::i32, Custom); - setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall + + // SMUL_LOHI, UMUL_LOHI + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i64, Custom); @@ -247,13 +253,11 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: - setOperationAction(ISD::SELECT, MVT::i1, Promote); setOperationAction(ISD::SELECT, MVT::i8, Legal); setOperationAction(ISD::SELECT, MVT::i16, Legal); setOperationAction(ISD::SELECT, MVT::i32, Legal); setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SETCC, MVT::i1, Promote); setOperationAction(ISD::SETCC, MVT::i8, Legal); setOperationAction(ISD::SETCC, MVT::i16, Legal); setOperationAction(ISD::SETCC, MVT::i32, Legal); @@ -299,7 +303,7 @@ 
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. - for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; ++sctype) { MVT VT = (MVT::SimpleValueType)sctype; @@ -699,8 +703,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { int chunk_offset, slot_offset; bool was16aligned; - // The vector type we really want to load from the 16-byte chunk, except - // in the case of MVT::i1, which has to be v16i8. + // The vector type we really want to load from the 16-byte chunk. MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())), stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits())); @@ -908,7 +911,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) +//! Lower MVT::i8 brcond to a promoted type (MVT::i16) static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { @@ -916,8 +919,8 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) MVT CondVT = Cond.getValueType(); MVT CondNVT; - if (CondVT == MVT::i1 || CondVT == MVT::i8) { - CondNVT = (CondVT == MVT::i1 ?
MVT::i32 : MVT::i16); + if (CondVT == MVT::i8) { + CondNVT = MVT::i16; return DAG.getNode(ISD::BRCOND, Op.getValueType(), Op.getOperand(0), DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), @@ -957,37 +960,37 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) switch (ObjectVT.getSimpleVT()) { default: { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << ObjectVT.getMVTString() - << "\n"; - abort(); + cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " + << ObjectVT.getMVTString() + << "\n"; + abort(); } case MVT::i8: - ArgRegClass = &SPU::R8CRegClass; - break; + ArgRegClass = &SPU::R8CRegClass; + break; case MVT::i16: - ArgRegClass = &SPU::R16CRegClass; - break; + ArgRegClass = &SPU::R16CRegClass; + break; case MVT::i32: - ArgRegClass = &SPU::R32CRegClass; - break; + ArgRegClass = &SPU::R32CRegClass; + break; case MVT::i64: - ArgRegClass = &SPU::R64CRegClass; - break; + ArgRegClass = &SPU::R64CRegClass; + break; case MVT::f32: - ArgRegClass = &SPU::R32FPRegClass; - break; + ArgRegClass = &SPU::R32FPRegClass; + break; case MVT::f64: - ArgRegClass = &SPU::R64FPRegClass; - break; + ArgRegClass = &SPU::R64FPRegClass; + break; case MVT::v2f64: case MVT::v4f32: case MVT::v2i64: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: - ArgRegClass = &SPU::VECREGRegClass; - break; + ArgRegClass = &SPU::VECREGRegClass; + break; } unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); @@ -2103,7 +2106,6 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // zero fill uppper part of preferred slot, don't care about the // other slots: unsigned int mask_val; - if (i <= prefslot_end) { mask_val = ((i < prefslot_begin) @@ -2884,7 +2886,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const } } // Otherwise, return unchanged. 
-#if 1 +#ifndef NDEBUG /* trace the replacement in debug builds only */ if (Result.getNode()) { DEBUG(cerr << "\nReplace.SPU: "); DEBUG(N->dump(&DAG)); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index d10f2b8e52d..510b05091f7 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -161,7 +161,7 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case SPU::STQDr64: case SPU::STQDr32: case SPU::STQDr16: - // case SPU::STQDr8: + case SPU::STQDr8: case SPU::STQXv16i8: case SPU::STQXv8i16: case SPU::STQXv4i32: @@ -171,7 +171,7 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case SPU::STQXr64: case SPU::STQXr32: case SPU::STQXr16: - // case SPU::STQXr8: + case SPU::STQXr8: if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 17472a2a3ce..990865df04e 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -3494,26 +3494,62 @@ def FIf32 : "fi\t$rT, $rA, $rB", SPrecFP, [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>; -// Floating Compare Equal +//-------------------------------------------------------------------------- +// Basic single precision floating point comparisons: +// +// Note: There is no support on SPU for single precision NaN. Consequently, +// ordered and unordered comparisons are the same.
+//-------------------------------------------------------------------------- + def FCEQf32 : RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), "fceq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setoeq R32FP:$rA, R32FP:$rB))]>; + [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setoeq R32FP:$rA, R32FP:$rB), + (FCEQf32 R32FP:$rA, R32FP:$rB)>; def FCMEQf32 : RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), "fcmeq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)), + (FCMEQf32 R32FP:$rA, R32FP:$rB)>; def FCGTf32 : RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), "fcgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setogt R32FP:$rA, R32FP:$rB))]>; + [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setogt R32FP:$rA, R32FP:$rB), /* ordered form; setugt is matched by FCGTf32 itself */ + (FCGTf32 R32FP:$rA, R32FP:$rB)>; def FCMGTf32 : RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), "fcmgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setogt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)), /* ordered form; setugt is matched by FCMGTf32 itself */ + (FCMGTf32 R32FP:$rA, R32FP:$rB)>; + +//-------------------------------------------------------------------------- +// Single precision floating point comparisons and SETCC equivalents: +//-------------------------------------------------------------------------- + +def : SETCCNegCondReg; +def : SETCCNegCondReg; + +def : SETCCBinOpReg; +def : SETCCBinOpReg; + +def : SETCCBinOpReg; +def : SETCCBinOpReg; + +def : Pat<(setule R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; +def : Pat<(setole R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; // FP Status and Control Register Write // Why isn't rT a don't care in
the ISA? diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll new file mode 100644 index 00000000000..3b9746c8080 --- /dev/null +++ b/test/CodeGen/CellSPU/loads.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep {lqd.*0(\$3)} %t1.s | count 1 +; RUN: grep {lqd.*16(\$3)} %t1.s | count 1 + +; ModuleID = 'loads.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly { +entry: + %tmp1 = load <4 x float>* %a + ret <4 x float> %tmp1 +} + +define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1] + %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp1 +} diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll new file mode 100644 index 00000000000..b9534abfc1a --- /dev/null +++ b/test/CodeGen/CellSPU/stores.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep {stqd.*0(\$3)} %t1.s | count 1 +; RUN: grep {stqd.*16(\$3)} %t1.s | count 1 +; RUN: grep 16256 %t1.s | count 1 +; RUN: grep 16384 %t1.s | count 1 + +; ModuleID = 'stores.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +define void @store_v4f32_1(<4 x float>* %a) nounwind { +entry: + store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a + ret void +} + +define void @store_v4f32_2(<4 x float>* %a) nounwind { +entry: + %arrayidx = getelementptr <4 x float>* %a, i32 1 + store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx + ret void +}