From 6eaeff29b8a6990107735f7e5f5e49da38f56223 Mon Sep 17 00:00:00 2001 From: Dale Johannesen Date: Wed, 10 Oct 2007 01:01:31 +0000 Subject: [PATCH] Next PPC long double bits: ppcf128->i32 conversion. Surprisingly complicated. Adds getTargetNode for 2 outputs, no inputs (missing). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42822 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAG.h | 2 + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 22 +++++++- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 +++ lib/Target/PowerPC/PPCISelLowering.cpp | 62 +++++++++++++++++++++++ lib/Target/PowerPC/PPCISelLowering.h | 23 ++++++++- lib/Target/PowerPC/PPCInstrFormats.td | 49 ++++++++++++++++++ lib/Target/PowerPC/PPCInstrInfo.td | 46 +++++++++++++++++ 7 files changed, 207 insertions(+), 3 deletions(-) diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index dd2e99d9022..731846d7600 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -402,6 +402,8 @@ public: SDOperand Op1, SDOperand Op2, SDOperand Op3); SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT, const SDOperand *Ops, unsigned NumOps); + SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, + MVT::ValueType VT2); SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, MVT::ValueType VT2, SDOperand Op1); SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 98a2495d28d..ce28c79dc9c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3324,10 +3324,18 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { case Expand: { // Convert f32 / f64 to i32 / i64. 
MVT::ValueType VT = Op.getValueType(); + MVT::ValueType OVT = Node->getOperand(0).getValueType(); + if (OVT == MVT::ppcf128 && VT == MVT::i32) { + Result = DAG.getNode(ISD::FP_TO_SINT, VT, + DAG.getNode(ISD::FP_ROUND, MVT::f64, + (DAG.getNode(ISD::FP_ROUND_INREG, + MVT::ppcf128, Node->getOperand(0), + DAG.getValueType(MVT::f64))))); + break; + } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; switch (Node->getOpcode()) { case ISD::FP_TO_SINT: { - MVT::ValueType OVT = Node->getOperand(0).getValueType(); if (OVT == MVT::f32) LC = (VT == MVT::i32) ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64; @@ -3345,7 +3353,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } case ISD::FP_TO_UINT: { - MVT::ValueType OVT = Node->getOperand(0).getValueType(); if (OVT == MVT::f32) LC = (VT == MVT::i32) ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOSINT_F32_I64; @@ -5159,6 +5166,17 @@ void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){ switch (Node->getOpcode()) { case ISD::CopyFromReg: assert(0 && "CopyFromReg must be legal!"); + case ISD::FP_ROUND_INREG: + if (VT == MVT::ppcf128 && + TLI.getOperationAction(ISD::FP_ROUND_INREG, VT) == + TargetLowering::Custom) { + SDOperand Result = TLI.LowerOperation(Op, DAG); + assert(Result.Val->getOpcode() == ISD::BUILD_PAIR); + Lo = Result.Val->getOperand(0); + Hi = Result.Val->getOperand(1); + break; + } + // fall through default: #ifndef NDEBUG cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bb5f1b14ab2..2ac30f6f3da 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3032,6 +3032,12 @@ SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT, const SDOperand *Ops, unsigned NumOps) { return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val; } +SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, + 
MVT::ValueType VT2) { + const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); + SDOperand Op; + return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op, 0).Val; +} SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1, MVT::ValueType VT2, SDOperand Op1) { const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 20ec3c37304..4a206653d51 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -76,6 +76,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg) setConvertAction(MVT::ppcf128, MVT::f64, Expand); setConvertAction(MVT::ppcf128, MVT::f32, Expand); + // This is used in the ppcf128->int sequence. Note it has different semantics + // from FP_ROUND: that rounds to nearest, this rounds to zero. + setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // PowerPC has no intrinsics for these particular operations setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); @@ -2079,6 +2082,64 @@ static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { return Bits; } +static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) { + assert(Op.getValueType() == MVT::ppcf128); + SDNode *Node = Op.Val; + assert(Node->getOperand(0).getValueType() == MVT::ppcf128); + assert(Node->getOperand(0).Val->getOpcode()==ISD::BUILD_PAIR); + SDOperand Lo = Node->getOperand(0).Val->getOperand(0); + SDOperand Hi = Node->getOperand(0).Val->getOperand(1); + + // This sequence changes FPSCR to do round-to-zero, adds the two halves + // of the long double, and puts FPSCR back the way it was. We do not + // actually model FPSCR. 
+ std::vector<MVT::ValueType> NodeTys; + SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg; + + NodeTys.push_back(MVT::f64); // Return register + NodeTys.push_back(MVT::Flag); // Returns a flag for later insns + Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); + MFFSreg = Result.getValue(0); + InFlag = Result.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Flag); // Returns a flag + Ops[0] = DAG.getConstant(31, MVT::i32); + Ops[1] = InFlag; + Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2); + InFlag = Result.getValue(0); + + NodeTys.clear(); + NodeTys.push_back(MVT::Flag); // Returns a flag + Ops[0] = DAG.getConstant(30, MVT::i32); + Ops[1] = InFlag; + Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2); + InFlag = Result.getValue(0); + + NodeTys.clear(); + NodeTys.push_back(MVT::f64); // result of add + NodeTys.push_back(MVT::Flag); // Returns a flag + Ops[0] = Lo; + Ops[1] = Hi; + Ops[2] = InFlag; + Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3); + FPreg = Result.getValue(0); + InFlag = Result.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::f64); + Ops[0] = DAG.getConstant(1, MVT::i32); + Ops[1] = MFFSreg; + Ops[2] = FPreg; + Ops[3] = InFlag; + Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); + FPreg = Result.getValue(0); + + // We know the low half is about to be thrown away, so just use something + // convenient. + return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg); +} + static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { if (Op.getOperand(0).getValueType() == MVT::i64) { SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); @@ -2935,6 +2996,7 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG); // Lower 64-bit shifts.
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 33f63814bc0..01a35a84553 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -129,7 +129,28 @@ namespace llvm { /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. - LBRX + LBRX, + + // The following 5 instructions are used only as part of the + // long double-to-int conversion sequence. + + /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the + /// register. + MFFS, + + /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. + MTFSB0, + + /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. + MTFSB1, + + /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with + /// rounding towards zero. It has flags added so it won't move past the + /// FPSCR-setting instructions. + FADDRTZ, + + /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. + MTFSF }; } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 865320c1d61..98e1e352661 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -315,6 +315,34 @@ class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { } +// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of +// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} +class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + bits<5> FM; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FM; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + // DCB_Form - Form X instruction, used for dcb* instructions. class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -513,6 +541,27 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr, let SPR = spr; } +// XFL-Form - MTFSF +// This is probably 1.7.9, but I don't have the reference that uses this +// numbering scheme... +class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + string cstr, InstrItinClass itin, list<dag>pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<8> FM; + bits<5> RT; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + let Constraints = cstr; + + let Inst{6} = 0; + let Inst{7-14} = FM; + let Inst{15} = 0; + let Inst{16-20} = RT; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + // 1.7.10 XS-Form - SRADI. class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1591148b880..f62f7cb0e5e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,21 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>; +// This sequence is used for long double->int conversions.
It changes the +// bits in the FPSCR which is not modelled. +def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, + [SDNPOutFlag]>; +def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPInFlag, SDNPOutFlag]>; +def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPInFlag, SDNPOutFlag]>; +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, + [SDNPInFlag, SDNPOutFlag]>; +def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, + [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, + SDTCisVT<3, f64>]>, + [SDNPInFlag]>; + def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, @@ -873,6 +888,37 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +// Instructions to manipulate FPSCR. Only long double handling uses these. +// FPSCR is not modelled; we use the SDNode Flag to keep things in order. + +def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), + "mffs $rT", IntMFFS, + [(set F8RC:$rT, (PPCmffs))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), + "mtfsb0 $FM", IntMTFSB0, + [(PPCmtfsb0 (i32 imm:$FM))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), + "mtfsb1 $FM", IntMTFSB0, + [(PPCmtfsb1 (i32 imm:$FM))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +def FADDrtz: AForm_2<63, 21, + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), + "fadd $FRT, $FRA, $FRB", FPGeneral, + [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +// MTFSF does not actually produce an FP result. We pretend it copies +// input reg B to the output. If we didn't do this it would look like the +// instruction had no outputs (because we aren't modelling the FPSCR) and +// it would be deleted. 
+def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), + (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), + "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, + [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), + F8RC:$rT, F8RC:$FRB))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + let PPC970_Unit = 1 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit