diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6631e4ab8f3..5ff23d1cef2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -584,10 +584,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; - case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; @@ -5667,50 +5664,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1)); - // This sequence changes FPSCR to do round-to-zero, adds the two halves - // of the long double, and puts FPSCR back the way it was. We do not - // actually model FPSCR. - std::vector NodeTys; - SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; - - NodeTys.push_back(MVT::f64); // Return register - NodeTys.push_back(MVT::Glue); // Returns a flag for later insns - Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); - MFFSreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(31, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(30, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); // result of add - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = Lo; - Ops[1] = Hi; - Ops[2] = InFlag; - Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); - FPreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); - Ops[0] = DAG.getConstant(1, MVT::i32); - Ops[1] = MFFSreg; - Ops[2] = FPreg; - Ops[3] = InFlag; - Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); - FPreg = Result.getValue(0); + // Add the two halves of the long double in round-to-zero mode. + SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. @@ -6523,6 +6478,30 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); + } else if (MI->getOpcode() == PPC::FADDrtz) { + // This pseudo performs an FADD with rounding mode temporarily forced + // to round-to-zero. We emit this via custom inserter since the FPSCR + // is not modeled at the SelectionDAG level. + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + unsigned Src2 = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); + + // Set rounding mode to round-to-zero. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); + + // Perform addition. + BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + + // Restore FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index da438a5aa7f..6296400ec71 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -142,26 +142,13 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, - // The following 5 instructions are used only as part of the - // long double-to-int conversion sequence. - - /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the - /// register. - MFFS, - - /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. - MTFSB0, - - /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. - MTFSB1, - - /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the - /// FPSCR-setting instructions. + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding + /// towards zero. Used only as part of the long double-to-int + /// conversion sequence. FADDRTZ, - /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. - MTFSF, + /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. + MFFS, /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and /// reserve indexed. This is used to implement atomic operations. diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c4fd1929502..129c5855aa0 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -676,14 +676,13 @@ class XFXForm_7_ext opcode, bits<10> xo, bits<10> spr, // This is probably 1.7.9, but I don't have the reference that uses this // numbering scheme... class XFLForm opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - string cstr, InstrItinClass itin, listpattern> + InstrItinClass itin, listpattern> : I { bits<8> FM; bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; - let Constraints = cstr; let Inst{6} = 0; let Inst{7-14} = FM; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a847cd83ccc..aee41d3d7e0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -64,20 +64,13 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; -// This sequence is used for long double->int conversions. It changes the -// bits in the FPSCR which is not modelled. -def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, - [SDNPOutGlue]>; -def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, - [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, - SDTCisVT<3, f64>]>, - [SDNPInGlue]>; +// Extract FPSCR (not modeled at the DAG level). +def PPCmffs : SDNode<"PPCISD::MFFS", + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + +// Perform FADD in round-to-zero mode. +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; + def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -1288,27 +1281,23 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -// Instructions to manipulate FPSCR. Only long double handling uses these. -// FPSCR is not modelled; we use the SDNode Flag to keep things in order. +// Pseudo instruction to perform FADD in round-to-zero mode. +let usesCustomInserter = 1, Uses = [RM] in { + def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; +} +// The above pseudo gets expanded to make use of the following instructions +// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, - [(PPCmtfsb0 (i32 imm:$FM))]>, + "mtfsb0 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, - [(PPCmtfsb1 (i32 imm:$FM))]>, + "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - // MTFSF does not actually produce an FP result. We pretend it copies - // input reg B to the output. If we didn't do this it would look like the - // instruction had no outputs (because we aren't modelling the FPSCR) and - // it would be deleted. - def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), - (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), - "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, - [(set f64:$FRA, (PPCmtfsf (i32 imm:$FM), - f64:$rT, f64:$FRB))]>, + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { @@ -1316,11 +1305,6 @@ let Uses = [RM] in { "mffs $rT", IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; - def FADDrtz: AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>, - PPC970_DGroup_Single, PPC970_Unit_FPU; }