From 6eaeff29b8a6990107735f7e5f5e49da38f56223 Mon Sep 17 00:00:00 2001
From: Dale Johannesen <dalej@apple.com>
Date: Wed, 10 Oct 2007 01:01:31 +0000
Subject: [PATCH] Next PPC long double bits: ppcf128->i32 conversion.
 Surprisingly complicated. Adds getTargetNode for 2 outputs, no inputs
 (missing).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42822 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/SelectionDAG.h       |  2 +
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  | 22 +++++++-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  6 +++
 lib/Target/PowerPC/PPCISelLowering.cpp    | 62 +++++++++++++++++++++++
 lib/Target/PowerPC/PPCISelLowering.h      | 23 ++++++++-
 lib/Target/PowerPC/PPCInstrFormats.td     | 49 ++++++++++++++++++
 lib/Target/PowerPC/PPCInstrInfo.td        | 46 +++++++++++++++++
 7 files changed, 207 insertions(+), 3 deletions(-)

diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index dd2e99d9022..731846d7600 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -402,6 +402,8 @@ public:
                         SDOperand Op1, SDOperand Op2, SDOperand Op3);
   SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT,
                         const SDOperand *Ops, unsigned NumOps);
+  SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                        MVT::ValueType VT2);
   SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1,
                         MVT::ValueType VT2, SDOperand Op1);
   SDNode *getTargetNode(unsigned Opcode, MVT::ValueType VT1,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 98a2495d28d..ce28c79dc9c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3324,10 +3324,18 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
     case Expand: {
       // Convert f32 / f64 to i32 / i64.
       MVT::ValueType VT = Op.getValueType();
+      MVT::ValueType OVT = Node->getOperand(0).getValueType();  // used by both cases
+      if (OVT == MVT::ppcf128 && VT == MVT::i32) {  // ppcf128 -> i32, no libcall
+        Result = DAG.getNode(ISD::FP_TO_SINT, VT,
+                             DAG.getNode(ISD::FP_ROUND, MVT::f64,
+                                         (DAG.getNode(ISD::FP_ROUND_INREG, 
+                                          MVT::ppcf128, Node->getOperand(0),
+                                          DAG.getValueType(MVT::f64)))));
+        break;  // NOTE(review): FP_TO_UINT also takes this FP_TO_SINT path; confirm.
+      }
       RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
       switch (Node->getOpcode()) {
       case ISD::FP_TO_SINT: {
-        MVT::ValueType OVT = Node->getOperand(0).getValueType();
         if (OVT == MVT::f32)
           LC = (VT == MVT::i32)
             ? RTLIB::FPTOSINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
@@ -3345,7 +3353,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
         break;
       }
       case ISD::FP_TO_UINT: {
-        MVT::ValueType OVT = Node->getOperand(0).getValueType();
         if (OVT == MVT::f32)
           LC = (VT == MVT::i32)
             ? RTLIB::FPTOUINT_F32_I32 : RTLIB::FPTOSINT_F32_I64;
@@ -5159,6 +5166,17 @@ void SelectionDAGLegalize::ExpandOp(SDOperand Op, SDOperand &Lo, SDOperand &Hi){
   switch (Node->getOpcode()) {
   case ISD::CopyFromReg:
     assert(0 && "CopyFromReg must be legal!");
+  case ISD::FP_ROUND_INREG:
+    if (VT == MVT::ppcf128 && 
+        TLI.getOperationAction(ISD::FP_ROUND_INREG, VT) == 
+            TargetLowering::Custom) {
+      SDOperand Result = TLI.LowerOperation(Op, DAG);
+      assert(Result.Val->getOpcode() == ISD::BUILD_PAIR);
+      Lo = Result.Val->getOperand(0);
+      Hi = Result.Val->getOperand(1);
+      break;
+    }
+    // fall through
   default:
 #ifndef NDEBUG
     cerr << "NODE: "; Node->dump(&DAG); cerr << "\n";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bb5f1b14ab2..2ac30f6f3da 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3032,6 +3032,12 @@ SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT,
                                     const SDOperand *Ops, unsigned NumOps) {
   return getNode(ISD::BUILTIN_OP_END+Opcode, VT, Ops, NumOps).Val;
 }
+SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
+                                    MVT::ValueType VT2) {
+  const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);  // two result types
+  SDOperand Op;  // Placeholder only: an operand count of 0 is passed below.
+  return getNode(ISD::BUILTIN_OP_END+Opcode, VTs, 2, &Op, 0).Val;
+}
 SDNode *SelectionDAG::getTargetNode(unsigned Opcode, MVT::ValueType VT1,
                                     MVT::ValueType VT2, SDOperand Op1) {
   const MVT::ValueType *VTs = getNodeValueTypes(VT1, VT2);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 20ec3c37304..4a206653d51 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -76,6 +76,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
   setConvertAction(MVT::ppcf128, MVT::f64, Expand);
   setConvertAction(MVT::ppcf128, MVT::f32, Expand);
+  // This is used in the ppcf128->int sequence.  Note it has different semantics
+  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
+  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 
   // PowerPC has no intrinsics for these particular operations
   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
@@ -2079,6 +2082,64 @@ static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
   return Bits;
 }
 
+/// Custom lowering of FP_ROUND_INREG for ppcf128.  Adds the two f64 halves of
+/// the long double with FPSCR temporarily switched to round-to-zero, then puts
+/// FPSCR back the way it was.  FPSCR is not modelled; Flags keep the order.
+static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::ppcf128);
+  SDNode *Node = Op.Val;
+  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
+  assert(Node->getOperand(0).Val->getOpcode()==ISD::BUILD_PAIR);
+  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
+  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);
+
+  std::vector<MVT::ValueType> NodeTys;
+  SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;
+  // Read the current FPSCR into a register so that it can be restored below.
+  NodeTys.push_back(MVT::f64);   // Return register
+  NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
+  Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
+  MFFSreg = Result.getValue(0);
+  InFlag = Result.getValue(1);
+  // Set FPSCR bit 31 ...
+  NodeTys.clear();
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = DAG.getConstant(31, MVT::i32);
+  Ops[1] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
+  InFlag = Result.getValue(0);
+  // ... and clear FPSCR bit 30; together these select round-to-zero.
+  NodeTys.clear();
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = DAG.getConstant(30, MVT::i32);
+  Ops[1] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
+  InFlag = Result.getValue(0);
+  // Add the two halves while round-to-zero is in effect.
+  NodeTys.clear();
+  NodeTys.push_back(MVT::f64);    // result of add
+  NodeTys.push_back(MVT::Flag);   // Returns a flag
+  Ops[0] = Lo;
+  Ops[1] = Hi;
+  Ops[2] = InFlag;
+  Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
+  FPreg = Result.getValue(0);
+  InFlag = Result.getValue(1);
+  // Restore FPSCR from the saved copy, passing the sum through as the result.
+  NodeTys.clear();
+  NodeTys.push_back(MVT::f64);
+  Ops[0] = DAG.getConstant(1, MVT::i32);   // FM operand of mtfsf
+  Ops[1] = MFFSreg;
+  Ops[2] = FPreg;
+  Ops[3] = InFlag;
+  Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
+  FPreg = Result.getValue(0);
+
+  // We know the low half is about to be thrown away, so just use something
+  // convenient.  The pair is typed ppcf128 to match the node being lowered.
+  return DAG.getNode(ISD::BUILD_PAIR, MVT::ppcf128, FPreg, FPreg);
+}
+
 static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
   if (Op.getOperand(0).getValueType() == MVT::i64) {
     SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
@@ -2935,6 +2996,7 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
+  case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);
 
   // Lower 64-bit shifts.
   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 33f63814bc0..01a35a84553 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -129,7 +129,28 @@ namespace llvm {
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
-      LBRX
+      LBRX,
+
+      // The following 5 instructions are used only as part of the
+      // long double-to-int conversion sequence.
+
+      /// F8RC, OUTFLAG = MFFS - This moves the FPSCR (not modelled) into the
+      /// register.
+      MFFS,
+
+      /// OUTFLAG = MTFSB0 BIT, INFLAG - This clears bit BIT in the FPSCR.
+      MTFSB0,
+
+      /// OUTFLAG = MTFSB1 BIT, INFLAG - This sets bit BIT in the FPSCR.
+      MTFSB1,
+
+      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+      /// rounding towards zero.  It has flags added so it won't move past the 
+      /// FPSCR-setting instructions.
+      FADDRTZ,
+
+      /// F8RC = MTFSF IMM, F8RC, F8RC, INFLAG - Moves a register to the FPSCR.
+      MTFSF
     };
   }
 
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 865320c1d61..98e1e352661 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -315,6 +315,34 @@ class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
 }
 
+// XForm_42 is used for MFFS, XForm_43 for MTFSB0 and MTFSB1.  The numbers are
+// arbitrary; the series presumably follows some document I haven't found.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let Pattern = pattern;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = RST;   // RST is presumably declared by XForm_base_r3xo
+  let Inst{11-20} = 0;     // always encoded as zero in this form
+  let Inst{21-30} = xo;    // the xo template argument
+  let Inst{31}    = RC;    // 0 unless RC is overridden
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let Pattern = pattern;
+  bits<5> FM;    // 5-bit field placed in Inst{6-10} below
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = FM;
+  let Inst{11-20} = 0;     // always encoded as zero in this form
+  let Inst{21-30} = xo;    // the xo template argument
+  let Inst{31}    = RC;    // 0 unless RC is overridden
+}
+
 // DCB_Form - Form X instruction, used for dcb* instructions.
 class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, 
                       InstrItinClass itin, list<dag> pattern>
@@ -513,6 +541,27 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
   let SPR = spr;
 }
 
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
+                      string cstr, InstrItinClass itin, list<dag>pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<8> FM;    // 8-bit field placed in Inst{7-14} below
+  bits<5> RT;    // 5-bit field placed in Inst{16-20} below
+
+  bit RC = 0;    // set by isDOT
+  let Pattern = pattern;
+  let Constraints = cstr;    // operand constraint string supplied by the def
+
+  let Inst{6} = 0;           // always encoded as zero in this form
+  let Inst{7-14}  = FM;
+  let Inst{15} = 0;          // always encoded as zero in this form
+  let Inst{16-20} = RT;
+  let Inst{21-30} = xo;      // the xo template argument
+  let Inst{31}    = RC;      // 0 unless RC is overridden
+}
+
 // 1.7.10 XS-Form - SRADI.
 class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1591148b880..f62f7cb0e5e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -53,6 +53,21 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
 def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
 def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain]>;
 
+// This sequence is used for long double->int conversions.  It changes the
+// bits in the FPSCR which is not modelled.  
+def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+                        [SDNPOutFlag]>;  // f64 result, no operands, flag out
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInFlag, SDNPOutFlag]>;  // one integer operand
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+                       [SDNPInFlag, SDNPOutFlag]>;  // one integer operand
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+                       [SDNPInFlag, SDNPOutFlag]>;  // flagged FP binary op
+def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
+                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+                        SDTCisVT<3, f64>]>,  // f64 = (int, f64, f64)
+                       [SDNPInFlag]>;  // last node of the flagged sequence
+
 def PPCfsel   : SDNode<"PPCISD::FSEL",  
    // Type constraint for fsel.
    SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, 
@@ -873,6 +888,37 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                        "mfcr $rT, $FXM", SprMFCR>,
             PPC970_DGroup_First, PPC970_Unit_CRU;
 
+// Instructions to manipulate FPSCR.  Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+def MFFS   : XForm_42<63, 583, (outs F8RC:$rT), (ins), 
+                       "mffs $rT", IntMFFS,
+                       [(set F8RC:$rT, (PPCmffs))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+                       "mtfsb0 $FM", IntMTFSB0,
+                      [(PPCmtfsb0 (i32 imm:$FM))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+                       "mtfsb1 $FM", IntMTFSB0,  // NOTE(review): MTFSB0 itinerary reused; confirm
+                      [(PPCmtfsb1 (i32 imm:$FM))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+def FADDrtz: AForm_2<63, 21,
+                    (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+                    "fadd $FRT, $FRA, $FRB", FPGeneral,  // printed as a plain fadd
+                    [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+// MTFSF does not actually produce an FP result.  We pretend it copies
+// input reg B to the output.  If we didn't do this it would look like the
+// instruction had no outputs (because we aren't modelling the FPSCR) and
+// it would be deleted.
+def MTFSF  : XFLForm<63, 711, (outs F8RC:$FRA),
+                              (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+                       "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,  // NOTE(review): MTFSB0 itinerary reused; confirm
+                       [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), 
+                                                   F8RC:$rT, F8RC:$FRB))]>,
+             PPC970_DGroup_Single, PPC970_Unit_FPU;
+
 let PPC970_Unit = 1 in {  // FXU Operations.
 
 // XO-Form instructions.  Arithmetic instructions that can set overflow bit