From fb11288109329cb736d9f49769581a0d0c23fe19 Mon Sep 17 00:00:00 2001
From: Evan Cheng <evan.cheng@apple.com>
Date: Mon, 23 Mar 2009 08:01:15 +0000
Subject: [PATCH] Model inline asm constraint which ties an input to an output
 register as machine operand TIED_TO constraint. This eliminated the need to
 pre-allocate registers for these. This also allows register allocator can
 eliminate the unneeded copies.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67512 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/InlineAsm.h                      | 15 +++--
 lib/CodeGen/LiveIntervalAnalysis.cpp          | 10 +--
 lib/CodeGen/MachineInstr.cpp                  | 61 ++++++++++++++++++-
 .../SelectionDAG/SelectionDAGBuild.cpp        | 50 +++++++--------
 lib/CodeGen/TwoAddressInstructionPass.cpp     |  9 +--
 lib/Target/X86/X86FloatingPoint.cpp           | 32 +++++++++-
 .../X86/2008-02-20-InlineAsmClobber.ll        |  4 +-
 test/CodeGen/X86/2008-09-18-inline-asm-2.ll   |  4 +-
 .../CodeGen/X86/2008-12-19-EarlyClobberBug.ll |  3 +-
 test/CodeGen/X86/inline-asm-2addr.ll          |  9 +++
 10 files changed, 144 insertions(+), 53 deletions(-)
 create mode 100644 test/CodeGen/X86/inline-asm-2addr.ll

diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 6347cae51f4..84292cf19e3 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -135,14 +135,13 @@ public:
     return (Flag & 0xffff) >> 3;
   }
 
-  /// isOutputOperandTiedToUse - Return true if the flag of the inline asm
-  /// operand indicates it is an output that's matched to an input operand.
-  static bool isOutputOperandTiedToUse(unsigned Flag, unsigned &UseIdx) {
-    if (Flag & 0x80000000) {
-      UseIdx = Flag >> 16;
-      return true;
-    }
-    return false;
+  /// isUseOperandTiedToDef - Return true if the flag of the inline asm
+  /// operand indicates it is an use operand that's matched to a def operand.
+  static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) {
+    if ((Flag & 0x80000000) == 0)
+      return false;
+    Idx = (Flag & ~0x80000000) >> 16;
+    return true;
   }
 
 
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 29be67bf605..b5c21952fa3 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -477,8 +477,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       assert(interval.containsOneValue());
       unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def);
       unsigned RedefIndex = getDefIndex(MIIdx);
-      // It cannot be an early clobber MO.
-      assert(!MO.isEarlyClobber() && "Unexpected early clobber!");
+      if (MO.isEarlyClobber())
+        RedefIndex = getUseIndex(MIIdx);
 
       const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1);
       VNInfo *OldValNo = OldLR->valno;
@@ -499,6 +499,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // Value#0 is now defined by the 2-addr instruction.
       OldValNo->def  = RedefIndex;
       OldValNo->copy = 0;
+      if (MO.isEarlyClobber())
+        OldValNo->redefByEC = true;
       
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -546,8 +548,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // live until the end of the block.  We've already taken care of the
       // rest of the live range.
       unsigned defIndex = getDefIndex(MIIdx);
-      // It cannot be an early clobber MO.
-      assert(!MO.isEarlyClobber() && "Unexpected early clobber!");
+      if (MO.isEarlyClobber())
+        defIndex = getUseIndex(MIIdx);
       
       VNInfo *ValNo;
       MachineInstr *CopyMI = NULL;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index cc85b80a178..965c2a2c5d2 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -11,8 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Constants.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Value.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -692,6 +693,35 @@ int MachineInstr::findFirstPredOperandIdx() const {
 /// isRegReDefinedByTwoAddr - Given the index of a register operand,
 /// check if the register def is a re-definition due to two addr elimination.
 bool MachineInstr::isRegReDefinedByTwoAddr(unsigned DefIdx) const{
+  if (getOpcode() == TargetInstrInfo::INLINEASM) {
+    assert(DefIdx >= 2);
+    const MachineOperand &MO = getOperand(DefIdx);
+    if (!MO.isReg() || !MO.isDef())
+      return false;
+    // Determine the actual operand no corresponding to this index.
+    unsigned DefNo = 0;
+    for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+      const MachineOperand &FMO = getOperand(i);
+      assert(FMO.isImm());
+      // Skip over this def.
+      i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+      if (i > DefIdx)
+        break;
+      ++DefNo;
+    }
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &FMO = getOperand(i);
+      if (!FMO.isImm())
+        continue;
+      if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
+        continue;
+      unsigned Idx;
+      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && 
+          Idx == DefNo)
+        return true;
+    }
+  }
+
   assert(getOperand(DefIdx).isDef() && "DefIdx is not a def!");
   const TargetInstrDesc &TID = getDesc();
   for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
@@ -707,6 +737,35 @@ bool MachineInstr::isRegReDefinedByTwoAddr(unsigned DefIdx) const{
 /// is a register use and it is tied to an def operand. It also returns the def
 /// operand index by reference.
 bool MachineInstr::isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx){
+  if (getOpcode() == TargetInstrInfo::INLINEASM) {
+    const MachineOperand &MO = getOperand(UseOpIdx);
+    if (!MO.isReg() || !MO.isUse())
+      return false;
+    assert(UseOpIdx > 0);
+    const MachineOperand &UFMO = getOperand(UseOpIdx-1);
+    if (!UFMO.isImm())
+      return false;  // Must be physreg uses.
+    unsigned DefNo;
+    if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
+      if (!DefOpIdx)
+        return true;
+
+      unsigned DefIdx = 1;
+      // Remember to adjust the index. First operand is asm string, then there
+      // is a flag for each.
+      while (DefNo) {
+        const MachineOperand &FMO = getOperand(DefIdx);
+        assert(FMO.isImm());
+        // Skip over this def.
+        DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
+        --DefNo;
+      }
+      *DefOpIdx = DefIdx+1;
+      return true;
+    }
+    return false;
+  }
+
   const TargetInstrDesc &TID = getDesc();
   if (UseOpIdx >= TID.getNumOperands())
     return false;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index cbfdb793ede..90ebc4a84ed 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -4932,28 +4932,17 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
   std::vector<unsigned> RegClassRegs;
   const TargetRegisterClass *RC = PhysReg.second;
   if (RC) {
-    // If this is a tied register, our regalloc doesn't know how to maintain
-    // the constraint, so we have to pick a register to pin the input/output to.
-    // If it isn't a matched constraint, go ahead and create vreg and let the
-    // regalloc do its thing.
-    if (!OpInfo.hasMatchingInput()) {
-      RegVT = *PhysReg.second->vt_begin();
-      if (OpInfo.ConstraintVT == MVT::Other)
-        ValueVT = RegVT;
+    RegVT = *PhysReg.second->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
 
-      // Create the appropriate number of virtual registers.
-      MachineRegisterInfo &RegInfo = MF.getRegInfo();
-      for (; NumRegs; --NumRegs)
-        Regs.push_back(RegInfo.createVirtualRegister(PhysReg.second));
-
-      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
-      return;
-    }
-
-    // Otherwise, we can't allocate it.  Let the code below figure out how to
-    // maintain these constraints.
-    RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end());
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(PhysReg.second));
 
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    return;
   } else {
     // This is a reference to a register class that doesn't directly correspond
     // to an LLVM register class.  Allocate NumRegs consecutive, available,
@@ -5237,8 +5226,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
       OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
                                                6 /* EARLYCLOBBER REGDEF */ :
                                                2 /* REGDEF */ ,
-                                               OpInfo.hasMatchingInput(),
-                                               OpInfo.MatchingInput,
+                                               false,
+                                               0,
                                                DAG, AsmNodeOperands);
       break;
     }
@@ -5272,18 +5261,19 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
           RegsForValue MatchedRegs;
           MatchedRegs.TLI = &TLI;
           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
-          MatchedRegs.RegVTs.push_back(AsmNodeOperands[CurOp+1].getValueType());
+          MVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
           for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
-               i != e; ++i) {
-            unsigned Reg =
-              cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
-            MatchedRegs.Regs.push_back(Reg);
-          }
+               i != e; ++i)
+            MatchedRegs.Regs.
+              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
 
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
                                     Chain, &Flag);
-          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, false, 0,
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+                                           true, OpInfo.getMatchedOperand(),
                                            DAG, AsmNodeOperands);
           break;
         } else {
@@ -5291,6 +5281,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
           assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
                  "Unexpected number of operands");
           // Add information to the INLINEASM node to know about this input.
+          // See InlineAsm.h isUseOperandTiedToDef.
+          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
           AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
                                                           TLI.getPointerTy()));
           AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 3b923ef8db5..4aa143ea58c 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -634,7 +634,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
 
       ProcessCopy(&*mi, &*mbbi, Processed);
 
-      for (unsigned si = 1, e = TID.getNumOperands(); si < e; ++si) {
+      unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM)
+        ? mi->getNumOperands() : TID.getNumOperands();
+      for (unsigned si = 0; si < NumOps; ++si) {
         unsigned ti = 0;
         if (!mi->isRegTiedToDefOperand(si, &ti))
           continue;
@@ -660,8 +662,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
           unsigned regA = mi->getOperand(ti).getReg();
           unsigned regB = mi->getOperand(si).getReg();
 
-          assert(TargetRegisterInfo::isVirtualRegister(regA) &&
-                 TargetRegisterInfo::isVirtualRegister(regB) &&
+          assert(TargetRegisterInfo::isVirtualRegister(regB) &&
                  "cannot update physical register live information");
 
 #ifndef NDEBUG
@@ -753,7 +754,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
           }
 
         InstructionRearranged:
-          const TargetRegisterClass* rc = MRI->getRegClass(regA);
+          const TargetRegisterClass* rc = MRI->getRegClass(regB);
           MachineInstr *DefMI = MRI->getVRegDef(regB);
           // If it's safe and profitable, remat the definition instead of
           // copying it.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index e0dbf995d84..8862428b226 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1017,9 +1017,37 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   case X86::MOV_Fp8032:
   case X86::MOV_Fp8064: 
   case X86::MOV_Fp8080: {
-    unsigned SrcReg = getFPReg(MI->getOperand(1));
-    unsigned DestReg = getFPReg(MI->getOperand(0));
+    const MachineOperand &MO1 = MI->getOperand(1);
+    unsigned SrcReg = getFPReg(MO1);
 
+    const MachineOperand &MO0 = MI->getOperand(0);
+    // These can be created due to inline asm. Two address pass can introduce
+    // copies from RFP registers to virtual registers.
+    if (MO0.getReg() == X86::ST0 && SrcReg == 0) {
+      assert(MO1.isKill());
+      // Treat %ST0<def> = MOV_Fp8080 %FP0<kill>
+      // like  FpSET_ST0_80 %FP0<kill>, %ST0<imp-def>
+      assert((StackTop == 1 || StackTop == 2)
+             && "Stack should have one or two element on it to return!");
+      --StackTop;   // "Forget" we have something on the top of stack!
+      break;
+    } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) {
+      assert(MO1.isKill());
+      // Treat %ST1<def> = MOV_Fp8080 %FP1<kill>
+      // like  FpSET_ST1_80 %FP0<kill>, %ST1<imp-def>
+      // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
+      if (StackTop == 1) {
+        BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
+        NumFXCH++;
+        StackTop = 0;
+        break;
+      }
+      assert(StackTop == 2 && "Stack should have two element on it to return!");
+      --StackTop;   // "Forget" we have something on the top of stack!
+      break;
+    }
+
+    unsigned DestReg = getFPReg(MO0);
     if (MI->killsRegister(X86::FP0+SrcReg)) {
       // If the input operand is killed, we can just change the owner of the
       // incoming stack slot into the result.
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index d49d5bf9f9b..557d00c6293 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc | grep {a: %ecx %ecx}
-; RUN: llvm-as < %s | llc | grep {b: %ecx %edx %ecx}
+; RUN: llvm-as < %s | llc | grep {a:} | not grep ax
+; RUN: llvm-as < %s | llc | grep {b:} | not grep ax
 ; PR2078
 ; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
 ; allocated to the input/output register.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index a5a526354a7..62e3233f9b3 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %eax %edx 8(%esi) %ebx (%edi)"
-; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%ecx %eax %edx 8(%edi) %ebx (%esi)"
+; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)"
+; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)"
 ; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
 ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
 ; operand.  There are many combinations that work; this is what llc puts out now.
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 6442ac7c765..c7fdfb26920 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,5 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 1 | grep {movl.*%ecx}
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 2 | grep mov
 ; PR3149
+; Make sure the copy after inline asm is not coalesced away.
 
 @"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00"		; <[7 x i8]*> [#uses=1]
 @llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
new file mode 100644
index 00000000000..119495372a7
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | count 1
+
+define i64 @t(i64 %a, i64 %b) nounwind ssp {
+entry:
+	%asmtmp = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %a) nounwind		; <i64> [#uses=1]
+	%asmtmp1 = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %b) nounwind		; <i64> [#uses=1]
+	%0 = add i64 %asmtmp1, %asmtmp		; <i64> [#uses=1]
+	ret i64 %0
+}