From fb11288109329cb736d9f49769581a0d0c23fe19 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 23 Mar 2009 08:01:15 +0000 Subject: [PATCH] Model inline asm constraint which ties an input to an output register as machine operand TIED_TO constraint. This eliminated the need to pre-allocate registers for these. This also allows register allocator can eliminate the unneeded copies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67512 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/InlineAsm.h | 15 +++-- lib/CodeGen/LiveIntervalAnalysis.cpp | 10 +-- lib/CodeGen/MachineInstr.cpp | 61 ++++++++++++++++++- .../SelectionDAG/SelectionDAGBuild.cpp | 50 +++++++-------- lib/CodeGen/TwoAddressInstructionPass.cpp | 9 +-- lib/Target/X86/X86FloatingPoint.cpp | 32 +++++++++- .../X86/2008-02-20-InlineAsmClobber.ll | 4 +- test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 4 +- .../CodeGen/X86/2008-12-19-EarlyClobberBug.ll | 3 +- test/CodeGen/X86/inline-asm-2addr.ll | 9 +++ 10 files changed, 144 insertions(+), 53 deletions(-) create mode 100644 test/CodeGen/X86/inline-asm-2addr.ll diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h index 6347cae51f4..84292cf19e3 100644 --- a/include/llvm/InlineAsm.h +++ b/include/llvm/InlineAsm.h @@ -135,14 +135,13 @@ public: return (Flag & 0xffff) >> 3; } - /// isOutputOperandTiedToUse - Return true if the flag of the inline asm - /// operand indicates it is an output that's matched to an input operand. - static bool isOutputOperandTiedToUse(unsigned Flag, unsigned &UseIdx) { - if (Flag & 0x80000000) { - UseIdx = Flag >> 16; - return true; - } - return false; + /// isUseOperandTiedToDef - Return true if the flag of the inline asm + /// operand indicates it is an use operand that's matched to a def operand. + static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) { + if ((Flag & 0x80000000) == 0) + return false; + Idx = (Flag & ~0x80000000) >> 16; + return true; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 29be67bf605..b5c21952fa3 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -477,8 +477,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(interval.containsOneValue()); unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); unsigned RedefIndex = getDefIndex(MIIdx); - // It cannot be an early clobber MO. - assert(!MO.isEarlyClobber() && "Unexpected early clobber!"); + if (MO.isEarlyClobber()) + RedefIndex = getUseIndex(MIIdx); const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); VNInfo *OldValNo = OldLR->valno; @@ -499,6 +499,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->copy = 0; + if (MO.isEarlyClobber()) + OldValNo->redefByEC = true; // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -546,8 +548,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. unsigned defIndex = getDefIndex(MIIdx); - // It cannot be an early clobber MO. - assert(!MO.isEarlyClobber() && "Unexpected early clobber!"); + if (MO.isEarlyClobber()) + defIndex = getUseIndex(MIIdx); VNInfo *ValNo; MachineInstr *CopyMI = NULL; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index cc85b80a178..965c2a2c5d2 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Constants.h" +#include "llvm/InlineAsm.h" #include "llvm/Value.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -692,6 +693,35 @@ int MachineInstr::findFirstPredOperandIdx() const { /// isRegReDefinedByTwoAddr - Given the index of a register operand, /// check if the register def is a re-definition due to two addr elimination. bool MachineInstr::isRegReDefinedByTwoAddr(unsigned DefIdx) const{ + if (getOpcode() == TargetInstrInfo::INLINEASM) { + assert(DefIdx >= 2); + const MachineOperand &MO = getOperand(DefIdx); + if (!MO.isReg() || !MO.isDef()) + return false; + // Determine the actual operand no corresponding to this index. + unsigned DefNo = 0; + for (unsigned i = 1, e = getNumOperands(); i < e; ) { + const MachineOperand &FMO = getOperand(i); + assert(FMO.isImm()); + // Skip over this def. + i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; + if (i > DefIdx) + break; + ++DefNo; + } + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &FMO = getOperand(i); + if (!FMO.isImm()) + continue; + if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) + continue; + unsigned Idx; + if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && + Idx == DefNo) + return true; + } + } + assert(getOperand(DefIdx).isDef() && "DefIdx is not a def!"); const TargetInstrDesc &TID = getDesc(); for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { @@ -707,6 +737,35 @@ bool MachineInstr::isRegReDefinedByTwoAddr(unsigned DefIdx) const{ /// is a register use and it is tied to an def operand. It also returns the def /// operand index by reference. bool MachineInstr::isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx){ + if (getOpcode() == TargetInstrInfo::INLINEASM) { + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse()) + return false; + assert(UseOpIdx > 0); + const MachineOperand &UFMO = getOperand(UseOpIdx-1); + if (!UFMO.isImm()) + return false; // Must be physreg uses. + unsigned DefNo; + if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { + if (!DefOpIdx) + return true; + + unsigned DefIdx = 1; + // Remember to adjust the index. First operand is asm string, then there + // is a flag for each. + while (DefNo) { + const MachineOperand &FMO = getOperand(DefIdx); + assert(FMO.isImm()); + // Skip over this def. + DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; + --DefNo; + } + *DefOpIdx = DefIdx+1; + return true; + } + return false; + } + const TargetInstrDesc &TID = getDesc(); if (UseOpIdx >= TID.getNumOperands()) return false; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index cbfdb793ede..90ebc4a84ed 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -4932,28 +4932,17 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::vector RegClassRegs; const TargetRegisterClass *RC = PhysReg.second; if (RC) { - // If this is a tied register, our regalloc doesn't know how to maintain - // the constraint, so we have to pick a register to pin the input/output to. - // If it isn't a matched constraint, go ahead and create vreg and let the - // regalloc do its thing. - if (!OpInfo.hasMatchingInput()) { - RegVT = *PhysReg.second->vt_begin(); - if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = RegVT; + RegVT = *PhysReg.second->vt_begin(); + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; - // Create the appropriate number of virtual registers. - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (; NumRegs; --NumRegs) - Regs.push_back(RegInfo.createVirtualRegister(PhysReg.second)); - - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); - return; - } - - // Otherwise, we can't allocate it. Let the code below figure out how to - // maintain these constraints. - RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end()); + // Create the appropriate number of virtual registers. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(PhysReg.second)); + OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + return; } else { // This is a reference to a register class that doesn't directly correspond // to an LLVM register class. Allocate NumRegs consecutive, available, @@ -5237,8 +5226,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? 6 /* EARLYCLOBBER REGDEF */ : 2 /* REGDEF */ , - OpInfo.hasMatchingInput(), - OpInfo.MatchingInput, + false, + 0, DAG, AsmNodeOperands); break; } @@ -5272,18 +5261,19 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - MatchedRegs.RegVTs.push_back(AsmNodeOperands[CurOp+1].getValueType()); + MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + MatchedRegs.RegVTs.push_back(RegVT); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) { - unsigned Reg = - cast(AsmNodeOperands[++CurOp])->getReg(); - MatchedRegs.Regs.push_back(Reg); - } + i != e; ++i) + MatchedRegs.Regs. + push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, false, 0, + MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; } else { @@ -5291,6 +5281,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3b923ef8db5..4aa143ea58c 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -634,7 +634,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { ProcessCopy(&*mi, &*mbbi, Processed); - for (unsigned si = 1, e = TID.getNumOperands(); si < e; ++si) { + unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) + ? mi->getNumOperands() : TID.getNumOperands(); + for (unsigned si = 0; si < NumOps; ++si) { unsigned ti = 0; if (!mi->isRegTiedToDefOperand(si, &ti)) continue; @@ -660,8 +662,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { unsigned regA = mi->getOperand(ti).getReg(); unsigned regB = mi->getOperand(si).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(regA) && - TargetRegisterInfo::isVirtualRegister(regB) && + assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot update physical register live information"); #ifndef NDEBUG @@ -753,7 +754,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } InstructionRearranged: - const TargetRegisterClass* rc = MRI->getRegClass(regA); + const TargetRegisterClass* rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index e0dbf995d84..8862428b226 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1017,9 +1017,37 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::MOV_Fp8032: case X86::MOV_Fp8064: case X86::MOV_Fp8080: { - unsigned SrcReg = getFPReg(MI->getOperand(1)); - unsigned DestReg = getFPReg(MI->getOperand(0)); + const MachineOperand &MO1 = MI->getOperand(1); + unsigned SrcReg = getFPReg(MO1); + const MachineOperand &MO0 = MI->getOperand(0); + // These can be created due to inline asm. Two address pass can introduce + // copies from RFP registers to virtual registers. + if (MO0.getReg() == X86::ST0 && SrcReg == 0) { + assert(MO1.isKill()); + // Treat %ST0 = MOV_Fp8080 %FP0 + // like FpSET_ST0_80 %FP0, %ST0 + assert((StackTop == 1 || StackTop == 2) + && "Stack should have one or two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) { + assert(MO1.isKill()); + // Treat %ST1 = MOV_Fp8080 %FP1 + // like FpSET_ST1_80 %FP0, %ST1 + // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. + if (StackTop == 1) { + BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); + NumFXCH++; + StackTop = 0; + break; + } + assert(StackTop == 2 && "Stack should have two element on it to return!"); + --StackTop; // "Forget" we have something on the top of stack! + break; + } + + unsigned DestReg = getFPReg(MO0); if (MI->killsRegister(X86::FP0+SrcReg)) { // If the input operand is killed, we can just change the owner of the // incoming stack slot into the result. diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll index d49d5bf9f9b..557d00c6293 100644 --- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll +++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc | grep {a: %ecx %ecx} -; RUN: llvm-as < %s | llc | grep {b: %ecx %edx %ecx} +; RUN: llvm-as < %s | llc | grep {a:} | not grep ax +; RUN: llvm-as < %s | llc | grep {b:} | not grep ax ; PR2078 ; The clobber list says that "ax" is clobbered. Make sure that eax isn't ; allocated to the input/output register. diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll index a5a526354a7..62e3233f9b3 100644 --- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll +++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %eax %edx 8(%esi) %ebx (%edi)" -; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%ecx %eax %edx 8(%edi) %ebx (%esi)" +; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %edi %esi 8(%edx) %eax (%ebx)" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%edi %edx %ebp 8(%ebx) %eax (%esi)" ; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th ; operand. There are many combinations that work; this is what llc puts out now. diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll index 6442ac7c765..c7fdfb26920 100644 --- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll +++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll @@ -1,5 +1,6 @@ -; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 1 | grep {movl.*%ecx} +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | %prcontext End 2 | grep mov ; PR3149 +; Make sure the copy after inline asm is not coalesced away. @"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00" ; <[7 x i8]*> [#uses=1] @llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll new file mode 100644 index 00000000000..119495372a7 --- /dev/null +++ b/test/CodeGen/X86/inline-asm-2addr.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep movq | count 1 + +define i64 @t(i64 %a, i64 %b) nounwind ssp { +entry: + %asmtmp = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %a) nounwind ; [#uses=1] + %asmtmp1 = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %b) nounwind ; [#uses=1] + %0 = add i64 %asmtmp1, %asmtmp ; [#uses=1] + ret i64 %0 +}