From 8e3455ba1734a64dc5a6884d4a5218d436da54e2 Mon Sep 17 00:00:00 2001 From: Dale Johannesen <dalej@apple.com> Date: Wed, 24 Sep 2008 23:13:09 +0000 Subject: [PATCH] Remove SelectionDag early allocation of registers for earlyclobbers. Teach Local RA about earlyclobber, and add some tests for it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocLocal.cpp | 51 +++++++++++++++++++ .../SelectionDAG/SelectionDAGBuild.cpp | 49 ++++-------------- lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 2 +- .../2007-04-30-InlineAsmEarlyClobber.ll | 7 ++- test/CodeGen/X86/2008-09-17-inline-asm-1.ll | 4 ++ test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 26 ++++++++++ 6 files changed, 96 insertions(+), 43 deletions(-) create mode 100644 test/CodeGen/X86/2008-09-18-inline-asm-2.ll diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 0a20128632d..cd4de344cc4 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -722,6 +722,55 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If any physical regs are earlyclobber, spill any value they might + // have in them, then mark them unallocatable. + // If any virtual regs are earlyclobber, allocate them now (before + // freeing inputs that are killed). + if (MI->getOpcode()==TargetInstrInfo::INLINEASM) { + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDef() && MO.isEarlyClobber() && + MO.getReg()) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. 
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DOUT << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"; + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + + for (const unsigned *AliasSet = TRI->getSubRegisters(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + MF->getRegInfo().setPhysRegUsed(*AliasSet); + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(*AliasSet); + } + } + } + } + } + } + // Get the used operands into registers. This has the potential to spill // incoming values if we are out of registers. Note that we completely // ignore physical register uses here. We assume that if an explicit @@ -778,6 +827,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + !MO.isEarlyClobber() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { unsigned Reg = MO.getReg(); if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. 
@@ -839,6 +889,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); if (MO.isRegister() && MO.isDef() && MO.getReg() && + !MO.isEarlyClobber() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { unsigned DestVirtReg = MO.getReg(); unsigned DestPhysReg; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 43ca2364d59..1365d684f4d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -4465,12 +4465,10 @@ private: /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// HasEarlyClobber is true if there are any early clobber constraints (=&r) -/// or any explicitly clobbered registers. /// Input and OutputRegs are the set of already allocated physical registers. /// void SelectionDAGLowering:: -GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, bool HasEarlyClobber, +GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs) { // Compute whether this value requires an input register, an output register, @@ -4481,10 +4479,9 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, bool HasEarlyClobber, case InlineAsm::isOutput: isOutReg = true; - // If this is an early-clobber output, or if there is an input - // constraint that matches this, we need to reserve the input register - // so no other inputs allocate to it. - isInReg = OpInfo.isEarlyClobber || OpInfo.hasMatchingInput; + // If there is an input constraint that matches this, we need to reserve + // the input register so no other inputs allocate to it. 
+ isInReg = OpInfo.hasMatchingInput; break; case InlineAsm::isInput: isInReg = true; @@ -4551,16 +4548,11 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, bool HasEarlyClobber, std::vector<unsigned> RegClassRegs; const TargetRegisterClass *RC = PhysReg.second; if (RC) { - // If this is an early clobber or tied register, our regalloc doesn't know - // how to maintain the constraint. If it isn't, go ahead and create vreg + // If this is a tied register, our regalloc doesn't know how to maintain + // the constraint. If it isn't, go ahead and create vreg // and let the regalloc do the right thing. - if (!OpInfo.hasMatchingInput && !OpInfo.isEarlyClobber && - // If there is some other early clobber and this is an input register, - // then we are forced to pre-allocate the input reg so it doesn't - // conflict with the earlyclobber. - !(OpInfo.Type == InlineAsm::isInput && HasEarlyClobber)) { + if (!OpInfo.hasMatchingInput) { RegVT = *PhysReg.second->vt_begin(); - if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; @@ -4664,11 +4656,6 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { std::vector<InlineAsm::ConstraintInfo> ConstraintInfos = IA->ParseConstraints(); - // SawEarlyClobber - Keep track of whether we saw an earlyclobber output - // constraint. If so, we can't let the register allocator allocate any input - // registers, because it will not know to avoid the earlyclobbered output reg. - bool SawEarlyClobber = false; - bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -4744,24 +4731,6 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); - // Keep track of whether we see an earlyclobber. - SawEarlyClobber |= OpInfo.isEarlyClobber; - - // If we see a clobber of a register, it is an early clobber. 
- if (!SawEarlyClobber && - OpInfo.Type == InlineAsm::isClobber && - OpInfo.ConstraintType == TargetLowering::C_Register) { - // Note that we want to ignore things that we don't track here, like - // dirflag, fpsr, flags, etc. - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - if (PhysReg.first || PhysReg.second) { - // This is a register we know of. - SawEarlyClobber = true; - } - } - - // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -4802,7 +4771,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs); + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } ConstraintInfos.clear(); @@ -4815,7 +4784,7 @@ // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(OpInfo, SawEarlyClobber, OutputRegs, InputRegs); + GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 18e15af4577..eb9ed3f7cab 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -398,7 +398,7 @@ public: N = NewN; } - void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, bool HasEarlyClobber, + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs); diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll index f43b87c76d6..2466f4fc8e9 100644 --- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll +++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll @@ -1,5 +1,8 @@ -; RUN: llvm-as < %s | llc | grep {subfc r2,r5,r4} -; RUN: llvm-as < %s | llc | grep {subfze r4,r3} +; RUN: llvm-as < %s | llc | grep {subfc r3,r5,r4} +; RUN: llvm-as < %s | llc | grep {subfze r4,r2} +; RUN: llvm-as < %s | llc -regalloc=local | grep {subfc r5,r2,r4} +; RUN: llvm-as < %s | llc -regalloc=local | grep {subfze r2,r3} +; The first argument of subfc must not be the same as any other register. 
; PR1357 diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll index b3c6007e9c4..ed8d345aad3 100644 --- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll +++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll @@ -2,6 +2,10 @@ ; RUN: llvm-as < %s | llc -march=x86 | not grep "movl %edx, %edx" ; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%eax), %eax" ; RUN: llvm-as < %s | llc -march=x86 | not grep "movl (%edx), %edx" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %eax, %eax" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl %edx, %edx" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%eax), %eax" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | not grep "movl (%edx), %edx" ; %0 must not be put in EAX or EDX. ; In the first asm, $0 and $2 must not be put in EAX. diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll new file mode 100644 index 00000000000..a5a526354a7 --- /dev/null +++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep "#%ebp %eax %edx 8(%esi) %ebx (%edi)" +; RUN: llvm-as < %s | llc -march=x86 -regalloc=local | grep "#%ecx %eax %edx 8(%edi) %ebx (%esi)" +; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers +; referenced in the 4th and 6th operands must not be the same as the 1st or 5th +; operand. There are many combinations that work; this is what llc puts out now. 
+; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + %struct.foo = type { i32, i32, i8* } + +define i32 @get(%struct.foo* %c, i8* %state) nounwind { +entry: + %0 = getelementptr %struct.foo* %c, i32 0, i32 0 ; <i32*> [#uses=2] + %1 = getelementptr %struct.foo* %c, i32 0, i32 1 ; <i32*> [#uses=2] + %2 = getelementptr %struct.foo* %c, i32 0, i32 2 ; <i8**> [#uses=2] + %3 = load i32* %0, align 4 ; <i32> [#uses=1] + %4 = load i32* %1, align 4 ; <i32> [#uses=1] + %5 = load i8* %state, align 1 ; <i8> [#uses=1] + %asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#$0 $1 $2 $3 $4 $5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind ; <{ i32, i32, i32, i32 }> [#uses=3] + %asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0 ; <i32> [#uses=1] + %asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1 ; <i32> [#uses=1] + store i32 %asmresult1, i32* %0 + %asmresult2 = extractvalue { i32, i32, i32, i32 } %asmtmp, 2 ; <i32> [#uses=1] + store i32 %asmresult2, i32* %1 + ret i32 %asmresult +}