From 4662a9f270fe2c916c35545718720ed181384c30 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 4 Apr 2011 21:00:03 +0000 Subject: [PATCH] Allow coalescing with reserved physregs in certain cases: When a virtual register has a single value that is defined as a copy of a reserved register, permit that copy to be joined. These virtual registers are usually copies of the stack pointer: %vreg75 = COPY %ESP; GR32:%vreg75 MOV32mr %vreg75, 1, %noreg, 0, %noreg, %vreg74 MOV32mi %vreg75, 1, %noreg, 8, %noreg, 0 MOV32mi %vreg75, 1, %noreg, 4, %noreg, 0 CALLpcrel32 ... Coalescing these virtual registers early decreases register pressure. Previously, they were coalesced by RALinScan::attemptTrivialCoalescing after register allocation was completed. The lower register pressure causes the mcinst-lowering-cmp0.ll test case to fail because it depends on linear scan spilling a particular register. I am deleting 2008-08-05-SpillerBug.ll because it is counting the number of instructions emitted, and its revision history shows the 'correct' count being edited many times. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128845 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 2 +- lib/CodeGen/RegAllocLinearScan.cpp | 2 +- lib/CodeGen/SimpleRegisterCoalescing.cpp | 16 ++++-- test/CodeGen/X86/2008-08-05-SpillerBug.ll | 44 --------------- test/CodeGen/X86/mcinst-lowering-cmp0.ll | 68 ----------------------- 5 files changed, 14 insertions(+), 118 deletions(-) delete mode 100644 test/CodeGen/X86/2008-08-05-SpillerBug.ll delete mode 100644 test/CodeGen/X86/mcinst-lowering-cmp0.ll diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 68bd0a60c98..07f83ea7cef 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -572,7 +572,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else if (allocatableRegs_[MO.getReg()]) { + else { MachineInstr *CopyMI = NULL; if (MI->isCopyLike()) CopyMI = MI; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 966570f3f33..ef78949c54e 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -572,7 +572,7 @@ void RALinScan::initIntervalSets() for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty()) { + if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { mri_->setPhysRegUsed(i->second->reg); fixed_.push_back(std::make_pair(i->second, i->second->begin())); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 93aa0a31207..c621726a03f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -919,13 +919,23 @@ 
SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, /// are not spillable! If the destination interval uses are far away, think /// twice about coalescing them! bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. + if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + if (DisablePhysicalJoin) { DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); return false; } - // Only coalesce to allocatable physreg. - if (!li_->isAllocatable(CP.getDstReg())) { + // Only coalesce to allocatable physreg, we don't want to risk modifying + // reserved registers. + if (!Allocatable) { DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); return false; // Not coalescable. } @@ -944,8 +954,6 @@ bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { // FIXME: Why are we skipping this test for partial copies? // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. 
if (!CP.isPartial()) { - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll deleted file mode 100644 index ef90498a02c..00000000000 --- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -pre-RA-sched=list-burr -disable-fp-elim -stats |& grep asm-printer | grep 55 -; PR2568 - -@g_3 = external global i16 ; [#uses=1] -@g_5 = external global i32 ; [#uses=3] - -declare i32 @func_15(i16 signext , i16 signext , i32) nounwind - -define void @func_9_entry_2E_ce(i8 %p_11) nounwind { -newFuncRoot: - br label %entry.ce - -entry.ce.ret.exitStub: ; preds = %entry.ce - ret void - -entry.ce: ; preds = %newFuncRoot - load i16* @g_3, align 2 ; :0 [#uses=1] - icmp sgt i16 %0, 0 ; :1 [#uses=1] - zext i1 %1 to i32 ; :2 [#uses=1] - load i32* @g_5, align 4 ; :3 [#uses=4] - icmp ugt i32 %2, %3 ; :4 [#uses=1] - zext i1 %4 to i32 ; :5 [#uses=1] - icmp eq i32 %3, 0 ; :6 [#uses=1] - %.0 = select i1 %6, i32 1, i32 %3 ; [#uses=1] - urem i32 1, %.0 ; :7 [#uses=2] - sext i8 %p_11 to i16 ; :8 [#uses=1] - trunc i32 %3 to i16 ; :9 [#uses=1] - tail call i32 @func_15( i16 signext %8, i16 signext %9, i32 1 ) nounwind ; :10 [#uses=0] - load i32* @g_5, align 4 ; :11 [#uses=1] - trunc i32 %11 to i16 ; :12 [#uses=1] - tail call i32 @func_15( i16 signext %12, i16 signext 1, i32 %7 ) nounwind ; :13 [#uses=0] - sext i8 %p_11 to i32 ; :14 [#uses=1] - %p_11.lobit = lshr i8 %p_11, 7 ; [#uses=1] - %tmp = zext i8 %p_11.lobit to i32 ; [#uses=1] - %tmp.not = xor i32 %tmp, 1 ; [#uses=1] - %.015 = ashr i32 %14, %tmp.not ; [#uses=2] - icmp eq i32 %.015, 0 ; :15 [#uses=1] - %.016 = select i1 %15, i32 1, i32 %.015 ; [#uses=1] - udiv i32 %7, %.016 ; 
:16 [#uses=1] - icmp ult i32 %5, %16 ; :17 [#uses=1] - zext i1 %17 to i32 ; :18 [#uses=1] - store i32 %18, i32* @g_5, align 4 - br label %entry.ce.ret.exitStub -} diff --git a/test/CodeGen/X86/mcinst-lowering-cmp0.ll b/test/CodeGen/X86/mcinst-lowering-cmp0.ll deleted file mode 100644 index 756be1fabfd..00000000000 --- a/test/CodeGen/X86/mcinst-lowering-cmp0.ll +++ /dev/null @@ -1,68 +0,0 @@ -; RUN: llc --show-mc-encoding -relocation-model=pic -disable-fp-elim -O3 < %s | FileCheck %s - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin10.0.0" - -%struct.NSConstantString = type { i32*, i32, i8*, i32 } -%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* } -%struct._objc_symtab = type { i32, i8*, i16, i16, [0 x i8*] } - -@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1] -@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1] -@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; [#uses=3] -@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1] -@.str = private constant [3 x i8] c"||\00" ; <[3 x i8]*> [#uses=1] -@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 2 }, section "__DATA,__cfstring" ; <%struct.NSConstantString*> [#uses=1] -@"\01L_OBJC_METH_VAR_NAME_1" = internal global [5 x i8] c"baz:\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[5 x i8]*> [#uses=1] 
-@"\01L_OBJC_SELECTOR_REFERENCES_2" = internal global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; [#uses=2] -@"\01L_OBJC_METH_VAR_NAME_3" = internal global [4 x i8] c"bar\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[4 x i8]*> [#uses=1] -@"\01L_OBJC_SELECTOR_REFERENCES_4" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), section "__OBJC,__message_refs,literal_pointers,no_dead_strip", align 4 ; [#uses=2] -@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1 ; <[1 x i8]*> [#uses=1] -@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4 ; <%struct._objc_module*> [#uses=1] -@llvm.used = appending global [9 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" to i8*), i8* getelementptr inbounds ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata" ; <[9 x i8*]*> [#uses=0] - -define void @f0(i8* nocapture %a, i8* nocapture %b) nounwind optsize ssp { -entry: - %call = tail call i32 (...)* @get_name() nounwind optsize ; [#uses=2] - %conv = inttoptr i32 %call to i8* ; [#uses=1] - %call1 = tail call i32 (...)* @get_dict() 
nounwind optsize ; [#uses=2] - %conv2 = inttoptr i32 %call1 to i8* ; [#uses=2] - -; Check that we lower to the short form of cmpl, which has an 8-bit immediate. -; -; CHECK: cmpl $0, -16(%ebp) ## 4-byte Folded Reload -; CHECK: ## encoding: [0x83,0x7d,0xf0,0x00] -; rdar://7999130 - %cmp = icmp eq i32 %call1, 0 ; [#uses=1] - br i1 %cmp, label %if.end, label %if.then - -if.then: ; preds = %entry - %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; [#uses=1] - %call6 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp5) nounwind optsize ; [#uses=1] - %tmp7 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2" ; [#uses=1] - %call820 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6, i8* %tmp7, i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*)) nounwind optsize ; [#uses=0] - br label %if.end - -if.end: ; preds = %entry, %if.then - %tmp10 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_" ; [#uses=1] - %call11 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %conv2, i8* %tmp10) nounwind optsize ; [#uses=1] - %tmp12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4" ; [#uses=1] - %call13 = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call11, i8* %tmp12) nounwind optsize ; [#uses=0] - %cmp15 = icmp eq i32 %call, 0 ; [#uses=1] - br i1 %cmp15, label %if.end19, label %if.then17 - -if.then17: ; preds = %if.end - tail call void (...)* @f1(i8* %conv) nounwind optsize - ret void - -if.end19: ; preds = %if.end - ret void -} - -declare i32 @get_name(...) optsize - -declare i32 @get_dict(...) optsize - -declare i8* @objc_msgSend(i8*, i8*, ...) - -declare void @f1(...) optsize