From 5248468473f0488a652b545ad95f7abda302b7b5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 18 Jul 2009 02:10:10 +0000 Subject: [PATCH] Enable cross register class coalescing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76281 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetRegisterInfo.h | 9 +++ lib/CodeGen/SimpleRegisterCoalescing.cpp | 31 +++++--- lib/CodeGen/VirtRegRewriter.cpp | 39 +++++++++- lib/Target/X86/X86RegisterInfo.cpp | 78 +++++++++++++++++++ lib/Target/X86/X86RegisterInfo.h | 7 ++ .../2009-07-10-BadIncomingArgOffset.ll | 2 +- test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll | 5 +- test/CodeGen/X86/coalescer-cross.ll | 41 ++++++++++ test/CodeGen/X86/stack-color-with-reg.ll | 3 +- test/CodeGen/X86/x86-64-sret-return.ll | 2 +- 10 files changed, 198 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/X86/coalescer-cross.ll diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 29f96e961ab..9cd8489c803 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -484,6 +484,15 @@ public: return 0; } + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. 
+ virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const { + return 0; + } + //===--------------------------------------------------------------------===// // Register Class Information // diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 7cb14e93123..9f8d1bad34b 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -59,7 +59,7 @@ NewHeuristic("new-coalescer-heuristic", static cl::opt CrossClassJoin("join-cross-class-copies", cl::desc("Coalesce cross register class copies"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); static cl::opt PhysJoinTweak("tweak-phys-join-heuristics", @@ -1308,6 +1308,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Should be non-null only when coalescing to a sub-register class. bool CrossRC = false; + const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); + const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); const TargetRegisterClass *NewRC = NULL; MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; @@ -1373,6 +1375,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } if (SubIdx) { + if (isInsSubReg || isSubRegToReg) { + if (!DstIsPhys && !SrcIsPhys) { + NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); + if (!NewRC) + return false; + } + } unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); @@ -1424,11 +1433,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 
0 : mri_->getRegClass(DstReg); unsigned LargeReg = SrcReg; unsigned SmallReg = DstReg; - unsigned Limit = 0; // Now determine the register class of the joined register. if (isExtSubReg) { @@ -1439,7 +1445,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { Again = true; return false; } - Limit = allocatableRCRegs_[DstRC].count(); + if (!DstIsPhys && !SrcIsPhys) + NewRC = SrcRC; } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { @@ -1643,11 +1650,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) { - ++numCrossRCs; - if (NewRC) - mri_->setRegClass(DstReg, NewRC); - } + if (CrossRC) + ++numCrossRCs; + + // This may happen even if it's cross-rc coalescing. e.g. + // %reg1026 = SUBREG_TO_REG 0, %reg1037, 4 + // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to + // be allocated a register from GR64_ABCD. + if (NewRC) + mri_->setRegClass(DstReg, NewRC); if (NewHeuristic) { // Add all copies that define val# in the source interval into the queue. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index abaa8bd212e..9537c05d820 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -1344,6 +1344,31 @@ private: ++NumStores; } + /// isSafeToDelete - Return true if this instruction doesn't produce any side + /// effect and all of its defs are dead.
+ static bool isSafeToDelete(MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.isBarrier() || TID.isReturn() || + TID.hasUnmodeledSideEffects()) + return false; + if (TID.getImplicitDefs()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && !MO.isDead()) + return false; + if (MO.isUse() && MO.isKill()) + // FIXME: We can't remove kill markers or else the scavenger will assert. + // An alternative is to add an ADD pseudo instruction to replace kill + // markers. + return false; + } + return true; + } + /// TransferDeadness - A identity copy definition is dead and it's being /// removed. Find the last def or use and mark it as dead / kill. void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, @@ -1385,9 +1410,7 @@ private: if (LastUD->isDef()) { // If the instruction has no side effect, delete it and propagate // backward further. Otherwise, mark is dead and we are done. - const TargetInstrDesc &TID = LastUDMI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) { + if (!isSafeToDelete(*LastUDMI)) { LastUD->setIsDead(); break; } @@ -2170,7 +2193,15 @@ private: } } ProcessNextInst: - DistanceMap.insert(std::make_pair(&MI, Dist++)); + // Delete dead instructions without side effects.
+ if (!Erased && !BackTracked && isSafeToDelete(MI)) { + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + } + if (!Erased) + DistanceMap.insert(std::make_pair(&MI, Dist++)); if (!Erased && !BackTracked) { for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) UpdateKills(*II, TRI, RegKills, KillOps); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 437986fb9c3..0dc63ef9a61 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -152,6 +152,84 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { } } +const TargetRegisterClass * +X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned SubIdx) const { + switch (SubIdx) { + default: return 0; + case 1: + // 8-bit + if (B == &X86::GR8RegClass) { + if (A == &X86::GR64RegClass) + return &X86::GR64RegClass; + else if (A == &X86::GR32RegClass) + return &X86::GR32RegClass; + else if (A == &X86::GR16RegClass) + return &X86::GR16RegClass; + } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass) + return &X86::GR16_ABCDRegClass; + } else if (B == &X86::GR8_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass) + return &X86::GR32_NOREXRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) + return &X86::GR16_NOREXRegClass; + } + break; + case 2: + // 8-bit hi + if (B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) + return
&X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass) + return &X86::GR16_ABCDRegClass; + } + break; + case 3: + // 16-bit + if (B == &X86::GR16RegClass) { + if (A == &X86::GR64RegClass) + return &X86::GR64RegClass; + else if (A == &X86::GR32RegClass) + return &X86::GR32RegClass; + } else if (B == &X86::GR16_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) + return &X86::GR32_ABCDRegClass; + } else if (B == &X86::GR16_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass) + return &X86::GR32_NOREXRegClass; + } + break; + case 4: + // 32-bit + if (B == &X86::GR32RegClass) { + if (A == &X86::GR64RegClass) + return &X86::GR64RegClass; + } else if (B == &X86::GR32_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + } else if (B == &X86::GR32_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) + return &X86::GR64_NOREXRegClass; + } + break; + } + return 0; +} + const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const { const X86Subtarget *Subtarget = &TM.getSubtarget(); if (Subtarget->is64Bit()) diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index eac8426a980..702e69dee26 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -93,6 +93,13 @@ public: /// Code Generation virtual methods...
/// + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. const TargetRegisterClass *getPointerRegClass() const; diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index 6c86764073e..1af07d4130a 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep 328 +; RUN: llvm-as < %s | llc | grep 168 target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" target triple = "s390x-linux" diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll index b5635b38cfc..bec43f0fbb7 100644 --- a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll +++ b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll @@ -1,4 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1 +; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 2 +; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | not grep movb %struct.double_int = type { i64, i64 } %struct.tree_common = type <{ i8, [3 x i8] }> @@ -6,7 +7,7 @@ %struct.tree_node = type { %struct.tree_int_cst } @tree_code_type = external constant [0 x i32] ; <[0 x i32]*> [#uses=1] -define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) { +define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind { entry: %tmp2526 = bitcast %struct.tree_node* %t1 to i32* ; [#uses=1] 
br i1 false, label %UnifiedReturnBlock, label %bb21 diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll new file mode 100644 index 00000000000..1da214c6103 --- /dev/null +++ b/test/CodeGen/X86/coalescer-cross.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | not grep movaps +; rdar://6509240 + + type { %struct.TValue } ; type %0 + type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1 + %struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 } + %struct.GCObject = type { %struct.lua_State } + %struct.L_Umaxalign = type { double } + %struct.Mbuffer = type { i8*, i32, i32 } + %struct.Node = type { %struct.TValue, %struct.TKey } + %struct.TKey = type { %1 } + %struct.TString = type { %struct.anon } + %struct.TValue = type { %struct.L_Umaxalign, i32 } + %struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 } + %struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 } + %struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 } + %struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] } + %struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 } + %struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, 
%struct.lua_longjmp*, i32 } + %struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 } + %struct.stringtable = type { %struct.GCObject**, i32, i32 } +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp { +entry: + %0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; [#uses=1] + %1 = uitofp i32 %0 to double ; [#uses=1] + %2 = fdiv double %1, 1.000000e+06 ; [#uses=1] + %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3] + %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2] + %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; [#uses=1] + store double %2, double* %5, align 4 + %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; [#uses=1] + store i32 3, i32* %6, align 4 + %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1] + %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1] + store %struct.TValue* %8, %struct.TValue** %3, align 4 + ret i32 1 +} + +declare i32 @"\01_clock$UNIX2003"() diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll index 72a985a6c29..74326a382c4 100644 --- a/test/CodeGen/X86/stack-color-with-reg.ll +++ b/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,7 @@ ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t ; RUN: grep stackcoloring %t | grep "loads eliminated" -; RUN: grep stackcoloring %t | grep "stores eliminated" +; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5 +; RUN: grep asm-printer %t | grep 176 type { [62 x %struct.Bitvec*] } ; type %0 type { i8* } ; type %1 diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll index 9298661998b..458030c2791 
100644 --- a/test/CodeGen/X86/x86-64-sret-return.ll +++ b/test/CodeGen/X86/x86-64-sret-return.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep {movq %rdi, %rax} +; RUN: llvm-as < %s | llc target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8"