From da6fc15f0fb26ebbe42ab96e0d066bbd5bdbb72e Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 30 Aug 2013 04:27:29 +0000 Subject: [PATCH] mi-sched: improve the generic register pressure comparison. Only compare pressure within the same set. When multiple sets are affected, we prioritize the most constrained set. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189641 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/RegisterPressure.h | 5 ++--- lib/CodeGen/MachineScheduler.cpp | 26 ++++++++++++------------- test/CodeGen/X86/misched-balance.ll | 15 +++++++------- test/CodeGen/X86/misched-matmul.ll | 3 +-- test/CodeGen/X86/misched-matrix.ll | 14 ++++++------- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index b271f7f3d56..68f2ac23494 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -112,14 +112,13 @@ public: assert(isValid() && "invalid PressureChange"); return PSetID - 1; } + // If PSetID is invalid, return UINT16_MAX to give it lowest priority. + unsigned getPSetOrMax() const { return (PSetID - 1) & UINT16_MAX; } int getUnitInc() const { return UnitInc; } void setUnitInc(int Inc) { UnitInc = Inc; } - // If PSetID is invalid, convert to INT_MAX to give it lowest priority. - int getRank() const { return (PSetID - 1) & INT_MAX; } - bool operator==(const PressureChange &RHS) const { return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ffab5f4e66d..e233d4af440 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -2182,21 +2182,19 @@ static bool tryPressure(const PressureChange &TryP, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { - if (TryP.isValid() && CandP.isValid()) { - // If both candidates affect the same set, go with the smallest increase. - if (TryP.getPSet() == CandP.getPSet()) { - return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, - Reason); - } - // If one candidate decreases and the other increases, go with it. - if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { - return true; - } + int TryRank = TryP.getPSetOrMax(); + int CandRank = CandP.getPSetOrMax(); + // If both candidates affect the same set, go with the smallest increase. + if (TryRank == CandRank) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + // Invalid candidates have UnitInc==0. + if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { + return true; } - // If TryP has lower Rank, it has a higher priority. - int TryRank = TryP.getRank(); - int CandRank = CandP.getRank(); // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); diff --git a/test/CodeGen/X86/misched-balance.ll b/test/CodeGen/X86/misched-balance.ll index 78c56d212bd..3d670238576 100644 --- a/test/CodeGen/X86/misched-balance.ll +++ b/test/CodeGen/X86/misched-balance.ll @@ -1,5 +1,4 @@ -; RUN-disabled: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s -; RUN: true +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s ; ; Verify that misched resource/latency balancy heuristics are sane. @@ -16,7 +15,7 @@ entry: ; Since mmult1 IR is already in good order, this effectively ensure ; the scheduler maintains source order. ; -; CHECK: %for.body +; CHECK-LABEL: %for.body ; CHECK-NOT: %rsp ; CHECK: imull 4 ; CHECK-NOT: {{imull|rsp}} @@ -46,7 +45,7 @@ entry: ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK-NOT: {{imull|rsp}} -; CHECK: %end +; CHECK-LABEL: %end for.body: %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ] %tmp57 = load i32* %tmp56, align 4 @@ -121,7 +120,7 @@ end: ; Unlike the above loop, this IR starts out bad and must be ; rescheduled. ; -; CHECK: %for.body +; CHECK-LABEL: %for.body ; CHECK-NOT: %rsp ; CHECK: imull 4 ; CHECK-NOT: {{imull|rsp}} @@ -151,7 +150,7 @@ end: ; CHECK-NOT: {{imull|rsp}} ; CHECK: addl ; CHECK-NOT: {{imull|rsp}} -; CHECK: %end +; CHECK-LABEL: %end define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94, i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99, i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104) @@ -233,8 +232,8 @@ end: ; balanced heuristics are interesting here because we have resource, ; latency, and register limits all at once. For now, simply check that ; we don't use any callee-saves. -; CHECK: @encpc1 -; CHECK: %entry +; CHECK-LABEL: @encpc1 +; CHECK-LABEL: %entry ; CHECK-NOT: push ; CHECK-NOT: pop ; CHECK: ret diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll index c13ac6a7684..fe78e70f053 100644 --- a/test/CodeGen/X86/misched-matmul.ll +++ b/test/CodeGen/X86/misched-matmul.ll @@ -1,6 +1,5 @@ ; REQUIRES: asserts -; RUN-disabled: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s -; RUN: true +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s ; ; Verify that register pressure heuristics are working in MachineScheduler. ; diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll index c602a0a8567..23b561f6e5d 100644 --- a/test/CodeGen/X86/misched-matrix.ll +++ b/test/CodeGen/X86/misched-matrix.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \ ; RUN: -misched-topdown -verify-machineinstrs \ -; RUN: | FileCheck %s -check-prefix=TOPDOWN-disabled +; RUN: | FileCheck %s -check-prefix=TOPDOWN ; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \ ; RUN: -misched=ilpmin -verify-machineinstrs \ ; RUN: | FileCheck %s -check-prefix=ILPMIN @@ -15,19 +15,19 @@ ; been reordered with the stores. This tests the scheduler's cheap ; alias analysis ability (that doesn't require any AliasAnalysis pass). ; -; TOPDOWN-disabled: %for.body +; TOPDOWN-LABEL: %for.body ; TOPDOWN: movl %{{.*}}, ( ; TOPDOWN: imull {{[0-9]*}}( ; TOPDOWN: movl %{{.*}}, 4( ; TOPDOWN: imull {{[0-9]*}}( ; TOPDOWN: movl %{{.*}}, 8( ; TOPDOWN: movl %{{.*}}, 12( -; TOPDOWN: %for.end +; TOPDOWN-LABEL: %for.end ; ; For -misched=ilpmin, verify that each expression subtree is ; scheduled independently, and that the imull/adds are interleaved. ; -; ILPMIN: %for.body +; ILPMIN-LABEL: %for.body ; ILPMIN: movl %{{.*}}, ( ; ILPMIN: imull ; ILPMIN: imull @@ -53,12 +53,12 @@ ; ILPMIN: imull ; ILPMIN: addl ; ILPMIN: movl %{{.*}}, 12( -; ILPMIN: %for.end +; ILPMIN-LABEL: %for.end ; ; For -misched=ilpmax, verify that each expression subtree is ; scheduled independently, and that the imull/adds are clustered. ; -; ILPMAX: %for.body +; ILPMAX-LABEL: %for.body ; ILPMAX: movl %{{.*}}, ( ; ILPMAX: imull ; ILPMAX: imull @@ -84,7 +84,7 @@ ; ILPMAX: addl ; ILPMAX: addl ; ILPMAX: movl %{{.*}}, 12( -; ILPMAX: %for.end +; ILPMAX-LABEL: %for.end define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2, [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {