mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
mi-sched: improve the generic register pressure comparison.
Only compare pressure within the same set. When multiple sets are affected, we prioritize the most constrained set.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189641 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4c60b8a78d
commit
da6fc15f0f
@ -112,14 +112,13 @@ public:
|
|||||||
assert(isValid() && "invalid PressureChange");
|
assert(isValid() && "invalid PressureChange");
|
||||||
return PSetID - 1;
|
return PSetID - 1;
|
||||||
}
|
}
|
||||||
|
// If PSetID is invalid, return UINT16_MAX to give it lowest priority.
|
||||||
|
unsigned getPSetOrMax() const { return (PSetID - 1) & UINT16_MAX; }
|
||||||
|
|
||||||
int getUnitInc() const { return UnitInc; }
|
int getUnitInc() const { return UnitInc; }
|
||||||
|
|
||||||
void setUnitInc(int Inc) { UnitInc = Inc; }
|
void setUnitInc(int Inc) { UnitInc = Inc; }
|
||||||
|
|
||||||
// If PSetID is invalid, convert to INT_MAX to give it lowest priority.
|
|
||||||
int getRank() const { return (PSetID - 1) & INT_MAX; }
|
|
||||||
|
|
||||||
bool operator==(const PressureChange &RHS) const {
|
bool operator==(const PressureChange &RHS) const {
|
||||||
return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc;
|
return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc;
|
||||||
}
|
}
|
||||||
|
@ -2182,21 +2182,19 @@ static bool tryPressure(const PressureChange &TryP,
|
|||||||
ConvergingScheduler::SchedCandidate &TryCand,
|
ConvergingScheduler::SchedCandidate &TryCand,
|
||||||
ConvergingScheduler::SchedCandidate &Cand,
|
ConvergingScheduler::SchedCandidate &Cand,
|
||||||
ConvergingScheduler::CandReason Reason) {
|
ConvergingScheduler::CandReason Reason) {
|
||||||
if (TryP.isValid() && CandP.isValid()) {
|
int TryRank = TryP.getPSetOrMax();
|
||||||
// If both candidates affect the same set, go with the smallest increase.
|
int CandRank = CandP.getPSetOrMax();
|
||||||
if (TryP.getPSet() == CandP.getPSet()) {
|
// If both candidates affect the same set, go with the smallest increase.
|
||||||
return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
|
if (TryRank == CandRank) {
|
||||||
Reason);
|
return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
|
||||||
}
|
Reason);
|
||||||
// If one candidate decreases and the other increases, go with it.
|
}
|
||||||
if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
|
// If one candidate decreases and the other increases, go with it.
|
||||||
Reason)) {
|
// Invalid candidates have UnitInc==0.
|
||||||
return true;
|
if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
|
||||||
}
|
Reason)) {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
// If TryP has lower Rank, it has a higher priority.
|
|
||||||
int TryRank = TryP.getRank();
|
|
||||||
int CandRank = CandP.getRank();
|
|
||||||
// If the candidates are decreasing pressure, reverse priority.
|
// If the candidates are decreasing pressure, reverse priority.
|
||||||
if (TryP.getUnitInc() < 0)
|
if (TryP.getUnitInc() < 0)
|
||||||
std::swap(TryRank, CandRank);
|
std::swap(TryRank, CandRank);
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
; RUN-disabled: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-machineinstrs | FileCheck %s
|
||||||
; RUN: true
|
|
||||||
;
|
;
|
||||||
; Verify that misched resource/latency balance heuristics are sane.
|
; Verify that misched resource/latency balance heuristics are sane.
|
||||||
|
|
||||||
@ -16,7 +15,7 @@ entry:
|
|||||||
; Since mmult1 IR is already in good order, this effectively ensures
|
; Since mmult1 IR is already in good order, this effectively ensures
|
||||||
; the scheduler maintains source order.
|
; the scheduler maintains source order.
|
||||||
;
|
;
|
||||||
; CHECK: %for.body
|
; CHECK-LABEL: %for.body
|
||||||
; CHECK-NOT: %rsp
|
; CHECK-NOT: %rsp
|
||||||
; CHECK: imull 4
|
; CHECK: imull 4
|
||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
@ -46,7 +45,7 @@ entry:
|
|||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
; CHECK: addl
|
; CHECK: addl
|
||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
; CHECK: %end
|
; CHECK-LABEL: %end
|
||||||
for.body:
|
for.body:
|
||||||
%indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
|
%indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
|
||||||
%tmp57 = load i32* %tmp56, align 4
|
%tmp57 = load i32* %tmp56, align 4
|
||||||
@ -121,7 +120,7 @@ end:
|
|||||||
; Unlike the above loop, this IR starts out bad and must be
|
; Unlike the above loop, this IR starts out bad and must be
|
||||||
; rescheduled.
|
; rescheduled.
|
||||||
;
|
;
|
||||||
; CHECK: %for.body
|
; CHECK-LABEL: %for.body
|
||||||
; CHECK-NOT: %rsp
|
; CHECK-NOT: %rsp
|
||||||
; CHECK: imull 4
|
; CHECK: imull 4
|
||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
@ -151,7 +150,7 @@ end:
|
|||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
; CHECK: addl
|
; CHECK: addl
|
||||||
; CHECK-NOT: {{imull|rsp}}
|
; CHECK-NOT: {{imull|rsp}}
|
||||||
; CHECK: %end
|
; CHECK-LABEL: %end
|
||||||
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
|
define void @unrolled_mmult2(i32* %tmp55, i32* %tmp56, i32* %pre, i32* %pre94,
|
||||||
i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
|
i32* %pre95, i32* %pre96, i32* %pre97, i32* %pre98, i32* %pre99,
|
||||||
i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
|
i32* %pre100, i32* %pre101, i32* %pre102, i32* %pre103, i32* %pre104)
|
||||||
@ -233,8 +232,8 @@ end:
|
|||||||
; balanced heuristics are interesting here because we have resource,
|
; balanced heuristics are interesting here because we have resource,
|
||||||
; latency, and register limits all at once. For now, simply check that
|
; latency, and register limits all at once. For now, simply check that
|
||||||
; we don't use any callee-saves.
|
; we don't use any callee-saves.
|
||||||
; CHECK: @encpc1
|
; CHECK-LABEL: @encpc1
|
||||||
; CHECK: %entry
|
; CHECK-LABEL: %entry
|
||||||
; CHECK-NOT: push
|
; CHECK-NOT: push
|
||||||
; CHECK-NOT: pop
|
; CHECK-NOT: pop
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
; REQUIRES: asserts
|
; REQUIRES: asserts
|
||||||
; RUN-disabled: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
|
||||||
; RUN: true
|
|
||||||
;
|
;
|
||||||
; Verify that register pressure heuristics are working in MachineScheduler.
|
; Verify that register pressure heuristics are working in MachineScheduler.
|
||||||
;
|
;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
|
||||||
; RUN: -misched-topdown -verify-machineinstrs \
|
; RUN: -misched-topdown -verify-machineinstrs \
|
||||||
; RUN: | FileCheck %s -check-prefix=TOPDOWN-disabled
|
; RUN: | FileCheck %s -check-prefix=TOPDOWN
|
||||||
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
|
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
|
||||||
; RUN: -misched=ilpmin -verify-machineinstrs \
|
; RUN: -misched=ilpmin -verify-machineinstrs \
|
||||||
; RUN: | FileCheck %s -check-prefix=ILPMIN
|
; RUN: | FileCheck %s -check-prefix=ILPMIN
|
||||||
@ -15,19 +15,19 @@
|
|||||||
; been reordered with the stores. This tests the scheduler's cheap
|
; been reordered with the stores. This tests the scheduler's cheap
|
||||||
; alias analysis ability (that doesn't require any AliasAnalysis pass).
|
; alias analysis ability (that doesn't require any AliasAnalysis pass).
|
||||||
;
|
;
|
||||||
; TOPDOWN-disabled: %for.body
|
; TOPDOWN-LABEL: %for.body
|
||||||
; TOPDOWN: movl %{{.*}}, (
|
; TOPDOWN: movl %{{.*}}, (
|
||||||
; TOPDOWN: imull {{[0-9]*}}(
|
; TOPDOWN: imull {{[0-9]*}}(
|
||||||
; TOPDOWN: movl %{{.*}}, 4(
|
; TOPDOWN: movl %{{.*}}, 4(
|
||||||
; TOPDOWN: imull {{[0-9]*}}(
|
; TOPDOWN: imull {{[0-9]*}}(
|
||||||
; TOPDOWN: movl %{{.*}}, 8(
|
; TOPDOWN: movl %{{.*}}, 8(
|
||||||
; TOPDOWN: movl %{{.*}}, 12(
|
; TOPDOWN: movl %{{.*}}, 12(
|
||||||
; TOPDOWN: %for.end
|
; TOPDOWN-LABEL: %for.end
|
||||||
;
|
;
|
||||||
; For -misched=ilpmin, verify that each expression subtree is
|
; For -misched=ilpmin, verify that each expression subtree is
|
||||||
; scheduled independently, and that the imull/adds are interleaved.
|
; scheduled independently, and that the imull/adds are interleaved.
|
||||||
;
|
;
|
||||||
; ILPMIN: %for.body
|
; ILPMIN-LABEL: %for.body
|
||||||
; ILPMIN: movl %{{.*}}, (
|
; ILPMIN: movl %{{.*}}, (
|
||||||
; ILPMIN: imull
|
; ILPMIN: imull
|
||||||
; ILPMIN: imull
|
; ILPMIN: imull
|
||||||
@ -53,12 +53,12 @@
|
|||||||
; ILPMIN: imull
|
; ILPMIN: imull
|
||||||
; ILPMIN: addl
|
; ILPMIN: addl
|
||||||
; ILPMIN: movl %{{.*}}, 12(
|
; ILPMIN: movl %{{.*}}, 12(
|
||||||
; ILPMIN: %for.end
|
; ILPMIN-LABEL: %for.end
|
||||||
;
|
;
|
||||||
; For -misched=ilpmax, verify that each expression subtree is
|
; For -misched=ilpmax, verify that each expression subtree is
|
||||||
; scheduled independently, and that the imull/adds are clustered.
|
; scheduled independently, and that the imull/adds are clustered.
|
||||||
;
|
;
|
||||||
; ILPMAX: %for.body
|
; ILPMAX-LABEL: %for.body
|
||||||
; ILPMAX: movl %{{.*}}, (
|
; ILPMAX: movl %{{.*}}, (
|
||||||
; ILPMAX: imull
|
; ILPMAX: imull
|
||||||
; ILPMAX: imull
|
; ILPMAX: imull
|
||||||
@ -84,7 +84,7 @@
|
|||||||
; ILPMAX: addl
|
; ILPMAX: addl
|
||||||
; ILPMAX: addl
|
; ILPMAX: addl
|
||||||
; ILPMAX: movl %{{.*}}, 12(
|
; ILPMAX: movl %{{.*}}, 12(
|
||||||
; ILPMAX: %for.end
|
; ILPMAX-LABEL: %for.end
|
||||||
|
|
||||||
define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
|
define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
|
||||||
[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
|
[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user