From 614dacc9102ed3bf3fe4c985b5bc12857a26bae2 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 5 Apr 2013 00:31:34 +0000 Subject: [PATCH] RegisterPressure heuristics currently require signed comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178823 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineScheduler.cpp | 4 +- test/CodeGen/X86/misched-matmul.ll | 228 +++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/misched-matmul.ll diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ed6b9861dcf..5bd2349b50f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1660,7 +1660,7 @@ initResourceDelta(const ScheduleDAGMI *DAG, } /// Return true if this heuristic determines order. -static bool tryLess(unsigned TryVal, unsigned CandVal, +static bool tryLess(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { @@ -1676,7 +1676,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal, return false; } -static bool tryGreater(unsigned TryVal, unsigned CandVal, +static bool tryGreater(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll new file mode 100644 index 00000000000..0f6e442b1a8 --- /dev/null +++ b/test/CodeGen/X86/misched-matmul.ll @@ -0,0 +1,228 @@ +; REQUIRES: asserts +; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s +; +; Verify that register pressure heuristics are working in MachineScheduler. +; +; When we enable subtree scheduling heuristics on X86, we may need a +; flag to disable it for this test case. +; +; CHECK: @wrap_mul4 +; CHECK: 30 regalloc - Number of spills inserted + +define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 { +entry: + %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0 + %0 = load double* %arrayidx1.i, align 8, !tbaa !0 + %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0 + %1 = load double* %arrayidx3.i, align 8, !tbaa !0 + %mul.i = fmul double %0, %1 + %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1 + %2 = load double* %arrayidx5.i, align 8, !tbaa !0 + %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0 + %3 = load double* %arrayidx7.i, align 8, !tbaa !0 + %mul8.i = fmul double %2, %3 + %add.i = fadd double %mul.i, %mul8.i + %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2 + %4 = load double* %arrayidx10.i, align 8, !tbaa !0 + %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0 + %5 = load double* %arrayidx12.i, align 8, !tbaa !0 + %mul13.i = fmul double %4, %5 + %add14.i = fadd double %add.i, %mul13.i + %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3 + %6 = load double* %arrayidx16.i, align 8, !tbaa !0 + %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0 + %7 = load double* %arrayidx18.i, align 8, !tbaa !0 + %mul19.i = fmul double %6, %7 + %add20.i = fadd double %add14.i, %mul19.i + %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1 + %8 = load double* %arrayidx25.i, align 8, !tbaa !0 + %mul26.i = fmul double %0, %8 + %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1 + %9 = load double* %arrayidx30.i, align 8, !tbaa !0 + %mul31.i = fmul double %2, %9 + %add32.i = fadd double %mul26.i, %mul31.i + %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1 + %10 = load double* %arrayidx36.i, align 8, !tbaa !0 + %mul37.i = fmul double %4, %10 + %add38.i = fadd double %add32.i, %mul37.i + %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1 + %11 = load double* %arrayidx42.i, align 8, !tbaa !0 + %mul43.i = fmul double %6, %11 + %add44.i = fadd double %add38.i, %mul43.i + %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2 + %12 = load double* %arrayidx49.i, align 8, !tbaa !0 + %mul50.i = fmul double %0, %12 + %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2 + %13 = load double* %arrayidx54.i, align 8, !tbaa !0 + %mul55.i = fmul double %2, %13 + %add56.i = fadd double %mul50.i, %mul55.i + %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2 + %14 = load double* %arrayidx60.i, align 8, !tbaa !0 + %mul61.i = fmul double %4, %14 + %add62.i = fadd double %add56.i, %mul61.i + %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2 + %15 = load double* %arrayidx66.i, align 8, !tbaa !0 + %mul67.i = fmul double %6, %15 + %add68.i = fadd double %add62.i, %mul67.i + %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3 + %16 = load double* %arrayidx73.i, align 8, !tbaa !0 + %mul74.i = fmul double %0, %16 + %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3 + %17 = load double* %arrayidx78.i, align 8, !tbaa !0 + %mul79.i = fmul double %2, %17 + %add80.i = fadd double %mul74.i, %mul79.i + %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3 + %18 = load double* %arrayidx84.i, align 8, !tbaa !0 + %mul85.i = fmul double %4, %18 + %add86.i = fadd double %add80.i, %mul85.i + %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3 + %19 = load double* %arrayidx90.i, align 8, !tbaa !0 + %mul91.i = fmul double %6, %19 + %add92.i = fadd double %add86.i, %mul91.i + %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0 + %20 = load double* %arrayidx95.i, align 8, !tbaa !0 + %mul98.i = fmul double %1, %20 + %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1 + %21 = load double* %arrayidx100.i, align 8, !tbaa !0 + %mul103.i = fmul double %3, %21 + %add104.i = fadd double %mul98.i, %mul103.i + %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2 + %22 = load double* %arrayidx106.i, align 8, !tbaa !0 + %mul109.i = fmul double %5, %22 + %add110.i = fadd double %add104.i, %mul109.i + %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3 + %23 = load double* %arrayidx112.i, align 8, !tbaa !0 + %mul115.i = fmul double %7, %23 + %add116.i = fadd double %add110.i, %mul115.i + %mul122.i = fmul double %8, %20 + %mul127.i = fmul double %9, %21 + %add128.i = fadd double %mul122.i, %mul127.i + %mul133.i = fmul double %10, %22 + %add134.i = fadd double %add128.i, %mul133.i + %mul139.i = fmul double %11, %23 + %add140.i = fadd double %add134.i, %mul139.i + %mul146.i = fmul double %12, %20 + %mul151.i = fmul double %13, %21 + %add152.i = fadd double %mul146.i, %mul151.i + %mul157.i = fmul double %14, %22 + %add158.i = fadd double %add152.i, %mul157.i + %mul163.i = fmul double %15, %23 + %add164.i = fadd double %add158.i, %mul163.i + %mul170.i = fmul double %16, %20 + %mul175.i = fmul double %17, %21 + %add176.i = fadd double %mul170.i, %mul175.i + %mul181.i = fmul double %18, %22 + %add182.i = fadd double %add176.i, %mul181.i + %mul187.i = fmul double %19, %23 + %add188.i = fadd double %add182.i, %mul187.i + %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0 + %24 = load double* %arrayidx191.i, align 8, !tbaa !0 + %mul194.i = fmul double %1, %24 + %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1 + %25 = load double* %arrayidx196.i, align 8, !tbaa !0 + %mul199.i = fmul double %3, %25 + %add200.i = fadd double %mul194.i, %mul199.i + %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2 + %26 = load double* %arrayidx202.i, align 8, !tbaa !0 + %mul205.i = fmul double %5, %26 + %add206.i = fadd double %add200.i, %mul205.i + %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3 + %27 = load double* %arrayidx208.i, align 8, !tbaa !0 + %mul211.i = fmul double %7, %27 + %add212.i = fadd double %add206.i, %mul211.i + %mul218.i = fmul double %8, %24 + %mul223.i = fmul double %9, %25 + %add224.i = fadd double %mul218.i, %mul223.i + %mul229.i = fmul double %10, %26 + %add230.i = fadd double %add224.i, %mul229.i + %mul235.i = fmul double %11, %27 + %add236.i = fadd double %add230.i, %mul235.i + %mul242.i = fmul double %12, %24 + %mul247.i = fmul double %13, %25 + %add248.i = fadd double %mul242.i, %mul247.i + %mul253.i = fmul double %14, %26 + %add254.i = fadd double %add248.i, %mul253.i + %mul259.i = fmul double %15, %27 + %add260.i = fadd double %add254.i, %mul259.i + %mul266.i = fmul double %16, %24 + %mul271.i = fmul double %17, %25 + %add272.i = fadd double %mul266.i, %mul271.i + %mul277.i = fmul double %18, %26 + %add278.i = fadd double %add272.i, %mul277.i + %mul283.i = fmul double %19, %27 + %add284.i = fadd double %add278.i, %mul283.i + %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0 + %28 = load double* %arrayidx287.i, align 8, !tbaa !0 + %mul290.i = fmul double %1, %28 + %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1 + %29 = load double* %arrayidx292.i, align 8, !tbaa !0 + %mul295.i = fmul double %3, %29 + %add296.i = fadd double %mul290.i, %mul295.i + %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2 + %30 = load double* %arrayidx298.i, align 8, !tbaa !0 + %mul301.i = fmul double %5, %30 + %add302.i = fadd double %add296.i, %mul301.i + %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3 + %31 = load double* %arrayidx304.i, align 8, !tbaa !0 + %mul307.i = fmul double %7, %31 + %add308.i = fadd double %add302.i, %mul307.i + %mul314.i = fmul double %8, %28 + %mul319.i = fmul double %9, %29 + %add320.i = fadd double %mul314.i, %mul319.i + %mul325.i = fmul double %10, %30 + %add326.i = fadd double %add320.i, %mul325.i + %mul331.i = fmul double %11, %31 + %add332.i = fadd double %add326.i, %mul331.i + %mul338.i = fmul double %12, %28 + %mul343.i = fmul double %13, %29 + %add344.i = fadd double %mul338.i, %mul343.i + %mul349.i = fmul double %14, %30 + %add350.i = fadd double %add344.i, %mul349.i + %mul355.i = fmul double %15, %31 + %add356.i = fadd double %add350.i, %mul355.i + %mul362.i = fmul double %16, %28 + %mul367.i = fmul double %17, %29 + %add368.i = fadd double %mul362.i, %mul367.i + %mul373.i = fmul double %18, %30 + %add374.i = fadd double %add368.i, %mul373.i + %mul379.i = fmul double %19, %31 + %add380.i = fadd double %add374.i, %mul379.i + store double %add20.i, double* %Out, align 8 + %Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1 + store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8 + %Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2 + store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8 + %Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3 + store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8 + %Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4 + store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8 + %Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5 + store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8 + %Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6 + store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8 + %Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7 + store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8 + %Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8 + store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8 + %Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9 + store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8 + %Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10 + store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8 + %Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11 + store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8 + %Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12 + store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8 + %Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13 + store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8 + %Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14 + store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8 + %Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15 + store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8 + ret void +} + +attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"}