llvm-6502/test/CodeGen/X86/misched-matmul.ll
Hal Finkel d3aa46a1bc [MiSched] Fix a logic error in tryPressure()
Fixes a logic error in the MachineScheduler found by Steve Montgomery (and
confirmed by Andy). This has gone unfixed for months because the fix has been
found to introduce some small performance regressions. However, Andy has
recommended that, at this point, we fix this to avoid further dependence on the
incorrect behavior (and then follow up separately on any regressions), and I
agree.

Fixes PR18883.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219512 91177308-0d34-0410-b5e6-96231b3b80d8
2014-10-10 17:06:20 +00:00

228 lines
11 KiB
LLVM

; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
;
; Verify that register pressure heuristics are working in MachineScheduler.
;
; We can further reduce spills in this case with a global register
; pressure heuristic, such as Sethi-Ullman numbering or biasing toward
; scheduled subtrees. However, these heuristics are marginally
; beneficial on x86_64 and exacerbate register pressure in other
; more complex cases.
;
; CHECK: @wrap_mul4
; CHECK: 23 regalloc - Number of spills inserted
; wrap_mul4: fully unrolled 4x4 double-precision matrix product.
;
; %A and %B are each read as four [4 x double] rows (both indices range over
; 0..3). The 16 results are written to %Out[0..15] in row-major order, i.e.
;   Out[4*i + j] = A[i][0]*B[0][j] + A[i][1]*B[1][j]
;                + A[i][2]*B[2][j] + A[i][3]*B[3][j]
;
; Every load and every fmul/fadd precedes the stores at the end of the block,
; so in source order all 16 sums are still live when the first store is
; reached.
;
; NOTE(review): the llc invocation for this test requests source-order pre-RA
; scheduling and the test expects an exact spill count, so the instruction
; order below presumably matters -- confirm before reordering anything.
define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
entry:
; --- Row 0, column 0 -------------------------------------------------------
; Loads A[0][0..3] into %0, %2, %4, %6 and B[0..3][0] into %1, %3, %5, %7,
; interleaved with the multiply/add chain that produces %add20.i.
%arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
%0 = load double* %arrayidx1.i, align 8
%arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
%1 = load double* %arrayidx3.i, align 8
%mul.i = fmul double %0, %1
%arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
%2 = load double* %arrayidx5.i, align 8
%arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
%3 = load double* %arrayidx7.i, align 8
%mul8.i = fmul double %2, %3
%add.i = fadd double %mul.i, %mul8.i
%arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
%4 = load double* %arrayidx10.i, align 8
%arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
%5 = load double* %arrayidx12.i, align 8
%mul13.i = fmul double %4, %5
%add14.i = fadd double %add.i, %mul13.i
%arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
%6 = load double* %arrayidx16.i, align 8
%arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
%7 = load double* %arrayidx18.i, align 8
%mul19.i = fmul double %6, %7
%add20.i = fadd double %add14.i, %mul19.i
; --- Row 0, column 1 -------------------------------------------------------
; Reuses the A[0][k] values (%0, %2, %4, %6); loads B[0..3][1] into %8..%11.
%arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
%8 = load double* %arrayidx25.i, align 8
%mul26.i = fmul double %0, %8
%arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
%9 = load double* %arrayidx30.i, align 8
%mul31.i = fmul double %2, %9
%add32.i = fadd double %mul26.i, %mul31.i
%arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
%10 = load double* %arrayidx36.i, align 8
%mul37.i = fmul double %4, %10
%add38.i = fadd double %add32.i, %mul37.i
%arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
%11 = load double* %arrayidx42.i, align 8
%mul43.i = fmul double %6, %11
%add44.i = fadd double %add38.i, %mul43.i
; --- Row 0, column 2 -------------------------------------------------------
; Loads B[0..3][2] into %12..%15.
%arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
%12 = load double* %arrayidx49.i, align 8
%mul50.i = fmul double %0, %12
%arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
%13 = load double* %arrayidx54.i, align 8
%mul55.i = fmul double %2, %13
%add56.i = fadd double %mul50.i, %mul55.i
%arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
%14 = load double* %arrayidx60.i, align 8
%mul61.i = fmul double %4, %14
%add62.i = fadd double %add56.i, %mul61.i
%arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
%15 = load double* %arrayidx66.i, align 8
%mul67.i = fmul double %6, %15
%add68.i = fadd double %add62.i, %mul67.i
; --- Row 0, column 3 -------------------------------------------------------
; Loads B[0..3][3] into %16..%19. After this point all 16 elements of B are
; held in SSA values (%1, %3, %5, %7, %8..%19) and B is not loaded again.
%arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
%16 = load double* %arrayidx73.i, align 8
%mul74.i = fmul double %0, %16
%arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
%17 = load double* %arrayidx78.i, align 8
%mul79.i = fmul double %2, %17
%add80.i = fadd double %mul74.i, %mul79.i
%arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
%18 = load double* %arrayidx84.i, align 8
%mul85.i = fmul double %4, %18
%add86.i = fadd double %add80.i, %mul85.i
%arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
%19 = load double* %arrayidx90.i, align 8
%mul91.i = fmul double %6, %19
%add92.i = fadd double %add86.i, %mul91.i
; --- Row 1, column 0 -------------------------------------------------------
; Loads A[1][0..3] into %20..%23. From here on the fmul operands are written
; B-value first, A-value second (the reverse of row 0).
%arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
%20 = load double* %arrayidx95.i, align 8
%mul98.i = fmul double %1, %20
%arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
%21 = load double* %arrayidx100.i, align 8
%mul103.i = fmul double %3, %21
%add104.i = fadd double %mul98.i, %mul103.i
%arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
%22 = load double* %arrayidx106.i, align 8
%mul109.i = fmul double %5, %22
%add110.i = fadd double %add104.i, %mul109.i
%arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
%23 = load double* %arrayidx112.i, align 8
%mul115.i = fmul double %7, %23
%add116.i = fadd double %add110.i, %mul115.i
; --- Row 1, columns 1..3 ---------------------------------------------------
; Pure arithmetic on already-loaded values; no further loads.
%mul122.i = fmul double %8, %20
%mul127.i = fmul double %9, %21
%add128.i = fadd double %mul122.i, %mul127.i
%mul133.i = fmul double %10, %22
%add134.i = fadd double %add128.i, %mul133.i
%mul139.i = fmul double %11, %23
%add140.i = fadd double %add134.i, %mul139.i
%mul146.i = fmul double %12, %20
%mul151.i = fmul double %13, %21
%add152.i = fadd double %mul146.i, %mul151.i
%mul157.i = fmul double %14, %22
%add158.i = fadd double %add152.i, %mul157.i
%mul163.i = fmul double %15, %23
%add164.i = fadd double %add158.i, %mul163.i
%mul170.i = fmul double %16, %20
%mul175.i = fmul double %17, %21
%add176.i = fadd double %mul170.i, %mul175.i
%mul181.i = fmul double %18, %22
%add182.i = fadd double %add176.i, %mul181.i
%mul187.i = fmul double %19, %23
%add188.i = fadd double %add182.i, %mul187.i
; --- Row 2, column 0 -------------------------------------------------------
; Loads A[2][0..3] into %24..%27.
%arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
%24 = load double* %arrayidx191.i, align 8
%mul194.i = fmul double %1, %24
%arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
%25 = load double* %arrayidx196.i, align 8
%mul199.i = fmul double %3, %25
%add200.i = fadd double %mul194.i, %mul199.i
%arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
%26 = load double* %arrayidx202.i, align 8
%mul205.i = fmul double %5, %26
%add206.i = fadd double %add200.i, %mul205.i
%arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
%27 = load double* %arrayidx208.i, align 8
%mul211.i = fmul double %7, %27
%add212.i = fadd double %add206.i, %mul211.i
; --- Row 2, columns 1..3 ---------------------------------------------------
; Pure arithmetic on already-loaded values; no further loads.
%mul218.i = fmul double %8, %24
%mul223.i = fmul double %9, %25
%add224.i = fadd double %mul218.i, %mul223.i
%mul229.i = fmul double %10, %26
%add230.i = fadd double %add224.i, %mul229.i
%mul235.i = fmul double %11, %27
%add236.i = fadd double %add230.i, %mul235.i
%mul242.i = fmul double %12, %24
%mul247.i = fmul double %13, %25
%add248.i = fadd double %mul242.i, %mul247.i
%mul253.i = fmul double %14, %26
%add254.i = fadd double %add248.i, %mul253.i
%mul259.i = fmul double %15, %27
%add260.i = fadd double %add254.i, %mul259.i
%mul266.i = fmul double %16, %24
%mul271.i = fmul double %17, %25
%add272.i = fadd double %mul266.i, %mul271.i
%mul277.i = fmul double %18, %26
%add278.i = fadd double %add272.i, %mul277.i
%mul283.i = fmul double %19, %27
%add284.i = fadd double %add278.i, %mul283.i
; --- Row 3, column 0 -------------------------------------------------------
; Loads A[3][0..3] into %28..%31.
%arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
%28 = load double* %arrayidx287.i, align 8
%mul290.i = fmul double %1, %28
%arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
%29 = load double* %arrayidx292.i, align 8
%mul295.i = fmul double %3, %29
%add296.i = fadd double %mul290.i, %mul295.i
%arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
%30 = load double* %arrayidx298.i, align 8
%mul301.i = fmul double %5, %30
%add302.i = fadd double %add296.i, %mul301.i
%arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
%31 = load double* %arrayidx304.i, align 8
%mul307.i = fmul double %7, %31
%add308.i = fadd double %add302.i, %mul307.i
; --- Row 3, columns 1..3 ---------------------------------------------------
; Pure arithmetic on already-loaded values; no further loads.
%mul314.i = fmul double %8, %28
%mul319.i = fmul double %9, %29
%add320.i = fadd double %mul314.i, %mul319.i
%mul325.i = fmul double %10, %30
%add326.i = fadd double %add320.i, %mul325.i
%mul331.i = fmul double %11, %31
%add332.i = fadd double %add326.i, %mul331.i
%mul338.i = fmul double %12, %28
%mul343.i = fmul double %13, %29
%add344.i = fadd double %mul338.i, %mul343.i
%mul349.i = fmul double %14, %30
%add350.i = fadd double %add344.i, %mul349.i
%mul355.i = fmul double %15, %31
%add356.i = fadd double %add350.i, %mul355.i
%mul362.i = fmul double %16, %28
%mul367.i = fmul double %17, %29
%add368.i = fadd double %mul362.i, %mul367.i
%mul373.i = fmul double %18, %30
%add374.i = fadd double %add368.i, %mul373.i
%mul379.i = fmul double %19, %31
%add380.i = fadd double %add374.i, %mul379.i
; --- Write-back ------------------------------------------------------------
; Stores the 16 sums to Out[0..15]: row 0 results go to Out[0..3], row 1 to
; Out[4..7], row 2 to Out[8..11], row 3 to Out[12..15].
store double %add20.i, double* %Out, align 8
%Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1
store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
%Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2
store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
%Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3
store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
%Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4
store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
%Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5
store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
%Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6
store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
%Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7
store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
%Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8
store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
%Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9
store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
%Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10
store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
%Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11
store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
%Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12
store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
%Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13
store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
%Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14
store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
%Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15
store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
ret void
}
; Attribute group #0, referenced by the definition of @wrap_mul4 above.
; It marks the function noinline/nounwind/ssp/uwtable and sets each of the
; listed string attributes (FP-math and frame-pointer options) to "false".
attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }