mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-18 10:31:57 +00:00
b42a626122
version uses a new algorithm for evaluating the binomial coefficients which is significantly more efficient for AddRecs of more than 2 terms (see the comments in the code for details on how the algorithm works). It also fixes some bugs: it removes the arbitrary length restriction for AddRecs, it fixes the silent generation of incorrect code for AddRecs which require a wide calculation width, and it fixes an issue where we were incorrectly truncating the iteration count too far when evaluating an AddRec expression narrower than the induction variable. There are still a few related issues I know of: I think there's still an issue with the SCEVExpander expansion of AddRec in terms of the width of the induction variable used. The hack to avoid generating too-wide integers shouldn't be necessary; instead, the callers should be considering the cost of the expansion before expanding it (in addition to not expanding too-wide integers, we might not want to expand expressions that are really expensive, especially when optimizing for size; calculating a length-17 32-bit AddRec currently generates about 250 instructions of straight-line code on X86). Also, for long 32-bit AddRecs on X86, CodeGen really sucks at scheduling the code. I'm planning on filing follow-up PRs for these issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54332 91177308-0d34-0410-b5e6-96231b3b80d8
26 lines
480 B
LLVM
26 lines
480 B
LLVM
; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \
; RUN:   -scalar-evolution-max-iterations=0 | grep -F "Exits: 20028"
; PR2621
;
; Regression test for scalar-evolution exit-value computation when a 16-bit
; recurrence is driven by a 32-bit induction variable.
;
; @a runs a loop with a 32-bit counter %i.0 (0, 1, 2, ...) that continues
; while %i.0 <u 888888.  On every iteration the low 16 bits of the counter
; are added (with i16 wrap-around) into the accumulator %x16.0.  The
; accumulator is zero-extended to i32 and returned.
;
; The pipeline above only passes if the analysis output contains the text
; "Exits: 20028".  Note that 20028 == (888888 * 888889 / 2) mod 2^16;
; presumably this is the exit value reported for the second add (%1) --
; TODO confirm against the analysis output.
;
; The unnamed instructions rely on LLVM's implicit sequential numbering
; (%0 .. %4); the trailing comments record the number each one receives, as
; implied by the uses in this function.  Do not insert or reorder unnamed
; instructions without updating the %N references.

define i32 @a() nounwind {
entry:
  br label %bb1

bb:                                     ; preds = %bb1 -- loop body
  trunc i32 %i.0 to i16                 ; %0: low 16 bits of the counter
  add i16 %0, %x16.0                    ; %1: next accumulator value (i16 wrap)
  add i32 %i.0, 1                       ; %2: next counter value
  br label %bb1

bb1:                                    ; preds = %entry, %bb -- loop header
  %i.0 = phi i32 [ 0, %entry ], [ %2, %bb ]
  %x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ]
  icmp ult i32 %i.0, 888888             ; %3: unsigned "keep looping" test
  br i1 %3, label %bb, label %bb2

bb2:                                    ; preds = %bb1 -- loop exit
  zext i16 %x16.0 to i32                ; %4: widen accumulator for return
  ret i32 %4
}