mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-20 12:31:40 +00:00
Simplify a README.txt entry significantly to expose the core issue.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123556 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
02efda4b4e
commit
cad33c624e
@ -2162,37 +2162,35 @@ Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i.
|
|||||||
|
|
||||||
clang -O3 -fno-exceptions currently compiles this code:
|
clang -O3 -fno-exceptions currently compiles this code:
|
||||||
|
|
||||||
void f(int N) {
|
void f(char* a, int n) {
|
||||||
std::vector<int> v(N);
|
__builtin_memset(a, 0, n);
|
||||||
for (int k = 0; k < N; ++k)
|
for (int i = 0; i < n; ++i)
|
||||||
v[k] = 0;
|
a[i] = 0;
|
||||||
|
|
||||||
extern void sink(void*); sink(&v);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
into almost the same as the previous note, but replace its final BB with:
|
into:
|
||||||
|
|
||||||
for.body.lr.ph: ; preds = %cond.true.i.i.i.i
|
define void @_Z1fPci(i8* nocapture %a, i32 %n) nounwind {
|
||||||
%mul.i.i.i.i.i = shl i64 %conv, 2
|
entry:
|
||||||
%call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind
|
%conv = sext i32 %n to i64
|
||||||
%0 = bitcast i8* %call3.i.i.i.i.i to i32*
|
tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %conv, i32 1, i1 false)
|
||||||
store i32* %0, i32** %v8.sub, align 8, !tbaa !0
|
%cmp8 = icmp sgt i32 %n, 0
|
||||||
%add.ptr.i.i.i = getelementptr inbounds i32* %0, i64 %conv
|
br i1 %cmp8, label %for.body.lr.ph, label %for.end
|
||||||
store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0
|
|
||||||
call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false)
|
|
||||||
store i32* %add.ptr.i.i.i, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0
|
|
||||||
%tmp18 = add i32 %N, -1
|
|
||||||
%tmp19 = zext i32 %tmp18 to i64
|
|
||||||
%tmp20 = shl i64 %tmp19, 2
|
|
||||||
%tmp21 = add i64 %tmp20, 4
|
|
||||||
call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %tmp21, i32 4, i1 false)
|
|
||||||
br label %for.end
|
|
||||||
|
|
||||||
First off, why (((zext %N - 1) << 2) + 4) instead of the ((sext %N) << 2) done
|
for.body.lr.ph: ; preds = %entry
|
||||||
previously? (or better yet, re-use that one?)
|
%tmp10 = add i32 %n, -1
|
||||||
|
%tmp11 = zext i32 %tmp10 to i64
|
||||||
|
%tmp12 = add i64 %tmp11, 1
|
||||||
|
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %tmp12, i32 1, i1 false)
|
||||||
|
ret void
|
||||||
|
|
||||||
Then, the really painful one is the second memset, of the same memory, to the
|
for.end: ; preds = %entry
|
||||||
same value.
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold
|
||||||
|
the two memsets together. The issue with %n seems to stem from poor handling
|
||||||
|
of the original loop.
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user