mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-21 21:29:41 +00:00
Storing the load/store instructions with the values and inspect them using Alias Analysis to make sure they don't alias, since the GEP pointer operand doesn't take the offset into account. Trying hard to not add any extra cost to loads and stores that don't overlap on global values, AA is *only* calculated if all of the previous attempts failed. Using biggest vector register size as the stride for the vectorization access, as we're being conservative and the cost model (which calculates the real vectorization factor) is only run after the legalization phase. We might re-think this relationship in the future, but for now, I'd rather be safe than sorry. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175818 91177308-0d34-0410-b5e6-96231b3b80d8
357 lines
12 KiB
LLVM
357 lines
12 KiB
LLVM
; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
|
|
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
|
|
|
|
%struct.anon = type { [100 x i32], i32, [100 x i32] }
|
|
|
|
@Foo = common global %struct.anon zeroinitializer, align 4
|
|
@PB = external global i32*
|
|
@PA = external global i32*
|
|
|
|
; int noAlias01 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[i] = Foo.B[i] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @noAlias01
|
|
; CHECK: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @noAlias01(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
|
store i32 %add, i32* %arrayidx1, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx2, align 4
|
|
ret i32 %7
|
|
}
|
|
|
|
; int mayAlias01 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[i] = Foo.B[SIZE-i-1] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @mayAlias01
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mayAlias01(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%sub = sub nsw i32 100, %1
|
|
%sub1 = sub nsw i32 %sub, 1
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
|
store i32 %add, i32* %arrayidx2, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx3, align 4
|
|
ret i32 %7
|
|
}
|
|
|
|
; int mayAlias02 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[SIZE-i-1] = Foo.B[i] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @mayAlias02
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mayAlias02(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%sub = sub nsw i32 100, %4
|
|
%sub1 = sub nsw i32 %sub, 1
|
|
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
|
|
store i32 %add, i32* %arrayidx2, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx3, align 4
|
|
ret i32 %7
|
|
}
|
|
|
|
; int mayAlias03 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; *(PA+i) = *(PB+SIZE-i-1) + a;
|
|
; return *(PA+a);
|
|
; }
|
|
; CHECK: define i32 @mayAlias03
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mayAlias03(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32** @PB, align 4
|
|
%add.ptr = getelementptr inbounds i32* %1, i32 100
|
|
%2 = load i32* %i, align 4
|
|
%idx.neg = sub i32 0, %2
|
|
%add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
|
|
%add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
|
|
%3 = load i32* %add.ptr2, align 4
|
|
%4 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %3, %4
|
|
%5 = load i32** @PA, align 4
|
|
%6 = load i32* %i, align 4
|
|
%add.ptr3 = getelementptr inbounds i32* %5, i32 %6
|
|
store i32 %add, i32* %add.ptr3, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%7 = load i32* %i, align 4
|
|
%inc = add nsw i32 %7, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%8 = load i32** @PA, align 4
|
|
%9 = load i32* %a.addr, align 4
|
|
%add.ptr4 = getelementptr inbounds i32* %8, i32 %9
|
|
%10 = load i32* %add.ptr4, align 4
|
|
ret i32 %10
|
|
}
|
|
|
|
; int mustAlias01 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @mustAlias01
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mustAlias01(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%sub = sub nsw i32 100, %1
|
|
%sub1 = sub nsw i32 %sub, 1
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%add2 = add nsw i32 %4, 10
|
|
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
|
|
store i32 %add, i32* %arrayidx3, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx4, align 4
|
|
ret i32 %7
|
|
}
|
|
|
|
; int mustAlias02 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[i] = Foo.B[SIZE-i-10] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @mustAlias02
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mustAlias02(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%sub = sub nsw i32 100, %1
|
|
%sub1 = sub nsw i32 %sub, 10
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
|
store i32 %add, i32* %arrayidx2, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx3, align 4
|
|
ret i32 %7
|
|
}
|
|
|
|
; int mustAlias03 (int a) {
|
|
; int i;
|
|
; for (i=0; i<SIZE; i++)
|
|
; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
|
|
; return Foo.A[a];
|
|
; }
|
|
; CHECK: define i32 @mustAlias03
|
|
; CHECK-NOT: add nsw <4 x i32>
|
|
; CHECK ret
|
|
|
|
define i32 @mustAlias03(i32 %a) nounwind {
|
|
entry:
|
|
%a.addr = alloca i32, align 4
|
|
%i = alloca i32, align 4
|
|
store i32 %a, i32* %a.addr, align 4
|
|
store i32 0, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc, %entry
|
|
%0 = load i32* %i, align 4
|
|
%cmp = icmp slt i32 %0, 100
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.body: ; preds = %for.cond
|
|
%1 = load i32* %i, align 4
|
|
%sub = sub nsw i32 100, %1
|
|
%sub1 = sub nsw i32 %sub, 10
|
|
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
|
%2 = load i32* %arrayidx, align 4
|
|
%3 = load i32* %a.addr, align 4
|
|
%add = add nsw i32 %2, %3
|
|
%4 = load i32* %i, align 4
|
|
%add2 = add nsw i32 %4, 10
|
|
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
|
|
store i32 %add, i32* %arrayidx3, align 4
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body
|
|
%5 = load i32* %i, align 4
|
|
%inc = add nsw i32 %5, 1
|
|
store i32 %inc, i32* %i, align 4
|
|
br label %for.cond
|
|
|
|
for.end: ; preds = %for.cond
|
|
%6 = load i32* %a.addr, align 4
|
|
%arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
|
%7 = load i32* %arrayidx4, align 4
|
|
ret i32 %7
|
|
}
|