mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 19:31:58 +00:00
614fe873ce
{code}
// loop body
... = a[i]      (1)
... = a[i+1]    (2)
.......
a[i+1] = ....   (3)
a[i] = ...      (4)
{code}
The algorithm collects memory access candidates from AliasSetTracker and then checks the memory dependences between them. Memory accesses are unique in AliasSetTracker, and a single memory access in AliasSetTracker may map to multiple entries in AccessAnalysis, which can cover both 'read' and 'write'. Originally, the algorithm checked only the 'write' entry in Accesses if only a 'write' existed. This is incorrect: as a consequence, all read accesses were ignored and some RAW and WAR dependences were missed. For the case given above, if we ignore the two reads, the dependence between (1) and (3) cannot be captured, and the loop will be incorrectly vectorized. The fix simply inserts a new loop to find all entries in Accesses. Since it skips most other memory accesses by checking the Value pointer at the very beginning of the loop, it should not visibly increase compile time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225159 91177308-0d34-0410-b5e6-96231b3b80d8
27 lines
913 B
LLVM
27 lines
913 B
LLVM
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
; The -debug-only option used below is available only in assertion-enabled
; builds of opt, so the test must be skipped on every other configuration
; instead of failing on an unknown command-line option.
; REQUIRES: asserts
; RUN: opt < %s -S -loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s
|
|
|
|
; The vectorizer's debug stream (stderr, merged into stdout by the command
; above) must report that the loop was rejected because of memory conflicts.
; CHECK: LV: Can't vectorize due to memory conflicts
|
|
|
|
; test_loop_novect: loop whose body loads a[i] and a[i+1] and then stores each
; loaded value into the other slot. Both addresses are read and written in the
; same iteration, and a[i+1] written in one iteration is the a[i] read by the
; next one. The FileCheck directive in this file expects the loop vectorizer
; to reject the loop because of memory conflicts.
;
;   %arr  pointer to the base pointer of the double array
;   %n    induction-variable value at which the loop exits
define void @test_loop_novect(double** %arr, i64 %n) {
|
|
for.body.lr.ph:
|
|
; The array base pointer is loaded once, before the loop is entered.
%t = load double** %arr, align 8
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %for.body.lr.ph
|
|
; %i starts at 0 and takes %i.next on every back edge.
%i = phi i64 [ 0, %for.body.lr.ph ], [ %i.next, %for.body ]
|
|
; %a = &t[i]
%a = getelementptr inbounds double* %t, i64 %i
|
|
%i.next = add nuw nsw i64 %i, 1
|
|
; %a.next = &t[i + 1]
%a.next = getelementptr inbounds double* %t, i64 %i.next
|
|
; Access (1): read t[i].
%t1 = load double* %a, align 8
|
|
; Access (2): read t[i + 1].
%t2 = load double* %a.next, align 8
|
|
; Access (3): write the old t[i] value into t[i + 1].
store double %t1, double* %a.next, align 8
|
|
; Access (4): write the old t[i + 1] value into t[i].
store double %t2, double* %a, align 8
|
|
; The exit test uses the pre-increment %i, so the body also executes for
; %i == %n before control leaves the loop.
%c = icmp eq i64 %i, %n
|
|
br i1 %c, label %final, label %for.body
|
|
|
|
final: ; preds = %for.body
|
|
ret void
|
|
}
|