mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
1386692ef6
InstCombine can work against vectorization by sinking loads into conditional blocks, which prevents vectorization. Undo this transformation if there are unconditional memory accesses to the same addresses in the loop.
radar://13815763
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
70 lines
2.3 KiB
LLVM
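For orientation, a rough C-level sketch of the loops the two test functions below appear to come from (the originating source is not part of the test, so the array and function names here are only assumed to mirror the IR):

#define N 1024
float A[N], B[N];

/* hoist_cond_load: A[i] is written on every iteration, so the conditional
 * read of A[i] can be hoisted above the branch and the loop vectorized. */
void hoist_cond_load(void) {
  for (int i = 0; i < N; ++i) {
    float t = 0.0f;
    if (B[i] != 0.0f)
      t = A[i];          /* load only on the conditional path in the IR */
    A[i] = t;            /* unconditional access to the same address */
  }
}

/* dont_hoist_cond_load: the unconditional store targets B[i], so A[i] is
 * only accessed under the condition and its load must not be hoisted. */
void dont_hoist_cond_load(void) {
  for (int i = 0; i < N; ++i) {
    float t = 0.0f;
    if (B[i] != 0.0f)
      t = A[i];
    B[i] = t;
  }
}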
; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Make sure we can vectorize in the presence of hoistable conditional loads.
; CHECK: hoist_cond_load
; CHECK: load <2 x float>
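; (The store in if.end9 writes A[i] on every iteration, i.e. the same address
; that if.else loads conditionally, which is what makes the hoist legal here.)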
define void @hoist_cond_load() {
entry:
  br label %for.body
for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
  %0 = load float* %arrayidx2, align 4
  %cmp3 = fcmp oeq float %0, 0.000000e+00
  br i1 %cmp3, label %if.end9, label %if.else

if.else:
  %1 = load float* %arrayidx, align 4
  br label %if.end9

if.end9:
  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
  store float %tmp.0, float* %arrayidx, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}
; However, we can't hoist loads whose address we have not seen unconditionally
; accessed.
; CHECK: dont_hoist_cond_load
; CHECK-NOT: load <2 x float>
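; (Here the unconditional store targets B[i]; A[i] is read only on the
; conditional path, so its load cannot be hoisted out of the branch.)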
define void @dont_hoist_cond_load() {
entry:
  br label %for.body
for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv
  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv
  %0 = load float* %arrayidx2, align 4
  %cmp3 = fcmp oeq float %0, 0.000000e+00
  br i1 %cmp3, label %if.end9, label %if.else

if.else:
  %1 = load float* %arrayidx, align 4
  br label %if.end9

if.end9:
  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ]
  store float %tmp.0, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}