mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
081e6fcd17
For the purpose of calculating the cost of the loop at various vectorization factors, we need to count dependencies of consecutive pointers as uniforms (which means that the VF = 1 cost is used for all overall VF values). For example, the TSVC benchmark function s173 has: ... %3 = add nsw i64 %indvars.iv, 16000 %arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3 ... and we must realize that the add will be a scalar in order to correctly deduce it to be profitable to vectorize this on PowerPC with VSX enabled. In fact, all dependencies of a consecutive pointer must be a scalar (uniform), and so we simply need to add all consecutive pointers to the worklist that currently collects uniforms. Fixes PR19296. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205387 91177308-0d34-0410-b5e6-96231b3b80d8
52 lines
2.2 KiB
LLVM
52 lines
2.2 KiB
LLVM
; Loop-vectorizer test for PowerPC with VSX enabled. The run line below pipes
; this file through opt (loop-vectorize followed by instcombine, targeting
; pwr7 with +vsx) and verifies the textual output with FileCheck.
; RUN: opt < %s -mcpu=pwr7 -mattr=+vsx -loop-vectorize -instcombine -S | FileCheck %s
|
|
; Data layout string: leading "E" selects big-endian; "n32:64" lists the
; native integer widths (see the LLVM Language Reference for the full syntax).
target datalayout = "E-m:e-i64:64-n32:64"
|
|
target triple = "powerpc64-unknown-linux-gnu"
|
|
|
|
; Aggregate holding the benchmark arrays. @s173 only touches field 0 and
; field 3, both of type [32000 x float]; the remaining fields are unused in
; this file.
%struct.GlobalData = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float], [5 x i32], [12 x i8], [32000 x float], [7 x i32], [4 x i8], [32000 x float], [11 x i32], [4 x i8], [32000 x float], [13 x i32], [12 x i8], [256 x [256 x float]], [17 x i32], [12 x i8], [256 x [256 x float]], [19 x i32], [4 x i8], [256 x [256 x float]], [23 x i32], [4 x i8], [256 x [256 x float]] }
|
|
|
|
; External instance of the aggregate above; defined outside this file.
@global_data = external global %struct.GlobalData, align 16
|
|
; External i32 read by @s173 to bound its outer repetition loop.
@ntimes = external hidden unnamed_addr global i32, align 4
|
|
|
|
; @s173: input loop nest for the vectorizer.
;
; Outer loop: repeats while the counter is (signed) less than 10 * @ntimes,
; and is skipped entirely when @ntimes <= 0.
; Inner loop: for i in [0, 16000), computes
;     field0[i + 16000] = field0[i] + field3[i]
; where field0/field3 are fields 0 and 3 of @global_data.
; Always returns 0.
;
; The store address is a consecutive pointer whose index is produced by a
; separate "add ..., 16000". The FileCheck directives at the end of the body
; expect the loop to be vectorized with <4 x float> loads while that add stays
; a scalar i64 add on the vector loop index.
define signext i32 @s173() #0 {
|
|
entry:
|
|
%0 = load i32* @ntimes, align 4
|
|
; Guard: enter the loop nest only when @ntimes is positive.
%cmp21 = icmp sgt i32 %0, 0
|
|
br i1 %cmp21, label %for.cond1.preheader, label %for.end12
|
|
|
|
for.cond1.preheader: ; preds = %for.end, %entry
|
|
; Outer-loop repetition counter, starting at 0.
%nl.022 = phi i32 [ %inc11, %for.end ], [ 0, %entry ]
|
|
br label %for.body3
|
|
|
|
for.body3: ; preds = %for.body3, %for.cond1.preheader
|
|
; Inner-loop induction variable i, starting at 0 and stepping by 1.
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
|
; Load field0[i].
%arrayidx = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
|
|
%1 = load float* %arrayidx, align 4
|
|
; Load field3[i].
%arrayidx5 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
|
|
%2 = load float* %arrayidx5, align 4
|
|
%add = fadd float %1, %2
|
|
; Index computation feeding the store address below; this is the add that
; the expected output keeps as a scalar operation.
%3 = add nsw i64 %indvars.iv, 16000
|
|
; Store the sum to field0[i + 16000]. With i in [0, 16000) the stored range
; [16000, 32000) does not overlap the range [0, 16000) loaded from field 0.
%arrayidx8 = getelementptr inbounds %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
|
|
store float %add, float* %arrayidx8, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
; Leave the inner loop once the next index reaches 16000.
%exitcond = icmp eq i64 %indvars.iv.next, 16000
|
|
br i1 %exitcond, label %for.end, label %for.body3
|
|
|
|
for.end: ; preds = %for.body3
|
|
%inc11 = add nsw i32 %nl.022, 1
|
|
; @ntimes is reloaded on every outer iteration to form the bound 10 * @ntimes.
%4 = load i32* @ntimes, align 4
|
|
%mul = mul nsw i32 %4, 10
|
|
%cmp = icmp slt i32 %inc11, %mul
|
|
br i1 %cmp, label %for.cond1.preheader, label %for.end12
|
|
|
|
for.end12: ; preds = %for.end, %entry
|
|
ret i32 0
|
|
|
|
; Expected output, in order: a vector load of <4 x float>, a scalar i64 add of
; 16000 onto %index, and the function's return.
; CHECK-LABEL: @s173
|
|
; CHECK: load <4 x float>*
|
|
; CHECK: add i64 %index, 16000
|
|
; CHECK: ret i32 0
|
|
}
|
|
|
|
; Attribute group #0, referenced by the definition of @s173: nounwind.
attributes #0 = { nounwind }
|
|
|