Add Rpass-missed and Rpass-analysis reports to the loop vectorizer. The remarks give the vector width of vectorized loops and a brief analysis of loops that fail to be vectorized. For example, an analysis will be generated for loops containing control flow that cannot be simplified to a select. The optimization remarks also give the debug location of expressions that cannot be vectorized, for example the location of an unvectorizable call.

Reviewed by: Arnold Schwaighofer


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211721 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tyler Nowicki
2014-06-25 17:50:15 +00:00
parent cae1ea691d
commit d5a8fa72bb
5 changed files with 511 additions and 27 deletions

View File

@@ -0,0 +1,160 @@
; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
; C/C++ code for tests
; void test(int *A, int Length) {
; #pragma clang loop vectorize(enable) interleave(enable)
; for (int i = 0; i < Length; i++) {
; A[i] = i;
; if (A[i] > Length)
; break;
; }
; }
; void test_disabled(int *A, int Length) {
; #pragma clang loop vectorize(disable) interleave(disable)
; for (int i = 0; i < Length; i++)
; A[i] = i;
; }
; void test_array_bounds(int *A, int *B, int Length) {
; #pragma clang loop vectorize(enable)
; for (int i = 0; i < Length; i++)
; A[i] = A[B[i]];
; }
; File, line, and column should match those specified in the metadata
; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
; CHECK: _Z4testPii
; CHECK-NOT: x i32>
; CHECK: ret
; CHECK: _Z13test_disabledPii
; CHECK-NOT: x i32>
; CHECK: ret
; CHECK: _Z17test_array_boundsPiS_i
; CHECK-NOT: x i32>
; CHECK: ret
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
entry:
%cmp10 = icmp sgt i32 %Length, 0, !dbg !12
br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16
%0 = trunc i64 %indvars.iv to i32, !dbg !16
store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
%cmp3 = icmp sle i32 %0, %Length, !dbg !22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, %Length, !dbg !12
%or.cond = and i1 %cmp3, %cmp, !dbg !22
br i1 %or.cond, label %for.body, label %for.end, !dbg !22
for.end: ; preds = %for.body, %entry
ret void, !dbg !24
}
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
entry:
%cmp4 = icmp sgt i32 %Length, 0, !dbg !25
br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30
%0 = trunc i64 %indvars.iv to i32, !dbg !30
store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25
%exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25
br i1 %exitcond, label %for.end, label %for.body, !dbg !25, !llvm.loop !27
for.end: ; preds = %for.body, %entry
ret void, !dbg !31
}
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
entry:
%cmp9 = icmp sgt i32 %Length, 0, !dbg !32
br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
for.body.preheader: ; preds = %entry
br label %for.body, !dbg !35
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35
%0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18
%idxprom1 = sext i32 %0 to i64, !dbg !35
%arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35
%1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
%arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35
store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
%exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34
for.end.loopexit: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void, !dbg !36
}
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
!1 = metadata !{metadata !"source.cpp", metadata !"."}
!2 = metadata !{}
!3 = metadata !{metadata !4, metadata !7, metadata !8}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
!5 = metadata !{i32 786473, metadata !1}
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
!11 = metadata !{metadata !"clang version 3.5.0"}
!12 = metadata !{i32 3, i32 8, metadata !13, null}
!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
!14 = metadata !{metadata !14, metadata !15, metadata !15}
!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
!16 = metadata !{i32 4, i32 5, metadata !17, null}
!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
!18 = metadata !{metadata !19, metadata !19, i64 0}
!19 = metadata !{metadata !"int", metadata !20, i64 0}
!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
!21 = metadata !{metadata !"Simple C/C++ TBAA"}
!22 = metadata !{i32 5, i32 9, metadata !23, null}
!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
!24 = metadata !{i32 8, i32 1, metadata !4, null}
!25 = metadata !{i32 12, i32 8, metadata !26, null}
!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
!27 = metadata !{metadata !27, metadata !28, metadata !29}
!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1}
!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
!30 = metadata !{i32 13, i32 5, metadata !26, null}
!31 = metadata !{i32 14, i32 1, metadata !7, null}
!32 = metadata !{i32 18, i32 8, metadata !33, null}
!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
!34 = metadata !{metadata !34, metadata !15}
!35 = metadata !{i32 19, i32 5, metadata !33, null}
!36 = metadata !{i32 20, i32 1, metadata !8, null}

View File

@@ -1,5 +1,6 @@
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
; This code has all the !dbg annotations needed to track source line information,
; but is missing the llvm.dbg.cu annotation. This prevents code generation from
@@ -8,8 +9,9 @@
; DEBUG-OUTPUT-NOT: .loc
; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1)
; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial)
; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@@ -0,0 +1,78 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
; C/C++ code for control flow test
; int test(int *A, int Length) {
; for (int i = 0; i < Length; i++) {
; if (A[i] > 10.0) goto end;
; A[i] = 0;
; }
; end:
; return 0;
; }
; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
; CHECK: _Z4testPii
; CHECK-NOT: x i32>
; CHECK: ret
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
entry:
%cmp8 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
for.body.preheader: ; preds = %entry
br label %for.body, !dbg !12
for.body: ; preds = %for.body.preheader, %if.else
%indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !12
%0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15
%cmp1 = icmp sgt i32 %0, 10, !dbg !12
br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12
if.else: ; preds = %for.body
store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
%1 = trunc i64 %indvars.iv.next to i32, !dbg !10
%cmp = icmp slt i32 %1, %Length, !dbg !10
br i1 %cmp, label %for.body, label %end.loopexit, !dbg !10
end.loopexit: ; preds = %if.else, %for.body
br label %end
end: ; preds = %end.loopexit, %entry
ret i32 0, !dbg !20
}
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
!1 = metadata !{metadata !"source.cpp", metadata !"."}
!2 = metadata !{}
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
!5 = metadata !{i32 786473, metadata !1}
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
!9 = metadata !{metadata !"clang version 3.5.0"}
!10 = metadata !{i32 3, i32 8, metadata !11, null}
!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
!12 = metadata !{i32 5, i32 9, metadata !13, null}
!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
!15 = metadata !{metadata !16, metadata !16, i64 0}
!16 = metadata !{metadata !"int", metadata !17, i64 0}
!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
!18 = metadata !{metadata !"Simple C/C++ TBAA"}
!19 = metadata !{i32 8, i32 7, metadata !13, null}
!20 = metadata !{i32 12, i32 3, metadata !4, null}

View File

@@ -0,0 +1,85 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
; CHECK: _Z11test_switchPii
; CHECK-NOT: x i32>
; CHECK: ret
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
entry:
%cmp18 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
for.body.preheader: ; preds = %entry
br label %for.body, !dbg !14
for.body: ; preds = %for.body.preheader, %for.inc
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !14
%0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16
switch i32 %0, label %for.inc [
i32 0, label %sw.bb
i32 1, label %sw.bb3
], !dbg !14
sw.bb: ; preds = %for.body
%1 = trunc i64 %indvars.iv to i32, !dbg !20
%mul = shl nsw i32 %1, 1, !dbg !20
br label %for.inc, !dbg !22
sw.bb3: ; preds = %for.body
%2 = trunc i64 %indvars.iv to i32, !dbg !23
store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
br label %for.inc, !dbg !23
for.inc: ; preds = %sw.bb3, %for.body, %sw.bb
%storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
%exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
for.end.loopexit: ; preds = %for.inc
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
ret void, !dbg !24
}
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
!1 = metadata !{metadata !"source.cpp", metadata !"."}
!2 = metadata !{}
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
!5 = metadata !{i32 786473, metadata !1}
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
!9 = metadata !{metadata !"clang version 3.5.0"}
!10 = metadata !{i32 3, i32 8, metadata !11, null}
!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
!12 = metadata !{metadata !12, metadata !13, metadata !13}
!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
!14 = metadata !{i32 4, i32 5, metadata !15, null}
!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
!16 = metadata !{metadata !17, metadata !17, i64 0}
!17 = metadata !{metadata !"int", metadata !18, i64 0}
!18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
!19 = metadata !{metadata !"Simple C/C++ TBAA"}
!20 = metadata !{i32 6, i32 7, metadata !21, null}
!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
!22 = metadata !{i32 7, i32 5, metadata !21, null}
!23 = metadata !{i32 9, i32 7, metadata !21, null}
!24 = metadata !{i32 14, i32 1, metadata !4, null}