SLPVectorizer: Implement DCE as part of vectorization.

This is a complete rewrite of the bottom-up vectorization class.
Before this commit we scanned the instruction tree 3 times. First in search of merge points for the trees. Second, for estimating the cost. And finally for vectorization.
There was a lot of code duplication and adding the DCE exposed bugs. The new design is simpler and DCE was a part of the design.
In this implementation we build the tree once. After that we estimate the cost by scanning the different entries in the constructed tree (in any order). The vectorization phase also works on the built tree.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185774 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-07-07 06:57:07 +00:00
parent 95a1b3484d
commit 369cc938d2
17 changed files with 1695 additions and 1028 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, i16*, i8*, i8*, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
%struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
; Two parallel i32 value chains ("range" and "code") flow through PHIs, selects
; and subs across the do.body66.i / do.cond.i loop and end in stores to fields
; 4 and 5 of %p, which are consecutive i32 members of the struct.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) {
entry:
; Addresses of the two adjacent i32 fields that are stored to in do.end1006.i.
%range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4
%code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5
br label %do.body66.i
do.body66.i: ; preds = %do.cond.i, %entry
; Loop-carried PHIs: each takes its value from the matching PHI in do.cond.i.
%range.2.i = phi i32 [ %range.4.i, %do.cond.i ], [ undef, %entry ]
%code.2.i = phi i32 [ %code.4.i, %do.cond.i ], [ undef, %entry ]
%.range.2.i = select i1 undef, i32 undef, i32 %range.2.i
%.code.2.i = select i1 undef, i32 undef, i32 %code.2.i
br i1 undef, label %do.cond.i, label %if.else.i
if.else.i: ; preds = %do.body66.i
%sub91.i = sub i32 %.range.2.i, undef
%sub92.i = sub i32 %.code.2.i, undef
br label %do.cond.i
do.cond.i: ; preds = %if.else.i, %do.body66.i
; The two PHIs are not symmetric: on the %do.body66.i edge one receives undef
; and the other receives %.code.2.i.
%range.4.i = phi i32 [ %sub91.i, %if.else.i ], [ undef, %do.body66.i ]
%code.4.i = phi i32 [ %sub92.i, %if.else.i ], [ %.code.2.i, %do.body66.i ]
br i1 undef, label %do.body66.i, label %do.end1006.i
do.end1006.i: ; preds = %do.cond.i
%.range.4.i = select i1 undef, i32 undef, i32 %range.4.i
%.code.4.i = select i1 undef, i32 undef, i32 %code.4.i
; Pair of stores to adjacent i32 fields.
store i32 %.range.4.i, i32* %range20.i, align 4
store i32 %.code.4.i, i32* %code21.i, align 4
ret void
}

View File

@ -0,0 +1,38 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
; Both i32 fields of %info (indices 0 and 1) are stored twice: first in
; if.then7.1 from adds of constants (0 + 1 and 6 + -1), then in for.inc.1 from
; adds on PHIs that merge those results with the constants 0 and 6.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) {
entry:
br i1 undef, label %if.else, label %if.then
if.then: ; preds = %entry
ret void
if.else: ; preds = %entry
; Addresses of the two adjacent i32 fields.
%m_numConstraintRows4 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 0
%nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 1
br i1 undef, label %land.lhs.true.i.1, label %if.then7.1
land.lhs.true.i.1: ; preds = %if.else
br i1 undef, label %for.inc.1, label %if.then7.1
if.then7.1: ; preds = %land.lhs.true.i.1, %if.else
; First store pair; both stored values are adds of two constants.
%inc.1 = add nsw i32 0, 1
store i32 %inc.1, i32* %m_numConstraintRows4, align 4
%dec.1 = add nsw i32 6, -1
store i32 %dec.1, i32* %nub5, align 4
br label %for.inc.1
for.inc.1: ; preds = %if.then7.1, %land.lhs.true.i.1
; The PHIs appear in the opposite order from the stores they feed: %0 carries
; the "dec" value (field 1) and %1 carries the "inc" value (field 0).
%0 = phi i32 [ %dec.1, %if.then7.1 ], [ 6, %land.lhs.true.i.1 ]
%1 = phi i32 [ %inc.1, %if.then7.1 ], [ 0, %land.lhs.true.i.1 ]
; Second store pair to the same two fields.
%inc.2 = add nsw i32 %1, 1
store i32 %inc.2, i32* %m_numConstraintRows4, align 4
%dec.2 = add nsw i32 %0, -1
store i32 %dec.2, i32* %nub5, align 4
unreachable
}

View File

@ -0,0 +1,38 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332 = type { float, [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, [4 x float], float, float, [4 x float], float, float, [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330] }
%class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] }
%class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
; Scalar float fadd/fsub chains in the entry block feed three stores: one to an
; undef pointer and two to elements 1 and 2 of the same [4 x float] inside
; %this (%arrayidx26 and %arrayidx36). %arrayidx36 is loaded before it is
; stored to.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) {
entry:
; The two GEPs differ only in the last index (1 vs. 2), so they are adjacent floats.
%arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1
%arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2
%0 = load float* %arrayidx36, align 4
; First chain: %sub600 is both stored (to undef) and reused by %sub613.
%add587 = fadd float undef, undef
%sub600 = fsub float %add587, undef
store float %sub600, float* undef, align 4
%sub613 = fsub float %add587, %sub600
store float %sub613, float* %arrayidx26, align 4
; Second chain: same shape, but it starts from the loaded value %0 and its
; intermediate %sub639 is not stored.
%add626 = fadd float %0, undef
%sub639 = fsub float %add626, undef
%sub652 = fsub float %add626, %sub639
store float %sub652, float* %arrayidx36, align 4
br i1 undef, label %if.else1609, label %if.then1595
if.then1595: ; preds = %entry
br i1 undef, label %return, label %for.body.lr.ph.i.i1702
for.body.lr.ph.i.i1702: ; preds = %if.then1595
unreachable
if.else1609: ; preds = %entry
unreachable
return: ; preds = %if.then1595
ret void
}

View File

@ -0,0 +1,40 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { double*, double*, double*, double** }
; Function Attrs: nounwind ssp uwtable
; Two double* PHIs with three incoming edges each merge pointers loaded from
; %__first / %__last (plus undef) and are stored to fields 0 and 1 of %__first.
; The function has no ret: control ends in while.cond, which branches to itself.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
entry:
; Field 0 of %__first is loaded; field 1 of %__first is only used as a store address.
%_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0
%0 = load double** %_M_cur2.i.i, align 8
%_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1
; Fields 0 and 1 of %__last are both loaded.
%_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0
%1 = load double** %_M_cur2.i.i81, align 8
%_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1
%2 = load double** %_M_first3.i.i83, align 8
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
while.cond.i.preheader: ; preds = %entry
br label %while.cond.i
while.cond.i: ; preds = %while.body.i, %while.cond.i.preheader
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i
while.body.i: ; preds = %while.cond.i
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i
_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry
; %3 receives %2 on two of its edges; %4 receives a different load (%0, %1) on each.
%3 = phi double* [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ]
%4 = phi double* [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ]
; Stores to adjacent pointer fields; the PHIs were defined in the reverse
; order (%3 feeds field 1, %4 feeds field 0).
store double* %4, double** %_M_cur2.i.i, align 8
store double* %3, double** %_M_first3.i.i, align 8
br i1 undef, label %if.then.i55, label %while.cond
if.then.i55: ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
br label %while.cond
while.cond: ; preds = %while.cond, %if.then.i55, %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
br label %while.cond
}

View File

@ -0,0 +1,46 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Function Attrs: nounwind ssp uwtable
; All arithmetic lives in for.body267: a tree of double fmul/fadd/fdiv on
; mostly-undef operands that feeds the loop-carried accumulator %s.71010.
; The function contains no loads or stores.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @main() #0 {
entry:
br i1 undef, label %while.body, label %while.end
while.body: ; preds = %entry
unreachable
while.end: ; preds = %entry
br i1 undef, label %for.end80, label %for.body75.lr.ph
for.body75.lr.ph: ; preds = %while.end
br label %for.body75
for.body75: ; preds = %for.body75, %for.body75.lr.ph
br label %for.body75
for.end80: ; preds = %while.end
br i1 undef, label %for.end300, label %for.body267.lr.ph
for.body267.lr.ph: ; preds = %for.end80
br label %for.body267
for.body267: ; preds = %for.body267, %for.body267.lr.ph
; Accumulator PHI; its loop-carried input %add297 is the last value computed below.
%s.71010 = phi double [ 0.000000e+00, %for.body267.lr.ph ], [ %add297, %for.body267 ]
; %mul269 is used three times: twice by %mul270 and once by %mul283.
%mul269 = fmul double undef, undef
%mul270 = fmul double %mul269, %mul269
%add282 = fadd double undef, undef
%mul283 = fmul double %mul269, %add282
%add293 = fadd double undef, undef
%mul294 = fmul double %mul270, %add293
%add295 = fadd double undef, %mul294
; The two multiply chains meet in this single fdiv.
%div296 = fdiv double %mul283, %add295
%add297 = fadd double %s.71010, %div296
br i1 undef, label %for.body267, label %for.end300
for.end300: ; preds = %for.body267, %for.end80
unreachable
}
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -0,0 +1,24 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Function Attrs: nounwind ssp uwtable
; Overlapping double accesses on %inbuf, in this order: load element 1, store
; element 2, load element 0, store element 1. The load of element 1 (%0) is
; used by both fadds, and the second load sits between the two stores.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define fastcc void @dct36(double* %inbuf) #0 {
entry:
%arrayidx41 = getelementptr inbounds double* %inbuf, i64 2
%arrayidx44 = getelementptr inbounds double* %inbuf, i64 1
%0 = load double* %arrayidx44, align 8, !tbaa !0
%add46 = fadd double %0, undef
store double %add46, double* %arrayidx41, align 8, !tbaa !0
; This load of element 0 happens after the store to element 2.
%1 = load double* %inbuf, align 8, !tbaa !0
%add49 = fadd double %1, %0
; Overwrites element 1, the location %0 was loaded from.
store double %add49, double* %arrayidx44, align 8, !tbaa !0
ret void
}
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!0 = metadata !{metadata !"double", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}

View File

@ -0,0 +1,66 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Function Attrs: nounwind ssp uwtable
; The arithmetic is confined to if.then88: two independent groups of
; (fmul, fmul, fsub, fdiv) on constant-zero operands. Their results reach two
; five-input PHIs in if.end103, and those PHIs have no users.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @RCModelEstimator() {
entry:
br i1 undef, label %for.body.lr.ph, label %for.end.thread
for.end.thread: ; preds = %entry
unreachable
for.body.lr.ph: ; preds = %entry
br i1 undef, label %for.end, label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %for.body.lr.ph
br i1 undef, label %for.body3, label %if.end103
for.cond14.preheader: ; preds = %for.inc11
br i1 undef, label %for.body16.lr.ph, label %if.end103
for.body16.lr.ph: ; preds = %for.cond14.preheader
br label %for.body16
for.body3: ; preds = %for.inc11, %for.end
br i1 undef, label %if.then7, label %for.inc11
if.then7: ; preds = %for.body3
br label %for.inc11
for.inc11: ; preds = %if.then7, %for.body3
; Constant-false condition: the edge to %for.cond14.preheader is never taken.
br i1 false, label %for.cond14.preheader, label %for.body3
for.body16: ; preds = %for.body16, %for.body16.lr.ph
br i1 undef, label %for.end39, label %for.body16
for.end39: ; preds = %for.body16
br i1 undef, label %if.end103, label %for.cond45.preheader
for.cond45.preheader: ; preds = %for.end39
br i1 undef, label %if.then88, label %if.else
if.then88: ; preds = %for.cond45.preheader
; First group, ending in %div92.
%mul89 = fmul double 0.000000e+00, 0.000000e+00
%mul90 = fmul double 0.000000e+00, 0.000000e+00
%sub91 = fsub double %mul89, %mul90
%div92 = fdiv double %sub91, undef
; Second group with the same shape, ending in %div97.
%mul94 = fmul double 0.000000e+00, 0.000000e+00
%mul95 = fmul double 0.000000e+00, 0.000000e+00
%sub96 = fsub double %mul94, %mul95
%div97 = fdiv double %sub96, undef
br label %if.end103
if.else: ; preds = %for.cond45.preheader
br label %if.end103
if.end103: ; preds = %if.else, %if.then88, %for.end39, %for.cond14.preheader, %for.end
; %0 takes %div97 and %1 takes %div92; the remaining incoming values are a
; mix of 0.0 and undef that differs between the two PHIs.
%0 = phi double [ 0.000000e+00, %for.end39 ], [ %div97, %if.then88 ], [ 0.000000e+00, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]
%1 = phi double [ undef, %for.end39 ], [ %div92, %if.then88 ], [ undef, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]
ret void
}

View File

@ -0,0 +1,23 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Function Attrs: nounwind ssp uwtable
; The same truncated value %conv153 is stored to elements 12, 11 and 10 of a
; [13 x i16] at an undef base address: three adjacent i16 slots written in
; descending index order, with alignments 8, 2 and 4.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @intrapred_luma() #0 {
entry:
%conv153 = trunc i32 undef to i16
%arrayidx154 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 12
store i16 %conv153, i16* %arrayidx154, align 8, !tbaa !0
%arrayidx155 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 11
store i16 %conv153, i16* %arrayidx155, align 2, !tbaa !0
%arrayidx156 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 10
store i16 %conv153, i16* %arrayidx156, align 4, !tbaa !0
ret void
}
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!0 = metadata !{metadata !"short", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}

View File

@ -0,0 +1,53 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; for.body12 squares two loop-carried doubles (%fZReal.068, %fZImg.069). The
; squares %mul13 / %mul14 are used in two different basic blocks: by the fadd
; feeding the fcmp in for.body12 and by the fsub in if.end.
; The function contains no loads or stores.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @main() {
entry:
br label %for.body
for.body: ; preds = %for.end44, %entry
br label %for.cond4.preheader
for.cond4.preheader: ; preds = %if.then25, %for.body
br label %for.body6
for.body6: ; preds = %for.inc21, %for.cond4.preheader
br label %for.body12
for.body12: ; preds = %if.end, %for.body6
; Loop-carried pair; the back-edge values %add19 / %add20 are defined in if.end.
%fZImg.069 = phi double [ undef, %for.body6 ], [ %add19, %if.end ]
%fZReal.068 = phi double [ undef, %for.body6 ], [ %add20, %if.end ]
%mul13 = fmul double %fZReal.068, %fZReal.068
%mul14 = fmul double %fZImg.069, %fZImg.069
%add15 = fadd double %mul13, %mul14
; This is the only conditional branch in the function that does not test undef.
%cmp16 = fcmp ogt double %add15, 4.000000e+00
br i1 %cmp16, label %for.inc21, label %if.end
if.end: ; preds = %for.body12
%mul18 = fmul double undef, %fZImg.069
%add19 = fadd double undef, %mul18
; Second use of %mul13 / %mul14, in a different block from their definition.
%sub = fsub double %mul13, %mul14
%add20 = fadd double undef, %sub
br i1 undef, label %for.body12, label %for.inc21
for.inc21: ; preds = %if.end, %for.body12
br i1 undef, label %for.end23, label %for.body6
for.end23: ; preds = %for.inc21
br i1 undef, label %if.then25, label %if.then26
if.then25: ; preds = %for.end23
br i1 undef, label %for.end44, label %for.cond4.preheader
if.then26: ; preds = %for.end23
unreachable
for.end44: ; preds = %if.then25
br i1 undef, label %for.end48, label %for.body
for.end48: ; preds = %for.end44
ret void
}

View File

@ -0,0 +1,28 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct.rc4_state.0.24 = type { i32, i32, [256 x i32] }
; %conv4 and %conv7 (both `and i32 undef, 255`) are defined inside the loop and
; stored after it, through PHIs in for.end, to fields 0 and 1 of %s. Each also
; has a second user inside the loop: %conv4 feeds the PHI %x.045 and %conv7
; feeds %idxprom842.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @rc4_crypt(%struct.rc4_state.0.24* nocapture %s) {
entry:
; Addresses of the two leading i32 fields of the struct.
%x1 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 0
%y2 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 1
br i1 undef, label %for.body, label %for.end
for.body: ; preds = %for.body, %entry
; %x.045 has no users in this function.
%x.045 = phi i32 [ %conv4, %for.body ], [ undef, %entry ]
%conv4 = and i32 undef, 255
%conv7 = and i32 undef, 255
; %idxprom842 has no users either; it only adds an extra use of %conv7.
%idxprom842 = zext i32 %conv7 to i64
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%x.0.lcssa = phi i32 [ undef, %entry ], [ %conv4, %for.body ]
%y.0.lcssa = phi i32 [ undef, %entry ], [ %conv7, %for.body ]
; Pair of stores to adjacent i32 fields.
store i32 %x.0.lcssa, i32* %x1, align 4
store i32 %y.0.lcssa, i32* %y2, align 4
ret void
}

View File

@ -0,0 +1,113 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] }
; The only stores are in if.then157: %add158 goes to %from299 (field 1 of an
; undef %struct._exon_t pointer) and %add160 goes to %from1115 (field 0).
; Their operands arrive through PHIs in extend_bw.exit and for.cond.i, which
; trace back to %.sub100 and %cond125; those two values are also added together
; in if.end.i.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @SIM4() {
entry:
br i1 undef, label %return, label %lor.lhs.false
lor.lhs.false: ; preds = %entry
br i1 undef, label %return, label %if.end
if.end: ; preds = %lor.lhs.false
br i1 undef, label %for.end605, label %for.body.lr.ph
for.body.lr.ph: ; preds = %if.end
br label %for.body
for.body: ; preds = %for.inc603, %for.body.lr.ph
br i1 undef, label %for.inc603, label %if.end12
if.end12: ; preds = %for.body
br i1 undef, label %land.lhs.true, label %land.lhs.true167
land.lhs.true: ; preds = %if.end12
br i1 undef, label %if.then17, label %land.lhs.true167
if.then17: ; preds = %land.lhs.true
br i1 undef, label %if.end98, label %land.rhs.lr.ph
land.rhs.lr.ph: ; preds = %if.then17
unreachable
if.end98: ; preds = %if.then17
; Store address for field 1; the address for field 0 is computed separately
; in if.then103.
%from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1
br i1 undef, label %land.lhs.true167, label %if.then103
if.then103: ; preds = %if.end98
; %cond125 is derived from %.sub100 (shl by 2, then select), so the two values
; feeding the PHIs below are related.
%.sub100 = select i1 undef, i32 250, i32 undef
%mul114 = shl nsw i32 %.sub100, 2
%from1115 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
%cond125 = select i1 undef, i32 undef, i32 %mul114
br label %for.cond.i
for.cond.i: ; preds = %land.rhs.i874, %if.then103
%row.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %.sub100, %if.then103 ]
%col.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %cond125, %if.then103 ]
br i1 undef, label %land.rhs.i874, label %for.end.i
land.rhs.i874: ; preds = %for.cond.i
br i1 undef, label %for.cond.i, label %for.end.i
for.end.i: ; preds = %land.rhs.i874, %for.cond.i
br i1 undef, label %if.then.i, label %if.end.i
if.then.i: ; preds = %for.end.i
%add14.i = add nsw i32 %row.0.i, undef
%add15.i = add nsw i32 %col.0.i, undef
br label %extend_bw.exit
if.end.i: ; preds = %for.end.i
; Extra use of %cond125 and %.sub100 outside the chains that lead to the stores.
%add16.i = add i32 %cond125, %.sub100
%cmp26514.i = icmp slt i32 %add16.i, 0
br i1 %cmp26514.i, label %for.end33.i, label %for.body28.lr.ph.i
for.body28.lr.ph.i: ; preds = %if.end.i
br label %for.end33.i
for.end33.i: ; preds = %for.body28.lr.ph.i, %if.end.i
br i1 undef, label %for.end58.i, label %for.body52.lr.ph.i
for.body52.lr.ph.i: ; preds = %for.end33.i
br label %for.end58.i
for.end58.i: ; preds = %for.body52.lr.ph.i, %for.end33.i
br label %while.cond260.i
while.cond260.i: ; preds = %land.rhs263.i, %for.end58.i
br i1 undef, label %land.rhs263.i, label %while.end275.i
land.rhs263.i: ; preds = %while.cond260.i
br i1 undef, label %while.cond260.i, label %while.end275.i
while.end275.i: ; preds = %land.rhs263.i, %while.cond260.i
br label %extend_bw.exit
extend_bw.exit: ; preds = %while.end275.i, %if.then.i
%add14.i1262 = phi i32 [ %add14.i, %if.then.i ], [ undef, %while.end275.i ]
%add15.i1261 = phi i32 [ %add15.i, %if.then.i ], [ undef, %while.end275.i ]
; Constant-false condition: the edge to %if.then157 is never taken.
br i1 false, label %if.then157, label %land.lhs.true167
if.then157: ; preds = %extend_bw.exit
; The store to field 1 (%from299) comes before the store to field 0 (%from1115).
%add158 = add nsw i32 %add14.i1262, 1
store i32 %add158, i32* %from299, align 4
%add160 = add nsw i32 %add15.i1261, 1
store i32 %add160, i32* %from1115, align 4
br label %land.lhs.true167
land.lhs.true167: ; preds = %if.then157, %extend_bw.exit, %if.end98, %land.lhs.true, %if.end12
unreachable
for.inc603: ; preds = %for.body
br i1 undef, label %for.body, label %for.end605
for.end605: ; preds = %for.inc603, %if.end
unreachable
return: ; preds = %lor.lhs.false, %entry
ret void
}

View File

@ -0,0 +1,65 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct.Ray.5.11.53.113.119.137.149.185.329.389.415 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 }
%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double }
; Function Attrs: ssp uwtable
; cond.false66.us holds two double chains that end in four stores: fields 0
; and 1 of the first Vec member and fields 0 and 1 of the second Vec member of
; an undef Ray pointer. %add.i264.us and %add4.i267.us are each used twice,
; once by the "* 140 + constant" chain and once by the "undef *" multiply.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @main() #0 {
entry:
br i1 undef, label %cond.true, label %cond.end
cond.true: ; preds = %entry
unreachable
cond.end: ; preds = %entry
br label %invoke.cont
invoke.cont: ; preds = %invoke.cont, %cond.end
br i1 undef, label %arrayctor.cont, label %invoke.cont
arrayctor.cont: ; preds = %invoke.cont
; The four store addresses are computed here, several blocks before the
; stores that use them.
%agg.tmp99208.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 0
%agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 1
%agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 0
%agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 1
br label %for.cond36.preheader
for.cond36.preheader: ; preds = %_Z5clampd.exit.1, %arrayctor.cont
br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1
cond.false51.us: ; preds = %for.body42.lr.ph.us
unreachable
cond.true48.us: ; preds = %for.body42.lr.ph.us
br i1 undef, label %cond.true63.us, label %cond.false66.us
cond.false66.us: ; preds = %cond.true48.us
; The first lane has one extra fadd (%add.i276.us) ahead of %add.i264.us; the
; second lane starts directly at %add4.i267.us.
%add.i276.us = fadd double 0.000000e+00, undef
%add.i264.us = fadd double %add.i276.us, 0.000000e+00
%add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F
%mul.i254.us = fmul double %add.i264.us, 1.400000e+02
%mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02
%add.i243.us = fadd double %mul.i254.us, 5.000000e+01
%add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01
; Second use of %add.i264.us / %add4.i267.us.
%mul.i.i.us = fmul double undef, %add.i264.us
%mul2.i.i.us = fmul double undef, %add4.i267.us
; Two pairs of stores to adjacent doubles.
store double %add.i243.us, double* %agg.tmp99208.sroa.0.0.idx, align 8
store double %add4.i246.us, double* %agg.tmp99208.sroa.1.8.idx388, align 8
store double %mul.i.i.us, double* %agg.tmp101211.sroa.0.0.idx, align 8
store double %mul2.i.i.us, double* %agg.tmp101211.sroa.1.8.idx390, align 8
unreachable
cond.true63.us: ; preds = %cond.true48.us
unreachable
for.body42.lr.ph.us: ; preds = %for.cond36.preheader
br i1 undef, label %cond.true48.us, label %cond.false51.us
_Z5clampd.exit.1: ; preds = %for.cond36.preheader
br label %for.cond36.preheader
}
attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -0,0 +1,46 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 }
%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }
; Function Attrs: ssp uwtable
; if.then38 builds two double chains of fmul/fsub/fadd on mostly-undef operands
; that end in stores to fields 0 and 1 of the second Vec member of an undef Ray
; pointer. %mul3.i.i792 is shared between the two chains: it is an operand of
; both %mul.i764 and %mul9.i772.
; The file's RUN line does not pipe into FileCheck, so only a clean exit of opt
; is checked.
define void @_Z8radianceRK3RayiPt() #0 {
entry:
br i1 undef, label %if.then78, label %if.then38
if.then38: ; preds = %entry
%mul.i.i790 = fmul double undef, undef
%mul3.i.i792 = fmul double undef, undef
; The two fsub inputs are not symmetric: %mul4.i767 is undef * undef, while the
; other three multiplies each take one of the two values defined above.
%mul.i764 = fmul double undef, %mul3.i.i792
%mul4.i767 = fmul double undef, undef
%sub.i768 = fsub double %mul.i764, %mul4.i767
%mul6.i770 = fmul double undef, %mul.i.i790
%mul9.i772 = fmul double undef, %mul3.i.i792
%sub10.i773 = fsub double %mul6.i770, %mul9.i772
; From here on the two chains have the same shape, pair by pair.
%mul.i736 = fmul double undef, %sub.i768
%mul2.i738 = fmul double undef, %sub10.i773
%mul.i727 = fmul double undef, %mul.i736
%mul2.i729 = fmul double undef, %mul2.i738
%add.i716 = fadd double undef, %mul.i727
%add4.i719 = fadd double undef, %mul2.i729
%add.i695 = fadd double undef, %add.i716
%add4.i698 = fadd double undef, %add4.i719
%mul.i.i679 = fmul double undef, %add.i695
%mul2.i.i680 = fmul double undef, %add4.i698
; Each store address is computed immediately before its store.
%agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
store double %mul.i.i679, double* %agg.tmp74663.sroa.0.0.idx, align 8
%agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 1
store double %mul2.i.i680, double* %agg.tmp74663.sroa.1.8.idx943, align 8
br label %return
if.then78: ; preds = %entry
br label %return
return: ; preds = %if.then78, %if.then38
ret void
}
attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@ -50,7 +50,8 @@ entry:
; } ; }
; CHECK: @extr_user ; CHECK: @extr_user
; CHECK: load i32* ; CHECK: load <4 x i32>
; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32> ; CHECK: store <4 x i32>
; CHECK-NEXT: ret ; CHECK-NEXT: ret
define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
@ -79,7 +80,8 @@ entry:
; In this example we have an external user that is not the first element in the vector. ; In this example we have an external user that is not the first element in the vector.
; CHECK: @extr_user1 ; CHECK: @extr_user1
; CHECK: load i32* ; CHECK: load <4 x i32>
; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32> ; CHECK: store <4 x i32>
; CHECK-NEXT: ret ; CHECK-NEXT: ret
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {

View File

@ -3,12 +3,13 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0" target triple = "x86_64-apple-macosx10.8.0"
; At this point we can't vectorize only parts of the tree.
; CHECK: test ; CHECK: test
; CHECK: sitofp i8 ; CHECK: insertelement <2 x i8>
; CHECK-NEXT: sitofp i8 ; CHECK: insertelement <2 x i8>
; CHECK-NEXT: insertelement ; CHECK: sitofp <2 x i8>
; CHECK-NEXT: insertelement ; CHECK: fmul <2 x double>
; CHECK-NEXT: fmul <2 x double>
; CHECK: ret ; CHECK: ret
define i32 @test(double* nocapture %A, i8* nocapture %B) { define i32 @test(double* nocapture %A, i8* nocapture %B) {
entry: entry:
@ -18,7 +19,7 @@ entry:
%add = add i8 %0, 3 %add = add i8 %0, 3
%add4 = add i8 %1, 3 %add4 = add i8 %1, 3
%conv6 = sitofp i8 %add to double %conv6 = sitofp i8 %add to double
%conv7 = sitofp i8 %add4 to double ; <--- This is inefficient. The chain stops here. %conv7 = sitofp i8 %add4 to double
%mul = fmul double %conv6, %conv6 %mul = fmul double %conv6, %conv6
%add8 = fadd double %mul, 1.000000e+00 %add8 = fadd double %mul, 1.000000e+00
%mul9 = fmul double %conv7, %conv7 %mul9 = fmul double %conv7, %conv7

View File

@ -43,3 +43,19 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
ret void ret void
} }
; Make sure we don't crash on this one.
; Two load/add/store groups at element offsets %i + 1 and %i + 2 of %x and %y.
; In both groups the other operand of the i32 add is undef.
define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
; First group: x[i + 1] = undef + y[i + 1].
%1 = add i64 %i, 1
%2 = getelementptr inbounds i32* %x, i64 %1
%3 = getelementptr inbounds i32* %y, i64 %1
%4 = load i32* %3, align 4
%5 = add nsw i32 undef, %4
store i32 %5, i32* %2, align 4
; Second group: x[i + 2] = undef + y[i + 2].
%6 = add i64 %i, 2
%7 = getelementptr inbounds i32* %x, i64 %6
%8 = getelementptr inbounds i32* %y, i64 %6
%9 = load i32* %8, align 4
%10 = add nsw i32 undef, %9
store i32 %10, i32* %7, align 4
ret void
}