mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
SLPVectorizer: Implement DCE as part of vectorization.
This is a complete re-write of the bottom-up vectorization class. Before this commit we scanned the instruction tree 3 times. First in search of merge points for the trees. Second, for estimating the cost. And finally for vectorization. There was a lot of code duplication and adding the DCE exposed bugs. The new design is simpler and DCE was a part of the design. In this implementation we build the tree once. After that we estimate the cost by scanning the different entries in the constructed tree (in any order). The vectorization phase also works on the built tree. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185774 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
95a1b3484d
commit
369cc938d2
File diff suppressed because it is too large
Load Diff
38
test/Transforms/SLPVectorizer/X86/crash_7zip.ll
Normal file
38
test/Transforms/SLPVectorizer/X86/crash_7zip.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, i16*, i8*, i8*, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
|
||||
%struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
|
||||
|
||||
; Reduced test function: a loop carries two i32 values (%range.*, %code.*)
; through parallel phi/select/sub chains and, after the loop, stores them to
; struct fields 4 and 5 of %p (two consecutive i32 fields). Most operands and
; all branch conditions are undef, so only the shape of the use-def graph and
; the CFG matter here.
define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) {
|
||||
entry:
|
||||
; Addresses of the two store destinations used in do.end1006.i.
%range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4
|
||||
%code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5
|
||||
br label %do.body66.i
|
||||
|
||||
do.body66.i: ; preds = %do.cond.i, %entry
|
||||
; Loop-carried pair: both phis take their back-edge value from do.cond.i.
%range.2.i = phi i32 [ %range.4.i, %do.cond.i ], [ undef, %entry ]
|
||||
%code.2.i = phi i32 [ %code.4.i, %do.cond.i ], [ undef, %entry ]
|
||||
%.range.2.i = select i1 undef, i32 undef, i32 %range.2.i
|
||||
%.code.2.i = select i1 undef, i32 undef, i32 %code.2.i
|
||||
br i1 undef, label %do.cond.i, label %if.else.i
|
||||
|
||||
if.else.i: ; preds = %do.body66.i
|
||||
%sub91.i = sub i32 %.range.2.i, undef
|
||||
%sub92.i = sub i32 %.code.2.i, undef
|
||||
br label %do.cond.i
|
||||
|
||||
do.cond.i: ; preds = %if.else.i, %do.body66.i
|
||||
; The two phis are not symmetric: for the %do.body66.i edge the first takes
; undef while the second takes %.code.2.i.
%range.4.i = phi i32 [ %sub91.i, %if.else.i ], [ undef, %do.body66.i ]
|
||||
%code.4.i = phi i32 [ %sub92.i, %if.else.i ], [ %.code.2.i, %do.body66.i ]
|
||||
br i1 undef, label %do.body66.i, label %do.end1006.i
|
||||
|
||||
do.end1006.i: ; preds = %do.cond.i
|
||||
%.range.4.i = select i1 undef, i32 undef, i32 %range.4.i
|
||||
%.code.4.i = select i1 undef, i32 undef, i32 %code.4.i
|
||||
; Pair of i32 stores to fields 4 and 5 of %p.
store i32 %.range.4.i, i32* %range20.i, align 4
|
||||
store i32 %.code.4.i, i32* %code21.i, align 4
|
||||
ret void
|
||||
}
|
38
test/Transforms/SLPVectorizer/X86/crash_bullet.ll
Normal file
38
test/Transforms/SLPVectorizer/X86/crash_bullet.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
|
||||
|
||||
; Reduced test function: writes the two i32 fields of %info (field 0 and
; field 1) twice, once in if.then7.1 and again in for.inc.1. The second pair
; of stored values is computed from phis that merge the first pair with the
; constants 0 and 6. All branch conditions are undef.
define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) {
|
||||
entry:
|
||||
br i1 undef, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
ret void
|
||||
|
||||
if.else: ; preds = %entry
|
||||
; Addresses of field 0 and field 1 of %info; both are stored to below.
%m_numConstraintRows4 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 0
|
||||
%nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 1
|
||||
br i1 undef, label %land.lhs.true.i.1, label %if.then7.1
|
||||
|
||||
land.lhs.true.i.1: ; preds = %if.else
|
||||
br i1 undef, label %for.inc.1, label %if.then7.1
|
||||
|
||||
if.then7.1: ; preds = %land.lhs.true.i.1, %if.else
|
||||
; First store pair; both adds have constant-only operands.
%inc.1 = add nsw i32 0, 1
|
||||
store i32 %inc.1, i32* %m_numConstraintRows4, align 4
|
||||
%dec.1 = add nsw i32 6, -1
|
||||
store i32 %dec.1, i32* %nub5, align 4
|
||||
br label %for.inc.1
|
||||
|
||||
for.inc.1: ; preds = %if.then7.1, %land.lhs.true.i.1
|
||||
; Note the phi order: %0 merges the %dec.1 value, %1 merges the %inc.1 value.
%0 = phi i32 [ %dec.1, %if.then7.1 ], [ 6, %land.lhs.true.i.1 ]
|
||||
%1 = phi i32 [ %inc.1, %if.then7.1 ], [ 0, %land.lhs.true.i.1 ]
|
||||
; Second store pair to the same two fields.
%inc.2 = add nsw i32 %1, 1
|
||||
store i32 %inc.2, i32* %m_numConstraintRows4, align 4
|
||||
%dec.2 = add nsw i32 %0, -1
|
||||
store i32 %dec.2, i32* %nub5, align 4
|
||||
unreachable
|
||||
}
|
38
test/Transforms/SLPVectorizer/X86/crash_bullet2.ll
Normal file
38
test/Transforms/SLPVectorizer/X86/crash_bullet2.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332 = type { float, [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, [4 x float], float, float, [4 x float], float, float, [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330] }
|
||||
%class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] }
|
||||
%class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
|
||||
|
||||
; Reduced test function: two similar fadd/fsub/fsub chains whose final values
; are stored to float elements 1 and 2 of the same inner array (field 2,
; element 0 of %this). The first chain additionally stores an intermediate
; value to an undef pointer. The CFG after the stores is a skeleton with undef
; conditions.
define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) {
|
||||
entry:
|
||||
; The two GEPs differ only in the last index (1 vs 2).
%arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1
|
||||
%arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2
|
||||
%0 = load float* %arrayidx36, align 4
|
||||
; First chain: result stored to %arrayidx26; %sub600 is also stored to undef.
%add587 = fadd float undef, undef
|
||||
%sub600 = fsub float %add587, undef
|
||||
store float %sub600, float* undef, align 4
|
||||
%sub613 = fsub float %add587, %sub600
|
||||
store float %sub613, float* %arrayidx26, align 4
|
||||
; Second chain: starts from the loaded value and is stored back to the
; address it was loaded from (%arrayidx36).
%add626 = fadd float %0, undef
|
||||
%sub639 = fsub float %add626, undef
|
||||
%sub652 = fsub float %add626, %sub639
|
||||
store float %sub652, float* %arrayidx36, align 4
|
||||
br i1 undef, label %if.else1609, label %if.then1595
|
||||
|
||||
if.then1595: ; preds = %entry
|
||||
br i1 undef, label %return, label %for.body.lr.ph.i.i1702
|
||||
|
||||
for.body.lr.ph.i.i1702: ; preds = %if.then1595
|
||||
unreachable
|
||||
|
||||
if.else1609: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
return: ; preds = %if.then1595
|
||||
ret void
|
||||
}
|
||||
|
40
test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
Normal file
40
test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { double*, double*, double*, double** }
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
; Reduced test function: loads double* values from fields of %__first and
; %__last, merges them through two three-way phis, and stores the merged
; pointers to fields 0 and 1 of %__first. The function never returns: every
; path ends in the self-looping block while.cond.
define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
|
||||
entry:
|
||||
; Field 0 of %__first is loaded here and is also a store destination below.
%_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0
|
||||
%0 = load double** %_M_cur2.i.i, align 8
|
||||
; Field 1 of %__first is only used as a store destination.
%_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1
|
||||
%_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0
|
||||
%1 = load double** %_M_cur2.i.i81, align 8
|
||||
%_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1
|
||||
%2 = load double** %_M_first3.i.i83, align 8
|
||||
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
|
||||
|
||||
while.cond.i.preheader: ; preds = %entry
|
||||
br label %while.cond.i
|
||||
|
||||
while.cond.i: ; preds = %while.body.i, %while.cond.i.preheader
|
||||
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i
|
||||
|
||||
while.body.i: ; preds = %while.cond.i
|
||||
br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i
|
||||
|
||||
_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry
|
||||
; %3 takes the same value (%2) on two edges, while %4 takes a different
; load on each of those edges (%0 vs %1).
%3 = phi double* [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ]
|
||||
%4 = phi double* [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ]
|
||||
; Pair of pointer stores to fields 0 and 1 of %__first.
store double* %4, double** %_M_cur2.i.i, align 8
|
||||
store double* %3, double** %_M_first3.i.i, align 8
|
||||
br i1 undef, label %if.then.i55, label %while.cond
|
||||
|
||||
if.then.i55: ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %if.then.i55, %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
|
||||
br label %while.cond
|
||||
}
|
46
test/Transforms/SLPVectorizer/X86/crash_flop7.ll
Normal file
46
test/Transforms/SLPVectorizer/X86/crash_flop7.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
; Reduced test function: a CFG skeleton (all conditions undef) around one
; interesting block, for.body267, which accumulates a double through the phi
; %s.71010. Each iteration adds the result of an fmul/fadd/fdiv expression
; tree that contains no loads or stores. The accumulated value has no user
; outside the loop.
define void @main() #0 {
|
||||
entry:
|
||||
br i1 undef, label %while.body, label %while.end
|
||||
|
||||
while.body: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
while.end: ; preds = %entry
|
||||
br i1 undef, label %for.end80, label %for.body75.lr.ph
|
||||
|
||||
for.body75.lr.ph: ; preds = %while.end
|
||||
br label %for.body75
|
||||
|
||||
for.body75: ; preds = %for.body75, %for.body75.lr.ph
|
||||
br label %for.body75
|
||||
|
||||
for.end80: ; preds = %while.end
|
||||
br i1 undef, label %for.end300, label %for.body267.lr.ph
|
||||
|
||||
for.body267.lr.ph: ; preds = %for.end80
|
||||
br label %for.body267
|
||||
|
||||
for.body267: ; preds = %for.body267, %for.body267.lr.ph
|
||||
; Accumulator: starts at 0.0 and is updated by %add297 on the back edge.
%s.71010 = phi double [ 0.000000e+00, %for.body267.lr.ph ], [ %add297, %for.body267 ]
|
||||
; %mul269 feeds both the numerator (%mul283) and, squared, the denominator
; (%mul294 -> %add295) of %div296.
%mul269 = fmul double undef, undef
|
||||
%mul270 = fmul double %mul269, %mul269
|
||||
%add282 = fadd double undef, undef
|
||||
%mul283 = fmul double %mul269, %add282
|
||||
%add293 = fadd double undef, undef
|
||||
%mul294 = fmul double %mul270, %add293
|
||||
%add295 = fadd double undef, %mul294
|
||||
%div296 = fdiv double %mul283, %add295
|
||||
%add297 = fadd double %s.71010, %div296
|
||||
br i1 undef, label %for.body267, label %for.end300
|
||||
|
||||
for.end300: ; preds = %for.body267, %for.end80
|
||||
unreachable
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
24
test/Transforms/SLPVectorizer/X86/crash_lame.ll
Normal file
24
test/Transforms/SLPVectorizer/X86/crash_lame.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
; Reduced test function: straight-line code that stores to %inbuf[2] and
; %inbuf[1] (two consecutive doubles). The stored values are sums built from
; loads of %inbuf[1] and %inbuf[0]. Note the interleaving: %inbuf[1] is loaded
; first, then %inbuf[2] is written, then %inbuf[0] is loaded, and finally
; %inbuf[1] itself is overwritten.
define fastcc void @dct36(double* %inbuf) #0 {
|
||||
entry:
|
||||
%arrayidx41 = getelementptr inbounds double* %inbuf, i64 2
|
||||
%arrayidx44 = getelementptr inbounds double* %inbuf, i64 1
|
||||
%0 = load double* %arrayidx44, align 8, !tbaa !0
|
||||
%add46 = fadd double %0, undef
|
||||
store double %add46, double* %arrayidx41, align 8, !tbaa !0
|
||||
; This load of %inbuf[0] sits between the two stores.
%1 = load double* %inbuf, align 8, !tbaa !0
|
||||
; %0 (the earlier load of %inbuf[1]) is reused as the second operand.
%add49 = fadd double %1, %0
|
||||
store double %add49, double* %arrayidx44, align 8, !tbaa !0
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!0 = metadata !{metadata !"double", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
66
test/Transforms/SLPVectorizer/X86/crash_lencod.ll
Normal file
66
test/Transforms/SLPVectorizer/X86/crash_lencod.ll
Normal file
@ -0,0 +1,66 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
; Reduced test function: a CFG skeleton in which the block if.then88 computes
; two identical-shaped fmul/fmul/fsub/fdiv chains. Their results feed two
; five-way phis in if.end103, and those phis have no users (the function
; returns void right after them). The function contains no memory operations.
define void @RCModelEstimator() {
|
||||
entry:
|
||||
br i1 undef, label %for.body.lr.ph, label %for.end.thread
|
||||
|
||||
for.end.thread: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
br i1 undef, label %for.end, label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
br i1 undef, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %for.body.lr.ph
|
||||
br i1 undef, label %for.body3, label %if.end103
|
||||
|
||||
; This block's only predecessor, for.inc11, branches here on a constant-false
; condition.
for.cond14.preheader: ; preds = %for.inc11
|
||||
br i1 undef, label %for.body16.lr.ph, label %if.end103
|
||||
|
||||
for.body16.lr.ph: ; preds = %for.cond14.preheader
|
||||
br label %for.body16
|
||||
|
||||
for.body3: ; preds = %for.inc11, %for.end
|
||||
br i1 undef, label %if.then7, label %for.inc11
|
||||
|
||||
if.then7: ; preds = %for.body3
|
||||
br label %for.inc11
|
||||
|
||||
for.inc11: ; preds = %if.then7, %for.body3
|
||||
br i1 false, label %for.cond14.preheader, label %for.body3
|
||||
|
||||
for.body16: ; preds = %for.body16, %for.body16.lr.ph
|
||||
br i1 undef, label %for.end39, label %for.body16
|
||||
|
||||
for.end39: ; preds = %for.body16
|
||||
br i1 undef, label %if.end103, label %for.cond45.preheader
|
||||
|
||||
for.cond45.preheader: ; preds = %for.end39
|
||||
br i1 undef, label %if.then88, label %if.else
|
||||
|
||||
if.then88: ; preds = %for.cond45.preheader
|
||||
; First chain -> %div92 (incoming value of phi %1 below).
%mul89 = fmul double 0.000000e+00, 0.000000e+00
|
||||
%mul90 = fmul double 0.000000e+00, 0.000000e+00
|
||||
%sub91 = fsub double %mul89, %mul90
|
||||
%div92 = fdiv double %sub91, undef
|
||||
; Second chain -> %div97 (incoming value of phi %0 below).
%mul94 = fmul double 0.000000e+00, 0.000000e+00
|
||||
%mul95 = fmul double 0.000000e+00, 0.000000e+00
|
||||
%sub96 = fsub double %mul94, %mul95
|
||||
%div97 = fdiv double %sub96, undef
|
||||
br label %if.end103
|
||||
|
||||
if.else: ; preds = %for.cond45.preheader
|
||||
br label %if.end103
|
||||
|
||||
if.end103: ; preds = %if.else, %if.then88, %for.end39, %for.cond14.preheader, %for.end
|
||||
; The two phis mix constants and undef differently on the %for.end39 and
; %if.else edges (0.0 in %0, undef in %1).
%0 = phi double [ 0.000000e+00, %for.end39 ], [ %div97, %if.then88 ], [ 0.000000e+00, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]
|
||||
%1 = phi double [ undef, %for.end39 ], [ %div92, %if.then88 ], [ undef, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ]
|
||||
ret void
|
||||
}
|
||||
|
23
test/Transforms/SLPVectorizer/X86/crash_lencod2.ll
Normal file
23
test/Transforms/SLPVectorizer/X86/crash_lencod2.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
; Reduced test function: stores the same truncated i16 value to elements 12,
; 11 and 10 (in that descending order) of a [13 x i16] array whose base
; pointer is undef. That is three stores rather than a power-of-two count,
; and each carries a different alignment (8, 2, 4).
define void @intrapred_luma() #0 {
|
||||
entry:
|
||||
%conv153 = trunc i32 undef to i16
|
||||
%arrayidx154 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 12
|
||||
store i16 %conv153, i16* %arrayidx154, align 8, !tbaa !0
|
||||
%arrayidx155 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 11
|
||||
store i16 %conv153, i16* %arrayidx155, align 2, !tbaa !0
|
||||
%arrayidx156 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 10
|
||||
store i16 %conv153, i16* %arrayidx156, align 4, !tbaa !0
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!0 = metadata !{metadata !"short", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
53
test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
Normal file
53
test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
Normal file
@ -0,0 +1,53 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Reduced test function: nested loop skeleton whose innermost loop
; (for.body12 / if.end) carries two doubles, %fZImg.069 and %fZReal.068,
; through phis. The loop squares both, compares the sum of squares with 4.0
; to decide whether to exit, and otherwise computes the next pair in if.end.
; The function contains no memory operations.
define void @main() {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.end44, %entry
|
||||
br label %for.cond4.preheader
|
||||
|
||||
for.cond4.preheader: ; preds = %if.then25, %for.body
|
||||
br label %for.body6
|
||||
|
||||
for.body6: ; preds = %for.inc21, %for.cond4.preheader
|
||||
br label %for.body12
|
||||
|
||||
for.body12: ; preds = %if.end, %for.body6
|
||||
; Loop-carried pair; back-edge values come from if.end.
%fZImg.069 = phi double [ undef, %for.body6 ], [ %add19, %if.end ]
|
||||
%fZReal.068 = phi double [ undef, %for.body6 ], [ %add20, %if.end ]
|
||||
; The squares are used both by the exit test here and by %sub in if.end.
%mul13 = fmul double %fZReal.068, %fZReal.068
|
||||
%mul14 = fmul double %fZImg.069, %fZImg.069
|
||||
%add15 = fadd double %mul13, %mul14
|
||||
; The only branch condition in this function that is not undef.
%cmp16 = fcmp ogt double %add15, 4.000000e+00
|
||||
br i1 %cmp16, label %for.inc21, label %if.end
|
||||
|
||||
if.end: ; preds = %for.body12
|
||||
%mul18 = fmul double undef, %fZImg.069
|
||||
%add19 = fadd double undef, %mul18
|
||||
%sub = fsub double %mul13, %mul14
|
||||
%add20 = fadd double undef, %sub
|
||||
br i1 undef, label %for.body12, label %for.inc21
|
||||
|
||||
for.inc21: ; preds = %if.end, %for.body12
|
||||
br i1 undef, label %for.end23, label %for.body6
|
||||
|
||||
for.end23: ; preds = %for.inc21
|
||||
br i1 undef, label %if.then25, label %if.then26
|
||||
|
||||
if.then25: ; preds = %for.end23
|
||||
br i1 undef, label %for.end44, label %for.cond4.preheader
|
||||
|
||||
if.then26: ; preds = %for.end23
|
||||
unreachable
|
||||
|
||||
for.end44: ; preds = %if.then25
|
||||
br i1 undef, label %for.end48, label %for.body
|
||||
|
||||
for.end48: ; preds = %for.end44
|
||||
ret void
|
||||
}
|
||||
|
28
test/Transforms/SLPVectorizer/X86/crash_rc4.ll
Normal file
28
test/Transforms/SLPVectorizer/X86/crash_rc4.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%struct.rc4_state.0.24 = type { i32, i32, [256 x i32] }
|
||||
|
||||
; Reduced test function: a single-block loop computes two masked i32 values
; (%conv4, %conv7). After the loop, two phis merge them with undef, and the
; results are stored to fields 0 and 1 of %s (two consecutive i32 fields).
define void @rc4_crypt(%struct.rc4_state.0.24* nocapture %s) {
|
||||
entry:
|
||||
; Addresses of the two store destinations used in for.end.
%x1 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 0
|
||||
%y2 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 1
|
||||
br i1 undef, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
; %x.045 has no users in this function; only %conv4 has a loop-carried phi.
%x.045 = phi i32 [ %conv4, %for.body ], [ undef, %entry ]
|
||||
%conv4 = and i32 undef, 255
|
||||
%conv7 = and i32 undef, 255
|
||||
; %idxprom842 is an extra user of %conv7 and is itself unused.
%idxprom842 = zext i32 %conv7 to i64
|
||||
br i1 undef, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%x.0.lcssa = phi i32 [ undef, %entry ], [ %conv4, %for.body ]
|
||||
%y.0.lcssa = phi i32 [ undef, %entry ], [ %conv7, %for.body ]
|
||||
; Pair of i32 stores to fields 0 and 1 of %s.
store i32 %x.0.lcssa, i32* %x1, align 4
|
||||
store i32 %y.0.lcssa, i32* %y2, align 4
|
||||
ret void
|
||||
}
|
||||
|
113
test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
Normal file
113
test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
Normal file
@ -0,0 +1,113 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] }
|
||||
|
||||
; Reduced test function: a large CFG skeleton (almost all conditions undef)
; around a pair of i32 stores in if.then157 to fields 1 and 0 of an undef
; struct pointer. The stored values are built from a pair of adds in
; if.then.i, merged by phis in extend_bw.exit, and trace back through the phis
; in for.cond.i to %.sub100 / %cond125 in if.then103. The function contains
; no loads.
define void @SIM4() {
|
||||
entry:
|
||||
br i1 undef, label %return, label %lor.lhs.false
|
||||
|
||||
lor.lhs.false: ; preds = %entry
|
||||
br i1 undef, label %return, label %if.end
|
||||
|
||||
if.end: ; preds = %lor.lhs.false
|
||||
br i1 undef, label %for.end605, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %if.end
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.inc603, %for.body.lr.ph
|
||||
br i1 undef, label %for.inc603, label %if.end12
|
||||
|
||||
if.end12: ; preds = %for.body
|
||||
br i1 undef, label %land.lhs.true, label %land.lhs.true167
|
||||
|
||||
land.lhs.true: ; preds = %if.end12
|
||||
br i1 undef, label %if.then17, label %land.lhs.true167
|
||||
|
||||
if.then17: ; preds = %land.lhs.true
|
||||
br i1 undef, label %if.end98, label %land.rhs.lr.ph
|
||||
|
||||
land.rhs.lr.ph: ; preds = %if.then17
|
||||
unreachable
|
||||
|
||||
if.end98: ; preds = %if.then17
|
||||
; Store destination for field 1; defined in a different block than the
; field-0 address (%from1115 in if.then103).
%from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1
|
||||
br i1 undef, label %land.lhs.true167, label %if.then103
|
||||
|
||||
if.then103: ; preds = %if.end98
|
||||
%.sub100 = select i1 undef, i32 250, i32 undef
|
||||
%mul114 = shl nsw i32 %.sub100, 2
|
||||
%from1115 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0
|
||||
%cond125 = select i1 undef, i32 undef, i32 %mul114
|
||||
br label %for.cond.i
|
||||
|
||||
for.cond.i: ; preds = %land.rhs.i874, %if.then103
|
||||
; Pair of phis seeded with %.sub100 and %cond125; note %cond125 itself
; depends on %.sub100 through %mul114.
%row.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %.sub100, %if.then103 ]
|
||||
%col.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %cond125, %if.then103 ]
|
||||
br i1 undef, label %land.rhs.i874, label %for.end.i
|
||||
|
||||
land.rhs.i874: ; preds = %for.cond.i
|
||||
br i1 undef, label %for.cond.i, label %for.end.i
|
||||
|
||||
for.end.i: ; preds = %land.rhs.i874, %for.cond.i
|
||||
br i1 undef, label %if.then.i, label %if.end.i
|
||||
|
||||
if.then.i: ; preds = %for.end.i
|
||||
%add14.i = add nsw i32 %row.0.i, undef
|
||||
%add15.i = add nsw i32 %col.0.i, undef
|
||||
br label %extend_bw.exit
|
||||
|
||||
if.end.i: ; preds = %for.end.i
|
||||
; Additional users of %cond125 and %.sub100 outside the store chain; %cmp26514.i
; is one of the few non-undef branch conditions in this function.
%add16.i = add i32 %cond125, %.sub100
|
||||
%cmp26514.i = icmp slt i32 %add16.i, 0
|
||||
br i1 %cmp26514.i, label %for.end33.i, label %for.body28.lr.ph.i
|
||||
|
||||
for.body28.lr.ph.i: ; preds = %if.end.i
|
||||
br label %for.end33.i
|
||||
|
||||
for.end33.i: ; preds = %for.body28.lr.ph.i, %if.end.i
|
||||
br i1 undef, label %for.end58.i, label %for.body52.lr.ph.i
|
||||
|
||||
for.body52.lr.ph.i: ; preds = %for.end33.i
|
||||
br label %for.end58.i
|
||||
|
||||
for.end58.i: ; preds = %for.body52.lr.ph.i, %for.end33.i
|
||||
br label %while.cond260.i
|
||||
|
||||
while.cond260.i: ; preds = %land.rhs263.i, %for.end58.i
|
||||
br i1 undef, label %land.rhs263.i, label %while.end275.i
|
||||
|
||||
land.rhs263.i: ; preds = %while.cond260.i
|
||||
br i1 undef, label %while.cond260.i, label %while.end275.i
|
||||
|
||||
while.end275.i: ; preds = %land.rhs263.i, %while.cond260.i
|
||||
br label %extend_bw.exit
|
||||
|
||||
extend_bw.exit: ; preds = %while.end275.i, %if.then.i
|
||||
%add14.i1262 = phi i32 [ %add14.i, %if.then.i ], [ undef, %while.end275.i ]
|
||||
%add15.i1261 = phi i32 [ %add15.i, %if.then.i ], [ undef, %while.end275.i ]
|
||||
; Constant-false condition: if.then157 (the block with the stores) is never
; taken from here.
br i1 false, label %if.then157, label %land.lhs.true167
|
||||
|
||||
if.then157: ; preds = %extend_bw.exit
|
||||
; Pair of i32 stores: field 1 (%from299) first, then field 0 (%from1115).
%add158 = add nsw i32 %add14.i1262, 1
|
||||
store i32 %add158, i32* %from299, align 4
|
||||
%add160 = add nsw i32 %add15.i1261, 1
|
||||
store i32 %add160, i32* %from1115, align 4
|
||||
br label %land.lhs.true167
|
||||
|
||||
land.lhs.true167: ; preds = %if.then157, %extend_bw.exit, %if.end98, %land.lhs.true, %if.end12
|
||||
unreachable
|
||||
|
||||
for.inc603: ; preds = %for.body
|
||||
br i1 undef, label %for.body, label %for.end605
|
||||
|
||||
for.end605: ; preds = %for.inc603, %if.end
|
||||
unreachable
|
||||
|
||||
return: ; preds = %lor.lhs.false, %entry
|
||||
ret void
|
||||
}
|
||||
|
65
test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
Normal file
65
test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
Normal file
@ -0,0 +1,65 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%struct.Ray.5.11.53.113.119.137.149.185.329.389.415 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 }
|
||||
%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double }
|
||||
|
||||
; Function Attrs: ssp uwtable
|
||||
; Reduced test function: the block cond.false66.us computes two parallel
; fadd/fmul chains and stores four doubles to an undef Ray pointer, at
; (field 0, elements 0 and 1) and (field 1, elements 0 and 1). The store
; addresses are computed earlier, in arrayctor.cont. The function never
; returns: every path ends in unreachable or loops. Basic blocks are not
; listed in control-flow order (e.g. for.body42.lr.ph.us appears after its
; successors).
define void @main() #0 {
|
||||
entry:
|
||||
br i1 undef, label %cond.true, label %cond.end
|
||||
|
||||
cond.true: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
cond.end: ; preds = %entry
|
||||
br label %invoke.cont
|
||||
|
||||
invoke.cont: ; preds = %invoke.cont, %cond.end
|
||||
br i1 undef, label %arrayctor.cont, label %invoke.cont
|
||||
|
||||
arrayctor.cont: ; preds = %invoke.cont
|
||||
; Four store destinations, grouped as two pairs of consecutive doubles.
%agg.tmp99208.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 0
|
||||
%agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 1
|
||||
%agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 0
|
||||
%agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 1
|
||||
br label %for.cond36.preheader
|
||||
|
||||
for.cond36.preheader: ; preds = %_Z5clampd.exit.1, %arrayctor.cont
|
||||
br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1
|
||||
|
||||
cond.false51.us: ; preds = %for.body42.lr.ph.us
|
||||
unreachable
|
||||
|
||||
cond.true48.us: ; preds = %for.body42.lr.ph.us
|
||||
br i1 undef, label %cond.true63.us, label %cond.false66.us
|
||||
|
||||
cond.false66.us: ; preds = %cond.true48.us
|
||||
; The two lanes are not perfectly symmetric: the first lane has an extra
; fadd (%add.i276.us) before %add.i264.us, and the constants differ per lane.
%add.i276.us = fadd double 0.000000e+00, undef
|
||||
%add.i264.us = fadd double %add.i276.us, 0.000000e+00
|
||||
%add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F
|
||||
%mul.i254.us = fmul double %add.i264.us, 1.400000e+02
|
||||
%mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02
|
||||
%add.i243.us = fadd double %mul.i254.us, 5.000000e+01
|
||||
%add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01
|
||||
; %add.i264.us and %add4.i267.us are shared by both store pairs.
%mul.i.i.us = fmul double undef, %add.i264.us
|
||||
%mul2.i.i.us = fmul double undef, %add4.i267.us
|
||||
; First pair -> field 0, elements 0 and 1; second pair -> field 1, elements 0 and 1.
store double %add.i243.us, double* %agg.tmp99208.sroa.0.0.idx, align 8
|
||||
store double %add4.i246.us, double* %agg.tmp99208.sroa.1.8.idx388, align 8
|
||||
store double %mul.i.i.us, double* %agg.tmp101211.sroa.0.0.idx, align 8
|
||||
store double %mul2.i.i.us, double* %agg.tmp101211.sroa.1.8.idx390, align 8
|
||||
unreachable
|
||||
|
||||
cond.true63.us: ; preds = %cond.true48.us
|
||||
unreachable
|
||||
|
||||
for.body42.lr.ph.us: ; preds = %for.cond36.preheader
|
||||
br i1 undef, label %cond.true48.us, label %cond.false51.us
|
||||
|
||||
_Z5clampd.exit.1: ; preds = %for.cond36.preheader
|
||||
br label %for.cond36.preheader
|
||||
}
|
||||
|
||||
attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
46
test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll
Normal file
46
test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; Crash regression input for the SLP vectorizer. The command below runs opt
; with -slp-vectorizer followed by -dce and does not pipe opt's output into a
; checking tool, so no particular output shape is asserted by this file.
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; Ray is a pair of Vec values; Vec is three doubles. The long numeric
; suffixes are part of the type names and must match their uses exactly.
%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 }
|
||||
%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }
|
||||
|
||||
; Reduced reproducer: takes no arguments and returns void. Block %if.then38
; holds two parallel chains of scalar double arithmetic (most operands are
; undef) that end in two stores to consecutive double fields of a struct
; addressed through an undef pointer. Block %if.then78 is empty and only
; branches to %return.
; Function Attrs: ssp uwtable
|
||||
define void @_Z8radianceRK3RayiPt() #0 {
|
||||
entry:
|
||||
br i1 undef, label %if.then78, label %if.then38
|
||||
|
||||
if.then38: ; preds = %entry
|
||||
%mul.i.i790 = fmul double undef, undef
|
||||
; %mul3.i.i792 is shared: it is an operand of both %mul.i764 (first chain)
; and %mul9.i772 (second chain).
%mul3.i.i792 = fmul double undef, undef
|
||||
%mul.i764 = fmul double undef, %mul3.i.i792
|
||||
%mul4.i767 = fmul double undef, undef
|
||||
; Root of the first chain.
%sub.i768 = fsub double %mul.i764, %mul4.i767
|
||||
%mul6.i770 = fmul double undef, %mul.i.i790
|
||||
%mul9.i772 = fmul double undef, %mul3.i.i792
|
||||
; Root of the second chain.
%sub10.i773 = fsub double %mul6.i770, %mul9.i772
|
||||
; From here on the two chains use the same opcode sequence in lock step:
; fmul, fmul, fadd, fadd, fmul, each with one undef operand.
%mul.i736 = fmul double undef, %sub.i768
|
||||
%mul2.i738 = fmul double undef, %sub10.i773
|
||||
%mul.i727 = fmul double undef, %mul.i736
|
||||
%mul2.i729 = fmul double undef, %mul2.i738
|
||||
%add.i716 = fadd double undef, %mul.i727
|
||||
%add4.i719 = fadd double undef, %mul2.i729
|
||||
%add.i695 = fadd double undef, %add.i716
|
||||
%add4.i698 = fadd double undef, %add4.i719
|
||||
%mul.i.i679 = fmul double undef, %add.i695
|
||||
%mul2.i.i680 = fmul double undef, %add4.i698
|
||||
; Address of field 0 of the Ray's second Vec member (indices i32 1, i32 0).
%agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
|
||||
store double %mul.i.i679, double* %agg.tmp74663.sroa.0.0.idx, align 8
|
||||
; Address of field 1 of the same Vec member (indices i32 1, i32 1), i.e. the
; double that immediately follows the one stored above.
%agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 1
|
||||
store double %mul2.i.i680, double* %agg.tmp74663.sroa.1.8.idx943, align 8
|
||||
br label %return
|
||||
|
||||
if.then78: ; preds = %entry
|
||||
br label %return
|
||||
|
||||
return: ; preds = %if.then78, %if.then38
|
||||
ret void
|
||||
}
|
||||
|
||||
; Attribute group #0, attached to @_Z8radianceRK3RayiPt by the "#0" on its
; define line: ssp and uwtable plus string attributes all set to "false".
attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
@ -50,7 +50,8 @@ entry:
|
||||
; }
|
||||
|
||||
; CHECK: @extr_user
|
||||
; CHECK: load i32*
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK-NEXT: extractelement <4 x i32>
|
||||
; CHECK: store <4 x i32>
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
|
||||
@ -79,7 +80,8 @@ entry:
|
||||
|
||||
; In this example we have an external user that is not the first element in the vector.
|
||||
; CHECK: @extr_user1
|
||||
; CHECK: load i32*
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK-NEXT: extractelement <4 x i32>
|
||||
; CHECK: store <4 x i32>
|
||||
; CHECK-NEXT: ret
|
||||
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
|
||||
|
@ -3,12 +3,13 @@
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
; At this point we can't vectorize only parts of the tree.
|
||||
|
||||
; CHECK: test
|
||||
; CHECK: sitofp i8
|
||||
; CHECK-NEXT: sitofp i8
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: fmul <2 x double>
|
||||
; CHECK: insertelement <2 x i8>
|
||||
; CHECK: insertelement <2 x i8>
|
||||
; CHECK: sitofp <2 x i8>
|
||||
; CHECK: fmul <2 x double>
|
||||
; CHECK: ret
|
||||
define i32 @test(double* nocapture %A, i8* nocapture %B) {
|
||||
entry:
|
||||
@ -18,7 +19,7 @@ entry:
|
||||
%add = add i8 %0, 3
|
||||
%add4 = add i8 %1, 3
|
||||
%conv6 = sitofp i8 %add to double
|
||||
%conv7 = sitofp i8 %add4 to double ; <--- This is inefficient. The chain stops here.
|
||||
%conv7 = sitofp i8 %add4 to double
|
||||
%mul = fmul double %conv6, %conv6
|
||||
%add8 = fadd double %mul, 1.000000e+00
|
||||
%mul9 = fmul double %conv7, %conv7
|
||||
|
@ -43,3 +43,19 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure we don't crash on this one.
|
||||
; Two scalar load/add/store sequences on consecutive i32 elements:
;   x[i+1] = undef + y[i+1]
;   x[i+2] = undef + y[i+2]
; %x and %y are both noalias nocapture i32 pointers; %i is the i64 base index.
; The first operand of each i32 add is undef.
define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
|
||||
; First element: index i+1.
%1 = add i64 %i, 1
|
||||
%2 = getelementptr inbounds i32* %x, i64 %1
|
||||
%3 = getelementptr inbounds i32* %y, i64 %1
|
||||
%4 = load i32* %3, align 4
|
||||
%5 = add nsw i32 undef, %4
|
||||
store i32 %5, i32* %2, align 4
|
||||
; Second element: index i+2, the i32 slot right after the one stored above.
%6 = add i64 %i, 2
|
||||
%7 = getelementptr inbounds i32* %x, i64 %6
|
||||
%8 = getelementptr inbounds i32* %y, i64 %6
|
||||
%9 = load i32* %8, align 4
|
||||
%10 = add nsw i32 undef, %9
|
||||
store i32 %10, i32* %7, align 4
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user