mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-18 13:34:04 +00:00
Add a new pass "Loop Interchange"
This pass interchanges loops to provide more cache-friendly memory access. For example, a loop nest like for(int i=0;i<N;i++) for(int j=0;j<N;j++) A[j][i] = A[j][i]+B[j][i]; is interchanged to for(int j=0;j<N;j++) for(int i=0;i<N;i++) A[j][i] = A[j][i]+B[j][i]; This pass is currently disabled by default. In brief, it consists of 3 stages: LoopInterchangeLegality: checks the legality of loop interchange based on the dependency matrix. LoopInterchangeProfitability: a very basic heuristic that checks for profitability; this will evolve over time. LoopInterchangeTransform: performs the actual transform. LNT performance tests show improvement in the Polybench/linear-algebra/kernels/mvt and Polybench/linear-algebra/kernels/gemver benchmarks. TODO: 1) Add support for reductions and lcssa phi. 2) Improve profitability model. 3) Improve loop selection algorithm to select best loop for interchange. Currently the innermost loop is selected for interchange. 4) Improve compile time regression found in llvm lnt due to this pass. 5) Fix issues in Dependency Analysis module. A special thanks to Hal for reviewing this code. Review: http://reviews.llvm.org/D7499 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ee711b5b16
commit
52610d84ad
@ -166,6 +166,7 @@ void initializeLocalStackSlotPassPass(PassRegistry&);
|
||||
void initializeLoopDeletionPass(PassRegistry&);
|
||||
void initializeLoopExtractorPass(PassRegistry&);
|
||||
void initializeLoopInfoWrapperPassPass(PassRegistry&);
|
||||
void initializeLoopInterchangePass(PassRegistry &);
|
||||
void initializeLoopInstSimplifyPass(PassRegistry&);
|
||||
void initializeLoopRotatePass(PassRegistry&);
|
||||
void initializeLoopSimplifyPass(PassRegistry&);
|
||||
|
@ -95,6 +95,7 @@ namespace {
|
||||
(void) llvm::createLICMPass();
|
||||
(void) llvm::createLazyValueInfoPass();
|
||||
(void) llvm::createLoopExtractorPass();
|
||||
(void)llvm::createLoopInterchangePass();
|
||||
(void) llvm::createLoopSimplifyPass();
|
||||
(void) llvm::createLoopStrengthReducePass();
|
||||
(void) llvm::createLoopRerollPass();
|
||||
|
@ -138,6 +138,13 @@ FunctionPass *createInstructionCombiningPass();
|
||||
//
|
||||
Pass *createLICMPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// LoopInterchange - This pass interchanges loops to provide more
|
||||
// cache-friendly memory access patterns.
|
||||
//
|
||||
Pass *createLoopInterchangePass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// LoopStrengthReduce - This pass strength reduces GEP instructions that use
|
||||
|
@ -77,6 +77,10 @@ static cl::opt<bool>
|
||||
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable motion of merged load and store"));
|
||||
|
||||
static cl::opt<bool> EnableLoopInterchange(
|
||||
"enable-loopinterchange", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the new, experimental LoopInterchange Pass"));
|
||||
|
||||
PassManagerBuilder::PassManagerBuilder() {
|
||||
OptLevel = 2;
|
||||
SizeLevel = 0;
|
||||
@ -239,6 +243,8 @@ void PassManagerBuilder::populateModulePassManager(
|
||||
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
|
||||
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
|
||||
MPM.add(createLoopDeletionPass()); // Delete dead loops
|
||||
if (EnableLoopInterchange)
|
||||
MPM.add(createLoopInterchangePass()); // Interchange loops
|
||||
|
||||
if (!DisableUnrollLoops)
|
||||
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
|
||||
@ -454,6 +460,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
|
||||
// More loops are countable; try to optimize them.
|
||||
PM.add(createIndVarSimplifyPass());
|
||||
PM.add(createLoopDeletionPass());
|
||||
if (EnableLoopInterchange)
|
||||
PM.add(createLoopInterchangePass());
|
||||
|
||||
PM.add(createLoopVectorizePass(true, LoopVectorize));
|
||||
|
||||
// More scalar chains could be vectorized due to more alias information
|
||||
|
@ -18,6 +18,7 @@ add_llvm_library(LLVMScalarOpts
|
||||
LoopDeletion.cpp
|
||||
LoopIdiomRecognize.cpp
|
||||
LoopInstSimplify.cpp
|
||||
LoopInterchange.cpp
|
||||
LoopRerollPass.cpp
|
||||
LoopRotation.cpp
|
||||
LoopStrengthReduce.cpp
|
||||
|
1193
lib/Transforms/Scalar/LoopInterchange.cpp
Normal file
1193
lib/Transforms/Scalar/LoopInterchange.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||
initializeLoopDeletionPass(Registry);
|
||||
initializeLoopAccessAnalysisPass(Registry);
|
||||
initializeLoopInstSimplifyPass(Registry);
|
||||
initializeLoopInterchangePass(Registry);
|
||||
initializeLoopRotatePass(Registry);
|
||||
initializeLoopStrengthReducePass(Registry);
|
||||
initializeLoopRerollPass(Registry);
|
||||
|
58
test/Transforms/LoopInterchange/currentLimitation.ll
Normal file
58
test/Transforms/LoopInterchange/currentLimitation.ll
Normal file
@ -0,0 +1,58 @@
|
||||
; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
|
||||
;; These are tests that fail to interchange due to current limitations. They will go away once we extend the loop interchange pass.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [100 x [100 x i32]] zeroinitializer
|
||||
@B = common global [100 x [100 x [100 x i32]]] zeroinitializer
|
||||
|
||||
;;--------------------------------------Test case 01------------------------------------
|
||||
;; [FIXME] This loop, though valid, is currently not interchanged due to the limitation that we cannot split the inner loop latch due to multiple uses of the inner induction
|
||||
;; variable (used to increment the loop counter and to access A[j+1][i+1]).
|
||||
;; for(int i=0;i<N-1;i++)
|
||||
;; for(int j=1;j<N-1;j++)
|
||||
;; A[j+1][i+1] = A[j+1][i+1] + k;
|
||||
|
||||
define void @interchange_01(i32 %k, i32 %N) {
|
||||
entry:
|
||||
%sub = add nsw i32 %N, -1
|
||||
%cmp26 = icmp sgt i32 %N, 1
|
||||
br i1 %cmp26, label %for.cond1.preheader.lr.ph, label %for.end17
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
%cmp324 = icmp sgt i32 %sub, 1
|
||||
%0 = add i32 %N, -2
|
||||
%1 = sext i32 %sub to i64
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond.loopexit:
|
||||
%cmp = icmp slt i64 %indvars.iv.next29, %1
|
||||
br i1 %cmp, label %for.cond1.preheader, label %for.end17
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.cond.loopexit ]
|
||||
%indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
|
||||
br i1 %cmp324, label %for.body4, label %for.cond.loopexit
|
||||
|
||||
for.body4:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.cond1.preheader ]
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
|
||||
%2 = load i32, i32* %arrayidx7
|
||||
%add8 = add nsw i32 %2, %k
|
||||
store i32 %add8, i32* %arrayidx7
|
||||
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.cond.loopexit, label %for.body4
|
||||
|
||||
for.end17:
|
||||
ret void
|
||||
}
|
||||
;; Inner loop not split so it is not interchanged.
|
||||
; CHECK-LABEL: @interchange_01
|
||||
; CHECK: for.body4:
|
||||
; CHECK-NEXT: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.body4.preheader ]
|
||||
; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK-NEXT: %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
|
||||
|
557
test/Transforms/LoopInterchange/interchange.ll
Normal file
557
test/Transforms/LoopInterchange/interchange.ll
Normal file
@ -0,0 +1,557 @@
|
||||
; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
|
||||
;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [100 x [100 x i32]] zeroinitializer
|
||||
@B = common global [100 x i32] zeroinitializer
|
||||
@C = common global [100 x [100 x i32]] zeroinitializer
|
||||
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
|
||||
|
||||
declare void @foo(...)
|
||||
|
||||
;;--------------------------------------Test case 01------------------------------------
|
||||
;; for(int i=0;i<N;i++)
|
||||
;; for(int j=1;j<N;j++)
|
||||
;; A[j][i] = A[j][i]+k;
|
||||
|
||||
define void @interchange_01(i32 %k, i32 %N) {
|
||||
entry:
|
||||
%cmp21 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end12
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
%cmp219 = icmp sgt i32 %N, 1
|
||||
%0 = add i32 %N, -1
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
|
||||
br i1 %cmp219, label %for.body3, label %for.inc10
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
||||
%1 = load i32, i32* %arrayidx5
|
||||
%add = add nsw i32 %1, %k
|
||||
store i32 %add, i32* %arrayidx5
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc10, label %for.body3
|
||||
|
||||
for.inc10:
|
||||
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
%lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
|
||||
%exitcond26 = icmp eq i32 %lftr.wideiv25, %0
|
||||
br i1 %exitcond26, label %for.end12, label %for.cond1.preheader
|
||||
|
||||
for.end12:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_01
|
||||
; CHECK: entry:
|
||||
; CHECK: %cmp21 = icmp sgt i32 %N, 0
|
||||
; CHECK: br i1 %cmp21, label %for.body3.preheader, label %for.end12
|
||||
; CHECK: for.cond1.preheader.lr.ph:
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
; CHECK: for.cond1.preheader:
|
||||
; CHECK: %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
|
||||
; CHECK: br i1 %cmp219, label %for.body3.split1, label %for.end12.loopexit
|
||||
; CHECK: for.body3.preheader:
|
||||
; CHECK: %cmp219 = icmp sgt i32 %N, 1
|
||||
; CHECK: %0 = add i32 %N, -1
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
|
||||
; CHECK: br label %for.cond1.preheader.lr.ph
|
||||
; CHECK: for.body3.split1:
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
||||
; CHECK: %1 = load i32, i32* %arrayidx5
|
||||
; CHECK: %add = add nsw i32 %1, %k
|
||||
; CHECK: store i32 %add, i32* %arrayidx5
|
||||
; CHECK: br label %for.inc10.loopexit
|
||||
; CHECK: for.body3.split:
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
; CHECK: br i1 %exitcond, label %for.end12.loopexit, label %for.body3
|
||||
; CHECK: for.inc10.loopexit:
|
||||
; CHECK: br label %for.inc10
|
||||
; CHECK: for.inc10:
|
||||
; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
; CHECK: %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
|
||||
; CHECK: %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
|
||||
; CHECK: br i1 %exitcond26, label %for.body3.split, label %for.cond1.preheader
|
||||
; CHECK: for.end12.loopexit:
|
||||
; CHECK: br label %for.end12
|
||||
; CHECK: for.end12:
|
||||
; CHECK: ret void
|
||||
|
||||
;;--------------------------------------Test case 02-------------------------------------
|
||||
|
||||
;; for(int i=0;i<100;i++)
|
||||
;; for(int j=100;j>=0;j--)
|
||||
;; A[j][i] = A[j][i]+k;
|
||||
|
||||
define void @interchange_02(i32 %k) {
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc10 ]
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ 100, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
|
||||
%0 = load i32, i32* %arrayidx5
|
||||
%add = add nsw i32 %0, %k
|
||||
store i32 %add, i32* %arrayidx5
|
||||
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
||||
%cmp2 = icmp sgt i64 %indvars.iv, 0
|
||||
br i1 %cmp2, label %for.body3, label %for.inc10
|
||||
|
||||
for.inc10:
|
||||
%indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next20, 100
|
||||
br i1 %exitcond, label %for.end11, label %for.cond1.preheader
|
||||
|
||||
for.end11:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_02
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.body3.preheader
|
||||
; CHECK: for.cond1.preheader.preheader:
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
; CHECK: for.cond1.preheader:
|
||||
; CHECK: %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
|
||||
; CHECK: br label %for.body3.split1
|
||||
; CHECK: for.body3.preheader:
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 100, %for.body3.preheader ]
|
||||
; CHECK: br label %for.cond1.preheader.preheader
|
||||
; CHECK: for.body3.split1: ; preds = %for.cond1.preheader
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
|
||||
; CHECK: %0 = load i32, i32* %arrayidx5
|
||||
; CHECK: %add = add nsw i32 %0, %k
|
||||
; CHECK: store i32 %add, i32* %arrayidx5
|
||||
; CHECK: br label %for.inc10
|
||||
; CHECK: for.body3.split:
|
||||
; CHECK: %indvars.iv.next = add nsw i64 %indvars.iv, -1
|
||||
; CHECK: %cmp2 = icmp sgt i64 %indvars.iv, 0
|
||||
; CHECK: br i1 %cmp2, label %for.body3, label %for.end11
|
||||
; CHECK: for.inc10:
|
||||
; CHECK: %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
|
||||
; CHECK: %exitcond = icmp eq i64 %indvars.iv.next20, 100
|
||||
; CHECK: br i1 %exitcond, label %for.body3.split, label %for.cond1.preheader
|
||||
; CHECK: for.end11:
|
||||
; CHECK: ret void
|
||||
|
||||
;;--------------------------------------Test case 03-------------------------------------
|
||||
;; Loops should not be interchanged in this case as it is not profitable.
|
||||
;; for(int i=0;i<100;i++)
|
||||
;; for(int j=0;j<100;j++)
|
||||
;; A[i][j] = A[i][j]+k;
|
||||
|
||||
define void @interchange_03(i32 %k) {
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc10 ]
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx5
|
||||
%add = add nsw i32 %0, %k
|
||||
store i32 %add, i32* %arrayidx5
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 100
|
||||
br i1 %exitcond, label %for.inc10, label %for.body3
|
||||
|
||||
for.inc10:
|
||||
%indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
|
||||
%exitcond23 = icmp eq i64 %indvars.iv.next22, 100
|
||||
br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
|
||||
|
||||
for.end12:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_03
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.cond1.preheader.preheader
|
||||
; CHECK: for.cond1.preheader.preheader: ; preds = %entry
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc10
|
||||
; CHECK: %indvars.iv21 = phi i64 [ %indvars.iv.next22, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
|
||||
; CHECK: br label %for.body3.preheader
|
||||
; CHECK: for.body3.preheader: ; preds = %for.cond1.preheader
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
|
||||
; CHECK: %0 = load i32, i32* %arrayidx5
|
||||
; CHECK: %add = add nsw i32 %0, %k
|
||||
; CHECK: store i32 %add, i32* %arrayidx5
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 100
|
||||
; CHECK: br i1 %exitcond, label %for.inc10, label %for.body3
|
||||
; CHECK: for.inc10: ; preds = %for.body3
|
||||
; CHECK: %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
|
||||
; CHECK: %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
|
||||
; CHECK: br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
|
||||
; CHECK: for.end12: ; preds = %for.inc10
|
||||
; CHECK: ret void
|
||||
|
||||
|
||||
;;--------------------------------------Test case 04-------------------------------------
|
||||
;; Loops should not be interchanged in this case as it is not legal due to dependency.
|
||||
;; for(int j=0;j<99;j++)
|
||||
;; for(int i=0;i<99;i++)
|
||||
;; A[j][i+1] = A[j+1][i]+k;
|
||||
|
||||
define void @interchange_04(i32 %k){
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
|
||||
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx5
|
||||
%add6 = add nsw i32 %0, %k
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
|
||||
store i32 %add6, i32* %arrayidx11
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 99
|
||||
br i1 %exitcond, label %for.inc12, label %for.body3
|
||||
|
||||
for.inc12:
|
||||
%exitcond25 = icmp eq i64 %indvars.iv.next24, 99
|
||||
br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
|
||||
|
||||
for.end14:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_04
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
; CHECK: for.cond1.preheader: ; preds = %for.inc12, %entry
|
||||
; CHECK: %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
|
||||
; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3: ; preds = %for.body3, %for.cond1.preheader
|
||||
; CHECK: %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
|
||||
; CHECK: %0 = load i32, i32* %arrayidx5
|
||||
; CHECK: %add6 = add nsw i32 %0, %k
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
|
||||
; CHECK: store i32 %add6, i32* %arrayidx11
|
||||
; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 99
|
||||
; CHECK: br i1 %exitcond, label %for.inc12, label %for.body3
|
||||
; CHECK: for.inc12: ; preds = %for.body3
|
||||
; CHECK: %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
|
||||
; CHECK: br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
|
||||
; CHECK: for.end14: ; preds = %for.inc12
|
||||
; CHECK: ret void
|
||||
|
||||
|
||||
|
||||
;;--------------------------------------Test case 05-------------------------------------
|
||||
;; Loops not tightly nested are not interchanged
|
||||
;; for(int j=0;j<N;j++) {
|
||||
;; B[j] = j+k;
|
||||
;; for(int i=0;i<N;i++)
|
||||
;; A[j][i] = A[j][i]+B[j];
|
||||
;; }
|
||||
|
||||
define void @interchange_05(i32 %k, i32 %N){
|
||||
entry:
|
||||
%cmp30 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp30, label %for.body.lr.ph, label %for.end17
|
||||
|
||||
for.body.lr.ph:
|
||||
%0 = add i32 %N, -1
|
||||
%1 = zext i32 %k to i64
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
|
||||
%2 = add nsw i64 %indvars.iv32, %1
|
||||
%arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
|
||||
%3 = trunc i64 %2 to i32
|
||||
store i32 %3, i32* %arrayidx
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
|
||||
%4 = load i32, i32* %arrayidx7
|
||||
%add10 = add nsw i32 %3, %4
|
||||
store i32 %add10, i32* %arrayidx7
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc15, label %for.body3
|
||||
|
||||
for.inc15:
|
||||
%indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
|
||||
%lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
|
||||
%exitcond36 = icmp eq i32 %lftr.wideiv35, %0
|
||||
br i1 %exitcond36, label %for.end17, label %for.body
|
||||
|
||||
for.end17:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_05
|
||||
; CHECK: entry:
|
||||
; CHECK: %cmp30 = icmp sgt i32 %N, 0
|
||||
; CHECK: br i1 %cmp30, label %for.body.lr.ph, label %for.end17
|
||||
; CHECK: for.body.lr.ph:
|
||||
; CHECK: %0 = add i32 %N, -1
|
||||
; CHECK: %1 = zext i32 %k to i64
|
||||
; CHECK: br label %for.body
|
||||
; CHECK: for.body:
|
||||
; CHECK: %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
|
||||
; CHECK: %2 = add nsw i64 %indvars.iv32, %1
|
||||
; CHECK: %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
|
||||
; CHECK: %3 = trunc i64 %2 to i32
|
||||
; CHECK: store i32 %3, i32* %arrayidx
|
||||
; CHECK: br label %for.body3.preheader
|
||||
; CHECK: for.body3.preheader:
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
|
||||
; CHECK: %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
|
||||
; CHECK: %4 = load i32, i32* %arrayidx7
|
||||
; CHECK: %add10 = add nsw i32 %3, %4
|
||||
; CHECK: store i32 %add10, i32* %arrayidx7
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
; CHECK: br i1 %exitcond, label %for.inc15, label %for.body3
|
||||
; CHECK: for.inc15:
|
||||
; CHECK: %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
|
||||
; CHECK: %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
|
||||
; CHECK: %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
|
||||
; CHECK: br i1 %exitcond36, label %for.end17.loopexit, label %for.body
|
||||
; CHECK: for.end17.loopexit:
|
||||
; CHECK: br label %for.end17
|
||||
; CHECK: for.end17:
|
||||
; CHECK: ret void
|
||||
|
||||
|
||||
;;--------------------------------------Test case 06-------------------------------------
|
||||
;; Loops not tightly nested are not interchanged
|
||||
;; for(int j=0;j<N;j++) {
|
||||
;; foo();
|
||||
;; for(int i=2;i<N;i++)
|
||||
;; A[j][i] = A[j][i]+k;
|
||||
;; }
|
||||
|
||||
define void @interchange_06(i32 %k, i32 %N) {
|
||||
entry:
|
||||
%cmp22 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp22, label %for.body.lr.ph, label %for.end12
|
||||
|
||||
for.body.lr.ph:
|
||||
%0 = add i32 %N, -1
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv24 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next25, %for.inc10 ]
|
||||
tail call void (...)* @foo()
|
||||
br label %for.body3
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body ]
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv24, i64 %indvars.iv
|
||||
%1 = load i32, i32* %arrayidx5
|
||||
%add = add nsw i32 %1, %k
|
||||
store i32 %add, i32* %arrayidx5
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc10, label %for.body3
|
||||
|
||||
for.inc10:
|
||||
%indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
|
||||
%lftr.wideiv26 = trunc i64 %indvars.iv24 to i32
|
||||
%exitcond27 = icmp eq i32 %lftr.wideiv26, %0
|
||||
br i1 %exitcond27, label %for.end12, label %for.body
|
||||
|
||||
for.end12:
|
||||
ret void
|
||||
}
|
||||
;; Here we check that the inner phi has not been split, which means the loops have not been interchanged.
|
||||
; CHECK-LABEL: @interchange_06
|
||||
; CHECK: phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body3.preheader ]
|
||||
; CHECK-NEXT: getelementptr
|
||||
; CHECK-NEXT: %1 = load
|
||||
|
||||
;;--------------------------------------Test case 07-------------------------------------
|
||||
;; FIXME:
|
||||
;; Test for interchange when we have an lcssa phi. This should ideally be interchanged but it is currently not supported.
|
||||
;; for(gi=1;gi<N;gi++)
|
||||
;; for(gj=1;gj<M;gj++)
|
||||
;; A[gj][gi] = A[gj - 1][gi] + C[gj][gi];
|
||||
|
||||
@gi = common global i32 0
|
||||
@gj = common global i32 0
|
||||
|
||||
define void @interchange_07(i32 %N, i32 %M){
|
||||
entry:
|
||||
store i32 1, i32* @gi
|
||||
%cmp21 = icmp sgt i32 %N, 1
|
||||
br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end16
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
%cmp218 = icmp sgt i32 %M, 1
|
||||
%gi.promoted = load i32, i32* @gi
|
||||
%0 = add i32 %M, -1
|
||||
%1 = sext i32 %gi.promoted to i64
|
||||
%2 = sext i32 %N to i64
|
||||
%3 = add i32 %gi.promoted, 1
|
||||
%4 = icmp slt i32 %3, %N
|
||||
%smax = select i1 %4, i32 %N, i32 %3
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader:
|
||||
%indvars.iv25 = phi i64 [ %1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next26, %for.inc14 ]
|
||||
br i1 %cmp218, label %for.body3, label %for.inc14
|
||||
|
||||
for.body3:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
|
||||
%5 = add nsw i64 %indvars.iv, -1
|
||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
|
||||
%6 = load i32, i32* %arrayidx5
|
||||
%arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
|
||||
%7 = load i32, i32* %arrayidx9
|
||||
%add = add nsw i32 %7, %6
|
||||
%arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv25
|
||||
store i32 %add, i32* %arrayidx13
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc14, label %for.body3
|
||||
|
||||
for.inc14:
|
||||
%inc.lcssa23 = phi i32 [ 1, %for.cond1.preheader ], [ %M, %for.body3 ]
|
||||
%indvars.iv.next26 = add nsw i64 %indvars.iv25, 1
|
||||
%cmp = icmp slt i64 %indvars.iv.next26, %2
|
||||
br i1 %cmp, label %for.cond1.preheader, label %for.cond.for.end16_crit_edge
|
||||
|
||||
for.cond.for.end16_crit_edge:
|
||||
store i32 %inc.lcssa23, i32* @gj
|
||||
store i32 %smax, i32* @gi
|
||||
br label %for.end16
|
||||
|
||||
for.end16:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_07
|
||||
; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
|
||||
; CHECK: %5 = add nsw i64 %indvars.iv, -1
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
|
||||
; CHECK: %6 = load i32, i32* %arrayidx5
|
||||
; CHECK: %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
|
||||
|
||||
;;------------------------------------------------Test case 08-------------------------------
|
||||
;; Test for interchange in loop nest greater than 2.
|
||||
;; for(int i=0;i<100;i++)
|
||||
;; for(int j=0;j<100;j++)
|
||||
;; for(int k=0;k<100;k++)
|
||||
;; D[i][k][j] = D[i][k][j]+t;
|
||||
|
||||
define void @interchange_08(i32 %t){
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.inc15, %entry
|
||||
%i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
|
||||
br label %for.cond4.preheader
|
||||
|
||||
for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
|
||||
%j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
|
||||
br label %for.body6
|
||||
|
||||
for.body6: ; preds = %for.body6, %for.cond4.preheader
|
||||
%k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
|
||||
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
|
||||
%0 = load i32, i32* %arrayidx8
|
||||
%add = add nsw i32 %0, %t
|
||||
store i32 %add, i32* %arrayidx8
|
||||
%inc = add nuw nsw i32 %k.026, 1
|
||||
%exitcond = icmp eq i32 %inc, 100
|
||||
br i1 %exitcond, label %for.inc12, label %for.body6
|
||||
|
||||
for.inc12: ; preds = %for.body6
|
||||
%inc13 = add nuw nsw i32 %j.027, 1
|
||||
%exitcond29 = icmp eq i32 %inc13, 100
|
||||
br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
|
||||
|
||||
for.inc15: ; preds = %for.inc12
|
||||
%inc16 = add nuw nsw i32 %i.028, 1
|
||||
%exitcond30 = icmp eq i32 %inc16, 100
|
||||
br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
|
||||
|
||||
for.end17: ; preds = %for.inc15
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: @interchange_08
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.cond1.preheader.preheader
|
||||
; CHECK: for.cond1.preheader.preheader: ; preds = %entry
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc15
|
||||
; CHECK: %i.028 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond1.preheader.preheader ]
|
||||
; CHECK: br label %for.body6.preheader
|
||||
; CHECK: for.cond4.preheader.preheader: ; preds = %for.body6
|
||||
; CHECK: br label %for.cond4.preheader
|
||||
; CHECK: for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc12
|
||||
; CHECK: %j.027 = phi i32 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader.preheader ]
|
||||
; CHECK: br label %for.body6.split1
|
||||
; CHECK: for.body6.preheader: ; preds = %for.cond1.preheader
|
||||
; CHECK: br label %for.body6
|
||||
; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6.split
|
||||
; CHECK: %k.026 = phi i32 [ %inc, %for.body6.split ], [ 0, %for.body6.preheader ]
|
||||
; CHECK: br label %for.cond4.preheader.preheader
|
||||
; CHECK: for.body6.split1: ; preds = %for.cond4.preheader
|
||||
; CHECK: %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
|
||||
; CHECK: %0 = load i32, i32* %arrayidx8
|
||||
; CHECK: %add = add nsw i32 %0, %t
|
||||
; CHECK: store i32 %add, i32* %arrayidx8
|
||||
; CHECK: br label %for.inc12
|
||||
; CHECK: for.body6.split: ; preds = %for.inc12
|
||||
; CHECK: %inc = add nuw nsw i32 %k.026, 1
|
||||
; CHECK: %exitcond = icmp eq i32 %inc, 100
|
||||
; CHECK: br i1 %exitcond, label %for.inc15, label %for.body6
|
||||
; CHECK: for.inc12: ; preds = %for.body6.split1
|
||||
; CHECK: %inc13 = add nuw nsw i32 %j.027, 1
|
||||
; CHECK: %exitcond29 = icmp eq i32 %inc13, 100
|
||||
; CHECK: br i1 %exitcond29, label %for.body6.split, label %for.cond4.preheader
|
||||
; CHECK: for.inc15: ; preds = %for.body6.split
|
||||
; CHECK: %inc16 = add nuw nsw i32 %i.028, 1
|
||||
; CHECK: %exitcond30 = icmp eq i32 %inc16, 100
|
||||
; CHECK: br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
|
||||
; CHECK: for.end17: ; preds = %for.inc15
|
||||
; CHECK: ret void
|
||||
|
205
test/Transforms/LoopInterchange/profitability.ll
Normal file
205
test/Transforms/LoopInterchange/profitability.ll
Normal file
@ -0,0 +1,205 @@
|
||||
; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
|
||||
;; We test the profitability model in these test cases.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@A = common global [100 x [100 x i32]] zeroinitializer
|
||||
@B = common global [100 x [100 x i32]] zeroinitializer
|
||||
|
||||
;;---------------------------------------Test case 01---------------------------------
|
||||
;; Loop interchange will enable vectorization of the inner loop and is hence profitable. Check for interchange.
|
||||
;; for(int i=1;i<N;i++)
|
||||
;; for(int j=1;j<N;j++)
|
||||
;; A[j][i] = A[j - 1][i] + B[j][i];
|
||||
|
||||
; Two-level loop nest over @A and @B. The outer induction variable
; (%indvars.iv30) is used as the last array subscript and the inner induction
; variable (%indvars.iv) as the first array subscript of every access.
define void @interchange_01(i32 %N) {
|
||||
entry:
|
||||
; Skip the whole loop nest unless N > 1.
%cmp27 = icmp sgt i32 %N, 1
|
||||
br i1 %cmp27, label %for.cond1.preheader.lr.ph, label %for.end16
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
; %0 = N - 1; both loop exit tests below compare against this value.
%0 = add i32 %N, -1
|
||||
br label %for.body3.preheader
|
||||
|
||||
; Outer loop header: %indvars.iv30 starts at 1 and is advanced in %for.inc14.
for.body3.preheader:
|
||||
%indvars.iv30 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc14 ]
|
||||
br label %for.body3
|
||||
|
||||
; Inner loop (single block): %indvars.iv starts at 1 on every entry from the
; outer loop header.
for.body3:
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
|
||||
%1 = add nsw i64 %indvars.iv, -1
|
||||
; Load A[%indvars.iv - 1][%indvars.iv30].
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %indvars.iv30
|
||||
%2 = load i32, i32* %arrayidx5
|
||||
; Load B[%indvars.iv][%indvars.iv30].
%arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv30
|
||||
%3 = load i32, i32* %arrayidx9
|
||||
%add = add nsw i32 %3, %2
|
||||
; Store the sum to A[%indvars.iv][%indvars.iv30].
%arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
|
||||
store i32 %add, i32* %arrayidx13
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; The exit test uses the current (pre-increment) value truncated to i32 and
; leaves the inner loop when it equals N - 1.
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc14, label %for.body3
|
||||
|
||||
; Outer loop latch: advance %indvars.iv30 and exit the nest when its current
; value, truncated to i32, equals N - 1.
for.inc14:
|
||||
%indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
|
||||
%lftr.wideiv32 = trunc i64 %indvars.iv30 to i32
|
||||
%exitcond33 = icmp eq i32 %lftr.wideiv32, %0
|
||||
br i1 %exitcond33, label %for.end16, label %for.body3.preheader
|
||||
|
||||
for.end16:
|
||||
ret void
|
||||
}
|
||||
;; Here we check part of the resulting .ll to verify that the loops are interchanged.
|
||||
; CHECK-LABEL: @interchange_01
|
||||
; CHECK: for.body3.preheader: ; preds = %for.inc14, %for.cond1.preheader.lr.ph
|
||||
; CHECK: %indvars.iv30 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next31, %for.inc14 ]
|
||||
; CHECK: br label %for.body3.split2
|
||||
|
||||
; CHECK: for.body3.preheader1: ; preds = %entry
|
||||
; CHECK: br label %for.body3
|
||||
|
||||
; CHECK: for.body3: ; preds = %for.body3.preheader1, %for.body3.split
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader1 ]
|
||||
; CHECK: br label %for.cond1.preheader.lr.ph
|
||||
|
||||
; CHECK: for.body3.split2: ; preds = %for.body3.preheader
|
||||
; CHECK: %1 = add nsw i64 %indvars.iv, -1
|
||||
; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %indvars.iv30
|
||||
; CHECK: %2 = load i32, i32* %arrayidx5
|
||||
; CHECK: %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv, i64 %indvars.iv30
|
||||
; CHECK: %3 = load i32, i32* %arrayidx9
|
||||
; CHECK: %add = add nsw i32 %3, %2
|
||||
; CHECK: %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv30
|
||||
; CHECK: store i32 %add, i32* %arrayidx13
|
||||
; CHECK: br label %for.inc14
|
||||
|
||||
|
||||
;; ---------------------------------------Test case 02---------------------------------
|
||||
;; Check loop interchange profitability model.
|
||||
;; This tests the profitability model when the operands of getelementptr are not exactly the induction variables but some
|
||||
;; arithmetic operation on them.
|
||||
;; for(int i=1;i<N;i++)
|
||||
;; for(int j=1;j<N;j++)
|
||||
;; A[j-1][i-1] = A[j - 1][i-1] + B[j-1][i-1];
|
||||
|
||||
; Two-level loop nest over @A and @B in which neither array subscript is an
; induction variable directly: the first subscript is %indvars.iv - 1 (inner
; loop) and the last subscript is %indvars.iv35 - 1 (outer loop).
define void @interchange_02(i32 %N) {
|
||||
entry:
|
||||
; Skip the whole loop nest unless N > 1.
%cmp32 = icmp sgt i32 %N, 1
|
||||
br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end21
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
; %0 = N - 1; both loop exit tests below compare against this value.
%0 = add i32 %N, -1
|
||||
br label %for.body3.lr.ph
|
||||
|
||||
; Outer loop header: %indvars.iv35 starts at 1 and is advanced in %for.inc19.
for.body3.lr.ph:
|
||||
%indvars.iv35 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next36, %for.inc19 ]
|
||||
; %1 = outer induction variable - 1, computed once per outer iteration and
; used as the last subscript of both accesses in the inner loop.
%1 = add nsw i64 %indvars.iv35, -1
|
||||
br label %for.body3
|
||||
|
||||
; Inner loop (single block): %indvars.iv starts at 1 on every entry from the
; outer loop header.
for.body3:
|
||||
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
; %2 = inner induction variable - 1, used as the first subscript.
%2 = add nsw i64 %indvars.iv, -1
|
||||
; Load A[%2][%1].
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %2, i64 %1
|
||||
%3 = load i32, i32* %arrayidx6
|
||||
; Load B[%2][%1].
%arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %2, i64 %1
|
||||
%4 = load i32, i32* %arrayidx12
|
||||
%add = add nsw i32 %4, %3
|
||||
; Store the sum back through the same pointer that A was loaded from.
store i32 %add, i32* %arrayidx6
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; The exit test uses the current (pre-increment) value truncated to i32 and
; leaves the inner loop when it equals N - 1.
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc19, label %for.body3
|
||||
|
||||
; Outer loop latch: advance %indvars.iv35 and exit the nest when its current
; value, truncated to i32, equals N - 1.
for.inc19:
|
||||
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
|
||||
%lftr.wideiv38 = trunc i64 %indvars.iv35 to i32
|
||||
%exitcond39 = icmp eq i32 %lftr.wideiv38, %0
|
||||
br i1 %exitcond39, label %for.end21, label %for.body3.lr.ph
|
||||
|
||||
for.end21:
|
||||
ret void
|
||||
}
|
||||
; CHECK-LABEL: @interchange_02
|
||||
; CHECK: for.body3.lr.ph: ; preds = %for.inc19, %for.cond1.preheader.lr.ph
|
||||
; CHECK: %indvars.iv35 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next36, %for.inc19 ]
|
||||
; CHECK: %0 = add nsw i64 %indvars.iv35, -1
|
||||
; CHECK: br label %for.body3.split1
|
||||
|
||||
; CHECK: for.body3.preheader: ; preds = %entry
|
||||
; CHECK: %1 = add i32 %N, -1
|
||||
; CHECK: br label %for.body3
|
||||
|
||||
; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3.split
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
|
||||
; CHECK: br label %for.cond1.preheader.lr.ph
|
||||
|
||||
; CHECK: for.body3.split1: ; preds = %for.body3.lr.ph
|
||||
; CHECK: %2 = add nsw i64 %indvars.iv, -1
|
||||
; CHECK: %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %2, i64 %0
|
||||
; CHECK: %3 = load i32, i32* %arrayidx6
|
||||
; CHECK: %arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %2, i64 %0
|
||||
; CHECK: %4 = load i32, i32* %arrayidx12
|
||||
; CHECK: %add = add nsw i32 %4, %3
|
||||
; CHECK: store i32 %add, i32* %arrayidx6
|
||||
; CHECK: br label %for.inc19
|
||||
|
||||
|
||||
;;---------------------------------------Test case 03---------------------------------
|
||||
;; Loop interchange is not profitable.
|
||||
;; for(int i=1;i<N;i++)
|
||||
;; for(int j=1;j<N;j++)
|
||||
;; A[i-1][j-1] = A[i - 1][j-1] + B[i][j];
|
||||
|
||||
; Two-level loop nest over @A and @B in which the outer induction variable
; (%indvars.iv34) drives the first array subscript and the inner induction
; variable (%indvars.iv) drives the last array subscript of every access.
define void @interchange_03(i32 %N){
|
||||
entry:
|
||||
; Skip the whole loop nest unless N > 1.
%cmp31 = icmp sgt i32 %N, 1
|
||||
br i1 %cmp31, label %for.cond1.preheader.lr.ph, label %for.end19
|
||||
|
||||
for.cond1.preheader.lr.ph:
|
||||
; %0 = N - 1; both loop exit tests below compare against this value.
%0 = add i32 %N, -1
|
||||
br label %for.body3.lr.ph
|
||||
|
||||
; Outer loop header: %indvars.iv34 starts at 1 and is advanced in %for.inc17.
for.body3.lr.ph:
|
||||
%indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
|
||||
; %1 = outer induction variable - 1, used as the first subscript of @A.
%1 = add nsw i64 %indvars.iv34, -1
|
||||
br label %for.body3
|
||||
|
||||
; Inner loop (single block): %indvars.iv starts at 1 on every entry from the
; outer loop header.
for.body3:
|
||||
%indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
; %2 = inner induction variable - 1, used as the last subscript of @A.
%2 = add nsw i64 %indvars.iv, -1
|
||||
; Load A[%1][%2].
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
|
||||
%3 = load i32, i32* %arrayidx6
|
||||
; Load B[%indvars.iv34][%indvars.iv].
%arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
|
||||
%4 = load i32, i32* %arrayidx10
|
||||
%add = add nsw i32 %4, %3
|
||||
; Store the sum back through the same pointer that A was loaded from.
store i32 %add, i32* %arrayidx6
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; The exit test uses the current (pre-increment) value truncated to i32 and
; leaves the inner loop when it equals N - 1.
%lftr.wideiv = trunc i64 %indvars.iv to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %0
|
||||
br i1 %exitcond, label %for.inc17, label %for.body3
|
||||
|
||||
; Outer loop latch: advance %indvars.iv34 and exit the nest when its current
; value, truncated to i32, equals N - 1.
for.inc17:
|
||||
%indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
|
||||
%lftr.wideiv37 = trunc i64 %indvars.iv34 to i32
|
||||
%exitcond38 = icmp eq i32 %lftr.wideiv37, %0
|
||||
br i1 %exitcond38, label %for.end19, label %for.body3.lr.ph
|
||||
|
||||
for.end19:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_03
|
||||
; CHECK: for.body3.lr.ph:
|
||||
; CHECK: %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
|
||||
; CHECK: %1 = add nsw i64 %indvars.iv34, -1
|
||||
; CHECK: br label %for.body3.preheader
|
||||
; CHECK: for.body3.preheader:
|
||||
; CHECK: br label %for.body3
|
||||
; CHECK: for.body3:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
|
||||
; CHECK: %2 = add nsw i64 %indvars.iv, -1
|
||||
; CHECK: %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
|
||||
; CHECK: %3 = load i32, i32* %arrayidx6
|
||||
; CHECK: %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
|
||||
; CHECK: %4 = load i32, i32* %arrayidx10
|
Loading…
x
Reference in New Issue
Block a user