From 734d7c2a7e6cc12776d67689d018eb1bd1c4cd72 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Tue, 26 Apr 2011 20:45:33 +0000
Subject: [PATCH] make a couple of changes to the standard pass pipeline: 1.
 Only run the early (in the module pass pipe) instcombine/simplifycfg    if
 the "unit at a time" passes they are cleaning up after runs.

2. Move the "clean up after the unroller" pass to the very end of the
   function-level pass pipeline.  Loop unroll uses instsimplify now,
   so it doesn't create a ton of trash.  Moving instcombine later allows
   it to clean up after opportunities are exposed by GVN, DSE, etc.

3. Introduce some phase ordering tests for things that are specifically
   intended to be simplified by the full optimizer as a whole.

This resolves PR2338, and is progress towards PR6627, which will be
generating code that looks similar to test2.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130241 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Support/StandardPasses.h  |  8 +--
 test/Transforms/PhaseOrdering/basic.ll | 73 ++++++++++++++++++++++++++
 test/Transforms/ScalarRepl/dg.exp      |  2 +-
 3 files changed, 79 insertions(+), 4 deletions(-)
 create mode 100644 test/Transforms/PhaseOrdering/basic.ll

diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index d774faf3864..8dfd6f98abf 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -72,6 +72,7 @@ namespace llvm {
                                                 Pass *InliningPass) {
     createStandardAliasAnalysisPasses(PM);
 
+    // If all optimizations are disabled, just run the always-inline pass.
     if (OptimizationLevel == 0) {
       if (InliningPass)
         PM->add(InliningPass);
@@ -83,9 +84,10 @@ namespace llvm {
       
       PM->add(createIPSCCPPass());              // IP SCCP
       PM->add(createDeadArgEliminationPass());  // Dead argument elimination
+      
+      PM->add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+      PM->add(createCFGSimplificationPass());   // Clean up after IPCP & DAE
     }
-    PM->add(createInstructionCombiningPass());  // Clean up after IPCP & DAE
-    PM->add(createCFGSimplificationPass());     // Clean up after IPCP & DAE
     
     // Start of CallGraph SCC passes.
     if (UnitAtATime && HaveExceptions)
@@ -120,7 +122,6 @@ namespace llvm {
     PM->add(createLoopDeletionPass());          // Delete dead loops
     if (UnrollLoops)
       PM->add(createLoopUnrollPass());          // Unroll small loops
-    PM->add(createInstructionCombiningPass());  // Clean up after the unroller
     if (OptimizationLevel > 1)
       PM->add(createGVNPass());                 // Remove redundancies
     PM->add(createMemCpyOptPass());             // Remove memcpy / form memset
@@ -134,6 +135,7 @@ namespace llvm {
     PM->add(createDeadStoreEliminationPass());  // Delete dead stores
     PM->add(createAggressiveDCEPass());         // Delete dead instructions
     PM->add(createCFGSimplificationPass());     // Merge & remove BBs
+    PM->add(createInstructionCombiningPass());  // Clean up after everything.
 
     if (UnitAtATime) {
       PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
new file mode 100644
index 00000000000..aeb2bc4d1a2
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -0,0 +1,73 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.7"
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+
+; PR2338
+define void @test1() nounwind ssp {
+  %retval = alloca i32, align 4
+  %i = alloca i8*, align 8
+  %call = call i8* @malloc(i64 1)
+  store i8* %call, i8** %i, align 8
+  %tmp = load i8** %i, align 8
+  store i8 1, i8* %tmp
+  %tmp1 = load i8** %i, align 8
+  call void @free(i8* %tmp1)
+  ret void
+
+; CHECK: @test1
+; CHECK-NEXT: ret void
+}
+
+
+; PR6627 - This whole nasty sequence should be flattened down to a single
+; 32-bit comparison.
+define void @test2(i8* %arrayidx) nounwind ssp {
+entry:
+  %xx = bitcast i8* %arrayidx to i32*
+  %x1 = load i32* %xx, align 4
+  %tmp = trunc i32 %x1 to i8
+  %conv = zext i8 %tmp to i32
+  %cmp = icmp eq i32 %conv, 127
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1
+  %tmp5 = load i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
+  %cmp7 = icmp eq i32 %conv6, 69
+  br i1 %cmp7, label %land.lhs.true9, label %if.end
+
+land.lhs.true9:                                   ; preds = %land.lhs.true
+  %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2
+  %tmp13 = load i8* %arrayidx12, align 1
+  %conv14 = zext i8 %tmp13 to i32
+  %cmp15 = icmp eq i32 %conv14, 76
+  br i1 %cmp15, label %land.lhs.true17, label %if.end
+
+land.lhs.true17:                                  ; preds = %land.lhs.true9
+  %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3
+  %tmp21 = load i8* %arrayidx20, align 1
+  %conv22 = zext i8 %tmp21 to i32
+  %cmp23 = icmp eq i32 %conv22, 70
+  br i1 %cmp23, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.lhs.true17
+  %call25 = call i32 (...)* @doo()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %land.lhs.true17, %land.lhs.true9, %land.lhs.true, %entry
+  ret void
+
+; CHECK: @test2
+; CHECK: %x1 = load i32* %xx, align 4
+; CHECK-NEXT: icmp eq i32 %x1, 1179403647
+; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
+}
+
+declare i32 @doo(...)
+
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
index f2005891a59..39954d8a498 100644
--- a/test/Transforms/ScalarRepl/dg.exp
+++ b/test/Transforms/ScalarRepl/dg.exp
@@ -1,3 +1,3 @@
 load_lib llvm.exp
 
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]