From acdb4aaf9b1f2edd96163c27bcc4e0557014f51e Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 7 Jan 2012 03:16:50 +0000 Subject: [PATCH] LSR: Don't optimize loops if an outer loop has no preheader. LoopSimplify may not run on some outer loops, e.g. because of indirect branches. SCEVExpander simply cannot handle outer loops with no preheaders. Fixes rdar://10655343 SCEVExpander segfault. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147718 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 9 +++- .../LoopStrengthReduce/2012-02-nopreheader.ll | 44 +++++++++++++++++-- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index f59e156c93a..8f3a5ab071c 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3842,8 +3842,15 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. - if (!L->isLoopSimplifyForm()) return; + if (!L->isLoopSimplifyForm()) + return; + // All outer loops must have preheaders, or SCEVExpander may not be able to + // materialize an AddRecExpr whose Start is an outer AddRecExpr. + for (const Loop *OuterLoop = L; (OuterLoop = OuterLoop->getParentLoop());) { + if (!OuterLoop->getLoopPreheader()) + return; + } // If there's no interesting work to be done, bail early. if (IU.empty()) return; diff --git a/test/Transforms/LoopStrengthReduce/2012-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-02-nopreheader.ll index 78d4ef7bd82..d7f5723188c 100644 --- a/test/Transforms/LoopStrengthReduce/2012-02-nopreheader.ll +++ b/test/Transforms/LoopStrengthReduce/2012-02-nopreheader.ll @@ -1,6 +1,7 @@ ; RUN: opt -loop-reduce -S < %s | FileCheck %s ; ; "SelectionDAGBuilder shouldn't visit PHI nodes!" assert. 
+; SCEVExpander segfault on simple test case target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-f128:128:128-n8:16:32" target triple = "i386-apple-darwin" @@ -9,11 +10,12 @@ target triple = "i386-apple-darwin" ; This involves a nested AddRec, the outer AddRec's loop invariant components ; cannot find a preheader, so they should be expanded in the loop header ; (bb7.lr.ph.us) below the existing phi i.12.us. +; Currently, LSR won't kick in on such loops. ; CHECK: @nopreheader -; CHECK: bb7.lr.ph.us: -; CHECK: %lsr.iv = phi float* ; CHECK: bb7.us: -; CHECK: %lsr.iv2 = phi float* +; CHECK-NOT: phi float* +; CHECK: %j.01.us = phi i32 +; CHECK-NOT: phi float* define void @nopreheader(float* nocapture %a, i32 %n) nounwind { entry: %0 = sdiv i32 %n, undef @@ -48,3 +50,39 @@ bb9: ; preds = %bb9, %bb8.preheader return: ; preds = %bb9, %bb9.us, %bb10.preheader ret void } + +; In this case, SCEVExpander simply cannot materialize the AddRecExpr +; that LSR picks. We must detect that %bb8.preheader does not have a +; preheader and avoid performing LSR on %bb7. 
+; CHECK: @nopreheader2
+; CHECK: bb7:
+; CHECK: %indvar = phi i32
+define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind { ; every branch in this function is an indirectbr
+entry:
+  indirectbr i8* undef, [label %bb]
+
+bb: ; preds = %bb, %entry
+  indirectbr i8* undef, [label %bb3, label %bb]
+
+bb3: ; preds = %bb3, %bb
+  indirectbr i8* undef, [label %bb8.preheader, label %bb3] ; two targets; presumably why the outer loop has no dedicated preheader
+
+bb8.preheader: ; preds = %bb9, %bb3
+  %indvar5 = phi i32 [ %indvar.next6, %bb9 ], [ 0, %bb3 ] ; outer-loop counter, incremented in %bb9
+  %tmp26 = add i32 %indvar5, 13 ; first array index, derived from the outer counter
+  indirectbr i8* null, [label %bb7]
+
+bb7: ; preds = %bb8.preheader, %bb7
+  %indvar = phi i32 [ 0, %bb8.preheader ], [ %indvar.next, %bb7 ] ; inner-loop counter; the directives above expect this phi to remain
+  %scevgep = getelementptr [200 x i32]* %Array2, i32 %tmp26, i32 %indvar ; address uses both the outer (%tmp26) and inner (%indvar) counters
+  store i32 undef, i32* %scevgep, align 4
+  %indvar.next = add i32 %indvar, 1
+  indirectbr i8* undef, [label %bb9, label %bb7] ; inner loop: back edge to %bb7 or exit to %bb9
+
+bb9: ; preds = %bb7
+  %indvar.next6 = add i32 %indvar5, 1
+  indirectbr i8* undef, [label %return, label %bb8.preheader] ; outer loop: back edge to %bb8.preheader or exit to %return
+
+return: ; preds = %bb9
+  ret void
+}