diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index e16e990bad2..0786b51f535 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -29,9 +29,7 @@
 namespace llvm {
   class APInt;
   class ConstantInt;
-  class Instruction;
   class Type;
-  class ConstantRange;
   class SCEVHandle;
   class ScalarEvolution;
 
@@ -282,6 +280,11 @@ namespace llvm {
     /// object is returned.
     SCEVHandle getSCEVAtScope(Value *V, const Loop *L) const;
 
+    /// isLoopGuardedByCond - Test whether entry to the loop is protected by
+    /// a conditional between LHS and RHS.
+    bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                             SCEV *LHS, SCEV *RHS);
+
     /// getIterationCount - If the specified loop has a predictable iteration
     /// count, return it, otherwise return a SCEVCouldNotCompute object.
     SCEVHandle getIterationCount(const Loop *L) const;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index c05ba8d0ef2..59e76c0538f 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1404,6 +1404,11 @@ namespace {
     SCEVHandle getSCEVAtScope(SCEV *V, const Loop *L);
 
 
+    /// isLoopGuardedByCond - Test whether entry to the loop is protected by
+    /// a conditional between LHS and RHS.
+    bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                             SCEV *LHS, SCEV *RHS);
+
     /// hasLoopInvariantIterationCount - Return true if the specified loop has
     /// an analyzable loop-invariant iteration count.
     bool hasLoopInvariantIterationCount(const Loop *L);
@@ -1476,10 +1481,6 @@ namespace {
     /// found.
     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
 
-    /// executesAtLeastOnce - Test whether entry to the loop is protected by
-    /// a conditional between LHS and RHS.
-    bool executesAtLeastOnce(const Loop *L, bool isSigned, SCEV *LHS, SCEV *RHS);
-
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
     /// constant number of times, and the PHI node is just a recurrence
@@ -2726,9 +2727,10 @@ ScalarEvolutionsImpl::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
   return 0;
 }
 
-/// executesAtLeastOnce - Test whether entry to the loop is protected by
+/// isLoopGuardedByCond - Test whether entry to the loop is protected by
 /// a conditional between LHS and RHS.
-bool ScalarEvolutionsImpl::executesAtLeastOnce(const Loop *L, bool isSigned,
+bool ScalarEvolutionsImpl::isLoopGuardedByCond(const Loop *L,
+                                               ICmpInst::Predicate Pred,
                                                SCEV *LHS, SCEV *RHS) {
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *PreheaderDest = L->getHeader();
@@ -2759,26 +2761,62 @@ bool ScalarEvolutionsImpl::executesAtLeastOnce(const Loop *L, bool isSigned,
     else
       Cond = ICI->getInversePredicate();
 
-    switch (Cond) {
-    case ICmpInst::ICMP_UGT:
-      if (isSigned) continue;
-      std::swap(PreCondLHS, PreCondRHS);
-      Cond = ICmpInst::ICMP_ULT;
-      break;
-    case ICmpInst::ICMP_SGT:
-      if (!isSigned) continue;
-      std::swap(PreCondLHS, PreCondRHS);
-      Cond = ICmpInst::ICMP_SLT;
-      break;
-    case ICmpInst::ICMP_ULT:
-      if (isSigned) continue;
-      break;
-    case ICmpInst::ICMP_SLT:
-      if (!isSigned) continue;
-      break;
-    default:
-      continue;
-    }
+    if (Cond == Pred)
+      ; // An exact match.
+    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+      ; // The actual condition is beyond sufficient.
+    else
+      // Check a few special cases.
+      switch (Cond) {
+      case ICmpInst::ICMP_UGT:
+        if (Pred == ICmpInst::ICMP_ULT) {
+          std::swap(PreCondLHS, PreCondRHS);
+          Cond = ICmpInst::ICMP_ULT;
+          break;
+        }
+        continue;
+      case ICmpInst::ICMP_SGT:
+        if (Pred == ICmpInst::ICMP_SLT) {
+          std::swap(PreCondLHS, PreCondRHS);
+          Cond = ICmpInst::ICMP_SLT;
+          break;
+        }
+        continue;
+      case ICmpInst::ICMP_NE:
+        // Expressions like (x >u 0) are often canonicalized to (x != 0),
+        // so check for this case by checking if the NE is comparing against
+        // a minimum or maximum constant.
+        if (!ICmpInst::isTrueWhenEqual(Pred))
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+            const APInt &A = CI->getValue();
+            switch (Pred) {
+            case ICmpInst::ICMP_SLT:
+              if (A.isMaxSignedValue()) break;
+              continue;
+            case ICmpInst::ICMP_SGT:
+              if (A.isMinSignedValue()) break;
+              continue;
+            case ICmpInst::ICMP_ULT:
+              if (A.isMaxValue()) break;
+              continue;
+            case ICmpInst::ICMP_UGT:
+              if (A.isMinValue()) break;
+              continue;
+            default:
+              continue;
+            }
+            Cond = ICmpInst::ICMP_NE;
+            // NE is symmetric but the original comparison may not be. Swap
+            // the operands if necessary so that they match below.
+            if (isa<SCEVConstant>(LHS))
+              std::swap(PreCondLHS, PreCondRHS);
+            break;
+          }
+        continue;
+      default:
+        // We weren't able to reconcile the condition.
+        continue;
+      }
 
     if (!PreCondLHS->getType()->isInteger()) continue;
 
@@ -2819,7 +2857,8 @@ HowManyLessThans(SCEV *LHS, SCEV *RHS, const Loop *L, bool isSigned) {
     // First, we get the value of the LHS in the first iteration: n
     SCEVHandle Start = AddRec->getOperand(0);
 
-    if (executesAtLeastOnce(L, isSigned,
+    if (isLoopGuardedByCond(L,
+                            isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
                             SE.getMinusSCEV(AddRec->getOperand(0), One), RHS)) {
       // Since we know that the condition is true in order to enter the loop,
       // we know that it will run exactly m-n times.
@@ -2997,6 +3036,13 @@ void ScalarEvolution::setSCEV(Value *V, const SCEVHandle &H) {
 }
 
 
+bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
+                                          ICmpInst::Predicate Pred,
+                                          SCEV *LHS, SCEV *RHS) {
+  ScalarEvolutionsImpl *SEImpl = (ScalarEvolutionsImpl*)Impl; // backing impl
+  return SEImpl->isLoopGuardedByCond(L, Pred, LHS, RHS);
+}
+
 SCEVHandle ScalarEvolution::getIterationCount(const Loop *L) const {
   return ((ScalarEvolutionsImpl*)Impl)->getIterationCount(L);
 }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index fabbf6e19e5..ccd25d8fb50 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -89,13 +90,14 @@ namespace {
 
     void EliminatePointerRecurrence(PHINode *PN, BasicBlock *Preheader,
                                     SmallPtrSet<Instruction*, 16> &DeadInsts);
-    Instruction *LinearFunctionTestReplace(Loop *L, SCEV *IterationCount,
-                                           SCEVExpander &RW);
+    void LinearFunctionTestReplace(Loop *L, SCEVHandle IterationCount, Value *IndVar,
+                                   BasicBlock *ExitingBlock,
+                                   BranchInst *BI,
+                                   SCEVExpander &Rewriter);
     void RewriteLoopExitValues(Loop *L, SCEV *IterationCount);
 
     void DeleteTriviallyDeadInstructions(SmallPtrSet<Instruction*, 16> &Insts);
 
-    void OptimizeCanonicalIVType(Loop *L);
     void HandleFloatingPointIV(Loop *L, PHINode *PH, 
                                SmallPtrSet<Instruction*, 16> &DeadInsts);
   };
@@ -225,68 +227,54 @@ void IndVarSimplify::EliminatePointerRecurrence(PHINode *PN,
 /// variable.  This pass is able to rewrite the exit tests of any loop where the
 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
 /// is actually a much broader range than just linear tests.
-///
-/// This method returns a "potentially dead" instruction whose computation chain
-/// should be deleted when convenient.
-Instruction *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
-                                                       SCEV *IterationCount,
-                                                       SCEVExpander &RW) {
-  // Find the exit block for the loop.  We can currently only handle loops with
-  // a single exit.
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1) return 0;
-  BasicBlock *ExitBlock = ExitBlocks[0];
-
-  // Make sure there is only one predecessor block in the loop.
-  BasicBlock *ExitingBlock = 0;
-  for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
-       PI != PE; ++PI)
-    if (L->contains(*PI)) {
-      if (ExitingBlock == 0)
-        ExitingBlock = *PI;
-      else
-        return 0;  // Multiple exits from loop to this block.
-    }
-  assert(ExitingBlock && "Loop info is broken");
-
-  if (!isa<BranchInst>(ExitingBlock->getTerminator()))
-    return 0;  // Can't rewrite non-branch yet
-  BranchInst *BI = cast<BranchInst>(ExitingBlock->getTerminator());
-  assert(BI->isConditional() && "Must be conditional to be part of loop!");
-
-  Instruction *PotentiallyDeadInst = dyn_cast<Instruction>(BI->getCondition());
-  
+void IndVarSimplify::LinearFunctionTestReplace(Loop *L,
+                                   SCEVHandle IterationCount,
+                                   Value *IndVar,
+                                   BasicBlock *ExitingBlock,
+                                   BranchInst *BI,
+                                   SCEVExpander &Rewriter) {
   // If the exiting block is not the same as the backedge block, we must compare
   // against the preincremented value, otherwise we prefer to compare against
   // the post-incremented value.
-  BasicBlock *Header = L->getHeader();
-  pred_iterator HPI = pred_begin(Header);
-  assert(HPI != pred_end(Header) && "Loop with zero preds???");
-  if (!L->contains(*HPI)) ++HPI;
-  assert(HPI != pred_end(Header) && L->contains(*HPI) &&
-         "No backedge in loop?");
+  Value *CmpIndVar;
+  if (ExitingBlock == L->getLoopLatch()) {
+    // What ScalarEvolution calls the "iteration count" is actually the
+    // number of times the branch is taken. Add one to get the number
+    // of times the branch is executed. If this addition may overflow,
+    // we have to be more pessimistic and cast the induction variable
+    // before doing the add.
+    SCEVHandle Zero = SE->getIntegerSCEV(0, IterationCount->getType());
+    SCEVHandle N =
+      SE->getAddExpr(IterationCount,
+                     SE->getIntegerSCEV(1, IterationCount->getType()));
+    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+        SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+      // No overflow. Cast the sum.
+      IterationCount = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Potential overflow. Cast before doing the add.
+      IterationCount = SE->getTruncateOrZeroExtend(IterationCount,
+                                                   IndVar->getType());
+      IterationCount =
+        SE->getAddExpr(IterationCount,
+                       SE->getIntegerSCEV(1, IndVar->getType()));
+    }
 
-  SCEVHandle TripCount = IterationCount;
-  Value *IndVar;
-  if (*HPI == ExitingBlock) {
     // The IterationCount expression contains the number of times that the
     // backedge actually branches to the loop header.  This is one less than the
     // number of times the loop executes, so add one to it.
-    ConstantInt *OneC = ConstantInt::get(IterationCount->getType(), 1);
-    TripCount = SE->getAddExpr(IterationCount, SE->getConstant(OneC));
-    IndVar = L->getCanonicalInductionVariableIncrement();
+    CmpIndVar = L->getCanonicalInductionVariableIncrement();
   } else {
     // We have to use the preincremented value...
-    IndVar = L->getCanonicalInductionVariable();
+    IterationCount = SE->getTruncateOrZeroExtend(IterationCount,
+                                                 IndVar->getType());
+    CmpIndVar = IndVar;
   }
-  
-  DOUT << "INDVARS: LFTR: TripCount = " << *TripCount
-       << "  IndVar = " << *IndVar << "\n";
 
   // Expand the code for the iteration count into the preheader of the loop.
   BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = RW.expandCodeFor(TripCount, Preheader->getTerminator());
+  Value *ExitCnt = Rewriter.expandCodeFor(IterationCount,
+                                          Preheader->getTerminator());
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   ICmpInst::Predicate Opcode;
@@ -295,14 +283,18 @@ Instruction *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
   else
     Opcode = ICmpInst::ICMP_EQ;
 
-  Value *Cond = new ICmpInst(Opcode, IndVar, ExitCnt, "exitcond", BI);
+  DOUT << "INDVARS: Rewriting loop exit condition to:\n"
+       << "      LHS:" << *CmpIndVar // includes a newline
+       << "       op:\t"
+       << (Opcode == ICmpInst::ICMP_NE ? "!=" : "=") << "\n"
+       << "      RHS:\t" << *IterationCount << "\n";
+
+  Value *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
   BI->setCondition(Cond);
   ++NumLFTR;
   Changed = true;
-  return PotentiallyDeadInst;
 }
 
-
 /// RewriteLoopExitValues - Check to see if this loop has a computable
 /// loop-invariant execution count.  If so, this means that we can compute the
 /// final value of any expressions that are recurrent in the loop, and
@@ -444,15 +436,100 @@ bool IndVarSimplify::doInitialization(Loop *L, LPPassManager &LPM) {
   return Changed;
 }
 
-bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// getEffectiveIndvarType - Return the widest type among Phi's own type and
+/// the destination types of the zext/sext instructions that use Phi.
+///
+static const Type *getEffectiveIndvarType(const PHINode *Phi) {
+  const Type *WidestTy = Phi->getType();
 
+  for (Value::use_const_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+       UI != UE; ++UI) {
+    // Only integer extensions of the PHI contribute a candidate type.
+    const Type *DestTy = 0;
+    if (const ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
+      DestTy = ZI->getDestTy();
+    else if (const SExtInst *SI = dyn_cast<SExtInst>(UI))
+      DestTy = SI->getDestTy();
+    if (!DestTy) continue;
+    if (DestTy->getPrimitiveSizeInBits() > WidestTy->getPrimitiveSizeInBits())
+      WidestTy = DestTy;
+  }
+
+  return WidestTy;
+}
+
+/// isOrigIVAlwaysNonNegative - Analyze the original induction variable
+/// in the loop to determine whether it would ever have a negative
+/// value. Only simple "i = C; i + 1 <s N" style loops are recognized.
+///
+/// TODO: This duplicates a fair amount of ScalarEvolution logic.
+/// Perhaps this can be merged with ScalarEvolution::getIterationCount.
+///
+static bool isOrigIVAlwaysNonNegative(const Loop *L,
+                                      const Instruction *OrigCond) {
+  // Verify that the loop is sane and find the exit condition.
+  const ICmpInst *Cmp = dyn_cast<ICmpInst>(OrigCond);
+  if (!Cmp) return false;
+
+  // For now, analyze only SLT loops for signed overflow.
+  if (Cmp->getPredicate() != ICmpInst::ICMP_SLT) return false;
+
+  // Get the increment instruction. Look past a SExtInst only when the
+  // bound is a constant that fits in the narrow (pre-extension) type;
+  // a larger bound would let the narrow induction variable wrap.
+  const Value *IncrVal = Cmp->getOperand(0);
+  if (SExtInst *SI = dyn_cast<SExtInst>(Cmp->getOperand(0))) {
+    const Type *NarrowTy = SI->getOperand(0)->getType();
+    if (!isa<ConstantInt>(Cmp->getOperand(1)) ||
+        !cast<ConstantInt>(Cmp->getOperand(1))->getValue()
+          .isSignedIntN(NarrowTy->getPrimitiveSizeInBits()))
+      return false;
+    IncrVal = SI->getOperand(0);
+  }
+
+  // For now, only analyze induction variables that have simple increments.
+  const BinaryOperator *IncrOp = dyn_cast<BinaryOperator>(IncrVal);
+  if (!IncrOp ||
+      IncrOp->getOpcode() != Instruction::Add ||
+      !isa<ConstantInt>(IncrOp->getOperand(1)) ||
+      !cast<ConstantInt>(IncrOp->getOperand(1))->equalsInt(1))
+    return false;
+
+  // Make sure the PHI looks like a normal IV.
+  const PHINode *PN = dyn_cast<PHINode>(IncrOp->getOperand(0));
+  if (!PN || PN->getNumIncomingValues() != 2)
+    return false;
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge = !IncomingEdge;
+  if (!L->contains(PN->getIncomingBlock(BackEdge)) ||
+      PN->getIncomingValue(BackEdge) != IncrOp)
+    return false;
+
+  // For now, only analyze loops with a constant start value, so that
+  // we can easily determine if the start value is non-negative and
+  // not a maximum value which would wrap on the first iteration.
+  const Value *InitialVal = PN->getIncomingValue(IncomingEdge);
+  if (!isa<ConstantInt>(InitialVal) ||
+      cast<ConstantInt>(InitialVal)->getValue().isNegative() ||
+      cast<ConstantInt>(InitialVal)->getValue().isMaxSignedValue())
+    return false;
+
+  // The original induction variable will start at some non-negative
+  // non-max value, it counts up by one, and the loop iterates only
+  // while it remains less than (signed) some value in the same type.
+  // As such, it will always be non-negative.
+  return true;
+}
+
+bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
 
   Changed = false;
-  BasicBlock *Header    = L->getHeader();
+  BasicBlock *Header       = L->getHeader();
+  BasicBlock *ExitingBlock = L->getExitingBlock();
   SmallPtrSet<Instruction*, 16> DeadInsts;
-  
+
   // Verify the input to the pass in already in LCSSA form.
   assert(L->isLCSSAForm());
 
@@ -486,35 +563,23 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
     }
   }
 
-  // If there are no induction variables in the loop, there is nothing more to
-  // do.
-  if (IndVars.empty()) {
-    // Actually, if we know how many times the loop iterates, lets insert a
-    // canonical induction variable to help subsequent passes.
-    if (!isa<SCEVCouldNotCompute>(IterationCount)) {
-      SCEVExpander Rewriter(*SE, *LI);
-      Rewriter.getOrInsertCanonicalInductionVariable(L,
-                                                     IterationCount->getType());
-      if (Instruction *I = LinearFunctionTestReplace(L, IterationCount,
-                                                     Rewriter)) {
-        SmallPtrSet<Instruction*, 16> InstructionsToDelete;
-        InstructionsToDelete.insert(I);
-        DeleteTriviallyDeadInstructions(InstructionsToDelete);
-      }
-    }
-    return Changed;
+  // Compute the type of the largest recurrence expression, and collect
+  // the set of the types of the other recurrence expressions.
+  const Type *LargestType = 0;
+  SmallSetVector<const Type *, 4> SizesToInsert;
+  if (!isa<SCEVCouldNotCompute>(IterationCount)) {
+    LargestType = IterationCount->getType();
+    SizesToInsert.insert(IterationCount->getType());
   }
-
-  // Compute the type of the largest recurrence expression.
-  //
-  const Type *LargestType = IndVars[0].first->getType();
-  bool DifferingSizes = false;
-  for (unsigned i = 1, e = IndVars.size(); i != e; ++i) {
-    const Type *Ty = IndVars[i].first->getType();
-    DifferingSizes |= 
-      Ty->getPrimitiveSizeInBits() != LargestType->getPrimitiveSizeInBits();
-    if (Ty->getPrimitiveSizeInBits() > LargestType->getPrimitiveSizeInBits())
-      LargestType = Ty;
+  for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
+    const PHINode *PN = IndVars[i].first;
+    SizesToInsert.insert(PN->getType());
+    const Type *EffTy = getEffectiveIndvarType(PN);
+    SizesToInsert.insert(EffTy);
+    if (!LargestType ||
+        EffTy->getPrimitiveSizeInBits() >
+          LargestType->getPrimitiveSizeInBits())
+      LargestType = EffTy;
   }
 
   // Create a rewriter object which we'll use to transform the code with.
@@ -522,17 +587,32 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // Now that we know the largest of of the induction variables in this loop,
   // insert a canonical induction variable of the largest size.
-  Value *IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
-  ++NumInserted;
-  Changed = true;
-  DOUT << "INDVARS: New CanIV: " << *IndVar;
-
-  if (!isa<SCEVCouldNotCompute>(IterationCount)) {
-    IterationCount = SE->getTruncateOrZeroExtend(IterationCount, LargestType);
-    if (Instruction *DI = LinearFunctionTestReplace(L, IterationCount,Rewriter))
-      DeadInsts.insert(DI);
+  Value *IndVar = 0;
+  if (!SizesToInsert.empty()) {
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+    ++NumInserted;
+    Changed = true;
+    DOUT << "INDVARS: New CanIV: " << *IndVar;
   }
 
+  // If we have a trip count expression, rewrite the loop's exit condition
+  // using it.  We can currently only handle loops with a single exit.
+  bool OrigIVAlwaysNonNegative = false;
+  if (!isa<SCEVCouldNotCompute>(IterationCount) && ExitingBlock)
+    // Can't rewrite non-branch yet.
+    if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) {
+      if (Instruction *OrigCond = dyn_cast<Instruction>(BI->getCondition())) {
+        // Determine if the OrigIV will ever have a non-zero sign bit.
+        OrigIVAlwaysNonNegative = isOrigIVAlwaysNonNegative(L, OrigCond);
+
+        // We'll be replacing the original condition, so it'll be dead.
+        DeadInsts.insert(OrigCond);
+      }
+
+      LinearFunctionTestReplace(L, IterationCount, IndVar,
+                                ExitingBlock, BI, Rewriter);
+    }
+
   // Now that we have a canonical induction variable, we can rewrite any
   // recurrences in terms of the induction variable.  Start with the auxillary
   // induction variables, and recursively rewrite any of their uses.
@@ -541,21 +621,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // If there were induction variables of other sizes, cast the primary
   // induction variable to the right size for them, avoiding the need for the
   // code evaluation methods to insert induction variables of different sizes.
-  if (DifferingSizes) {
-    SmallVector<unsigned,4> InsertedSizes;
-    InsertedSizes.push_back(LargestType->getPrimitiveSizeInBits());
-    for (unsigned i = 0, e = IndVars.size(); i != e; ++i) {
-      unsigned ithSize = IndVars[i].first->getType()->getPrimitiveSizeInBits();
-      if (std::find(InsertedSizes.begin(), InsertedSizes.end(), ithSize)
-          == InsertedSizes.end()) {
-        PHINode *PN = IndVars[i].first;
-        InsertedSizes.push_back(ithSize);
-        Instruction *New = new TruncInst(IndVar, PN->getType(), "indvar",
-                                         InsertPt);
-        Rewriter.addInsertedValue(New, SE->getSCEV(New));
-        DOUT << "INDVARS: Made trunc IV for " << *PN
-             << "   NewVal = " << *New << "\n";
-      }
+  for (unsigned i = 0, e = SizesToInsert.size(); i != e; ++i) {
+    const Type *Ty = SizesToInsert[i];
+    if (Ty != LargestType) {
+      Instruction *New = new TruncInst(IndVar, Ty, "indvar", InsertPt);
+      Rewriter.addInsertedValue(New, SE->getSCEV(New));
+      DOUT << "INDVARS: Made trunc IV for type " << *Ty << ": "
+           << *New << "\n";
     }
   }
 
@@ -568,6 +640,23 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
          << "   into = " << *NewVal << "\n";
     NewVal->takeName(PN);
 
+    /// If the new canonical induction variable is wider than the original,
+    /// and the original has uses that are casts to wider types, see if the
+    /// truncate and extend can be omitted.
+    if (isa<TruncInst>(NewVal))
+      for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+           UI != UE; ++UI)
+        if (isa<ZExtInst>(UI) ||
+            (isa<SExtInst>(UI) && OrigIVAlwaysNonNegative)) {
+          Value *TruncIndVar = IndVar;
+          if (TruncIndVar->getType() != UI->getType())
+            TruncIndVar = new TruncInst(IndVar, UI->getType(), "truncindvar",
+                                        InsertPt);
+          UI->replaceAllUsesWith(TruncIndVar);
+          if (Instruction *DeadUse = dyn_cast<Instruction>(*UI))
+            DeadInsts.insert(DeadUse);
+        }
+
     // Replace the old PHI Node with the inserted computation.
     PN->replaceAllUsesWith(NewVal);
     DeadInsts.insert(PN);
@@ -603,125 +692,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 #endif
 
   DeleteTriviallyDeadInstructions(DeadInsts);
-  OptimizeCanonicalIVType(L);
   assert(L->isLCSSAForm());
   return Changed;
 }
 
-/// OptimizeCanonicalIVType - If loop induction variable is always
-/// sign or zero extended then extend the type of the induction 
-/// variable.
-void IndVarSimplify::OptimizeCanonicalIVType(Loop *L) {
-  PHINode *PH = L->getCanonicalInductionVariable();
-  if (!PH) return;
-  
-  // Check loop iteration count.
-  SCEVHandle IC = SE->getIterationCount(L);
-  if (isa<SCEVCouldNotCompute>(IC)) return;
-  SCEVConstant *IterationCount = dyn_cast<SCEVConstant>(IC);
-  if (!IterationCount) return;
-
-  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
-  unsigned BackEdge     = IncomingEdge^1;
-  
-  // Check IV uses. If all IV uses are either SEXT or ZEXT (except
-  // IV increment instruction) then this IV is suitable for this
-  // transformation.
-  bool isSEXT = false;
-  BinaryOperator *Incr = NULL;
-  const Type *NewType = NULL;
-  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end(); 
-      UI != UE; ++UI) {
-    const Type *CandidateType = NULL;
-    if (ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
-      CandidateType = ZI->getDestTy();
-    else if (SExtInst *SI = dyn_cast<SExtInst>(UI)) {
-      CandidateType = SI->getDestTy();
-      isSEXT = true;
-    }
-    else if ((Incr = dyn_cast<BinaryOperator>(UI))) {
-      // Validate IV increment instruction.
-      if (PH->getIncomingValue(BackEdge) == Incr)
-        continue;
-    }
-    if (!CandidateType) {
-      NewType = NULL;
-      break;
-    }
-    if (!NewType)
-      NewType = CandidateType;
-    else if (NewType != CandidateType) {
-      NewType = NULL;
-      break;
-    }
-  }
-
-  // IV uses are not suitable then avoid this transformation.
-  if (!NewType || !Incr)
-    return;
-
-  // IV increment instruction has two uses, one is loop exit condition
-  // and second is the IV (phi node) itself.
-  ICmpInst *Exit = NULL;
-  for(Value::use_iterator II = Incr->use_begin(), IE = Incr->use_end();
-      II != IE; ++II) {
-    if (PH == *II)  continue;
-    Exit = dyn_cast<ICmpInst>(*II);
-    break;
-  }
-  if (!Exit) return;
-  ConstantInt *EV = dyn_cast<ConstantInt>(Exit->getOperand(0));
-  if (!EV) 
-    EV = dyn_cast<ConstantInt>(Exit->getOperand(1));
-  if (!EV) return;
-
-  // Check iteration count max value to avoid loops that wrap around IV.
-  APInt ICount = IterationCount->getValue()->getValue();
-  if (ICount.isNegative()) return;
-  uint32_t BW = PH->getType()->getPrimitiveSizeInBits();
-  APInt Max = (isSEXT ? APInt::getSignedMaxValue(BW) : APInt::getMaxValue(BW));
-  if (ICount.getZExtValue() > Max.getZExtValue())  return;                         
-
-  // Extend IV type.
-
-  SCEVExpander Rewriter(*SE, *LI);
-  Value *NewIV = Rewriter.getOrInsertCanonicalInductionVariable(L,NewType);
-  PHINode *NewPH = cast<PHINode>(NewIV);
-  Instruction *NewIncr = cast<Instruction>(NewPH->getIncomingValue(BackEdge));
-
-  // Replace all SEXT or ZEXT uses.
-  SmallVector<Instruction *, 4> PHUses;
-  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end(); 
-      UI != UE; ++UI) {
-      Instruction *I = cast<Instruction>(UI);
-      PHUses.push_back(I);
-  }
-  while (!PHUses.empty()){
-    Instruction *Use = PHUses.back(); PHUses.pop_back();
-    if (Incr == Use) continue;
-    
-    SE->deleteValueFromRecords(Use);
-    Use->replaceAllUsesWith(NewIV);
-    Use->eraseFromParent();
-  }
-
-  // Replace exit condition.
-  ConstantInt *NEV = ConstantInt::get(NewType, EV->getZExtValue());
-  Instruction *NE = new ICmpInst(Exit->getPredicate(),
-                                 NewIncr, NEV, "new.exit", 
-                                 Exit->getParent()->getTerminator());
-  SE->deleteValueFromRecords(Exit);
-  Exit->replaceAllUsesWith(NE);
-  Exit->eraseFromParent();
-  
-  // Remove old IV and increment instructions.
-  SE->deleteValueFromRecords(PH);
-  PH->removeIncomingValue((unsigned)0);
-  PH->removeIncomingValue((unsigned)0);
-  SE->deleteValueFromRecords(Incr);
-  Incr->eraseFromParent();
-}
-
 /// Return true if it is OK to use SIToFPInst for an inducation variable
 /// with given inital and exit values.
 static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
diff --git a/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll b/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll
new file mode 100644
index 00000000000..703fce4e292
--- /dev/null
+++ b/test/Transforms/IndVarsSimplify/promote-iv-to-eliminate-casts.ll
@@ -0,0 +1,62 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep sext
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i64 @test(i64* nocapture %first, i32 %count) nounwind readonly {
+entry:
+	%t0 = icmp sgt i32 %count, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb1, %bb.nph
+	%result.02 = phi i64 [ %t5, %bb1 ], [ 0, %bb.nph ]		; <i64> [#uses=1]
+	%n.01 = phi i32 [ %t6, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=2]
+	%t1 = sext i32 %n.01 to i64		; <i64> [#uses=1]
+	%t2 = getelementptr i64* %first, i64 %t1		; <i64*> [#uses=1]
+	%t3 = load i64* %t2, align 8		; <i64> [#uses=1]
+	%t4 = lshr i64 %t3, 4		; <i64> [#uses=1]
+	%t5 = add i64 %t4, %result.02		; <i64> [#uses=2]
+	%t6 = add i32 %n.01, 1		; <i32> [#uses=2]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%t7 = icmp slt i32 %t6, %count		; <i1> [#uses=1]
+	br i1 %t7, label %bb, label %bb1.bb2_crit_edge
+
+bb1.bb2_crit_edge:		; preds = %bb1
+	%.lcssa = phi i64 [ %t5, %bb1 ]		; <i64> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb1.bb2_crit_edge, %entry
+	%result.0.lcssa = phi i64 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i64> [#uses=1]
+	ret i64 %result.0.lcssa
+}
+
+define void @foo(i16 signext %N, i32* nocapture %P) nounwind {
+entry:
+	%t0 = icmp sgt i16 %N, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.nph, label %return
+
+bb.nph:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb1, %bb.nph
+	%i.01 = phi i16 [ %t3, %bb1 ], [ 0, %bb.nph ]		; <i16> [#uses=2]
+	%t1 = sext i16 %i.01 to i64		; <i64> [#uses=1]
+	%t2 = getelementptr i32* %P, i64 %t1		; <i32*> [#uses=1]
+	store i32 123, i32* %t2, align 4
+	%t3 = add i16 %i.01, 1		; <i16> [#uses=2]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%t4 = icmp slt i16 %t3, %N		; <i1> [#uses=1]
+	br i1 %t4, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:		; preds = %bb1
+	br label %return
+
+return:		; preds = %bb1.return_crit_edge, %entry
+	ret void
+}