diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 477d88e2905..e1f8fa421f5 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 /// InsertCastOfTo - Insert a cast of V to the specified type, doing what
@@ -442,6 +443,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
   const Loop *L = S->getLoop();
 
+  // First check for an existing canonical IV in a suitable type.
+  PHINode *CanonicalIV = 0;
+  if (PHINode *PN = L->getCanonicalInductionVariable())
+    if (SE.isSCEVable(PN->getType()) &&
+        isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) &&
+        SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+      CanonicalIV = PN;
+
+  // Rewrite an AddRec in terms of the canonical induction variable, if
+  // its type is more narrow.
+  if (CanonicalIV &&
+      SE.getTypeSizeInBits(CanonicalIV->getType()) >
+      SE.getTypeSizeInBits(Ty)) {
+    SCEVHandle Start = SE.getAnyExtendExpr(S->getStart(),
+                                           CanonicalIV->getType());
+    SCEVHandle Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
+                                          CanonicalIV->getType());
+    Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
+    BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+    BasicBlock::iterator NewInsertPt =
+      next(BasicBlock::iterator(cast<Instruction>(V)));
+    while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
+    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+                      NewInsertPt);
+    setInsertionPoint(SaveInsertPt);
+    return V;
+  }
+
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
     std::vector<SCEVHandle> NewOps(S->getOperands());
@@ -475,6 +504,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // {0,+,1} --> Insert a canonical induction variable into the loop!
   if (S->isAffine() &&
       S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) {
+    // If there's a canonical IV, just use it.
+    if (CanonicalIV) {
+      assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+             "IVs with types different from the canonical IV should "
+             "already have been handled!");
+      return CanonicalIV;
+    }
+
     // Create and insert the PHI node for the induction variable in the
     // specified loop.
     BasicBlock *Header = L->getHeader();
@@ -502,18 +539,16 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     return PN;
   }
 
+  // {0,+,F} --> {0,+,1} * F
   // Get the canonical induction variable I for this loop.
-  Value *I = getOrInsertCanonicalInductionVariable(L, Ty);
+  Value *I = CanonicalIV ?
+             CanonicalIV :
+             getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
   if (S->isAffine()) {   // {0,+,F} --> i*F
     Value *F = expandCodeFor(S->getOperand(1), Ty);
-    
-    // If the step is by one, just return the inserted IV.
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(F))
-      if (CI->getValue() == 1)
-        return I;
-    
+
     // If the insert point is directly inside of the loop, emit the multiply at
     // the insert point.  Otherwise, L is a loop that is a parent of the insert
     // point loop.  If we can, move the multiply to the outer most loop that it
@@ -548,9 +583,17 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // into this folder.
   SCEVHandle IH = SE.getUnknown(I);   // Get I as a "symbolic" SCEV.
 
-  SCEVHandle V = S->evaluateAtIteration(IH, SE);
+  // Promote S up to the canonical IV type, if the cast is foldable.
+  SCEVHandle NewS = S;
+  SCEVHandle Ext = SE.getNoopOrAnyExtend(S, I->getType());
+  if (isa<SCEVAddRecExpr>(Ext))
+    NewS = Ext;
+
+  SCEVHandle V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
   //cerr << "Evaluated: " << *this << "\n     to: " << *V << "\n";
 
+  // Truncate the result down to the original type, if needed.
+  SCEVHandle T = SE.getTruncateOrNoop(V, Ty);
   return expand(V);
 }
 
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 83503fdea24..38b11985519 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 
   // Expand the code for the iteration count into the preheader of the loop.
   BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
                                           Preheader->getTerminator());
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
@@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // in this loop, insert a canonical induction variable of the largest size.
   Value *IndVar = 0;
   if (NeedCannIV) {
+    // Check to see if the loop already has a canonical-looking induction
+    // variable. If one is present and it's wider than the planned canonical
+    // induction variable, temporarily remove it, so that the Rewriter
+    // doesn't attempt to reuse it.
+    PHINode *OldCannIV = L->getCanonicalInductionVariable();
+    if (OldCannIV) {
+      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+          SE->getTypeSizeInBits(LargestType))
+        OldCannIV->removeFromParent();
+      else
+        OldCannIV = 0;
+    }
+
     IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+
     ++NumInserted;
     Changed = true;
     DOUT << "INDVARS: New CanIV: " << *IndVar;
+
+    // Now that the official induction variable is established, reinsert
+    // the old canonical-looking variable after it so that the IR remains
+    // consistent. It will be deleted as part of the dead-PHI deletion at
+    // the end of the pass.
+    if (OldCannIV)
+      OldCannIV->insertAfter(cast<Instruction>(IndVar));
   }
 
   // If we have a trip count expression, rewrite the loop's exit condition
@@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
          E = List.end(); UI != E; ++UI) {
       SCEVHandle Offset = UI->getOffset();
       Value *Op = UI->getOperandValToReplace();
+      const Type *UseTy = Op->getType();
       Instruction *User = UI->getUser();
-      bool isSigned = UI->isSigned();
 
       // Compute the final addrec to expand into code.
       SCEVHandle AR = IU->getReplacementExpr(*UI);
@@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         // Expand loop-invariant values in the loop preheader. They will
         // be sunk to the exit block later, if possible.
         NewVal =
-          Rewriter.expandCodeFor(AR, LargestType,
+          Rewriter.expandCodeFor(AR, UseTy,
                                  L->getLoopPreheader()->getTerminator());
         Rewriter.setInsertionPoint(I);
         ++NumReplaced;
@@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
         if (!Stride->isLoopInvariant(L))
           continue;
 
-        const Type *IVTy = Offset->getType();
-        const Type *UseTy = Op->getType();
-
-        // Promote the Offset and Stride up to the canonical induction
-        // variable's bit width.
-        SCEVHandle PromotedOffset = Offset;
-        SCEVHandle PromotedStride = Stride;
-        if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
-          // It doesn't matter for correctness whether zero or sign extension
-          // is used here, since the value is truncated away below, but if the
-          // value is signed, sign extension is more likely to be folded.
-          if (isSigned) {
-            PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
-            PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
-          } else {
-            PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
-            // If the stride is obviously negative, use sign extension to
-            // produce things like x-1 instead of x+255.
-            if (isa<SCEVConstant>(PromotedStride) &&
-                cast<SCEVConstant>(PromotedStride)
-                  ->getValue()->getValue().isNegative())
-              PromotedStride = SE->getSignExtendExpr(PromotedStride,
-                                                     LargestType);
-            else
-              PromotedStride = SE->getZeroExtendExpr(PromotedStride,
-                                                     LargestType);
-          }
-        }
-
-        // Create the SCEV representing the offset from the canonical
-        // induction variable, still in the canonical induction variable's
-        // type, so that all expanded arithmetic is done in the same type.
-        SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
-                                             PromotedStride, L);
-        // Add the PromotedOffset as a separate step, because it may not be
-        // loop-invariant.
-        NewAR = SE->getAddExpr(NewAR, PromotedOffset);
-
-        // Expand the addrec into instructions.
-        Value *V = Rewriter.expandCodeFor(NewAR);
-
-        // Insert an explicit cast if necessary to truncate the value
-        // down to the original stride type. This is done outside of
-        // SCEVExpander because in SCEV expressions, a truncate of an
-        // addrec is always folded.
-        if (LargestType != IVTy) {
-          if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
-            NewAR = SE->getTruncateExpr(NewAR, IVTy);
-          if (Rewriter.isInsertedExpression(NewAR))
-            V = Rewriter.expandCodeFor(NewAR);
-          else {
-            V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
-                                                                IVTy, false),
-                                        V, IVTy);
-            assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
-                   "LargestType wasn't actually the largest type!");
-            // Force the rewriter to use this trunc whenever this addrec
-            // appears so that it doesn't insert new phi nodes or
-            // arithmetic in a different type.
-            Rewriter.addInsertedValue(V, NewAR);
-          }
-        }
-
-        DOUT << "INDVARS: Made offset-and-trunc IV for offset "
-             << *IVTy << " " << *Offset << ": ";
-        DEBUG(WriteAsOperand(*DOUT, V, false));
-        DOUT << "\n";
-
         // Now expand it into actual Instructions and patch it into place.
         NewVal = Rewriter.expandCodeFor(AR, UseTy);
       }
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index d70e6049797..b4a2c506189 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | opt -indvars  | llvm-dis | %prcontext Loop: 1 | grep %indvar
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | %prcontext ^Loop: 1 | grep %Canonical
 
 ; The indvar simplification code should ensure that the first PHI in the block 
 ; is the canonical one!
diff --git a/test/Transforms/IndVarSimplify/masked-iv.ll b/test/Transforms/IndVarSimplify/masked-iv.ll
new file mode 100644
index 00000000000..c7583c9655a
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/masked-iv.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep trunc | count 1
+
+; Indvars should do the IV arithmetic in the canonical IV type (i64),
+; and only use one truncation.
+
+define void @foo(i64* %A, i64* %B, i64 %n, i64 %a, i64 %s) nounwind {
+entry:
+	%t0 = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %t0, label %bb.preheader, label %return
+
+bb.preheader:		; preds = %entry
+	br label %bb
+
+bb:		; preds = %bb, %bb.preheader
+	%i.01 = phi i64 [ %t6, %bb ], [ %a, %bb.preheader ]		; <i64> [#uses=3]
+	%t1 = and i64 %i.01, 255		; <i64> [#uses=1]
+	%t2 = getelementptr i64* %A, i64 %t1		; <i64*> [#uses=1]
+	store i64 %i.01, i64* %t2, align 8
+	%t6 = add i64 %i.01, %s		; <i64> [#uses=1]
+	br label %bb
+
+return:		; preds = %entry
+	ret void
+}