diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 6262d916b8e..2977afa1792 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1154,70 +1154,81 @@ Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI,
     // We win, no conversion needed.
     return NV;
   }
+
+  // If the result type of the 'union' is a pointer, then this must be ptr->ptr
+  // cast.  Anything else would result in NV being an integer.
+  if (isa<PointerType>(NV->getType())) {
+    assert(isa<PointerType>(LI->getType()));
+    return new BitCastInst(NV, LI->getType(), LI->getName(), LI);
+  }
 
-  if (const VectorType *PTy = dyn_cast<VectorType>(NV->getType())) {
+  if (const VectorType *VTy = dyn_cast<VectorType>(NV->getType())) {
     // If the result alloca is a vector type, this is either an element
     // access or a bitcast to another vector type.
-    if (isa<VectorType>(LI->getType())) {
-      NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
-    } else {
-      // Must be an element access.
-      const TargetData &TD = getAnalysis<TargetData>();
-      unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
-      NV = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt),
-                                  "tmp", LI);
+    if (isa<VectorType>(LI->getType()))
+      return new BitCastInst(NV, LI->getType(), LI->getName(), LI);
+
+    // Otherwise it must be an element access.
+    const TargetData &TD = getAnalysis<TargetData>();
+    unsigned Elt = 0;
+    if (Offset) {
+      unsigned EltSize = TD.getABITypeSizeInBits(VTy->getElementType());
+      Elt = Offset/EltSize;
+      Offset -= EltSize*Elt;
     }
-  } else if (isa<PointerType>(NV->getType())) {
-    assert(isa<PointerType>(LI->getType()));
-    // Must be ptr->ptr cast.  Anything else would result in NV being
-    // an integer.
+    NV = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt),
+                                "tmp", LI);
+
+    // If we're done, return this element.
+    if (NV->getType() == LI->getType() && Offset == 0)
+      return NV;
+  }
+
+  const IntegerType *NTy = cast<IntegerType>(NV->getType());
+
+  // If this is a big-endian system and the load is narrower than the
+  // full alloca type, we need to do a shift to get the right bits.
+  int ShAmt = 0;
+  const TargetData &TD = getAnalysis<TargetData>();
+  if (TD.isBigEndian()) {
+    // On big-endian machines, the lowest bit is stored at the bit offset
+    // from the pointer given by getTypeStoreSizeInBits.  This matters for
+    // integers with a bitwidth that is not a multiple of 8.
+    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+            TD.getTypeStoreSizeInBits(LI->getType()) - Offset;
+  } else {
+    ShAmt = Offset;
+  }
+
+  // Note: we support negative bitwidths (with shl) which are not defined.
+  // We do this to support (f.e.) loads off the end of a structure where
+  // only some bits are used.
+  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
+    NV = BinaryOperator::createLShr(NV,
+                                    ConstantInt::get(NV->getType(),ShAmt),
+                                    LI->getName(), LI);
+  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
+    NV = BinaryOperator::createShl(NV,
+                                   ConstantInt::get(NV->getType(),-ShAmt),
+                                   LI->getName(), LI);
+
+  // Finally, unconditionally truncate the integer to the right width.
+  unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
+  if (LIBitWidth < NTy->getBitWidth())
+    NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
+                       LI->getName(), LI);
+
+  // If the result is an integer, this is a trunc or bitcast.
+  if (isa<IntegerType>(LI->getType())) {
+    // Should be done.
+  } else if (LI->getType()->isFloatingPoint()) {
+    // Just do a bitcast, we know the sizes match up.
     NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
   } else {
-    const IntegerType *NTy = cast<IntegerType>(NV->getType());
-
-    // If this is a big-endian system and the load is narrower than the
-    // full alloca type, we need to do a shift to get the right bits.
-    int ShAmt = 0;
-    const TargetData &TD = getAnalysis<TargetData>();
-    if (TD.isBigEndian()) {
-      // On big-endian machines, the lowest bit is stored at the bit offset
-      // from the pointer given by getTypeStoreSizeInBits.  This matters for
-      // integers with a bitwidth that is not a multiple of 8.
-      ShAmt = TD.getTypeStoreSizeInBits(NTy) -
-              TD.getTypeStoreSizeInBits(LI->getType()) - Offset;
-    } else {
-      ShAmt = Offset;
-    }
-
-    // Note: we support negative bitwidths (with shl) which are not defined.
-    // We do this to support (f.e.) loads off the end of a structure where
-    // only some bits are used.
-    if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
-      NV = BinaryOperator::createLShr(NV,
-                                      ConstantInt::get(NV->getType(),ShAmt),
-                                      LI->getName(), LI);
-    else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
-      NV = BinaryOperator::createShl(NV,
-                                     ConstantInt::get(NV->getType(),-ShAmt),
-                                     LI->getName(), LI);
-
-    // Finally, unconditionally truncate the integer to the right width.
-    unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
-    if (LIBitWidth < NTy->getBitWidth())
-      NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
-                         LI->getName(), LI);
-
-    // If the result is an integer, this is a trunc or bitcast.
-    if (isa<IntegerType>(LI->getType())) {
-      assert(NV->getType() == LI->getType() && "Truncate wasn't enough?");
-    } else if (LI->getType()->isFloatingPoint()) {
-      // Just do a bitcast, we know the sizes match up.
-      NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
-    } else {
-      // Otherwise must be a pointer.
-      NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI);
-    }
+    // Otherwise must be a pointer.
+    NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI);
   }
+  assert(NV->getType() == LI->getType() && "Didn't convert right?");
   return NV;
 }
 
diff --git a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
new file mode 100644
index 00000000000..9ec5fa38122
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+        %struct..0anon = type { <1 x i64> }
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+        %c = alloca %struct..0anon              ; <%struct..0anon*> [#uses=2]
+        %tmp2 = getelementptr %struct..0anon* %c, i32 0, i32 0          ; <<1 x i64>*> [#uses=1]
+        store <1 x i64> zeroinitializer, <1 x i64>* %tmp2, align 8
+        %tmp7 = getelementptr %struct..0anon* %c, i32 0, i32 0          ; <<1 x i64>*> [#uses=1]
+        %tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]*         ; <[2 x i32]*> [#uses=1]
+        %tmp9 = getelementptr [2 x i32]* %tmp78, i32 0, i32 0           ; <i32*> [#uses=1]
+        %tmp10 = load i32* %tmp9, align 4               ; <i32> [#uses=0]
+        unreachable
+}
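
Note (not part of the patch): the shift-and-truncate arithmetic that the rewritten path performs on a little-endian target can be modeled outside of LLVM. The sketch below is illustrative only; the function name extractSubElement and its parameters are invented for the example. It mirrors the lshr by the remaining bit offset followed by the trunc that the new code emits, as in the test case above where an i32 half is loaded out of a <1 x i64> union member.

#include <cassert>
#include <cstdint>
#include <iostream>

// Standalone model (hypothetical names, little-endian only) of the
// shift-and-truncate step added by this patch: pull a narrow integer
// load out of the wide scalar that SROA keeps for the whole alloca.
static uint64_t extractSubElement(uint64_t AllocaVal, unsigned AllocaBits,
                                  unsigned LoadBits, unsigned OffsetBits) {
  uint64_t NV = AllocaVal;
  // Little-endian: the shift amount is simply the bit offset of the load
  // (the patch computes a different ShAmt on big-endian targets).
  if (OffsetBits > 0 && OffsetBits < AllocaBits)
    NV >>= OffsetBits;                        // models the lshr
  // Finally, truncate to the width of the original load.
  if (LoadBits < AllocaBits && LoadBits < 64)
    NV &= (1ULL << LoadBits) - 1;             // models the trunc
  return NV;
}

int main() {
  // The test case loads the low i32 half of a <1 x i64> union member;
  // with a recognizable bit pattern the two halves come out as expected.
  uint64_t Union = 0x1122334455667788ULL;
  assert(extractSubElement(Union, 64, 32, 0)  == 0x55667788ULL);  // low half
  assert(extractSubElement(Union, 64, 32, 32) == 0x11223344ULL);  // high half
  std::cout << "sub-element extraction model OK\n";
  return 0;
}

On a big-endian target the patch instead derives ShAmt from getTypeStoreSizeInBits so that the low-order bits of the narrower load line up, which is why the model above is restricted to the little-endian case.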