diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 00d9ba071ef..ed3f770afec 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -156,6 +156,8 @@ public:
   // Node replacement helpers
   void ReplacedNode(SDNode *N) {
     LegalizedNodes.erase(N);
+    if (UpdatedNodes)
+      UpdatedNodes->insert(N);
   }
   void ReplaceNode(SDNode *Old, SDNode *New) {
     DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5dd542842e4..f63beab44fd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19787,7 +19787,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
 
   EVT VT = InVec.getValueType();
 
-  bool HasShuffleIntoBitcast = false;
   if (InVec.getOpcode() == ISD::BITCAST) {
     // Don't duplicate a load with other uses.
     if (!InVec.hasOneUse())
@@ -19796,7 +19795,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
     if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
       return SDValue();
     InVec = InVec.getOperand(0);
-    HasShuffleIntoBitcast = true;
   }
 
   if (!isTargetShuffle(InVec.getOpcode()))
@@ -19839,17 +19837,16 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
   if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
     return SDValue();
 
-  if (HasShuffleIntoBitcast) {
-    // If there's a bitcast before the shuffle, check if the load type and
-    // alignment is valid.
-    unsigned Align = LN0->getAlignment();
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    unsigned NewAlign = TLI.getDataLayout()->
-      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+  EVT EltVT = N->getValueType(0);
+  // If there's a bitcast before the shuffle, check if the load type and
+  // alignment is valid.
+  unsigned Align = LN0->getAlignment();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+      EltVT.getTypeForEVT(*DAG.getContext()));
 
-    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
-      return SDValue();
-  }
+  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
+    return SDValue();
 
   // All checks match so transform back to vector_shuffle so that DAG combiner
   // can finish the job
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index cadc0fb723f..3e31b4b190b 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | FileCheck %s
 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | FileCheck %s
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 define i32 @t(<2 x i64>* %val) nounwind  {
 ; CHECK-LABEL: t:
 ; CHECK-NOT: movd
@@ -23,3 +25,22 @@ undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3>
   %y = extractelement <8 x i32> %Shuff68, i32 0
   ret i32 %y
 }
+
+; This case could easily end up inf-looping in the DAG combiner due to an
+; low alignment load of the vector which prevents us from reliably forming a
+; narrow load.
+; FIXME: It would be nice to detect whether the target has fast and legal
+; unaligned loads and use them here.
+define void @t3() {
+; CHECK-LABEL: t3:
+;
+; This movs the entire vector, shuffling the high double down. If we fixed the
+; FIXME above it would just move the high double directly.
+; CHECK: movhpd %xmm
+
+bb:
+  %tmp13 = load <2 x double>* undef, align 1
+  %.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
+  store double %.sroa.3.24.vec.extract, double* undef, align 8
+  unreachable
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 7868081553c..6f49a03cb8b 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1,11 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3
 
-; XFAIL: *
-; Only @stress_test0 is expected to fail, but XFAIL is not that selective. I
-; expect this to be unxfailed soon enough that we won't regress the other tests
-; in the interim.
-
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"