diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index baf39c270d0..2d0cd960394 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6268,31 +6268,27 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
   if (V.getOpcode() == ISD::BITCAST)
     V = V.getOperand(0);
 
-  if (ISD::isNormalLoad(V.getNode())) {
-    // Is the original load suitable?
-    LoadSDNode *LN0 = cast<LoadSDNode>(V);
+  if (!ISD::isNormalLoad(V.getNode()))
+    return false;
 
-    // FIXME: avoid the multi-use bug that is preventing lots of
-    // of foldings to be detected, this is still wrong of course, but
-    // give the temporary desired behavior, and if it happens that
-    // the load has real more uses, during isel it will not fold, and
-    // will generate poor code.
-    if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
-      return false;
+  // Is the original load suitable?
+  LoadSDNode *LN0 = cast<LoadSDNode>(V);
 
-    if (!HasShuffleIntoBitcast)
-      return true;
+  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+    return false;
 
-    // If there's a bitcast before the shuffle, check if the load type and
-    // alignment is valid.
-    unsigned Align = LN0->getAlignment();
-    unsigned NewAlign =
-      TLI.getTargetData()->getABITypeAlignment(
-                                    VT.getTypeForEVT(*DAG.getContext()));
+  if (!HasShuffleIntoBitcast)
+    return true;
 
-    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
-      return false;
-  }
+  // If there's a bitcast before the shuffle, check if the load type and
+  // alignment is valid.
+  unsigned Align = LN0->getAlignment();
+  unsigned NewAlign =
+    TLI.getTargetData()->getABITypeAlignment(
+                                  VT.getTypeForEVT(*DAG.getContext()));
+
+  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+    return false;
 
   return true;
 }
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d6ae3af0d4e..4becf99bfa2 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1236,10 +1236,10 @@ let Predicates = [HasAVX] in {
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst),
             (VMOVHPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst),
             (VMOVHPDmr addr:$dst, VR128:$src)>;
 }
 
@@ -1259,7 +1259,7 @@ let Predicates = [HasSSE1] in {
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst),
             (MOVHPSmr addr:$dst, VR128:$src)>;
 }
 
@@ -1279,7 +1279,7 @@ let Predicates = [HasSSE2] in {
 
   // Store patterns
   def : Pat<(store (f64 (vector_extract
-            (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+            (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))),addr:$dst),
             (MOVHPDmr addr:$dst, VR128:$src)>;
 }