diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96a58c11ef7..a2e0036e38d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5897,7 +5897,31 @@ static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
                           St->isVolatile(), St->getAlignment());
     }
 
-    // TODO: 2 32-bit copies.
+    // Otherwise, lower to two 32-bit copies: load and store the low and high
+    // halves through integer GPRs, threading the chains so the stores depend
+    // on their corresponding loads.
+    SDOperand LoAddr = Ld->getBasePtr();
+    SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                                   DAG.getConstant(4, MVT::i32));
+
+    SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
+                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                                 Ld->isVolatile(), Ld->getAlignment());
+    SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
+                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+                                 Ld->isVolatile(),
+                                 MinAlign(Ld->getAlignment(), 4));
+
+    LoAddr = St->getBasePtr();
+    HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                         DAG.getConstant(4, MVT::i32));
+
+    SDOperand LoSt = DAG.getStore(LoLd.getValue(1), LoLd, LoAddr,
+                                  St->getSrcValue(), St->getSrcValueOffset(),
+                                  St->isVolatile(), St->getAlignment());
+    SDOperand HiSt = DAG.getStore(HiLd.getValue(1), HiLd, HiAddr,
+                                  St->getSrcValue(), St->getSrcValueOffset()+4,
+                                  St->isVolatile(),
+                                  MinAlign(St->getAlignment(), 4));
+    return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
   }
   return SDOperand();
 }
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 8cf36e05a83..da17a04a466 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*4(%eax),}
 
 ; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs,
 ; increases the places that need to use emms.
@@ -6,9 +7,9 @@
 ; rdar://5741668
 target triple = "x86_64-apple-darwin8"
 
-define i32 @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
+define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
 entry:
 	%tmp1 = load <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
 	store <1 x i64> %tmp1, <1 x i64>* %x, align 8
-	ret i32 undef
+	ret void
 }