diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 35f3c25500a..2de0fa60f32 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -314,7 +314,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, { ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, { ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, { ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, { ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, { ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7bd0bace909..d66d05ac730 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1822,7 +1822,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VST1q64PseudoWB_fixed) || + if ((NumVecs > 2 && Opc != ARM::VST1q64PseudoWB_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? 
Reg0 : Inc); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2c564a7e8e6..1e3410221e1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1673,12 +1673,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBPseudo; -def VST2d16PseudoWB_fixed : VSTQWBPseudo; -def VST2d32PseudoWB_fixed : VSTQWBPseudo; -def VST2d8PseudoWB_register : VSTQWBPseudo; -def VST2d16PseudoWB_register : VSTQWBPseudo; -def VST2d32PseudoWB_register : VSTQWBPseudo; +def VST2d8PseudoWB_fixed : VSTQWBfixedPseudo; +def VST2d16PseudoWB_fixed : VSTQWBfixedPseudo; +def VST2d32PseudoWB_fixed : VSTQWBfixedPseudo; +def VST2d8PseudoWB_register : VSTQWBregisterPseudo; +def VST2d16PseudoWB_register : VSTQWBregisterPseudo; +def VST2d32PseudoWB_register : VSTQWBregisterPseudo; def VST2q8PseudoWB_fixed : VSTQQWBPseudo; def VST2q16PseudoWB_fixed : VSTQQWBPseudo; diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 915a84b6776..497214316a9 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -110,6 +110,15 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { ret void } +define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind { +;CHECK: vst2update +;CHECK: vst2.16 {d16, d17}, [r0]! + %tmp1 = load <4 x i16>* %B + tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2) + %t5 = getelementptr inbounds i8* %out, i32 16 + ret i8* %t5 +} + declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind