X86: custom-lower vector shift intrinsics whose shift amount is not an
immediate.

The x86_sse2_{pslli,psrli,psrai}_* and x86_mmx_{pslli,psrli,psrai}_*
intrinsics take an i32 shift amount.  When that operand is a
ConstantSDNode the lowering returns an empty SDOperand and the intrinsic is
left untouched.  Otherwise the call is rewritten to the matching
non-"i" intrinsic (psll/psrl/psra): the i32 amount is placed in a vector
with SCALAR_TO_VECTOR (v4i32 for SSE2, v2i32 for MMX), BIT_CONVERTed to the
result type, and passed as the second vector operand.

Tests: mmx-shift.ll gains t3 (psrli.w with a variable amount, expects
psrlw); vec_shift3.ll is new and covers pslli.q with variable and constant
amounts and psrai.w with a variable amount.

NOTE(review): SCALAR_TO_VECTOR only defines element 0 of the new vector.
The register forms of these shifts presumably read a 64-bit count, so the
upper 32 bits may need to be zeroed explicitly -- confirm against the
Intel SDM before relying on this lowering.

NOTE(review): this patch text was recovered from a copy whose line breaks
were collapsed and whose <...> template arguments were stripped.
Indentation (including the whitespace-only change to the
"Comparison intrinsics." comment) is reconstructed from the surrounding
2-space style -- verify against the tree before applying.

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8560dfe463c..440839933df 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4909,7 +4909,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
   switch (IntNo) {
   default: return SDOperand();    // Don't custom lower most intrinsics.
-    // Comparison intrinsics.
+  // Comparison intrinsics.
   case Intrinsic::x86_sse_comieq_ss:
   case Intrinsic::x86_sse_comilt_ss:
   case Intrinsic::x86_sse_comile_ss:
@@ -5010,6 +5010,95 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
                                   DAG.getConstant(X86CC, MVT::i8), Cond);
     return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
   }
+
+  // Fix vector shift instructions where the last operand is a non-immediate
+  // i32 value.
+  case Intrinsic::x86_sse2_pslli_w:
+  case Intrinsic::x86_sse2_pslli_d:
+  case Intrinsic::x86_sse2_pslli_q:
+  case Intrinsic::x86_sse2_psrli_w:
+  case Intrinsic::x86_sse2_psrli_d:
+  case Intrinsic::x86_sse2_psrli_q:
+  case Intrinsic::x86_sse2_psrai_w:
+  case Intrinsic::x86_sse2_psrai_d:
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDOperand ShAmt = Op.getOperand(2);
+    if (isa<ConstantSDNode>(ShAmt))
+      return SDOperand();
+
+    unsigned NewIntNo = 0;
+    MVT::ValueType ShAmtVT = MVT::v4i32;
+    switch (IntNo) {
+    case Intrinsic::x86_sse2_pslli_w:
+      NewIntNo = Intrinsic::x86_sse2_psll_w;
+      break;
+    case Intrinsic::x86_sse2_pslli_d:
+      NewIntNo = Intrinsic::x86_sse2_psll_d;
+      break;
+    case Intrinsic::x86_sse2_pslli_q:
+      NewIntNo = Intrinsic::x86_sse2_psll_q;
+      break;
+    case Intrinsic::x86_sse2_psrli_w:
+      NewIntNo = Intrinsic::x86_sse2_psrl_w;
+      break;
+    case Intrinsic::x86_sse2_psrli_d:
+      NewIntNo = Intrinsic::x86_sse2_psrl_d;
+      break;
+    case Intrinsic::x86_sse2_psrli_q:
+      NewIntNo = Intrinsic::x86_sse2_psrl_q;
+      break;
+    case Intrinsic::x86_sse2_psrai_w:
+      NewIntNo = Intrinsic::x86_sse2_psra_w;
+      break;
+    case Intrinsic::x86_sse2_psrai_d:
+      NewIntNo = Intrinsic::x86_sse2_psra_d;
+      break;
+    default: {
+      ShAmtVT = MVT::v2i32;
+      switch (IntNo) {
+      case Intrinsic::x86_mmx_pslli_w:
+        NewIntNo = Intrinsic::x86_mmx_psll_w;
+        break;
+      case Intrinsic::x86_mmx_pslli_d:
+        NewIntNo = Intrinsic::x86_mmx_psll_d;
+        break;
+      case Intrinsic::x86_mmx_pslli_q:
+        NewIntNo = Intrinsic::x86_mmx_psll_q;
+        break;
+      case Intrinsic::x86_mmx_psrli_w:
+        NewIntNo = Intrinsic::x86_mmx_psrl_w;
+        break;
+      case Intrinsic::x86_mmx_psrli_d:
+        NewIntNo = Intrinsic::x86_mmx_psrl_d;
+        break;
+      case Intrinsic::x86_mmx_psrli_q:
+        NewIntNo = Intrinsic::x86_mmx_psrl_q;
+        break;
+      case Intrinsic::x86_mmx_psrai_w:
+        NewIntNo = Intrinsic::x86_mmx_psra_w;
+        break;
+      case Intrinsic::x86_mmx_psrai_d:
+        NewIntNo = Intrinsic::x86_mmx_psra_d;
+        break;
+      default: abort();  // Can't reach here.
+      }
+      break;
+    }
+    }
+    MVT::ValueType VT = Op.getValueType();
+    ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT,
+                        DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt));
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+                       DAG.getConstant(NewIntNo, MVT::i32),
+                       Op.getOperand(1), ShAmt);
+  }
   }
 }
 
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index 82eeafd0752..277cf075cb9 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
 
 define i64 @t1(<1 x i64> %mm1) nounwind {
 entry:
@@ -19,3 +20,13 @@ entry:
 }
 
 declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
+
+define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
+entry:
+	%tmp6 = bitcast <1 x i64> %mm1 to <4 x i16>		; <<4 x i16>> [#uses=1]
+	%tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone		; <<4 x i16>> [#uses=1]
+	%retval1314 = bitcast <4 x i16> %tmp8 to i64		; <i64> [#uses=1]
+	ret i64 %retval1314
+}
+
+declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/vec_shift3.ll b/test/CodeGen/X86/vec_shift3.ll
new file mode 100644
index 00000000000..2641c5d5967
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift3.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
+
+define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
+entry:
+	%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 %bits ) nounwind readnone		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp3
+}
+
+define <2 x i64> @t2(<2 x i64> %x1) nounwind {
+entry:
+	%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp3
+}
+
+define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind {
+entry:
+	%tmp2 = bitcast <2 x i64> %x1 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w( <8 x i16> %tmp2, i32 %bits ) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp5 = bitcast <8 x i16> %tmp4 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp5
+}
+
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone