diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 048fc6ff084..c7d676e05e4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4831,6 +4831,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4a670817d3b..56e997c1fba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6606,15 +6606,6 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, VEX_4V; } -let Predicates = [HasAVX] in { -def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -} - //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll index cda1331da32..def2212db93 100644 --- a/test/CodeGen/X86/avx-vinsertf128.ll +++ b/test/CodeGen/X86/avx-vinsertf128.ll @@ -56,3 +56,51 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly { %2 = add <8 x i32> %1, %v1 ret <8 x i32> %2 } + +; CHECK: insert_pd +define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) { +; CHECK: vinsertf128 +%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0) +ret <4 x double> %res +} + +; CHECK: insert_undef_pd +define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) { +; CHECK: vmovaps %ymm1, %ymm0 +%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0) +ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone + + +; CHECK: insert_ps +define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) { +; CHECK: vinsertf128 +%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0) +ret <8 x float> %res +} + +; CHECK: insert_undef_ps +define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) { +; CHECK: vmovaps %ymm1, %ymm0 +%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0) +ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone + + +; CHECK: insert_si +define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) { +; CHECK: vinsertf128 +%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0) +ret <8 x i32> %res +} + +; CHECK: insert_undef_si +define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) { +; CHECK: vmovaps %ymm1, %ymm0 +%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0) +ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone +