From 04715c9915c0c71871887aae8c64ebc477bab955 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 5 Feb 2015 13:23:07 +0000 Subject: [PATCH] [X86][MMX] Handle i32->mmx conversion using movd Implement a BITCAST dag combine to transform i32->mmx conversion patterns into an X86-specific node (MMX_MOVW2D) and guarantee that moves between i32 and x86mmx are better handled, i.e., don't use store-load to do the conversion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 4 ++++ lib/Target/X86/X86InstrFragmentsSIMD.td | 3 +++ lib/Target/X86/X86InstrMMX.td | 10 +++++++++ test/CodeGen/X86/bitcast-mmx.ll | 29 ++++++++++--------------- test/CodeGen/X86/vec_extract-mmx.ll | 5 ++--- 6 files changed, 52 insertions(+), 20 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 17d0fce54f8..eff03f030c5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1675,6 +1675,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); @@ -22986,6 +22987,25 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +/// \brief Detect bitcasts from i32 to the x86mmx low word. Since MMX types are +/// special and don't usually play with other vector types, it's better to +/// handle them early to be sure we emit efficient code by avoiding +/// store-load conversions. 
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { + if (N->getValueType(0) != MVT::x86mmx || + N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR || + N->getOperand(0)->getValueType(0) != MVT::v2i32) + return SDValue(); + + SDValue V = N->getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1)); + if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)), + N->getValueType(0), V.getOperand(0)); + + return SDValue(); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// into a somewhat faster sequence. For i686, the best sequence is apparently @@ -26129,6 +26149,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT: case X86ISD::SHRUNKBLEND: return PerformSELECTCombine(N, DAG, DCI, Subtarget); + case ISD::BITCAST: return PerformBITCASTCombine(N, DAG); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 346f98e6bca..d90dcc71451 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -158,6 +158,10 @@ namespace llvm { /// vector to a GPR. MMX_MOVD2W, + /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector + /// and zeroes out the high word. + MMX_MOVW2D, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index f65292e7602..fd0a4ba28e8 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -18,6 +18,9 @@ // Low word of MMX to GPR. 
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>; +// GPR to low word of MMX. +def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1, + [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>; //===----------------------------------------------------------------------===// // MMX Pattern Fragments diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index b5262c5cdac..fbec0625418 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -229,6 +229,16 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), [(set VR64:$dst, (x86mmx (scalar_to_vector (loadi32 addr:$src))))], IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; + +let Predicates = [HasMMX] in { + let AddedComplexity = 15 in + def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)), + (MMX_MOVD64rr GR32:$src)>; + let AddedComplexity = 20 in + def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))), + (MMX_MOVD64rm addr:$src)>; +} + let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>, diff --git a/test/CodeGen/X86/bitcast-mmx.ll b/test/CodeGen/X86/bitcast-mmx.ll index 6bfe6e84adc..616b21fca04 100644 --- a/test/CodeGen/X86/bitcast-mmx.ll +++ b/test/CodeGen/X86/bitcast-mmx.ll @@ -22,11 +22,10 @@ entry: define i64 @t1(i64 %x, i32 %n) { ; CHECK-LABEL: t1: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: movd %rdi, %mm0 -; CHECK-NEXT: movd %esi, %xmm0 -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0 -; CHECK-NEXT: movd %mm0, %rax +; CHECK-NEXT: movd %esi, %mm0 +; CHECK-NEXT: movd %rdi, %mm1 +; CHECK-NEXT: psllq %mm0, %mm1 +; CHECK-NEXT: movd %mm1, %rax ; CHECK-NEXT: retq entry: %0 = bitcast i64 %x to x86_mmx @@ -38,15 +37,12 @@ entry: define i64 @t2(i64 %x, i32 %n, i32 %w) { ; CHECK-LABEL: t2: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: movd %edx, %xmm0 -; 
CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %mm0 -; CHECK-NEXT: movd %esi, %xmm0 -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0 -; CHECK-NEXT: movd %rdi, %mm1 -; CHECK-NEXT: por %mm0, %mm1 -; CHECK-NEXT: movd %mm1, %rax +; CHECK-NEXT: movd %esi, %mm0 +; CHECK-NEXT: movd %edx, %mm1 +; CHECK-NEXT: psllq %mm0, %mm1 +; CHECK-NEXT: movd %rdi, %mm0 +; CHECK-NEXT: por %mm1, %mm0 +; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: %0 = insertelement <2 x i32> undef, i32 %w, i32 0 @@ -63,9 +59,8 @@ define i64 @t3(<1 x i64>* %y, i32* %n) { ; CHECK-LABEL: t3: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq (%rdi), %mm0 -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: psllq -{{[0-9]+}}(%rsp), %mm0 +; CHECK-NEXT: movd (%rsi), %mm1 +; CHECK-NEXT: psllq %mm1, %mm0 ; CHECK-NEXT: movd %mm0, %rax ; CHECK-NEXT: retq entry: diff --git a/test/CodeGen/X86/vec_extract-mmx.ll b/test/CodeGen/X86/vec_extract-mmx.ll index af65a965f70..c7780b02376 100644 --- a/test/CodeGen/X86/vec_extract-mmx.ll +++ b/test/CodeGen/X86/vec_extract-mmx.ll @@ -23,9 +23,8 @@ define i32 @test0(<1 x i64>* %v4) { define i32 @test1(i32* nocapture readonly %ptr) { ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: pshufw $232, -{{[0-9]+}}(%rsp), %mm0 +; CHECK-NEXT: movd (%rdi), %mm0 +; CHECK-NEXT: pshufw $232, %mm0, %mm0 ; CHECK-NEXT: movd %mm0, %eax ; CHECK-NEXT: emms ; CHECK-NEXT: retq