From 32510207382720a97c682e098d45cb1371c48639 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 4 Sep 2012 12:49:02 +0000 Subject: [PATCH] This patch optimizes the lowering of a shuffle instruction - it generates 2 instructions instead of 4. Since this specific shuffle is widely used in many workloads, we gain ~10% performance on them. shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> Before: vmovaps (%rdx), %ymm0 vshufps $8, %ymm0, %ymm0, %ymm0 vmovaps (%rcx), %ymm1 vshufps $8, %ymm0, %ymm1, %ymm1 vunpcklps %ymm0, %ymm1, %ymm0 After: vmovaps (%rcx), %ymm0 vmovsldup (%rdx), %ymm1 vblendps $85, %ymm0, %ymm1, %ymm0 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163134 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 31 +++++++++++++++--------------- test/CodeGen/X86/avx-shuffle.ll | 10 ++++------ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ce5e61f0f3f..9f487a6b0f2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3506,25 +3506,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) MatchOddMask = false; } - static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; - static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; - const int *CompactionMask; - if (MatchEvenMask) - CompactionMask = CompactionMaskEven; - else if (MatchOddMask) - CompactionMask = CompactionMaskOdd; - else + if (!MatchEvenMask && !MatchOddMask) return SDValue(); - + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); - SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), - UndefNode, CompactionMask); - SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), - UndefNode, CompactionMask); - static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; - return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); + SDValue Op0 = 
SVOp->getOperand(0); + SDValue Op1 = SVOp->getOperand(1); + + if (MatchEvenMask) { + // Shift the second operand right to 32 bits. + static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 }; + Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask); + } else { + // Shift the first operand left to 32 bits. + static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 }; + Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask); + } + static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask); } /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index 9b41709a3b1..ec11654b355 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -229,9 +229,8 @@ define <8 x float> @test17(<4 x float> %y) { } ; CHECK: test18 -; CHECK: vshufps -; CHECK: vshufps -; CHECK: vunpcklps +; CHECK: vmovshdup +; CHECK: vblendps ; CHECK: ret define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> @@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { } ; CHECK: test19 -; CHECK: vshufps -; CHECK: vshufps -; CHECK: vunpcklps +; CHECK: vmovsldup +; CHECK: vblendps ; CHECK: ret define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32>