From ef8d6999f379c1a551d89b9cce7ca6dc7246fe52 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 11 Aug 2011 21:50:44 +0000 Subject: [PATCH] Add a dag combine to xform 256-bit shuffles into simple vector inserts and extracts. This simple combine makes us generate only 1 instruction instead of 11 in the v8 case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137362 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 55 +++++++++++++++++++++++++++- test/CodeGen/X86/avx-vextractf128.ll | 18 +++++++++ test/CodeGen/X86/avx-vinsertf128.ll | 18 +++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/avx-vextractf128.ll create mode 100644 test/CodeGen/X86/avx-vinsertf128.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 039901769bd..10ab70750e3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11548,6 +11548,38 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } +/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the +/// same as extracting the high 128-bit part of 256-bit vector and then +/// inserting the result into the low part of a new 256-bit vector +static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + +/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the +/// same as extracting the low 128-bit part of 256-bit vector and then +/// inserting the result into the high part of a new 256-bit vector +static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle or + for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { @@ -11556,6 +11588,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); if (V1.getOpcode() == ISD::CONCAT_VECTORS && V2.getOpcode() == ISD::CONCAT_VECTORS) { @@ -11580,7 +11613,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, // To match the shuffle mask, the first half of the mask should // be exactly the first vector, and all the rest a splat with the // first element of the second one. - int NumElems = VT.getVectorNumElements(); for (int i = 0; i < NumElems/2; ++i) if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) @@ -11594,6 +11626,27 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, InsV); } + //===--------------------------------------------------------------------===// + // Combine some shuffles into subvector extracts and inserts: + // + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + if (isShuffleHigh128VectorInsertLow(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + // vector_shuffle or + if (isShuffleLow128VectorInsertHigh(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + return SDValue(); } diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll new file mode 100644 index 00000000000..dccf901b259 --- /dev/null +++ b/test/CodeGen/X86/avx-vextractf128.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK-NOT: vunpck +; CHECK: vextractf128 $1 +define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> + ret <8 x float> %shuffle +} + +; CHECK-NOT: vunpck +; CHECK: vextractf128 $1 +define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> + ret <4 x double> %shuffle +} + diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll new file mode 100644 index 00000000000..b54b57b8d15 --- /dev/null +++ b/test/CodeGen/X86/avx-vinsertf128.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK-NOT: vunpck +; CHECK: vinsertf128 $1 +define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> + ret <8 x float> %shuffle +} + +; CHECK-NOT: vunpck +; CHECK: vinsertf128 $1 +define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> + ret <4 x double> %shuffle +} +