From 44b5e6de8cb7a5562f698078415a3a9b608b8ed6 Mon Sep 17 00:00:00 2001
From: Nadav Rotem
Date: Mon, 2 Apr 2012 07:11:12 +0000
Subject: [PATCH] Optimizing swizzles of complex shuffles may generate additional complex shuffles. Do not try to optimize swizzles of shuffles if the source shuffle has more than a single user, except when the source shuffle is also a swizzle.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153864 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++++++-
 test/CodeGen/X86/SwizzleShuff.ll         | 17 +++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5e88fcbb0e1..08946070b44 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7792,6 +7792,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 
     SmallVector<int, 8> NewMask;
     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+    // If the source shuffle has more than one user then do not try to optimize
+    // it because it may generate a more complex shuffle node. However, if the
+    // source shuffle is also a swizzle (a single source shuffle), our
+    // transformation is still likely to reduce the number of shuffles and only
+    // generate a simple shuffle node.
+    if (N0.getOperand(1).getOpcode() != ISD::UNDEF && !N0.hasOneUse())
+      return SDValue();
+
     EVT InVT = N0.getValueType();
     int InNumElts = InVT.getVectorNumElements();
 
@@ -7808,7 +7816,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 
       NewMask.push_back(Idx);
     }
-
+    assert(NewMask.size() == VT.getVectorNumElements() && "Invalid mask size");
     return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
                                 OtherSV->getOperand(1), &NewMask[0]);
   }
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
index 11b702e3d1b..224556deda1 100644
--- a/test/CodeGen/X86/SwizzleShuff.ll
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -12,3 +12,20 @@ define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
   store <4 x i8> %C, <4 x i8>* %pA
   ret void
 }
+
+; CHECK: multi_use_swizzle
+; CHECK: mov
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: xor
+; CHECK-NEXT: ret
+define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32>
+  %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32>
+  %R = xor <4 x i32> %S1, %S2
+  ret <4 x i32> %R
+}