diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eb160957501..44e212aef00 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9125,6 +9125,68 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   return SDValue();
 }
 
+/// Try to fold a VECTOR_SHUFFLE of CONCAT_VECTORS operands into a single
+/// CONCAT_VECTORS built from the original sub-vectors.
+///
+/// The fold applies when every sub-vector-sized group of mask elements is an
+/// exact, in-order copy of one operand of N0 or N1. The caller must ensure
+/// that N0 is a CONCAT_VECTORS and that N1 is either UNDEF or a
+/// CONCAT_VECTORS with the same sub-vector type as N0.
+///
+/// \param N   The VECTOR_SHUFFLE node to inspect.
+/// \param DAG The DAG used to create the replacement node.
+/// \returns the new CONCAT_VECTORS value, or a null SDValue when the mask
+/// does not have the required shape.
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+  SmallVector<SDValue, 8> Ops;
+  EVT ConcatVT = N0.getOperand(0).getValueType();
+  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+  unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+  // N1 may be UNDEF, in which case it has no sub-vectors to select from.
+  unsigned NumN0Ops = N0.getNumOperands();
+  unsigned NumN1Ops =
+      N1.getOpcode() == ISD::CONCAT_VECTORS ? N1.getNumOperands() : 0;
+
+  // Look at every vector that's inserted. We're looking for exact
+  // subvector-sized copies from a concatenated vector.
+  for (unsigned I = 0; I != NumConcats; ++I) {
+    unsigned Begin = I * NumElemsPerConcat;
+    int First = SVN->getMaskElt(Begin);
+
+    // Negative mask elements mean "undef". Reject them before any unsigned
+    // arithmetic: with 32-bit unsigned, -1 becomes 0xFFFFFFFF, a multiple of
+    // 3 and 5, so it would pass the alignment test for such sub-vector
+    // widths and then index far out of range.
+    if (First < 0 || unsigned(First) % NumElemsPerConcat != 0)
+      return SDValue();
+
+    // The rest of the group must continue consecutively from First.
+    for (unsigned J = 1; J != NumElemsPerConcat; ++J)
+      if (SVN->getMaskElt(Begin + J) != First + int(J))
+        return SDValue();
+
+    // Map the group onto the matching operand of N0 or N1.
+    unsigned FirstElt = unsigned(First) / NumElemsPerConcat;
+    if (FirstElt < NumN0Ops)
+      Ops.push_back(N0.getOperand(FirstElt));
+    else if (FirstElt - NumN0Ops < NumN1Ops)
+      Ops.push_back(N1.getOperand(FirstElt - NumN0Ops));
+    else
+      return SDValue(); // Group refers to an UNDEF or nonexistent operand.
+  }
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+                     Ops.size());
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -9226,6 +9288,20 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
   }
 
+  // Before vector-op legalization, try to rewrite a shuffle of CONCAT_VECTORS
+  // (with N1 either UNDEF or a CONCAT_VECTORS of the same sub-vector type) as
+  // a single CONCAT_VECTORS.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      Level < AfterLegalizeVectorOps &&
+      (N1.getOpcode() == ISD::UNDEF ||
+       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+    SDValue V = partitionShuffleOfConcats(N, DAG);
+
+    if (V.getNode())
+      return V;
+  }
+
   // If this shuffle node is simply a swizzle of another shuffle node,
   // and it reverses the swizzle of the previous shuffle then we can
   // optimize shuffle(shuffle(x, undef), undef) -> x.
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
new file mode 100644
index 00000000000..e9e0fe3239a
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
+
+; PR15525: presumably the regression test for folding a shuffle of two concatenated vectors into one concat (confirm against the bug report).
+; CHECK: test1:
+; CHECK: ldr.w [[REG:r[0-9]+]], [sp]
+; CHECK-NEXT: vmov {{d[0-9]+}}, r1, r2
+; CHECK-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
+; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+; CHECK-NEXT: bx lr
+define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
+bb:
+  %tmp = extractvalue [4 x i64] %vec.coerce, 0
+  %tmp2 = bitcast i64 %tmp to <8 x i8>
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> ; NOTE(review): the 16-element mask constant is missing here -- restore it from the upstream test before running
+  %tmp4 = extractvalue [4 x i64] %vec.coerce, 1
+  %tmp5 = bitcast i64 %tmp4 to <8 x i8>
+  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> ; NOTE(review): mask constant missing -- restore before running
+  %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> ; NOTE(review): mask constant missing -- restore before running
+  tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)