diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 310da09edf5..85f5df911db 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9122,6 +9122,52 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   if (ISD::allOperandsUndef(N))
     return DAG.getUNDEF(N->getValueType(0));
 
+  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
+  // nodes often generate nop CONCAT_VECTOR nodes.
+  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
+  // place the incoming vectors at the exact same location.
+  SDValue SingleSource = SDValue();
+  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    // Undef operands place no constraint on the source vector.
+    if (Op.getOpcode() == ISD::UNDEF)
+      continue;
+
+    // Check if this is the identity extract:
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      return SDValue();
+
+    // Find the single incoming vector for the extract_subvector.
+    if (SingleSource.getNode()) {
+      if (Op.getOperand(0) != SingleSource)
+        return SDValue();
+    } else {
+      SingleSource = Op.getOperand(0);
+
+      // Check the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we can not
+      // optimize it away.
+      if (SingleSource.getValueType() != N->getValueType(0))
+        return SDValue();
+    }
+
+    unsigned IdentityIndex = i * PartNumElem;
+    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    // The extract index must be constant.
+    if (!CS)
+      return SDValue();
+
+    // Check that we are reading from the identity index.
+    if (CS->getZExtValue() != IdentityIndex)
+      return SDValue();
+  }
+
+  if (SingleSource.getNode())
+    return SingleSource;
+
   return SDValue();
 }
 
diff --git a/test/CodeGen/ARM/nop_concat_vectors.ll b/test/CodeGen/ARM/nop_concat_vectors.ll
new file mode 100644
index 00000000000..c81090095a9
--- /dev/null
+++ b/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+;CHECK: _foo
+;CHECK-NOT: vld1.32
+;CHECK-NOT: vst1.32
+;CHECK: bx
+define void @foo(<16 x i8>* %J) {
+  %A = load <16 x i8>* %J
+  %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  store <16 x i8> %T2, <16 x i8>* %J
+  ret void
+}