diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d029906fc31..9fa5572e815 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7253,7 +7253,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. EVT OutScalarTy = N->getValueType(0).getScalarType(); - bool validTypes = SourceType != MVT::Other && + bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); @@ -7263,7 +7263,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. - if (LegalTypes && !LegalOperations && validTypes) { + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { bool isLE = TLI.isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); @@ -7322,15 +7323,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -7345,7 +7339,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. 
if (VecIn1.getNode()) { SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7371,14 +7365,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. - if (!isTypeLegal(VT)) + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // We don't support shuffling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT) || !isTypeLegal(VecIn1.getValueType()) || + !isTypeLegal(VecIn2.getValueType())) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index 8b7af20b4a9..97709352760 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep rot %t1.s | count 85 +; RUN: grep rot %t1.s | count 86 ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll index 795a7b3dd45..b630e9d1461 100755 --- a/test/CodeGen/X86/avx-zext.ll +++ b/test/CodeGen/X86/avx-zext.ll @@ -3,6 +3,7 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { ;CHECK: zext_8i16_to_8i32 ;CHECK: vpunpckhwd +;CHECK: ret %B = zext <8 x i16> %A to <8 x i32> ret <8 x i32>%B @@ -11,7 +12,19 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { ;CHECK: zext_4i32_to_4i64 ;CHECK: vpunpckhdq +;CHECK: ret %B = zext <4 x i32> %A to <4 x i64> ret <4 x i64>%B } + + +define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { +;CHECK: zext_8i8_to_8i32 +;CHECK: vpunpckhwd +;CHECK: vpunpcklwd +;CHECK: vinsertf128 +;CHECK: ret + %t = zext <8 x i8> %z to <8 x i32> + ret <8 x i32> %t +}