diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1de51ee51ae..ec6a5bad5b7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2428,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. static bool isMOVLMask(const SmallVectorImpl &Mask, MVT VT) { - int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4) + if (VT.getVectorElementType().getSizeInBits() < 32) return false; + + int NumElts = VT.getVectorNumElements(); if (!isUndefOrEqual(Mask[0], NumElts)) return false; @@ -3082,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } // Special case for single non-zero, non-undef, element. - if (NumNonZero == 1 && NumElems <= 4) { + if (NumNonZero == 1) { unsigned Idx = CountTrailingZeros_32(NonZeros); SDValue Item = Op.getOperand(Idx); @@ -3123,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // If we have a constant or non-constant insertion into the low element of // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into // the rest of the elements. This will be matched as movd/movq/movss/movsd - // depending on what the source datatype is. Because we can only get here - // when NumElems <= 4, this only needs to handle i32/f32/i64/f64. - if (Idx == 0 && - // Don't do this for i64 values on x86-32. - (EVT != MVT::i64 || Subtarget->is64Bit())) { - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. - return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasSSE2(), DAG); + // depending on what the source datatype is. + if (Idx == 0) { + if (NumZero == 0) { + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 || + (EVT == MVT::i64 && Subtarget->is64Bit())) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. + return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), + DAG); + } else if (EVT == MVT::i16 || EVT == MVT::i8) { + Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); + MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, + Subtarget->hasSSE2(), DAG); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item); + } } // Is it a vector logical left shift? diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 38c54712f18..43fadc219bb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -577,30 +577,13 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; -// Move scalar to MMX zero-extended -// movd to MMX register zero-extends -let AddedComplexity = 15 in { - def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))), - (MMX_MOVZDI2PDIrr GR32:$src)>; - def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))), - (MMX_MOVZDI2PDIrr GR32:$src)>; -} - let AddedComplexity = 20 in { - def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), (MMX_MOVZDI2PDIrm addr:$src)>; } // Clear top half. let AddedComplexity = 15 in { - def : Pat<(v8i8 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; - def : Pat<(v4i16 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; def : Pat<(v2i32 (X86vzmovl VR64:$src)), (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; } diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll new file mode 100644 index 00000000000..220423aa986 --- /dev/null +++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll @@ -0,0 +1,37 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1 +; RUN: grep movzwl %t1 | count 2 +; RUN: grep movzbl %t1 | count 2 +; RUN: grep movd %t1 | count 4 + +define <4 x i16> @a(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i16 + %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0 + ret <4 x i16> %r +} + +define <8 x i16> @b(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i16 + %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0 + ret <8 x i16> %r +} + +define <8 x i8> @c(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i8 + %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0 + ret <8 x i8> %r +} + +define <16 x i8> @d(i32* %x1) nounwind { + %x2 = load i32* %x1 + %x3 = lshr i32 %x2, 1 + %x = trunc i32 %x3 to i8 + %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 + ret <16 x i8> %r +} +