Get rid of some bogus patterns for X86vzmovl. Don't create VZEXT_MOVL

nodes for vectors with an i16 element type.  Add an optimization for 
building a vector which is all zeros/undef except for the bottom 
element, where the bottom element is an i8 or i16.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72988 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eli Friedman 2009-06-06 06:05:10 +00:00
parent 9d47b8d8ea
commit 1041553424
3 changed files with 59 additions and 29 deletions

View File

@ -2428,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
int NumElts = VT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4)
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
int NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
@ -3082,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1 && NumElems <= 4) {
if (NumNonZero == 1) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue Item = Op.getOperand(Idx);
@ -3123,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is. Because we can only get here
// when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
if (Idx == 0 &&
// Don't do this for i64 values on x86-32.
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
// depending on what the source datatype is.
if (Idx == 0) {
if (NumZero == 0) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
} else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
(EVT == MVT::i64 && Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
DAG);
} else if (EVT == MVT::i16 || EVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
}
}
// Is it a vector logical left shift?

View File

@ -577,30 +577,13 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
// Move scalar to MMX zero-extended
// movd to MMX register zero-extends
let AddedComplexity = 15 in {
def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
(MMX_MOVZDI2PDIrr GR32:$src)>;
def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
(MMX_MOVZDI2PDIrr GR32:$src)>;
}
let AddedComplexity = 20 in {
def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
(MMX_MOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
(MMX_MOVZDI2PDIrm addr:$src)>;
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
(MMX_MOVZDI2PDIrm addr:$src)>;
}
// Clear top half.
let AddedComplexity = 15 in {
def : Pat<(v8i8 (X86vzmovl VR64:$src)),
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
def : Pat<(v4i16 (X86vzmovl VR64:$src)),
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
def : Pat<(v2i32 (X86vzmovl VR64:$src)),
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
}

View File

@ -0,0 +1,37 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx,+sse2 > %t1
; RUN: grep movzwl %t1 | count 2
; RUN: grep movzbl %t1 | count 2
; RUN: grep movd %t1 | count 4
define <4 x i16> @a(i32* %x1) nounwind {
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
%r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
ret <4 x i16> %r
}
define <8 x i16> @b(i32* %x1) nounwind {
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
%r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
ret <8 x i16> %r
}
define <8 x i8> @c(i32* %x1) nounwind {
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
%r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
ret <8 x i8> %r
}
define <16 x i8> @d(i32* %x1) nounwind {
%x2 = load i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
%r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
ret <16 x i8> %r
}