- When the DAG combiner is folding a bit convert into a BUILD_VECTOR, it should check whether the BUILD_VECTOR is essentially a SCALAR_TO_VECTOR. Avoid turning (v8i16) <10, u, u, u> into <10, 0, u, u, u, u, u, u>; instead, simply convert it to a SCALAR_TO_VECTOR of the proper type.

- X86 now normalizes SCALAR_TO_VECTOR to (BIT_CONVERT (v4i32 SCALAR_TO_VECTOR)). Get rid of X86ISD::S2VEC.
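
The first change, as a standalone sketch (hypothetical helper name, not the LLVM code itself): when the combiner shrinks the elements of a constant BUILD_VECTOR, each source constant is split into little-endian pieces, and for a scalar-to-vector node whose value survives intact in the low piece, the expansion buys nothing:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Split one SrcBits-wide constant into SrcBits/DstBits little-endian
    // pieces, the way the combiner expands a BUILD_VECTOR element when a bit
    // convert narrows the element type. Assumes DstBits < 64.
    std::vector<uint64_t> splitConstant(uint64_t OpVal, unsigned SrcBits,
                                        unsigned DstBits) {
      std::vector<uint64_t> Pieces;
      for (unsigned j = 0; j != SrcBits / DstBits; ++j) {
        Pieces.push_back(OpVal & ((1ULL << DstBits) - 1));
        OpVal >>= DstBits;
      }
      return Pieces;
    }

    int main() {
      // Element 0 of (v4i32) <10, u, u, u> viewed as v8i16: 10 splits into
      // <10, 0>, and each undef element would become two undef pieces.
      std::vector<uint64_t> Pieces = splitConstant(10, 32, 16);
      assert(Pieces[0] == 10 && Pieces[1] == 0);
      // The low piece equals the original scalar, so the whole node is still
      // just "scalar 10 in element 0" and can stay a SCALAR_TO_VECTOR of
      // v8i16 instead of becoming <10, 0, u, u, u, u, u, u>.
      return 0;
    }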


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47290 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng 2008-02-18 23:04:32 +00:00
parent e0cfecf47d
commit efec751a1b
8 changed files with 128 additions and 59 deletions


@@ -611,6 +611,11 @@ namespace ISD {
   /// BUILD_VECTOR where all of the elements are 0 or undef.
   bool isBuildVectorAllZeros(const SDNode *N);
 
+  /// isScalarToVector - Return true if the specified node is a
+  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+  /// element is not an undef.
+  bool isScalarToVector(const SDNode *N);
+
   /// isDebugLabel - Return true if the specified node represents a debug
   /// label (i.e. ISD::LABEL or TargetInstrInfo::LABEL node and third operand
   /// is 0).


@@ -3450,14 +3450,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) {
       Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
     }
 
-    MVT::ValueType VT = MVT::getVectorType(DstEltVT,
-                                           Ops.size());
+    MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
     return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
   }
 
   // Finally, this must be the case where we are shrinking elements: each input
   // turns into multiple outputs.
+  bool isS2V = ISD::isScalarToVector(BV);
   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  MVT::ValueType VT = MVT::getVectorType(DstEltVT,
+                                         NumOutputsPerInput * BV->getNumOperands());
   SmallVector<SDOperand, 8> Ops;
   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
     if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
@@ -3466,18 +3468,19 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT::ValueType DstEltVT) {
       continue;
     }
     uint64_t OpVal = cast<ConstantSDNode>(BV->getOperand(i))->getValue();
     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
       unsigned ThisVal = OpVal & ((1ULL << DstBitSize)-1);
-      OpVal >>= DstBitSize;
       Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && ThisVal == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Ops[0]);
+      OpVal >>= DstBitSize;
     }
 
     // For big endian targets, swap the order of the pieces of each element.
     if (TLI.isBigEndian())
       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
   }
 
-  MVT::ValueType VT = MVT::getVectorType(DstEltVT, Ops.size());
   return DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size());
 }


@@ -176,6 +176,27 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
   return true;
 }
 
+/// isScalarToVector - Return true if the specified node is a
+/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+/// element is not an undef.
+bool ISD::isScalarToVector(const SDNode *N) {
+  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return true;
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+    return false;
+  unsigned NumElems = N->getNumOperands();
+  for (unsigned i = 1; i < NumElems; ++i) {
+    SDOperand V = N->getOperand(i);
+    if (V.getOpcode() != ISD::UNDEF)
+      return false;
+  }
+  return true;
+}
+
 /// isDebugLabel - Return true if the specified node represents a debug
 /// label (i.e. ISD::LABEL or TargetInstrInfo::LABEL node and third operand
 /// is 0).
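
For illustration, here is the same predicate over a toy element list, with an empty optional standing in for an undef operand (a hedged sketch; looksLikeScalarToVector is a hypothetical name, not an LLVM API):

    #include <optional>
    #include <vector>

    // Mirrors the BUILD_VECTOR case of ISD::isScalarToVector: element 0 must
    // be defined and every higher element must be undef.
    bool looksLikeScalarToVector(const std::vector<std::optional<int>> &Elts) {
      if (Elts.empty() || !Elts[0].has_value())
        return false;                 // low element must not be undef
      for (size_t i = 1; i < Elts.size(); ++i)
        if (Elts[i].has_value())
          return false;               // a defined high element disqualifies it
      return true;
    }

    int main() {
      bool A = looksLikeScalarToVector({10, {}, {}, {}}); // <10, u, u, u>: true
      bool B = looksLikeScalarToVector({10, 0, {}, {}});  // <10, 0, u, u>: false
      return (A && !B) ? 0 : 1;
    }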


@@ -583,7 +583,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8,  Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
   }
@@ -3834,7 +3833,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
 SDOperand
 X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
-  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
+  MVT::ValueType VT = MVT::v2i32;
+  switch (Op.getValueType()) {
+  default: break;
+  case MVT::v16i8:
+  case MVT::v8i16:
+    VT = MVT::v4i32;
+    break;
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
+                     DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
 }
 
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
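
The rule the new LowerSCALAR_TO_VECTOR applies can be summarized in a small helper (illustrative enum and function, not the MVT API): 128-bit results are built as a v4i32 SCALAR_TO_VECTOR and 64-bit MMX results as v2i32, with a BIT_CONVERT recovering the requested type. Note the hook is only reached for the types marked Custom above.

    // Illustrative only; LLVM's real code switches over MVT::ValueType.
    enum class VecVT { v16i8, v8i16, v8i8, v4i16, v2i32 };
    enum class S2VVT { v4i32, v2i32 };

    // The intermediate SCALAR_TO_VECTOR type chosen before bit converting
    // back to the requested result type.
    S2VVT normalizedS2VType(VecVT ResultVT) {
      switch (ResultVT) {
      case VecVT::v16i8:
      case VecVT::v8i16:
        return S2VVT::v4i32;  // 128-bit XMM result: scalar in an i32 lane
      default:
        return S2VVT::v2i32;  // 64-bit MMX result
      }
    }
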
@@ -5357,7 +5365,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
-  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
   case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
   case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";


@@ -166,10 +166,6 @@ namespace llvm {
       /// relative displacements.
       WrapperRIP,
 
-      /// S2VEC - X86 version of SCALAR_TO_VECTOR. The destination base does not
-      /// have to match the operand type.
-      S2VEC,
-
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,


@@ -156,12 +156,13 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
 //===----------------------------------------------------------------------===//
 // Data Transfer Instructions
 //===----------------------------------------------------------------------===//
 
-let neverHasSideEffects = 1 in
 def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
-                        "movd\t{$src, $dst|$dst, $src}", []>;
-let isSimpleLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+                        "movd\t{$src, $dst|$dst, $src}",
+                        [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+let isSimpleLoad = 1, isReMaterializable = 1 in
 def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
-                        "movd\t{$src, $dst|$dst, $src}", []>;
+                        "movd\t{$src, $dst|$dst, $src}",
+                        [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
 let mayStore = 1 in
 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}", []>;
@@ -547,27 +548,25 @@ def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
 def : Pat<(v8i8  (bitconvert (i64 GR64:$src))),
           (MMX_MOVD64to64rr GR64:$src)>;
 
-def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
-
-// Move scalar to XMM zero-extended
+// movd to XMM register zero-extends
 let AddedComplexity = 15 in {
 def : Pat<(v8i8 (vector_shuffle immAllZerosV_bc,
-                  (v8i8 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+                  (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+                  MMX_MOVL_shuffle_mask)),
           (MMX_MOVZDI2PDIrr GR32:$src)>;
 def : Pat<(v4i16 (vector_shuffle immAllZerosV_bc,
-                  (v4i16 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
-          (MMX_MOVZDI2PDIrr GR32:$src)>;
-def : Pat<(v2i32 (vector_shuffle immAllZerosV,
-                  (v2i32 (MMX_X86s2vec GR32:$src)), MMX_MOVL_shuffle_mask)),
+                  (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+                  MMX_MOVL_shuffle_mask)),
           (MMX_MOVZDI2PDIrr GR32:$src)>;
 }
 
-// Scalar to v2i32 / v4i16 / v8i8. The source may be a GR32, but only the lower
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
 // 8 or 16-bits matter.
-def : Pat<(v8i8  (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(v2i32 (MMX_X86s2vec GR32:$src)), (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v8i8  (v2i32 (scalar_to_vector GR32:$src))),
+          (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+          (MMX_MOVD64rr GR32:$src)>;
 
 // Patterns to perform canonical versions of vector shuffling.
 let AddedComplexity = 10 in {

@@ -34,7 +34,6 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
 def X86fsrl   : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
 def X86comi   : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
 def X86ucomi  : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86s2vec  : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
 def X86pextrb : SDNode<"X86ISD::PEXTRB",
                        SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
 def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -1781,22 +1780,6 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                                          (bitconvert (memopv2i64 addr:$src2))))]>;
 }
 
-multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
-                             string OpcodeStr, Intrinsic IntId> {
-  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
-  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-               [(set VR128:$dst, (IntId VR128:$src1,
-                                  (bitconvert (memopv2i64 addr:$src2))))]>;
-  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
-                 [(set VR128:$dst, (IntId VR128:$src1,
-                                    (scalar_to_vector (i32 imm:$src2))))]>;
-}
-
 /// PDI_binop_rm - Simple SSE2 binary operator.
 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, bit Commutable = 0> {
@@ -1871,16 +1854,61 @@ defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
 defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>;
 
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;
+defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>;
+defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>;
+defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>;
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;
+defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>;
+defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>;
+defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>;
+defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>;
+defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>;
+
+// Some immediate variants need to match a bit_convert.
+def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psllw\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
+                       (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "pslld\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
+                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psllq\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
+                       (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psrlw\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
+                       (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psrld\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
+                       (scalar_to_vector (i32 imm:$src2))))]>;
+def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psrlq\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
+                       (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psraw\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
+                       (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
+def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst),
+                    (ins VR128:$src1, i32i8imm:$src2),
+                    "psrad\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
+                       (scalar_to_vector (i32 imm:$src2))))]>;
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
 
 // PSRAQ doesn't exist in SSE[1-3].
 
 // 128-bit logical shifts.
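
At the source level, the immediate forms these definitions select correspond to the SSE2 shift intrinsics with a scalar count; the snippet below (standard <emmintrin.h> intrinsics, built with SSE2 enabled) mirrors the new test's psrlw/pslld cases, where the shift count should now be encoded as an immediate rather than loaded from a constant pool:

    #include <cassert>
    #include <emmintrin.h>

    int main() {
      // psrlw by 14: logical right shift of each i16 lane.
      __m128i V = _mm_set1_epi16((short)0xC000);
      __m128i R = _mm_srli_epi16(V, 14);      // expected to select psrlw $14
      assert(_mm_extract_epi16(R, 0) == 3);   // 0xC000 >> 14 == 3
      // pslld by 14: left shift of each i32 lane.
      __m128i W = _mm_slli_epi32(_mm_set1_epi32(1), 14);
      assert(_mm_cvtsi128_si32(W) == (1 << 14));
      return 0;
    }
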
@@ -2729,13 +2757,6 @@ let Predicates = [HasSSE2] in
 def : Pat<(fextend (loadf32 addr:$src)),
           (CVTSS2SDrm addr:$src)>;
 
-// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
-// 16-bits matter.
-def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
-      Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
-      Requires<[HasSSE2]>;
-
 // bit_convert
 let Predicates = [HasSSE2] in {
   def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;


@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep CPI
+
+define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
+	%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
+	%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
+	%tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
+	ret <2 x i64> %tmp3
+}
+
+define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind {
+	%tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
+	%tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone