- Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as
  possible before resorting to pextrw and pinsrw; see the IR sketch below.
- Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles.
- Improved (i16 extract_vector_element 0) codegen by recognizing that
  (i32 extract_vector_element 0) does not require a pextrw.
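For illustration, two IR cases in the spirit of the new tests further down (the
function names here are made up for this sketch, and the exact instruction mix
depends on the subtarget):

define <8 x i16> @pairs(<8 x i16> %A, <8 x i16> %B) {
    %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
    ret <8 x i16> %s
}
; Every pair of mask elements selects two consecutive i16 elements, so the
; shuffle can be rewritten as a v4i32 shuffle (see RewriteAs4WideShuffle below)
; and lowered with a handful of 4-wide shuffles instead of a pextrw/pinsrw chain.

define i16 @low(<8 x i16> %x) {
    %e = extractelement <8 x i16> %x, i32 0
    ret i16 %e
}
; Element 0 is read by bitcasting to v4i32 and extracting element 0, so no
; pextrw is needed.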


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44836 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng
2007-12-11 01:46:18 +00:00
parent 844e0f9def
commit 14b32e1941
3 changed files with 312 additions and 97 deletions


@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h" #include "llvm/GlobalVariable.h"
#include "llvm/Function.h" #include "llvm/Function.h"
#include "llvm/Intrinsics.h" #include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h" #include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/CallingConvLower.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h" #include "llvm/ParameterAttributes.h"
using namespace llvm; using namespace llvm;
@@ -2714,7 +2716,7 @@ static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
     if (Arg.getOpcode() == ISD::UNDEF) continue;
     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
     unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
-    if (Val > 4)
+    if (Val >= 4)
       return false;
   }
@@ -3130,6 +3132,8 @@ static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
   return V;
 }

+/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
+/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x>
 SDOperand
 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   // All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3154,7 +3158,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
   unsigned NumNonZeroImms = 0;
-  std::set<SDOperand> Values;
+  SmallSet<SDOperand, 8> Values;
   for (unsigned i = 0; i < NumElems; ++i) {
     SDOperand Elt = Op.getOperand(i);
     if (Elt.getOpcode() != ISD::UNDEF) {
@@ -3314,59 +3318,179 @@ static
 SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
                                    SDOperand PermMask, SelectionDAG &DAG,
                                    TargetLowering &TLI) {
+  SDOperand NewV;
   MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
   MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
-  if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
-    // Handle v8i16 shuffle high / low shuffle node pair.
-    SmallVector<SDOperand, 8> MaskVec;
-    for (unsigned i = 0; i != 4; ++i)
-      MaskVec.push_back(PermMask.getOperand(i));
-    for (unsigned i = 4; i != 8; ++i)
-      MaskVec.push_back(DAG.getConstant(i, MaskEVT));
-    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
-    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
-    MaskVec.clear();
-    for (unsigned i = 0; i != 4; ++i)
-      MaskVec.push_back(DAG.getConstant(i, MaskEVT));
-    for (unsigned i = 4; i != 8; ++i)
-      MaskVec.push_back(PermMask.getOperand(i));
-    Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
-    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
-  }
-
-  // Lower than into extracts and inserts but try to do as few as possible.
+  MVT::ValueType PtrVT = TLI.getPointerTy();
+  SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
+                                     PermMask.Val->op_end());
+
+  // First record which half of which vector the low elements come from.
+  SmallVector<unsigned, 4> LowQuad(4);
+  for (unsigned i = 0; i < 4; ++i) {
+    SDOperand Elt = MaskElts[i];
+    if (Elt.getOpcode() == ISD::UNDEF)
+      continue;
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    int QuadIdx = EltIdx / 4;
+    ++LowQuad[QuadIdx];
+  }
+  int BestLowQuad = -1;
+  unsigned MaxQuad = 1;
+  for (unsigned i = 0; i < 4; ++i) {
+    if (LowQuad[i] > MaxQuad) {
+      BestLowQuad = i;
+      MaxQuad = LowQuad[i];
+    }
+  }
+
+  // Record which half of which vector the high elements come from.
+  SmallVector<unsigned, 4> HighQuad(4);
+  for (unsigned i = 4; i < 8; ++i) {
+    SDOperand Elt = MaskElts[i];
+    if (Elt.getOpcode() == ISD::UNDEF)
+      continue;
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    int QuadIdx = EltIdx / 4;
+    ++HighQuad[QuadIdx];
+  }
+  int BestHighQuad = -1;
+  MaxQuad = 1;
+  for (unsigned i = 0; i < 4; ++i) {
+    if (HighQuad[i] > MaxQuad) {
+      BestHighQuad = i;
+      MaxQuad = HighQuad[i];
+    }
+  }
+
+  // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+  if (BestLowQuad != -1 || BestHighQuad != -1) {
+    // First sort the 4 chunks in order using shufpd.
+    SmallVector<SDOperand, 8> MaskVec;
+    if (BestLowQuad != -1)
+      MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+    else
+      MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+    if (BestHighQuad != -1)
+      MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+    else
+      MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+    SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
+    NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+    NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+    // Now sort high and low parts separately.
+    BitVector InOrder(8);
+    if (BestLowQuad != -1) {
+      // Sort lower half in order using PSHUFLW.
+      MaskVec.clear();
+      bool AnyOutOrder = false;
+      for (unsigned i = 0; i != 4; ++i) {
+        SDOperand Elt = MaskElts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(Elt);
+          InOrder.set(i);
+        } else {
+          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+          if (EltIdx != i)
+            AnyOutOrder = true;
+          MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+          // If this element is in the right place after this shuffle, then
+          // remember it.
+          if ((int)(EltIdx / 4) == BestLowQuad)
+            InOrder.set(i);
+        }
+      }
+      if (AnyOutOrder) {
+        for (unsigned i = 4; i != 8; ++i)
+          MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+      }
+    }
+
+    if (BestHighQuad != -1) {
+      // Sort high half in order using PSHUFHW if possible.
+      MaskVec.clear();
+      for (unsigned i = 0; i != 4; ++i)
+        MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+      bool AnyOutOrder = false;
+      for (unsigned i = 4; i != 8; ++i) {
+        SDOperand Elt = MaskElts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(Elt);
+          InOrder.set(i);
+        } else {
+          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+          if (EltIdx != i)
+            AnyOutOrder = true;
+          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+          // If this element is in the right place after this shuffle, then
+          // remember it.
+          if ((int)(EltIdx / 4) == BestHighQuad)
+            InOrder.set(i);
+        }
+      }
+      if (AnyOutOrder) {
+        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+      }
+    }
+
+    // The other elements are put in the right place using pextrw and pinsrw.
+    for (unsigned i = 0; i != 8; ++i) {
+      if (InOrder[i])
+        continue;
+      SDOperand Elt = MaskElts[i];
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      if (EltIdx == i)
+        continue;
+      SDOperand ExtOp = (EltIdx < 8)
+        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+                      DAG.getConstant(EltIdx, PtrVT))
+        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+                      DAG.getConstant(EltIdx - 8, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  }
+
+  // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+  ///as few as possible.
   // First, let's find out how many elements are already in the right order.
   unsigned V1InOrder = 0;
   unsigned V1FromV1 = 0;
   unsigned V2InOrder = 0;
   unsigned V2FromV2 = 0;
-  SmallVector<unsigned, 8> V1Elts;
-  SmallVector<unsigned, 8> V2Elts;
+  SmallVector<SDOperand, 8> V1Elts;
+  SmallVector<SDOperand, 8> V2Elts;
   for (unsigned i = 0; i < 8; ++i) {
-    SDOperand Elt = PermMask.getOperand(i);
+    SDOperand Elt = MaskElts[i];
     if (Elt.getOpcode() == ISD::UNDEF) {
-      V1Elts.push_back(i);
-      V2Elts.push_back(i);
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(Elt);
       ++V1InOrder;
       ++V2InOrder;
-    } else {
-      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
-      if (EltIdx == i) {
-        V1Elts.push_back(i);
-        V2Elts.push_back(i+8);
-        ++V1InOrder;
-      } else if (EltIdx == i+8) {
-        V1Elts.push_back(i+8);
-        V2Elts.push_back(i);
-        ++V2InOrder;
-      } else {
-        V1Elts.push_back(EltIdx);
-        V2Elts.push_back(EltIdx);
-        if (EltIdx < 8)
-          ++V1FromV1;
-        else
-          ++V2FromV2;
-      }
-      }
+      continue;
+    }
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    if (EltIdx == i) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+      ++V1InOrder;
+    } else if (EltIdx == i+8) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+      ++V2InOrder;
+    } else if (EltIdx < 8) {
+      V1Elts.push_back(Elt);
+      ++V1FromV1;
+    } else {
+      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+      ++V2FromV2;
     }
   }
@@ -3377,33 +3501,92 @@ SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
     std::swap(V1FromV1, V2FromV2);
   }

-  MVT::ValueType PtrVT = TLI.getPointerTy();
-  if (V1FromV1) {
-    // If there are elements that are from V1 but out of place,
-    // then first sort them in place
-    SmallVector<SDOperand, 8> MaskVec;
-    for (unsigned i = 0; i < 8; ++i) {
-      unsigned EltIdx = V1Elts[i];
-      if (EltIdx >= 8)
-        MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
-      else
-        MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
-    }
-    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
-    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
-  }
-
-  // Now let's insert elements from the other vector.
-  for (unsigned i = 0; i < 8; ++i) {
-    unsigned EltIdx = V1Elts[i];
-    if (EltIdx < 8)
-      continue;
-    SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
-                                  DAG.getConstant(EltIdx - 8, PtrVT));
-    V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp,
-                     DAG.getConstant(i, PtrVT));
-  }
-  return V1;
+  if ((V1FromV1 + V1InOrder) != 8) {
+    // Some elements are from V2.
+    if (V1FromV1) {
+      // If there are elements that are from V1 but out of place,
+      // then first sort them in place
+      SmallVector<SDOperand, 8> MaskVec;
+      for (unsigned i = 0; i < 8; ++i) {
+        SDOperand Elt = V1Elts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+          continue;
+        }
+        unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+        if (EltIdx >= 8)
+          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+        else
+          MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+      }
+      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+    }
+
+    NewV = V1;
+    for (unsigned i = 0; i < 8; ++i) {
+      SDOperand Elt = V1Elts[i];
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      if (EltIdx < 8)
+        continue;
+      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+                                    DAG.getConstant(EltIdx - 8, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  } else {
+    // All elements are from V1.
+    NewV = V1;
+    for (unsigned i = 0; i < 8; ++i) {
+      SDOperand Elt = V1Elts[i];
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+                                    DAG.getConstant(EltIdx, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  }
+}
+
+/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones if possible. This can be done when every pair / quad of shuffle mask
+/// elements point to elements in the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+                                SDOperand PermMask, SelectionDAG &DAG,
+                                TargetLowering &TLI) {
+  unsigned NumElems = PermMask.getNumOperands();
+  unsigned Scale = NumElems / 4;
+  SmallVector<SDOperand, 4> MaskVec;
+  for (unsigned i = 0; i < NumElems; i += Scale) {
+    unsigned StartIdx = ~0U;
+    for (unsigned j = 0; j < Scale; ++j) {
+      SDOperand Elt = PermMask.getOperand(i+j);
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      if (StartIdx == ~0U)
+        StartIdx = EltIdx - (EltIdx % Scale);
+      if (EltIdx != StartIdx + j)
+        return SDOperand();
+    }
+    if (StartIdx == ~0U)
+      MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+    else
+      MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
+  }
+
+  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
+  V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
+  return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
 }
SDOperand SDOperand
@@ -3544,18 +3727,31 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
     }
   }

-  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
-  if (VT == MVT::v8i16)
-    return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
-
-  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+  // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
+  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+    SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
+    if (NewOp.Val)
+      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+  }
+
+  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+  if (VT == MVT::v8i16) {
+    SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+    if (NewOp.Val)
+      return NewOp;
+  }
+
+  // Handle all 4 wide cases with a number of shuffles.
+  if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
     // Don't do this for MMX.
     MVT::ValueType MaskVT = PermMask.getValueType();
     MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
     SmallVector<std::pair<int, int>, 8> Locs;
     Locs.reserve(NumElems);
-    SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
-    SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+    SmallVector<SDOperand, 8> Mask1(NumElems,
+                                    DAG.getNode(ISD::UNDEF, MaskEVT));
+    SmallVector<SDOperand, 8> Mask2(NumElems,
+                                    DAG.getNode(ISD::UNDEF, MaskEVT));
     unsigned NumHi = 0;
     unsigned NumLo = 0;
     // If no more than two elements come from either vector. This can be
@@ -3661,6 +3857,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
   MVT::ValueType VT = Op.getValueType();
   // TODO: handle v16i8.
   if (MVT::getSizeInBits(VT) == 16) {
+    SDOperand Vec = Op.getOperand(0);
+    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+    if (Idx == 0)
+      return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+                                     DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+                                     Op.getOperand(1)));
     // Transform it so it match pextrw which produces a 32-bit result.
     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
     SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
@@ -3669,7 +3872,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
                                     DAG.getValueType(VT));
     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
   } else if (MVT::getSizeInBits(VT) == 32) {
-    SDOperand Vec = Op.getOperand(0);
     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
     if (Idx == 0)
       return Op;
@@ -3686,12 +3888,12 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                  &IdxVec[0], IdxVec.size());
+    SDOperand Vec = Op.getOperand(0);
     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                        DAG.getConstant(0, getPointerTy()));
   } else if (MVT::getSizeInBits(VT) == 64) {
-    SDOperand Vec = Op.getOperand(0);
     unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
     if (Idx == 0)
       return Op;
@@ -3706,6 +3908,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
       push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
     SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                  &IdxVec[0], IdxVec.size());
+    SDOperand Vec = Op.getOperand(0);
     Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                       Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,


@@ -1,37 +1,28 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2

-define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
     %tmp1 = load <8 x i16>* %A
     %tmp2 = load <8 x i16>* %B
     %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-    store <8 x i16> %tmp3, <8 x i16>* %res
-    ret void
+    ret <8 x i16> %tmp3
 }

-define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-    %tmp1 = load <8 x i16>* %A
-    %tmp2 = load <8 x i16>* %B
-    %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 >
-    store <8 x i16> %tmp3, <8 x i16>* %res
-    ret void
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+    %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+    ret <8 x i16> %tmp
 }

-define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-    %tmp1 = load <8 x i16>* %A
-    %tmp2 = load <8 x i16>* %B
-    %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
-    store <8 x i16> %tmp3, <8 x i16>* %res
-    ret void
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+    %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+    ret <8 x i16> %tmp
 }

-define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
-    %tmp1 = load <8 x i16>* %A
-    %tmp2 = load <8 x i16>* %B
-    %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-    store <8 x i16> %tmp3, <8 x i16>* %res
-    ret void
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
+    %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+    ret <8 x i16> %tmp
 }


@@ -0,0 +1,21 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlhps | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 1
define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
ret <8 x i16> %tmp
}
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
ret <8 x i16> %tmp
}