mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-03 02:31:26 +00:00
move insert, extract, concat helper functions closer to related helper functions; NFCI
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232781 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8154ef7589
commit
2326d50776
@ -76,162 +76,6 @@ static cl::opt<int> ReciprocalEstimateRefinementSteps(
|
||||
// Forward declaration only -- no body for getMOVL is given at this point.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
|
||||
SDValue V2);
|
||||
|
||||
/// Pull a vectorWidth-bit (128 or 256) slice out of the wider vector Vec.
/// IdxVal is an element index inside the wanted slice; it is rounded down to
/// the first element of that slice. An UNDEF input yields UNDEF, a
/// BUILD_VECTOR input yields a narrower BUILD_VECTOR, and any other input
/// yields an EXTRACT_SUBVECTOR node.
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                                SelectionDAG &DAG, SDLoc dl,
                                unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  const EVT SrcVT = Vec.getValueType();
  const EVT EltVT = SrcVT.getVectorElementType();

  // The slice keeps the element type and holds 1/NumSlices of the elements.
  const unsigned NumSlices = SrcVT.getSizeInBits() / vectorWidth;
  const EVT SliceVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                       SrcVT.getVectorNumElements() / NumSlices);

  // A slice of an undefined vector is itself undefined.
  const unsigned SrcOpc = Vec.getOpcode();
  if (SrcOpc == ISD::UNDEF)
    return DAG.getUNDEF(SliceVT);

  // Round IdxVal down to the first element of the slice that contains it.
  const unsigned EltBits = EltVT.getSizeInBits();
  const unsigned EltsPerSlice = vectorWidth / EltBits;
  const unsigned SliceNo = (IdxVal * EltBits) / vectorWidth;
  const unsigned FirstElt = SliceNo * EltsPerSlice;

  // General case: emit an EXTRACT_SUBVECTOR at the normalized index.
  if (SrcOpc != ISD::BUILD_VECTOR)
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SliceVT, Vec,
                       DAG.getIntPtrConstant(FirstElt));

  // A BUILD_VECTOR source can simply be rebuilt from the relevant operands.
  return DAG.getNode(ISD::BUILD_VECTOR, dl, SliceVT,
                     makeArrayRef(Vec->op_begin() + FirstElt, EltsPerSlice));
}
|
||||
|
||||
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
|
||||
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
|
||||
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
|
||||
/// instructions or a simple subregister reference. Idx is an index in the
|
||||
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
|
||||
/// lowering EXTRACT_VECTOR_ELT operations easier.
|
||||
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert((Vec.getValueType().is256BitVector() ||
|
||||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
|
||||
return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
|
||||
}
|
||||
|
||||
/// Generate a DAG to grab 256-bits from a 512-bit vector.
|
||||
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
|
||||
return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
|
||||
}
|
||||
|
||||
/// Write the vectorWidth-bit (128 or 256) vector Vec into the wider vector
/// Result. IdxVal is an element index inside the destination chunk and is
/// rounded down to that chunk's first element. An UNDEF Vec leaves Result
/// untouched; otherwise an INSERT_SUBVECTOR node is returned.
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                               unsigned IdxVal, SelectionDAG &DAG,
                               SDLoc dl, unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  // Nothing to write when the incoming chunk is undefined.
  if (Vec.getOpcode() == ISD::UNDEF)
    return Result;

  const unsigned EltBits =
      Vec.getValueType().getVectorElementType().getSizeInBits();
  const unsigned EltsPerChunk = vectorWidth / EltBits;

  // Round IdxVal down to the first element of the chunk that contains it.
  const unsigned ChunkNo = (IdxVal * EltBits) / vectorWidth;
  SDValue FirstElt = DAG.getIntPtrConstant(ChunkNo * EltsPerChunk);

  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Result.getValueType(), Result,
                     Vec, FirstElt);
}
|
||||
|
||||
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
|
||||
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
|
||||
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
|
||||
/// simple superregister reference. Idx is an index in the 128 bits
|
||||
/// we want. It need not be aligned to a 128-bit boundary. That makes
|
||||
/// lowering INSERT_VECTOR_ELT operations easier.
|
||||
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
|
||||
|
||||
// For insertion into the zero index (low half) of a 256-bit vector, it is
|
||||
// more efficient to generate a blend with immediate instead of an insert*128.
|
||||
// We are still creating an INSERT_SUBVECTOR below with an undef node to
|
||||
// extend the subvector to the size of the result vector. Make sure that
|
||||
// we are not recursing on that node by checking for undef here.
|
||||
if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
|
||||
Result.getOpcode() != ISD::UNDEF) {
|
||||
EVT ResultVT = Result.getValueType();
|
||||
SDValue ZeroIndex = DAG.getIntPtrConstant(0);
|
||||
SDValue Undef = DAG.getUNDEF(ResultVT);
|
||||
SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
|
||||
Vec, ZeroIndex);
|
||||
|
||||
// The blend instruction, and therefore its mask, depend on the data type.
|
||||
MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
|
||||
if (ScalarType.isFloatingPoint()) {
|
||||
// Choose either vblendps (float) or vblendpd (double).
|
||||
unsigned ScalarSize = ScalarType.getSizeInBits();
|
||||
assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
|
||||
unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
|
||||
SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
|
||||
return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
|
||||
}
|
||||
|
||||
const X86Subtarget &Subtarget =
|
||||
static_cast<const X86Subtarget &>(DAG.getSubtarget());
|
||||
|
||||
// AVX2 is needed for 256-bit integer blend support.
|
||||
// Integers must be cast to 32-bit because there is only vpblendd;
|
||||
// vpblendw can't be used for this because it has a handicapped mask.
|
||||
|
||||
// If we don't have AVX2, then cast to float. Using a wrong domain blend
|
||||
// is still more efficient than using the wrong domain vinsertf128 that
|
||||
// will be created by InsertSubVector().
|
||||
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
|
||||
|
||||
SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
|
||||
Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
|
||||
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
|
||||
return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
|
||||
}
|
||||
|
||||
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
|
||||
}
|
||||
|
||||
/// Write the 256-bit vector Vec into the wider vector Result, at the 256-bit
/// chunk that contains element IdxVal. Thin wrapper over InsertSubVector().
static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");

  const unsigned ChunkBits = 256;
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, ChunkBits);
}
|
||||
|
||||
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
|
||||
/// instructions. This is used because creating CONCAT_VECTOR nodes of
|
||||
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
|
||||
/// large BUILD_VECTORS.
|
||||
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
|
||||
unsigned NumElems, SelectionDAG &DAG,
|
||||
SDLoc dl) {
|
||||
SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
|
||||
return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
|
||||
}
|
||||
|
||||
/// Join two 256-bit vectors into one wider vector of type VT: V1 is inserted
/// at element 0 of an undef vector and V2 at element NumElems/2.
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue Undef = DAG.getUNDEF(VT);
  SDValue LowFilled = Insert256BitVector(Undef, V1, 0, DAG, dl);
  const unsigned UpperStart = NumElems / 2;
  return Insert256BitVector(LowFilled, V2, UpperStart, DAG, dl);
}
|
||||
|
||||
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
const X86Subtarget &STI)
|
||||
: TargetLowering(TM), Subtarget(&STI) {
|
||||
@ -4097,6 +3941,162 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
|
||||
}
|
||||
|
||||
/// Pull a vectorWidth-bit (128 or 256) slice out of the wider vector Vec.
/// IdxVal is an element index inside the wanted slice; it is rounded down to
/// the first element of that slice. An UNDEF input yields UNDEF, a
/// BUILD_VECTOR input yields a narrower BUILD_VECTOR, and any other input
/// yields an EXTRACT_SUBVECTOR node.
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                                SelectionDAG &DAG, SDLoc dl,
                                unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  const EVT SrcVT = Vec.getValueType();
  const EVT EltVT = SrcVT.getVectorElementType();

  // The slice keeps the element type and holds 1/NumSlices of the elements.
  const unsigned NumSlices = SrcVT.getSizeInBits() / vectorWidth;
  const EVT SliceVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                       SrcVT.getVectorNumElements() / NumSlices);

  // A slice of an undefined vector is itself undefined.
  const unsigned SrcOpc = Vec.getOpcode();
  if (SrcOpc == ISD::UNDEF)
    return DAG.getUNDEF(SliceVT);

  // Round IdxVal down to the first element of the slice that contains it.
  const unsigned EltBits = EltVT.getSizeInBits();
  const unsigned EltsPerSlice = vectorWidth / EltBits;
  const unsigned SliceNo = (IdxVal * EltBits) / vectorWidth;
  const unsigned FirstElt = SliceNo * EltsPerSlice;

  // General case: emit an EXTRACT_SUBVECTOR at the normalized index.
  if (SrcOpc != ISD::BUILD_VECTOR)
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SliceVT, Vec,
                       DAG.getIntPtrConstant(FirstElt));

  // A BUILD_VECTOR source can simply be rebuilt from the relevant operands.
  return DAG.getNode(ISD::BUILD_VECTOR, dl, SliceVT,
                     makeArrayRef(Vec->op_begin() + FirstElt, EltsPerSlice));
}
|
||||
|
||||
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
|
||||
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
|
||||
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
|
||||
/// instructions or a simple subregister reference. Idx is an index in the
|
||||
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
|
||||
/// lowering EXTRACT_VECTOR_ELT operations easier.
|
||||
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert((Vec.getValueType().is256BitVector() ||
|
||||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
|
||||
return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
|
||||
}
|
||||
|
||||
/// Generate a DAG to grab 256-bits from a 512-bit vector.
|
||||
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
|
||||
return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
|
||||
}
|
||||
|
||||
/// Write the vectorWidth-bit (128 or 256) vector Vec into the wider vector
/// Result. IdxVal is an element index inside the destination chunk and is
/// rounded down to that chunk's first element. An UNDEF Vec leaves Result
/// untouched; otherwise an INSERT_SUBVECTOR node is returned.
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                               unsigned IdxVal, SelectionDAG &DAG,
                               SDLoc dl, unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  // Nothing to write when the incoming chunk is undefined.
  if (Vec.getOpcode() == ISD::UNDEF)
    return Result;

  const unsigned EltBits =
      Vec.getValueType().getVectorElementType().getSizeInBits();
  const unsigned EltsPerChunk = vectorWidth / EltBits;

  // Round IdxVal down to the first element of the chunk that contains it.
  const unsigned ChunkNo = (IdxVal * EltBits) / vectorWidth;
  SDValue FirstElt = DAG.getIntPtrConstant(ChunkNo * EltsPerChunk);

  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Result.getValueType(), Result,
                     Vec, FirstElt);
}
|
||||
|
||||
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
|
||||
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
|
||||
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
|
||||
/// simple superregister reference. Idx is an index in the 128 bits
|
||||
/// we want. It need not be aligned to a 128-bit boundary. That makes
|
||||
/// lowering INSERT_VECTOR_ELT operations easier.
|
||||
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
|
||||
SelectionDAG &DAG, SDLoc dl) {
|
||||
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
|
||||
|
||||
// For insertion into the zero index (low half) of a 256-bit vector, it is
|
||||
// more efficient to generate a blend with immediate instead of an insert*128.
|
||||
// We are still creating an INSERT_SUBVECTOR below with an undef node to
|
||||
// extend the subvector to the size of the result vector. Make sure that
|
||||
// we are not recursing on that node by checking for undef here.
|
||||
if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
|
||||
Result.getOpcode() != ISD::UNDEF) {
|
||||
EVT ResultVT = Result.getValueType();
|
||||
SDValue ZeroIndex = DAG.getIntPtrConstant(0);
|
||||
SDValue Undef = DAG.getUNDEF(ResultVT);
|
||||
SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
|
||||
Vec, ZeroIndex);
|
||||
|
||||
// The blend instruction, and therefore its mask, depend on the data type.
|
||||
MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
|
||||
if (ScalarType.isFloatingPoint()) {
|
||||
// Choose either vblendps (float) or vblendpd (double).
|
||||
unsigned ScalarSize = ScalarType.getSizeInBits();
|
||||
assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
|
||||
unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
|
||||
SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
|
||||
return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
|
||||
}
|
||||
|
||||
const X86Subtarget &Subtarget =
|
||||
static_cast<const X86Subtarget &>(DAG.getSubtarget());
|
||||
|
||||
// AVX2 is needed for 256-bit integer blend support.
|
||||
// Integers must be cast to 32-bit because there is only vpblendd;
|
||||
// vpblendw can't be used for this because it has a handicapped mask.
|
||||
|
||||
// If we don't have AVX2, then cast to float. Using a wrong domain blend
|
||||
// is still more efficient than using the wrong domain vinsertf128 that
|
||||
// will be created by InsertSubVector().
|
||||
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
|
||||
|
||||
SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
|
||||
Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
|
||||
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
|
||||
return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
|
||||
}
|
||||
|
||||
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
|
||||
}
|
||||
|
||||
/// Write the 256-bit vector Vec into the wider vector Result, at the 256-bit
/// chunk that contains element IdxVal. Thin wrapper over InsertSubVector().
static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");

  const unsigned ChunkBits = 256;
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, ChunkBits);
}
|
||||
|
||||
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
|
||||
/// instructions. This is used because creating CONCAT_VECTOR nodes of
|
||||
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
|
||||
/// large BUILD_VECTORS.
|
||||
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
|
||||
unsigned NumElems, SelectionDAG &DAG,
|
||||
SDLoc dl) {
|
||||
SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
|
||||
return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
|
||||
}
|
||||
|
||||
/// Join two 256-bit vectors into one wider vector of type VT: V1 is inserted
/// at element 0 of an undef vector and V2 at element NumElems/2.
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue Undef = DAG.getUNDEF(VT);
  SDValue LowFilled = Insert256BitVector(Undef, V1, 0, DAG, dl);
  const unsigned UpperStart = NumElems / 2;
  return Insert256BitVector(LowFilled, V2, UpperStart, DAG, dl);
}
|
||||
|
||||
/// getOnesVector - Returns a vector of specified type with all bits set.
|
||||
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
|
||||
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
|
||||
|
Loading…
x
Reference in New Issue
Block a user