mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-04 22:07:27 +00:00
Added INSERT and EXTRACT instructions from AVX-512 ISA.
All insertf*/extractf* functions replaced with insert/extract since we have insertf and inserti forms. Added lowering for INSERT_VECTOR_ELT / EXTRACT_VECTOR_ELT for 512-bit vectors. Added lowering for EXTRACT/INSERT subvector for 512-bit vectors. Added a test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187491 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f3068d02e5
commit
8395251c0a
@ -58,17 +58,14 @@ STATISTIC(NumTailCalls, "Number of tail calls");
|
|||||||
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
|
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
|
||||||
SDValue V2);
|
SDValue V2);
|
||||||
|
|
||||||
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
|
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
|
||||||
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
|
SelectionDAG &DAG, SDLoc dl,
|
||||||
/// simple subregister reference. Idx is an index in the 128 bits we
|
unsigned vectorWidth) {
|
||||||
/// want. It need not be aligned to a 128-bit boundary. That makes
|
assert((vectorWidth == 128 || vectorWidth == 256) &&
|
||||||
/// lowering EXTRACT_VECTOR_ELT operations easier.
|
"Unsupported vector width");
|
||||||
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
|
||||||
SelectionDAG &DAG, SDLoc dl) {
|
|
||||||
EVT VT = Vec.getValueType();
|
EVT VT = Vec.getValueType();
|
||||||
assert(VT.is256BitVector() && "Unexpected vector size!");
|
|
||||||
EVT ElVT = VT.getVectorElementType();
|
EVT ElVT = VT.getVectorElementType();
|
||||||
unsigned Factor = VT.getSizeInBits()/128;
|
unsigned Factor = VT.getSizeInBits()/vectorWidth;
|
||||||
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
|
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
|
||||||
VT.getVectorNumElements()/Factor);
|
VT.getVectorNumElements()/Factor);
|
||||||
|
|
||||||
@ -76,13 +73,12 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
|||||||
if (Vec.getOpcode() == ISD::UNDEF)
|
if (Vec.getOpcode() == ISD::UNDEF)
|
||||||
return DAG.getUNDEF(ResultVT);
|
return DAG.getUNDEF(ResultVT);
|
||||||
|
|
||||||
// Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
|
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
|
||||||
// we can match to VEXTRACTF128.
|
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
|
||||||
unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
|
|
||||||
|
|
||||||
// This is the index of the first element of the 128-bit chunk
|
// This is the index of the first element of the vectorWidth-bit chunk
|
||||||
// we want.
|
// we want.
|
||||||
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
|
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
|
||||||
* ElemsPerChunk);
|
* ElemsPerChunk);
|
||||||
|
|
||||||
// If the input is a buildvector just emit a smaller one.
|
// If the input is a buildvector just emit a smaller one.
|
||||||
@ -95,37 +91,69 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
|||||||
VecIdx);
|
VecIdx);
|
||||||
|
|
||||||
return Result;
|
return Result;
|
||||||
|
|
||||||
|
}
|
||||||
|
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
|
||||||
|
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
|
||||||
|
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
|
||||||
|
/// instructions or a simple subregister reference. Idx is an index in the
|
||||||
|
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
|
||||||
|
/// lowering EXTRACT_VECTOR_ELT operations easier.
|
||||||
|
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
|
||||||
|
SelectionDAG &DAG, SDLoc dl) {
|
||||||
|
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
|
||||||
|
return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a DAG to grab 256-bits from a 512-bit vector.
|
||||||
|
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
|
||||||
|
SelectionDAG &DAG, SDLoc dl) {
|
||||||
|
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
|
||||||
|
return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
|
||||||
|
unsigned IdxVal, SelectionDAG &DAG,
|
||||||
|
SDLoc dl, unsigned vectorWidth) {
|
||||||
|
assert((vectorWidth == 128 || vectorWidth == 256) &&
|
||||||
|
"Unsupported vector width");
|
||||||
|
// Inserting UNDEF leaves Result unchanged.
|
||||||
|
if (Vec.getOpcode() == ISD::UNDEF)
|
||||||
|
return Result;
|
||||||
|
EVT VT = Vec.getValueType();
|
||||||
|
EVT ElVT = VT.getVectorElementType();
|
||||||
|
EVT ResultVT = Result.getValueType();
|
||||||
|
|
||||||
|
// Insert the relevant vectorWidth bits.
|
||||||
|
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
|
||||||
|
|
||||||
|
// This is the index of the first element of the vectorWidth-bit chunk
|
||||||
|
// we want.
|
||||||
|
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
|
||||||
|
* ElemsPerChunk);
|
||||||
|
|
||||||
|
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
|
||||||
|
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
|
||||||
|
VecIdx);
|
||||||
|
}
|
||||||
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
|
/// Generate a DAG to put 128-bits into a vector > 128 bits. This
|
||||||
/// sets things up to match to an AVX VINSERTF128 instruction or a
|
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
|
||||||
|
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
|
||||||
/// simple superregister reference. Idx is an index in the 128 bits
|
/// simple superregister reference. Idx is an index in the 128 bits
|
||||||
/// we want. It need not be aligned to a 128-bit boundary. That makes
|
/// we want. It need not be aligned to a 128-bit boundary. That makes
|
||||||
/// lowering INSERT_VECTOR_ELT operations easier.
|
/// lowering INSERT_VECTOR_ELT operations easier.
|
||||||
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
|
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
|
||||||
unsigned IdxVal, SelectionDAG &DAG,
|
unsigned IdxVal, SelectionDAG &DAG,
|
||||||
SDLoc dl) {
|
SDLoc dl) {
|
||||||
// Inserting UNDEF is Result
|
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
|
||||||
if (Vec.getOpcode() == ISD::UNDEF)
|
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
|
||||||
return Result;
|
}
|
||||||
|
|
||||||
EVT VT = Vec.getValueType();
|
static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
|
||||||
assert(VT.is128BitVector() && "Unexpected vector size!");
|
unsigned IdxVal, SelectionDAG &DAG,
|
||||||
|
SDLoc dl) {
|
||||||
EVT ElVT = VT.getVectorElementType();
|
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
|
||||||
EVT ResultVT = Result.getValueType();
|
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
|
||||||
|
|
||||||
// Insert the relevant 128 bits.
|
|
||||||
unsigned ElemsPerChunk = 128/ElVT.getSizeInBits();
|
|
||||||
|
|
||||||
// This is the index of the first element of the 128-bit chunk
|
|
||||||
// we want.
|
|
||||||
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128)
|
|
||||||
* ElemsPerChunk);
|
|
||||||
|
|
||||||
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
|
|
||||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
|
|
||||||
VecIdx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
|
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
|
||||||
@ -139,6 +167,13 @@ static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
|
|||||||
return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
|
return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
|
||||||
|
unsigned NumElems, SelectionDAG &DAG,
|
||||||
|
SDLoc dl) {
|
||||||
|
SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
|
||||||
|
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
|
||||||
|
}
|
||||||
|
|
||||||
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
|
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
|
||||||
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
|
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
|
||||||
bool is64Bit = Subtarget->is64Bit();
|
bool is64Bit = Subtarget->is64Bit();
|
||||||
@ -1261,6 +1296,147 @@ void X86TargetLowering::resetOperationActions() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
|
||||||
|
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
|
||||||
|
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
|
||||||
|
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
|
||||||
|
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
|
||||||
|
|
||||||
|
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
|
||||||
|
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
|
||||||
|
|
||||||
|
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
|
||||||
|
setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FADD, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
|
||||||
|
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
|
||||||
|
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
|
||||||
|
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
|
||||||
|
|
||||||
|
|
||||||
|
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
|
||||||
|
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
|
||||||
|
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
|
||||||
|
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
|
||||||
|
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
|
||||||
|
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::TRUNCATE, MVT::i1, Legal);
|
||||||
|
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
|
||||||
|
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
|
||||||
|
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
|
||||||
|
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
|
||||||
|
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
|
||||||
|
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
|
||||||
|
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
|
||||||
|
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
|
||||||
|
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
|
||||||
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
|
||||||
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
|
||||||
|
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
|
||||||
|
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
|
||||||
|
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
|
||||||
|
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
|
||||||
|
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
|
||||||
|
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SUB, MVT::v8i64, Legal);
|
||||||
|
setOperationAction(ISD::SUB, MVT::v16i32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SRL, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SRL, MVT::v16i32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SHL, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SHL, MVT::v16i32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SRA, MVT::v8i64, Custom);
|
||||||
|
setOperationAction(ISD::SRA, MVT::v16i32, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::AND, MVT::v8i64, Legal);
|
||||||
|
setOperationAction(ISD::OR, MVT::v8i64, Legal);
|
||||||
|
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
|
||||||
|
|
||||||
|
// Custom lower several nodes.
|
||||||
|
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
|
||||||
|
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
|
||||||
|
MVT VT = (MVT::SimpleValueType)i;
|
||||||
|
|
||||||
|
// Extract subvector is special because the value type
|
||||||
|
// (result) is 256/128-bit but the source is 512-bit wide.
|
||||||
|
if (VT.is128BitVector() || VT.is256BitVector())
|
||||||
|
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
|
||||||
|
|
||||||
|
if (VT.getVectorElementType() == MVT::i1)
|
||||||
|
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
|
||||||
|
|
||||||
|
// Do not attempt to custom lower other non-512-bit vectors
|
||||||
|
if (!VT.is512BitVector())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (VT != MVT::v8i64) {
|
||||||
|
setOperationAction(ISD::XOR, VT, Promote);
|
||||||
|
AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
|
||||||
|
setOperationAction(ISD::OR, VT, Promote);
|
||||||
|
AddPromotedToType (ISD::OR, VT, MVT::v8i64);
|
||||||
|
setOperationAction(ISD::AND, VT, Promote);
|
||||||
|
AddPromotedToType (ISD::AND, VT, MVT::v8i64);
|
||||||
|
}
|
||||||
|
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
|
||||||
|
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
|
||||||
|
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, VT, Legal);
|
||||||
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||||
|
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
|
||||||
|
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||||
|
}
|
||||||
|
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
|
||||||
|
MVT VT = (MVT::SimpleValueType)i;
|
||||||
|
|
||||||
|
// Do not attempt to promote non-512-bit vectors
|
||||||
|
if (!VT.is512BitVector())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
setOperationAction(ISD::LOAD, VT, Promote);
|
||||||
|
AddPromotedToType (ISD::LOAD, VT, MVT::v8i64);
|
||||||
|
setOperationAction(ISD::SELECT, VT, Promote);
|
||||||
|
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
|
||||||
|
}
|
||||||
|
}// has AVX-512
|
||||||
|
|
||||||
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
|
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
|
||||||
// of this type with custom code.
|
// of this type with custom code.
|
||||||
for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
|
for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
|
||||||
@ -2007,12 +2183,18 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||||||
RC = &X86::FR32RegClass;
|
RC = &X86::FR32RegClass;
|
||||||
else if (RegVT == MVT::f64)
|
else if (RegVT == MVT::f64)
|
||||||
RC = &X86::FR64RegClass;
|
RC = &X86::FR64RegClass;
|
||||||
|
else if (RegVT.is512BitVector())
|
||||||
|
RC = &X86::VR512RegClass;
|
||||||
else if (RegVT.is256BitVector())
|
else if (RegVT.is256BitVector())
|
||||||
RC = &X86::VR256RegClass;
|
RC = &X86::VR256RegClass;
|
||||||
else if (RegVT.is128BitVector())
|
else if (RegVT.is128BitVector())
|
||||||
RC = &X86::VR128RegClass;
|
RC = &X86::VR128RegClass;
|
||||||
else if (RegVT == MVT::x86mmx)
|
else if (RegVT == MVT::x86mmx)
|
||||||
RC = &X86::VR64RegClass;
|
RC = &X86::VR64RegClass;
|
||||||
|
else if (RegVT == MVT::v8i1)
|
||||||
|
RC = &X86::VK8RegClass;
|
||||||
|
else if (RegVT == MVT::v16i1)
|
||||||
|
RC = &X86::VK16RegClass;
|
||||||
else
|
else
|
||||||
llvm_unreachable("Unknown argument type!");
|
llvm_unreachable("Unknown argument type!");
|
||||||
|
|
||||||
@ -4053,42 +4235,59 @@ static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isVEXTRACTF128Index - Return true if the specified
|
/// isVEXTRACTIndex - Return true if the specified
|
||||||
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
|
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
|
||||||
/// suitable for input to VEXTRACTF128.
|
/// suitable for instructions that extract 128-bit or 256-bit vectors.
|
||||||
bool X86::isVEXTRACTF128Index(SDNode *N) {
|
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
|
||||||
|
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
|
||||||
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
|
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// The index should be aligned on a 128-bit boundary.
|
// The index should be aligned on a vecWidth-bit boundary.
|
||||||
uint64_t Index =
|
uint64_t Index =
|
||||||
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
|
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
|
||||||
|
|
||||||
MVT VT = N->getValueType(0).getSimpleVT();
|
MVT VT = N->getValueType(0).getSimpleVT();
|
||||||
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
|
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
|
||||||
bool Result = (Index * ElSize) % 128 == 0;
|
bool Result = (Index * ElSize) % vecWidth == 0;
|
||||||
|
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
|
/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
|
||||||
/// operand specifies a subvector insert that is suitable for input to
|
/// operand specifies a subvector insert that is suitable for input to
|
||||||
/// VINSERTF128.
|
/// insertion of 128 or 256-bit subvectors
|
||||||
bool X86::isVINSERTF128Index(SDNode *N) {
|
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
|
||||||
|
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
|
||||||
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
|
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
|
||||||
return false;
|
return false;
|
||||||
|
// The index should be aligned on a vecWidth-bit boundary.
|
||||||
// The index should be aligned on a 128-bit boundary.
|
|
||||||
uint64_t Index =
|
uint64_t Index =
|
||||||
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
|
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
|
||||||
|
|
||||||
MVT VT = N->getValueType(0).getSimpleVT();
|
MVT VT = N->getValueType(0).getSimpleVT();
|
||||||
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
|
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
|
||||||
bool Result = (Index * ElSize) % 128 == 0;
|
bool Result = (Index * ElSize) % vecWidth == 0;
|
||||||
|
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool X86::isVINSERT128Index(SDNode *N) {
|
||||||
|
return isVINSERTIndex(N, 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool X86::isVINSERT256Index(SDNode *N) {
|
||||||
|
return isVINSERTIndex(N, 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool X86::isVEXTRACT128Index(SDNode *N) {
|
||||||
|
return isVEXTRACTIndex(N, 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool X86::isVEXTRACT256Index(SDNode *N) {
|
||||||
|
return isVEXTRACTIndex(N, 256);
|
||||||
|
}
|
||||||
|
|
||||||
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
||||||
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
|
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
|
||||||
/// Handles 128-bit and 256-bit.
|
/// Handles 128-bit and 256-bit.
|
||||||
@ -4192,12 +4391,10 @@ static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
|
|||||||
return (Val - i) * EltSize;
|
return (Val - i) * EltSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
|
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
|
||||||
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
|
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
|
||||||
/// instructions.
|
|
||||||
unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
|
|
||||||
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
|
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
|
||||||
llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
|
llvm_unreachable("Illegal extract subvector for VEXTRACT");
|
||||||
|
|
||||||
uint64_t Index =
|
uint64_t Index =
|
||||||
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
|
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
|
||||||
@ -4205,16 +4402,14 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
|
|||||||
MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
|
MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
|
||||||
MVT ElVT = VecVT.getVectorElementType();
|
MVT ElVT = VecVT.getVectorElementType();
|
||||||
|
|
||||||
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
|
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
|
||||||
return Index / NumElemsPerChunk;
|
return Index / NumElemsPerChunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getInsertVINSERTF128Immediate - Return the appropriate immediate
|
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
|
||||||
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
|
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
|
||||||
/// instructions.
|
|
||||||
unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
|
|
||||||
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
|
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
|
||||||
llvm_unreachable("Illegal insert subvector for VINSERTF128");
|
llvm_unreachable("Illegal insert subvector for VINSERT");
|
||||||
|
|
||||||
uint64_t Index =
|
uint64_t Index =
|
||||||
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
|
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
|
||||||
@ -4222,10 +4417,38 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
|
|||||||
MVT VecVT = N->getValueType(0).getSimpleVT();
|
MVT VecVT = N->getValueType(0).getSimpleVT();
|
||||||
MVT ElVT = VecVT.getVectorElementType();
|
MVT ElVT = VecVT.getVectorElementType();
|
||||||
|
|
||||||
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
|
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
|
||||||
return Index / NumElemsPerChunk;
|
return Index / NumElemsPerChunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getExtractVEXTRACT128Immediate - Return the appropriate immediate
|
||||||
|
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
|
||||||
|
/// and VEXTRACTI128 instructions.
|
||||||
|
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
|
||||||
|
return getExtractVEXTRACTImmediate(N, 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getExtractVEXTRACT256Immediate - Return the appropriate immediate
|
||||||
|
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
|
||||||
|
/// and VEXTRACTI64x4 instructions.
|
||||||
|
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
|
||||||
|
return getExtractVEXTRACTImmediate(N, 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getInsertVINSERT128Immediate - Return the appropriate immediate
|
||||||
|
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
|
||||||
|
/// and VINSERTI128 instructions.
|
||||||
|
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
|
||||||
|
return getInsertVINSERTImmediate(N, 128);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// getInsertVINSERT256Immediate - Return the appropriate immediate
|
||||||
|
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF64x4
|
||||||
|
/// and VINSERTI64x4 instructions.
|
||||||
|
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
|
||||||
|
return getInsertVINSERTImmediate(N, 256);
|
||||||
|
}
|
||||||
|
|
||||||
/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
|
/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
|
||||||
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
|
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
|
||||||
/// Handles 256-bit.
|
/// Handles 256-bit.
|
||||||
@ -5715,19 +5938,22 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
|||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
MVT ResVT = Op.getValueType().getSimpleVT();
|
MVT ResVT = Op.getValueType().getSimpleVT();
|
||||||
|
|
||||||
assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
|
assert((ResVT.is256BitVector() ||
|
||||||
|
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
|
||||||
|
|
||||||
SDValue V1 = Op.getOperand(0);
|
SDValue V1 = Op.getOperand(0);
|
||||||
SDValue V2 = Op.getOperand(1);
|
SDValue V2 = Op.getOperand(1);
|
||||||
unsigned NumElems = ResVT.getVectorNumElements();
|
unsigned NumElems = ResVT.getVectorNumElements();
|
||||||
|
if(ResVT.is256BitVector())
|
||||||
|
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
|
||||||
|
|
||||||
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
|
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
|
||||||
assert(Op.getNumOperands() == 2);
|
assert(Op.getNumOperands() == 2);
|
||||||
|
|
||||||
// 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
|
// AVX/AVX-512 can use the vinsertf128 instruction to create 256-bit vectors
|
||||||
// from two other 128-bit ones.
|
// from two other 128-bit ones.
|
||||||
return LowerAVXCONCAT_VECTORS(Op, DAG);
|
return LowerAVXCONCAT_VECTORS(Op, DAG);
|
||||||
}
|
}
|
||||||
@ -7197,6 +7423,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
|
|||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
|
SDLoc dl(Op);
|
||||||
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
@ -7205,17 +7432,19 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
|||||||
|
|
||||||
// If this is a 256-bit vector result, first extract the 128-bit vector and
|
// If this is a 256-bit or 512-bit vector, first extract the 128-bit vector and
|
||||||
// then extract the element from the 128-bit vector.
|
// then extract the element from the 128-bit vector.
|
||||||
if (VecVT.is256BitVector()) {
|
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
|
||||||
SDLoc dl(Op.getNode());
|
|
||||||
unsigned NumElems = VecVT.getVectorNumElements();
|
|
||||||
SDValue Idx = Op.getOperand(1);
|
SDValue Idx = Op.getOperand(1);
|
||||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||||
|
|
||||||
// Get the 128-bit vector.
|
// Get the 128-bit vector.
|
||||||
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
|
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
|
||||||
|
EVT EltVT = VecVT.getVectorElementType();
|
||||||
|
|
||||||
if (IdxVal >= NumElems/2)
|
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
|
||||||
IdxVal -= NumElems/2;
|
|
||||||
|
//if (IdxVal >= NumElems/2)
|
||||||
|
// IdxVal -= NumElems/2;
|
||||||
|
IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
|
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
|
||||||
DAG.getConstant(IdxVal, MVT::i32));
|
DAG.getConstant(IdxVal, MVT::i32));
|
||||||
}
|
}
|
||||||
@ -7229,7 +7458,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
|||||||
}
|
}
|
||||||
|
|
||||||
MVT VT = Op.getValueType().getSimpleVT();
|
MVT VT = Op.getValueType().getSimpleVT();
|
||||||
SDLoc dl(Op);
|
|
||||||
// TODO: handle v16i8.
|
// TODO: handle v16i8.
|
||||||
if (VT.getSizeInBits() == 16) {
|
if (VT.getSizeInBits() == 16) {
|
||||||
SDValue Vec = Op.getOperand(0);
|
SDValue Vec = Op.getOperand(0);
|
||||||
@ -7350,19 +7578,20 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
// If this is a 256-bit vector result, first extract the 128-bit vector,
|
// If this is a 256-bit or 512-bit vector result, first extract the 128-bit vector,
|
||||||
// insert the element into the extracted half and then place it back.
|
// insert the element into the extracted half and then place it back.
|
||||||
if (VT.is256BitVector()) {
|
if (VT.is256BitVector() || VT.is512BitVector()) {
|
||||||
if (!isa<ConstantSDNode>(N2))
|
if (!isa<ConstantSDNode>(N2))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Get the desired 128-bit vector half.
|
// Get the desired 128-bit vector half.
|
||||||
unsigned NumElems = VT.getVectorNumElements();
|
|
||||||
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
|
unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
|
||||||
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
|
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
|
||||||
|
|
||||||
// Insert the element into the desired half.
|
// Insert the element into the desired half.
|
||||||
bool Upper = IdxVal >= NumElems/2;
|
unsigned NumEltsIn128 = 128/EltVT.getSizeInBits();
|
||||||
|
unsigned IdxIn128 = IdxVal - (IdxVal/NumEltsIn128) * NumEltsIn128;
|
||||||
|
|
||||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
|
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
|
||||||
DAG.getConstant(Upper ? IdxVal-NumElems/2 : IdxVal, MVT::i32));
|
DAG.getConstant(IdxIn128, MVT::i32));
|
||||||
|
|
||||||
// Insert the changed part back to the 256-bit vector
|
// Insert the changed part back into the 256-bit or 512-bit vector
|
||||||
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
|
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
|
||||||
@ -7395,9 +7624,10 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
|||||||
// vector and then insert into the 256-bit vector.
|
// vector and then insert into the 256-bit vector.
|
||||||
if (!OpVT.is128BitVector()) {
|
if (!OpVT.is128BitVector()) {
|
||||||
// Insert into a 128-bit vector.
|
// Insert into a 128-bit vector.
|
||||||
|
unsigned SizeFactor = OpVT.getSizeInBits()/128;
|
||||||
EVT VT128 = EVT::getVectorVT(*Context,
|
EVT VT128 = EVT::getVectorVT(*Context,
|
||||||
OpVT.getVectorElementType(),
|
OpVT.getVectorElementType(),
|
||||||
OpVT.getVectorNumElements() / 2);
|
OpVT.getVectorNumElements() / SizeFactor);
|
||||||
|
|
||||||
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
|
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
|
||||||
|
|
||||||
@ -7420,16 +7650,22 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
|||||||
// upper bits of a vector.
|
// upper bits of a vector.
|
||||||
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
|
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
if (Subtarget->hasFp256()) {
|
SDLoc dl(Op);
|
||||||
SDLoc dl(Op.getNode());
|
SDValue In = Op.getOperand(0);
|
||||||
SDValue Vec = Op.getNode()->getOperand(0);
|
SDValue Idx = Op.getOperand(1);
|
||||||
SDValue Idx = Op.getNode()->getOperand(1);
|
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||||
|
EVT ResVT = Op.getValueType();
|
||||||
|
EVT InVT = In.getValueType();
|
||||||
|
|
||||||
if (Op.getNode()->getValueType(0).is128BitVector() &&
|
if (Subtarget->hasFp256()) {
|
||||||
Vec.getNode()->getValueType(0).is256BitVector() &&
|
if (ResVT.is128BitVector() &&
|
||||||
|
(InVT.is256BitVector() || InVT.is512BitVector()) &&
|
||||||
isa<ConstantSDNode>(Idx)) {
|
isa<ConstantSDNode>(Idx)) {
|
||||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
return Extract128BitVector(In, IdxVal, DAG, dl);
|
||||||
return Extract128BitVector(Vec, IdxVal, DAG, dl);
|
}
|
||||||
|
if (ResVT.is256BitVector() && InVT.is512BitVector() &&
|
||||||
|
isa<ConstantSDNode>(Idx)) {
|
||||||
|
return Extract256BitVector(In, IdxVal, DAG, dl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
@ -7446,12 +7682,20 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
|
|||||||
SDValue SubVec = Op.getNode()->getOperand(1);
|
SDValue SubVec = Op.getNode()->getOperand(1);
|
||||||
SDValue Idx = Op.getNode()->getOperand(2);
|
SDValue Idx = Op.getNode()->getOperand(2);
|
||||||
|
|
||||||
if (Op.getNode()->getValueType(0).is256BitVector() &&
|
if ((Op.getNode()->getValueType(0).is256BitVector() ||
|
||||||
|
Op.getNode()->getValueType(0).is512BitVector()) &&
|
||||||
SubVec.getNode()->getValueType(0).is128BitVector() &&
|
SubVec.getNode()->getValueType(0).is128BitVector() &&
|
||||||
isa<ConstantSDNode>(Idx)) {
|
isa<ConstantSDNode>(Idx)) {
|
||||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||||
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
|
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Op.getNode()->getValueType(0).is512BitVector() &&
|
||||||
|
SubVec.getNode()->getValueType(0).is256BitVector() &&
|
||||||
|
isa<ConstantSDNode>(Idx)) {
|
||||||
|
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||||
|
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
@ -434,25 +434,45 @@ namespace llvm {
|
|||||||
|
|
||||||
/// Define some predicates that are used for node matching.
|
/// Define some predicates that are used for node matching.
|
||||||
namespace X86 {
|
namespace X86 {
|
||||||
/// isVEXTRACTF128Index - Return true if the specified
|
/// isVEXTRACT128Index - Return true if the specified
|
||||||
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
|
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
|
||||||
/// suitable for input to VEXTRACTF128.
|
/// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
|
||||||
bool isVEXTRACTF128Index(SDNode *N);
|
bool isVEXTRACT128Index(SDNode *N);
|
||||||
|
|
||||||
/// isVINSERTF128Index - Return true if the specified
|
/// isVINSERT128Index - Return true if the specified
|
||||||
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
|
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
|
||||||
/// suitable for input to VINSERTF128.
|
/// suitable for input to VINSERTF128, VINSERTI128 instructions.
|
||||||
bool isVINSERTF128Index(SDNode *N);
|
bool isVINSERT128Index(SDNode *N);
|
||||||
|
|
||||||
/// getExtractVEXTRACTF128Immediate - Return the appropriate
|
/// isVEXTRACT256Index - Return true if the specified
|
||||||
|
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
|
||||||
|
/// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
|
||||||
|
bool isVEXTRACT256Index(SDNode *N);
|
||||||
|
|
||||||
|
/// isVINSERT256Index - Return true if the specified
|
||||||
|
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
|
||||||
|
/// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
|
||||||
|
bool isVINSERT256Index(SDNode *N);
|
||||||
|
|
||||||
|
/// getExtractVEXTRACT128Immediate - Return the appropriate
|
||||||
/// immediate to extract the specified EXTRACT_SUBVECTOR index
|
/// immediate to extract the specified EXTRACT_SUBVECTOR index
|
||||||
/// with VEXTRACTF128 instructions.
|
/// with VEXTRACTF128, VEXTRACTI128 instructions.
|
||||||
unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
|
unsigned getExtractVEXTRACT128Immediate(SDNode *N);
|
||||||
|
|
||||||
/// getInsertVINSERTF128Immediate - Return the appropriate
|
/// getInsertVINSERT128Immediate - Return the appropriate
|
||||||
/// immediate to insert at the specified INSERT_SUBVECTOR index
|
/// immediate to insert at the specified INSERT_SUBVECTOR index
|
||||||
/// with VINSERTF128 instructions.
|
/// with VINSERTF128, VINSERTI128 instructions.
|
||||||
unsigned getInsertVINSERTF128Immediate(SDNode *N);
|
unsigned getInsertVINSERT128Immediate(SDNode *N);
|
||||||
|
|
||||||
|
/// getExtractVEXTRACT256Immediate - Return the appropriate
|
||||||
|
/// immediate to extract the specified EXTRACT_SUBVECTOR index
|
||||||
|
/// with VEXTRACTF64X4, VEXTRACTI64x4 instructions.
|
||||||
|
unsigned getExtractVEXTRACT256Immediate(SDNode *N);
|
||||||
|
|
||||||
|
/// getInsertVINSERT256Immediate - Return the appropriate
|
||||||
|
/// immediate to insert at the specified INSERT_SUBVECTOR index
|
||||||
|
/// with VINSERTF64x4, VINSERTI64x4 instructions.
|
||||||
|
unsigned getInsertVINSERT256Immediate(SDNode *N);
|
||||||
|
|
||||||
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
|
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
|
||||||
/// constant +0.0.
|
/// constant +0.0.
|
||||||
|
339
lib/Target/X86/X86InstrAVX512.td
Normal file
339
lib/Target/X86/X86InstrAVX512.td
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
// Bitcasts between 512-bit vector types. Return the original type since
|
||||||
|
// no instruction is needed for the conversion
|
||||||
|
let Predicates = [HasAVX512] in {
|
||||||
|
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
|
||||||
|
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
|
||||||
|
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
|
||||||
|
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
|
||||||
|
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
|
||||||
|
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
|
||||||
|
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
|
||||||
|
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
|
||||||
|
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
|
||||||
|
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
|
||||||
|
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
|
||||||
|
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
|
||||||
|
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
|
||||||
|
def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
|
||||||
|
def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
|
||||||
|
|
||||||
|
// Bitcasts between 256-bit vector types. Return the original type since
|
||||||
|
// no instruction is needed for the conversion
|
||||||
|
def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
|
||||||
|
def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
|
||||||
|
def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
|
||||||
|
def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
|
||||||
|
def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
|
||||||
|
def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
|
||||||
|
def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AVX-512 - VECTOR INSERT
|
||||||
|
//
|
||||||
|
// -- 32x4 fp form --
|
||||||
|
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
|
||||||
|
def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, VR128X:$src2, i8imm:$src3),
|
||||||
|
"vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, f128mem:$src2, i8imm:$src3),
|
||||||
|
"vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- 64x4 fp form --
|
||||||
|
let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in {
|
||||||
|
def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, VR256X:$src2, i8imm:$src3),
|
||||||
|
"vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, VEX_W;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, i256mem:$src2, i8imm:$src3),
|
||||||
|
"vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
|
||||||
|
}
|
||||||
|
// -- 32x4 integer form --
|
||||||
|
let neverHasSideEffects = 1 in {
|
||||||
|
def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, VR128X:$src2, i8imm:$src3),
|
||||||
|
"vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, i128mem:$src2, i8imm:$src3),
|
||||||
|
"vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
let neverHasSideEffects = 1 in {
|
||||||
|
// -- 64x4 form --
|
||||||
|
def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, VR256X:$src2, i8imm:$src3),
|
||||||
|
"vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, VEX_W;
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
|
||||||
|
(ins VR512:$src1, i256mem:$src2, i8imm:$src3),
|
||||||
|
"vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
|
[]>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
|
||||||
|
}
|
||||||
|
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
|
||||||
|
(bc_v4i32 (loadv2i64 addr:$src2)),
|
||||||
|
(iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert128_imm VR512:$ins))>;
|
||||||
|
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
// Insert a 256-bit integer subvector (v4i64) into a 512-bit vector. The
|
||||||
|
// 256-bit fragment must be used so the index check matches the 256-bit imm.
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
// Insert a 256-bit integer subvector (v8i32) into a 512-bit vector. The
|
||||||
|
// 256-bit fragment must be used so the index check matches the 256-bit imm.
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
|
||||||
|
(iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
|
||||||
|
(bc_v8i32 (loadv4i64 addr:$src2)),
|
||||||
|
(iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
|
||||||
|
(INSERT_get_vinsert256_imm VR512:$ins))>;
|
||||||
|
|
||||||
|
// vinsertps - insert f32 to XMM
|
||||||
|
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
|
||||||
|
(ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
|
||||||
|
!strconcat("vinsertps{z}",
|
||||||
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
|
[(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
|
||||||
|
EVEX_4V;
|
||||||
|
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
|
||||||
|
(ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
|
||||||
|
!strconcat("vinsertps{z}",
|
||||||
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
|
[(set VR128X:$dst, (X86insrtps VR128X:$src1,
|
||||||
|
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||||
|
imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||||
|
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AVX-512 VECTOR EXTRACT
|
||||||
|
//---
|
||||||
|
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
|
||||||
|
// -- 32x4 form --
|
||||||
|
def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
|
||||||
|
(ins VR512:$src1, i8imm:$src2),
|
||||||
|
"vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512;
|
||||||
|
// Memory-destination form: it has no selection pattern, so the store must be
|
||||||
|
// declared explicitly, as is done for VEXTRACTF64x4mr.
|
||||||
|
let mayStore = 1 in
|
||||||
|
def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
|
||||||
|
(ins f128mem:$dst, VR512:$src1, i8imm:$src2),
|
||||||
|
"vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
|
||||||
|
|
||||||
|
// -- 64x4 form --
|
||||||
|
def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
|
||||||
|
(ins VR512:$src1, i8imm:$src2),
|
||||||
|
"vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, VEX_W;
|
||||||
|
let mayStore = 1 in
|
||||||
|
def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
|
||||||
|
(ins f256mem:$dst, VR512:$src1, i8imm:$src2),
|
||||||
|
"vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let neverHasSideEffects = 1 in {
|
||||||
|
// -- 32x4 form --
|
||||||
|
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
|
||||||
|
(ins VR512:$src1, i8imm:$src2),
|
||||||
|
"vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512;
|
||||||
|
// Memory-destination form: it has no selection pattern, so the store must be
|
||||||
|
// declared explicitly, as is done for VEXTRACTI64x4mr.
|
||||||
|
let mayStore = 1 in
|
||||||
|
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
|
||||||
|
(ins i128mem:$dst, VR512:$src1, i8imm:$src2),
|
||||||
|
"vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
|
||||||
|
|
||||||
|
// -- 64x4 form --
|
||||||
|
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
|
||||||
|
(ins VR512:$src1, i8imm:$src2),
|
||||||
|
"vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, VEX_W;
|
||||||
|
let mayStore = 1 in
|
||||||
|
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
|
||||||
|
(ins i256mem:$dst, VR512:$src1, i8imm:$src2),
|
||||||
|
"vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[]>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
|
||||||
|
}
|
||||||
|
|
||||||
|
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
|
||||||
|
(v4f32 (VEXTRACTF32x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract128_imm VR128X:$ext)))>;
|
||||||
|
|
||||||
|
// Extract a 128-bit integer subvector (v4i32) from a v16i32 source, using the
|
||||||
|
// integer-domain instruction like the v8i64 -> v2i64 pattern.
|
||||||
|
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
|
||||||
|
(v4i32 (VEXTRACTI32x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract128_imm VR128X:$ext)))>;
|
||||||
|
|
||||||
|
def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
|
||||||
|
(v2f64 (VEXTRACTF32x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract128_imm VR128X:$ext)))>;
|
||||||
|
|
||||||
|
def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
|
||||||
|
(v2i64 (VEXTRACTI32x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract128_imm VR128X:$ext)))>;
|
||||||
|
|
||||||
|
|
||||||
|
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
|
||||||
|
(v8f32 (VEXTRACTF64x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract256_imm VR256X:$ext)))>;
|
||||||
|
|
||||||
|
def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
|
||||||
|
(v8i32 (VEXTRACTI64x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract256_imm VR256X:$ext)))>;
|
||||||
|
|
||||||
|
def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
|
||||||
|
(v4f64 (VEXTRACTF64x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract256_imm VR256X:$ext)))>;
|
||||||
|
|
||||||
|
def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
|
||||||
|
(v4i64 (VEXTRACTI64x4rr VR512:$src1,
|
||||||
|
(EXTRACT_get_vextract256_imm VR256X:$ext)))>;
|
||||||
|
|
||||||
|
// A 256-bit subvector extract from the first 512-bit vector position
|
||||||
|
// is a subregister copy that needs no instruction.
|
||||||
|
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
|
||||||
|
(v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
|
||||||
|
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
|
||||||
|
(v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
|
||||||
|
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
|
||||||
|
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
|
||||||
|
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
|
||||||
|
(v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
|
||||||
|
|
||||||
|
// zmm -> xmm
|
||||||
|
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
|
||||||
|
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
|
||||||
|
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
|
||||||
|
(v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
|
||||||
|
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
|
||||||
|
(v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
|
||||||
|
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
|
||||||
|
(v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
|
||||||
|
|
||||||
|
|
||||||
|
// A 128-bit subvector insert to the first 512-bit vector position
|
||||||
|
// is a subregister copy that needs no instruction.
|
||||||
|
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
|
||||||
|
(INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||||
|
sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
|
||||||
|
(INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||||
|
sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
|
||||||
|
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||||
|
sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
|
||||||
|
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||||
|
sub_ymm)>;
|
||||||
|
|
||||||
|
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
|
||||||
|
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
|
||||||
|
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
|
||||||
|
|
@ -405,28 +405,54 @@ def BYTE_imm : SDNodeXForm<imm, [{
|
|||||||
return getI32Imm(N->getZExtValue() >> 3);
|
return getI32Imm(N->getZExtValue() >> 3);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
|
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
|
||||||
// to VEXTRACTF128 imm.
|
// to VEXTRACTF128/VEXTRACTI128 imm.
|
||||||
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
|
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
|
||||||
return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
|
return getI8Imm(X86::getExtractVEXTRACT128Immediate(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
|
// INSERT_get_vinsert128_imm xform function: convert insert_subvector index to
|
||||||
// VINSERTF128 imm.
|
// VINSERTF128/VINSERTI128 imm.
|
||||||
def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
|
def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
|
||||||
return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
|
return getI8Imm(X86::getInsertVINSERT128Immediate(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
|
// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
|
||||||
|
// to VEXTRACTF64x4 imm.
|
||||||
|
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
|
||||||
|
return getI8Imm(X86::getExtractVEXTRACT256Immediate(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
// INSERT_get_vinsert256_imm xform function: convert insert_subvector index to
|
||||||
|
// VINSERTF64x4 imm.
|
||||||
|
def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
|
||||||
|
return getI8Imm(X86::getInsertVINSERT256Immediate(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
|
||||||
(extract_subvector node:$bigvec,
|
(extract_subvector node:$bigvec,
|
||||||
node:$index), [{
|
node:$index), [{
|
||||||
return X86::isVEXTRACTF128Index(N);
|
return X86::isVEXTRACT128Index(N);
|
||||||
}], EXTRACT_get_vextractf128_imm>;
|
}], EXTRACT_get_vextract128_imm>;
|
||||||
|
|
||||||
def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
|
def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
|
||||||
node:$index),
|
node:$index),
|
||||||
(insert_subvector node:$bigvec, node:$smallvec,
|
(insert_subvector node:$bigvec, node:$smallvec,
|
||||||
node:$index), [{
|
node:$index), [{
|
||||||
return X86::isVINSERTF128Index(N);
|
return X86::isVINSERT128Index(N);
|
||||||
}], INSERT_get_vinsertf128_imm>;
|
}], INSERT_get_vinsert128_imm>;
|
||||||
|
|
||||||
|
|
||||||
|
def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
|
||||||
|
(extract_subvector node:$bigvec,
|
||||||
|
node:$index), [{
|
||||||
|
return X86::isVEXTRACT256Index(N);
|
||||||
|
}], EXTRACT_get_vextract256_imm>;
|
||||||
|
|
||||||
|
def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
|
||||||
|
node:$index),
|
||||||
|
(insert_subvector node:$bigvec, node:$smallvec,
|
||||||
|
node:$index), [{
|
||||||
|
return X86::isVINSERT256Index(N);
|
||||||
|
}], INSERT_get_vinsert256_imm>;
|
||||||
|
|
||||||
|
@ -1861,6 +1861,7 @@ include "X86InstrXOP.td"
|
|||||||
|
|
||||||
// SSE, MMX and 3DNow! vector support.
|
// SSE, MMX and 3DNow! vector support.
|
||||||
include "X86InstrSSE.td"
|
include "X86InstrSSE.td"
|
||||||
|
include "X86InstrAVX512.td"
|
||||||
include "X86InstrMMX.td"
|
include "X86InstrMMX.td"
|
||||||
include "X86Instr3DNow.td"
|
include "X86Instr3DNow.td"
|
||||||
|
|
||||||
|
@ -7586,62 +7586,62 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
|
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
|
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX1Only] in {
|
let Predicates = [HasAVX1Only] in {
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
|
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
|
||||||
(bc_v16i8 (memopv2i64 addr:$src2)),
|
(bc_v16i8 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
|
||||||
(bc_v8i16 (memopv2i64 addr:$src2)),
|
(bc_v8i16 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -7661,59 +7661,59 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
|
|||||||
|
|
||||||
// AVX1 patterns
|
// AVX1 patterns
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v4f32 (VEXTRACTF128rr
|
(v4f32 (VEXTRACTF128rr
|
||||||
(v8f32 VR256:$src1),
|
(v8f32 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v2f64 (VEXTRACTF128rr
|
(v2f64 (VEXTRACTF128rr
|
||||||
(v4f64 VR256:$src1),
|
(v4f64 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
|
|
||||||
def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1),
|
def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1),
|
def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX1Only] in {
|
let Predicates = [HasAVX1Only] in {
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v2i64 (VEXTRACTF128rr
|
(v2i64 (VEXTRACTF128rr
|
||||||
(v4i64 VR256:$src1),
|
(v4i64 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v4i32 (VEXTRACTF128rr
|
(v4i32 (VEXTRACTF128rr
|
||||||
(v8i32 VR256:$src1),
|
(v8i32 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v8i16 (VEXTRACTF128rr
|
(v8i16 (VEXTRACTF128rr
|
||||||
(v16i16 VR256:$src1),
|
(v16i16 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v16i8 (VEXTRACTF128rr
|
(v16i8 (VEXTRACTF128rr
|
||||||
(v32i8 VR256:$src1),
|
(v32i8 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
|
|
||||||
def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
|
def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
|
def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
|
def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
|
def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
(VEXTRACTF128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -8191,42 +8191,42 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
|
|
||||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
|
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rm VR256:$src1, addr:$src2,
|
(VINSERTI128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rm VR256:$src1, addr:$src2,
|
(VINSERTI128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
|
||||||
(bc_v16i8 (memopv2i64 addr:$src2)),
|
(bc_v16i8 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rm VR256:$src1, addr:$src2,
|
(VINSERTI128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
|
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
|
||||||
(bc_v8i16 (memopv2i64 addr:$src2)),
|
(bc_v8i16 (memopv2i64 addr:$src2)),
|
||||||
(iPTR imm)),
|
(iPTR imm)),
|
||||||
(VINSERTI128rm VR256:$src1, addr:$src2,
|
(VINSERTI128rm VR256:$src1, addr:$src2,
|
||||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
(INSERT_get_vinsert128_imm VR256:$ins))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -8245,39 +8245,39 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
|
|||||||
VEX, VEX_L;
|
VEX, VEX_L;
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v2i64 (VEXTRACTI128rr
|
(v2i64 (VEXTRACTI128rr
|
||||||
(v4i64 VR256:$src1),
|
(v4i64 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v4i32 (VEXTRACTI128rr
|
(v4i32 (VEXTRACTI128rr
|
||||||
(v8i32 VR256:$src1),
|
(v8i32 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v8i16 (VEXTRACTI128rr
|
(v8i16 (VEXTRACTI128rr
|
||||||
(v16i16 VR256:$src1),
|
(v16i16 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
|
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
|
||||||
(v16i8 (VEXTRACTI128rr
|
(v16i8 (VEXTRACTI128rr
|
||||||
(v32i8 VR256:$src1),
|
(v32i8 VR256:$src1),
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
|
||||||
|
|
||||||
def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
|
def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
|
def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
|
def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
|
def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
|
||||||
(iPTR imm))), addr:$dst),
|
(iPTR imm))), addr:$dst),
|
||||||
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
(VEXTRACTI128mr addr:$dst, VR256:$src1,
|
||||||
(EXTRACT_get_vextractf128_imm VR128:$ext))>;
|
(EXTRACT_get_vextract128_imm VR128:$ext))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
44
test/CodeGen/X86/avx512-insert-extract.ll
Normal file
44
test/CodeGen/X86/avx512-insert-extract.ll
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||||
|
|
||||||
|
;CHECK: test1
|
||||||
|
;CHECK: vinsertps
|
||||||
|
;CHECK: vinsertf32x4
|
||||||
|
;CHECK: ret
|
||||||
|
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
|
||||||
|
%rrr = load float* %br
|
||||||
|
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
|
||||||
|
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
|
||||||
|
ret <16 x float> %rrr3
|
||||||
|
}
|
||||||
|
|
||||||
|
;CHECK: test2
|
||||||
|
;CHECK: vinsertf32x4
|
||||||
|
;CHECK: vextractf32x4
|
||||||
|
;CHECK: vinsertf32x4
|
||||||
|
;CHECK: ret
|
||||||
|
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
|
||||||
|
%rrr = load double* %br
|
||||||
|
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
|
||||||
|
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
|
||||||
|
ret <8 x double> %rrr3
|
||||||
|
}
|
||||||
|
|
||||||
|
;CHECK: test3
|
||||||
|
;CHECK: vextractf32x4
|
||||||
|
;CHECK: vinsertf32x4
|
||||||
|
;CHECK: ret
|
||||||
|
define <16 x float> @test3(<16 x float> %x) nounwind {
|
||||||
|
%eee = extractelement <16 x float> %x, i32 4
|
||||||
|
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1
|
||||||
|
ret <16 x float> %rrr2
|
||||||
|
}
|
||||||
|
|
||||||
|
;CHECK: test4
|
||||||
|
;CHECK: vextracti32x4
|
||||||
|
;CHECK: vinserti32x4
|
||||||
|
;CHECK: ret
|
||||||
|
define <8 x i64> @test4(<8 x i64> %x) nounwind {
|
||||||
|
%eee = extractelement <8 x i64> %x, i32 4
|
||||||
|
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
|
||||||
|
ret <8 x i64> %rrr2
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user