mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-16 20:40:16 +00:00
AVX-512: Added legal type MVT::i1 and VK1 register for it.
Added scalar compare VCMPSS, VCMPSD. Implemented LowerSELECT for scalar FP operations. I replaced FSETCCss, FSETCCsd with one node type FSETCCs. Node extract_vector_elt(v16i1/v8i1, idx) returns an element of type i1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197384 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
64f5838550
commit
376a81d8ce
@ -478,6 +478,7 @@ struct ContextDecision {
|
||||
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
|
||||
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
|
||||
ENUM_ENTRY(TYPE_XMM512, "64-byte") \
|
||||
ENUM_ENTRY(TYPE_VK1, "1-bit") \
|
||||
ENUM_ENTRY(TYPE_VK8, "8-bit") \
|
||||
ENUM_ENTRY(TYPE_VK16, "16-bit") \
|
||||
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
|
||||
|
@ -1306,9 +1306,13 @@ void X86TargetLowering::resetOperationActions() {
|
||||
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
|
||||
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
|
||||
|
||||
addRegisterClass(MVT::i1, &X86::VK1RegClass);
|
||||
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
|
||||
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
|
||||
|
||||
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::i1, Custom);
|
||||
setOperationAction(ISD::XOR, MVT::i1, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
|
||||
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
|
||||
@ -1376,6 +1380,8 @@ void X86TargetLowering::resetOperationActions() {
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
|
||||
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
|
||||
@ -2221,6 +2227,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
||||
RC = &X86::VR128RegClass;
|
||||
else if (RegVT == MVT::x86mmx)
|
||||
RC = &X86::VR64RegClass;
|
||||
else if (RegVT == MVT::i1)
|
||||
RC = &X86::VK1RegClass;
|
||||
else if (RegVT == MVT::v8i1)
|
||||
RC = &X86::VK8RegClass;
|
||||
else if (RegVT == MVT::v16i1)
|
||||
@ -7669,6 +7677,39 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Extract one bit from mask vector, like v16i1 or v8i1.
|
||||
/// AVX-512 feature.
|
||||
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue Vec = Op.getOperand(0);
|
||||
SDLoc dl(Vec);
|
||||
MVT VecVT = Vec.getSimpleValueType();
|
||||
SDValue Idx = Op.getOperand(1);
|
||||
MVT EltVT = Op.getSimpleValueType();
|
||||
|
||||
assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
|
||||
|
||||
// variable index can't be handled in mask registers,
|
||||
// extend vector to VR512
|
||||
if (!isa<ConstantSDNode>(Idx)) {
|
||||
MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
|
||||
SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
|
||||
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
|
||||
ExtVT.getVectorElementType(), Ext, Idx);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
|
||||
}
|
||||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
if (IdxVal) {
|
||||
unsigned MaxSift = VecVT.getSizeInBits() - 1;
|
||||
Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
|
||||
DAG.getConstant(MaxSift - IdxVal, MVT::i8));
|
||||
Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
|
||||
DAG.getConstant(MaxSift, MVT::i8));
|
||||
}
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i1, Vec,
|
||||
DAG.getIntPtrConstant(0));
|
||||
}
|
||||
|
||||
SDValue
|
||||
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -7676,6 +7717,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
SDValue Vec = Op.getOperand(0);
|
||||
MVT VecVT = Vec.getSimpleValueType();
|
||||
SDValue Idx = Op.getOperand(1);
|
||||
|
||||
if (Op.getSimpleValueType() == MVT::i1)
|
||||
return ExtractBitFromMaskVector(Op, DAG);
|
||||
|
||||
if (!isa<ConstantSDNode>(Idx)) {
|
||||
if (VecVT.is512BitVector() ||
|
||||
(VecVT.is256BitVector() && Subtarget->hasInt256() &&
|
||||
@ -9681,11 +9726,17 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
|
||||
/// equivalent.
|
||||
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
|
||||
SelectionDAG &DAG) const {
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
|
||||
SDLoc dl(Op0);
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
|
||||
if (C->getAPIntValue() == 0)
|
||||
return EmitTest(Op0, X86CC, DAG);
|
||||
|
||||
SDLoc dl(Op0);
|
||||
if (Op0.getValueType() == MVT::i1) {
|
||||
Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, DAG.getConstant(-1, MVT::i1));
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op0, Op0);
|
||||
}
|
||||
}
|
||||
|
||||
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
|
||||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
|
||||
// Do the comparison at i32 if it's smaller. This avoids subregister
|
||||
@ -10121,7 +10172,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
|
||||
|
||||
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
|
||||
assert((VT == MVT::i8 || (Subtarget->hasAVX512() && VT == MVT::i1))
|
||||
&& "SetCC type must be 8-bit or 1-bit integer");
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
SDLoc dl(Op);
|
||||
@ -10234,8 +10286,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
|
||||
|
||||
if (SSECC != 8) {
|
||||
unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
|
||||
SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
|
||||
if (Subtarget->hasAVX512()) {
|
||||
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
|
||||
DAG.getConstant(SSECC, MVT::i8));
|
||||
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
|
||||
}
|
||||
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
|
||||
DAG.getConstant(SSECC, MVT::i8));
|
||||
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
|
||||
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
|
||||
@ -13774,8 +13830,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::CMPMU: return "X86ISD::CMPMU";
|
||||
case X86ISD::SETCC: return "X86ISD::SETCC";
|
||||
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
|
||||
case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
|
||||
case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
|
||||
case X86ISD::FSETCC: return "X86ISD::FSETCC";
|
||||
case X86ISD::CMOV: return "X86ISD::CMOV";
|
||||
case X86ISD::BRCOND: return "X86ISD::BRCOND";
|
||||
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
|
||||
@ -13870,7 +13925,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::TESTP: return "X86ISD::TESTP";
|
||||
case X86ISD::TESTM: return "X86ISD::TESTM";
|
||||
case X86ISD::KORTEST: return "X86ISD::KORTEST";
|
||||
case X86ISD::KTEST: return "X86ISD::KTEST";
|
||||
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
|
||||
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
|
||||
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
|
||||
@ -16420,44 +16474,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
EltNo);
|
||||
}
|
||||
|
||||
/// Extract one bit from mask vector, like v16i1 or v8i1.
|
||||
/// AVX-512 feature.
|
||||
static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) {
|
||||
SDValue Vec = N->getOperand(0);
|
||||
SDLoc dl(Vec);
|
||||
MVT VecVT = Vec.getSimpleValueType();
|
||||
SDValue Idx = N->getOperand(1);
|
||||
MVT EltVT = N->getSimpleValueType(0);
|
||||
|
||||
assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) ||
|
||||
"Unexpected operands in ExtractBitFromMaskVector");
|
||||
|
||||
// variable index
|
||||
if (!isa<ConstantSDNode>(Idx)) {
|
||||
MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
|
||||
SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
|
||||
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
|
||||
ExtVT.getVectorElementType(), Ext);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
|
||||
}
|
||||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
|
||||
MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits());
|
||||
unsigned MaxShift = VecVT.getSizeInBits() - 1;
|
||||
Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec);
|
||||
Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec,
|
||||
DAG.getConstant(MaxShift - IdxVal, ScalarVT));
|
||||
Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec,
|
||||
DAG.getConstant(MaxShift, ScalarVT));
|
||||
|
||||
if (VecVT == MVT::v16i1) {
|
||||
Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec);
|
||||
}
|
||||
return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec);
|
||||
}
|
||||
|
||||
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
|
||||
/// generation and convert it from being a bunch of shuffles and extracts
|
||||
/// to a simple store and scalar loads to extract the elements.
|
||||
@ -16469,10 +16485,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
SDValue InputVector = N->getOperand(0);
|
||||
|
||||
if (InputVector.getValueType().getVectorElementType() == MVT::i1 &&
|
||||
!DCI.isBeforeLegalize())
|
||||
return ExtractBitFromMaskVector(N, DAG);
|
||||
|
||||
// Detect whether we are trying to convert from mmx to i32 and the bitcast
|
||||
// from mmx to v2i32 has a single usage.
|
||||
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
|
||||
@ -17616,17 +17628,16 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
|
||||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
|
||||
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
|
||||
X86ISD::NodeType NTOperator = is64BitFP ?
|
||||
X86ISD::FSETCCsd : X86ISD::FSETCCss;
|
||||
// FIXME: need symbolic constants for these magic numbers.
|
||||
// See X86ATTInstPrinter.cpp:printSSECC().
|
||||
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
|
||||
SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
|
||||
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
|
||||
DAG.getConstant(x86cc, MVT::i8));
|
||||
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
|
||||
MVT IntVT = (is64BitFP ? MVT::i64 : MVT::i32);
|
||||
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
|
||||
OnesOrZeroesF);
|
||||
SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
|
||||
DAG.getConstant(1, MVT::i32));
|
||||
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
|
||||
DAG.getConstant(1, IntVT));
|
||||
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
|
||||
return OneBitOfTruth;
|
||||
}
|
||||
|
@ -94,6 +94,9 @@ namespace llvm {
|
||||
/// operand, usually produced by a CMP instruction.
|
||||
SETCC,
|
||||
|
||||
/// X86 Select
|
||||
SELECT,
|
||||
|
||||
// Same as SETCC except it's materialized with a sbb and the value is all
|
||||
// one's or all zero's.
|
||||
SETCC_CARRY, // R = carry_bit ? ~0 : 0
|
||||
@ -101,7 +104,7 @@ namespace llvm {
|
||||
/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
|
||||
/// Operands are two FP values to compare; result is a mask of
|
||||
/// 0s or 1s. Generally DTRT for C/C++ with NaNs.
|
||||
FSETCCss, FSETCCsd,
|
||||
FSETCC,
|
||||
|
||||
/// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
|
||||
/// result in an integer GPR. Needs masking for scalar result.
|
||||
@ -314,7 +317,6 @@ namespace llvm {
|
||||
|
||||
// OR/AND test for masks
|
||||
KORTEST,
|
||||
KTEST,
|
||||
|
||||
// Several flavors of instructions with vector shuffle behaviors.
|
||||
PALIGNR,
|
||||
|
@ -683,6 +683,42 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
|
||||
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
|
||||
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Compare Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
|
||||
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
|
||||
Operand CC, SDNode OpNode, ValueType VT,
|
||||
PatFrag ld_frag, string asm, string asm_alt> {
|
||||
def rr : AVX512Ii8<0xC2, MRMSrcReg,
|
||||
(outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
|
||||
[(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
|
||||
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
|
||||
def rm : AVX512Ii8<0xC2, MRMSrcMem,
|
||||
(outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
|
||||
[(set VK1:$dst, (OpNode (VT RC:$src1),
|
||||
(ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
|
||||
(outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
|
||||
asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
|
||||
def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
|
||||
(outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
|
||||
asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
|
||||
"vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
|
||||
XS;
|
||||
defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
|
||||
"vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
|
||||
XD, VEX_W;
|
||||
}
|
||||
|
||||
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
|
||||
@ -734,10 +770,10 @@ multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
|
||||
// Accept explicit immediate argument form instead of comparison code.
|
||||
let neverHasSideEffects = 1 in {
|
||||
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
|
||||
(outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
|
||||
asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
|
||||
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
|
||||
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
|
||||
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
|
||||
asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
|
||||
}
|
||||
}
|
||||
@ -864,8 +900,14 @@ let Predicates = [HasAVX512] in {
|
||||
def : Pat<(store (v16i1 VK16:$src), addr:$dst),
|
||||
(KMOVWmk addr:$dst, VK16:$src)>;
|
||||
|
||||
def : Pat<(store (v8i1 VK8:$src), addr:$dst),
|
||||
(KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>;
|
||||
def : Pat<(store VK8:$src, addr:$dst),
|
||||
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
|
||||
|
||||
def : Pat<(i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
|
||||
|
||||
def : Pat<(v8i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
|
||||
}
|
||||
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
|
||||
let Predicates = [HasAVX512] in {
|
||||
@ -878,6 +920,12 @@ let Predicates = [HasAVX512] in {
|
||||
(EXTRACT_SUBREG
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
|
||||
sub_8bit)>;
|
||||
|
||||
def : Pat<(i1 (extractelt VK16:$src, (iPTR 0))),
|
||||
(COPY_TO_REGCLASS VK16:$src, VK1)>;
|
||||
def : Pat<(i1 (extractelt VK8:$src, (iPTR 0))),
|
||||
(COPY_TO_REGCLASS VK8:$src, VK1)>;
|
||||
|
||||
}
|
||||
|
||||
// Mask unary operation
|
||||
@ -945,6 +993,19 @@ let isCommutable = 1 in {
|
||||
defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
|
||||
}
|
||||
|
||||
def : Pat<(xor VK1:$src1, VK1:$src2),
|
||||
(COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
||||
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
||||
|
||||
def : Pat<(or VK1:$src1, VK1:$src2),
|
||||
(COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
||||
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
|
||||
|
||||
def : Pat<(not VK1:$src),
|
||||
(COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16),
|
||||
(COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)),
|
||||
(f32 (IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>;
|
||||
|
||||
multiclass avx512_mask_binop_int<string IntName, string InstName> {
|
||||
let Predicates = [HasAVX512] in
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
|
||||
@ -1016,7 +1077,10 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
}
|
||||
|
||||
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
|
||||
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>;
|
||||
|
||||
def : Pat<(X86cmp VK1:$src1, VK1:$src2),
|
||||
(KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
|
||||
(COPY_TO_REGCLASS VK1:$src2, VK16))>;
|
||||
|
||||
// Mask shift
|
||||
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
@ -1034,8 +1098,8 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
|
||||
VEX, OpSize, TA, VEX_W;
|
||||
}
|
||||
|
||||
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>;
|
||||
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>;
|
||||
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
|
||||
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
|
||||
|
||||
// Mask setting all 0s or 1s
|
||||
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
|
||||
@ -1046,7 +1110,7 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
|
||||
}
|
||||
|
||||
multiclass avx512_mask_setop_w<PatFrag Val> {
|
||||
defm B : avx512_mask_setop<VK8, v8i1, Val>;
|
||||
defm B : avx512_mask_setop<VK8, v8i1, Val>;
|
||||
defm W : avx512_mask_setop<VK16, v16i1, Val>;
|
||||
}
|
||||
|
||||
@ -1341,6 +1405,12 @@ multiclass avx512_move_scalar <string asm, RegisterClass RC,
|
||||
[(set VR128X:$dst, (vt (OpNode VR128X:$src1,
|
||||
(scalar_to_vector RC:$src2))))],
|
||||
IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
|
||||
[], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
|
||||
def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
|
||||
@ -1359,6 +1429,13 @@ let ExeDomain = SSEPackedDouble in
|
||||
defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
|
||||
loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
|
||||
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
|
||||
VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
|
||||
|
||||
def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
|
||||
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
|
||||
VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
|
||||
|
||||
// For the disassembler
|
||||
let isCodeGenOnly = 1 in {
|
||||
|
@ -59,8 +59,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
|
||||
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
|
||||
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
|
||||
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
|
||||
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
|
||||
def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
|
||||
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
|
||||
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
|
||||
def X86pshufb : SDNode<"X86ISD::PSHUFB",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
@ -130,9 +130,14 @@ def X86IntCmpMask : SDTypeProfile<1, 2,
|
||||
def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
|
||||
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
|
||||
|
||||
def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
def X86CmpMaskCC :
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
def X86CmpMaskCCScalar :
|
||||
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
|
||||
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
|
||||
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
|
||||
def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
|
||||
|
||||
def X86vshl : SDNode<"X86ISD::VSHL",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||
@ -155,10 +160,10 @@ def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
|
||||
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
|
||||
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
|
||||
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
|
||||
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>>;
|
||||
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
|
||||
|
||||
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
|
@ -3015,6 +3015,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline static bool MaskRegClassContains(unsigned Reg) {
|
||||
return X86::VK8RegClass.contains(Reg) ||
|
||||
X86::VK16RegClass.contains(Reg) ||
|
||||
X86::VK1RegClass.contains(Reg);
|
||||
}
|
||||
static
|
||||
unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
|
||||
if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
|
||||
@ -3024,13 +3029,10 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
|
||||
SrcReg = get512BitSuperRegister(SrcReg);
|
||||
return X86::VMOVAPSZrr;
|
||||
}
|
||||
if ((X86::VK8RegClass.contains(DestReg) ||
|
||||
X86::VK16RegClass.contains(DestReg)) &&
|
||||
(X86::VK8RegClass.contains(SrcReg) ||
|
||||
X86::VK16RegClass.contains(SrcReg)))
|
||||
if (MaskRegClassContains(DestReg) &&
|
||||
MaskRegClassContains(SrcReg))
|
||||
return X86::KMOVWkk;
|
||||
if ((X86::VK8RegClass.contains(DestReg) ||
|
||||
X86::VK16RegClass.contains(DestReg)) &&
|
||||
if (MaskRegClassContains(DestReg) &&
|
||||
(X86::GR32RegClass.contains(SrcReg) ||
|
||||
X86::GR16RegClass.contains(SrcReg) ||
|
||||
X86::GR8RegClass.contains(SrcReg))) {
|
||||
@ -3040,8 +3042,7 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
|
||||
if ((X86::GR32RegClass.contains(DestReg) ||
|
||||
X86::GR16RegClass.contains(DestReg) ||
|
||||
X86::GR8RegClass.contains(DestReg)) &&
|
||||
(X86::VK8RegClass.contains(SrcReg) ||
|
||||
X86::VK16RegClass.contains(SrcReg))) {
|
||||
MaskRegClassContains(SrcReg)) {
|
||||
DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
|
||||
return X86::KMOVWrk;
|
||||
}
|
||||
|
@ -23,8 +23,8 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
|
||||
|
||||
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
|
||||
|
||||
def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
//def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
|
||||
def SDTX86Cmov : SDTypeProfile<1, 4,
|
||||
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
|
||||
|
@ -2299,23 +2299,23 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
|
||||
}
|
||||
}
|
||||
|
||||
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
|
||||
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
|
||||
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
SSE_ALU_F32S>,
|
||||
XS, VEX_4V, VEX_LIG;
|
||||
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
|
||||
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
|
||||
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
|
||||
SSE_ALU_F32S>, // same latency as 32 bit compare
|
||||
XD, VEX_4V, VEX_LIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
|
||||
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
|
||||
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
|
||||
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
|
||||
XS;
|
||||
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
|
||||
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
|
||||
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
|
||||
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
|
||||
SSE_ALU_F64S>,
|
||||
|
@ -463,9 +463,11 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
256, (sequence "YMM%u", 0, 31)>;
|
||||
|
||||
def VK1 : RegisterClass<"X86", [i1], 1, (sequence "K%u", 0, 7)>;
|
||||
def VK8 : RegisterClass<"X86", [v8i1], 8, (sequence "K%u", 0, 7)>;
|
||||
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)>;
|
||||
|
||||
def VK1WM : RegisterClass<"X86", [i1], 1, (sub VK1, K0)>;
|
||||
def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)>;
|
||||
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
|
||||
|
||||
|
@ -99,27 +99,21 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
|
||||
}
|
||||
|
||||
;CHECK-LABEL: test11
|
||||
;CHECK: movl $260
|
||||
;CHECK: bextrl
|
||||
;CHECK: movl $268
|
||||
;CHECK: bextrl
|
||||
;CHECK: vpcmpltud
|
||||
;CHECK: kshiftlw $11
|
||||
;CHECK: kshiftrw $15
|
||||
;CHECK: kxorw
|
||||
;CHECK: kortestw
|
||||
;CHECK: jne
|
||||
;CHECK: ret
|
||||
;CHECK: ret
|
||||
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
||||
%cmp_res = icmp ult <16 x i32> %a, %b
|
||||
%ia = extractelement <16 x i1> %cmp_res, i32 4
|
||||
%ib = extractelement <16 x i1> %cmp_res, i32 12
|
||||
|
||||
br i1 %ia, label %A, label %B
|
||||
|
||||
A:
|
||||
ret <16 x i32>%b
|
||||
B:
|
||||
%c = add <16 x i32>%b, %a
|
||||
br i1 %ib, label %C, label %D
|
||||
C:
|
||||
%c1 = sub <16 x i32>%c, %a
|
||||
ret <16 x i32>%c1
|
||||
D:
|
||||
%c2 = mul <16 x i32>%c, %a
|
||||
ret <16 x i32>%c2
|
||||
ret <16 x i32>%c
|
||||
}
|
||||
|
@ -20,3 +20,22 @@ define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
|
||||
ret <8 x i64> %res
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @select02
|
||||
; CHECK: cmpless %xmm0, %xmm3, %k1
|
||||
; CHECK-NEXT: vmovss %xmm2, {{.*}}%xmm1 {%k1}
|
||||
; CHECK: ret
|
||||
define float @select02(float %a, float %b, float %c, float %eps) {
|
||||
%cmp = fcmp oge float %a, %eps
|
||||
%cond = select i1 %cmp, float %c, float %b
|
||||
ret float %cond
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @select03
|
||||
; CHECK: cmplesd %xmm0, %xmm3, %k1
|
||||
; CHECK-NEXT: vmovsd %xmm2, {{.*}}%xmm1 {%k1}
|
||||
; CHECK: ret
|
||||
define double @select03(double %a, double %b, double %c, double %eps) {
|
||||
%cmp = fcmp oge double %a, %eps
|
||||
%cond = select i1 %cmp, double %c, double %b
|
||||
ret double %cond
|
||||
}
|
||||
|
@ -8,8 +8,8 @@ define i32 @isint_return(double %d) nounwind {
|
||||
%e = sitofp i32 %i to double
|
||||
; CHECK: cmpeqsd
|
||||
%c = fcmp oeq double %d, %e
|
||||
; CHECK-NEXT: movd
|
||||
; CHECK-NEXT: andl
|
||||
; CHECK-NEXT: movq
|
||||
; CHECK-NEXT: andq
|
||||
%z = zext i1 %c to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
@ -1269,6 +1269,8 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
|
||||
TYPE("VR256", TYPE_XMM256)
|
||||
TYPE("VR256X", TYPE_XMM256)
|
||||
TYPE("VR512", TYPE_XMM512)
|
||||
TYPE("VK1", TYPE_VK1)
|
||||
TYPE("VK1WM", TYPE_VK1)
|
||||
TYPE("VK8", TYPE_VK8)
|
||||
TYPE("VK8WM", TYPE_VK8)
|
||||
TYPE("VK16", TYPE_VK16)
|
||||
@ -1340,6 +1342,7 @@ OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
|
||||
ENCODING("VR256", ENCODING_RM)
|
||||
ENCODING("VR256X", ENCODING_RM)
|
||||
ENCODING("VR512", ENCODING_RM)
|
||||
ENCODING("VK1", ENCODING_RM)
|
||||
ENCODING("VK8", ENCODING_RM)
|
||||
ENCODING("VK16", ENCODING_RM)
|
||||
errs() << "Unhandled R/M register encoding " << s << "\n";
|
||||
@ -1367,8 +1370,10 @@ OperandEncoding RecognizableInstr::roRegisterEncodingFromString
|
||||
ENCODING("FR64X", ENCODING_REG)
|
||||
ENCODING("FR32X", ENCODING_REG)
|
||||
ENCODING("VR512", ENCODING_REG)
|
||||
ENCODING("VK1", ENCODING_REG)
|
||||
ENCODING("VK8", ENCODING_REG)
|
||||
ENCODING("VK16", ENCODING_REG)
|
||||
ENCODING("VK1WM", ENCODING_REG)
|
||||
ENCODING("VK8WM", ENCODING_REG)
|
||||
ENCODING("VK16WM", ENCODING_REG)
|
||||
errs() << "Unhandled reg/opcode register encoding " << s << "\n";
|
||||
@ -1389,6 +1394,7 @@ OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString
|
||||
ENCODING("VR128X", ENCODING_VVVV)
|
||||
ENCODING("VR256X", ENCODING_VVVV)
|
||||
ENCODING("VR512", ENCODING_VVVV)
|
||||
ENCODING("VK1", ENCODING_VVVV)
|
||||
ENCODING("VK8", ENCODING_VVVV)
|
||||
ENCODING("VK16", ENCODING_VVVV)
|
||||
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
|
||||
@ -1398,6 +1404,7 @@ OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString
|
||||
OperandEncoding RecognizableInstr::writemaskRegisterEncodingFromString
|
||||
(const std::string &s,
|
||||
bool hasOpSizePrefix) {
|
||||
ENCODING("VK1WM", ENCODING_WRITEMASK)
|
||||
ENCODING("VK8WM", ENCODING_WRITEMASK)
|
||||
ENCODING("VK16WM", ENCODING_WRITEMASK)
|
||||
errs() << "Unhandled mask register encoding " << s << "\n";
|
||||
|
Loading…
x
Reference in New Issue
Block a user