mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-19 04:32:19 +00:00
AVX-512: Scalar ERI intrinsics
including SAE mode and memory operand. Added the AVX512_maskable_scalar template, which should cover all scalar instructions in the future. The main difference between AVX512_maskable_scalar<> and AVX512_maskable<> is using X86select instead of vselect. I need it because I can't create a vselect node with an MVT::i1 mask for a scalar instruction. http://reviews.llvm.org/D6378 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222820 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e98441590b
commit
10c8f38047
@ -16799,6 +16799,23 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
|
||||
return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
|
||||
}
|
||||
|
||||
// Wrap the scalar operation `Op` in an X86ISD::SELECT on `Mask`,
// keeping `PreservedSrc` (or zero, when it is undef) in the
// masked-off element. Scalar masks are i1, which ISD::VSELECT
// cannot take, hence the X86-specific select node.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
|
||||
SDValue PreservedSrc,
|
||||
const X86Subtarget *Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
// An all-ones mask enables every element: no masking required.
if (isAllOnes(Mask))
|
||||
return Op;
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
SDLoc dl(Op);
|
||||
// The mask should be of type MVT::i1
|
||||
SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
|
||||
|
||||
// An undef pass-through operand means zero-masking semantics.
if (PreservedSrc.getOpcode() == ISD::UNDEF)
|
||||
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
// X86ISD::SELECT accepts the i1 mask that ISD::VSELECT cannot.
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
|
||||
}
|
||||
|
||||
static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
|
||||
switch (IntNo) {
|
||||
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
|
||||
@ -16872,6 +16889,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
RoundingMode),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
case INTR_TYPE_SCALAR_MASK_RM: {
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue Src0 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
SDValue RoundingMode = Op.getOperand(5);
|
||||
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
|
||||
RoundingMode),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
case INTR_TYPE_2OP_MASK: {
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
|
||||
Op.getOperand(2)),
|
||||
|
@ -23,7 +23,11 @@ class X86VectorVTInfo<int numelts, ValueType EltVT, RegisterClass rc,
|
||||
// Suffix used in the instruction mnemonic.
|
||||
string Suffix = suffix;
|
||||
|
||||
string VTName = "v" # NumElts # EltVT;
|
||||
int NumEltsInVT = !if (!eq (NumElts, 1),
|
||||
!if (!eq (EltVT.Size, 32), 4,
|
||||
!if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts);
|
||||
|
||||
string VTName = "v" # NumEltsInVT # EltVT;
|
||||
|
||||
// The vector VT.
|
||||
ValueType VT = !cast<ValueType>(VTName);
|
||||
@ -57,9 +61,11 @@ class X86VectorVTInfo<int numelts, ValueType EltVT, RegisterClass rc,
|
||||
// case of i64 element types for sub-512 integer vectors. For now, keep
|
||||
// MemOpFrag undefined in these cases.
|
||||
PatFrag MemOpFrag =
|
||||
!if (!eq (NumElts#EltTypeName, "1f32"), !cast<PatFrag>("memopfsf32"),
|
||||
!if (!eq (NumElts#EltTypeName, "1f64"), !cast<PatFrag>("memopfsf64"),
|
||||
!if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
|
||||
!if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
|
||||
!if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)));
|
||||
!if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)))));
|
||||
|
||||
// The corresponding float type, e.g. v16f32 for v16i32
|
||||
// Note: For EltSize < 32, FloatVT is illegal and TableGen
|
||||
@ -114,6 +120,10 @@ def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
|
||||
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
|
||||
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
|
||||
|
||||
// the scalar stuff
|
||||
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
|
||||
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
|
||||
|
||||
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
|
||||
X86VectorVTInfo i128> {
|
||||
X86VectorVTInfo info512 = i512;
|
||||
@ -183,7 +193,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, dag MaskingRHS,
|
||||
string Round = "",
|
||||
SDNode Select = vselect, string Round = "",
|
||||
string MaskingConstraint = "",
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> :
|
||||
@ -192,11 +202,11 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
[(set _.RC:$dst, RHS)],
|
||||
[(set _.RC:$dst, MaskingRHS)],
|
||||
[(set _.RC:$dst,
|
||||
(vselect _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
|
||||
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
|
||||
Round, MaskingConstraint, NoItinerary, IsCommutable>;
|
||||
|
||||
// This multiclass generates the unconditional/non-masking, the masking and
|
||||
// the zero-masking variant of the instruction. In the masking case, the
|
||||
// the zero-masking variant of the vector instruction. In the masking case, the
|
||||
// preserved vector elements come from a new dummy input operand tied to $dst.
|
||||
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
dag Outs, dag Ins, string OpcodeStr,
|
||||
@ -208,8 +218,23 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
|
||||
!con((ins _.KRCWM:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(vselect _.KRCWM:$mask, RHS, _.RC:$src0), Round,
|
||||
"$src0 = $dst", itin, IsCommutable>;
|
||||
(vselect _.KRCWM:$mask, RHS, _.RC:$src0), vselect,
|
||||
Round, "$src0 = $dst", itin, IsCommutable>;
|
||||
|
||||
// This multiclass generates the unconditional/non-masking, the masking and
|
||||
// the zero-masking variant of the scalar instruction.
|
||||
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
dag Outs, dag Ins, string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS, string Round = "",
|
||||
InstrItinClass itin = NoItinerary,
|
||||
bit IsCommutable = 0> :
|
||||
AVX512_maskable_common<O, F, _, Outs, Ins,
|
||||
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
|
||||
!con((ins _.KRCWM:$mask), Ins),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(X86select _.KRCWM:$mask, RHS, _.RC:$src0), X86select,
|
||||
Round, "$src0 = $dst", itin, IsCommutable>;
|
||||
|
||||
// Similar to AVX512_maskable but in this case one of the source operands
|
||||
// ($src1) is already tied to $dst so we just use that for the preserved
|
||||
@ -4190,60 +4215,40 @@ def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
|
||||
(VRCP14PDZr VR512:$src)>;
|
||||
|
||||
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
|
||||
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop> {
|
||||
let hasSideEffects = 0, Predicates = [HasERI] in {
|
||||
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
|
||||
def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
|
||||
[]>, EVEX_4V, EVEX_B;
|
||||
let mayLoad = 1 in {
|
||||
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
|
||||
}
|
||||
}
|
||||
// Scalar ERI instruction forms (rcp28ss/sd, rsqrt28ss/sd): register,
// register-with-SAE, and memory variants, each with masking via
// AVX512_maskable_scalar (X86select on an i1 mask).
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
|
||||
SDNode OpNode> {
|
||||
|
||||
// Register-register form, current rounding mode.
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_CURRENT))>;
|
||||
|
||||
// Register-register form with {sae} (suppress-all-exceptions).
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
|
||||
|
||||
// Memory form: the scalar load is widened via scalar_to_vector.
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(OpNode (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 FROUND_CURRENT))>;
|
||||
}
|
||||
|
||||
defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
|
||||
VEX_W, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
|
||||
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
|
||||
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
|
||||
|
||||
def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
|
||||
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
|
||||
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
|
||||
|
||||
def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
|
||||
(v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
|
||||
(COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
|
||||
|
||||
def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
|
||||
(v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
|
||||
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
|
||||
// Instantiate the scalar ERI forms for both f32 (ss) and f64 (sd)
// element types, with the matching CD8 memory-displacement scaling.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
|
||||
EVEX_CD8<32, CD8VT1>;
|
||||
// VEX_W selects the 64-bit element variant.
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
|
||||
EVEX_CD8<64, CD8VT1>, VEX_W;
|
||||
}
|
||||
|
||||
// Scalar ERI instructions; gated on the ERI feature (HasERI) and marked
// side-effect free so the scheduler can move/eliminate them.
let hasSideEffects = 0, Predicates = [HasERI] in {
|
||||
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
|
||||
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
|
||||
}
|
||||
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
|
||||
|
||||
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
@ -4256,12 +4261,14 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr,
|
||||
"$src", "$src",
|
||||
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
|
||||
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
|
||||
"{sae}">, EVEX_B;
|
||||
|
||||
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
|
||||
(OpNode (_.FloatVT
|
||||
(bitconvert (_.LdFrag addr:$src))), (i32 FROUND_CURRENT))>;
|
||||
(bitconvert (_.LdFrag addr:$src))),
|
||||
(i32 FROUND_CURRENT))>;
|
||||
|
||||
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
|
||||
|
@ -205,6 +205,8 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
|
||||
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
|
||||
SDTCisVec<0>, SDTCisInt<2>]>;
|
||||
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
|
||||
SDTCisVec<0>, SDTCisInt<3>]>;
|
||||
|
||||
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
|
||||
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
|
||||
@ -265,7 +267,10 @@ def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
|
||||
|
||||
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
|
||||
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
|
||||
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
|
||||
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
|
||||
|
||||
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
|
||||
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
|
||||
|
||||
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
||||
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
|
||||
|
@ -21,7 +21,7 @@ enum IntrinsicType {
|
||||
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
|
||||
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK
|
||||
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM
|
||||
};
|
||||
|
||||
struct IntrinsicData {
|
||||
@ -221,8 +221,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
|
||||
|
@ -64,16 +64,53 @@ define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
|
||||
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
|
||||
; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
|
||||
; CHECK: vrcp28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
|
||||
; CHECK: vrcp28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0) {
|
||||
; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 7, i32 8) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0) {
|
||||
; CHECK: vrsqrt28ss %xmm1, %xmm0, %xmm2 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
|
||||
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 7, i32 8) ;
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0) {
|
||||
; CHECK: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
|
||||
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 7, i32 8) ;
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
|
||||
|
||||
define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr ) {
|
||||
; CHECK: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
|
||||
%mem = load double * %ptr, align 8
|
||||
%mem_v = insertelement <2 x double> undef, double %mem, i32 0
|
||||
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr ) {
|
||||
; CHECK: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
|
||||
%ptr1 = getelementptr double* %ptr, i32 18
|
||||
%mem = load double * %ptr1, align 8
|
||||
%mem_v = insertelement <2 x double> undef, double %mem, i32 0
|
||||
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user