mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-19 04:32:19 +00:00
AVX-512: recommitted 229837 + bugfix + test
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230223 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d8e5adcd92
commit
fdafc8fd5e
@ -3193,12 +3193,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
|
||||
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
@ -14419,9 +14419,20 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue Src0 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
SDValue RoundingMode = Op.getOperand(5);
|
||||
// There are 2 kinds of intrinsics in this group:
|
||||
// (1) With suppress-all-exceptions (sae) - 6 operands
|
||||
// (2) With rounding mode and sae - 7 operands.
|
||||
if (Op.getNumOperands() == 6) {
|
||||
SDValue Sae = Op.getOperand(5);
|
||||
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
|
||||
Sae),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
|
||||
SDValue RoundingMode = Op.getOperand(5);
|
||||
SDValue Sae = Op.getOperand(6);
|
||||
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
|
||||
RoundingMode),
|
||||
RoundingMode, Sae),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
case INTR_TYPE_2OP_MASK: {
|
||||
|
@ -393,7 +393,8 @@ namespace llvm {
|
||||
FMSUB_RND,
|
||||
FNMSUB_RND,
|
||||
FMADDSUB_RND,
|
||||
FMSUBADD_RND,
|
||||
FMSUBADD_RND,
|
||||
RNDSCALE,
|
||||
|
||||
// Compress and expand
|
||||
COMPRESS,
|
||||
|
@ -101,6 +101,8 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
|
||||
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
|
||||
SSEPackedInt));
|
||||
|
||||
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
|
||||
|
||||
// A vector type of the same width with element type i32. This is used to
|
||||
// create the canonical constant zero node ImmAllZerosV.
|
||||
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
|
||||
@ -4652,7 +4654,6 @@ let ExeDomain = d in {
|
||||
} // ExeDomain
|
||||
}
|
||||
|
||||
|
||||
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
|
||||
loadv16f32, SSEPackedSingle>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
@ -4672,52 +4673,72 @@ def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
|
||||
FROUND_CURRENT)),
|
||||
(VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
|
||||
|
||||
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
|
||||
Operand x86memop, RegisterClass RC, Domain d> {
|
||||
let ExeDomain = d in {
|
||||
def r : AVX512AIi8<opc, MRMSrcReg,
|
||||
(outs RC:$dst), (ins RC:$src1, RC:$src2, i32u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, EVEX_4V;
|
||||
multiclass
|
||||
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
|
||||
def m : AVX512AIi8<opc, MRMSrcMem,
|
||||
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, EVEX_4V;
|
||||
} // ExeDomain
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
|
||||
|
||||
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$src3), (i32 FROUND_NO_EXC))), "{sae}">, EVEX_B;
|
||||
|
||||
let mayLoad = 1 in
|
||||
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScale (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>;
|
||||
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>;
|
||||
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>;
|
||||
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
|
||||
def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
|
||||
|
||||
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x1))), _.FRC)>;
|
||||
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x2))), _.FRC)>;
|
||||
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x3))), _.FRC)>;
|
||||
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x4))), _.FRC)>;
|
||||
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xc))), _.FRC)>;
|
||||
}
|
||||
}
|
||||
|
||||
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
|
||||
SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
|
||||
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
|
||||
|
||||
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
|
||||
SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
|
||||
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
|
||||
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(ffloor FR32X:$src),
|
||||
(VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
|
||||
def : Pat<(f64 (ffloor FR64X:$src)),
|
||||
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
|
||||
def : Pat<(f32 (fnearbyint FR32X:$src)),
|
||||
(VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
|
||||
def : Pat<(f64 (fnearbyint FR64X:$src)),
|
||||
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
|
||||
def : Pat<(f32 (fceil FR32X:$src)),
|
||||
(VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
|
||||
def : Pat<(f64 (fceil FR64X:$src)),
|
||||
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
|
||||
def : Pat<(f32 (frint FR32X:$src)),
|
||||
(VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
|
||||
def : Pat<(f64 (frint FR64X:$src)),
|
||||
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
|
||||
def : Pat<(f32 (ftrunc FR32X:$src)),
|
||||
(VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
|
||||
def : Pat<(f64 (ftrunc FR64X:$src)),
|
||||
(VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
|
||||
}
|
||||
|
||||
def : Pat<(v16f32 (ffloor VR512:$src)),
|
||||
(VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
|
||||
def : Pat<(v16f32 (fnearbyint VR512:$src)),
|
||||
@ -4739,7 +4760,7 @@ def : Pat<(v8f64 (frint VR512:$src)),
|
||||
(VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
|
||||
def : Pat<(v8f64 (ftrunc VR512:$src)),
|
||||
(VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
|
||||
|
||||
}
|
||||
//-------------------------------------------------
|
||||
// Integer truncate and extend operations
|
||||
//-------------------------------------------------
|
||||
|
@ -223,6 +223,8 @@ def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
|
||||
SDTCisVec<0>, SDTCisInt<2>]>;
|
||||
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
|
||||
SDTCisVec<0>, SDTCisInt<3>]>;
|
||||
def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
|
||||
SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
|
||||
|
||||
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
|
||||
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
|
||||
@ -299,6 +301,7 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
|
||||
|
||||
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
|
||||
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
|
||||
def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
|
||||
|
||||
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
|
||||
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
|
||||
|
@ -378,6 +378,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::RNDSCALE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::RNDSCALE, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
X86ISD::FSUB_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
|
||||
@ -396,8 +400,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
|
||||
|
@ -87,3 +87,20 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define double @nearbyint_f64(double %a) {
|
||||
; CHECK-LABEL: nearbyint_f64
|
||||
; CHECK: vroundsd $12
|
||||
%res = call double @llvm.nearbyint.f64(double %a)
|
||||
ret double %res
|
||||
}
|
||||
declare double @llvm.nearbyint.f64(double %p)
|
||||
|
||||
define float @floor_f32(float %a) {
|
||||
; CHECK-LABEL: floor_f32
|
||||
; CHECK: vroundss $1
|
||||
%res = call float @llvm.floor.f32(float %a)
|
||||
ret float %res
|
||||
}
|
||||
declare float @llvm.floor.f32(float %p)
|
||||
|
||||
|
||||
|
@ -79,3 +79,28 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %a) {
|
||||
ret <8 x double> %res
|
||||
}
|
||||
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
|
||||
|
||||
define double @nearbyint_f64(double %a) {
|
||||
; CHECK-LABEL: nearbyint_f64
|
||||
; CHECK: vrndscalesd $12, {{.*}}encoding: [0x62,0xf3,0xfd,0x08,0x0b,0xc0,0x0c]
|
||||
%res = call double @llvm.nearbyint.f64(double %a)
|
||||
ret double %res
|
||||
}
|
||||
declare double @llvm.nearbyint.f64(double %p)
|
||||
|
||||
define float @floor_f32(float %a) {
|
||||
; CHECK-LABEL: floor_f32
|
||||
; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x01]
|
||||
%res = call float @llvm.floor.f32(float %a)
|
||||
ret float %res
|
||||
}
|
||||
declare float @llvm.floor.f32(float %p)
|
||||
|
||||
define float @floor_f32m(float* %aptr) {
|
||||
; CHECK-LABEL: floor_f32m
|
||||
; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01]
|
||||
%a = load float* %aptr, align 4
|
||||
%res = call float @llvm.floor.f32(float %a)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user