mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
[x86] Change AVX512 intrinsics to take an 8-bit immediate for the comparison kind instead of a 32-bit immediate. This better aligns with the emitted instruction. It also matches SSE and AVX1 equivalents. Also add auto upgrade support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226430 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
eb71fa415e
commit
6f91bd79f3
@ -3882,10 +3882,10 @@ let TargetPrefix = "x86" in {
|
||||
// Misc.
|
||||
let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty,
|
||||
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
|
||||
|
@ -60,6 +60,21 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
|
||||
// immediates have changed their type from i32 to i8.
|
||||
static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
|
||||
Function *&NewFn) {
|
||||
// Only upgrade when parameter index 2 (the comparison-kind immediate) is still an i32.
|
||||
Type *LastArgType = F->getFunctionType()->getParamType(2);
|
||||
if (!LastArgType->isIntegerTy(32))
|
||||
return false;
|
||||
|
||||
// Rename the old declaration to "<name>.old" and fetch the declaration for IID into NewFn.
|
||||
F->setName(F->getName() + ".old");
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
assert(F && "Illegal to upgrade a non-existent Function.");
|
||||
|
||||
@ -206,6 +221,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
|
||||
NewFn);
|
||||
|
||||
if (Name == "x86.avx512.mask.cmp.ps.512")
|
||||
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
|
||||
NewFn);
|
||||
if (Name == "x86.avx512.mask.cmp.pd.512")
|
||||
return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
|
||||
NewFn);
|
||||
|
||||
// frcz.ss/sd may need to have an argument dropped
|
||||
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
|
||||
F->setName(Name + ".old");
|
||||
@ -547,6 +569,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
case Intrinsic::x86_avx512_mask_cmp_ps_512:
|
||||
case Intrinsic::x86_avx512_mask_cmp_pd_512: {
|
||||
// Need to truncate the third argument from i32 to i8 -- this argument models
|
||||
// an inherently 8-bit immediate operand to these x86 instructions.
|
||||
SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
|
||||
CI->arg_operands().end());
|
||||
|
||||
// Replace the third argument (the comparison kind) with a trunc.
|
||||
Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
|
||||
|
||||
CallInst *NewCall = Builder.CreateCall(NewFn, Args);
|
||||
CI->replaceAllUsesWith(NewCall);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1577,25 +1577,25 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
|
||||
imm:$cc), VK8)>;
|
||||
|
||||
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
|
||||
(v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
|
||||
(I8Imm imm:$cc)), GR16)>;
|
||||
|
||||
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
|
||||
(v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
|
||||
FROUND_NO_EXC)),
|
||||
(COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
|
||||
(I8Imm imm:$cc)), GR8)>;
|
||||
|
||||
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
|
||||
(v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1),
|
||||
(v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1),
|
||||
FROUND_CURRENT)),
|
||||
(COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
|
||||
(I8Imm imm:$cc)), GR16)>;
|
||||
|
||||
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
|
||||
(v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1),
|
||||
(v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1),
|
||||
FROUND_CURRENT)),
|
||||
(COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
|
||||
(I8Imm imm:$cc)), GR8)>;
|
||||
|
@ -555,10 +555,6 @@ def AVXCC : Operand<i8> {
|
||||
def i8immZExt5 : ImmLeaf<i8, [{
|
||||
return Imm >= 0 && Imm < 32;
|
||||
}]>;
|
||||
// AVX-512 uses a 32-bit immediate in their intrinsics
|
||||
def i32immZExt5 : ImmLeaf<i32, [{
|
||||
return Imm >= 0 && Imm < 32;
|
||||
}]>;
|
||||
|
||||
class ImmSExtAsmOperandClass : AsmOperandClass {
|
||||
let SuperClasses = [ImmAsmOperand];
|
||||
|
Loading…
Reference in New Issue
Block a user