mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-29 10:32:47 +00:00
Remove 256-bit AVX non-temporal store intrinsics. The same was previously done for the 128-bit versions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156375 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
85a4406959
commit
189bce48c7
@ -1282,16 +1282,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], []>;
|
||||
}
|
||||
|
||||
// Cacheability support ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx_movnt_dq_256 : GCCBuiltin<"__builtin_ia32_movntdq256">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty], []>;
|
||||
def int_x86_avx_movnt_pd_256 : GCCBuiltin<"__builtin_ia32_movntpd256">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>;
|
||||
def int_x86_avx_movnt_ps_256 : GCCBuiltin<"__builtin_ia32_movntps256">,
|
||||
Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>;
|
||||
}
|
||||
|
||||
// Conditional load ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
|
||||
|
@ -3336,13 +3336,6 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
IIC_SSE_MOVNT>, VEX;
|
||||
}
|
||||
|
||||
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
|
||||
(VMOVNTDQYmr addr:$dst, VR256:$src)>;
|
||||
def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
|
||||
(VMOVNTPDYmr addr:$dst, VR256:$src)>;
|
||||
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
|
||||
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
|
||||
|
||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
|
@ -57,7 +57,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name.startswith("x86.sse2.pcmpgt.") ||
|
||||
Name.startswith("x86.avx2.pcmpeq.") ||
|
||||
Name.startswith("x86.avx2.pcmpgt.") ||
|
||||
Name.startswith("x86.avx.vpermil.")) {
|
||||
Name.startswith("x86.avx.vpermil.") ||
|
||||
Name == "x86.avx.movnt.dq.256" ||
|
||||
Name == "x86.avx.movnt.pd.256" ||
|
||||
Name == "x86.avx.movnt.ps.256") {
|
||||
NewFn = 0;
|
||||
return true;
|
||||
}
|
||||
@ -118,15 +121,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
"pcmpgt");
|
||||
// need to sign extend since icmp returns vector of i1
|
||||
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
|
||||
} else if (Name == "llvm.x86.avx.movnt.dq.256" ||
|
||||
Name == "llvm.x86.avx.movnt.ps.256" ||
|
||||
Name == "llvm.x86.avx.movnt.pd.256") {
|
||||
IRBuilder<> Builder(C);
|
||||
Builder.SetInsertPoint(CI->getParent(), CI);
|
||||
|
||||
Module *M = F->getParent();
|
||||
SmallVector<Value *, 1> Elts;
|
||||
Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
|
||||
MDNode *Node = MDNode::get(C, Elts);
|
||||
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
|
||||
// Convert the type of the pointer to a pointer to the stored type.
|
||||
Value *BC = Builder.CreateBitCast(Arg0,
|
||||
PointerType::getUnqual(Arg1->getType()),
|
||||
"cast");
|
||||
StoreInst *SI = Builder.CreateStore(Arg1, BC);
|
||||
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
|
||||
SI->setAlignment(16);
|
||||
|
||||
// Remove intrinsic.
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
} else {
|
||||
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
|
||||
if (Name.startswith("llvm.x86.avx.vpermil.pd.256"))
|
||||
if (Name == "llvm.x86.avx.vpermil.pd.256")
|
||||
PD256 = true;
|
||||
else if (Name.startswith("llvm.x86.avx.vpermil.pd"))
|
||||
else if (Name == "llvm.x86.avx.vpermil.pd")
|
||||
PD128 = true;
|
||||
else if (Name.startswith("llvm.x86.avx.vpermil.ps.256"))
|
||||
else if (Name == "llvm.x86.avx.vpermil.ps.256")
|
||||
PS256 = true;
|
||||
else if (Name.startswith("llvm.x86.avx.vpermil.ps"))
|
||||
else if (Name == "llvm.x86.avx.vpermil.ps")
|
||||
PS128 = true;
|
||||
|
||||
if (PD256 || PD128 || PS256 || PS128) {
|
||||
|
@ -2555,3 +2555,27 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
|
||||
ret i32 %tmp
|
||||
}
|
||||
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
|
||||
|
||||
; CHECK: movntdq
|
||||
define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
|
||||
%a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
|
||||
tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
|
||||
|
||||
; CHECK: movntps
|
||||
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
|
||||
tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
|
||||
|
||||
; CHECK: movntpd
|
||||
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
|
||||
; add operation forces the execution domain.
|
||||
%a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
|
||||
tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
|
||||
ret void
|
||||
}
|
||||
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
|
||||
|
Loading…
Reference in New Issue
Block a user