mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-16 00:33:10 +00:00
Replace the "movnt" intrinsics with a native store + nontemporal metadata bit.

<rdar://problem/8460511>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130791 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7d6b6a05b5
commit
9493a285d1
@ -1875,21 +1875,6 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
|
|||||||
// SSE 1 & 2 - Non-temporal stores
|
// SSE 1 & 2 - Non-temporal stores
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
|
|
||||||
(ins i128mem:$dst, VR128:$src),
|
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
|
|
||||||
def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
|
|
||||||
(ins i128mem:$dst, VR128:$src),
|
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
|
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt in
|
|
||||||
def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
|
|
||||||
(ins f128mem:$dst, VR128:$src),
|
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
|
|
||||||
|
|
||||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f128mem:$dst, VR128:$src),
|
(ins f128mem:$dst, VR128:$src),
|
||||||
@ -1906,12 +1891,16 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
|
|||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v2f64 VR128:$src),
|
[(alignednontemporalstore (v2f64 VR128:$src),
|
||||||
addr:$dst)]>, VEX;
|
addr:$dst)]>, VEX;
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt in
|
let ExeDomain = SSEPackedInt in
|
||||||
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
|
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||||
(ins f128mem:$dst, VR128:$src),
|
(ins f128mem:$dst, VR128:$src),
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4f32 VR128:$src),
|
[(alignednontemporalstore (v4f32 VR128:$src),
|
||||||
addr:$dst)]>, VEX;
|
addr:$dst)]>, VEX;
|
||||||
|
|
||||||
|
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(VMOVNTDQmr addr:$dst, VR128:$src)>;
|
||||||
|
|
||||||
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
|
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||||
(ins f256mem:$dst, VR256:$src),
|
(ins f256mem:$dst, VR256:$src),
|
||||||
@ -1943,18 +1932,6 @@ def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
|
|||||||
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
|
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
|
||||||
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
|
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
|
||||||
|
|
||||||
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
|
|
||||||
def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
|
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt in
|
|
||||||
def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
|
|
||||||
|
|
||||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
@ -1972,22 +1949,19 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
|||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||||
|
|
||||||
|
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(MOVNTDQmr addr:$dst, VR128:$src)>;
|
||||||
|
|
||||||
// There is no AVX form for instructions below this point
|
// There is no AVX form for instructions below this point
|
||||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||||
"movnti\t{$src, $dst|$dst, $src}",
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
|
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
|
||||||
TB, Requires<[HasSSE2]>;
|
TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||||
"movnti\t{$src, $dst|$dst, $src}",
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
|
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
|
||||||
TB, Requires<[HasSSE2]>;
|
TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
}
|
}
|
||||||
def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
|
||||||
"movnti\t{$src, $dst|$dst, $src}",
|
|
||||||
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
|
|
||||||
TB, Requires<[HasSSE2]>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE 1 & 2 - Misc Instructions (No AVX form)
|
// SSE 1 & 2 - Misc Instructions (No AVX form)
|
||||||
|
@ -533,6 +533,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
// Calls to these instructions are transformed into unaligned loads.
|
// Calls to these instructions are transformed into unaligned loads.
|
||||||
NewFn = 0;
|
NewFn = 0;
|
||||||
return true;
|
return true;
|
||||||
|
} else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
|
||||||
|
Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
|
||||||
|
Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
|
||||||
|
Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
|
||||||
|
// Calls to these intrinsics are transformed into nontemporal stores.
|
||||||
|
NewFn = 0;
|
||||||
|
return true;
|
||||||
} else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
|
} else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
|
||||||
// This is an SSE/MMX instruction.
|
// This is an SSE/MMX instruction.
|
||||||
const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
|
const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
|
||||||
@ -973,6 +980,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
if (!CI->use_empty())
|
if (!CI->use_empty())
|
||||||
CI->replaceAllUsesWith(BC);
|
CI->replaceAllUsesWith(BC);
|
||||||
|
|
||||||
|
// Remove intrinsic.
|
||||||
|
CI->eraseFromParent();
|
||||||
|
} else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
|
||||||
|
F->getName() == "llvm.x86.sse2.movnt.dq" ||
|
||||||
|
F->getName() == "llvm.x86.sse2.movnt.pd" ||
|
||||||
|
F->getName() == "llvm.x86.sse2.movnt.i") {
|
||||||
|
IRBuilder<> Builder(C);
|
||||||
|
Builder.SetInsertPoint(CI->getParent(), CI);
|
||||||
|
|
||||||
|
Module *M = F->getParent();
|
||||||
|
SmallVector<Value *, 1> Elts;
|
||||||
|
Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
|
||||||
|
MDNode *Node = MDNode::get(C, Elts);
|
||||||
|
|
||||||
|
Value *Arg0 = CI->getArgOperand(0);
|
||||||
|
Value *Arg1 = CI->getArgOperand(1);
|
||||||
|
|
||||||
|
// Convert the type of the pointer to a pointer to the stored type.
|
||||||
|
Value *BC = Builder.CreateBitCast(Arg0,
|
||||||
|
PointerType::getUnqual(Arg1->getType()),
|
||||||
|
"cast");
|
||||||
|
StoreInst *SI = Builder.CreateStore(Arg1, BC);
|
||||||
|
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
|
||||||
|
SI->setAlignment(16);
|
||||||
|
|
||||||
// Remove intrinsic.
|
// Remove intrinsic.
|
||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
} else {
|
} else {
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
|
; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
|
||||||
; RUN: llvm-as < %s | llvm-dis | \
|
; RUN: llvm-as < %s | llvm-dis | \
|
||||||
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
|
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
|
||||||
|
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||||
|
|
||||||
declare i32 @llvm.ctpop.i28(i28 %val)
|
declare i32 @llvm.ctpop.i28(i28 %val)
|
||||||
declare i32 @llvm.cttz.i29(i29 %val)
|
declare i32 @llvm.cttz.i29(i29 %val)
|
||||||
@ -91,3 +92,20 @@ define void @test_loadu(i8* %a, double* %b) {
|
|||||||
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
|
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone
|
||||||
|
declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone
|
||||||
|
declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone
|
||||||
|
declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone
|
||||||
|
|
||||||
|
define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
|
||||||
|
; CHECK: store{{.*}}nontemporal
|
||||||
|
call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
|
||||||
|
; CHECK: store{{.*}}nontemporal
|
||||||
|
call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
|
||||||
|
; CHECK: store{{.*}}nontemporal
|
||||||
|
call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
|
||||||
|
; CHECK: store{{.*}}nontemporal
|
||||||
|
call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
19
test/CodeGen/X86/nontemporal.ll
Normal file
19
test/CodeGen/X86/nontemporal.ll
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
|
||||||
|
|
||||||
|
define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
|
||||||
|
; CHECK: movntps
|
||||||
|
%cast = bitcast i8* %B to <4 x float>*
|
||||||
|
store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
|
||||||
|
; CHECK: movntdq
|
||||||
|
%cast1 = bitcast i8* %B to <2 x i64>*
|
||||||
|
store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
|
||||||
|
; CHECK: movntpd
|
||||||
|
%cast2 = bitcast i8* %B to <2 x double>*
|
||||||
|
store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
|
||||||
|
; CHECK: movnti
|
||||||
|
%cast3 = bitcast i8* %B to i32*
|
||||||
|
store i32 %D, i32* %cast3, align 16, !nontemporal !0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = metadata !{i32 1}
|
Loading…
x
Reference in New Issue
Block a user