mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
Add support for emitting non-temporal stores for DAGs marked
non-temporal. Fix from r96241 for botched encoding of MOVNTDQ. Add documentation for !nontemporal metadata. Add a simpler movnt testcase. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96386 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
713bc585ef
commit
8939b0d8a9
@ -4074,8 +4074,9 @@ Instruction</a> </div>
|
|||||||
|
|
||||||
<h5>Syntax:</h5>
|
<h5>Syntax:</h5>
|
||||||
<pre>
|
<pre>
|
||||||
<result> = load <ty>* <pointer>[, align <alignment>]
|
<result> = load <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]
|
||||||
<result> = volatile load <ty>* <pointer>[, align <alignment>]
|
<result> = volatile load <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]
|
||||||
|
!<index> = !{ i32 1 }
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
<h5>Overview:</h5>
|
<h5>Overview:</h5>
|
||||||
@ -4088,7 +4089,7 @@ Instruction</a> </div>
|
|||||||
marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
|
marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
|
||||||
number or order of execution of this <tt>load</tt> with other
|
number or order of execution of this <tt>load</tt> with other
|
||||||
volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
|
volatile <tt>load</tt> and <tt><a href="#i_store">store</a></tt>
|
||||||
instructions. </p>
|
instructions.</p>
|
||||||
|
|
||||||
<p>The optional constant "align" argument specifies the alignment of the
|
<p>The optional constant "align" argument specifies the alignment of the
|
||||||
operation (that is, the alignment of the memory address). A value of 0 or an
|
operation (that is, the alignment of the memory address). A value of 0 or an
|
||||||
@ -4098,6 +4099,14 @@ Instruction</a> </div>
|
|||||||
alignment results in an undefined behavior. Underestimating the alignment may
|
alignment results in an undefined behavior. Underestimating the alignment may
|
||||||
produce less efficient code. An alignment of 1 is always safe.</p>
|
produce less efficient code. An alignment of 1 is always safe.</p>
|
||||||
|
|
||||||
|
<p>The optional !nontemporal metadata must reference a single metadata
|
||||||
|
name <index> corresponding to a metadata node with one i32 entry of
|
||||||
|
value 1. The existence of the !nontemporal metadata on the
|
||||||
|
instruction tells the optimizer and code generator that this load is
|
||||||
|
not expected to be reused in the cache. The code generator may
|
||||||
|
select special instructions to save cache bandwidth, such as the
|
||||||
|
MOVNT instruction on x86.</p>
|
||||||
|
|
||||||
<h5>Semantics:</h5>
|
<h5>Semantics:</h5>
|
||||||
<p>The location of memory pointed to is loaded. If the value being loaded is of
|
<p>The location of memory pointed to is loaded. If the value being loaded is of
|
||||||
scalar type then the number of bytes read does not exceed the minimum number
|
scalar type then the number of bytes read does not exceed the minimum number
|
||||||
@ -4124,8 +4133,8 @@ Instruction</a> </div>
|
|||||||
|
|
||||||
<h5>Syntax:</h5>
|
<h5>Syntax:</h5>
|
||||||
<pre>
|
<pre>
|
||||||
store <ty> <value>, <ty>* <pointer>[, align <alignment>] <i>; yields {void}</i>
|
store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>] <i>; yields {void}</i>
|
||||||
volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>] <i>; yields {void}</i>
|
volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>] <i>; yields {void}</i>
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
<h5>Overview:</h5>
|
<h5>Overview:</h5>
|
||||||
@ -4150,6 +4159,15 @@ Instruction</a> </div>
|
|||||||
alignment results in an undefined behavior. Underestimating the alignment may
|
alignment results in an undefined behavior. Underestimating the alignment may
|
||||||
produce less efficient code. An alignment of 1 is always safe.</p>
|
produce less efficient code. An alignment of 1 is always safe.</p>
|
||||||
|
|
||||||
|
<p>The optional !nontemporal metadata must reference a single metadata
|
||||||
|
name <index> corresponding to a metadata node with one i32 entry of
|
||||||
|
value 1. The existence of the !nontemporal metadata on the
|
||||||
|
instruction tells the optimizer and code generator that this store is
|
||||||
|
not expected to be reused in the cache. The code generator may
|
||||||
|
select special instructions to save cache bandwidth, such as the
|
||||||
|
MOVNT instruction on x86.</p>
|
||||||
|
|
||||||
|
|
||||||
<h5>Semantics:</h5>
|
<h5>Semantics:</h5>
|
||||||
<p>The contents of memory are updated to contain '<tt><value></tt>' at the
|
<p>The contents of memory are updated to contain '<tt><value></tt>' at the
|
||||||
location specified by the '<tt><pointer></tt>' operand. If
|
location specified by the '<tt><pointer></tt>' operand. If
|
||||||
|
@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
|
|||||||
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
|
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
|
||||||
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
|
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
|
||||||
|
|
||||||
|
// MOVNT Support
|
||||||
|
// Like 'store', but requires the non-temporal bit to be set.
// The predicate accepts the node only when it is a StoreSDNode whose
// isNonTemporal() is true; any other node is rejected. No alignment,
// truncation or addressing-mode check is made here.
|
||||||
|
def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(st node:$val, node:$ptr), [{
|
||||||
|
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
|
||||||
|
return ST->isNonTemporal();
|
||||||
|
return false;
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
// Non-temporal store fragment restricted to aligned, plain stores.
// The predicate accepts a StoreSDNode only when all of these hold:
// it is non-temporal, it is not a truncating store, its addressing mode
// is ISD::UNINDEXED, and getAlignment() is at least 16. Any node that is
// not a StoreSDNode is rejected.
def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(st node:$val, node:$ptr), [{
|
||||||
|
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
|
||||||
|
return ST->isNonTemporal() && !ST->isTruncatingStore() &&
|
||||||
|
ST->getAddressingMode() == ISD::UNINDEXED &&
|
||||||
|
ST->getAlignment() >= 16;
|
||||||
|
return false;
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
// Non-temporal store fragment for stores whose alignment is below 16.
// The predicate accepts a StoreSDNode that is non-temporal and whose
// getAlignment() is less than 16; any other node is rejected.
// NOTE(review): unlike alignednontemporalstore, this predicate does not
// test isTruncatingStore() or the addressing mode -- confirm that
// truncating/indexed stores are meant to match here.
def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
|
||||||
|
(st node:$val, node:$ptr), [{
|
||||||
|
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
|
||||||
|
return ST->isNonTemporal() &&
|
||||||
|
ST->getAlignment() < 16;
|
||||||
|
return false;
|
||||||
|
}]>;
|
||||||
|
|
||||||
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
|
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
|
||||||
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
|
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
|
||||||
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
|
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
|
||||||
@ -1013,10 +1039,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
|
|||||||
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
|
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
|
||||||
|
|
||||||
// Non-temporal stores
|
// Non-temporal stores
|
||||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||||
"movntps\t{$src, $dst|$dst, $src}",
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
|
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
|
||||||
|
|
||||||
|
// Pattern-based non-temporal store definitions. Everything in this group
// gets AddedComplexity = 400 so these patterns are preferred.
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
|
// movntps: matches an alignednontemporalstore of a v4f32 value in VR128.
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
|
"movntps\t{$src, $dst|$dst, $src}",
|
||||||
|
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||||
|
|
||||||
|
// movntdq: matches an alignednontemporalstore of a v2f64 value in VR128.
// NOTE(review): v2f64 is also the type matched by MOVNTPDmr (movntpd)
// elsewhere in this change -- confirm which instruction is intended to
// win for v2f64 stores.
def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
|
[(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
|
||||||
|
|
||||||
|
// Additional pattern: aligned non-temporal v2i64 stores select MOVNTDQ_64mr.
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
|
||||||
|
(MOVNTDQ_64mr VR128:$src, addr:$dst)>;
|
||||||
|
|
||||||
|
// movnti (32-bit): matches a nontemporalstore of an i32 in GR32.
// Guarded by Requires<[HasSSE2]>.
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||||
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
|
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
|
||||||
|
TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
// movnti (64-bit): matches a nontemporalstore of an i64 in GR64.
// Guarded by Requires<[HasSSE2]>.
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||||
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
|
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
|
||||||
|
TB, Requires<[HasSSE2]>;
|
||||||
|
}
|
||||||
|
|
||||||
// Load, store, and memory fence
|
// Load, store, and memory fence
|
||||||
def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
|
def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
|
||||||
|
|
||||||
@ -2298,17 +2347,30 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
|
|||||||
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
|
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
|
||||||
|
|
||||||
// Non-temporal stores
|
// Non-temporal stores
|
||||||
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||||
"movntpd\t{$src, $dst|$dst, $src}",
|
"movntpd\t{$src, $dst|$dst, $src}",
|
||||||
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
|
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
|
||||||
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
"movntdq\t{$src, $dst|$dst, $src}",
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
|
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
|
||||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||||
"movnti\t{$src, $dst|$dst, $src}",
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
|
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
|
||||||
TB, Requires<[HasSSE2]>;
|
TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
|
// Pattern-based SSE2 non-temporal store definitions. Everything in this
// group gets AddedComplexity = 400 so these patterns are preferred.
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||||
|
// movntpd: matches an alignednontemporalstore of a v2f64 value in VR128.
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
|
"movntpd\t{$src, $dst|$dst, $src}",
|
||||||
|
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
|
||||||
|
|
||||||
|
// movntdq: matches an alignednontemporalstore of a v4f32 value in VR128.
// NOTE(review): v4f32 is also the type matched by MOVNTPSmr (movntps)
// elsewhere in this change -- confirm which instruction is intended to
// win for v4f32 stores.
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||||
|
"movntdq\t{$src, $dst|$dst, $src}",
|
||||||
|
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
|
||||||
|
|
||||||
|
// Additional pattern: aligned non-temporal v4i32 stores select MOVNTDQmr.
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
|
||||||
|
(MOVNTDQmr VR128:$src, addr:$dst)>;
|
||||||
|
}
|
||||||
|
|
||||||
// Flush cache
|
// Flush cache
|
||||||
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
|
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
|
||||||
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
|
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
|
||||||
|
22
test/CodeGen/X86/2010-02-11-NonTemporal.ll
Normal file
22
test/CodeGen/X86/2010-02-11-NonTemporal.ll
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||||
|
; CHECK: movnt
|
||||||
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
!0 = metadata !{ i32 1 }
|
||||||
|
|
||||||
|
; Test body: loads a <2 x double> from the %"$LCS_S2" alloca and an i64 from
; the %"$LCS_0" alloca, turns the i64 into a <2 x double>* with inttoptr, and
; stores the vector through that pointer with align 16 and !nontemporal !0.
; No store to either alloca is visible in this function, so the loaded
; values are whatever the stack slots hold; only the emitted store matters.
define void @sub_(i32* noalias %n) {
|
||||||
|
"file movnt.f90, line 2, bb1":
|
||||||
|
%n1 = alloca i32*, align 8
|
||||||
|
%i = alloca i32, align 4
|
||||||
|
%"$LCS_0" = alloca i64, align 8
|
||||||
|
%"$LCS_S2" = alloca <2 x double>, align 16
|
||||||
|
%r9 = load <2 x double>* %"$LCS_S2", align 8
|
||||||
|
%r10 = load i64* %"$LCS_0", align 8
|
||||||
|
%r11 = inttoptr i64 %r10 to <2 x double>*
|
||||||
|
store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0
|
||||||
|
br label %"file movnt.f90, line 18, bb5"
|
||||||
|
|
||||||
|
"file movnt.f90, line 18, bb5":
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user