llvm.memory.barrier, and impl for x86 and alpha
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47204 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 527c250a90
commit 22c5c1b2df
@ -204,6 +204,11 @@
          <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
        </ol>
      </li>
      <li><a href="#int_atomics">Atomic intrinsics</a>
        <ol>
          <li><a href="#int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a></li>
        </ol>
      </li>
      <li><a href="#int_general">General intrinsics</a>
        <ol>
          <li><a href="#int_var_annotation">
@ -5232,6 +5237,107 @@ declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <n
</p>
</div>

<!-- ======================================================================= -->
<div class="doc_subsection">
  <a name="int_atomics">Atomic Operations and Synchronization Intrinsics</a>
</div>

<div class="doc_text">
<p>
  These intrinsic functions expand the "universal IR" of LLVM to represent
  hardware constructs for atomic operations and memory synchronization.  This
  provides an interface to the hardware, not an interface to the programmer.  It
  is aimed at a low enough level to allow any programming models or APIs which
  need atomic behaviors to map cleanly onto it.  It is also modeled primarily on
  hardware behavior.  Just as hardware provides a "universal IR" for source
  languages, it also provides a starting point for developing a "universal"
  atomic operation and synchronization IR.
</p>
<p>
  These do <em>not</em> form an API such as that provided by high-level threading
  libraries, software transactional memory systems, atomic primitives, or the
  intrinsic functions found in BSD, GNU libc, atomic_ops, APR, and other system
  and application libraries.  The hardware interface provided by LLVM should
  allow a clean implementation of all of these APIs and parallel programming
  models.  No one model or paradigm should be selected above others unless the
  hardware itself ubiquitously does so.
</p>
</div>

<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection">
  <a name="int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a>
</div>
<div class="doc_text">
<h5>Syntax:</h5>
<pre>
declare void @llvm.memory.barrier( i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>,
                                   i1 <device> )
</pre>
<h5>Overview:</h5>
<p>
  The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
  specific pairs of memory access types.
</p>
<h5>Arguments:</h5>
<p>
  The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
  Each of the first four arguments enables a specific barrier, as listed below;
  an illustrative call follows the list.  The fifth argument specifies whether
  the barrier also applies to I/O, device, and uncached memory.
</p>
<ul>
  <li><tt>ll</tt>: load-load barrier</li>
  <li><tt>ls</tt>: load-store barrier</li>
  <li><tt>sl</tt>: store-load barrier</li>
  <li><tt>ss</tt>: store-store barrier</li>
  <li><tt>device</tt>: the barrier also applies to device and uncached memory.</li>
</ul>
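<p>
  For example, a barrier that only orders earlier stores against later loads,
  and only for ordinary (non-device) memory, might be written as:
</p>
<pre>
  call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 false, i1 false )
                                  <i>; sl only: prior stores complete before later loads begin</i>
</pre>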
<h5>Semantics:</h5>
<p>
  This intrinsic causes the system to enforce some ordering constraints upon
  the loads and stores of the program.  This barrier does not indicate
  <em>when</em> any events will occur; it only enforces an <em>order</em> in
  which they occur.  For any of the specified pairs of load and store operations
  (e.g. load-load or store-load), all of the first operations preceding the
  barrier will complete before any of the second operations succeeding the
  barrier begin.  Specifically, the semantics of each pairing are as follows:
</p>
<ul>
  <li><tt>ll</tt>: All loads before the barrier must complete before any load
      after the barrier begins.</li>
  <li><tt>ls</tt>: All loads before the barrier must complete before any
      store after the barrier begins.</li>
  <li><tt>ss</tt>: All stores before the barrier must complete before any
      store after the barrier begins.</li>
  <li><tt>sl</tt>: All stores before the barrier must complete before any
      load after the barrier begins.</li>
</ul>
<p>
  These semantics are applied with a logical "and" behavior when more than one
  barrier type is enabled in a single memory barrier intrinsic.
</p>
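<p>
  For instance, a single barrier enabling both <tt>ll</tt> and <tt>ss</tt> orders
  loads against later loads <em>and</em> stores against later stores, but places
  no constraint on load-store or store-load pairs:
</p>
<pre>
  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false )
                                  <i>; ll "and" ss, ordinary memory only</i>
</pre>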
<p>
  Backends may implement stronger barriers than those requested when they do not
  support as fine-grained a barrier as requested.  Some architectures do not
  need all types of barriers; on such architectures the unneeded barriers simply
  become no-ops.
</p>
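<p>
  As an illustration (drawing on the X86 patterns in this change), a barrier
  requesting only the <tt>ll</tt> and <tt>ls</tt> orderings has no exact x86
  fence, so it may be selected as the stronger full <tt>mfence</tt>:
</p>
<pre>
  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 true )
                                  <i>; may lower to mfence on SSE2-capable x86</i>
</pre>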
<h5>Example:</h5>
<pre>
%ptr = malloc i32
       store i32 4, i32* %ptr

%result1 = load i32* %ptr                <i>; yields {i32}:result1 = 4</i>
       call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false )
                                         <i>; guarantee the above finishes</i>
       store i32 8, i32* %ptr            <i>; before this begins</i>
</pre>
</div>


<!-- ======================================================================= -->
<div class="doc_subsection">
  <a name="int_general">General Intrinsics</a>
@ -589,6 +589,14 @@ namespace ISD {
    // TRAP - Trapping instruction
    TRAP,

    // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
    //                       store-store, device)
    // This corresponds to the llvm.memory.barrier intrinsic.
    // It takes an input chain, 4 operands to specify the type of barrier, an
    // operand specifying if the barrier applies to device and uncached memory,
    // and produces an output chain.
    MEMBARRIER,

    // BUILTIN_OP_END - This must be the last enum value in this list.
    BUILTIN_OP_END
  };
@ -262,6 +262,11 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
                                     llvm_ptr_ty], []>,
                          GCCBuiltin<"__builtin_init_trampoline">;

//===------------------------- Atomic Intrinsics --------------------------===//
//
def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty,
                                    llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;

//===-------------------------- Other Intrinsics --------------------------===//
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
@ -1133,6 +1133,16 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
    }
    break;

  case ISD::MEMBARRIER: {
    assert(Node->getNumOperands() == 6 && "Invalid MemBarrier node!");
    SDOperand Ops[6];
    Ops[0] = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
    for (int x = 1; x < 6; ++x)
      Ops[x] = PromoteOp(Node->getOperand(x)); // Promote the i1 barrier operands.
    Result = DAG.UpdateNodeOperands(Result, &Ops[0], 6);
    break;
  }

  case ISD::Constant: {
    ConstantSDNode *CN = cast<ConstantSDNode>(Node);
    unsigned opAction =
@ -3792,6 +3792,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
      return "<<Unknown Target Node>>";
    }

  case ISD::MEMBARRIER: return "MemBarrier";
  case ISD::PCMARKER: return "PCMarker";
  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
  case ISD::SRCVALUE: return "SrcValue";
@ -3046,6 +3046,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
    DAG.setRoot(DAG.getNode(ISD::TRAP, MVT::Other, getRoot()));
    return 0;
  }
  case Intrinsic::memory_barrier: {
    SDOperand Ops[6];
    Ops[0] = getRoot();
    for (int x = 1; x < 6; ++x)
      Ops[x] = getValue(I.getOperand(x)); // The five i1 arguments of the call.

    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, MVT::Other, &Ops[0], 6));
    return 0;
  }
  }
}
@ -62,6 +62,14 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
  let Inst{20-16} = 0;
  let Inst{15-0} = fc;
}
class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
    : InstAlpha<opcode, asmstr, itin> {
  let OutOperandList = (ops);
  let InOperandList = (ops);
  let Inst{25-21} = 0;
  let Inst{20-16} = 0;
  let Inst{15-0} = fc;
}

class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
    : InstAlpha<opcode, asmstr, itin> {
@ -568,8 +568,14 @@ def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
          (LDQl texternalsym:$ext, GPRC:$RB)>;


def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
def MB   : MfcPForm<0x18, 0x4000, "mb",  s_imisc>; //memory barrier
def WMB  : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier

def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
          (WMB)>;
def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
          (MB)>;

//Basic Floating point ops
@ -959,6 +965,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
//S_floating : IEEE Single
//T_floating : IEEE Double


//Unused instructions
//Mnemonic Format Opcode Description
//CALL_PAL Pcd    00     Trap to PALcode
@ -185,6 +185,11 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
  SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;

def STDMemBarrier : SDTypeProfile<0, 5, [
  SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
  SDTCisInt<0>
]>;

class SDCallSeqStart<list<SDTypeConstraint> constraints> :
        SDTypeProfile<0, 1, constraints>;
class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
@ -329,6 +334,8 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
                  [SDNPHasChain, SDNPSideEffect]>;
def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
                        [SDNPHasChain, SDNPSideEffect]>;

// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
@ -1509,3 +1509,10 @@ void test(double *P) {
}

//===---------------------------------------------------------------------===//

handling llvm.memory.barrier on pre-SSE2 CPUs

should generate:
lock ; mov %esp, %esp

//===---------------------------------------------------------------------===//
@ -2149,6 +2149,14 @@ def LFENCE : I<0xAE, MRM5m, (outs), (ins),
def MFENCE : I<0xAE, MRM6m, (outs), (ins),
               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;

//TODO: custom lower this so as to never even generate the noop
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
                      (i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
                      (i8 1)), (MFENCE)>;

// Alias instructions that map zero vector to pxor / xorp* for sse.
let isReMaterializable = 1 in
  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
test/CodeGen/Alpha/mb.ll (new file, 8 lines)
@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=alpha | grep mb

declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 true)
  ret void
}
test/CodeGen/Alpha/wmb.ll (new file, 8 lines)
@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=alpha | grep wmb

declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true)
  ret void
}
test/CodeGen/X86/lfence.ll (new file, 8 lines)
@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence

declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 true)
  ret void
}
test/CodeGen/X86/mfence.ll (new file, 20 lines)
@ -0,0 +1,20 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence


declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 true)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 true)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 true)

  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 true)
  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 true)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 true)

  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 true)
  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false, i1 true)
  ret void
}
test/CodeGen/X86/nofence.ll (new file, 27 lines)
@ -0,0 +1,27 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence

declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false)

  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 true, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 true, i1 false)

  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
  call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 true, i1 false)


  call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 false)
  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false, i1 false)
  ret void
}
test/CodeGen/X86/sfence.ll (new file, 8 lines)
@ -0,0 +1,8 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence

declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)

define void @test() {
  call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true)
  ret void
}