; RUN: llc < %s -march=ppc64 | FileCheck %s
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
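
; This file checks the lowering of LLVM atomic operations on ppc64: 64-bit
; operations are expected to use ldarx/stdcx. sequences (or plain ld/std plus
; barriers for atomic load/store), while the CHECK-P8U prefix, enabled only
; for the -mcpu=pwr8 run, checks that 8- and 16-bit operations use the
; byte/halfword reservation instructions lbarx/lharx/stbcx./sthcx. directly.

; A 64-bit atomicrmw add should expand to an ldarx/stdcx. retry loop.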
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange_and_add:
; CHECK: ldarx
  %tmp = atomicrmw add i64* %mem, i64 %val monotonic
; CHECK: stdcx.
  ret i64 %tmp
}
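
; On POWER8, an 8-bit atomicrmw add should use lbarx/stbcx. directly.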
define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange_and_add8:
; CHECK-P8U: lbarx
  %tmp = atomicrmw add i8* %mem, i8 %val monotonic
; CHECK-P8U: stbcx.
  ret i8 %tmp
}
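
; On POWER8, a 16-bit atomicrmw add should use lharx/sthcx. directly.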
define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange_and_add16:
; CHECK-P8U: lharx
  %tmp = atomicrmw add i16* %mem, i16 %val monotonic
; CHECK-P8U: sthcx.
  ret i16 %tmp
}
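
; A 64-bit cmpxchg (which returns { i64, i1 }) should still expand to an
; ldarx/stdcx. sequence.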
define i64 @exchange_and_cmp(i64* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp:
; CHECK: ldarx
  %tmppair = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
  %tmp = extractvalue { i64, i1 } %tmppair, 0
; CHECK: stdcx.
; CHECK: stdcx.
  ret i64 %tmp
}
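
; On POWER8, an 8-bit cmpxchg should use lbarx/stbcx. directly.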
define i8 @exchange_and_cmp8(i8* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
; CHECK-P8U: lbarx
  %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
  %tmp = extractvalue { i8, i1 } %tmppair, 0
; CHECK-P8U: stbcx.
; CHECK-P8U: stbcx.
  ret i8 %tmp
}
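
; On POWER8, a 16-bit cmpxchg should use lharx/sthcx. directly.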
define i16 @exchange_and_cmp16(i16* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp16:
; CHECK-P8U: lharx
  %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
  %tmp = extractvalue { i16, i1 } %tmppair, 0
; CHECK-P8U: sthcx.
; CHECK-P8U: sthcx.
  ret i16 %tmp
}
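
; A 64-bit atomicrmw xchg should also expand to an ldarx/stdcx. loop.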
define i64 @exchange(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange:
; CHECK: ldarx
  %tmp = atomicrmw xchg i64* %mem, i64 1 monotonic
; CHECK: stdcx.
  ret i64 %tmp
}
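
; On POWER8, an 8-bit atomicrmw xchg should use lbarx/stbcx. directly.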
define i8 @exchange8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange8:
; CHECK-P8U: lbarx
  %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
; CHECK-P8U: stbcx.
  ret i8 %tmp
}
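
; On POWER8, a 16-bit atomicrmw xchg should use lharx/sthcx. directly.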
define i16 @exchange16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange16:
; CHECK-P8U: lharx
  %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
; CHECK-P8U: sthcx.
  ret i16 %tmp
}
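
; A 64-bit atomic release store should be an lwsync barrier followed by a
; plain std; no store-conditional (stdcx.) loop is needed.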
define void @atomic_store(i64* %mem, i64 %val) nounwind {
entry:
; CHECK: @atomic_store
  store atomic i64 %val, i64* %mem release, align 64
; CHECK: lwsync
; CHECK-NOT: stdcx
; CHECK: std
  ret void
}
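
; A 64-bit atomic acquire load should be a plain ld followed by an lwsync
; barrier; no load-reserve (ldarx) loop is needed.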
define i64 @atomic_load(i64* %mem) nounwind {
entry:
; CHECK: @atomic_load
  %tmp = load atomic i64, i64* %mem acquire, align 64
; CHECK-NOT: ldarx
; CHECK: ld
; CHECK: lwsync
  ret i64 %tmp
}