diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index afa6cf090d0..e089047d013 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -166,13 +166,6 @@ See CodeGen/SystemZ/alloca-01.ll for an example.
 
 --
 
-Atomic loads and stores use the default compare-and-swap based implementation.
-This is much too conservative in practice, since the architecture guarantees
-that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are
-inherently atomic.
-
---
-
 If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG.
 
 --
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index af5b52f1aee..b0a8fca7de7 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -134,10 +134,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
       setOperationAction(ISD::SDIVREM, VT, Custom);
       setOperationAction(ISD::UDIVREM, VT, Custom);
 
-      // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
-      // FIXME: probably much too conservative.
-      setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
-      setOperationAction(ISD::ATOMIC_STORE, VT, Expand);
+      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
+      // stores, putting a serialization instruction after the stores.
+      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
 
       // No special instructions for these.
       setOperationAction(ISD::CTPOP, VT, Expand);
@@ -2001,11 +2001,32 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
                                    MVT::i64, HighOp, Low32);
 }
 
+// Op is an atomic load. Lower it into a normal volatile load.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
+                        Node->getChain(), Node->getBasePtr(),
+                        Node->getMemoryVT(), Node->getMemOperand());
+}
+
+// Op is an atomic store. Lower it into a normal volatile store followed
+// by a serialization.
+SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
+                                    Node->getBasePtr(), Node->getMemoryVT(),
+                                    Node->getMemOperand());
+  return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
+                                    Chain), 0);
+}
+
 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
-SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
-                                                SelectionDAG &DAG,
-                                                unsigned Opcode) const {
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
+                                                   SelectionDAG &DAG,
+                                                   unsigned Opcode) const {
   AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
 
   // 32-bit operations need no code outside the main loop.
@@ -2195,27 +2216,31 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
   case ISD::OR:
     return lowerOR(Op, DAG);
   case ISD::ATOMIC_SWAP:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+  case ISD::ATOMIC_STORE:
+    return lowerATOMIC_STORE(Op, DAG);
+  case ISD::ATOMIC_LOAD:
+    return lowerATOMIC_LOAD(Op, DAG);
   case ISD::ATOMIC_LOAD_ADD:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
   case ISD::ATOMIC_LOAD_SUB:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
   case ISD::ATOMIC_LOAD_AND:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
   case ISD::ATOMIC_LOAD_OR:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
   case ISD::ATOMIC_LOAD_XOR:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
   case ISD::ATOMIC_LOAD_NAND:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
   case ISD::ATOMIC_LOAD_MIN:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
   case ISD::ATOMIC_LOAD_MAX:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
   case ISD::ATOMIC_LOAD_UMIN:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
   case ISD::ATOMIC_LOAD_UMAX:
-    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
+    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
   case ISD::ATOMIC_CMP_SWAP:
     return lowerATOMIC_CMP_SWAP(Op, DAG);
   case ISD::STACKSAVE:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 0b36f9fc7fe..4cbb30da8b5 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -276,8 +276,10 @@ private:
   SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG,
-                           unsigned Opcode) const;
+  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
+                              unsigned Opcode) const;
   SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll
index a5bc8833e78..f3acd605b01 100644
--- a/test/CodeGen/SystemZ/atomic-load-01.ll
+++ b/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
 define i8 @f1(i8 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i8 *%src seq_cst, align 1
   ret i8 %val
diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll
index 2c9bbdb488a..d9bec60f4c1 100644
--- a/test/CodeGen/SystemZ/atomic-load-02.ll
+++ b/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
 define i16 @f1(i16 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i16 *%src seq_cst, align 2
   ret i16 %val
diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll
index 1fb41f5e39a..7e5eb9249a9 100644
--- a/test/CodeGen/SystemZ/atomic-load-03.ll
+++ b/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -2,12 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; Using CS is probably too conservative.
-define i32 @f1(i32 %dummy, i32 *%src) {
+define i32 @f1(i32 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lhi %r2, 0
-; CHECK: cs %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i32 *%src seq_cst, align 4
   ret i32 %val
diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll
index 92cac406e20..c7a9a98a425 100644
--- a/test/CodeGen/SystemZ/atomic-load-04.ll
+++ b/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -2,12 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; Using CSG is probably too conservative.
-define i64 @f1(i64 %dummy, i64 *%src) {
+define i64 @f1(i64 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lghi %r2, 0
-; CHECK: csg %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i64 *%src seq_cst, align 8
   ret i64 %val
diff --git a/test/CodeGen/SystemZ/atomic-store-01.ll b/test/CodeGen/SystemZ/atomic-store-01.ll
index 53ed24f623c..952e1a91216 100644
--- a/test/CodeGen/SystemZ/atomic-store-01.ll
+++ b/test/CodeGen/SystemZ/atomic-store-01.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
 define void @f1(i8 %val, i8 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: stc %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i8 %val, i8 *%src seq_cst, align 1
   ret void
diff --git a/test/CodeGen/SystemZ/atomic-store-02.ll b/test/CodeGen/SystemZ/atomic-store-02.ll
index 42d6695b51d..c9576e55656 100644
--- a/test/CodeGen/SystemZ/atomic-store-02.ll
+++ b/test/CodeGen/SystemZ/atomic-store-02.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
 define void @f1(i16 %val, i16 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: sth %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i16 %val, i16 *%src seq_cst, align 2
   ret void
diff --git a/test/CodeGen/SystemZ/atomic-store-03.ll b/test/CodeGen/SystemZ/atomic-store-03.ll
index 846c86fd366..459cb6a94e1 100644
--- a/test/CodeGen/SystemZ/atomic-store-03.ll
+++ b/test/CodeGen/SystemZ/atomic-store-03.ll
@@ -2,14 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
 define void @f1(i32 %val, i32 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: l %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: cs %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: st %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i32 %val, i32 *%src seq_cst, align 4
   ret void
diff --git a/test/CodeGen/SystemZ/atomic-store-04.ll b/test/CodeGen/SystemZ/atomic-store-04.ll
index 24615b11565..7f2406eb546 100644
--- a/test/CodeGen/SystemZ/atomic-store-04.ll
+++ b/test/CodeGen/SystemZ/atomic-store-04.ll
@@ -2,14 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
 define void @f1(i64 %val, i64 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lg %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: csg %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: stg %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i64 %val, i64 *%src seq_cst, align 8
   ret void
diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll
index d55ea2133e8..62e9796fa21 100644
--- a/test/CodeGen/SystemZ/cond-store-01.ll
+++ b/test/CodeGen/SystemZ/cond-store-01.ll
@@ -347,11 +347,10 @@ define void @f19(i8 *%ptr, i8 %alt, i32 %limit) {
 define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lb {{%r[0-9]+}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
-; CHECK: stc {{%r[0-9]+}},
+; CHECK: stc {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load atomic i8 *%ptr unordered, align 1
@@ -367,7 +366,7 @@ define void @f21(i8 *%ptr, i8 %alt, i32 %limit) {
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lb %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i8 *%ptr
diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll
index 91bc4860b38..4fbcdaba510 100644
--- a/test/CodeGen/SystemZ/cond-store-02.ll
+++ b/test/CodeGen/SystemZ/cond-store-02.ll
@@ -347,11 +347,10 @@ define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
 define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lh {{%r[0-9]+}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
-; CHECK: sth {{%r[0-9]+}},
+; CHECK: sth {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load atomic i16 *%ptr unordered, align 2
@@ -367,7 +366,7 @@ define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lh %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i16 *%ptr
diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll
index d4fd48d6132..4b22555d0d6 100644
--- a/test/CodeGen/SystemZ/cond-store-03.ll
+++ b/test/CodeGen/SystemZ/cond-store-03.ll
@@ -272,7 +272,7 @@ define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
 define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f16:
-; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: l {{%r[0-5]}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
 ; CHECK: st {{%r[0-5]}}, 0(%r2)
@@ -291,7 +291,7 @@ define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: l %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i32 *%ptr
diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll
index fc565c432ff..346b51a17d7 100644
--- a/test/CodeGen/SystemZ/cond-store-04.ll
+++ b/test/CodeGen/SystemZ/cond-store-04.ll
@@ -164,7 +164,7 @@ define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
 define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CSG.
 ; CHECK-LABEL: f10:
-; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: lg {{%r[0-5]}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
@@ -183,7 +183,7 @@ define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lg %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i64 *%ptr
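Note (not part of the patch itself): a minimal C++ sketch of the kind of source that exercises the new lowering. The variable and function names are illustrative only; the expected instruction sequences are the ones the updated atomic-load/atomic-store tests above check for, with the exact mask on the bcr serialization (14 or 15) left to the backend.

// Illustrative only: compiled for s390x-linux-gnu at -O2, the seq_cst
// accesses below become atomic loads/stores in LLVM IR.  With this patch
// they should select a plain load/store (here l/st for i32) plus a
// "bcr 1x, %r0" serialization, rather than a compare-and-swap loop.
#include <atomic>

std::atomic<int> Counter;

int loadCounter() {
  // Becomes a "load atomic ... seq_cst" in IR; handled by lowerATOMIC_LOAD.
  return Counter.load(std::memory_order_seq_cst);
}

void storeCounter(int Value) {
  // Becomes a "store atomic ... seq_cst" in IR; handled by lowerATOMIC_STORE,
  // i.e. a normal store followed by a serialization instruction.
  Counter.store(Value, std::memory_order_seq_cst);
}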