diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 567a6b7451f..d95361eed15 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1093,20 +1093,6 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } break; - case ISD::ATOMIC_LOAD_SUB: - // Try to convert subtractions of constants to additions. - if (ConstantSDNode *Op2 = dyn_cast(Node->getOperand(2))) { - uint64_t Value = -Op2->getZExtValue(); - EVT VT = Node->getValueType(0); - if (VT == MVT::i32 || isInt<32>(Value)) { - SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1), - CurDAG->getConstant(int32_t(Value), VT) }; - Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD, - Node->getVTList(), Ops, array_lengthof(Ops)); - } - } - break; - case SystemZISD::SELECT_CCMASK: { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index e916771efa8..25972007df1 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -160,6 +160,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); + // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are + // available, or if the operand is constant. + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // No special instructions for these. setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); @@ -2266,6 +2270,44 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, return DAG.getMergeValues(RetOps, 2, DL); } +// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations +// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit +// operations into additions. 
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+  EVT MemVT = Node->getMemoryVT();
+  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
+    // A full-width operation: rewrite it as an addition or keep it as-is.
+    assert(Op.getValueType() == MemVT && "Mismatched VTs");
+    SDValue Src2 = Node->getVal();
+    SDValue NegSrc2;
+    SDLoc DL(Src2);
+
+    if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
+      // Use an addition if the operand is constant and either LAA(G) is
+      // available or the negated value is in the range of A(G)FI.
+      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
+      if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1())
+        NegSrc2 = DAG.getConstant(Value, MemVT);
+    } else if (TM.getSubtargetImpl()->hasInterlockedAccess1())
+      // Use LAA(G) if available, adding the run-time negation (0 - Src2).
+      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
+                            Src2);
+
+    if (NegSrc2.getNode())
+      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
+                           Node->getChain(), Node->getBasePtr(), NegSrc2,
+                           Node->getMemOperand(), Node->getOrdering(),
+                           Node->getSynchScope());
+
+    // No conversion applied (NegSrc2 is unset): use the node as-is.
+    return Op;
+  }
+
+  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+}
+
 // Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation.  Lower the first two
 // into a fullword ATOMIC_CMP_SWAPW operation.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, @@ -2394,7 +2436,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::ATOMIC_LOAD_ADD: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: - return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); + return lowerATOMIC_LOAD_SUB(Op, DAG); case ISD::ATOMIC_LOAD_AND: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); case ISD::ATOMIC_LOAD_OR: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 01f09c37372..2caa0bcb6df 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -283,6 +283,7 @@ private: SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const; + SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 91c3d3c3d0d..e1af0932c23 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1204,70 +1204,86 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { - def LAA : LoadAndOpRSY<"laa", 0xEBF8, null_frag, GR32>; - def LAAG : LoadAndOpRSY<"laag", 0xEBE8, null_frag, GR64>; + def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; + def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>; def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>; - def LAN : LoadAndOpRSY<"lan", 0xEBF4, 
null_frag, GR32>; - def LANG : LoadAndOpRSY<"lang", 0xEBE4, null_frag, GR64>; - def LAO : LoadAndOpRSY<"lao", 0xEBF6, null_frag, GR32>; - def LAOG : LoadAndOpRSY<"laog", 0xEBE6, null_frag, GR64>; - def LAX : LoadAndOpRSY<"lax", 0xEBF7, null_frag, GR32>; - def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, null_frag, GR64>; + def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_32, GR32>; + def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_64, GR64>; + def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_32, GR32>; + def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_64, GR64>; + def LAX : LoadAndOpRSY<"lax", 0xEBF7, atomic_load_xor_32, GR32>; + def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_64, GR64>; } -def ATOMIC_SWAPW : AtomicLoadWBinaryReg; -def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32; -def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64; +def ATOMIC_SWAPW : AtomicLoadWBinaryReg; +def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32; +def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64; -def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg; -def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64; +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64; +} -def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg; -def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64; +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg; +def ATOMIC_LOAD_SR : 
AtomicLoadBinaryReg32; +def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64; -def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg; -def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64; +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64; +let 
Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg; def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm AssemblerPredicate<"Feature"##intname, extname>, SubtargetFeature; +class SystemZMissingFeature + : Predicate<"!Subtarget.has"##intname##"()">; + def FeatureDistinctOps : SystemZFeature< "distinct-ops", "DistinctOps", "Assume that the distinct-operands facility is installed" @@ -45,6 +48,7 @@ def FeatureInterlockedAccess1 : SystemZFeature< "interlocked-access1", "InterlockedAccess1", "Assume that interlocked-access facility 1 is installed" >; +def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; diff --git a/test/CodeGen/SystemZ/atomicrmw-add-05.ll 
b/test/CodeGen/SystemZ/atomicrmw-add-05.ll new file mode 100644 index 00000000000..956c0d9642c --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic additions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check addition of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: laa %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAA range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: laa %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAA range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: laa %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. 
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-add-06.ll b/test/CodeGen/SystemZ/atomicrmw-add-06.ll new file mode 100644 index 00000000000..f508858d156 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-add-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic additions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check addition of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], 1 +; CHECK: laag %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAAG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laag %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAAG range. 
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laag %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-05.ll b/test/CodeGen/SystemZ/atomicrmw-and-05.ll new file mode 100644 index 00000000000..f0b999c6043 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic ANDs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check AND of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check AND of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: lan %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAN range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lan %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. 
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAN range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lan %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-and-06.ll b/test/CodeGen/SystemZ/atomicrmw-and-06.ll new file mode 100644 index 00000000000..e5b71945d57 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-and-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic ANDs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check AND of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check AND of -2, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], -2 +; CHECK: lang %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -2 seq_cst + ret i64 %res +} + +; Check the high end of the LANG range. 
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: lang %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LANG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: lang %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-05.ll b/test/CodeGen/SystemZ/atomicrmw-or-05.ll new file mode 100644 index 00000000000..b38654ca6f0 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic ORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check OR of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check OR of 1, which needs a temporary. 
+define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: lao %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAO range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lao %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAO range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lao %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-or-06.ll b/test/CodeGen/SystemZ/atomicrmw-or-06.ll new file mode 100644 index 00000000000..30874abfe4a --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-or-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic ORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check OR of a variable. 
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check OR of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], 1 +; CHECK: laog %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAOG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laog %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAOG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laog %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-05.ll b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll new file mode 100644 index 00000000000..7668f0e2a7a --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll @@ -0,0 +1,69 @@ +; Test 32-bit atomic subtractions, z196 version. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check subtraction of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check subtraction of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], -1 +; CHECK: laa %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAA range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, 524288 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAA range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. 
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, -524292 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-06.ll b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll new file mode 100644 index 00000000000..5d11bdf96cd --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll @@ -0,0 +1,69 @@ +; Test 64-bit atomic subtractions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check subtraction of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check subtraction of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], -1 +; CHECK: laag %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAAG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, 524288 +; CHECK: laag %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAAG range. 
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, -524296 +; CHECK: laag %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-05.ll b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll new file mode 100644 index 00000000000..e9e7d30b357 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic XORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check XOR of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lax %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check XOR of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: lax %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw xor i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAX range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lax %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. 
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lax %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAX range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lax %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: lax %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-06.ll b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll new file mode 100644 index 00000000000..0870c6476f6 --- /dev/null +++ b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic XORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check XOR of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: laxg %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check XOR of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], 1 +; CHECK: laxg %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw xor i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAXG range. 
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laxg %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laxg %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAXG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laxg %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laxg %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +}