diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index e352f88afc4..1d845a5b5e4 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -558,6 +558,7 @@ void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
   case ISD::EntryToken:
     llvm_unreachable("EntryToken should have been excluded from the schedule!");
     break;
+  case ISD::MERGE_VALUES:
   case ISD::TokenFactor: // fall thru
     break;
   case ISD::CopyToReg: {
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1e8429221a6..5fb496ba0b4 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -176,6 +176,7 @@ namespace {
   private:
     SDNode *Select(SDValue N);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+    SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT);
 
     bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
     bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
@@ -1431,6 +1432,153 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
                                        array_lengthof(Ops));
 }
 
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) {
+  if (Node->hasAnyUseOfValue(0))
+    return 0;
+
+  // Optimize common patterns for __sync_add_and_fetch and
+  // __sync_sub_and_fetch where the result is not used. This allows us
+  // to use "lock" version of add, sub, inc, dec instructions.
+  // FIXME: Do not use special instructions but instead add the "lock"
+  // prefix to the target node somehow. The extra information will then be
+  // transferred to machine instruction and it denotes the prefix.
+  SDValue Chain = Node->getOperand(0);
+  SDValue Ptr = Node->getOperand(1);
+  SDValue Val = Node->getOperand(2);
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+    return 0;
+
+  bool isInc = false, isDec = false, isSub = false, isCN = false;
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+  if (CN) {
+    isCN = true;
+    int64_t CNVal = CN->getSExtValue();
+    if (CNVal == 1)
+      isInc = true;
+    else if (CNVal == -1)
+      isDec = true;
+    else if (CNVal >= 0)
+      Val = CurDAG->getTargetConstant(CNVal, NVT);
+    else {
+      isSub = true;
+      Val = CurDAG->getTargetConstant(-CNVal, NVT);
+    }
+  } else if (Val.hasOneUse() &&
+             Val.getOpcode() == ISD::SUB &&
+             X86::isZeroNode(Val.getOperand(0))) {
+    isSub = true;
+    Val = Val.getOperand(1);
+  }
+
+  unsigned Opc = 0;
+  switch (NVT.getSimpleVT()) {
+  default: return 0;
+  case MVT::i8:
+    if (isInc)
+      Opc = X86::LOCK_INC8m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC8m;
+    else if (isSub) {
+      if (isCN)
+        Opc = X86::LOCK_SUB8mi;
+      else
+        Opc = X86::LOCK_SUB8mr;
+    } else {
+      if (isCN)
+        Opc = X86::LOCK_ADD8mi;
+      else
+        Opc = X86::LOCK_ADD8mr;
+    }
+    break;
+  case MVT::i16:
+    if (isInc)
+      Opc = X86::LOCK_INC16m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC16m;
+    else if (isSub) {
+      if (isCN) {
+        if (Predicate_i16immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB16mi8;
+        else
+          Opc = X86::LOCK_SUB16mi;
+      } else
+        Opc = X86::LOCK_SUB16mr;
+    } else {
+      if (isCN) {
+        if (Predicate_i16immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD16mi8;
+        else
+          Opc = X86::LOCK_ADD16mi;
+      } else
+        Opc = X86::LOCK_ADD16mr;
+    }
+    break;
+  case MVT::i32:
+    if (isInc)
+      Opc = X86::LOCK_INC32m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC32m;
+    else if (isSub) {
+      if (isCN) {
+        if (Predicate_i32immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB32mi8;
+        else
+          Opc = X86::LOCK_SUB32mi;
+      } else
+        Opc = X86::LOCK_SUB32mr;
+    } else {
+      if (isCN) {
+        if (Predicate_i32immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD32mi8;
+        else
+          Opc = X86::LOCK_ADD32mi;
+      } else
+        Opc = X86::LOCK_ADD32mr;
+    }
+    break;
+  case MVT::i64:
+    if (isInc)
+      Opc = X86::LOCK_INC64m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC64m;
+    else if (isSub) {
+      Opc = X86::LOCK_SUB64mr;
+      if (isCN) {
+        if (Predicate_i64immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB64mi8;
+        else if (Predicate_i64immSExt32(Val.getNode()))
+          Opc = X86::LOCK_SUB64mi32;
+      }
+    } else {
+      Opc = X86::LOCK_ADD64mr;
+      if (isCN) {
+        if (Predicate_i64immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD64mi8;
+        else if (Predicate_i64immSExt32(Val.getNode()))
+          Opc = X86::LOCK_ADD64mi32;
+      }
+    }
+    break;
+  }
+
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
+                                                dl, NVT), 0);
+  SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+  if (isInc || isDec) {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain };
+    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0);
+    SDValue RetVals[] = { Undef, Ret };
+    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+  } else {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain };
+    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0);
+    SDValue RetVals[] = { Undef, Ret };
+    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+  }
+}
+
 SDNode *X86DAGToDAGISel::Select(SDValue N) {
   SDNode *Node = N.getNode();
   MVT NVT = Node->getValueType(0);
@@ -1475,6 +1623,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
   case X86ISD::ATOMSWAP64_DAG:
     return SelectAtomic64(Node, X86::ATOMSWAP6432);
 
+  case ISD::ATOMIC_LOAD_ADD: {
+    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+    if (RetVal)
+      return RetVal;
+    break;
+  }
+
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI: {
     SDValue N0 = Node->getOperand(0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c8eed721eb2..c4ed89e384b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2746,6 +2746,15 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
   return Mask;
 }
 
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
 /// their permute mask.
 static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
@@ -2852,15 +2861,6 @@ static bool isSplatVector(SDNode *N) {
   return true;
 }
 
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
-  return ((isa<ConstantSDNode>(Elt) &&
-           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
-          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
 /// to an zero vector.
 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -2874,13 +2874,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
       unsigned Opc = V2.getOpcode();
       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
        continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
        return false;
     } else if (Idx >= 0) {
       unsigned Opc = V1.getOpcode();
       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
        continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V1.getOperand(Idx)))
        return false;
     }
   }
@@ -3048,7 +3050,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
       continue;
     }
     SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
-    if (Elt.getNode() && isZeroNode(Elt))
+    if (Elt.getNode() && X86::isZeroNode(Elt))
       ++NumZeros;
     else
       break;
@@ -3221,7 +3223,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     if (Elt.getOpcode() != ISD::Constant &&
         Elt.getOpcode() != ISD::ConstantFP)
       IsAllConstants = false;
-    if (isZeroNode(Elt))
+    if (X86::isZeroNode(Elt))
       NumZero++;
     else {
       NonZeros |= (1 << i);
@@ -3298,7 +3300,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
 
     // Is it a vector logical left shift?
     if (NumElems == 2 && Idx == 1 &&
-        isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+        X86::isZeroNode(Op.getOperand(0)) &&
+        !X86::isZeroNode(Op.getOperand(1))) {
       unsigned NumBits = VT.getSizeInBits();
       return getVShift(true, VT,
                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index c3da894f0e4..579b42fab27 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -336,6 +336,10 @@ namespace llvm {
    /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
    /// instructions.
    unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+   /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+   /// constant +0.0.
+   bool isZeroNode(SDValue Elt);
  }
 
 //===--------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 1dd7e07964e..427f6db51c8 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1380,11 +1380,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                "xadd\t$val, $ptr",
                [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
                 TB, LOCK;
+
 def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                   "xchg\t$val, $ptr",
                   [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
 }
 
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+                         (ins i64mem:$dst, i64i8imm :$src2),
+                         "lock\n\t"
+                         "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+                           (ins i64mem:$dst, i64i32imm :$src2),
+                           "lock\n\t"
+                           "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+                         (ins i64mem:$dst, i64i8imm :$src2),
+                         "lock\n\t"
+                         "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+                           (ins i64mem:$dst, i64i32imm:$src2),
+                           "lock\n\t"
+                           "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "dec{q}\t$dst", []>, LOCK;
+
 // Atomic exchange, and, or, xor
 let Constraints = "$val = $dst", Defs = [EFLAGS],
                   usesCustomDAGSchedInserter = 1 in {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c547c76ec14..49da970ca24 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3255,6 +3255,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
                TB, LOCK;
 }
 
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+                    "lock\n\t"
+                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                     "lock\n\t"
+                     "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                     "lock\n\t"
+                     "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+                      "lock\n\t"
+                      "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                        "lock\n\t"
+                        "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                        "lock\n\t"
+                        "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                        "lock\n\t"
+                        "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                        "lock\n\t"
+                        "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+                   "lock\n\t"
+                   "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "inc{l}\t$dst", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+                    "lock\n\t"
+                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                     "lock\n\t"
+                     "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                     "lock\n\t"
+                     "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+                      "lock\n\t"
+                      "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                        "lock\n\t"
+                        "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                        "lock\n\t"
+                        "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                        "lock\n\t"
+                        "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                        "lock\n\t"
+                        "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+                   "lock\n\t"
+                   "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "dec{l}\t$dst", []>, LOCK;
+
 // Atomic exchange, and, or, xor
 let Constraints = "$val = $dst", Defs = [EFLAGS],
                   usesCustomDAGSchedInserter = 1 in {
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 00bcdf82e8d..72efa16866b 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xadd
+; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
 
 @var = external global i64 ; <i64*> [#uses=1]
 
 define i32 @main() nounwind {
 entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
   tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 ) ; <i64>:0 [#uses=0]
   unreachable
 }
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 00000000000..c0092108f14
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+  %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) ; [#uses=0]
+  ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+  %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; [#uses=0]
+  ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+  %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2) ; [#uses=0]
+  ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+  %0 = sext i32 %v to i64 ; [#uses=1]
+  %1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0) ; [#uses=0]
+  ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+  %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1) ; [#uses=0]
+  ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+  %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2) ; [#uses=0]
+  ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+  %0 = trunc i32 %v to i8 ; [#uses=1]
+  %1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0) ; [#uses=0]
+  ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+  %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1) ; [#uses=0]
+  ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+  %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2) ; [#uses=0]
+  ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+  %0 = trunc i32 %v to i16 ; [#uses=1]
+  %1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0) ; [#uses=0]
+  ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+  %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1) ; [#uses=0]
+  ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+  %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2) ; [#uses=0]
+  ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+  %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v) ; [#uses=0]
+  ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+  %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1) ; [#uses=0]
+  ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+  %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2) ; [#uses=0]
+  ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+  %0 = sext i32 %v to i64 ; [#uses=1]
+  %1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0) ; [#uses=0]
+  ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+  %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1) ; [#uses=0]
+  ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+  %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2) ; [#uses=0]
+  ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+  %0 = trunc i32 %v to i8 ; [#uses=1]
+  %1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0) ; [#uses=0]
+  ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+  %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1) ; [#uses=0]
+  ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+  %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2) ; [#uses=0]
+  ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+  %0 = trunc i32 %v to i16 ; [#uses=1]
+  %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; [#uses=0]
+  ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+  %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1) ; [#uses=0]
+  ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+  %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2) ; [#uses=0]
+  ret void
+}