diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index cd589f59010..4412b45e5bd 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -27,6 +27,7 @@ class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; +class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -48,6 +49,8 @@ FunctionPass *createThumb2SizeReductionPass(); /// \brief Creates an ARM-specific Target Transformation Info pass. ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); +FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM); + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp new file mode 100644 index 00000000000..33cdda5d6e1 --- /dev/null +++ b/lib/Target/ARM/ARMAtomicExpandPass.cpp @@ -0,0 +1,397 @@ +//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===// + // + // The LLVM Compiler Infrastructure + // + // This file is distributed under the University of Illinois Open Source + // License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This file contains a pass (at IR level) to replace atomic instructions with + // appropriate (intrinsic-based) ldrex/strex loops. + // + //===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-atomic-expand" +#include "ARM.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +namespace { + class ARMAtomicExpandPass : public FunctionPass { + const TargetLowering *TLI; + public: + static char ID; // Pass identification, replacement for typeid + explicit ARMAtomicExpandPass(const TargetMachine *TM = 0) + : FunctionPass(ID), TLI(TM->getTargetLowering()) {} + + bool runOnFunction(Function &F) override; + bool expandAtomicInsts(Function &F); + + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicStore(StoreInst *SI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + + /// Perform a load-linked operation on Addr, returning a "Value *" with the + /// corresponding pointee type. This may entail some non-trivial operations + /// to truncate or reconstruct illegal types since intrinsics must be legal. + Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord); + + /// Perform a store-conditional operation to Addr. Return the status of the + /// store: 0 if it succeeded, non-zero otherwise. + Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, + AtomicOrdering Ord); + + /// Return true if the given (atomic) instruction should be expanded by this + /// pass.
+ bool shouldExpandAtomic(Instruction *Inst); + }; } + +char ARMAtomicExpandPass::ID = 0; + +FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) { + return new ARMAtomicExpandPass(TM); +} + +bool ARMAtomicExpandPass::runOnFunction(Function &F) { + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. + for (BasicBlock &BB : F) + for (Instruction &Inst : BB) { + if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || + (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || + (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) + AtomicInsts.push_back(&Inst); + } + + bool MadeChange = false; + for (Instruction *Inst : AtomicInsts) { + if (!shouldExpandAtomic(Inst)) + continue; + + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + MadeChange |= expandAtomicRMW(AI); + else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) + MadeChange |= expandAtomicCmpXchg(CI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + MadeChange |= expandAtomicLoad(LI); + else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + MadeChange |= expandAtomicStore(SI); + else + llvm_unreachable("Unknown atomic instruction"); + } + + return MadeChange; +} + +bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) { + // Load instructions don't actually need a leading fence, even in the + // SequentiallyConsistent case. + AtomicOrdering MemOpOrder = + TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + + // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is + // an ldrexd (A3.5.3). + IRBuilder<> Builder(LI); + Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + + insertTrailingFence(Builder, LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) { + // The only atomic 64-bit store on ARM is an strexd that succeeds, which means + // we need a loop and the entire instruction is essentially an "atomicrmw + // xchg" that ignores the value loaded. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp ne i32 %stored, 0 + // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely.
std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal; + switch (AI->getOperation()) { + case AtomicRMWInst::Xchg: + NewVal = AI->getValOperand(); + break; + case AtomicRMWInst::Add: + NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Sub: + NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::And: + NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Nand: + NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()), + "new"); + break; + case AtomicRMWInst::Or: + NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Xor: + NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + default: + llvm_unreachable("Unknown atomic op"); + } + + Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + insertTrailingFence(Builder, Order); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering Order = CI->getSuccessOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The standard expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp ne i32 %stored, 0 + // br i1 %try_again, label %cmpxchg.start, label %cmpxchg.end + // cmpxchg.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + BasicBlock *TryStoreBB = + BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too.
It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = + Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB); + + Builder.SetInsertPoint(TryStoreBB); + Value *StoreSuccess = + storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + // Finally, make sure later instructions don't get reordered with a fence if + // necessary. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + insertTrailingFence(Builder, Order); + + CI->replaceAllUsesWith(Loaded); + CI->eraseFromParent(); + + return true; +} + +Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast<PointerType>(Addr->getType())->getElementType()); +} + +Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ?
Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TLI->getInsertFencesForAtomic()) + return Ord; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + Builder.CreateFence(Release); + + // The exclusive operations don't need any barrier if we're adding separate + // fences. + return Monotonic; +} + +void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TLI->getInsertFencesForAtomic()) + return; + + if (Ord == Acquire || Ord == AcquireRelease) + Builder.CreateFence(Acquire); + else if (Ord == SequentiallyConsistent) + Builder.CreateFence(SequentiallyConsistent); +} + +bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) { + // Loads and stores less than 64 bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong: + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + return LI->getType()->getPrimitiveSizeInBits() == 64; + + // For the real atomic operations, we have ldrex/strex up to 64 bits. + return Inst->getType()->getPrimitiveSizeInBits() <= 64; +}
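For reference, this is roughly the IR-level rewrite the new pass performs for a 32-bit atomicrmw on a pre-v8 target, where getInsertFencesForAtomic() is true. This is a sketch only; the function, value and label names are illustrative, not taken from this patch:

define i32 @add_seq_cst(i32* %p, i32 %v) {
  %old = atomicrmw add i32* %p, i32 %v seq_cst
  ret i32 %old
}

becomes approximately:

define i32 @add_seq_cst(i32* %p, i32 %v) {
entry:
  fence release
  br label %atomicrmw.start

atomicrmw.start:
  %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %p)
  %new = add i32 %loaded, %v
  %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %p)
  %tryagain = icmp ne i32 %stored, 0
  br i1 %tryagain, label %atomicrmw.start, label %atomicrmw.end

atomicrmw.end:
  fence seq_cst
  ret i32 %loaded
}

declare i32 @llvm.arm.ldrex.p0i32(i32*)
declare i32 @llvm.arm.strex.p0i32(i32, i32*)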
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 970c63342c0..70e11c50e6d 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -252,8 +252,6 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16, unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) { - Op = Op64; - VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); - } else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 7> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); -} - SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); @@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - - case ISD::ATOMIC_LOAD: - if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) - return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); - else - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_ADD_I8, - ARM::ATOMIC_LOAD_ADD_I16, - ARM::ATOMIC_LOAD_ADD_I32, - ARM::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_SUB_I8, - ARM::ATOMIC_LOAD_SUB_I16, - ARM::ATOMIC_LOAD_SUB_I32, - ARM::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_AND_I8, - ARM::ATOMIC_LOAD_AND_I16, - ARM::ATOMIC_LOAD_AND_I32, - ARM::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_OR_I8, - ARM::ATOMIC_LOAD_OR_I16, - ARM::ATOMIC_LOAD_OR_I32, - ARM::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_XOR_I8, - ARM::ATOMIC_LOAD_XOR_I16, - ARM::ATOMIC_LOAD_XOR_I32, - ARM::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_NAND_I8, - ARM::ATOMIC_LOAD_NAND_I16, - ARM::ATOMIC_LOAD_NAND_I32, - ARM::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MIN_I8, - ARM::ATOMIC_LOAD_MIN_I16, - ARM::ATOMIC_LOAD_MIN_I32, - ARM::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MAX_I8, - ARM::ATOMIC_LOAD_MAX_I16, - ARM::ATOMIC_LOAD_MAX_I32, - ARM::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMIN_I8, - ARM::ATOMIC_LOAD_UMIN_I16, - ARM::ATOMIC_LOAD_UMIN_I32, - ARM::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMAX_I8, - ARM::ATOMIC_LOAD_UMAX_I16, - ARM::ATOMIC_LOAD_UMAX_I32, - ARM::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_SWAP_I8, - ARM::ATOMIC_SWAP_I16, - ARM::ATOMIC_SWAP_I32, - ARM::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_CMP_SWAP_I8, - ARM::ATOMIC_CMP_SWAP_I16, - ARM::ATOMIC_CMP_SWAP_I32, - ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N);
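The i64 cases that SelectAtomic() used to route to the ATOMIC_*_I64 pseudo-instructions are now covered in the IR pass by marshalling through the paired intrinsics, which have legal types even though i64 itself does not. A sketch of what loadLinked() emits for a 64-bit access (value names illustrative):

  %addr8 = bitcast i64* %addr to i8*
  %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr8)
  %lo = extractvalue { i32, i32 } %lohi, 0
  %hi = extractvalue { i32, i32 } %lohi, 1
  %lo64 = zext i32 %lo to i64
  %hi64 = zext i32 %hi to i64
  %hi.shifted = shl i64 %hi64, 32
  %val64 = or i64 %lo64, %hi.shifted

storeConditional() performs the inverse marshalling, truncating the value into low and high i32 halves for @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr8), whose i32 result is the usual 0-on-success status.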
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 737007aa4cd..310d845db11 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -737,28 +737,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and - // handled normally. + // ATOMIC_FENCE needs custom lowering; the others should have been expanded + // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - // Custom lowering for 64-bit ops - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. @@ -913,44 +901,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - bool isThumb2, unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, - {ARM::LDREXH, ARM::t2LDREXH}, - {ARM::LDREX, ARM::t2LDREX}, - {ARM::LDREXD, ARM::t2LDREXD}}; - static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, - {ARM::LDAEXH, ARM::t2LDAEXH}, - {ARM::LDAEX, ARM::t2LDAEX}, - {ARM::LDAEXD, ARM::t2LDAEXD}}; - static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, - {ARM::STREXH, ARM::t2STREXH}, - {ARM::STREX, ARM::t2STREX}, - {ARM::STREXD, ARM::t2STREXD}}; - static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, - {ARM::STLEXH, ARM::t2STLEXH}, - {ARM::STLEX, ARM::t2STLEX}, - {ARM::STLEXD, ARM::t2STLEXD}}; - - const unsigned (*LoadOps)[2], (*StoreOps)[2]; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; - StrOpc = StoreOps[Log2_32(Size)][isThumb2]; -} - // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
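With getExclusiveOperation() gone, the choice between plain and acquire/release exclusives is made by the IR pass instead: on an ARMv8 target getInsertFencesForAtomic() stays false, so loadLinked()/storeConditional() keep the original ordering and pick the ordered intrinsics directly, e.g. for an i32 seq_cst operation (a sketch, names illustrative):

  %loaded = call i32 @llvm.arm.ldaex.p0i32(i32* %p)
  ...
  %stored = call i32 @llvm.arm.stlex.p0i32(i32 %new, i32* %p)

These select to LDAEX/STLEX, the opcodes from the LoadAcqs/StoreRels tables deleted above, so no dmb fences are needed around the loop.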
@@ -6028,35 +5978,6 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -static void -ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(Node->getOperand(0)); // Chain - Ops.push_back(Node->getOperand(1)); // Ptr - for(unsigned i=2; i<Node->getNumOperands(); i++) { - // Low part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(0))); - // High part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(1))); - } - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = DAG.getAtomic( - Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), - cast<MemSDNode>(Node)->getMemOperand(), AN->getSuccessOrdering(), - AN->getFailureOrdering(), AN->getSynchScope()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); - Results.push_back(Result.getValue(2)); -} - static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, @@ -6174,22 +6095,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; - case ISD::ATOMIC_STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG); - return; } if (Res.getNode()) Results.push_back(Res); @@ -6199,531 +6104,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, // ARM Scheduler Hooks //===----------------------------------------------------------------------===// -MachineBasicBlock * -ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); - - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); - MRI.constrainRegClass(newval, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(dest).addReg(oldval)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // <binop> scratch2, dest, incr - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(incr).addReg(dest)).addReg(0); - else - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(dest).addReg(incr)).addReg(0); - } - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now.
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - unsigned oldval = dest; - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc, extendOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); - case 1: - extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case 2: - extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case 4: - extendOpc = 0; - break; - } - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // (sign extend dest, if required) - // cmp dest, incr - // cmov.cond scratch2, incr, dest - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - - // Sign extend the value, if necessary. - if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass - : &ARM::GPRnopcRegClass); - if (!isThumb2) - MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) - .addReg(dest) - .addImm(0)); - } - - // Build compare and cmov instructions. - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(oldval).addReg(incr)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ?
ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg, - bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - unsigned vallo = MI->getOperand(3).getReg(); - unsigned valhi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = - static_cast<AtomicOrdering>(MI->getOperand(IsCmpxchg ? 7 : 5).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); - MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg || IsMinMax) - contBB = MF->CreateMachineBasicBlock(LLVM_BB); - if (IsCmpxchg) - cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - - MF->insert(It, loopMBB); - if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); - if (IsCmpxchg) MF->insert(It, cont2BB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned storesuccess = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrexd r2, r3, ptr - // <binopa> r0, r2, incr - // <binopb> r1, r3, incr - // strexd storesuccess, r0, r1, ptr - // cmp storesuccess, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define) - .addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - - unsigned StoreLo, StoreHi; - if (IsCmpxchg) { - // Add early exit - for (unsigned i = 0; i < 2; i++) { - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ?
ARM::t2CMPrr : - ARM::CMPrr)) - .addReg(i == 0 ? destlo : desthi) - .addReg(i == 0 ? vallo : valhi)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(i == 0 ? contBB : cont2BB); - BB = (i == 0 ? contBB : cont2BB); - } - - // Copy to physregs for strexd - StoreLo = MI->getOperand(5).getReg(); - StoreHi = MI->getOperand(6).getReg(); - } else if (Op1) { - // Perform binary operation - unsigned tmpRegLo = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) - .addReg(destlo).addReg(vallo)) - .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); - unsigned tmpRegHi = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) - .addReg(desthi).addReg(valhi)) - .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - - StoreLo = tmpRegLo; - StoreHi = tmpRegHi; - } else { - // Copy to physregs for strexd - StoreLo = vallo; - StoreHi = valhi; - } - if (IsMinMax) { - // Compare and branch to exit block. - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(contBB); - BB = contBB; - StoreLo = vallo; - StoreHi = valhi; - } - - // Store - if (isThumb2) { - MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); - MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); - } else { - // Marshal a pair... - unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(StoreLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) - .addReg(r1) - .addReg(StoreHi) - .addImm(ARM::gsub_1); - - // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StorePair).addReg(ptr)); - } - // Cmp+jump - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(storesuccess).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - } - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); - - if (isThumb2) { - MIB.addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr); - - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); - - // Copy GPRPair0 into dest. (This copy will normally be coalesced.) - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - AddDefaultPred(MIB); - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -7654,130 +7034,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); return BB; } - case ARM::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - - case ARM::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - - case ARM::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - - case ARM::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - - case ARM::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - - case ARM::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ?
ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - - case ARM::ATOMIC_LOAD_MIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); - - case ARM::ATOMIC_LOAD_MAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); - - case ARM::ATOMIC_LOAD_UMIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); - - case ARM::ATOMIC_LOAD_UMAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); - - case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); - case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); - case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); - - case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); - case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); - case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); - - case ARM::ATOMIC_LOAD_I64: - return EmitAtomicLoad64(MI, BB); - - case ARM::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, - isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, - isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, - isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, - isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_SWAP_I64: - return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMIC_CMP_SWAP_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMIC_LOAD_MIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMIC_LOAD_UMIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 022945f625a..f33e6db9d77 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -548,29 +548,6 @@ namespace llvm { SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; - MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode) const; - MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Op1, - unsigned Op2, - bool NeedsCarry = false, - bool IsCmpxchg = false, - bool IsMinMax = false, - ARMCC::CondCodes CC = ARMCC::AL) const; - MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) const; - MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, - MachineBasicBlock *BB) const; - void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index dfcc11edcd9..1f09c9f07d2 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4336,209 +4336,6 @@ let usesCustomInserter = 1, Defs = [CPSR] in { // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; - -// Atomic pseudo-insts which will be lowered to ldrex/strex loops. -// (64-bit pseudos use hand-written selection code).
- let mayLoad = 1, mayStore = 1 in { - def ATOMIC_LOAD_ADD_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, 
[]>; - def ATOMIC_LOAD_XOR_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2, i32imm:$ordering), - NoItinerary, []>; - } - let mayLoad = 1 in - def ATOMIC_LOAD_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, i32imm:$ordering), - NoItinerary, []>; } let usesCustomInserter = 1 in { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6a6d7ed01c5..4ae539a1bcc 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -226,6 +226,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } bool ARMPassConfig::addPreISel() { + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) + addPass(createARMAtomicExpandPass(TM)); + if 
(TM->getOptLevel() != CodeGenOpt::None)
     addPass(createGlobalMergePass(TM));
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9b5fa75fe2a..8e148839132 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_public_tablegen_target(ARMCommonTableGen)
 add_llvm_target(ARMCodeGen
   A15SDOptimizer.cpp
   ARMAsmPrinter.cpp
+  ARMAtomicExpandPass.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
   ARMCodeEmitter.cpp
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 7cf45ebde76..a881d5fd715 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -55,8 +55,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test3:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -65,8 +65,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test3:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -80,8 +80,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test4:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -90,8 +90,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test4:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -105,8 +105,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test5:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
 ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -115,8 +115,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test5:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -151,8 +151,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-LABEL: test7:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: cmp [[REG1]]
-; CHECK: cmpeq [[REG2]]
+; CHECK-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
+; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK: bne
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
@@ -162,9 +163,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB-LABEL: test7:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: cmp [[REG1]]
-; CHECK-THUMB: it eq
-; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
+; CHECK-THUMB-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
@@ -216,9 +217,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test10:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blt
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwls [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwle [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
+; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -227,9 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test10:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blt
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movls.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movle [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -243,9 +262,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test11:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blo
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwls [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwls [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
+; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -255,9 +283,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test11:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blo
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movls.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movls [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -271,9 +308,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test12:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bge
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwhi [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwgt [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
+; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -282,9 +328,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test12:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bge
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movgt [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
@@ -298,9 +353,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test13:
 ; CHECK: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bhs
-; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwhi [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwhi [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
+; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK: cmp
 ; CHECK: bne
@@ -309,9 +373,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB-LABEL: test13:
 ; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bhs
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movhi [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 87e76a48c5e..1ca78bfd1e3 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THUMB
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -15,7 +15,7 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -38,7 +38,7 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -61,7 +61,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -75,7 +75,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+define void @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_add_i64:
   %old = atomicrmw add i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -84,10 +84,10 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
 ; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
 ; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -95,9 +95,9 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
@@ -109,7 +109,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -132,7 +132,7 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -155,7 +155,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -169,7 +169,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+define void @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_sub_i64:
   %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -178,10 +178,10 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
 ; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
 ; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
@@ -189,9 +189,9 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
@@ -203,7 +203,7 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -226,7 +226,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -249,7 +249,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -263,7 +263,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+define void @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_and_i64:
   %old = atomicrmw and i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
@@ -272,20 +272,20 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
@@ -297,7 +297,7 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -320,7 +320,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -343,7 +343,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -357,7 +357,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+define void @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_or_i64:
   %old = atomicrmw or i64* @var64, i64 %offset release
 ; CHECK-NOT: dmb
@@ -366,20 +366,20 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
@@ -391,7 +391,7 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -414,7 +414,7 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -437,7 +437,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -451,7 +451,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64:
   %old = atomicrmw xor i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -460,20 +460,20 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
@@ -485,7 +485,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -507,7 +507,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -529,7 +529,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -542,7 +542,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64:
   %old = atomicrmw xchg i64* @var64, i64 %offset acquire
 ; CHECK-NOT: dmb
@@ -551,7 +551,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
@@ -560,28 +560,28 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
-define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i8:
   %old = atomicrmw min i8* @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-DAG: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK-DAG: movt [[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK: movge r[[OLDX]], r0
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -591,23 +591,23 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
   ret i8 %old
 }
 
-define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_min_i16(i16 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i16:
   %old = atomicrmw min i16* @var16, i16 %offset release
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK: movge r[[OLDX]], r0
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -626,13 +626,13 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lt
-; CHECK: movlt r[[NEW]], r[[OLD]]
+; Thumb mode: it le
+; CHECK: movle r[[NEW]], r[[OLD]]
 ; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -643,7 +643,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+define void @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_min_i64:
   %old = atomicrmw min i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -652,41 +652,50 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blt .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwle [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
-define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_max_i8(i8 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i8:
   %old = atomicrmw max i8* @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK: movle r[[OLDX]], r0
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it gt
+; CHECK: movgt r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -696,7 +705,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
   ret i8 %old
 }
 
-define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_max_i16(i16 signext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i16:
   %old = atomicrmw max i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -705,13 +714,13 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK: movle r[[OLDX]], r0
+; Thumb mode: it gt
+; CHECK: movgt r[[OLDX]], r[[OLD]]
 ; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -731,7 +740,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -748,7 +757,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+define void @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_max_i64:
   %old = atomicrmw max i64* @var64, i64 %offset monotonic
 ; CHECK-NOT: dmb
@@ -757,41 +766,50 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bge .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwgt [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
-define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
   %old = atomicrmw umin i8* @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ls
+; CHECK: movls r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -801,23 +819,23 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
   ret i8 %old
 }
 
-define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
   %old = atomicrmw umin i16* @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ls
+; CHECK: movls r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -836,13 +854,13 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
 ; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
+; Thumb mode: it ls
+; CHECK: movls r[[NEW]], r[[OLD]]
 ; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -853,50 +871,59 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64:
-  %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+  %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blo .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwls [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
-define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
   %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
 ; Thumb mode: it hi
-; CHECK: movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK: movhi r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -906,23 +933,23 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
   ret i8 %old
 }
 
-define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
   %old = atomicrmw umax i16* @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
 ; Thumb mode: it hi
-; CHECK: movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK: movhi r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -941,7 +968,7 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -958,34 +985,43 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64:
-  %old = atomicrmw umax i64* @var64, i64 %offset release
+  %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwhi [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
-  ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
  store i64 %old, i64* @var64
+  ret void
 }
 
-define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
 ; CHECK-NOT: dmb
@@ -994,14 +1030,15 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var8
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
 ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1011,7 +1048,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
   ret i8 %old
 }
 
-define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
   %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
 ; CHECK-NOT: dmb
@@ -1020,14 +1057,15 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var16
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
 ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1046,14 +1084,14 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var32
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
 ; r0 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
 ; CHECK-NEXT: cmp r[[OLD]], r0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
 ; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1063,7 +1101,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
   ret i32 %old
 }
 
-define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i64:
   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
 ; CHECK-NOT: dmb
@@ -1072,24 +1110,24 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK: movt r[[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK-NEXT: cmp [[OLD1]], r0
-; Thumb mode: it eq
-; CHECK: cmpeq [[OLD2]], r1
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
+; CHECK: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
 ; CHECK-NEXT: BB#2:
 ; As above, r2, r3 is a reasonable guess.
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
+; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: mov r0, [[OLD1]]
-; CHECK-NEXT: mov r1, [[OLD2]]
-  ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+  store i64 %old, i64* @var64
+  ret void
 }
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
@@ -1303,13 +1341,13 @@ define void @test_atomic_store_release_i64(i64 %val) nounwind {
   store atomic i64 %val, i64* @var64 release, align 8
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
-; CHECK: movt r[[ADDR]], :upper16:var64
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var64
+; CHECK: movt [[ADDR]], :upper16:var64
 ; CHECK: .LBB{{[0-9]+}}_1:
 ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
 ; function there.
-; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, {{.*}}[[ADDR]]
 ; CHECK-NEXT: cmp [[STATUS]], #0
 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
@@ -1337,7 +1375,7 @@ atomic_ver:
 ; The key point here is that the second dmb isn't immediately followed by the
 ; simple_ver basic block, which LLVM attempted to do when DMB had been marked
 ; with isBarrier. For now, look for something that looks like "somewhere".
-; CHECK-NEXT: mov
+; CHECK-NEXT: {{mov|bx}}
 somewhere:
   %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
   ret i32 %combined
diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll
index 5befc228e03..1a7092a430d 100644
--- a/test/CodeGen/ARM/atomicrmw_minmax.ll
+++ b/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -15,7 +15,7 @@ define i32 @min(i8 %ctx, i32* %ptr, i32 %val) {
 ; CHECK: ldrex
 ; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
-; CHECK: movlo {{r[0-9]*}}, [[old]]
+; CHECK: movls {{r[0-9]*}}, [[old]]
   %old = atomicrmw umin i32* %ptr, i32 %val monotonic
   ret i32 %old
 }
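[Editor's note, not part of the patch] The i64 tests above were switched from returning %old in r0/r1 to storing it back to @var64: once the ldrex/strex loop is built early at the IR level, the register allocator is free to place the loop values anywhere, so pinning the result registers made the checks brittle (the tests' own comments already call the register choices "a reasonable guess"). A minimal standalone reproducer in the same style is sketched below; it is illustrative only, and @demo_var and @demo are made-up names.

; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
@demo_var = global i64 0

define void @demo(i64 %offset) nounwind {
; On v8, a seq_cst atomicrmw should become a ldaexd/stlexd loop with no dmb,
; mirroring test_atomic_load_sub_i64 above; exact registers will vary.
; CHECK-LABEL: demo:
; CHECK-NOT: dmb
; CHECK: ldaexd
; CHECK: stlexd
; CHECK-NOT: dmb
  %old = atomicrmw add i64* @demo_var, i64 %offset seq_cst
  store i64 %old, i64* @demo_var
  ret void
}

Storing %old back to the global rather than returning it leaves the loop's register allocation unconstrained, which is the same trick the updated tests use.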