From 4d3f3294535a3b622c715f2d9675d4f3e86c3378 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Wed, 31 Aug 2011 17:52:22 +0000
Subject: [PATCH] 64-bit atomic cmpxchg for ARM.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138868 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelDAGToDAG.cpp |  20 ++++--
 lib/Target/ARM/ARMISelLowering.cpp | 101 +++++++++++++++++++++--------
 lib/Target/ARM/ARMISelLowering.h   |   3 +-
 lib/Target/ARM/ARMInstrInfo.td     |   6 +-
 test/CodeGen/ARM/atomic-64bit.ll   |  15 +++++
 5 files changed, 108 insertions(+), 37 deletions(-)

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 37e895650b1..73a4322b820 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2312,16 +2312,20 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
 }
 
 SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
-  SDValue Chain = Node->getOperand(0);
-  SDValue In1 = Node->getOperand(1);
-  SDValue In2L = Node->getOperand(2);
-  SDValue In2H = Node->getOperand(3);
+  SmallVector<SDValue, 6> Ops;
+  Ops.push_back(Node->getOperand(1)); // Ptr
+  Ops.push_back(Node->getOperand(2)); // Low part of Val1
+  Ops.push_back(Node->getOperand(3)); // High part of Val1
+  if (Opc == ARM::ATOMCMPXCHG6432) {
+    Ops.push_back(Node->getOperand(4)); // Low part of Val2
+    Ops.push_back(Node->getOperand(5)); // High part of Val2
+  }
+  Ops.push_back(Node->getOperand(0)); // Chain
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
-  const SDValue Ops[] = { In1, In2L, In2H, Chain};
   SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           MVT::i32, MVT::i32, MVT::Other, Ops,
-                                           array_lengthof(Ops));
+                                           MVT::i32, MVT::i32, MVT::Other,
+                                           Ops.data(), Ops.size());
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
@@ -3121,6 +3125,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     return SelectAtomic64(N, ARM::ATOMAND6432);
   case ARMISD::ATOMSWAP64_DAG:
     return SelectAtomic64(N, ARM::ATOMSWAP6432);
+  case ARMISD::ATOMCMPXCHG64_DAG:
+    return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
   }
 
   return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 550c8daaee5..a6f71a2014d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -618,6 +618,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
       setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
       setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
       setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
       // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
       setInsertFencesForAtomic(true);
     } else {
@@ -4854,24 +4855,34 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
 }
 
 static void
-ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
-                        SelectionDAG &DAG, unsigned NewOp) {
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+                    SelectionDAG &DAG, unsigned NewOp) {
   EVT T = Node->getValueType(0);
   DebugLoc dl = Node->getDebugLoc();
   assert (T == MVT::i64 && "Only know how to expand i64 atomics");
-  SDValue Chain = Node->getOperand(0);
-  SDValue In1 = Node->getOperand(1);
-  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
-                             Node->getOperand(2), DAG.getIntPtrConstant(0));
-  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
-                             Node->getOperand(2), DAG.getIntPtrConstant(1));
-  SDValue Ops[] = { Chain, In1, In2L, In2H };
+  SmallVector<SDValue, 6> Ops;
+  Ops.push_back(Node->getOperand(0)); // Chain
+  Ops.push_back(Node->getOperand(1)); // Ptr
+  // Low part of Val1
+  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
+  // High part of Val1
+  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
+  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+    // Low part of Val2
+    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
+    // High part of Val2
+    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
+  }
   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
   SDValue Result =
-    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
                             cast<MemSDNode>(Node)->getMemOperand());
-  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
   Results.push_back(Result.getValue(2));
 }
@@ -4949,28 +4960,29 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
   case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
     return;
   case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
     return;
   case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
     return;
   case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
     return;
   case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
     return;
   case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
     return;
   case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+    return;
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
     return;
-  //case ISD::ATOMIC_CMP_SWAP:
-  //  ReplaceATOMIC_CMPXCHG_64(N, Results, DAG);
   }
   if (Res.getNode())
     Results.push_back(Res);
 }
@@ -5293,7 +5305,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
 MachineBasicBlock *
 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
                                       unsigned Op1, unsigned Op2,
-                                      bool NeedsCarry) const {
+                                      bool NeedsCarry, bool IsCmpxchg) const {
   // This also handles ATOMIC_SWAP, indicated by Op1==0.
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -5321,8 +5333,17 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *contBB, *cont2BB;
+  if (IsCmpxchg) {
+    contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+  }
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MF->insert(It, loopMBB);
+  if (IsCmpxchg) {
+    MF->insert(It, contBB);
+    MF->insert(It, cont2BB);
+  }
   MF->insert(It, exitMBB);
 
   // Transfer the remainder of BB and its successor edges to exitMBB.
@@ -5363,7 +5384,27 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
   BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
   BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
-  if (Op1) {
+
+  if (IsCmpxchg) {
+    // Add early exit
+    for (unsigned i = 0; i < 2; i++) {
+      AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+                                                 ARM::CMPrr))
+                     .addReg(i == 0 ? destlo : desthi)
+                     .addReg(i == 0 ? vallo : valhi));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+        .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+      BB->addSuccessor(exitMBB);
+      BB->addSuccessor(i == 0 ? contBB : cont2BB);
+      BB = (i == 0 ? contBB : cont2BB);
+    }
+
+    // Copy to physregs for strexd
+    unsigned setlo = MI->getOperand(5).getReg();
+    unsigned sethi = MI->getOperand(6).getReg();
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+  } else if (Op1) {
     // Perform binary operation
     AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
                    .addReg(destlo).addReg(vallo))
@@ -5537,21 +5578,27 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   case ARM::ATOMADD6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
-                              isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, true);
+                              isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
+                              /*NeedsCarry*/ true);
   case ARM::ATOMSUB6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
-                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, true);
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ true);
   case ARM::ATOMOR6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
-                              isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, false);
+                              isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
   case ARM::ATOMXOR6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
-                              isThumb2 ? ARM::t2EORrr : ARM::EORrr, false);
+                              isThumb2 ? ARM::t2EORrr : ARM::EORrr);
   case ARM::ATOMAND6432:
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
-                              isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, false);
+                              isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
   case ARM::ATOMSWAP6432:
     return EmitAtomicBinary64(MI, BB, 0, 0, false);
+  case ARM::ATOMCMPXCHG6432:
+    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ false, /*IsCmpxchg*/ true);
 
   case ARM::tMOVCCr_pseudo: {
     // To "insert" a SELECT_CC instruction, we actually have to insert the
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3641d0dfe77..5bf85d482bd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -507,7 +507,8 @@ namespace llvm {
                                         MachineBasicBlock *BB,
                                         unsigned Op1,
                                         unsigned Op2,
-                                        bool NeedsCarry) const;
+                                        bool NeedsCarry = false,
+                                        bool IsCmpxchg = false) const;
     MachineBasicBlock *EmitAtomicBinaryMinMax(MachineInstr *MI,
                                               MachineBasicBlock *BB,
                                               unsigned Size,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index abac9f271ec..26f1fe1f74d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -69,8 +69,6 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 
-def SDTARMatomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
-                                              SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
 
 def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
                                             [SDTCisSameAs<0, 2>,
@@ -1635,6 +1633,10 @@ def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
 def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
                               (ins GPR:$addr, GPR:$src1, GPR:$src2),
                               NoItinerary, []>;
+def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+                                 (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+                                      GPR:$set1, GPR:$set2),
+                                 NoItinerary, []>;
 }
 
 def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 9a2b99804ba..abe1acc6d11 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -81,3 +81,18 @@ define i64 @test6(i64* %ptr, i64 %val) {
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
+
+define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
+; CHECK: test7
+; CHECK: dmb ish
+; CHECK: ldrexd r2, r3
+; CHECK: cmp r2
+; CHECK: cmpeq r3
+; CHECK: bne
+; CHECK: strexd {{r[0-9]+}}, r0, r1
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+  %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
+  ret i64 %r
+}
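
For reference (not part of the patch): a minimal C++ example that should exercise
the new 64-bit cmpxchg lowering when compiled for a target with LDREXD/STREXD
(e.g. -march=armv7-a). The function name is illustrative; the __sync builtin is
a standard GCC/Clang builtin, not something this commit adds.

    // Hypothetical caller. The frontend lowers the 64-bit __sync builtin to an
    // i64 cmpxchg in LLVM IR; with this patch the ARM backend expands that into
    // a dmb/ldrexd/cmp/cmpeq/bne/strexd loop, as checked by test7 above.
    long long swap_if_equal(long long *p, long long expected, long long desired) {
      // Returns the prior contents of *p; equal to 'expected' on success.
      return __sync_val_compare_and_swap(p, expected, desired);
    }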