Some 64-bit atomic operations on ARM. 64-bit cmpxchg coming next.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138845 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eli Friedman 2011-08-31 00:31:29 +00:00
parent 8f00ffce50
commit 2bdffe4882
5 changed files with 360 additions and 1 deletions

View File

@ -254,6 +254,8 @@ private:
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@ -2309,6 +2311,21 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
const SDValue Ops[] = { In1, In2L, In2H, Chain};
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
MVT::i32, MVT::i32, MVT::Other, Ops,
array_lengthof(Ops));
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
@ -3089,6 +3106,21 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
case ARMISD::ATOMOR64_DAG:
return SelectAtomic64(N, ARM::ATOMOR6432);
case ARMISD::ATOMXOR64_DAG:
return SelectAtomic64(N, ARM::ATOMXOR6432);
case ARMISD::ATOMADD64_DAG:
return SelectAtomic64(N, ARM::ATOMADD6432);
case ARMISD::ATOMSUB64_DAG:
return SelectAtomic64(N, ARM::ATOMSUB6432);
case ARMISD::ATOMNAND64_DAG:
return SelectAtomic64(N, ARM::ATOMNAND6432);
case ARMISD::ATOMAND64_DAG:
return SelectAtomic64(N, ARM::ATOMAND6432);
case ARMISD::ATOMSWAP64_DAG:
return SelectAtomic64(N, ARM::ATOMSWAP6432);
}
return SelectCode(N);

View File

@ -611,6 +611,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
} else {
@ -4846,6 +4853,29 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG, unsigned NewOp) {
EVT T = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
assert (T == MVT::i64 && "Only know how to expand i64 atomics");
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(0));
SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
Node->getOperand(2), DAG.getIntPtrConstant(1));
SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
cast<MemSDNode>(Node)->getMemOperand());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@ -4918,6 +4948,29 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
case ISD::ATOMIC_LOAD_ADD:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
return;
case ISD::ATOMIC_LOAD_AND:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
return;
case ISD::ATOMIC_LOAD_NAND:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
return;
case ISD::ATOMIC_LOAD_OR:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
return;
case ISD::ATOMIC_LOAD_SUB:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
return;
case ISD::ATOMIC_LOAD_XOR:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
return;
case ISD::ATOMIC_SWAP:
ReplaceATOMIC_BINARY_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
return;
//case ISD::ATOMIC_CMP_SWAP:
// ReplaceATOMIC_CMPXCHG_64(N, Results, DAG);
}
if (Res.getNode())
Results.push_back(Res);
@ -5237,6 +5290,113 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
return BB;
}
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
bool NeedsCarry) const {
// This also handles ATOMIC_SWAP, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
unsigned ptr = MI->getOperand(2).getReg();
unsigned vallo = MI->getOperand(3).getReg();
unsigned valhi = MI->getOperand(4).getReg();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
if (isThumb2) {
MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
}
unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// ldrexd r2, r3, ptr
// <binopa> r0, r2, incr
// <binopb> r1, r3, incr
// strexd storesuccess, r0, r1, ptr
// cmp storesuccess, #0
// bne- loopMBB
// fallthrough --> exitMBB
//
// Note that the registers are explicitly specified because there is not any
// way to force the register allocator to allocate a register pair.
//
// FIXME: The hardcoded registers are not necessary for Thumb2, but we
// need to properly enforce the restriction that the two output registers
// for ldrexd must be different.
BB = loopMBB;
// Load
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
.addReg(ARM::R2, RegState::Define)
.addReg(ARM::R3, RegState::Define).addReg(ptr));
// Copy r2/r3 into dest. (This copy will normally be coalesced.)
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
if (Op1) {
// Perform binary operation
AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
.addReg(destlo).addReg(vallo))
.addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
.addReg(desthi).addReg(valhi)).addReg(0);
} else {
// Copy to physregs for strexd
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
}
// Store
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@ -5374,6 +5534,25 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
case ARM::ATOMADD6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, true);
case ARM::ATOMSUB6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, true);
case ARM::ATOMOR6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, false);
case ARM::ATOMXOR6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr, false);
case ARM::ATOMAND6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, false);
case ARM::ATOMSWAP6432:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the

View File

@ -211,7 +211,17 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
VST4LN_UPD
VST4LN_UPD,
// 64-bit atomic ops (value split into two registers)
ATOMADD64_DAG,
ATOMSUB64_DAG,
ATOMOR64_DAG,
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
ATOMCMPXCHG64_DAG
};
}
@ -493,6 +503,11 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const;
MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Op1,
unsigned Op2,
bool NeedsCarry) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,

View File

@ -69,6 +69,8 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDTARMatomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
@ -162,6 +164,28 @@ def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
def ARMAtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def ARMAtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTARMatomicBinary,
[SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@ -1609,6 +1633,32 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
// (These psuedos use a hand-written selection code).
let usesCustomInserter = 1, Uses = [CPSR] in {
def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
NoItinerary, []>;
}
def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;

View File

@ -0,0 +1,83 @@
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
define i64 @test1(i64* %ptr, i64 %val) {
; CHECK: test1
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: adds r0, r2
; CHECK: adc r1, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw add i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test2(i64* %ptr, i64 %val) {
; CHECK: test2
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: subs r0, r2
; CHECK: sbc r1, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw sub i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test3(i64* %ptr, i64 %val) {
; CHECK: test3
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: and r0, r2
; CHECK: and r1, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw and i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test4(i64* %ptr, i64 %val) {
; CHECK: test4
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: orr r0, r2
; CHECK: orr r1, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw or i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test5(i64* %ptr, i64 %val) {
; CHECK: test5
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: eor r0, r2
; CHECK: eor r1, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw xor i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test6(i64* %ptr, i64 %val) {
; CHECK: test6
; CHECK: dmb ish
; CHECK: ldrexd r2, r3
; CHECK: strexd {{r[0-9]+}}, r0, r1
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
%r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
ret i64 %r
}