From 35b3df6e31f9aac70fb471d74e39f899dfbd689f Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Thu, 29 Nov 2012 14:41:25 +0000 Subject: [PATCH] Added atomic 64 min/max/umin/umax instrinsics support in the ARM backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168886 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 9 ++++ lib/Target/ARM/ARMISelLowering.cpp | 83 ++++++++++++++++++++++++++---- lib/Target/ARM/ARMISelLowering.h | 10 +++- lib/Target/ARM/ARMInstrInfo.td | 12 +++++ test/CodeGen/ARM/atomic-64bit.ll | 61 ++++++++++++++++++++++ 5 files changed, 163 insertions(+), 12 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 66fb83ca7b0..de9c8eb2d88 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3327,6 +3327,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectAtomic64(N, ARM::ATOMSWAP6432); case ARMISD::ATOMCMPXCHG64_DAG: return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); + + case ARMISD::ATOMMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMMIN6432); + case ARMISD::ATOMUMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMUMIN6432); + case ARMISD::ATOMMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMMAX6432); + case ARMISD::ATOMUMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMUMAX6432); } return SelectCode(N); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b2af378977b..d139a568a74 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -695,7 +695,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); @@ -5406,6 +5410,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); return; + case ISD::ATOMIC_LOAD_MIN: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_UMIN: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_MAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); + return; + case ISD::ATOMIC_LOAD_UMAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -5745,7 +5761,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg) const { + bool NeedsCarry, bool IsCmpxchg, + bool IsMinMax, ARMCC::CondCodes CC) const { // This also handles ATOMIC_SWAP, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -5774,16 +5791,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg) { + if (IsCmpxchg || IsMinMax) contBB = MF->CreateMachineBasicBlock(LLVM_BB); + if (IsCmpxchg) cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - } MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); - if (IsCmpxchg) { - MF->insert(It, contBB); - MF->insert(It, cont2BB); - } + if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); + if (IsCmpxchg) MF->insert(It, cont2BB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -5821,6 +5837,22 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // Load unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned GPRPair2; + if (IsMinMax) { + //We need an extra double register for doing min/max. + unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(undef) + .addReg(vallo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) + .addReg(r1) + .addReg(valhi) + .addImm(ARM::gsub_1); + } AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) .addReg(GPRPair0, RegState::Define).addReg(ptr)); @@ -5866,7 +5898,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); unsigned tmpRegHi = MRI.createVirtualRegister(TRC); AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) - .addReg(desthi).addReg(valhi)).addReg(0); + .addReg(desthi).addReg(valhi)) + .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); @@ -5893,10 +5926,20 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addReg(valhi) .addImm(ARM::gsub_1); } + unsigned GPRPairStore = GPRPair1; + if (IsMinMax) { + // Compare and branch to exit block. + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); + BB->addSuccessor(exitMBB); + BB->addSuccessor(contBB); + BB = contBB; + GPRPairStore = GPRPair2; + } // Store AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(GPRPair1).addReg(ptr)); + .addReg(GPRPairStore).addReg(ptr)); // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); @@ -6894,6 +6937,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::ATOMMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LE); + case ARM::ATOMMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::GE); + case ARM::ATOMUMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LS); + case ARM::ATOMUMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9cbf866f9b5..a42ff7849e8 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -232,7 +232,11 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG + ATOMCMPXCHG64_DAG, + ATOMMIN64_DAG, + ATOMUMIN64_DAG, + ATOMMAX64_DAG, + ATOMUMAX64_DAG }; } @@ -532,7 +536,9 @@ namespace llvm { unsigned Op1, unsigned Op2, bool NeedsCarry = false, - bool IsCmpxchg = false) const; + bool IsCmpxchg = false, + bool IsMinMax = false, + ARMCC::CondCodes CC = ARMCC::AL) const; MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 5cf7c3c3725..12712c007b2 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1624,6 +1624,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, GPR:$set1, GPR:$set2), NoItinerary, []>; +def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; } def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index be51e3c1290..69da6221b7d 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -126,3 +126,64 @@ define void @test9(i64* %ptr, i64 %val) { store atomic i64 %val, i64* %ptr seq_cst, align 8 ret void } + +define i64 @test10(i64* %ptr, i64 %val) { +; CHECK: test10: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: ble +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw min i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test11(i64* %ptr, i64 %val) { +; CHECK: test11: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: bls +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw umin i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test12(i64* %ptr, i64 %val) { +; CHECK: test12: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: bge +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw max i64* %ptr, i64 %val seq_cst + ret i64 %r +} + +define i64 @test13(i64* %ptr, i64 %val) { +; CHECK: test13: +; CHECK: dmb ish +; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] +; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] +; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] +; CHECK: bhs +; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] +; CHECK: cmp +; CHECK: bne +; CHECK: dmb ish + %r = atomicrmw umax i64* %ptr, i64 %val seq_cst + ret i64 %r +} +