Fix 64-bit atomic operations in Thumb mode.

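The ARM and Thumb variants of LDREXD and STREXD have different constraints and
take different operands: the ARM forms take a single register-pair (GPRPair)
operand, whereas the Thumb2 forms take the low and high registers as two
separate operands. Previously the code expanding 64-bit atomic operations
didn't take this into account and hit an assertion failure in Thumb mode.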

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173780 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-01-29 09:06:13 +00:00
parent e6482fabd2
commit 0adfdedacb
2 changed files with 193 additions and 74 deletions

View File

@ -5969,9 +5969,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
} }
unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0; MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax) if (IsCmpxchg || IsMinMax)
@ -6009,42 +6006,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// cmp storesuccess, #0 // cmp storesuccess, #0
// bne- loopMBB // bne- loopMBB
// fallthrough --> exitMBB // fallthrough --> exitMBB
//
// Note that the registers are explicitly specified because there is not any
// way to force the register allocator to allocate a register pair.
//
// FIXME: The hardcoded registers are not necessary for Thumb2, but we
// need to properly enforce the restriction that the two output registers
// for ldrexd must be different.
BB = loopMBB; BB = loopMBB;
// Load // Load
unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); if (isThumb2) {
unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
unsigned GPRPair2; .addReg(destlo, RegState::Define)
if (IsMinMax) { .addReg(desthi, RegState::Define)
//We need an extra double register for doing min/max. .addReg(ptr));
unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); } else {
unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); .addReg(GPRPair0, RegState::Define).addReg(ptr));
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) // Copy r2/r3 into dest. (This copy will normally be coalesced.)
.addReg(undef) BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
.addReg(vallo) .addReg(GPRPair0, 0, ARM::gsub_0);
.addImm(ARM::gsub_0); BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) .addReg(GPRPair0, 0, ARM::gsub_1);
.addReg(r1)
.addReg(valhi)
.addImm(ARM::gsub_1);
} }
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) unsigned StoreLo, StoreHi;
.addReg(GPRPair0, RegState::Define).addReg(ptr));
// Copy r2/r3 into dest. (This copy will normally be coalesced.)
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
.addReg(GPRPair0, 0, ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
.addReg(GPRPair0, 0, ARM::gsub_1);
if (IsCmpxchg) { if (IsCmpxchg) {
// Add early exit // Add early exit
for (unsigned i = 0; i < 2; i++) { for (unsigned i = 0; i < 2; i++) {
@ -6060,19 +6041,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
} }
// Copy to physregs for strexd // Copy to physregs for strexd
unsigned setlo = MI->getOperand(5).getReg(); StoreLo = MI->getOperand(5).getReg();
unsigned sethi = MI->getOperand(6).getReg(); StoreHi = MI->getOperand(6).getReg();
unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
.addReg(undef)
.addReg(setlo)
.addImm(ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
.addReg(r1)
.addReg(sethi)
.addImm(ARM::gsub_1);
} else if (Op1) { } else if (Op1) {
// Perform binary operation // Perform binary operation
unsigned tmpRegLo = MRI.createVirtualRegister(TRC); unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
@ -6084,32 +6054,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addReg(desthi).addReg(valhi)) .addReg(desthi).addReg(valhi))
.addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); StoreLo = tmpRegLo;
BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); StoreHi = tmpRegHi;
unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
.addReg(UndefPair)
.addReg(tmpRegLo)
.addImm(ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
.addReg(r1)
.addReg(tmpRegHi)
.addImm(ARM::gsub_1);
} else { } else {
// Copy to physregs for strexd // Copy to physregs for strexd
unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); StoreLo = vallo;
unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); StoreHi = valhi;
BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
.addReg(UndefPair)
.addReg(vallo)
.addImm(ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
.addReg(r1)
.addReg(valhi)
.addImm(ARM::gsub_1);
} }
unsigned GPRPairStore = GPRPair1;
if (IsMinMax) { if (IsMinMax) {
// Compare and branch to exit block. // Compare and branch to exit block.
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@ -6117,12 +6068,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->addSuccessor(exitMBB); BB->addSuccessor(exitMBB);
BB->addSuccessor(contBB); BB->addSuccessor(contBB);
BB = contBB; BB = contBB;
GPRPairStore = GPRPair2; StoreLo = vallo;
StoreHi = valhi;
} }
// Store // Store
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) if (isThumb2) {
.addReg(GPRPairStore).addReg(ptr)); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
.addReg(UndefPair)
.addReg(StoreLo)
.addImm(ARM::gsub_0);
BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
.addReg(r1)
.addReg(StoreHi)
.addImm(ARM::gsub_1);
// ...and store it
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump // Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0)); .addReg(storesuccess).addImm(0));

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB
define i64 @test1(i64* %ptr, i64 %val) { define i64 @test1(i64* %ptr, i64 %val) {
; CHECK: test1: ; CHECK: test1:
@ -10,6 +11,17 @@ define i64 @test1(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test1:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw add i64* %ptr, i64 %val seq_cst %r = atomicrmw add i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -24,6 +36,17 @@ define i64 @test2(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test2:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw sub i64* %ptr, i64 %val seq_cst %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -38,6 +61,17 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test3:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw and i64* %ptr, i64 %val seq_cst %r = atomicrmw and i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -52,6 +86,17 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test4:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw or i64* %ptr, i64 %val seq_cst %r = atomicrmw or i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -66,6 +111,17 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test5:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw xor i64* %ptr, i64 %val seq_cst %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -78,6 +134,15 @@ define i64 @test6(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test6:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw xchg i64* %ptr, i64 %val seq_cst %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -93,6 +158,19 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test7:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: cmp [[REG1]]
; CHECK-THUMB: it eq
; CHECK-THUMB: cmpeq [[REG2]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
ret i64 %r ret i64 %r
} }
@ -109,6 +187,18 @@ define i64 @test8(i64* %ptr) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test8:
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: cmp [[REG1]]
; CHECK-THUMB: it eq
; CHECK-THUMB: cmpeq [[REG2]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = load atomic i64* %ptr seq_cst, align 8 %r = load atomic i64* %ptr seq_cst, align 8
ret i64 %r ret i64 %r
} }
@ -123,6 +213,15 @@ define void @test9(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test9:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
store atomic i64 %val, i64* %ptr seq_cst, align 8 store atomic i64 %val, i64* %ptr seq_cst, align 8
ret void ret void
} }
@ -138,6 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test10:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: blt
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw min i64* %ptr, i64 %val seq_cst %r = atomicrmw min i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -153,6 +264,19 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test11:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: blo
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw umin i64* %ptr, i64 %val seq_cst %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -168,6 +292,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test12:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: bge
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw max i64* %ptr, i64 %val seq_cst %r = atomicrmw max i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }
@ -183,6 +319,17 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK: cmp ; CHECK: cmp
; CHECK: bne ; CHECK: bne
; CHECK: dmb ish ; CHECK: dmb ish
; CHECK-THUMB: test13:
; CHECK-THUMB: dmb ish
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: bhs
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
; CHECK-THUMB: dmb ish
%r = atomicrmw umax i64* %ptr, i64 %val seq_cst %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
ret i64 %r ret i64 %r
} }