[ARM, Fix] Fix emitLeading/TrailingFence on old ARM processors

Summary:
I had only tested this code for ARMv7 and ARMv8. This patch adds several
fallback paths if the processor does not support dmb ish:
- dmb sy on a Cortex-M that supports dmb
- mcr p15, #0, r0, c7, c10, #5 for ARMv6 (special instruction equivalent to a DMB)
These fallback paths were chosen based on the code for fence seq_cst.

Thanks to luqmana for having noticed this bug.

Test Plan: Added more cases to atomic-load-store.ll + make check-all

Reviewers: jfb, t.p.northover, luqmana

Subscribers: aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D5304

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217965 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robin Morisset 2014-09-17 17:41:16 +00:00
parent 507636288f
commit 30486fa3de
3 changed files with 50 additions and 4 deletions

View File

@ -10984,11 +10984,33 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) { Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Module *M = Builder.GetInsertBlock()->getParent()->getParent();
// First, if the target has no DMB, see what fallback we can use.
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
ArrayRef<Value*> args = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
return Builder.CreateCall(MCR, args);
} else {
// Instead of using barriers, atomic accesses on these subtargets use
// libcalls.
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain); Constant *CDomain = Builder.getInt32(Domain);
Builder.CreateCall(DMB, CDomain); return Builder.CreateCall(DMB, CDomain);
}
} }
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html

View File

@ -393,6 +393,7 @@ namespace llvm {
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
bool hasLoadLinkedStoreConditional() const override; bool hasLoadLinkedStoreConditional() const override;
Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override; AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,

View File

@ -3,6 +3,8 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO
; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE
; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4 ; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
; RUN: llc < %s -mtriple=armv6-apple-ios | FileCheck %s -check-prefix=ARMV6
; RUN: llc < %s -mtriple=thumbv7m-apple-ios | FileCheck %s -check-prefix=THUMBM
define void @test1(i32* %ptr, i32 %val1) { define void @test1(i32* %ptr, i32 %val1) {
; ARM-LABEL: test1 ; ARM-LABEL: test1
@ -15,6 +17,14 @@ define void @test1(i32* %ptr, i32 %val1) {
; THUMBTWO: dmb {{ish$}} ; THUMBTWO: dmb {{ish$}}
; THUMBTWO-NEXT: str ; THUMBTWO-NEXT: str
; THUMBTWO-NEXT: dmb {{ish$}} ; THUMBTWO-NEXT: dmb {{ish$}}
; ARMV6-LABEL: test1
; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
; ARMV6: str
; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
; THUMBM-LABEL: test1
; THUMBM: dmb sy
; THUMBM: str
; THUMBM: dmb sy
store atomic i32 %val1, i32* %ptr seq_cst, align 4 store atomic i32 %val1, i32* %ptr seq_cst, align 4
ret void ret void
} }
@ -28,6 +38,12 @@ define i32 @test2(i32* %ptr) {
; THUMBTWO-LABEL: test2 ; THUMBTWO-LABEL: test2
; THUMBTWO: ldr ; THUMBTWO: ldr
; THUMBTWO-NEXT: dmb {{ish$}} ; THUMBTWO-NEXT: dmb {{ish$}}
; ARMV6-LABEL: test2
; ARMV6: ldr
; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
; THUMBM-LABEL: test2
; THUMBM: ldr
; THUMBM: dmb sy
%val = load atomic i32* %ptr seq_cst, align 4 %val = load atomic i32* %ptr seq_cst, align 4
ret i32 %val ret i32 %val
} }
@ -55,6 +71,11 @@ define void @test3(i8* %ptr1, i8* %ptr2) {
; THUMBONE-NOT: dmb ; THUMBONE-NOT: dmb
; THUMBONE: strb ; THUMBONE: strb
; THUMBONE-NOT: dmb ; THUMBONE-NOT: dmb
; ARMV6-LABEL: test3
; ARMV6-NOT: mcr
; THUMBM-LABEL: test3
; THUMBM-NOT: dmb sy
%val = load atomic i8* %ptr1 unordered, align 1 %val = load atomic i8* %ptr1 unordered, align 1
store atomic i8 %val, i8* %ptr2 unordered, align 1 store atomic i8 %val, i8* %ptr2 unordered, align 1
ret void ret void
@ -64,6 +85,8 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
; THUMBONE-LABEL: test4 ; THUMBONE-LABEL: test4
; THUMBONE: ___sync_val_compare_and_swap_1 ; THUMBONE: ___sync_val_compare_and_swap_1
; THUMBONE: ___sync_lock_test_and_set_1 ; THUMBONE: ___sync_lock_test_and_set_1
; ARMV6-LABEL: test4
; THUMBM-LABEL: test4
%val = load atomic i8* %ptr1 seq_cst, align 1 %val = load atomic i8* %ptr1 seq_cst, align 1
store atomic i8 %val, i8* %ptr2 seq_cst, align 1 store atomic i8 %val, i8* %ptr2 seq_cst, align 1
ret void ret void