From 421397ac00b2a83a74c78685aa22e3b640d898aa Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Wed, 2 Apr 2014 09:03:43 +0000 Subject: [PATCH] Remove duplicated DMB instructions ARM specific optimization, finding places in ARM machine code where 2 dmbs follow one another, and eliminating one of them. Patch by Reinoud Elhorst. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARM.h | 1 + lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 101 +++++++++++++++++++++ lib/Target/ARM/ARMTargetMachine.cpp | 1 + lib/Target/ARM/CMakeLists.txt | 1 + test/CodeGen/ARM/optimize-dmbs-v7.ll | 74 +++++++++++++++ 5 files changed, 178 insertions(+) create mode 100644 lib/Target/ARM/ARMOptimizeBarriersPass.cpp create mode 100644 test/CodeGen/ARM/optimize-dmbs-v7.ll diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 4c811ce0ba3..cd589f59010 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -42,6 +42,7 @@ FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); +FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass(); /// \brief Creates an ARM-specific Target Transformation Info pass. diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp new file mode 100644 index 00000000000..20619faa667 --- /dev/null +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -0,0 +1,101 @@ +//===-- ARMOptimizeBarriersPass - two DMBs without a memory access in between, +//removed one -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===------------------------------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "double barriers"
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumDMBsRemoved, "Number of DMBs removed");
+
+namespace {
+/// Machine-function pass that erases a DMB when an earlier DMB with the same
+/// option operand precedes it in the same basic block and nothing in between
+/// can access memory.
+class ARMOptimizeBarriersPass : public MachineFunctionPass {
+public:
+  static char ID;
+  ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override { return "optimise barriers pass"; }
+};
+char ARMOptimizeBarriersPass::ID = 0;
+}
+
+/// Returns true iff \p MI can safely be moved past a DMB instruction.
+/// The current implementation is conservative: it allows this only when MI
+/// neither loads nor stores, has no unmodeled side effects, and is not a call
+/// or a return.
+static bool CanMovePastDMB(const MachineInstr *MI) {
+  return !(MI->mayLoad() ||
+          MI->mayStore() ||
+          MI->hasUnmodeledSideEffects() ||
+          MI->isCall() ||
+          MI->isReturn());
+}
+
+/// Scans every basic block of \p MF for redundant DMBs and erases them.
+/// Returns true iff at least one instruction was removed from this function.
+bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) {
+  // DMBs tagged for removal; they are erased after the scan.
+  std::vector<MachineInstr *> ToRemove;
+  // DMBType is the Imm value of the first operand of the last DMB seen. It
+  // determines whether it is a dmb ish, dmb sy, dmb osh, etc.
+  int64_t DMBType = -1;
+
+  // Find a dmb. If we can move it until the next dmb, tag the second one for
+  // removal
+  for (auto &MBB : MF) {
+    // True when we have seen a DMB in this block and no instruction since
+    // that cannot move past a DMB
+    bool IsRemovableNextDMB = false;
+    for (auto &MI : MBB) {
+      if (MI.getOpcode() == ARM::DMB) {
+        const int64_t ThisDMBType = MI.getOperand(0).getImm();
+        if (IsRemovableNextDMB && ThisDMBType == DMBType) {
+          // Same Imm as the last DMB and nothing blocking in between: this
+          // second DMB is redundant.
+          ToRemove.push_back(&MI);
+        } else {
+          // Either the first DMB of a run or one with a different DMBType. It
+          // cannot be removed, but a later DMB of the same type can be, so
+          // record this DMB's type as the last seen DMB type.
+          IsRemovableNextDMB = true;
+          DMBType = ThisDMBType;
+        }
+      } else if (!CanMovePastDMB(&MI)) {
+        // If we find an instruction unable to move past a DMB, a next DMB is
+        // not removable
+        IsRemovableNextDMB = false;
+      }
+    }
+  }
+  // Remove the tagged DMBs
+  for (auto MI : ToRemove) {
+    MI->eraseFromParent();
+    ++NumDMBsRemoved;
+  }
+
+  // NumDMBsRemoved counts across all functions; report only this one.
+  return !ToRemove.empty();
+}
+
+/// createARMOptimizeBarriersPass - Returns an instance of the remove double
+/// barriers pass.
+FunctionPass *llvm::createARMOptimizeBarriersPass() { + return new ARMOptimizeBarriersPass(); +} diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 17b7abe0834..6a6d7ed01c5 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -296,6 +296,7 @@ bool ARMPassConfig::addPreEmitPass() { addPass(&UnpackMachineBundlesID); } + addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); return true; diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index eff99c27e43..9b5fa75fe2a 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_target(ARMCodeGen ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp ARMRegisterInfo.cpp + ARMOptimizeBarriersPass.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp diff --git a/test/CodeGen/ARM/optimize-dmbs-v7.ll b/test/CodeGen/ARM/optimize-dmbs-v7.ll new file mode 100644 index 00000000000..64f5e202d36 --- /dev/null +++ b/test/CodeGen/ARM/optimize-dmbs-v7.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s + +@x1 = global i32 0, align 4 +@x2 = global i32 0, align 4 + +define void @test() { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.013 = phi i32 [ 1, %entry ], [ %inc6, %for.body ] + store atomic i32 %i.013, i32* @x1 seq_cst, align 4 + store atomic i32 %i.013, i32* @x1 seq_cst, align 4 + store atomic i32 %i.013, i32* @x2 seq_cst, align 4 + %inc6 = add nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc6, 2 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void + +; The for.body contains 3 seq_cst stores. 
+; Hence it should have 3 dmb;str;dmb sequences with the middle dmbs collapsed +; CHECK: %for.body +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK: str + +; CHECK-NOT: str +; CHECK: dmb +; CHECK-NOT: dmb +; CHECK-NOT: str +; CHECK: %for.end +} + +define void @test2() { + call void @llvm.arm.dmb(i32 11) + tail call void @test() + call void @llvm.arm.dmb(i32 11) + ret void +; the call should prevent the two dmbs from collapsing +; CHECK: test2: +; CHECK: dmb +; CHECK-NEXT: bl +; CHECK-NEXT: dmb +} + +define void @test3() { + call void @llvm.arm.dmb(i32 11) + call void @llvm.arm.dsb(i32 9) + call void @llvm.arm.dmb(i32 11) + ret void +; the dsb should prevent the two dmbs from collapsing +; CHECK: test3: +; CHECK: dmb +; CHECK-NEXT: dsb +; CHECK-NEXT: dmb + +} + + +declare void @llvm.arm.dmb(i32) +declare void @llvm.arm.dsb(i32)