Remove duplicated DMB instructions

ARM-specific optimization: find places in ARM machine code where two DMBs
follow one another, and eliminate one of them.

Patch by Reinoud Elhorst.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205409 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Renato Golin 2014-04-02 09:03:43 +00:00
parent 9ee14e3522
commit 421397ac00
5 changed files with 178 additions and 0 deletions

View File

@ -42,6 +42,7 @@ FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass(); FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass(); FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2ITBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(); FunctionPass *createThumb2SizeReductionPass();
/// \brief Creates an ARM-specific Target Transformation Info pass. /// \brief Creates an ARM-specific Target Transformation Info pass.

View File

@ -0,0 +1,101 @@
//===-- ARMOptimizeBarriersPass - for two DMBs without a memory access in
// between, remove one -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===------------------------------------------------------------------------------------------===//
#define DEBUG_TYPE "double barriers"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
STATISTIC(NumDMBsRemoved, "Number of DMBs removed");
namespace {
/// Machine-function pass that looks for a DMB following another DMB of the
/// same type with no memory-accessing instruction in between, and removes the
/// later one. The work is done in runOnMachineFunction.
class ARMOptimizeBarriersPass : public MachineFunctionPass {
public:
  /// Pass identifier; handed to the MachineFunctionPass constructor.
  static char ID;

  ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {}

  /// Human-readable name of this pass.
  virtual const char *getPassName() const { return "optimise barriers pass"; }

  /// Processes one machine function; returns whether it was modified.
  virtual bool runOnMachineFunction(MachineFunction &Fn);
};

char ARMOptimizeBarriersPass::ID = 0;
} // end anonymous namespace
/// Returns true if \p MI can safely be moved past a DMB instruction.
///
/// The current implementation allows this if and only if \p MI has no
/// possible memory access: an instruction that may load, may store, has
/// unmodeled side effects, is a call, or is a return is rejected.
static bool CanMovePastDMB(const MachineInstr *MI) {
  if (MI->mayLoad() || MI->mayStore())
    return false;
  if (MI->hasUnmodeledSideEffects())
    return false;
  if (MI->isCall() || MI->isReturn())
    return false;
  return true;
}
/// Scans every basic block of \p MF for a DMB that follows an earlier DMB with
/// the same immediate operand, with no instruction in between that
/// CanMovePastDMB rejects, and erases the later DMB.
///
/// \returns true if at least one DMB was erased from \p MF.
bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) {
  // DMBs tagged for removal. Erasure is deferred until the scan of the whole
  // function is complete.
  std::vector<MachineInstr *> ToRemove;

  for (auto &MBB : MF) {
    // True once a DMB has been seen in this block and no instruction that
    // cannot move past a DMB has been seen since.
    bool IsRemovableNextDMB = false;
    // Imm value of the first operand of the last DMB that was kept. It
    // determines whether it is a dmb ish, dmb sy, dmb osh, etc. It is only
    // compared while IsRemovableNextDMB is true, i.e. after it has been set
    // from a DMB in this block.
    int64_t DMBType = -1;

    for (auto &MI : MBB) {
      if (MI.getOpcode() != ARM::DMB) {
        // An instruction that cannot move past a DMB means the next DMB is
        // not removable.
        if (!CanMovePastDMB(&MI))
          IsRemovableNextDMB = false;
        continue;
      }

      const int64_t ThisType = MI.getOperand(0).getImm();
      if (IsRemovableNextDMB && ThisType == DMBType) {
        // Same type as the preceding DMB and nothing blocking in between:
        // this second DMB is redundant.
        ToRemove.push_back(&MI);
        continue;
      }

      // Either the first DMB since a blocking instruction, or a DMB of a
      // different type: keep it and remember it as the last seen DMB.
      IsRemovableNextDMB = true;
      DMBType = ThisType;
    }
  }

  // Remove the tagged DMBs.
  for (MachineInstr *MI : ToRemove) {
    MI->eraseFromParent();
    ++NumDMBsRemoved;
  }

  // Report a change based on what was erased from this function only.
  // NumDMBsRemoved is a file-level statistic that accumulates across every
  // function processed, so it cannot tell whether this function changed.
  return !ToRemove.empty();
}
/// Factory for the pass that removes double barriers: allocates a new
/// ARMOptimizeBarriersPass and returns it as a FunctionPass pointer.
FunctionPass *llvm::createARMOptimizeBarriersPass() {
  FunctionPass *BarrierPass = new ARMOptimizeBarriersPass();
  return BarrierPass;
}

View File

@ -296,6 +296,7 @@ bool ARMPassConfig::addPreEmitPass() {
addPass(&UnpackMachineBundlesID); addPass(&UnpackMachineBundlesID);
} }
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass()); addPass(createARMConstantIslandPass());
return true; return true;

View File

@ -34,6 +34,7 @@ add_llvm_target(ARMCodeGen
ARMMCInstLower.cpp ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp ARMMachineFunctionInfo.cpp
ARMRegisterInfo.cpp ARMRegisterInfo.cpp
ARMOptimizeBarriersPass.cpp
ARMSelectionDAGInfo.cpp ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp ARMSubtarget.cpp
ARMTargetMachine.cpp ARMTargetMachine.cpp

View File

@ -0,0 +1,74 @@
; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s
@x1 = global i32 0, align 4
@x2 = global i32 0, align 4
; Loop whose body performs three seq_cst atomic stores of the induction value:
; two to @x1 followed by one to @x2.
define void @test() {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%i.013 = phi i32 [ 1, %entry ], [ %inc6, %for.body ]
store atomic i32 %i.013, i32* @x1 seq_cst, align 4
store atomic i32 %i.013, i32* @x1 seq_cst, align 4
store atomic i32 %i.013, i32* @x2 seq_cst, align 4
%inc6 = add nsw i32 %i.013, 1
%exitcond = icmp eq i32 %inc6, 2
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
; The for.body contains 3 seq_cst stores.
; Hence it should have 3 dmb;str;dmb sequences with the middle dmbs collapsed,
; i.e. exactly one dmb before the first str, one between each pair of
; consecutive strs, and one after the last str.
; CHECK: %for.body
; CHECK-NOT: str
; CHECK: dmb
; CHECK-NOT: dmb
; CHECK: str
; CHECK-NOT: str
; CHECK: dmb
; CHECK-NOT: dmb
; CHECK: str
; CHECK-NOT: str
; CHECK: dmb
; CHECK-NOT: dmb
; CHECK: str
; CHECK-NOT: str
; CHECK: dmb
; CHECK-NOT: dmb
; CHECK-NOT: str
; CHECK: %for.end
}
; Two identical dmb intrinsics separated by a call to @test.
define void @test2() {
call void @llvm.arm.dmb(i32 11)
tail call void @test()
call void @llvm.arm.dmb(i32 11)
ret void
; The call in between should prevent the two dmbs from collapsing, so both
; barriers must remain, one on each side of the bl.
; CHECK: test2:
; CHECK: dmb
; CHECK-NEXT: bl
; CHECK-NEXT: dmb
}
; Two identical dmb intrinsics separated by a dsb intrinsic.
define void @test3() {
call void @llvm.arm.dmb(i32 11)
call void @llvm.arm.dsb(i32 9)
call void @llvm.arm.dmb(i32 11)
ret void
; The dsb in between should prevent the two dmbs from collapsing, so both
; dmbs must remain, one on each side of the dsb.
; CHECK: test3:
; CHECK: dmb
; CHECK-NEXT: dsb
; CHECK-NEXT: dmb
}
declare void @llvm.arm.dmb(i32)
declare void @llvm.arm.dsb(i32)