From ae69a2a12bd0af3fa81957f7896d1a54ad69dbb2 Mon Sep 17 00:00:00 2001 From: Evan Cheng <evan.cheng@apple.com> Date: Fri, 19 Jun 2009 23:17:27 +0000 Subject: [PATCH] Enable arm pre-allocation load / store multiple optimization pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73791 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 52 ++++++++++++++++-------- lib/Target/ARM/ARMTargetMachine.cpp | 5 --- test/CodeGen/ARM/ldrd.ll | 6 +-- test/CodeGen/ARM/stm.ll | 2 +- 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5cdb244ca96..59cf125a9b9 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -992,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -static bool IsSafeToMove(bool isLd, unsigned Base, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> MoveOps, - const TargetRegisterInfo *TRI) { +static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallSet<unsigned, 4> &MemRegs, + const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? // FIXME: This is overly conservative. We should make use of alias information // some day. 
+ SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { + if (MemOps.count(&*I)) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) return false; @@ -1013,16 +1018,26 @@ static bool IsSafeToMove(bool isLd, unsigned Base, // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore() && !MoveOps.count(&*I)) + if (TID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); - if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; + if (Reg != Base && !MemRegs.count(Reg)) + AddedRegPressure.insert(Reg); } } - return true; + + // Estimate register pressure increase due to the transformation. + if (MemRegs.size() <= 4) + // Ok if we are moving small number of instructions. + return true; + return AddedRegPressure.size() <= MemRegs.size() * 2; } bool @@ -1123,29 +1138,33 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOffset = Offset; LastBytes = Bytes; LastOpcode = Opcode; - if (++NumMove == 4) + if (++NumMove == 8) // FIXME: Tune break; } if (NumMove <= 1) Ops.pop_back(); else { - SmallPtrSet<MachineInstr*, 4> MoveOps; - for (int i = NumMove-1; i >= 0; --i) - MoveOps.insert(Ops[i]); + SmallPtrSet<MachineInstr*, 4> MemOps; + SmallSet<unsigned, 4> MemRegs; + for (int i = NumMove-1; i >= 0; --i) { + MemOps.insert(Ops[i]); + MemRegs.insert(Ops[i]->getOperand(0).getReg()); + } // Be conservative, if the instructions are too far apart, don't // move them. We want to limit the increase of register pressure. - bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 
if (DoMove) - DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, + MemOps, MemRegs, TRI); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + while (InsertPos != MBB->end() && MemOps.count(InsertPos)) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -1279,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } if (StopHere) { - // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + // Found a duplicate (a base+offset combination that's seen earlier). + // Backtrack. --Loc; break; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index dec023231e1..8006b9be32e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,9 +23,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt<bool> -EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, - cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -155,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (!EnablePreLdStOpti) - return false; // FIXME: temporarily disabling load / store optimization pass for Thumb mode. 
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 09bc5fccea0..f1bee058a0f 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin -arm-pre-alloc-loadstore-opti | grep ldrd -; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin -arm-pre-alloc-loadstore-opti | not grep ldrd -; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -arm-pre-alloc-loadstore-opti | not grep ldrd +; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd +; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd +; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd ; rdar://r6949835 @b = external global i64* diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll index 585645b0214..ed5e4c5f594 100644 --- a/test/CodeGen/ARM/stm.ll +++ b/test/CodeGen/ARM/stm.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 -arm-pre-alloc-loadstore-opti | grep stm | count 2 +; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2 @"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1] @"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]