From 834b08af8d3d8fc6c76ac6ca40674565689e8d7f Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Wed, 18 Nov 2009 03:34:27 +0000 Subject: [PATCH] Add a target hook to allow changing the tail duplication limit based on the contents of the block to be duplicated. Use this for ARM Cortex A8/9 to be more aggressive tail duplicating indirect branches, since it makes it much more likely that they will be predicted in the branch target buffer. Testcase coming soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@89187 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetInstrInfo.h | 7 +++++++ lib/CodeGen/BranchFolding.cpp | 9 +++++---- lib/Target/ARM/ARMBaseInstrInfo.cpp | 10 ++++++++++ lib/Target/ARM/ARMBaseInstrInfo.h | 3 +++ lib/Target/ARM/ARMSubtarget.cpp | 2 ++ lib/Target/ARM/ARMSubtarget.h | 5 +++++ 6 files changed, 32 insertions(+), 4 deletions(-) diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 40b7780dfd2..43fd54e183a 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -536,6 +536,13 @@ public: /// length. virtual unsigned getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const; + + /// TailDuplicationLimit - Returns the limit on the number of instructions + /// in basic block MBB beyond which it will not be tail-duplicated. + virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const { + return DefaultLimit; + } }; /// TargetInstrInfoImpl - This is the default implementation of diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index d344af06fd5..94bfb7204ba 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1033,12 +1033,13 @@ bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, if (TailBB->isSuccessor(TailBB)) return false; - // Duplicate up to one less than the tail-merge threshold. When optimizing - // for size, duplicate only one, because one branch instruction can be - // eliminated to compensate for the duplication. + // Set the limit on the number of instructions to duplicate, with a default + // of one less than the tail-merge threshold. When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. unsigned MaxDuplicateCount = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ? - 1 : (TailMergeSize - 1); + 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1); // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 828041c64ec..b50b6098dd2 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1005,6 +1005,16 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); } +unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const { + // If the target processor can predict indirect branches, it is highly + // desirable to duplicate them, since it can often make them predictable. + if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) && + getSubtarget().hasBranchTargetBuffer()) + return DefaultLimit + 2; + return DefaultLimit; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 29855e2c616..73e854faf2f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -272,6 +272,9 @@ public: virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; + + virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const; }; static inline diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 87b78a9ee5c..432ed78c19a 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -109,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } + HasBranchTargetBuffer = (CPUString == "cortex-a8" || + CPUString == "cortex-a9"); } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index a95c8a5ccff..3d0e01e99b7 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -50,6 +50,9 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// HasBranchTargetBuffer - True if processor can predict indirect branches. + bool HasBranchTargetBuffer; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -123,6 +126,8 @@ protected: bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } + bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; } + bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; }