From 2c04dae715b05017d7d2c19ab4f8cb37c1e650ae Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Wed, 28 Oct 2009 22:10:20 +0000 Subject: [PATCH] Reimplement BranchFolding change to avoid tail merging for a 1 instruction common tail, except when the OptimizeForSize function attribute is present. Radar 7338114. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 28 ++++++++++--------- test/CodeGen/X86/2008-02-18-TailMergingBug.ll | 2 +- test/CodeGen/X86/2008-05-12-tailmerge-5.ll | 2 +- .../X86/convert-2-addr-3-addr-inc64.ll | 2 +- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 66c5aa5ff78..7bc25ab9e95 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -18,6 +18,7 @@ #define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" +#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -465,22 +466,23 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, CurMPIter!=B && CurMPIter->first==CurHash; --CurMPIter) { for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) { - unsigned CommonTailLen = ComputeCommonTailLength( - CurMPIter->second, - I->second, - TrialBBI1, TrialBBI2); + unsigned CommonTailLen = ComputeCommonTailLength(CurMPIter->second, + I->second, + TrialBBI1, TrialBBI2); // If we will have to split a block, there should be at least - // minCommonTailLength instructions in common; if not, at worst - // we will be replacing a fallthrough into the common tail with a - // branch, which at worst breaks even with falling through into - // the duplicated common tail, so 1 instruction in common is enough. - // We will always pick a block we do not have to split as the common - // tail if there is one. - // (Empty blocks will get forwarded and need not be considered.) + // minCommonTailLength instructions in common. Otherwise, if we are + // optimizing for code size, 1 instruction in common is enough. At + // worst we will be replacing a fallthrough into the common tail with a + // branch, which at worst breaks even with falling through into the + // duplicated common tail. We will always pick a block we do not have + // to split as the common tail if there is one. (Empty blocks will get + // forwarded and need not be considered.) + MachineFunction *MF = CurMPIter->second->getParent(); if (CommonTailLen >= minCommonTailLength || (CommonTailLen > 0 && - (TrialBBI1==CurMPIter->second->begin() || - TrialBBI2==I->second->begin()))) { + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + (TrialBBI1 == CurMPIter->second->begin() || + TrialBBI2 == I->second->begin()))) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll index 9b52c5c0699..7463a0eebf3 100644 --- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -3,7 +3,7 @@ @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1] -define void @minmax(float* %result) nounwind { +define void @minmax(float* %result) nounwind optsize { entry: %tmp2 = load float* %result, align 4 ; [#uses=6] %tmp4 = getelementptr float* %result, i32 2 ; [#uses=5] diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll index 1f95a2409fe..4852e89c4d9 100644 --- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll +++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin8" %struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] } -define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind { +define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signext %c, i16 signext %t, i16 signext %u, i8 signext %d) nounwind optsize { entry: %str_addr = alloca %struct.BoundaryAlignment ; <%struct.BoundaryAlignment*> [#uses=7] %s_addr = alloca i16 ; [#uses=1] diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll index 2b4b83259b8..337f1b2a8e7 100644 --- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll +++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll @@ -2,7 +2,7 @@ ; RUN: grep {asm-printer} | grep {Number of machine instrs printed} | grep 5 ; RUN: grep {leal 1(\%rsi),} %t -define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind { +define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind optsize { entry: %0 = add i32 %i2, 1 ; [#uses=1] %1 = sext i32 %0 to i64 ; [#uses=1]