diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8f519407ccd..9dec22ec78a 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -264,14 +264,8 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { return Hash; } -/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a -/// branch must be inserted). For blocks with a successor, one of the -/// two blocks to be tail-merged will end with a branch already, so -/// it gains to cross-jump even for one instruction. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, - unsigned minCommonTailLength) { +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. @@ -283,20 +277,8 @@ static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, return 0; // MBB empty except for debug info. --I; } - unsigned Hash = HashMachineInstr(I); - if (I == MBB->begin() || minCommonTailLength == 1) - return Hash; // Single instr MBB. - - --I; - while (I->isDebugValue()) { - if (I==MBB->begin()) - return Hash; // MBB with single non-debug instr. - --I; - } - // Hash in the second-to-last instruction. - Hash ^= HashMachineInstr(I) << 2; - return Hash; + return HashMachineInstr(I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -811,7 +793,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } // See if we can do any tail merging on those. 
@@ -897,8 +879,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), - *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } if (MergePotentials.size() >= 2) diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll index ce57d736c16..d9cac8022b2 100644 --- a/test/CodeGen/ARM/ifcvt2.ll +++ b/test/CodeGen/ARM/ifcvt2.ll @@ -1,7 +1,8 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -march=arm | grep bxlt | count 1 -; RUN: llc < %s -march=arm | grep bxgt | count 1 -; RUN: llc < %s -march=arm | grep bxge | count 1 +; RUN: llc < %s -march=arm > %t +; RUN: grep bxlt %t | count 1 +; RUN: grep bxgt %t | count 1 +; RUN: not grep bxge %t +; RUN: not grep bxle %t define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { %tmp2 = icmp sgt i32 %c, 10 diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index 8677ce53597..bca2ae346a6 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -9,7 +9,7 @@ entry: ret void } -define void @t1(i32 %a, i32 %b) { +define i32 @t1(i32 %a, i32 %b) { ; CHECK: t1: ; CHECK: ldmialt sp!, {r7, pc} entry: @@ -18,8 +18,8 @@ entry: cond_true: ; preds = %entry tail call void @foo( i32 %b ) - ret void + ret i32 0 UnifiedReturnBlock: ; preds = %entry - ret void + ret i32 1 } diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index bfb7f6eabc7..e3086a332a8 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-eabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s - -; A fix for PR5204 will require this check to be changed. 
+; PR5204 %"struct.__gnu_cxx::__normal_iterator, std::allocator > >" = type { i8* } %"struct.__gnu_cxx::new_allocator" = type <{ i8 }> @@ -11,11 +10,9 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string,std::allocator >"* %this, %"struct.std::basic_string,std::allocator >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: -; CHECK: it ne -; CHECK-NEXT: ldmiane.w -; CHECK-NEXT: itt eq -; CHECK-NEXT: subeq.w -; CHECK-NEXT: ldmiaeq.w +; CHECK: it eq +; CHECK-NEXT: subeq.w r0, r6, r8 +; CHECK-NEXT: ldmia.w sp, {r4, r5, r6, r8, r9, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %__str) ; [#uses=3] diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll index 129838457b2..1d2af7a5474 100644 --- a/test/CodeGen/Thumb2/thumb2-branch.ll +++ b/test/CodeGen/Thumb2/thumb2-branch.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -define void @f1(i32 %a, i32 %b, i32* %v) { +define i32 @f1(i32 %a, i32 %b, i32* %v) { entry: ; CHECK: f1: ; CHECK: bne LBB @@ -9,13 +9,13 @@ entry: cond_true: ; preds = %entry store i32 0, i32* %v - ret void + ret i32 0 return: ; preds = %entry - ret void + ret i32 1 } -define void @f2(i32 %a, i32 %b, i32* %v) { +define i32 @f2(i32 %a, i32 %b, i32* %v) { entry: ; CHECK: f2: ; CHECK: bge LBB @@ -24,13 +24,13 @@ entry: cond_true: ; preds = %entry store i32 0, i32* %v - ret void + ret i32 0 return: ; preds = %entry - ret void + ret i32 1 } -define void @f3(i32 %a, i32 %b, i32* %v) { +define i32 @f3(i32 %a, i32 %b, i32* %v) { entry: ; CHECK: f3: ; CHECK: bhs LBB @@ -39,13 +39,13 @@ entry: cond_true: ; preds = %entry store i32 0, i32* %v - ret void + ret i32 0 return: ; preds = %entry - ret void + ret i32 1 } -define void @f4(i32 %a, i32 %b, i32* %v) { +define i32 @f4(i32 %a, i32 %b, i32* 
%v) { entry: ; CHECK: f4: ; CHECK: blo LBB @@ -54,8 +54,8 @@ entry: cond_true: ; preds = %entry store i32 0, i32* %v - ret void + ret i32 0 return: ; preds = %entry - ret void + ret i32 1 } diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index 4d93bd719f7..9662ad6cd74 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -406,3 +406,26 @@ bb12: return: ret void } + +; Tail-merging should merge the two ret instructions since one side +; can fall through into the ret and the other side has to branch anyway. + +; CHECK: TESTE: +; CHECK: imulq +; CHECK-NEXT: LBB8_2: +; CHECK-NEXT: ret + +define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone { +entry: + %cmp = icmp slt i64 %parami, 1 ; [#uses=1] + %varx.0 = select i1 %cmp, i64 1, i64 %parami ; [#uses=1] + %cmp410 = icmp slt i64 %paraml, 1 ; [#uses=1] + br i1 %cmp410, label %for.end, label %bb.nph + +bb.nph: ; preds = %entry + %tmp15 = mul i64 %paraml, %parami ; [#uses=1] + ret i64 %tmp15 + +for.end: ; preds = %entry + ret i64 %varx.0 +}