diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index dd17d881c51..6606316735a 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -519,21 +519,24 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, return true; // If both blocks have an unconditional branch temporarily stripped out, - // treat that as an additional common instruction. - if (MBB1 != PredBB && MBB2 != PredBB && + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && !MBB1->back().getDesc().isBarrier() && !MBB2->back().getDesc().isBarrier()) - --minCommonTailLength; + ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. - if (CommonTailLen >= minCommonTailLength) + if (EffectiveTailLen >= minCommonTailLength) return true; - // If we are optimizing for code size, 1 instruction in common is enough if - // we don't have to split a block. At worst we will be replacing a - // fallthrough into the common tail with a branch, which at worst breaks - // even with falling through into the duplicated common tail. - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + if (EffectiveTailLen >= 2 && + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index fdf96d60866..0d86e561325 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -293,3 +293,116 @@ return: } declare void @func() + +; one - One instruction may be tail-duplicated even with optsize. 
+ +; CHECK: one: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) + +@XYZ = external global i32 + +define void @one() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + unreachable + +return: + ret void +} + +; two - Same as one, but with two instructions in the common +; tail instead of one. This is too much to be tail-duplicated, +; given the optsize attribute, so the common tail is merged. + +; CHECK: two: +; CHECK-NOT: XYZ +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK-NOT: XYZ +; CHECK: ret + +define void @two() nounwind optsize { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +} + +; two_nosize - Same as two, but without the optsize attribute. +; Now two instructions are enough to be tail-duplicated. + +; CHECK: two_nosize: +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) +; CHECK: movl $0, XYZ(%rip) +; CHECK: movl $1, XYZ(%rip) + +define void @two_nosize() nounwind { +entry: + %0 = icmp eq i32 undef, 0 + br i1 %0, label %bbx, label %bby + +bby: + switch i32 undef, label %bb7 [ + i32 16, label %return + ] + +bb7: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +bbx: + switch i32 undef, label %bb12 [ + i32 128, label %return + ] + +bb12: + volatile store i32 0, i32* @XYZ + volatile store i32 1, i32* @XYZ + unreachable + +return: + ret void +}