diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index dd17d881c51..6606316735a 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -519,21 +519,24 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
     return true;
 
   // If both blocks have an unconditional branch temporarily stripped out,
-  // treat that as an additional common instruction.
-  if (MBB1 != PredBB && MBB2 != PredBB && 
+  // count that as an additional common instruction for the following
+  // heuristics.
+  unsigned EffectiveTailLen = CommonTailLen;
+  if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && 
       !MBB1->back().getDesc().isBarrier() &&
       !MBB2->back().getDesc().isBarrier())
-    --minCommonTailLength;
+    ++EffectiveTailLen;
 
   // Check if the common tail is long enough to be worthwhile.
-  if (CommonTailLen >= minCommonTailLength)
+  if (EffectiveTailLen >= minCommonTailLength)
     return true;
 
-  // If we are optimizing for code size, 1 instruction in common is enough if
-  // we don't have to split a block.  At worst we will be replacing a
-  // fallthrough into the common tail with a branch, which at worst breaks
-  // even with falling through into the duplicated common tail.
-  if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+  // If we are optimizing for code size, 2 instructions in common is enough if
+  // we don't have to split a block.  At worst we will be introducing 1 new
+  // branch instruction, which is likely to be smaller than the 2
+  // instructions that would be deleted in the merge.
+  if (EffectiveTailLen >= 2 &&
+      MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
       (I1 == MBB1->begin() || I2 == MBB2->begin()))
     return true;
 
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index fdf96d60866..0d86e561325 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -293,3 +293,116 @@ return:
 }
 
 declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be merged, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}