diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index f20dcae7168..873b2ec328e 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -173,6 +173,7 @@ private:
                            bool WantResult = true);
 
   // Emit functions.
+  bool emitCompareAndBranch(const BranchInst *BI);
   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
@@ -1927,30 +1928,125 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
   }
 }
 
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroIntoBranch(const CmpInst *CI) {
-  CmpInst::Predicate Predicate = CI->getPredicate();
-  if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
-    return false;
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
 
-  Type *Ty = CI->getOperand(0)->getType();
-  if (!Ty->isIntegerTy())
-    return false;
+  const Value *LHS = CI->getOperand(0);
+  const Value *RHS = CI->getOperand(1);
+
+  Type *Ty = LHS->getType();
+  if (!Ty->isIntegerTy())
+    return false;
 
   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
     return false;
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
-    if (C->isNullValue())
-      return true;
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
-    if (C->isNullValue())
-      return true;
+  // Try to take advantage of fallthrough opportunities.
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    Predicate = CmpInst::getInversePredicate(Predicate);
+  }
 
-  return false;
+  int TestBit = -1;
+  bool IsCmpNE;
+  if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+    if (const auto *C = dyn_cast<ConstantInt>(LHS))
+      if (C->isNullValue())
+        std::swap(LHS, RHS);
+
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *AndLHS = AI->getOperand(0);
+        const Value *AndRHS = AI->getOperand(1);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+          if (C->getValue().isPowerOf2())
+            std::swap(AndLHS, AndRHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+          if (C->getValue().isPowerOf2()) {
+            TestBit = C->getValue().logBase2();
+            LHS = AndLHS;
+          }
+      }
+    IsCmpNE = Predicate == CmpInst::ICMP_NE;
+  } else if (Predicate == CmpInst::ICMP_SLT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = true;
+  } else if (Predicate == CmpInst::ICMP_SGT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (cast<ConstantInt>(RHS)->getValue() != -1)
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = false;
+  } else
+    return false;
+
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::CBZW,  AArch64::CBZX },
+      {AArch64::CBNZW, AArch64::CBNZX} },
+    { {AArch64::TBZW,  AArch64::TBZX },
+      {AArch64::TBNZW, AArch64::TBNZX} }
+  };
+
+  bool IsBitTest = TestBit != -1;
+  bool Is64Bit = BW == 64;
+  if (TestBit < 32 && TestBit >= 0)
+    Is64Bit = false;
+
+  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+  const MCInstrDesc &II = TII.get(Opc);
+
+  unsigned SrcReg = getRegForValue(LHS);
+  if (!SrcReg)
+    return false;
+  bool SrcIsKill = hasTrivialKill(LHS);
+
+  if (BW == 64 && !Is64Bit) {
+    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                        AArch64::sub_32);
+    SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+  }
+
+  // Emit the combined compare and branch instruction.
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
+  if (IsBitTest)
+    MIB.addImm(TestBit);
+  MIB.addMBB(TBB);
+
+  // Obtain the branch weight and add the TrueBB to the successor list.
+  uint32_t BranchWeight = 0;
+  if (FuncInfo.BPI)
+    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+                                               TBB->getBasicBlock());
+  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+  fastEmitBranch(FBB, DbgLoc);
+
+  return true;
 }
 
 bool AArch64FastISel::selectBranch(const Instruction *I) {
@@ -1980,50 +2076,16 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
       return true;
     }
 
+    // Try to emit a combined compare-and-branch first.
+    if (emitCompareAndBranch(BI))
+      return true;
+
     // Try to take advantage of fallthrough opportunities.
     if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
       std::swap(TBB, FBB);
       Predicate = CmpInst::getInversePredicate(Predicate);
    }
 
-    // Try to optimize comparisons against zero.
-    if (canFoldZeroIntoBranch(CI)) {
-      const Value *LHS = CI->getOperand(0);
-      const Value *RHS = CI->getOperand(1);
-
-      // Canonicalize zero values to the RHS.
-      if (const auto *C = dyn_cast<ConstantInt>(LHS))
-        if (C->isNullValue())
-          std::swap(LHS, RHS);
-
-      static const unsigned OpcTable[2][2] = {
-        {AArch64::CBZW,  AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
-      };
-      bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
-      bool Is64Bit = LHS->getType()->isIntegerTy(64);
-      unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
-
-      unsigned SrcReg = getRegForValue(LHS);
-      if (!SrcReg)
-        return false;
-      bool SrcIsKill = hasTrivialKill(LHS);
-
-      // Emit the combined compare and branch instruction.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
-          .addReg(SrcReg, getKillRegState(SrcIsKill))
-          .addMBB(TBB);
-
-      // Obtain the branch weight and add the TrueBB to the successor list.
-      uint32_t BranchWeight = 0;
-      if (FuncInfo.BPI)
-        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                   TBB->getBasicBlock());
-      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-      fastEmitBranch(FBB, DbgLoc);
-      return true;
-    }
-
     // Emit the cmp.
     if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
       return false;
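Note on the new ICMP_SLT path: `icmp slt %x, 0` is true exactly when the sign bit of `%x` is set, so `emitCompareAndBranch` sets `TestBit = BW - 1` and branches with a single TBNZ instead of a compare plus `b.lt`. A minimal sketch of IR that should take this path (hypothetical function and labels, not from the patch or its tests); the false block is placed first so the fallthrough swap above does not turn the predicate into the unhandled ICMP_SGE:

; Sketch only: TestBit = BW - 1 = 31 and IsCmpNE = true here, so the
; expected fast-isel lowering is a single "tbnz w0, #31" to %neg.
define i32 @icmp_slt_example(i32 %a) {
  %cmp = icmp slt i32 %a, 0
  br i1 %cmp, label %neg, label %nonneg
nonneg:
  ret i32 0
neg:
  ret i32 1
}
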
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
new file mode 100644
index 00000000000..9fd10cb7a47
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -0,0 +1,125 @@
+; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK: tbz {{w[0-9]+}}, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp eq i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK: tbz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp eq i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK: tbz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_1
+; CHECK: tbz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_2
+; CHECK: tbz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i8
+; CHECK: tbnz w0, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp ne i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i16
+; CHECK: tbnz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp ne i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i32(i32 %a) {
+; CHECK-LABEL: icmp_ne_i32
+; CHECK: tbnz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp ne i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_1
+; CHECK: tbnz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_2
+; CHECK: tbnz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!1 = metadata !{metadata !"branch_weights", i32 2147483647, i32 0}
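The tests above only exercise the and+icmp eq/ne bit tests; the ICMP_SGT path is the mirror image of ICMP_SLT. `icmp sgt %x, -1` is true exactly when the sign bit is clear, so it selects TBZ on bit `BW - 1`; with an i64 operand that is bit 63, keeping the X-register form because `TestBit < 32` does not hold. A minimal sketch under the same assumptions as the earlier example (hypothetical function, not one of the committed tests):

; Sketch only: TestBit = 63 and IsCmpNE = false here, so the expected
; fast-isel lowering is a single "tbz x0, #63" to %nonneg.
define i32 @icmp_sgt_example(i64 %a) {
  %cmp = icmp sgt i64 %a, -1
  br i1 %cmp, label %nonneg, label %neg
neg:
  ret i32 0
nonneg:
  ret i32 1
}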