diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index f20dcae7168..873b2ec328e 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -173,6 +173,7 @@ private:
                            bool WantResult = true);
 
   // Emit functions.
+  bool emitCompareAndBranch(const BranchInst *BI);
   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
@@ -1927,30 +1928,125 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
   }
 }
 
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroIntoBranch(const CmpInst *CI) {
-  CmpInst::Predicate Predicate = CI->getPredicate();
-  if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
-    return false;
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+  assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+  const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
 
-  Type *Ty = CI->getOperand(0)->getType();
-  if (!Ty->isIntegerTy())
-    return false;
+  const Value *LHS = CI->getOperand(0);
+  const Value *RHS = CI->getOperand(1);
+
+  Type *Ty = LHS->getType();
+  if (!Ty->isIntegerTy())
+    return false;
 
   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
     return false;
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
-    if (C->isNullValue())
-      return true;
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
-    if (C->isNullValue())
-      return true;
+  // Try to take advantage of fallthrough opportunities.
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    Predicate = CmpInst::getInversePredicate(Predicate);
+  }
 
-  return false;
+  int TestBit = -1;
+  bool IsCmpNE;
+  if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+    if (const auto *C = dyn_cast<ConstantInt>(LHS))
+      if (C->isNullValue())
+        std::swap(LHS, RHS);
+
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+      if (AI->getOpcode() == Instruction::And) {
+        const Value *AndLHS = AI->getOperand(0);
+        const Value *AndRHS = AI->getOperand(1);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+          if (C->getValue().isPowerOf2())
+            std::swap(AndLHS, AndRHS);
+
+        if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+          if (C->getValue().isPowerOf2()) {
+            TestBit = C->getValue().logBase2();
+            LHS = AndLHS;
+          }
+      }
+    IsCmpNE = Predicate == CmpInst::ICMP_NE;
+  } else if (Predicate == CmpInst::ICMP_SLT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (!cast<ConstantInt>(RHS)->isNullValue())
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = true;
+  } else if (Predicate == CmpInst::ICMP_SGT) {
+    if (!isa<ConstantInt>(RHS))
+      return false;
+
+    if (cast<ConstantInt>(RHS)->getValue() != -1)
+      return false;
+
+    TestBit = BW - 1;
+    IsCmpNE = false;
+  } else
+    return false;
+
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::CBZW,  AArch64::CBZX },
+      {AArch64::CBNZW, AArch64::CBNZX} },
+    { {AArch64::TBZW,  AArch64::TBZX },
+      {AArch64::TBNZW, AArch64::TBNZX} }
+  };
+
+  bool IsBitTest = TestBit != -1;
+  bool Is64Bit = BW == 64;
+  if (TestBit < 32 && TestBit >= 0)
+    Is64Bit = false;
+
+  unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+  const MCInstrDesc &II = TII.get(Opc);
+
+  unsigned SrcReg = getRegForValue(LHS);
+  if (!SrcReg)
+    return false;
+  bool SrcIsKill = hasTrivialKill(LHS);
+
+  if (BW == 64 && !Is64Bit) {
+    SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+                                        AArch64::sub_32);
+    SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+  }
+
+  // Emit the combined compare and branch instruction.
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+          .addReg(SrcReg, getKillRegState(SrcIsKill));
+  if (IsBitTest)
+    MIB.addImm(TestBit);
+  MIB.addMBB(TBB);
+
+  // Obtain the branch weight and add the TrueBB to the successor list.
+  uint32_t BranchWeight = 0;
+  if (FuncInfo.BPI)
+    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+                                               TBB->getBasicBlock());
+  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+  fastEmitBranch(FBB, DbgLoc);
+
+  return true;
 }
 
 bool AArch64FastISel::selectBranch(const Instruction *I) {
@@ -1980,50 +2076,16 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
       return true;
     }
 
+    // Try to emit a combined compare-and-branch first.
+    if (emitCompareAndBranch(BI))
+      return true;
+
     // Try to take advantage of fallthrough opportunities.
     if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
       std::swap(TBB, FBB);
       Predicate = CmpInst::getInversePredicate(Predicate);
    }
 
-    // Try to optimize comparisons against zero.
-    if (canFoldZeroIntoBranch(CI)) {
-      const Value *LHS = CI->getOperand(0);
-      const Value *RHS = CI->getOperand(1);
-
-      // Canonicalize zero values to the RHS.
-      if (const auto *C = dyn_cast<ConstantInt>(LHS))
-        if (C->isNullValue())
-          std::swap(LHS, RHS);
-
-      static const unsigned OpcTable[2][2] = {
-        {AArch64::CBZW,  AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX}
-      };
-      bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
-      bool Is64Bit = LHS->getType()->isIntegerTy(64);
-      unsigned Opc = OpcTable[IsCmpNE][Is64Bit];
-
-      unsigned SrcReg = getRegForValue(LHS);
-      if (!SrcReg)
-        return false;
-      bool SrcIsKill = hasTrivialKill(LHS);
-
-      // Emit the combined compare and branch instruction.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
-          .addReg(SrcReg, getKillRegState(SrcIsKill))
-          .addMBB(TBB);
-
-      // Obtain the branch weight and add the TrueBB to the successor list.
-      uint32_t BranchWeight = 0;
-      if (FuncInfo.BPI)
-        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
-                                                   TBB->getBasicBlock());
-      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
-      fastEmitBranch(FBB, DbgLoc);
-      return true;
-    }
-
     // Emit the cmp.
     if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
       return false;
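Note on the new ICMP_SLT path: `icmp slt %x, 0` is true exactly when the sign bit of `%x` is set, so `emitCompareAndBranch` sets `TestBit = BW - 1` and branches with a single TBNZ instead of a compare plus `b.lt`. A minimal sketch of IR that should take this path (hypothetical function and labels, not from the patch or its tests); the false block is placed first so the fallthrough swap above does not turn the predicate into the unhandled ICMP_SGE:

; Sketch only: TestBit = BW - 1 = 31 and IsCmpNE = true here, so the
; expected fast-isel lowering is a single "tbnz w0, #31" to %neg.
define i32 @icmp_slt_example(i32 %a) {
  %cmp = icmp slt i32 %a, 0
  br i1 %cmp, label %neg, label %nonneg
nonneg:
  ret i32 0
neg:
  ret i32 1
}
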
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
new file mode 100644
index 00000000000..9fd10cb7a47
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -0,0 +1,125 @@
+; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK: tbz {{w[0-9]+}}, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp eq i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK: tbz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp eq i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK: tbz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_1
+; CHECK: tbz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_2
+; CHECK: tbz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i8
+; CHECK: tbnz w0, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp ne i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i16
+; CHECK: tbnz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp ne i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i32(i32 %a) {
+; CHECK-LABEL: icmp_ne_i32
+; CHECK: tbnz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp ne i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_1
+; CHECK: tbnz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_2
+; CHECK: tbnz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!1 = metadata !{metadata !"branch_weights", i32 2147483647, i32 0}
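The tests above only exercise the and+icmp eq/ne bit tests; the ICMP_SGT path is the mirror image of ICMP_SLT. `icmp sgt %x, -1` is true exactly when the sign bit is clear, so it selects TBZ on bit `BW - 1`; with an i64 operand that is bit 63, keeping the X-register form because `TestBit < 32` does not hold. A minimal sketch under the same assumptions as the earlier example (hypothetical function, not one of the committed tests):

; Sketch only: TestBit = 63 and IsCmpNE = false here, so the expected
; fast-isel lowering is a single "tbz x0, #63" to %nonneg.
define i32 @icmp_sgt_example(i64 %a) {
  %cmp = icmp sgt i64 %a, -1
  br i1 %cmp, label %nonneg, label %neg
neg:
  ret i32 0
nonneg:
  ret i32 1
}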