From 23b0065f29198c58a446c49a1f4de9c2e01d2606 Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Thu, 14 May 2015 14:15:08 +0000 Subject: [PATCH] [Hexagon] Check for underflow/wrap in hardware loop pass If the loop trip count may underflow or wrap, the compiler should not generate a hardware loop since the trip count will be incorrect. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237365 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 364 +++++++++++++++++--- test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll | 45 +++ test/CodeGen/Hexagon/hwloop-recursion.ll | 64 ++++ test/CodeGen/Hexagon/hwloop-wrap.ll | 22 ++ test/CodeGen/Hexagon/hwloop-wrap2.ll | 67 ++++ 5 files changed, 506 insertions(+), 56 deletions(-) create mode 100644 test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll create mode 100644 test/CodeGen/Hexagon/hwloop-recursion.ll create mode 100644 test/CodeGen/Hexagon/hwloop-wrap.ll create mode 100644 test/CodeGen/Hexagon/hwloop-wrap2.ll diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 8433ec4c775..c06d9e0a4e6 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -95,6 +95,7 @@ namespace { } private: + typedef std::map LoopFeederMap; /// Kinds of comparisons in the compare instructions. struct Comparison { @@ -203,14 +204,44 @@ namespace { /// defined. If the instructions are out of order, try to reorder them. bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); - /// \brief Get the instruction that loads an immediate value into \p R, - /// or 0 if such an instruction does not exist. - MachineInstr *defWithImmediate(unsigned R); + /// \brief Return true if MO and MI pair is visited only once. If visited + /// more than once, this indicates there is recursion. In such a case, + /// return false. + bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, + const MachineOperand *MO, + LoopFeederMap &LoopFeederPhi) const; - /// \brief Get the immediate value referenced to by \p MO, either for - /// immediate operands, or for register operands, where the register - /// was defined with an immediate value. - int64_t getImmediate(MachineOperand &MO); + /// \brief Return true if the Phi may generate a value that may underflow, + /// or may wrap. + bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Return true if the induction variable may underflow an unsigned + /// value in the first iteration. + bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, + const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const; + + /// \brief Check if the given operand has a compile-time known constant + /// value. Return true if yes, and false otherwise. When returning true, set + /// Val to the corresponding constant value. + bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; + + /// \brief Check if the operand has a compile-time known constant value. + bool isImmediate(const MachineOperand &MO) const { + int64_t V; + return checkForImmediate(MO, V); + } + + /// \brief Return the immediate for the specified operand. + int64_t getImmediate(const MachineOperand &MO) const { + int64_t V; + if (!checkForImmediate(MO, V)) + llvm_unreachable("Invalid operand"); + return V; + } /// \brief Reset the given machine operand to now refer to a new immediate /// value. Assumes that the operand was already referencing an immediate @@ -384,15 +415,16 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, unsigned PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); unsigned UpdOpc = DI->getOpcode(); - bool isAdd = (UpdOpc == Hexagon::A2_addi); + bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp); if (isAdd) { - // If the register operand to the add is the PHI we're - // looking at, this meets the induction pattern. + // If the register operand to the add is the PHI we're looking at, this + // meets the induction pattern. unsigned IndReg = DI->getOperand(1).getReg(); - if (MRI->getVRegDef(IndReg) == Phi) { + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { unsigned UpdReg = DI->getOperand(0).getReg(); - int64_t V = DI->getOperand(2).getImm(); IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -670,8 +702,10 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, End = &EndValInstr->getOperand(1); } - assert (Start->isReg() || Start->isImm()); - assert (End->isReg() || End->isImm()); + if (!Start->isReg() && !Start->isImm()) + return nullptr; + if (!End->isReg() && !End->isImm()) + return nullptr; bool CmpLess = Cmp & Comparison::L; bool CmpGreater = Cmp & Comparison::G; @@ -682,12 +716,20 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // Loop going while iv is "less" with the iv value going down. Must wrap. return nullptr; - // If loop executes while iv is "greater" with the iv value going up, then - // the iv must wrap. if (CmpGreater && IVBump > 0) // Loop going while iv is "greater" with the iv value going up. Must wrap. return nullptr; + // Phis that may feed into the loop. + LoopFeederMap LoopFeederPhi; + + // Check if the inital value may be zero and can be decremented in the first + // iteration. If the value is zero, the endloop instruction will not decrement + // the loop counter, so we shoudn't generate a hardware loop in this case. + if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, + LoopFeederPhi)) + return nullptr; + if (Start->isImm() && End->isImm()) { // Both, start and end are immediates. int64_t StartV = Start->getImm(); @@ -710,14 +752,16 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, if (CmpHasEqual) Dist = Dist > 0 ? Dist+1 : Dist-1; - // assert (CmpLess => Dist > 0); - assert ((!CmpLess || Dist > 0) && "Loop should never iterate!"); - // assert (CmpGreater => Dist < 0); - assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!"); + // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, + // CmpGreater should imply Dist < 0. These conditions could actually + // fail, for example, in unreachable code (which may still appear to be + // reachable in the CFG). + if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) + return nullptr; // "Normalized" distance, i.e. with the bump set to +-1. - int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump - : (-Dist + (-IVBump-1)) / (-IVBump); + int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump + : (-Dist + (-IVBump - 1)) / (-IVBump); assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); uint64_t Count = Dist1; @@ -979,7 +1023,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, MachineOperand &Use = *J; MachineInstr *UseMI = Use.getParent(); - // If the phi node has a user that is not MI, bail... + // If the phi node has a user that is not MI, bail. if (MI != UseMI) return false; } @@ -1230,7 +1274,6 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, return true; } - bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI) { assert (BumpI != CmpI && "Bump and compare in the same instruction?"); @@ -1271,35 +1314,226 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, return FoundBump; } +/// This function is required to break recursion. Visiting phis in a loop may +/// result in recursion during compilation. We break the recursion by making +/// sure that we visit a MachineOperand and its definition in a +/// MachineInstruction only once. If we attempt to visit more than once, then +/// there is recursion, and will return false. +bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, + MachineInstr *MI, + const MachineOperand *MO, + LoopFeederMap &LoopFeederPhi) const { + if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { + const std::vector &Blocks = L->getBlocks(); + DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); + // Ignore all BBs that form Loop. + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *MBB = Blocks[i]; + if (A == MBB) + return false; + } + MachineInstr *Def = MRI->getVRegDef(MO->getReg()); + LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); + return true; + } else + // Already visited node. + return false; +} -MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { +/// Return true if a Phi may generate a value that can underflow. +/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. +bool HexagonHardwareLoops::phiMayWrapOrUnderflow( + MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, + MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { + assert(Phi->isPHI() && "Expecting a Phi."); + // Walk through each Phi, and its used operands. Make sure that + // if there is recursion in Phi, we won't generate hardware loops. + for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2) + if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)) + if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, + Phi->getParent(), L, LoopFeederPhi)) + return true; + return false; +} + +/// Return true if the induction variable can underflow in the first iteration. +/// An example, is an initial unsigned value that is 0 and is decrement in the +/// first itertion of a do-while loop. In this case, we cannot generate a +/// hardware loop because the endloop instruction does not decrement the loop +/// counter if it is <= 1. We only need to perform this analysis if the +/// initial value is a register. +/// +/// This function assumes the initial value may underfow unless proven +/// otherwise. If the type is signed, then we don't care because signed +/// underflow is undefined. We attempt to prove the initial value is not +/// zero by perfoming a crude analysis of the loop counter. This function +/// checks if the initial value is used in any comparison prior to the loop +/// and, if so, assumes the comparison is a range check. This is inexact, +/// but will catch the simple cases. +bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( + const MachineOperand *InitVal, const MachineOperand *EndVal, + MachineBasicBlock *MBB, MachineLoop *L, + LoopFeederMap &LoopFeederPhi) const { + // Only check register values since they are unknown. + if (!InitVal->isReg()) + return false; + + if (!EndVal->isImm()) + return false; + + // A register value that is assigned an immediate is a known value, and it + // won't underflow in the first iteration. + int64_t Imm; + if (checkForImmediate(*InitVal, Imm)) + return (EndVal->getImm() == Imm); + + unsigned Reg = InitVal->getReg(); + + // We don't know the value of a physical register. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return true; + + MachineInstr *Def = MRI->getVRegDef(Reg); + if (!Def) + return true; + + // If the initial value is a Phi or copy and the operands may not underflow, + // then the definition cannot be underflow either. + if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), + EndVal, Def->getParent(), + L, LoopFeederPhi)) + return false; + + // Iterate over the uses of the initial value. If the initial value is used + // in a compare, then we assume this is a range check that ensures the loop + // doesn't underflow. This is not an exact test and should be improved. + for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), + E = MRI->use_instr_nodbg_end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned CmpReg1 = 0, CmpReg2 = 0; + int CmpMask = 0, CmpValue = 0; + + if (!TII->analyzeCompare(MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (TII->AnalyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) + continue; + + Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0); + if (Cmp == 0) + continue; + if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) + Cmp = Comparison::getNegatedComparison(Cmp); + if (CmpReg2 != 0 && CmpReg2 == Reg) + Cmp = Comparison::getSwappedComparison(Cmp); + + // Signed underflow is undefined. + if (Comparison::isSigned(Cmp)) + return false; + + // Check if there is a comparison of the inital value. If the initial value + // is greater than or not equal to another value, then assume this is a + // range check. + if ((Cmp & Comparison::G) || Cmp == Comparison::NE) + return false; + } + + // OK - this is a hack that needs to be improved. We really need to analyze + // the instructions performed on the initial value. This works on the simplest + // cases only. + if (!Def->isCopy() && !Def->isPHI()) + return false; + + return true; +} + +bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, + int64_t &Val) const { + if (MO.isImm()) { + Val = MO.getImm(); + return true; + } + if (!MO.isReg()) + return false; + + // MO is a register. Check whether it is defined as an immediate value, + // and if so, get the value of it in TV. That value will then need to be + // processed to handle potential subregisters in MO. + int64_t TV; + + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; MachineInstr *DI = MRI->getVRegDef(R); unsigned DOpc = DI->getOpcode(); switch (DOpc) { + case TargetOpcode::COPY: case Hexagon::A2_tfrsi: case Hexagon::A2_tfrpi: case Hexagon::CONST32_Int_Real: - case Hexagon::CONST64_Int_Real: - return DI; + case Hexagon::CONST64_Int_Real: { + // Call recursively to avoid an extra check whether operand(1) is + // indeed an immediate (it could be a global address, for example), + // plus we can handle COPY at the same time. + if (!checkForImmediate(DI->getOperand(1), TV)) + return false; + break; + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S2 = DI->getOperand(2); + int64_t V1, V2; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) + return false; + TV = V2 | (V1 << 32); + break; + } + case TargetOpcode::REG_SEQUENCE: { + const MachineOperand &S1 = DI->getOperand(1); + const MachineOperand &S3 = DI->getOperand(3); + int64_t V1, V3; + if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) + return false; + unsigned Sub2 = DI->getOperand(2).getImm(); + unsigned Sub4 = DI->getOperand(4).getImm(); + if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg) + TV = V1 | (V3 << 32); + else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg) + TV = V3 | (V1 << 32); + else + llvm_unreachable("Unexpected form of REG_SEQUENCE"); + break; + } + + default: + return false; } - return nullptr; + + // By now, we should have successfuly obtained the immediate value defining + // the register referenced in MO. Handle a potential use of a subregister. + switch (MO.getSubReg()) { + case Hexagon::subreg_loreg: + Val = TV & 0xFFFFFFFFULL; + break; + case Hexagon::subreg_hireg: + Val = (TV >> 32) & 0xFFFFFFFFULL; + break; + default: + Val = TV; + break; + } + return true; } - -int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) { - if (MO.isImm()) - return MO.getImm(); - assert(MO.isReg()); - unsigned R = MO.getReg(); - MachineInstr *DI = defWithImmediate(R); - assert(DI && "Need an immediate operand"); - // All currently supported "define-with-immediate" instructions have the - // actual immediate value in the operand(1). - int64_t v = DI->getOperand(1).getImm(); - return v; -} - - void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { if (MO.isImm()) { MO.setImm(Val); @@ -1314,11 +1548,19 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { unsigned NewR = MRI->createVirtualRegister(RC); MachineBasicBlock &B = *DI->getParent(); DebugLoc DL = DI->getDebugLoc(); - BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR) - .addImm(Val); + BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); MO.setReg(NewR); } +static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) { + // These two instructions are not extendable. + if (CmpOpc == Hexagon::A4_cmpbeqi) + return isUInt<8>(Imm); + if (CmpOpc == Hexagon::A4_cmpbgti) + return isInt<8>(Imm); + // The rest of the comparison-with-immediate instructions are extendable. + return true; +} bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineBasicBlock *Header = L->getHeader(); @@ -1359,9 +1601,10 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { // If the register operand to the add/sub is the PHI we are looking // at, this meets the induction pattern. unsigned IndReg = DI->getOperand(1).getReg(); - if (MRI->getVRegDef(IndReg) == Phi) { + MachineOperand &Opnd2 = DI->getOperand(2); + int64_t V; + if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { unsigned UpdReg = DI->getOperand(0).getReg(); - int64_t V = DI->getOperand(2).getImm(); IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -1440,8 +1683,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (MO.isImplicit()) continue; if (MO.isUse()) { - unsigned R = MO.getReg(); - if (!defWithImmediate(R)) { + if (!isImmediate(MO)) { CmpRegs.insert(MO.getReg()); continue; } @@ -1477,16 +1719,27 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (!CmpImmOp) return false; + // If the register is being compared against an immediate, try changing + // the compare instruction to use induction register and adjust the + // immediate operand. int64_t CmpImm = getImmediate(*CmpImmOp); int64_t V = RB.second; - if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit). - return false; - if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit). + // Handle Overflow (64-bit). + if (((V > 0) && (CmpImm > INT64_MAX - V)) || + ((V < 0) && (CmpImm < INT64_MIN - V))) return false; CmpImm += V; - // Some forms of cmp-immediate allow u9 and s10. Assume the worst case - // scenario, i.e. an 8-bit value. - if (CmpImmOp->isImm() && !isInt<8>(CmpImm)) + // Most comparisons of register against an immediate value allow + // the immediate to be constant-extended. There are some exceptions + // though. Make sure the new combination will work. + if (CmpImmOp->isImm()) + if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm)) + return false; + + // It is not valid to do this transformation on an unsigned comparison + // because it may underflow. + Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0); + if (!Cmp || Comparison::isUnsigned(Cmp)) return false; // Make sure that the compare happens after the bump. Otherwise, @@ -1511,7 +1764,6 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { return false; } - /// \brief Create a preheader for a given loop. MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineLoop *L) { diff --git a/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll b/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll new file mode 100644 index 00000000000..7c5ea031ffa --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll @@ -0,0 +1,45 @@ +;RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that a hardware loop is not generaetd due to a potential +; underflow. + +; CHECK-NOT: loop0 + +define i32 @main() #0 { +entry: + br label %while.cond.outer + +while.cond.outer.loopexit: + %.lcssa = phi i32 [ %0, %for.body.preheader ] + br label %while.cond.outer + +while.cond.outer: + %i.0.ph = phi i32 [ 0, %entry ], [ 3, %while.cond.outer.loopexit ] + %j.0.ph = phi i32 [ 0, %entry ], [ %.lcssa, %while.cond.outer.loopexit ] + %k.0.ph = phi i32 [ 0, %entry ], [ 1, %while.cond.outer.loopexit ] + br label %while.cond + +while.cond: + %i.0 = phi i32 [ %i.0.ph, %while.cond.outer ], [ %inc, %for.body.preheader ] + %j.0 = phi i32 [ %j.0.ph, %while.cond.outer ], [ %0, %for.body.preheader ] + %inc = add nsw i32 %i.0, 1 + %cmp = icmp slt i32 %i.0, 4 + br i1 %cmp, label %for.body.preheader, label %while.end + +for.body.preheader: + %0 = add i32 %j.0, 3 + %cmp5 = icmp eq i32 %inc, 3 + br i1 %cmp5, label %while.cond.outer.loopexit, label %while.cond + +while.end: + %k.0.ph.lcssa = phi i32 [ %k.0.ph, %while.cond ] + %inc.lcssa = phi i32 [ %inc, %while.cond ] + %j.0.lcssa = phi i32 [ %j.0, %while.cond ] + %cmp6 = icmp ne i32 %inc.lcssa, 5 + %cmp7 = icmp ne i32 %j.0.lcssa, 12 + %or.cond = or i1 %cmp6, %cmp7 + %cmp9 = icmp ne i32 %k.0.ph.lcssa, 1 + %or.cond12 = or i1 %or.cond, %cmp9 + %locflg.0 = zext i1 %or.cond12 to i32 + ret i32 %locflg.0 +} diff --git a/test/CodeGen/Hexagon/hwloop-recursion.ll b/test/CodeGen/Hexagon/hwloop-recursion.ll new file mode 100644 index 00000000000..8ab2dc37d02 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-recursion.ll @@ -0,0 +1,64 @@ +; RUN: llc -O2 -march=hexagon -mcpu=hexagonv5 < %s +; REQUIRES: asserts +; Check for successful compilation. + +@c = common global i32 0, align 4 +@e = common global i32 0, align 4 +@g = common global i32* null, align 4 +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@h = common global i32* null, align 4 +@d = common global i32 0, align 4 +@f = common global i32 0, align 4 + +define i32 @fn1([0 x i32]* nocapture readnone %p1) #0 { +entry: + %0 = load i32*, i32** @h, align 4 + %1 = load i32*, i32** @g, align 4 + %.pre = load i32, i32* @c, align 4 + br label %for.cond + +for.cond: + %2 = phi i32 [ %10, %if.end ], [ %.pre, %entry ] + store i32 %2, i32* @e, align 4 + %tobool5 = icmp eq i32 %2, 0 + br i1 %tobool5, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + %3 = sub i32 -5, %2 + %4 = urem i32 %3, 5 + %5 = sub i32 %3, %4 + br label %for.body + +for.body: + %add6 = phi i32 [ %2, %for.body.lr.ph ], [ %add, %for.body ] + %6 = load i32, i32* %1, align 4 + store i32 %6, i32* @a, align 4 + %add = add nsw i32 %add6, 5 + %tobool = icmp eq i32 %add, 0 + br i1 %tobool, label %for.cond1.for.end_crit_edge, label %for.body + +for.cond1.for.end_crit_edge: + %7 = add i32 %2, 5 + %8 = add i32 %7, %5 + store i32 %8, i32* @e, align 4 + br label %for.end + +for.end: + %9 = load i32, i32* @b, align 4 + %tobool2 = icmp eq i32 %9, 0 + br i1 %tobool2, label %if.end, label %if.then + +if.then: + store i32 0, i32* %0, align 4 + %.pre7 = load i32, i32* @c, align 4 + br label %if.end + +if.end: + %10 = phi i32 [ %2, %for.end ], [ %.pre7, %if.then ] + store i32 %10, i32* @d, align 4 + %11 = load i32, i32* @f, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* @f, align 4 + br label %for.cond +} diff --git a/test/CodeGen/Hexagon/hwloop-wrap.ll b/test/CodeGen/Hexagon/hwloop-wrap.ll new file mode 100644 index 00000000000..e0f6a87fd2e --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-wrap.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s + +; We shouldn't generate a hardware loop in this case because the initial +; value may be zero, which means the endloop instruction will not decrement +; the loop counter, and the loop will execute only once. + +; CHECK-NOT: loop0 + +define void @foo(i32 %count, i32 %v) #0 { +entry: + br label %do.body + +do.body: + %count.addr.0 = phi i32 [ %count, %entry ], [ %dec, %do.body ] + tail call void asm sideeffect "nop", ""() #1 + %dec = add i32 %count.addr.0, -1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %do.end, label %do.body + +do.end: + ret void +} diff --git a/test/CodeGen/Hexagon/hwloop-wrap2.ll b/test/CodeGen/Hexagon/hwloop-wrap2.ll new file mode 100644 index 00000000000..50675d6b681 --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-wrap2.ll @@ -0,0 +1,67 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s + +; Test that we do not generate a hardware loop due to a potential underflow. + +; CHECK-NOT: loop0 + +%struct.3 = type { i8*, i8, i8, i32, i32, i16, i16, i16, i16, i16, i16, i16, %struct.2* } +%struct.2 = type { i16, i16, i16, i16, %struct.1* } +%struct.1 = type { %struct.1*, %struct.0*, i32, i32, i16, [2 x i16], [2 x i16], i16 } +%struct.0 = type { %struct.0*, i32, i32, i32, i32, i32, i32, i16, i16, i16, i8, i8, i8, i8 } + +@pairArray = external global i32** +@carray = external global %struct.3** + +define void @test() #0 { +entry: + %0 = load i32**, i32*** @pairArray, align 4 + %1 = load %struct.3**, %struct.3*** @carray, align 4 + br i1 undef, label %for.end110, label %for.body + +for.body: + %row.0199 = phi i32 [ %inc109, %for.inc108 ], [ 1, %entry ] + %arrayidx = getelementptr inbounds i32*, i32** %0, i32 %row.0199 + %2 = load i32*, i32** %arrayidx, align 4 + br i1 undef, label %for.body48, label %for.inc108 + +for.cond45: + %cmp46 = icmp sgt i32 %dec58, 0 + br i1 %cmp46, label %for.body48, label %for.inc108 + +for.body48: + %i.1190 = phi i32 [ %dec58, %for.cond45 ], [ 0, %for.body ] + %arrayidx50 = getelementptr inbounds i32, i32* %2, i32 %i.1190 + %3 = load i32, i32* %arrayidx50, align 4 + %cmp53 = icmp slt i32 %3, 0 + %dec58 = add nsw i32 %i.1190, -1 + br i1 %cmp53, label %for.end59, label %for.cond45 + +for.end59: + %cmp60 = icmp slt i32 %i.1190, 0 + br i1 %cmp60, label %if.then65, label %for.inc108 + +if.then65: + br label %for.body80 + +for.body80: + %j.1196.in = phi i32 [ %j.1196, %for.body80 ], [ %i.1190, %if.then65 ] + %j.1196 = add nsw i32 %j.1196.in, 1 + %arrayidx81 = getelementptr inbounds i32, i32* %2, i32 %j.1196 + %4 = load i32, i32* %arrayidx81, align 4 + %arrayidx82 = getelementptr inbounds %struct.3*, %struct.3** %1, i32 %4 + %5 = load %struct.3*, %struct.3** %arrayidx82, align 4 + %cxcenter83 = getelementptr inbounds %struct.3, %struct.3* %5, i32 0, i32 3 + store i32 0, i32* %cxcenter83, align 4 + %6 = load i32, i32* %arrayidx81, align 4 + %arrayidx87 = getelementptr inbounds i32, i32* %2, i32 %j.1196.in + store i32 %6, i32* %arrayidx87, align 4 + %exitcond = icmp eq i32 %j.1196, 0 + br i1 %exitcond, label %for.inc108, label %for.body80 + +for.inc108: + %inc109 = add nsw i32 %row.0199, 1 + br i1 undef, label %for.body, label %for.end110 + +for.end110: + ret void +}