mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-26 02:22:29 +00:00 
			
		
		
		
	[Hexagon] Generate loop1 instruction for nested loops
loop1 is for the outer loop and loop0 is for the inner loop.
Differential Revision: http://reviews.llvm.org/D9680
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237266 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -159,7 +159,7 @@ namespace { | ||||
|                                        MachineOperand *InitialValue, | ||||
|                                        const MachineOperand *Endvalue, | ||||
|                                        int64_t IVBump) const; | ||||
|      | ||||
|  | ||||
|     /// \brief Analyze the statements in a loop to determine if the loop | ||||
|     /// has a computable trip count and, if so, return a value that represents | ||||
|     /// the trip count expression. | ||||
| @@ -179,15 +179,16 @@ namespace { | ||||
|  | ||||
|     /// \brief Return true if the instruction is not valid within a hardware | ||||
|     /// loop. | ||||
|     bool isInvalidLoopOperation(const MachineInstr *MI) const; | ||||
|     bool isInvalidLoopOperation(const MachineInstr *MI, | ||||
|                                 bool IsInnerHWLoop) const; | ||||
|  | ||||
|     /// \brief Return true if the loop contains an instruction that inhibits | ||||
|     /// using the hardware loop. | ||||
|     bool containsInvalidInstruction(MachineLoop *L) const; | ||||
|     bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; | ||||
|  | ||||
|     /// \brief Given a loop, check if we can convert it to a hardware loop. | ||||
|     /// If so, then perform the conversion and return true. | ||||
|     bool convertToHardwareLoop(MachineLoop *L); | ||||
|     bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); | ||||
|  | ||||
|     /// \brief Return true if the instruction is now dead. | ||||
|     bool isDead(const MachineInstr *MI, | ||||
| @@ -307,18 +308,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) | ||||
| INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", | ||||
|                     "Hexagon Hardware Loops", false, false) | ||||
|  | ||||
|  | ||||
| /// \brief Returns true if the instruction is a hardware loop instruction. | ||||
| static bool isHardwareLoop(const MachineInstr *MI) { | ||||
|   return MI->getOpcode() == Hexagon::J2_loop0r || | ||||
|     MI->getOpcode() == Hexagon::J2_loop0i; | ||||
| } | ||||
|  | ||||
| FunctionPass *llvm::createHexagonHardwareLoops() { | ||||
|   return new HexagonHardwareLoops(); | ||||
| } | ||||
|  | ||||
|  | ||||
| bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { | ||||
|   DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); | ||||
|  | ||||
| @@ -329,12 +322,12 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { | ||||
|   MDT = &getAnalysis<MachineDominatorTree>(); | ||||
|   TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); | ||||
|  | ||||
|   for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); | ||||
|        I != E; ++I) { | ||||
|     MachineLoop *L = *I; | ||||
|     if (!L->getParentLoop()) | ||||
|       Changed |= convertToHardwareLoop(L); | ||||
|   } | ||||
|   for (auto &L : *MLI) | ||||
|     if (!L->getParentLoop()) { | ||||
|       bool L0Used = false; | ||||
|       bool L1Used = false; | ||||
|       Changed |= convertToHardwareLoop(L, L0Used, L1Used); | ||||
|     } | ||||
|  | ||||
|   return Changed; | ||||
| } | ||||
| @@ -467,27 +460,27 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, | ||||
|   case Hexagon::C2_cmpeqi: | ||||
|   case Hexagon::C2_cmpeq: | ||||
|   case Hexagon::C2_cmpeqp: | ||||
|     Cmp = Comparison::Kind::EQ; | ||||
|     Cmp = Comparison::EQ; | ||||
|     break; | ||||
|   case Hexagon::C4_cmpneq: | ||||
|   case Hexagon::C4_cmpneqi: | ||||
|     Cmp = Comparison::Kind::NE; | ||||
|     Cmp = Comparison::NE; | ||||
|     break; | ||||
|   case Hexagon::C4_cmplte: | ||||
|     Cmp = Comparison::Kind::LEs; | ||||
|     Cmp = Comparison::LEs; | ||||
|     break; | ||||
|   case Hexagon::C4_cmplteu: | ||||
|     Cmp = Comparison::Kind::LEu; | ||||
|     Cmp = Comparison::LEu; | ||||
|     break; | ||||
|   case Hexagon::C2_cmpgtui: | ||||
|   case Hexagon::C2_cmpgtu: | ||||
|   case Hexagon::C2_cmpgtup: | ||||
|     Cmp = Comparison::Kind::GTu; | ||||
|     Cmp = Comparison::GTu; | ||||
|     break; | ||||
|   case Hexagon::C2_cmpgti: | ||||
|   case Hexagon::C2_cmpgt: | ||||
|   case Hexagon::C2_cmpgtp: | ||||
|     Cmp = Comparison::Kind::GTs; | ||||
|     Cmp = Comparison::GTs; | ||||
|     break; | ||||
|   default: | ||||
|     return (Comparison::Kind)0; | ||||
| @@ -749,7 +742,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, | ||||
|   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); | ||||
|   DebugLoc DL; | ||||
|   if (InsertPos != PH->end()) | ||||
|     InsertPos->getDebugLoc(); | ||||
|     DL = InsertPos->getDebugLoc(); | ||||
|  | ||||
|   // If Start is an immediate and End is a register, the trip count | ||||
|   // will be "reg - imm".  Hexagon's "subtract immediate" instruction | ||||
| @@ -828,7 +821,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, | ||||
|     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : | ||||
|                               (RegToImm ? TII->get(Hexagon::A2_subri) : | ||||
|                                           TII->get(Hexagon::A2_addi)); | ||||
|     if (RegToReg || RegToImm) {     | ||||
|     if (RegToReg || RegToImm) { | ||||
|       unsigned SubR = MRI->createVirtualRegister(IntRC); | ||||
|       MachineInstrBuilder SubIB = | ||||
|         BuildMI(*PH, InsertPos, DL, SubD, SubR); | ||||
| @@ -902,51 +895,50 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, | ||||
|   return new CountValue(CountValue::CV_Register, CountR, CountSR); | ||||
| } | ||||
|  | ||||
|  | ||||
| /// \brief Return true if the operation is invalid within hardware loop. | ||||
| bool HexagonHardwareLoops::isInvalidLoopOperation( | ||||
|       const MachineInstr *MI) const { | ||||
| bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, | ||||
|                                                   bool IsInnerHWLoop) const { | ||||
|  | ||||
|   // A call is not allowed because the callee may use a hardware loop, | ||||
|   // except when the call never returns. | ||||
|   if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr) | ||||
|     return true; | ||||
|  | ||||
|   // do not allow nested hardware loops | ||||
|   if (isHardwareLoop(MI)) | ||||
|     return true; | ||||
|  | ||||
|   // check if the instruction defines a hardware loop register | ||||
|   // Check if the instruction defines a hardware loop register. | ||||
|   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | ||||
|     const MachineOperand &MO = MI->getOperand(i); | ||||
|     if (!MO.isReg() || !MO.isDef()) | ||||
|       continue; | ||||
|     unsigned R = MO.getReg(); | ||||
|     if (R == Hexagon::LC0 || R == Hexagon::LC1 || | ||||
|         R == Hexagon::SA0 || R == Hexagon::SA1) | ||||
|     if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 || | ||||
|                           R == Hexagon::LC1 || R == Hexagon::SA1)) | ||||
|       return true; | ||||
|     if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1)) | ||||
|       return true; | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
|  | ||||
| /// \brief - Return true if the loop contains an instruction that inhibits | ||||
| /// the use of the hardware loop function. | ||||
| bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const { | ||||
| /// \brief Return true if the loop contains an instruction that inhibits | ||||
| /// the use of the hardware loop instruction. | ||||
| bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, | ||||
|     bool IsInnerHWLoop) const { | ||||
|   const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); | ||||
|   DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); | ||||
|   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { | ||||
|     MachineBasicBlock *MBB = Blocks[i]; | ||||
|     for (MachineBasicBlock::iterator | ||||
|            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { | ||||
|       const MachineInstr *MI = &*MII; | ||||
|       if (isInvalidLoopOperation(MI)) | ||||
|       if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { | ||||
|         DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); | ||||
|         return true; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
|  | ||||
| /// \brief Returns true if the instruction is dead.  This was essentially | ||||
| /// copied from DeadMachineInstructionElim::isDead, but with special cases | ||||
| /// for inline asm, physical registers and instructions with side effects | ||||
| @@ -1041,19 +1033,47 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { | ||||
| /// | ||||
| /// The code makes several assumptions about the representation of the loop | ||||
| /// in llvm. | ||||
| bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
| bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, | ||||
|                                                  bool &RecL0used, | ||||
|                                                  bool &RecL1used) { | ||||
|   // This is just for sanity. | ||||
|   assert(L->getHeader() && "Loop without a header?"); | ||||
|  | ||||
|   bool Changed = false; | ||||
|   bool L0Used = false; | ||||
|   bool L1Used = false; | ||||
|  | ||||
|   // Process nested loops first. | ||||
|   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) | ||||
|     Changed |= convertToHardwareLoop(*I); | ||||
|   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) { | ||||
|     Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used); | ||||
|     L0Used |= RecL0used; | ||||
|     L1Used |= RecL1used; | ||||
|   } | ||||
|  | ||||
|   // If nested loops have been converted and both hardware loops (loop0 and | ||||
|   // loop1) are already in use, then we can't convert this loop. | ||||
|   if (Changed) | ||||
|   if (Changed && L0Used && L1Used) | ||||
|     return Changed; | ||||
|  | ||||
|   unsigned LOOP_i; | ||||
|   unsigned LOOP_r; | ||||
|   unsigned ENDLOOP; | ||||
|  | ||||
|   // Flag used to track loopN instruction: | ||||
|   // 1 - Hardware loop is being generated for the innermost loop. | ||||
|   // 0 - Hardware loop is being generated for the outer loop. | ||||
|   unsigned IsInnerHWLoop = 1; | ||||
|  | ||||
|   if (L0Used) { | ||||
|     LOOP_i = Hexagon::J2_loop1i; | ||||
|     LOOP_r = Hexagon::J2_loop1r; | ||||
|     ENDLOOP = Hexagon::ENDLOOP1; | ||||
|     IsInnerHWLoop = 0; | ||||
|   } else { | ||||
|     LOOP_i = Hexagon::J2_loop0i; | ||||
|     LOOP_r = Hexagon::J2_loop0r; | ||||
|     ENDLOOP = Hexagon::ENDLOOP0; | ||||
|   } | ||||
|  | ||||
| #ifndef NDEBUG | ||||
|   // Stop trying after reaching the limit (if any). | ||||
|   int Limit = HWLoopLimit; | ||||
| @@ -1065,10 +1085,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
| #endif | ||||
|  | ||||
|   // Does the loop contain any invalid instructions? | ||||
|   if (containsInvalidInstruction(L)) | ||||
|   if (containsInvalidInstruction(L, IsInnerHWLoop)) | ||||
|     return false; | ||||
|  | ||||
|   MachineBasicBlock *LastMBB = L->getExitingBlock(); | ||||
|   MachineBasicBlock *LastMBB = getExitingBlock(L); | ||||
|   // Don't generate hw loop if the loop has more than one exit. | ||||
|   if (!LastMBB) | ||||
|     return false; | ||||
| @@ -1141,8 +1161,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
|     BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) | ||||
|       .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); | ||||
|     // Add the Loop instruction to the beginning of the loop. | ||||
|     BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) | ||||
|       .addMBB(LoopStart) | ||||
|     BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) | ||||
|       .addReg(CountReg); | ||||
|   } else { | ||||
|     assert(TripCount->isImm() && "Expecting immediate value for trip count"); | ||||
| @@ -1150,14 +1169,14 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
|     // if the immediate fits in the instructions.  Otherwise, we need to | ||||
|     // create a new virtual register. | ||||
|     int64_t CountImm = TripCount->getImm(); | ||||
|     if (!TII->isValidOffset(Hexagon::J2_loop0i, CountImm)) { | ||||
|     if (!TII->isValidOffset(LOOP_i, CountImm)) { | ||||
|       unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); | ||||
|       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) | ||||
|         .addImm(CountImm); | ||||
|       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0r)) | ||||
|       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) | ||||
|         .addMBB(LoopStart).addReg(CountReg); | ||||
|     } else | ||||
|       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::J2_loop0i)) | ||||
|       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) | ||||
|         .addMBB(LoopStart).addImm(CountImm); | ||||
|   } | ||||
|  | ||||
| @@ -1171,8 +1190,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
|  | ||||
|   // Replace the loop branch with an endloop instruction. | ||||
|   DebugLoc LastIDL = LastI->getDebugLoc(); | ||||
|   BuildMI(*LastMBB, LastI, LastIDL, | ||||
|           TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart); | ||||
|   BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); | ||||
|  | ||||
|   // The loop ends with either: | ||||
|   //  - a conditional branch followed by an unconditional branch, or | ||||
| @@ -1200,6 +1218,15 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { | ||||
|     removeIfDead(OldInsts[i]); | ||||
|  | ||||
|   ++NumHWLoops; | ||||
|  | ||||
|   // Set RecL1used and RecL0used only after the hardware loop has been | ||||
|   // successfully generated. Doing it earlier can cause the wrong loop | ||||
|   // instruction to be used. | ||||
|   if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. | ||||
|     RecL1used = true; | ||||
|   else | ||||
|     RecL0used = true; | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| @@ -1533,7 +1560,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( | ||||
|   if (Header->pred_size() > 2) { | ||||
|     // Ensure that the header has only two predecessors: the preheader and | ||||
|     // the loop latch.  Any additional predecessors of the header should | ||||
|     // join at the newly created preheader.  Inspect all PHI nodes from the | ||||
|     // join at the newly created preheader. Inspect all PHI nodes from the | ||||
|     // header and create appropriate corresponding PHI nodes in the preheader. | ||||
|  | ||||
|     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); | ||||
|   | ||||
							
								
								
									
										68
									
								
								test/CodeGen/Hexagon/hwloop-loop1.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								test/CodeGen/Hexagon/hwloop-loop1.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | ||||
| ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s | ||||
| ; | ||||
| ; Generate loop1 instruction for double loop sequence. | ||||
|  | ||||
| ; CHECK: loop0(.LBB{{.}}_{{.}}, #100) | ||||
| ; CHECK: endloop0 | ||||
| ; CHECK: loop1(.LBB{{.}}_{{.}}, #100) | ||||
| ; CHECK: loop0(.LBB{{.}}_{{.}}, #100) | ||||
| ; CHECK: endloop0 | ||||
| ; CHECK: endloop1 | ||||
|  | ||||
| define i32 @main() #0 { | ||||
| entry: | ||||
|   %array = alloca [100 x i32], align 8 | ||||
|   %doublearray = alloca [100 x [100 x i32]], align 8 | ||||
|   %0 = bitcast [100 x i32]* %array to i8* | ||||
|   call void @llvm.lifetime.start(i64 400, i8* %0) #1 | ||||
|   %1 = bitcast [100 x [100 x i32]]* %doublearray to i8* | ||||
|   call void @llvm.lifetime.start(i64 40000, i8* %1) #1 | ||||
|   %arrayidx1 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 10, i32 10 | ||||
|   %arrayidx2.gep = getelementptr [100 x i32], [100 x i32]* %array, i32 0, i32 0 | ||||
|   br label %for.body | ||||
|  | ||||
| for.body: | ||||
|   %2 = phi i32 [ undef, %entry ], [ %.pre, %for.body.for.body_crit_edge ] | ||||
|   %sum.031 = phi i32 [ undef, %entry ], [ %add, %for.body.for.body_crit_edge ] | ||||
|   %arrayidx2.phi = phi i32* [ %arrayidx2.gep, %entry ], [ %arrayidx2.inc, %for.body.for.body_crit_edge ] | ||||
|   %i.030 = phi i32 [ 1, %entry ], [ %phitmp, %for.body.for.body_crit_edge ] | ||||
|   %add = add nsw i32 %2, %sum.031 | ||||
|   %exitcond33 = icmp eq i32 %i.030, 100 | ||||
|   %arrayidx2.inc = getelementptr i32, i32* %arrayidx2.phi, i32 1 | ||||
|   br i1 %exitcond33, label %for.cond7.preheader.preheader, label %for.body.for.body_crit_edge | ||||
|  | ||||
| for.cond7.preheader.preheader: | ||||
|   br label %for.cond7.preheader | ||||
|  | ||||
| for.body.for.body_crit_edge: | ||||
|   %.pre = load i32, i32* %arrayidx2.inc, align 4 | ||||
|   %phitmp = add i32 %i.030, 1 | ||||
|   br label %for.body | ||||
|  | ||||
| for.cond7.preheader: | ||||
|   %i.129 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond7.preheader.preheader ] | ||||
|   br label %for.body9 | ||||
|  | ||||
| for.body9: | ||||
|   %j.028 = phi i32 [ 0, %for.cond7.preheader ], [ %inc13, %for.body9 ] | ||||
|   %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 %i.129, i32 %j.028 | ||||
|   store i32 %add, i32* %arrayidx11, align 4 | ||||
|   %inc13 = add nsw i32 %j.028, 1 | ||||
|   %exitcond = icmp eq i32 %inc13, 100 | ||||
|   br i1 %exitcond, label %for.inc15, label %for.body9 | ||||
|  | ||||
| for.inc15: | ||||
|   %inc16 = add nsw i32 %i.129, 1 | ||||
|   %exitcond32 = icmp eq i32 %inc16, 100 | ||||
|   br i1 %exitcond32, label %for.end17, label %for.cond7.preheader | ||||
|  | ||||
| for.end17: | ||||
|   %3 = load i32, i32* %arrayidx1, align 8 | ||||
|   call void @llvm.lifetime.end(i64 40000, i8* %1) #1 | ||||
|   call void @llvm.lifetime.end(i64 400, i8* %0) #1 | ||||
|   ret i32 %3 | ||||
| } | ||||
|  | ||||
| declare void @llvm.lifetime.start(i64, i8* nocapture) #1 | ||||
|  | ||||
| declare void @llvm.lifetime.end(i64, i8* nocapture) #1 | ||||
		Reference in New Issue
	
	Block a user