mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Teach LoopUnrollPass to respect loop unrolling hints in metadata.
[This is resubmitting r210721, which was reverted due to suspected breakage which turned out to be unrelated]. Some extra review comments were addressed. See D4090 and D4147 for more details. The Clang change that produces this metadata was committed in r210667. Patch by Mark Heffernan. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211076 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -18,8 +18,10 @@ | ||||
| #include "llvm/Analysis/ScalarEvolution.h" | ||||
| #include "llvm/Analysis/TargetTransformInfo.h" | ||||
| #include "llvm/IR/DataLayout.h" | ||||
| #include "llvm/IR/DiagnosticInfo.h" | ||||
| #include "llvm/IR/Dominators.h" | ||||
| #include "llvm/IR/IntrinsicInst.h" | ||||
| #include "llvm/IR/Metadata.h" | ||||
| #include "llvm/Support/CommandLine.h" | ||||
| #include "llvm/Support/Debug.h" | ||||
| #include "llvm/Support/raw_ostream.h" | ||||
| @@ -36,7 +38,8 @@ UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, | ||||
|  | ||||
| static cl::opt<unsigned> | ||||
| UnrollCount("unroll-count", cl::init(0), cl::Hidden, | ||||
|   cl::desc("Use this unroll count for all loops, for testing purposes")); | ||||
|   cl::desc("Use this unroll count for all loops including those with " | ||||
|            "unroll_count pragma values, for testing purposes")); | ||||
|  | ||||
| static cl::opt<bool> | ||||
| UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, | ||||
| @@ -47,6 +50,11 @@ static cl::opt<bool> | ||||
| UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, | ||||
|   cl::desc("Unroll loops with run-time trip counts")); | ||||
|  | ||||
| static cl::opt<unsigned> | ||||
| PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, | ||||
|   cl::desc("Unrolled size limit for loops with an unroll(enable) or " | ||||
|            "unroll_count pragma.")); | ||||
|  | ||||
| namespace { | ||||
|   class LoopUnroll : public LoopPass { | ||||
|   public: | ||||
| @@ -109,6 +117,66 @@ namespace { | ||||
|       // For now, recreate dom info, if loop is unrolled. | ||||
|       AU.addPreserved<DominatorTreeWrapperPass>(); | ||||
|     } | ||||
|  | ||||
|     // Fill in the UnrollingPreferences parameter with values from the | ||||
|     // TargetTransformInfo. | ||||
|     void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, | ||||
|                                  TargetTransformInfo::UnrollingPreferences &UP) { | ||||
|       UP.Threshold = CurrentThreshold; | ||||
|       UP.OptSizeThreshold = OptSizeUnrollThreshold; | ||||
|       UP.PartialThreshold = CurrentThreshold; | ||||
|       UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; | ||||
|       UP.Count = CurrentCount; | ||||
|       UP.MaxCount = UINT_MAX; | ||||
|       UP.Partial = CurrentAllowPartial; | ||||
|       UP.Runtime = CurrentRuntime; | ||||
|       TTI.getUnrollingPreferences(L, UP); | ||||
|     } | ||||
|  | ||||
|     // Select and return an unroll count based on parameters from | ||||
|     // user, unroll preferences, unroll pragmas, or a heuristic. | ||||
|     // SetExplicitly is set to true if the unroll count is set by | ||||
|     // the user or a pragma rather than selected heuristically. | ||||
|     unsigned | ||||
|     selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma, | ||||
|                       unsigned PragmaCount, | ||||
|                       const TargetTransformInfo::UnrollingPreferences &UP, | ||||
|                       bool &SetExplicitly); | ||||
|  | ||||
|  | ||||
|     // Select threshold values used to limit unrolling based on a | ||||
|     // total unrolled size.  Parameters Threshold and PartialThreshold | ||||
|     // are set to the maximum unrolled size for fully and partially | ||||
|     // unrolled loops respectively. | ||||
|     void selectThresholds(const Loop *L, bool HasPragma, | ||||
|                           const TargetTransformInfo::UnrollingPreferences &UP, | ||||
|                           unsigned &Threshold, unsigned &PartialThreshold) { | ||||
|       // Determine the current unrolling threshold.  While this is | ||||
|       // normally set from UnrollThreshold, it is overridden to a | ||||
|       // smaller value if the current function is marked as | ||||
|       // optimize-for-size, and the unroll threshold was not user | ||||
|       // specified. | ||||
|       Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; | ||||
|       PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; | ||||
|       if (!UserThreshold && | ||||
|           L->getHeader()->getParent()->getAttributes(). | ||||
|               hasAttribute(AttributeSet::FunctionIndex, | ||||
|                            Attribute::OptimizeForSize)) { | ||||
|         Threshold = UP.OptSizeThreshold; | ||||
|         PartialThreshold = UP.PartialOptSizeThreshold; | ||||
|       } | ||||
|       if (HasPragma) { | ||||
|         // If the loop has an unrolling pragma, we want to be more | ||||
|         // aggressive with unrolling limits.  Set thresholds to at | ||||
|         // least the PragmaUnrollThreshold value which is larger than the | ||||
|         // default limits. | ||||
|         if (Threshold != NoThreshold) | ||||
|           Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold); | ||||
|         if (PartialThreshold != NoThreshold) | ||||
|           PartialThreshold = | ||||
|               std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold); | ||||
|       } | ||||
|     } | ||||
|   }; | ||||
| } | ||||
|  | ||||
| @@ -151,6 +219,105 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, | ||||
|   return LoopSize; | ||||
| } | ||||
|  | ||||
| // Returns the value associated with the given metadata node name (for | ||||
| // example, "llvm.loopunroll.count").  If no such named metadata node | ||||
| // exists, then nullptr is returned. | ||||
| static const ConstantInt *GetUnrollMetadataValue(const Loop *L, | ||||
|                                                  StringRef Name) { | ||||
|   MDNode *LoopID = L->getLoopID(); | ||||
|   if (!LoopID) return nullptr; | ||||
|  | ||||
|   // First operand should refer to the loop id itself. | ||||
|   assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); | ||||
|   assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); | ||||
|  | ||||
|   for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { | ||||
|     const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); | ||||
|     if (!MD) continue; | ||||
|  | ||||
|     const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); | ||||
|     if (!S) continue; | ||||
|  | ||||
|     if (Name.equals(S->getString())) { | ||||
|       assert(MD->getNumOperands() == 2 && | ||||
|              "Unroll hint metadata should have two operands."); | ||||
|       return cast<ConstantInt>(MD->getOperand(1)); | ||||
|     } | ||||
|   } | ||||
|   return nullptr; | ||||
| } | ||||
|  | ||||
| // Returns true if the loop has an unroll(enable) pragma. | ||||
| static bool HasUnrollEnablePragma(const Loop *L) { | ||||
|   const ConstantInt *EnableValue = | ||||
|       GetUnrollMetadataValue(L, "llvm.loopunroll.enable"); | ||||
|   return (EnableValue && EnableValue->getZExtValue()); | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| // Returns true if the loop has an unroll(disable) pragma. | ||||
| static bool HasUnrollDisablePragma(const Loop *L) { | ||||
|   const ConstantInt *EnableValue = | ||||
|       GetUnrollMetadataValue(L, "llvm.loopunroll.enable"); | ||||
|   return (EnableValue && !EnableValue->getZExtValue()); | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| // If loop has an unroll_count pragma return the (necessarily | ||||
| // positive) value from the pragma.  Otherwise return 0. | ||||
| static unsigned UnrollCountPragmaValue(const Loop *L) { | ||||
|   const ConstantInt *CountValue = | ||||
|       GetUnrollMetadataValue(L, "llvm.loopunroll.count"); | ||||
|   if (CountValue) { | ||||
|     unsigned Count = CountValue->getZExtValue(); | ||||
|     assert(Count >= 1 && "Unroll count must be positive."); | ||||
|     return Count; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| unsigned LoopUnroll::selectUnrollCount( | ||||
|     const Loop *L, unsigned TripCount, bool HasEnablePragma, | ||||
|     unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP, | ||||
|     bool &SetExplicitly) { | ||||
|   SetExplicitly = true; | ||||
|  | ||||
|   // User-specified count (either as a command-line option or | ||||
|   // constructor parameter) has highest precedence. | ||||
|   unsigned Count = UserCount ? CurrentCount : 0; | ||||
|  | ||||
|   // If there is no user-specified count, unroll pragmas have the next | ||||
|   // highest precedence. | ||||
|   if (Count == 0) { | ||||
|     if (PragmaCount) { | ||||
|       Count = PragmaCount; | ||||
|     } else if (HasEnablePragma) { | ||||
|       // unroll(enable) pragma without an unroll_count pragma | ||||
|       // indicates to unroll loop fully. | ||||
|       Count = TripCount; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (Count == 0) | ||||
|     Count = UP.Count; | ||||
|  | ||||
|   if (Count == 0) { | ||||
|     SetExplicitly = false; | ||||
|     if (TripCount == 0) | ||||
|       // Runtime trip count. | ||||
|       Count = UnrollRuntimeCount; | ||||
|     else | ||||
|       // Conservative heuristic: if we know the trip count, see if we can | ||||
|       // completely unroll (subject to the threshold, checked below); otherwise | ||||
|       // try to find greatest modulo of the trip count which is still under | ||||
|       // threshold value. | ||||
|       Count = TripCount; | ||||
|   } | ||||
|   if (TripCount && Count > TripCount) | ||||
|     return TripCount; | ||||
|   return Count; | ||||
| } | ||||
|  | ||||
| bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { | ||||
|   if (skipOptnoneFunction(L)) | ||||
|     return false; | ||||
| @@ -162,33 +329,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { | ||||
|   BasicBlock *Header = L->getHeader(); | ||||
|   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() | ||||
|         << "] Loop %" << Header->getName() << "\n"); | ||||
|   (void)Header; | ||||
|  | ||||
|   if (HasUnrollDisablePragma(L)) { | ||||
|     return false; | ||||
|   } | ||||
|   bool HasEnablePragma = HasUnrollEnablePragma(L); | ||||
|   unsigned PragmaCount = UnrollCountPragmaValue(L); | ||||
|   bool HasPragma = HasEnablePragma || PragmaCount > 0; | ||||
|  | ||||
|   TargetTransformInfo::UnrollingPreferences UP; | ||||
|   UP.Threshold = CurrentThreshold; | ||||
|   UP.OptSizeThreshold = OptSizeUnrollThreshold; | ||||
|   UP.PartialThreshold = CurrentThreshold; | ||||
|   UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; | ||||
|   UP.Count = CurrentCount; | ||||
|   UP.MaxCount = UINT_MAX; | ||||
|   UP.Partial = CurrentAllowPartial; | ||||
|   UP.Runtime = CurrentRuntime; | ||||
|   TTI.getUnrollingPreferences(L, UP); | ||||
|  | ||||
|   // Determine the current unrolling threshold.  While this is normally set | ||||
|   // from UnrollThreshold, it is overridden to a smaller value if the current | ||||
|   // function is marked as optimize-for-size, and the unroll threshold was | ||||
|   // not user specified. | ||||
|   unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; | ||||
|   unsigned PartialThreshold = | ||||
|     UserThreshold ? CurrentThreshold : UP.PartialThreshold; | ||||
|   if (!UserThreshold && | ||||
|       Header->getParent()->getAttributes(). | ||||
|         hasAttribute(AttributeSet::FunctionIndex, | ||||
|                      Attribute::OptimizeForSize)) { | ||||
|     Threshold = UP.OptSizeThreshold; | ||||
|     PartialThreshold = UP.PartialOptSizeThreshold; | ||||
|   } | ||||
|   getUnrollingPreferences(L, TTI, UP); | ||||
|  | ||||
|   // Find trip count and trip multiple if count is not available | ||||
|   unsigned TripCount = 0; | ||||
| @@ -202,79 +352,117 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { | ||||
|     TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); | ||||
|   } | ||||
|  | ||||
|   bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime; | ||||
|   // Select an initial unroll count.  This may be reduced later based | ||||
|   // on size thresholds. | ||||
|   bool CountSetExplicitly; | ||||
|   unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount, | ||||
|                                      UP, CountSetExplicitly); | ||||
|  | ||||
|   // Use a default unroll-count if the user doesn't specify a value | ||||
|   // and the trip count is a run-time value.  The default is different | ||||
|   // for run-time or compile-time trip count loops. | ||||
|   unsigned Count = UserCount ? CurrentCount : UP.Count; | ||||
|   if (Runtime && Count == 0 && TripCount == 0) | ||||
|     Count = UnrollRuntimeCount; | ||||
|  | ||||
|   if (Count == 0) { | ||||
|     // Conservative heuristic: if we know the trip count, see if we can | ||||
|     // completely unroll (subject to the threshold, checked below); otherwise | ||||
|     // try to find greatest modulo of the trip count which is still under | ||||
|     // threshold value. | ||||
|     if (TripCount == 0) | ||||
|       return false; | ||||
|     Count = TripCount; | ||||
|   unsigned NumInlineCandidates; | ||||
|   bool notDuplicatable; | ||||
|   unsigned LoopSize = | ||||
|       ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI); | ||||
|   DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n"); | ||||
|   uint64_t UnrolledSize = (uint64_t)LoopSize * Count; | ||||
|   if (notDuplicatable) { | ||||
|     DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable" | ||||
|                  << " instructions.\n"); | ||||
|     return false; | ||||
|   } | ||||
|   if (NumInlineCandidates != 0) { | ||||
|     DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n"); | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Enforce the threshold. | ||||
|   if (Threshold != NoThreshold && PartialThreshold != NoThreshold) { | ||||
|     unsigned NumInlineCandidates; | ||||
|     bool notDuplicatable; | ||||
|     unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, | ||||
|                                             notDuplicatable, TTI); | ||||
|     DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n"); | ||||
|     if (notDuplicatable) { | ||||
|       DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable" | ||||
|             << " instructions.\n"); | ||||
|       return false; | ||||
|     } | ||||
|     if (NumInlineCandidates != 0) { | ||||
|       DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n"); | ||||
|       return false; | ||||
|     } | ||||
|     uint64_t Size = (uint64_t)LoopSize*Count; | ||||
|     if (TripCount != 1 && | ||||
|         (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { | ||||
|       if (Size > Threshold) | ||||
|         DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count | ||||
|                      << " because size: " << Size << ">" << Threshold << "\n"); | ||||
|   unsigned Threshold, PartialThreshold; | ||||
|   selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold); | ||||
|  | ||||
|       bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; | ||||
|       if (!AllowPartial && !(Runtime && TripCount == 0)) { | ||||
|         DEBUG(dbgs() << "  will not try to unroll partially because " | ||||
|               << "-unroll-allow-partial not given\n"); | ||||
|         return false; | ||||
|       } | ||||
|       if (TripCount) { | ||||
|         // Reduce unroll count to be modulo of TripCount for partial unrolling | ||||
|         Count = PartialThreshold / LoopSize; | ||||
|         while (Count != 0 && TripCount%Count != 0) | ||||
|           Count--; | ||||
|       } | ||||
|       else if (Runtime) { | ||||
|         // Reduce unroll count to be a lower power-of-two value | ||||
|         while (Count != 0 && Size > PartialThreshold) { | ||||
|           Count >>= 1; | ||||
|           Size = LoopSize*Count; | ||||
|         } | ||||
|       } | ||||
|       if (Count > UP.MaxCount) | ||||
|         Count = UP.MaxCount; | ||||
|       if (Count < 2) { | ||||
|         DEBUG(dbgs() << "  could not unroll partially\n"); | ||||
|         return false; | ||||
|       } | ||||
|       DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n"); | ||||
|   // Given Count, TripCount and thresholds determine the type of | ||||
|   // unrolling which is to be performed. | ||||
|   enum { Full = 0, Partial = 1, Runtime = 2 }; | ||||
|   int Unrolling; | ||||
|   if (TripCount && Count == TripCount) { | ||||
|     if (Threshold != NoThreshold && UnrolledSize > Threshold) { | ||||
|       DEBUG(dbgs() << "  Too large to fully unroll with count: " << Count | ||||
|                    << " because size: " << UnrolledSize << ">" << Threshold | ||||
|                    << "\n"); | ||||
|       Unrolling = Partial; | ||||
|     } else { | ||||
|       Unrolling = Full; | ||||
|     } | ||||
|   } else if (TripCount && Count < TripCount) { | ||||
|     Unrolling = Partial; | ||||
|   } else { | ||||
|     Unrolling = Runtime; | ||||
|   } | ||||
|  | ||||
|   // Reduce count based on the type of unrolling and the threshold values. | ||||
|   unsigned OriginalCount = Count; | ||||
|   bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; | ||||
|   if (Unrolling == Partial) { | ||||
|     bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; | ||||
|     if (!AllowPartial && !CountSetExplicitly) { | ||||
|       DEBUG(dbgs() << "  will not try to unroll partially because " | ||||
|                    << "-unroll-allow-partial not given\n"); | ||||
|       return false; | ||||
|     } | ||||
|     if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { | ||||
|       // Reduce unroll count to be modulo of TripCount for partial unrolling. | ||||
|       Count = PartialThreshold / LoopSize; | ||||
|       while (Count != 0 && TripCount % Count != 0) | ||||
|         Count--; | ||||
|     } | ||||
|   } else if (Unrolling == Runtime) { | ||||
|     if (!AllowRuntime && !CountSetExplicitly) { | ||||
|       DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count " | ||||
|                    << "-unroll-runtime not given\n"); | ||||
|       return false; | ||||
|     } | ||||
|     // Reduce unroll count to be the largest power-of-two factor of | ||||
|     // the original count which satisfies the threshold limit. | ||||
|     while (Count != 0 && UnrolledSize > PartialThreshold) { | ||||
|       Count >>= 1; | ||||
|       UnrolledSize = LoopSize * Count; | ||||
|     } | ||||
|     if (Count > UP.MaxCount) | ||||
|       Count = UP.MaxCount; | ||||
|     DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n"); | ||||
|   } | ||||
|  | ||||
|   if (HasPragma) { | ||||
|     // Emit optimization remarks if we are unable to unroll the loop | ||||
|     // as directed by a pragma. | ||||
|     DebugLoc LoopLoc = L->getStartLoc(); | ||||
|     Function *F = Header->getParent(); | ||||
|     LLVMContext &Ctx = F->getContext(); | ||||
|     if (HasEnablePragma && PragmaCount == 0) { | ||||
|       if (TripCount && Count != TripCount) { | ||||
|         emitOptimizationRemarkMissed( | ||||
|             Ctx, DEBUG_TYPE, *F, LoopLoc, | ||||
|             "Unable to fully unroll loop as directed by unroll(enable) pragma " | ||||
|             "because unrolled size is too large."); | ||||
|       } else if (!TripCount) { | ||||
|         emitOptimizationRemarkMissed( | ||||
|             Ctx, DEBUG_TYPE, *F, LoopLoc, | ||||
|             "Unable to fully unroll loop as directed by unroll(enable) pragma " | ||||
|             "because loop has a runtime trip count."); | ||||
|       } | ||||
|     } else if (PragmaCount > 0 && Count != OriginalCount) { | ||||
|       emitOptimizationRemarkMissed( | ||||
|           Ctx, DEBUG_TYPE, *F, LoopLoc, | ||||
|           "Unable to unroll loop the number of times directed by " | ||||
|           "unroll_count pragma because unrolled size is too large."); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (Unrolling != Full && Count < 2) { | ||||
|     // Partial unrolling by 1 is a nop.  For full unrolling, a factor | ||||
|     // of 1 makes sense because loop control can be eliminated. | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Unroll the loop. | ||||
|   if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM)) | ||||
|   if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM)) | ||||
|     return false; | ||||
|  | ||||
|   return true; | ||||
|   | ||||
							
								
								
									
										285
									
								
								test/Transforms/LoopUnroll/unroll-pragmas.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										285
									
								
								test/Transforms/LoopUnroll/unroll-pragmas.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,285 @@ | ||||
| ; RUN: opt < %s -loop-unroll -S | FileCheck %s | ||||
|  | ||||
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||||
| target triple = "x86_64-unknown-linux-gnu" | ||||
|  | ||||
| ; loop4 contains a small loop which should be completely unrolled by | ||||
| ; the default unrolling heuristics.  It serves as a control for the | ||||
| ; unroll(disable) pragma test loop4_with_disable. | ||||
| ; | ||||
| ; CHECK-LABEL: @loop4( | ||||
| ; CHECK-NOT: br i1 | ||||
| define void @loop4(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 4 | ||||
|   br i1 %exitcond, label %for.end, label %for.body | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
|  | ||||
| ; #pragma clang loop unroll(disable) | ||||
| ; | ||||
| ; CHECK-LABEL: @loop4_with_disable( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @loop4_with_disable(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 4 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !1 = metadata !{metadata !1, metadata !2} | ||||
| !2 = metadata !{metadata !"llvm.loopunroll.enable", i1 false} | ||||
|  | ||||
| ; loop64 has a high enough count that it should *not* be unrolled by | ||||
| ; the default unrolling heuristic.  It serves as the control for the | ||||
| ; unroll(enable) pragma loop64_with_.* tests below. | ||||
| ; | ||||
| ; CHECK-LABEL: @loop64( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @loop64(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 64 | ||||
|   br i1 %exitcond, label %for.end, label %for.body | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
|  | ||||
| ; #pragma clang loop unroll(enable) | ||||
| ; Loop should be fully unrolled. | ||||
| ; | ||||
| ; CHECK-LABEL: @loop64_with_enable( | ||||
| ; CHECK-NOT: br i1 | ||||
| define void @loop64_with_enable(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 64 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !3 = metadata !{metadata !3, metadata !4} | ||||
| !4 = metadata !{metadata !"llvm.loopunroll.enable", i1 true} | ||||
|  | ||||
| ; #pragma clang loop unroll_count(4) | ||||
| ; Loop should be unrolled 4 times. | ||||
| ; | ||||
| ; CHECK-LABEL: @loop64_with_count4( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @loop64_with_count4(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 64 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !5 = metadata !{metadata !5, metadata !6} | ||||
| !6 = metadata !{metadata !"llvm.loopunroll.count", i32 4} | ||||
|  | ||||
|  | ||||
| ; #pragma clang loop unroll(enable) unroll_count(4) | ||||
| ; Loop should be unrolled 4 times. | ||||
| ; | ||||
| ; CHECK-LABEL: @loop64_with_enable_and_count4( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @loop64_with_enable_and_count4(i32* nocapture %a) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 64 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !7 = metadata !{metadata !7, metadata !6, metadata !4} | ||||
|  | ||||
| ; #pragma clang loop unroll(enable) | ||||
| ; Full unrolling is requested, but loop has a dynamic trip count so | ||||
| ; no unrolling should occur. | ||||
| ; | ||||
| ; CHECK-LABEL: @dynamic_loop_with_enable( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) { | ||||
| entry: | ||||
|   %cmp3 = icmp sgt i32 %b, 0 | ||||
|   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 | ||||
|  | ||||
| for.body:                                         ; preds = %entry, %for.body | ||||
|   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 | ||||
|   %exitcond = icmp eq i32 %lftr.wideiv, %b | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body, %entry | ||||
|   ret void | ||||
| } | ||||
| !8 = metadata !{metadata !8, metadata !4} | ||||
|  | ||||
| ; #pragma clang loop unroll_count(4) | ||||
| ; Loop has a dynamic trip count.  Unrolling should occur, but no | ||||
| ; conditional branches can be removed. | ||||
| ; | ||||
| ; CHECK-LABEL: @dynamic_loop_with_count4( | ||||
| ; CHECK-NOT: store | ||||
| ; CHECK: br i1 | ||||
| ; CHECK: store | ||||
| ; CHECK: br i1 | ||||
| ; CHECK: store | ||||
| ; CHECK: br i1 | ||||
| ; CHECK: store | ||||
| ; CHECK: br i1 | ||||
| ; CHECK: store | ||||
| ; CHECK: br i1 | ||||
| ; CHECK-NOT: br i1 | ||||
| define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) { | ||||
| entry: | ||||
|   %cmp3 = icmp sgt i32 %b, 0 | ||||
|   br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9 | ||||
|  | ||||
| for.body:                                         ; preds = %entry, %for.body | ||||
|   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %lftr.wideiv = trunc i64 %indvars.iv.next to i32 | ||||
|   %exitcond = icmp eq i32 %lftr.wideiv, %b | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body, %entry | ||||
|   ret void | ||||
| } | ||||
| !9 = metadata !{metadata !9, metadata !6} | ||||
|  | ||||
| ; #pragma clang loop unroll_count(1) | ||||
| ; Loop should not be unrolled | ||||
| ; | ||||
| ; CHECK-LABEL: @unroll_1( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK-NOT: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @unroll_1(i32* nocapture %a, i32 %b) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 4 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !10 = metadata !{metadata !10, metadata !11} | ||||
| !11 = metadata !{metadata !"llvm.loopunroll.count", i32 1} | ||||
|  | ||||
| ; #pragma clang loop unroll(enable) | ||||
| ; Loop has very high loop count (1 million) and full unrolling was requested. | ||||
| ; Loop should be unrolled up to the pragma threshold, but not completely. | ||||
| ; | ||||
| ; CHECK-LABEL: @unroll_1M( | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: store i32 | ||||
| ; CHECK: br i1 | ||||
| define void @unroll_1M(i32* nocapture %a, i32 %b) { | ||||
| entry: | ||||
|   br label %for.body | ||||
|  | ||||
| for.body:                                         ; preds = %for.body, %entry | ||||
|   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||||
|   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv | ||||
|   %0 = load i32* %arrayidx, align 4 | ||||
|   %inc = add nsw i32 %0, 1 | ||||
|   store i32 %inc, i32* %arrayidx, align 4 | ||||
|   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
|   %exitcond = icmp eq i64 %indvars.iv.next, 1000000 | ||||
|   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12 | ||||
|  | ||||
| for.end:                                          ; preds = %for.body | ||||
|   ret void | ||||
| } | ||||
| !12 = metadata !{metadata !12, metadata !4} | ||||
		Reference in New Issue
	
	Block a user