diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ab1068dfa70..f4893932b9b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -319,8 +319,9 @@ private: class LoopVectorizationLegality { public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL, - DominatorTree *DT) - : TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {} + DominatorTree *DT, TargetTransformInfo* TTI, + AliasAnalysis* AA) + : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -404,6 +405,11 @@ public: /// induction descriptor. typedef MapVector InductionList; + /// Alias(Multi)Map stores the values (GEPs or underlying objects and their + /// respective Store/Load instruction(s) to calculate aliasing. + typedef DenseMap AliasMap; + typedef DenseMap > AliasMultiMap; + /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. @@ -477,6 +483,14 @@ private: InductionKind isInductionVariable(PHINode *Phi); /// Return true if can compute the address bounds of Ptr within the loop. bool hasComputableBounds(Value *Ptr); + /// Return true if there is the chance of write reorder. + bool hasPossibleGlobalWriteReorder(Value *Object, + Instruction *Inst, + AliasMultiMap &WriteObjects, + unsigned MaxByteWidth); + /// Return the AA location for a load or a store. + AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst); + /// The loop that we evaluate. Loop *TheLoop; @@ -484,8 +498,12 @@ private: ScalarEvolution *SE; /// DataLayout analysis. DataLayout *DL; - // Dominators. + /// Dominators. DominatorTree *DT; + /// Target Info. + TargetTransformInfo *TTI; + /// Alias Analysis. + AliasAnalysis *AA; // --- vectorization state --- // @@ -612,6 +630,7 @@ struct LoopVectorize : public LoopPass { LoopInfo *LI; TargetTransformInfo *TTI; DominatorTree *DT; + AliasAnalysis *AA; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -623,12 +642,13 @@ struct LoopVectorize : public LoopPass { LI = &getAnalysis(); TTI = &getAnalysis(); DT = &getAnalysis(); + AA = getAnalysisIfAvailable(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT); + LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; @@ -2275,6 +2295,42 @@ void LoopVectorizationLegality::collectLoopUniforms() { } } +AliasAnalysis::Location +LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) { + if (StoreInst *Store = dyn_cast(Inst)) + return AA->getLocation(Store); + else if (LoadInst *Load = dyn_cast(Inst)) + return AA->getLocation(Load); + + llvm_unreachable("Should be either load or store instruction"); +} + +bool +LoopVectorizationLegality::hasPossibleGlobalWriteReorder( + Value *Object, + Instruction *Inst, + AliasMultiMap& WriteObjects, + unsigned MaxByteWidth) { + + AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst); + + std::vector::iterator + it = WriteObjects[Object].begin(), + end = WriteObjects[Object].end(); + + for (; it != end; ++it) { + Instruction* I = *it; + if (I == Inst) + continue; + + AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I); + if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth), + ThatLoc.getWithNewSize(MaxByteWidth))) + return true; + } + return false; +} + bool LoopVectorizationLegality::canVectorizeMemory() { if (TheLoop->isAnnotatedParallel()) { @@ -2337,9 +2393,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } - // Holds the read and read-write *pointers* that we find. - ValueVector Reads; - ValueVector ReadWrites; + // Holds the read and read-write *pointers* that we find. These maps hold + // unique values for pointers (so no need for multi-map). + AliasMap Reads; + AliasMap ReadWrites; // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -2361,7 +2418,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // If we did *not* see this pointer before, insert it to // the read-write list. At this phase it is only a 'write' list. if (Seen.insert(Ptr)) - ReadWrites.push_back(Ptr); + ReadWrites.insert(std::make_pair(Ptr, ST)); } for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { @@ -2376,7 +2433,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr)) - Reads.push_back(Ptr); + Reads.insert(std::make_pair(Ptr, LD)); } // If we write (or read-write) to a single destination and there are no @@ -2389,22 +2446,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) - if (hasComputableBounds(*I)) { - PtrRtCheck.insert(SE, TheLoop, *I); - DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + AliasMap::iterator MI, ME; + for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { + Value *V = (*MI).first; + if (hasComputableBounds(V)) { + PtrRtCheck.insert(SE, TheLoop, V); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; break; } - for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) - if (hasComputableBounds(*I)) { - PtrRtCheck.insert(SE, TheLoop, *I); - DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + } + for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { + Value *V = (*MI).first; + if (hasComputableBounds(V)) { + PtrRtCheck.insert(SE, TheLoop, V); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; break; } + } // Check that we did not collect too many pointers or found a // unsizeable pointer. @@ -2419,47 +2481,104 @@ bool LoopVectorizationLegality::canVectorizeMemory() { bool NeedRTCheck = false; + // Biggest vectorized access possible, vector width * unroll factor. + // TODO: We're being very pessimistic here, find a way to know the + // real access width before getting here. + unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) * + TTI->getMaximumUnrollFactor(); // Now that the pointers are in two lists (Reads and ReadWrites), we // can check that there are no conflicts between each of the writes and // between the writes to the reads. - ValueSet WriteObjects; + // Note that WriteObjects duplicates the stores (indexed now by underlying + // objects) to avoid pointing to elements inside ReadWrites. + // TODO: Maybe create a new type where they can interact without duplication. + AliasMultiMap WriteObjects; ValueVector TempObjects; // Check that the read-writes do not conflict with other read-write // pointers. bool AllWritesIdentified = true; - for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) { - GetUnderlyingObjects(*I, TempObjects, DL); - for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); - it != e; ++it) { - if (!isIdentifiedObject(*it)) { - DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n"); + for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { + Value *Val = (*MI).first; + Instruction *Inst = (*MI).second; + + GetUnderlyingObjects(Val, TempObjects, DL); + for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end(); + UI != UE; ++UI) { + if (!isIdentifiedObject(*UI)) { + DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n"); NeedRTCheck = true; AllWritesIdentified = false; } - if (!WriteObjects.insert(*it)) { + + // Never seen it before, can't alias. + if (WriteObjects[*UI].empty()) { + DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n"); + WriteObjects[*UI].push_back(Inst); + continue; + } + // Direct alias found. + if (!AA || dyn_cast(*UI) == NULL) { DEBUG(dbgs() << "LV: Found a possible write-write reorder:" - << **it <<"\n"); + << **UI <<"\n"); return false; } + DEBUG(dbgs() << "LV: Found a conflicting global value:" + << **UI <<"\n"); + DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n"); + DEBUG(dbgs() << "LV: On value:" << *Val <<"\n"); + + // If global alias, make sure they do alias. + if (hasPossibleGlobalWriteReorder(*UI, + Inst, + WriteObjects, + MaxByteWidth)) { + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" + << *UI <<"\n"); + return false; + } + + // Didn't alias, insert into map for further reference. + WriteObjects[*UI].push_back(Inst); } TempObjects.clear(); } /// Check that the reads don't conflict with the read-writes. - for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) { - GetUnderlyingObjects(*I, TempObjects, DL); - for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); - it != e; ++it) { + for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { + Value *Val = (*MI).first; + GetUnderlyingObjects(Val, TempObjects, DL); + for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end(); + UI != UE; ++UI) { // If all of the writes are identified then we don't care if the read // pointer is identified or not. - if (!AllWritesIdentified && !isIdentifiedObject(*it)) { - DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n"); + if (!AllWritesIdentified && !isIdentifiedObject(*UI)) { + DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n"); NeedRTCheck = true; } - if (WriteObjects.count(*it)) { - DEBUG(dbgs() << "LV: Found a possible read/write reorder:" - << **it <<"\n"); + + // Never seen it before, can't alias. + if (WriteObjects[*UI].empty()) + continue; + // Direct alias found. + if (!AA || dyn_cast(*UI) == NULL) { + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" + << **UI <<"\n"); + return false; + } + DEBUG(dbgs() << "LV: Found a global value: " + << **UI <<"\n"); + Instruction *Inst = (*MI).second; + DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n"); + DEBUG(dbgs() << "LV: On value:" << *Val <<"\n"); + + // If global alias, make sure they do alias. + if (hasPossibleGlobalWriteReorder(*UI, + Inst, + WriteObjects, + MaxByteWidth)) { + DEBUG(dbgs() << "LV: Found a possible read-write reorder:" + << *UI <<"\n"); return false; } } diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll new file mode 100644 index 00000000000..e641c4caa65 --- /dev/null +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -0,0 +1,356 @@ +; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" + +%struct.anon = type { [100 x i32], i32, [100 x i32] } + +@Foo = common global %struct.anon zeroinitializer, align 4 +@PB = external global i32* +@PA = external global i32* + +; int noAlias01 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @noAlias01(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 + store i32 %add, i32* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx2, align 4 + ret i32 %7 +} + +; int mayAlias01 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mayAlias01(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %sub = sub nsw i32 100, %1 + %sub1 = sub nsw i32 %sub, 1 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx3, align 4 + ret i32 %7 +} + +; int mayAlias02 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mayAlias02(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %sub = sub nsw i32 100, %4 + %sub1 = sub nsw i32 %sub, 1 + %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1 + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx3, align 4 + ret i32 %7 +} + +; int mayAlias03 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mayAlias03(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32** @PB, align 4 + %add.ptr = getelementptr inbounds i32* %1, i32 100 + %2 = load i32* %i, align 4 + %idx.neg = sub i32 0, %2 + %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg + %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1 + %3 = load i32* %add.ptr2, align 4 + %4 = load i32* %a.addr, align 4 + %add = add nsw i32 %3, %4 + %5 = load i32** @PA, align 4 + %6 = load i32* %i, align 4 + %add.ptr3 = getelementptr inbounds i32* %5, i32 %6 + store i32 %add, i32* %add.ptr3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %8 = load i32** @PA, align 4 + %9 = load i32* %a.addr, align 4 + %add.ptr4 = getelementptr inbounds i32* %8, i32 %9 + %10 = load i32* %add.ptr4, align 4 + ret i32 %10 +} + +; int mustAlias01 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mustAlias01(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %sub = sub nsw i32 100, %1 + %sub1 = sub nsw i32 %sub, 1 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %add2 = add nsw i32 %4, 10 + %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx4, align 4 + ret i32 %7 +} + +; int mustAlias02 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mustAlias02(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %sub = sub nsw i32 100, %1 + %sub1 = sub nsw i32 %sub, 10 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4 + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx3, align 4 + ret i32 %7 +} + +; int mustAlias03 (int a) { +; int i; +; for (i=0; i +; CHECK ret + +define i32 @mustAlias03(i32 %a) nounwind { +entry: + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 100 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %sub = sub nsw i32 100, %1 + %sub1 = sub nsw i32 %sub, 10 + %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1 + %2 = load i32* %arrayidx, align 4 + %3 = load i32* %a.addr, align 4 + %add = add nsw i32 %2, %3 + %4 = load i32* %i, align 4 + %add2 = add nsw i32 %4, 10 + %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %5 = load i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %6 = load i32* %a.addr, align 4 + %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6 + %7 = load i32* %arrayidx4, align 4 + ret i32 %7 +}