diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index a9521bb581e..63ad2970f48 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -701,17 +701,81 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
   if (SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
     return getZeroExtendExpr(SZ->getOperand(), Ty);
 
-  // FIXME: If the input value is a chrec scev, and we can prove that the value
+  // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can zero extend all of the
-  // operands (often constants). This would allow analysis of something like
+  // operands (often constants). This allows analysis of something like
   // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
+  if (SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+    if (AR->isAffine()) {
+      // Check whether the backedge-taken count is SCEVCouldNotCompute.
+      // Note that this serves two purposes: It filters out loops that are
+      // simply not analyzable, and it covers the case where this code is
+      // being called from within backedge-taken count analysis, such that
+      // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+      // cope with a conservative value, and it will take care to purge
+      // that value once it has finished.
+      SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
+      if (!isa<SCEVCouldNotCompute>(BECount)) {
+        // Get the start and step of the addrec; they are used below to
+        // determine whether it's safe to widen the recurrence.
+        SCEVHandle Start = AR->getStart();
+        SCEVHandle Step = AR->getStepRecurrence(*this);
+
+        // Check whether the backedge-taken count can be losslessly cast to
+        // the addrec's type. The count is always unsigned.
+        SCEVHandle CastedBECount =
+          getTruncateOrZeroExtend(BECount, Start->getType());
+        if (BECount ==
+            getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
+          const Type *WideTy =
+            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+          SCEVHandle ZMul =
+            getMulExpr(CastedBECount,
+                       getTruncateOrZeroExtend(Step, Start->getType()));
+          // Check whether Start+Step*BECount has no unsigned overflow.
+          if (getZeroExtendExpr(ZMul, WideTy) ==
+              getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
+                         getZeroExtendExpr(Step, WideTy))) {
+            SCEVHandle Add = getAddExpr(Start, ZMul);
+            if (getZeroExtendExpr(Add, WideTy) ==
+                getAddExpr(getZeroExtendExpr(Start, WideTy),
+                           getZeroExtendExpr(ZMul, WideTy)))
+              // Return the expression with the addrec on the outside.
+              return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                                   getZeroExtendExpr(Step, Ty),
+                                   AR->getLoop());
+          }
+
+          // Similar to above, only this time treat the step value as signed.
+          // This covers loops that count down.
+          SCEVHandle SMul =
+            getMulExpr(CastedBECount,
+                       getTruncateOrSignExtend(Step, Start->getType()));
+          // Check whether Start+Step*BECount has no unsigned overflow.
+          if (getSignExtendExpr(SMul, WideTy) ==
+              getMulExpr(getZeroExtendExpr(CastedBECount, WideTy),
+                         getSignExtendExpr(Step, WideTy))) {
+            SCEVHandle Add = getAddExpr(Start, SMul);
+            if (getZeroExtendExpr(Add, WideTy) ==
+                getAddExpr(getZeroExtendExpr(Start, WideTy),
+                           getSignExtendExpr(SMul, WideTy)))
+              // Return the expression with the addrec on the outside.
+              return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                                   getSignExtendExpr(Step, Ty),
+                                   AR->getLoop());
+          }
+        }
+      }
+    }
 
   SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)];
   if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty);
   return Result;
 }
 
-SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, const Type *Ty) {
+SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
+                                              const Type *Ty) {
   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
          "This is not an extending conversion!");
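The checks in this hunk compare SCEV expressions symbolically: extending Step*BECount (and then Start+Step*BECount) into a type twice as wide must yield the same expression as performing the arithmetic directly in the wide type, and when the two agree the narrow computation provably never wrapped. A minimal standalone sketch of the underlying arithmetic fact, specialized to concrete 8-bit values (the helper name and constants are illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Mirrors the ZMul/Add checks at runtime: "widen, then compute"
    // agrees with "compute narrow, then widen" exactly when the narrow
    // computation did not wrap.
    static bool addRecFitsUnsigned(uint8_t Start, uint8_t Step, uint8_t BECount) {
      uint16_t WideMul = uint16_t(Step) * uint16_t(BECount); // exact
      uint8_t NarrowMul = uint8_t(Step * BECount);           // may wrap
      if (WideMul != NarrowMul)
        return false;                      // Step*BECount wrapped
      uint16_t WideAdd = uint16_t(Start) + WideMul;          // exact
      uint8_t NarrowAdd = uint8_t(Start + NarrowMul);        // may wrap
      return WideAdd == NarrowAdd;         // Start+Step*BECount fits
    }

    int main() {
      assert(addRecFitsUnsigned(0, 1, 99));    // {0,+,1} for 100 iterations: safe
      assert(!addRecFitsUnsigned(200, 1, 99)); // 200+99 wraps an i8: unsafe
      return 0;
    }

ScalarEvolution performs the same comparison on symbolic expressions, so it amounts to a compile-time proof rather than a runtime test.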
@@ -726,10 +790,54 @@
   if (SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
     return getSignExtendExpr(SS->getOperand(), Ty);
 
-  // FIXME: If the input value is a chrec scev, and we can prove that the value
+  // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can sign extend all of the
-  // operands (often constants). This would allow analysis of something like
+  // operands (often constants). This allows analysis of something like
   // this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
+  if (SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
+    if (AR->isAffine()) {
+      // Check whether the backedge-taken count is SCEVCouldNotCompute.
+      // Note that this serves two purposes: It filters out loops that are
+      // simply not analyzable, and it covers the case where this code is
+      // being called from within backedge-taken count analysis, such that
+      // attempting to ask for the backedge-taken count would likely result
+      // in infinite recursion. In the latter case, the analysis code will
+      // cope with a conservative value, and it will take care to purge
+      // that value once it has finished.
+      SCEVHandle BECount = getBackedgeTakenCount(AR->getLoop());
+      if (!isa<SCEVCouldNotCompute>(BECount)) {
+        // Get the start and step of the addrec; they are used below to
+        // determine whether it's safe to widen the recurrence.
+        SCEVHandle Start = AR->getStart();
+        SCEVHandle Step = AR->getStepRecurrence(*this);
+
+        // Check whether the backedge-taken count can be losslessly cast to
+        // the addrec's type. The count is always unsigned.
+        SCEVHandle CastedBECount =
+          getTruncateOrZeroExtend(BECount, Start->getType());
+        if (BECount ==
+            getTruncateOrZeroExtend(CastedBECount, BECount->getType())) {
+          const Type *WideTy =
+            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+          SCEVHandle SMul =
+            getMulExpr(CastedBECount,
+                       getTruncateOrSignExtend(Step, Start->getType()));
+          // Check whether Start+Step*BECount has no signed overflow.
+          if (getSignExtendExpr(SMul, WideTy) ==
+              getMulExpr(getSignExtendExpr(CastedBECount, WideTy),
+                         getSignExtendExpr(Step, WideTy))) {
+            SCEVHandle Add = getAddExpr(Start, SMul);
+            if (getSignExtendExpr(Add, WideTy) ==
+                getAddExpr(getSignExtendExpr(Start, WideTy),
+                           getSignExtendExpr(SMul, WideTy)))
+              // Return the expression with the addrec on the outside.
+              return getAddRecExpr(getSignExtendExpr(Start, Ty),
+                                   getSignExtendExpr(Step, Ty),
+                                   AR->getLoop());
+          }
+        }
+      }
+    }
 
   SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)];
   if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty);
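The next hunk rewrites getBackedgeTakenCount to make the recursion guard described in the comments above actually work: the cache is seeded with a conservative sentinel before the real computation runs, so a re-entrant query (such as the getZeroExtendExpr/getSignExtendExpr code calling back into trip-count analysis) sees CouldNotCompute instead of recursing forever. A compile-and-run sketch of the pattern in isolation, using illustrative names rather than LLVM's:

    #include <iostream>
    #include <map>
    #include <string>

    static std::map<int, std::string> Cache;

    std::string getOrCompute(int Key);

    // Stand-in for ComputeBackedgeTakenCount; re-enters getOrCompute to
    // show that the inner query observes the sentinel instead of looping.
    static std::string computeFor(int Key) {
      return "computed (inner saw: " + getOrCompute(Key) + ")";
    }

    std::string getOrCompute(int Key) {
      // Seed the cache; if the key was already present, insertion fails
      // and Pair.first points at the existing entry.
      std::pair<std::map<int, std::string>::iterator, bool> Pair =
          Cache.insert(std::make_pair(Key, std::string("could-not-compute")));
      if (Pair.second)                        // outermost, non-recursive query
        Pair.first->second = computeFor(Key); // overwrite the sentinel
      return Pair.first->second;              // recursive queries see the sentinel
    }

    int main() {
      std::cout << getOrCompute(42) << "\n";
      // Prints: computed (inner saw: could-not-compute)
      return 0;
    }

One wrinkle the real code handles that this sketch does not: when the computation fails, the sentinel is left in place, and the surrounding analysis purges such conservative values once it finishes.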
@@ -1962,20 +2070,36 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
 /// hasLoopInvariantBackedgeTakenCount).
 ///
 SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
-  std::map<const Loop*, SCEVHandle>::iterator I = BackedgeTakenCounts.find(L);
-  if (I == BackedgeTakenCounts.end()) {
+  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // succeeds, proceed to actually compute a backedge-taken count and
+  // update the value. The temporary CouldNotCompute value tells SCEV
+  // code elsewhere that it shouldn't attempt to request a new
+  // backedge-taken count, which could result in infinite recursion.
+  std::pair<std::map<const Loop*, SCEVHandle>::iterator, bool> Pair =
+    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+  if (Pair.second) {
     SCEVHandle ItCount = ComputeBackedgeTakenCount(L);
-    I = BackedgeTakenCounts.insert(std::make_pair(L, ItCount)).first;
     if (ItCount != UnknownValue) {
       assert(ItCount->isLoopInvariant(L) &&
              "Computed trip count isn't loop invariant for loop!");
       ++NumTripCountsComputed;
+
+      // Now that we know the trip count for this loop, forget any
+      // existing SCEV values for PHI nodes in this loop since they
+      // are only conservative estimates made without the benefit
+      // of trip count information.
+      for (BasicBlock::iterator I = L->getHeader()->begin();
+           PHINode *PN = dyn_cast<PHINode>(I); ++I)
+        deleteValueFromRecords(PN);
+
+      // Update the value in the map.
+      Pair.first->second = ItCount;
     } else if (isa<PHINode>(L->getHeader()->begin())) {
       // Only count loops that have phi nodes as not being computable.
       ++NumTripCountsNotComputed;
     }
   }
-  return I->second;
+  return Pair.first->second;
 }
 
 /// forgetLoopBackedgeTakenCount - This method should be called by the
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
new file mode 100644
index 00000000000..2ba3f830fdb
--- /dev/null
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -0,0 +1,244 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movz %t
+; RUN: not grep sar %t
+; RUN: not grep shl %t
+; RUN: grep add %t | count 6
+; RUN: grep inc %t | count 4
+; RUN: grep dec %t | count 2
+; RUN: grep lea %t | count 2
+
+; Optimize away zext-inreg and sext-inreg on the loop induction
+; variable using trip-count information.
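+;
+; Roughly, each function below is the IR for a loop along the lines of
+; this illustrative C source (not part of the original test):
+;
+;   void count_up(double *d, long n) {
+;     for (long i = 0; i != 10; ++i)
+;       d[i & 255] *= 0.1;
+;   }
+;
+; The known trip count proves the induction variable stays within the
+; masked range, so the and/shl/ashr operations are redundant.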
+ +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 
+ %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll new file mode 100644 index 00000000000..7ccfe855a6a --- /dev/null +++ b/test/CodeGen/X86/masked-iv-unsafe.ll @@ -0,0 +1,386 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep and %t | count 6 +; RUN: grep movzb %t | count 6 +; RUN: grep sar %t | count 12 + +; Don't optimize away zext-inreg and sext-inreg on the loop induction +; variable, because it isn't safe to do so in these cases. 
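+;
+; For example, in @count_up below the induction variable starts at 10 and
+; is incremented until it wraps around to 0, so it takes values far outside
+; the 8-bit and 24-bit masked ranges; here the masking really does change
+; the addresses the loop touches and must be preserved.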
+ +define void @count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 20 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = 
and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %n + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next 
= sub i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, 18446744073709551615 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @still_another_count_down(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %indvar.i8 = and i64 %indvar, 255 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %indvar.i24 = and i64 %indvar, 16777215 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = add i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 10 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + +define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: + %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ] + %s0 = shl i64 %indvar, 8 + %indvar.i8 = ashr i64 %s0, 8 + %t0 = getelementptr double* %d, i64 %indvar.i8 + %t1 = load double* %t0 + %t2 = mul double %t1, 0.1 + store double %t2, double* %t0 + %s1 = shl i64 %indvar, 24 + %indvar.i24 = ashr i64 %s1, 24 + %t3 = getelementptr double* %d, i64 %indvar.i24 + %t4 = load double* %t3 + %t5 = mul double %t4, 2.3 + store double %t5, double* %t3 + %t6 = getelementptr double* %d, i64 %indvar + %t7 = load double* %t6 + %t8 = mul double %t7, 4.5 + store double %t8, double* %t6 + %indvar.next = sub i64 %indvar, 3 + %exitcond = icmp eq i64 %indvar.next, 0 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} + + + diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll 
b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
index 06e231212ed..cb2f3aa5166 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpl \$8}
+; RUN: llvm-as < %s | llc --x86-asm-syntax=att | grep {cmpq \$8}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9"
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin9"
 ; happens after the relevant use, so the comparison stride can be
 ; easily changed.
 
-define void @foo() {
+define void @foo() nounwind {
 entry:
   br label %loop
 
@@ -14,9 +14,11 @@ loop:
   %indvar = phi i32 [ 0, %entry ], [ %i.2.0.us1534, %loop ]    ; <i32> [#uses=1]
   %i.2.0.us1534 = add i32 %indvar, 1    ; <i32> [#uses=3]
   %tmp628.us1540 = shl i32 %i.2.0.us1534, 1    ; <i32> [#uses=1]
-  %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64    ; <i64> [#uses=0]
+  %tmp645646647.us1547 = sext i32 %tmp628.us1540 to i64    ; <i64> [#uses=1]
+  store i64 %tmp645646647.us1547, i64* null
   %tmp611.us1535 = icmp eq i32 %i.2.0.us1534, 4    ; <i1> [#uses=2]
-  %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0    ; <i32> [#uses=0]
+  %tmp623.us1538 = select i1 %tmp611.us1535, i32 6, i32 0    ; <i32> [#uses=1]
+  store i32 %tmp623.us1538, i32* null
   br i1 %tmp611.us1535, label %exit, label %loop
 
 exit: