If the loop induction variable is always sign- or zero-extended, then

extend the type of the induction variable.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56017 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Devang Patel 2008-09-09 21:41:07 +00:00
parent bd6dc7a086
commit d22a849282
2 changed files with 176 additions and 1 deletions

View File

@ -93,6 +93,8 @@ namespace {
void RewriteLoopExitValues(Loop *L, SCEV *IterationCount);
void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
void OptimizeCanonicalIVType(Loop *L);
};
}
@ -597,7 +599,122 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
#endif
DeleteTriviallyDeadInstructions(DeadInsts);
OptimizeCanonicalIVType(L);
assert(L->isLCSSAForm());
return Changed;
}
/// OptimizeCanonicalIVType - If loop induction variable is always
/// sign or zero extended then extend the type of induction
/// variable.
///
/// The transformation is only attempted when:
///   - the loop has a canonical induction variable and a constant
///     iteration count,
///   - every use of the IV is a ZEXT or SEXT to one common type, apart
///     from the IV increment feeding the back edge,
///   - the increment has exactly two uses: the IV phi itself and an
///     integer compare against a constant,
///   - the iteration count fits in the range of the original IV type.
/// All of these checks happen before any IR is modified, so an early
/// return always leaves the loop untouched.
void IndVarSimplify::OptimizeCanonicalIVType(Loop *L) {
  PHINode *PH = L->getCanonicalInductionVariable();
  if (!PH) return;

  // Check loop iteration count.
  SCEVHandle IC = SE->getIterationCount(L);
  if (isa<SCEVCouldNotCompute>(IC)) return;
  SCEVConstant *IterationCount = dyn_cast<SCEVConstant>(IC);
  if (!IterationCount) return;

  // If incoming block 0 is inside the loop, it is the back edge and the
  // entry edge is operand 1; otherwise it is the other way around.
  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check IV uses. If all IV uses are either SEXT or ZEXT (except
  // IV increment instruction) then this IV is suitable for this
  // transformation.
  bool isSEXT = false;
  BinaryOperator *Incr = NULL;
  const Type *NewType = NULL;
  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
      UI != UE; ++UI) {
    const Type *CandidateType = NULL;
    if (ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
      CandidateType = ZI->getDestTy();
    else if (SExtInst *SI = dyn_cast<SExtInst>(UI)) {
      CandidateType = SI->getDestTy();
      isSEXT = true;
    }
    else if ((Incr = dyn_cast<BinaryOperator>(UI))) {
      // Validate IV increment instruction.
      if (PH->getIncomingValue(BackEdge) == Incr)
        continue;
    }
    // Any other kind of use disqualifies the IV.
    if (!CandidateType) {
      NewType = NULL;
      break;
    }
    // All extensions must agree on the destination type.
    if (!NewType)
      NewType = CandidateType;
    else if (NewType != CandidateType) {
      NewType = NULL;
      break;
    }
  }

  // If the IV uses are not suitable then avoid this transformation.
  if (!NewType || !Incr)
    return;

  // IV increment instruction has two uses, one is loop exit condition
  // and second is the IV (phi node) itself.  Enforce that here: the
  // increment is erased below, so any additional user would be left
  // referring to a deleted instruction.
  if (!Incr->hasNUses(2))
    return;
  ICmpInst *Exit = NULL;
  for(Value::use_iterator II = Incr->use_begin(), IE = Incr->use_end();
      II != IE; ++II) {
    if (PH == *II)  continue;
    Exit = dyn_cast<ICmpInst>(*II);
    break;
  }
  if (!Exit) return;

  // The exit compare must be against a constant.  Remember which side
  // the constant is on: the replacement compare always places the new
  // increment on the left, so the predicate must be swapped when the
  // original had the constant on the left.
  bool ConstantOnLHS = true;
  ConstantInt *EV = dyn_cast<ConstantInt>(Exit->getOperand(0));
  if (!EV) {
    ConstantOnLHS = false;
    EV = dyn_cast<ConstantInt>(Exit->getOperand(1));
  }
  if (!EV) return;

  // Check iteration count max value to avoid loops that wrap around IV.
  APInt ICount = IterationCount->getValue()->getValue();
  if (ICount.isNegative()) return;
  uint32_t BW = PH->getType()->getPrimitiveSizeInBits();
  // The comparison below goes through 64-bit integers; values that do
  // not fit cannot be checked this way, so conservatively give up.
  if (BW > 64 || ICount.getActiveBits() > 64) return;
  APInt Max = (isSEXT ? APInt::getSignedMaxValue(BW) : APInt::getMaxValue(BW));
  if (ICount.getZExtValue() > Max.getZExtValue())  return;

  // Extend IV type.
  SCEVExpander Rewriter(*SE, *LI);
  Value *NewIV = Rewriter.getOrInsertCanonicalInductionVariable(L,NewType);
  PHINode *NewPH = cast<PHINode>(NewIV);
  Instruction *NewIncr = cast<Instruction>(NewPH->getIncomingValue(BackEdge));

  // Replace all SEXT or ZEXT uses.  Collect the users first because
  // erasing them below modifies PH's use list.
  SmallVector<Instruction *, 4> PHUses;
  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
      UI != UE; ++UI) {
    Instruction *I = cast<Instruction>(UI);
    PHUses.push_back(I);
  }
  while (!PHUses.empty()){
    Instruction *Use = PHUses.back(); PHUses.pop_back();
    // The increment is removed separately once the exit test is rewritten.
    if (Incr == Use) continue;

    SE->deleteValueFromRecords(Use);
    Use->replaceAllUsesWith(NewIV);
    Use->eraseFromParent();
  }

  // Replace exit condition.
  // NOTE(review): the exit constant is always zero-extended to the new
  // type, even when the IV uses are sign extensions - confirm this is
  // what is wanted for negative constants.
  ConstantInt *NEV = ConstantInt::get(NewType, EV->getZExtValue());
  ICmpInst::Predicate Pred =
    ConstantOnLHS ? Exit->getSwappedPredicate() : Exit->getPredicate();
  Instruction *NE = new ICmpInst(Pred,
                                 NewIncr, NEV, "new.exit",
                                 Exit->getParent()->getTerminator());
  SE->deleteValueFromRecords(Exit);
  Exit->replaceAllUsesWith(NE);
  Exit->eraseFromParent();

  // Remove old IV and increment instructions.
  SE->deleteValueFromRecords(PH);
  // Drop both incoming values of the old phi (presumably this also
  // erases the now-empty phi - confirm against removeIncomingValue).
  PH->removeIncomingValue((unsigned)0);
  PH->removeIncomingValue((unsigned)0);
  SE->deleteValueFromRecords(Incr);
  Incr->eraseFromParent();
}

View File

@ -0,0 +1,58 @@
; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep sext | count 1
; ModuleID = '<stdin>'
%struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>
%struct.ComponentInstanceRecord = type <{ [1 x i32] }>
%struct.DCPredictors = type { [5 x i16] }
%struct.DecodeTable = type { i16, i16, i16, i16, i8**, i8** }
%struct.ICMDataProcRecord = type <{ i16 (i8**, i32, i32)*, i32 }>
%struct.JPEGBitStream = type { i8*, i32, i32, i32, i32, i32, %struct.App1Marker*, i8*, i32, i16, i16, i32 }
%struct.JPEGGlobals = type { [2048 x i8], %struct.JPEGBitStream, i8*, i32, i32, %struct.ComponentInstanceRecord*, %struct.ComponentInstanceRecord*, i32, %struct.OpaqueQTMLMutex*, %struct.Rect, i32, i32, %struct.SharedGlobals, %struct.DCPredictors, i8, i8, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, %struct.YUVGeneralParams, i16, i16, i32, [5 x i16*], [5 x %struct.DecodeTable*], [5 x %struct.DecodeTable*], [5 x i8], [5 x i8], [4 x [65 x i16]], [4 x %struct.DecodeTable], [4 x %struct.DecodeTable], [4 x i8*], [4 x i8*], i16, i16, i32, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, [18 x i8], [18 x i8], [18 x i8], [18 x i8], i32, i32, i8**, i8**, i8, i8, i8, i8, i16, i16, %struct.App1Marker*, i8, i8, i8, i8, i32**, i8*, i16*, i8*, i16*, i8, [3 x i8], i32, [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i16*], [3 x i16*], [3 x i8**], [3 x %struct.DecodeTable*], [3 x %struct.DecodeTable*], [3 x i32], i32, [3 x i16*], i32, i32, i32, [3 x i32], i8, i8, i8, i8, %struct.ICMDataProcRecord*, i32, i32, i8**, i8**, i8**, i8**, i32, i32, i8*, i32, i32, i16*, i16*, i8*, i32, i32, i32, i32, i32, i32, i32, [16 x <2 x i64>], [1280 x i8], i8 }
%struct.OpaqueQTMLMutex = type opaque
%struct.Rect = type { i16, i16, i16, i16 }
%struct.SharedDGlobals = type { %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable }
%struct.SharedEGlobals = type { i8**, i8**, i8**, i8** }
%struct.SharedGlobals = type { %struct.SharedEGlobals*, %struct.SharedDGlobals* }
%struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 }
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
; Test input: two nested loops.
;   - Outer loop (%bb .. %bb28) is driven by the i16 phi %component.09,
;     which starts at 0 and is incremented by 1 in %bb28. Its value is
;     sign-extended to i32 before being used as a getelementptr index,
;     and the incremented value is sign-extended again for the exit
;     compare against 42.
;   - Inner loop (%bb8) is driven by the i32 phi %indvar; a truncated i16
;     copy of it is incremented, sign-extended and compared against a
;     value loaded in the outer loop.
define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind {
entry:
; Guard: skip both loops unless the loaded count is positive.
%tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; <i32*> [#uses=1]
%tmp4412 = load i32* %tmp4311, align 16 ; <i32> [#uses=2]
%tmp4613 = icmp sgt i32 %tmp4412, 0 ; <i1> [#uses=1]
br i1 %tmp4613, label %bb, label %bb49
bb: ; preds = %bb28, %entry
; Outer loop header: narrow (i16) induction variable, widened for indexing.
%component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ] ; <i16> [#uses=2]
%tmp12 = sext i16 %component.09 to i32 ; <i32> [#uses=2]
%tmp6 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12 ; <i16**> [#uses=2]
%tmp7 = load i16** %tmp6, align 4 ; <i16*> [#uses=2]
%tmp235 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12 ; <i32*> [#uses=1]
%tmp246 = load i32* %tmp235, align 4 ; <i32> [#uses=2]
%tmp267 = icmp sgt i32 %tmp246, 0 ; <i1> [#uses=1]
br i1 %tmp267, label %bb8, label %bb28
bb8: ; preds = %bb8, %bb
; Inner loop: i32 induction variable stepping by 1; the pointer advances
; by 64 elements per iteration (index = %indvar * 64 + 64).
%indvar = phi i32 [ 0, %bb ], [ %indvar.next2, %bb8 ] ; <i32> [#uses=3]
%theDCTBufferIter.01.rec = shl i32 %indvar, 6 ; <i32> [#uses=1]
%tmp10.rec = add i32 %theDCTBufferIter.01.rec, 64 ; <i32> [#uses=1]
%tmp10 = getelementptr i16* %tmp7, i32 %tmp10.rec ; <i16*> [#uses=1]
%i.02 = trunc i32 %indvar to i16 ; <i16> [#uses=1]
%tmp13 = add i16 %i.02, 1 ; <i16> [#uses=1]
%phitmp = sext i16 %tmp13 to i32 ; <i32> [#uses=1]
%tmp26 = icmp slt i32 %phitmp, %tmp246 ; <i1> [#uses=1]
%indvar.next2 = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp26, label %bb8, label %bb28
bb28: ; preds = %bb8, %bb
; Outer loop latch: store the advanced pointer back, bump the i16
; counter and continue while its widened value is below 42.
%theDCTBufferIter.0.lcssa = phi i16* [ %tmp7, %bb ], [ %tmp10, %bb8 ] ; <i16*> [#uses=1]
store i16* %theDCTBufferIter.0.lcssa, i16** %tmp6, align 4
%tmp37 = add i16 %component.09, 1 ; <i16> [#uses=2]
%phitmp15 = sext i16 %tmp37 to i32 ; <i32> [#uses=1]
%tmp46 = icmp slt i32 %phitmp15, 42 ; <i1> [#uses=1]
br i1 %tmp46, label %bb, label %bb49
bb49: ; preds = %bb28, %entry
ret i16 0
}