Introduce a pointertracking pass.

For now this only computes the allocated size of the memory pointed to by a
pointer, and the offset of a pointer from its allocated base pointer.
The actual checkLimits part will come later, after another round of review.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75657 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Torok Edwin 2009-07-14 18:44:28 +00:00
parent 91bc4936a2
commit 969f28dfb6
5 changed files with 482 additions and 0 deletions

View File

@ -0,0 +1,132 @@
//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements tracking of pointer bounds.
// It knows that the libc functions "calloc" and "realloc" allocate memory, thus
// you should avoid using this pass if they mean something else for your
// language.
//
// All methods assume that the pointer is not NULL, if it is then the returned
// allocation size is wrong, and the result from checkLimits is wrong too.
// It also assumes that pointers are valid, and that it is not analyzing a
// use-after-free scenario.
// Due to these limitations the "size" returned by these methods should be
// considered as either 0 or the returned size.
//
// Another analysis pass should be used to find use-after-free/NULL dereference
// bugs.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
#define LLVM_ANALYSIS_POINTERTRACKING_H
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/PredIteratorCache.h"
namespace llvm {
class DominatorTree;
class ScalarEvolution;
class SCEV;
class Loop;
class LoopInfo;
class TargetData;
// Tri-state answer returned by the PointerTracking solver queries
// (compareSCEV, checkLimits, isLoopGuardedBy).
// Every answer assumes that the pointer is not NULL,
// and that it is not a use-after-free situation.
enum SolverResult {
AlwaysFalse,// the queried condition never holds under the above constraints
AlwaysTrue,// the queried condition always holds under the above constraints
Unknown // it can sometimes be true, sometimes false, or it is undecided
};
// Function pass that computes, for pointer values, the size of the allocation
// they refer to and their offset from the base of that allocation.
// Sizes and offsets are reported as SCEV expressions.
class PointerTracking : public FunctionPass {
public:
typedef ICmpInst::Predicate Predicate;
// Pass identification.
static char ID;
PointerTracking();
// Looks up the module's "calloc" and "realloc" declarations.
virtual bool doInitialization(Module &M);
// If this pointer directly points to an allocation, return
// the number of elements allocated, counted in units of P's pointee type.
// Otherwise return CouldNotCompute.
// Since allocations can fail by returning NULL, the real element count
// for every allocation is either 0 or the value returned by this function.
// P must have pointer type.
const SCEV *getAllocationElementCount(Value *P) const;
// Same as getAllocationElementCount() but returns size in bytes.
// We consider one byte as 8 bits.
const SCEV *getAllocationSizeInBytes(Value *V) const;
// Given a Pointer, determine a base pointer of known size, and an offset
// therefrom.
// When unable to determine, sets Base to NULL, and BaseSize/Offset to
// CouldNotCompute.
// BaseSize, and Offset are in bytes: Pointer == Base + Offset
void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *& BaseSize,
const SCEV *&Offset) const;
// Compares the 2 scalar evolution expressions according to predicate,
// and if it can prove that the result is always true or always false
// return AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown.
enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B,
const Loop *L);
// Determines whether the condition LHS <Pred1> RHS is sufficient
// for the condition A <Pred2> B to hold.
// Currently only ULT/ULE is supported.
// This errs on the side of returning false.
bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS,
const SCEV *A, Predicate Pred2, const SCEV *B,
const Loop *L);
// Determines whether Offset is known to be always in [0, Limit) bounds.
// This errs on the side of returning Unknown.
enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit,
BasicBlock *BB);
// Caches the function and the analyses used by the queries above.
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
// Prints the tracked size/offset information for the pointer-typed
// instructions of the current function.
void print(raw_ostream &OS, const Module* = 0) const;
virtual void print(std::ostream &OS, const Module* = 0) const;
private:
// Function most recently passed to runOnFunction.
Function *FF;
// Optional: obtained with getAnalysisIfAvailable, so it may be NULL.
TargetData *TD;
ScalarEvolution *SE;
LoopInfo *LI;
DominatorTree *DT;
// "calloc"/"realloc" declarations of the module, or NULL when the module has
// none with one of the signatures accepted by doInitialization.
Function *callocFunc;
Function *reallocFunc;
// Cleared at the start of every runOnFunction.
PredIteratorCache predCache;
// Expected to be empty whenever runOnFunction starts.
// NOTE(review): presumably the set of SCEVs currently being solved -- confirm.
SmallPtrSet<const SCEV*, 1> analyzing;
// Helper around ScalarEvolution::isLoopGuardedByCond that also tries the
// swapped and inverted forms of the predicate.
enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
const SCEV *A, const SCEV *B) const;
static bool isMonotonic(const SCEV *S);
bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const;
bool conditionSufficient(Value *Cond, bool negated,
const SCEV *A, Predicate Pred, const SCEV *B);
Value *getConditionToReach(BasicBlock *A,
DomTreeNodeBase<BasicBlock> *B,
bool &negated);
Value *getConditionToReach(BasicBlock *A,
BasicBlock *B,
bool &negated);
// Number of elements allocated for P; the element type is returned in Ty.
const SCEV *computeAllocationCount(Value *P, const Type *&Ty) const;
// Number of elements of type Ty allocated for P.
const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const;
};
}
#endif

View File

@ -20,6 +20,7 @@
#include "llvm/Analysis/IntervalPartition.h"
#include "llvm/Analysis/LoopVR.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Assembly/PrintModulePass.h"
@ -136,6 +137,7 @@ namespace {
(void)new llvm::FindUsedTypes();
(void)new llvm::ScalarEvolution();
(void)new llvm::LoopVR();
(void)new llvm::PointerTracking();
((llvm::Function*)0)->viewCFGOnly();
llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0);
X.add((llvm::Value*)0, 0); // for -print-alias-sets

View File

@ -0,0 +1,261 @@
//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements tracking of pointer bounds.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Value.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
// Pass identification.
char PointerTracking::ID=0;

// Every cached pointer starts out as null so that no member is ever read
// uninitialized: FF/TD/SE/LI/DT are only assigned in runOnFunction, and
// callocFunc/reallocFunc only in doInitialization.
// The initializers are listed in member declaration order.
PointerTracking::PointerTracking()
  : FunctionPass(&ID), FF(0), TD(0), SE(0), LI(0), DT(0),
    callocFunc(0), reallocFunc(0) {}
// Per-function setup: remembers the function and fetches the analyses that
// the query methods rely on. Always returns false.
bool PointerTracking::runOnFunction(Function &F) {
  // The 'analyzing' set is expected to be empty between functions.
  assert(analyzing.empty());
  predCache.clear();

  FF = &F;

  // TargetData is optional, so this pointer may be null.
  TD = getAnalysisIfAvailable<TargetData>();

  // The remaining analyses are declared as required in getAnalysisUsage.
  ScalarEvolution &ScalarEvo = getAnalysis<ScalarEvolution>();
  SE = &ScalarEvo;
  LoopInfo &Loops = getAnalysis<LoopInfo>();
  LI = &Loops;
  DominatorTree &DomTree = getAnalysis<DominatorTree>();
  DT = &DomTree;

  return false;
}
// Declares the analyses this pass depends on. Everything is preserved.
void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();

  // The required analyses are fetched again in runOnFunction.
  AU.addRequiredTransitive<DominatorTree>();
  AU.addRequiredTransitive<LoopInfo>();
  AU.addRequiredTransitive<ScalarEvolution>();
}
bool PointerTracking::doInitialization(Module &M) {
const Type *PTy = PointerType::getUnqual(Type::Int8Ty);
// Find calloc(i64, i64) or calloc(i32, i32).
callocFunc = M.getFunction("calloc");
if (callocFunc) {
const FunctionType *Ty = callocFunc->getFunctionType();
std::vector<const Type*> args, args2;
args.push_back(Type::Int64Ty);
args.push_back(Type::Int64Ty);
args2.push_back(Type::Int32Ty);
args2.push_back(Type::Int32Ty);
const FunctionType *Calloc1Type =
FunctionType::get(PTy, args, false);
const FunctionType *Calloc2Type =
FunctionType::get(PTy, args2, false);
if (Ty != Calloc1Type && Ty != Calloc2Type)
callocFunc = 0; // Give up
}
// Find realloc(i8*, i64) or realloc(i8*, i32).
reallocFunc = M.getFunction("realloc");
if (reallocFunc) {
const FunctionType *Ty = reallocFunc->getFunctionType();
std::vector<const Type*> args, args2;
args.push_back(PTy);
args.push_back(Type::Int64Ty);
args2.push_back(PTy);
args2.push_back(Type::Int32Ty);
const FunctionType *Realloc1Type =
FunctionType::get(PTy, args, false);
const FunctionType *Realloc2Type =
FunctionType::get(PTy, args2, false);
if (Ty != Realloc1Type && Ty != Realloc2Type)
reallocFunc = 0; // Give up
}
return false;
}
// Calculates the number of elements allocated for pointer P,
// the type of the element is stored in Ty.
// Recognized allocations are: alloca/malloc instructions, global variables,
// and direct calls to the module's calloc/realloc.
// Returns CouldNotCompute (leaving Ty untouched) for anything else.
const SCEV *PointerTracking::computeAllocationCount(Value *P,
                                                    const Type *&Ty) const {
  Value *V = P->stripPointerCasts();
  if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
    Value *arraySize = AI->getArraySize();
    Ty = AI->getAllocatedType();
    // arraySize elements of type Ty.
    return SE->getSCEV(arraySize);
  }
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
    if (GV->hasDefinitiveInitializer()) {
      Constant *C = GV->getInitializer();
      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
        Ty = ATy->getElementType();
        return SE->getConstant(Type::Int32Ty, ATy->getNumElements());
      }
    }
    // One element of the global's own type. GV->getType() is the type of the
    // *pointer* to the global, so the allocated object has its pointee type.
    Ty = GV->getType()->getElementType();
    return SE->getConstant(Type::Int32Ty, 1);
    //TODO: implement more tracking for globals
  }
  if (CallInst *CI = dyn_cast<CallInst>(V)) {
    CallSite CS(CI);
    Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
    // F is null for indirect calls, and callocFunc/reallocFunc are null when
    // the module does not declare them. Bail out here so that a null F is
    // never mistaken for a missing calloc/realloc below.
    if (!F)
      return SE->getCouldNotCompute();
    const Loop *L = LI->getLoopFor(CI->getParent());
    if (F == callocFunc) {
      Ty = Type::Int8Ty;
      // calloc allocates arg0*arg1 bytes.
      return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
                                               SE->getSCEV(CS.getArgument(1))),
                                L);
    } else if (F == reallocFunc) {
      Ty = Type::Int8Ty;
      // realloc allocates arg1 bytes.
      return SE->getSCEVAtScope(CS.getArgument(1), L);
    }
  }
  return SE->getCouldNotCompute();
}
// Calculates the number of elements of type Ty allocated for P.
// The count computed by computeAllocationCount is rescaled from its own
// element type to Ty. Returns CouldNotCompute when the allocation is not
// recognized, when the rescaling needs TargetData and none is available, when
// either type has no size, or when the result would not be a whole number of
// elements.
const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
                                                           const Type *Ty)
  const {
  const Type *elementTy = 0;
  const SCEV *Count = computeAllocationCount(P, elementTy);
  if (isa<SCEVCouldNotCompute>(Count))
    return Count;
  if (elementTy == Ty)
    return Count;

  if (!TD) // need TargetData from this point forward
    return SE->getCouldNotCompute();

  // Unsized types (e.g. opaque or function types) have no allocation size to
  // query.
  if (!elementTy->isSized() || !Ty->isSized())
    return SE->getCouldNotCompute();

  uint64_t elementSize = TD->getTypeAllocSize(elementTy);
  uint64_t wantSize = TD->getTypeAllocSize(Ty);
  if (elementSize == wantSize)
    return Count;
  // A zero-sized requested type has no meaningful element count, and would
  // make the modulo/division below divide by zero.
  if (wantSize == 0)
    return SE->getCouldNotCompute();
  if (elementSize % wantSize) //fractional counts not possible
    return SE->getCouldNotCompute();
  return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
                                               elementSize/wantSize));
}
// Element count of the allocation V points to, measured in units of V's
// pointee type.
const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
  // cast<> rather than dyn_cast<>: only pointer-typed values are accepted.
  const Type *PointeeTy = cast<PointerType>(V->getType())->getElementType();
  return computeAllocationCountForType(V, PointeeTy);
}
// Size of the allocation V points to, expressed as a count of i8 elements.
const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
  const Type *ByteTy = Type::Int8Ty;
  return computeAllocationCountForType(V, ByteTy);
}
// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too
// Returns AlwaysTrue when the loop is guarded by "A Pred B" (in either operand
// order), AlwaysFalse when it is guarded by the negation of "A Pred B" (in
// either operand order), and Unknown otherwise.
enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
                                                   Predicate Pred,
                                                   const SCEV *A,
                                                   const SCEV *B) const {
  // A Pred B
  if (SE->isLoopGuardedByCond(L, Pred, A, B))
    return AlwaysTrue;
  // B swapped(Pred) A: the same condition with the operands exchanged.
  Pred = ICmpInst::getSwappedPredicate(Pred);
  if (SE->isLoopGuardedByCond(L, Pred, B, A))
    return AlwaysTrue;
  // B inverse(swapped(Pred)) A: the negation of the condition.
  Pred = ICmpInst::getInversePredicate(Pred);
  if (SE->isLoopGuardedByCond(L, Pred, B, A))
    return AlwaysFalse;
  // A inverse(Pred) B: still the negation of the condition, just with the
  // operands back in their original order, so the answer is AlwaysFalse too.
  Pred = ICmpInst::getSwappedPredicate(Pred);
  if (SE->isLoopGuardedByCond(L, Pred, A, B))
    return AlwaysFalse;
  return Unknown;
}
// Placeholder for the bounds check of Offset against [0, Limit): none of the
// arguments is inspected yet and the answer is always Unknown.
enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
                                               const SCEV *Limit,
                                               BasicBlock *BB) {
  //FIXME: merge implementation
  const enum SolverResult Undecided = Unknown;
  return Undecided;
}
// Splits Pointer into its underlying object (Base), the byte size of that
// object (Limit) and the distance Pointer - Base (Offset).
// On failure Base is set to null and both Limit and Offset are set to the
// CouldNotCompute expression.
void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
                                       const SCEV *&Limit,
                                       const SCEV *&Offset) const {
  Value *Stripped = Pointer->stripPointerCasts();
  Base = Stripped->getUnderlyingObject();
  Limit = getAllocationSizeInBytes(Base);

  if (!isa<SCEVCouldNotCompute>(Limit)) {
    Offset = SE->getMinusSCEV(SE->getSCEV(Stripped), SE->getSCEV(Base));
    if (!isa<SCEVCouldNotCompute>(Offset))
      return; // Base, Limit and Offset are all known.
    // The size was known but the offset is not: report failure for both.
    Limit = Offset;
  } else {
    // The allocation size is unknown: report failure for both.
    Offset = Limit;
  }
  Base = 0;
}
// Prints what is known about every pointer-typed instruction of the current
// function:
//  - for an instruction that is itself a recognized allocation, its element
//    count and its size in bytes;
//  - for an instruction derived from a recognized allocation, the instruction,
//    its base, its offset and the verdict of checkLimits.
// Instructions whose base allocation is not recognized are skipped.
void PointerTracking::print(raw_ostream &OS, const Module* M) const {
  // Calling some PT methods may cause caches to be updated, however
  // this should be safe for the same reason its safe for SCEV.
  PointerTracking &PT = *const_cast<PointerTracking*>(this);
  for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
    if (!isa<PointerType>(I->getType()))
      continue;
    Value *Base;
    const SCEV *Limit, *Offset;
    getPointerOffset(&*I, Base, Limit, Offset);
    if (!Base)
      continue;

    if (Base == &*I) {
      const SCEV *S = getAllocationElementCount(Base);
      OS << *Base << " ==> " << *S << " elements, ";
      OS << *Limit << " bytes allocated\n";
      continue;
    }
    // Print the instruction itself, not its address in memory: an address is
    // meaningless to the reader and differs from run to run.
    OS << *I << " -- base: " << *Base;
    OS << " offset: " << *Offset;

    enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
    switch (res) {
    case AlwaysTrue:
      OS << " always safe\n";
      break;
    case AlwaysFalse:
      OS << " always unsafe\n";
      break;
    case Unknown:
      OS << " <<unknown>>\n";
      break;
    }
  }
}
void PointerTracking::print(std::ostream &o, const Module* M) const {
raw_os_ostream OS(o);
print(OS, M);
}
// Registers the pass under the name "pointertracking" with the description
// "Track pointer bounds".
// NOTE(review): the trailing (false, true) arguments are presumably
// RegisterPass's CFG-only and is-analysis flags -- confirm against the
// RegisterPass declaration.
static RegisterPass<PointerTracking> X("pointertracking",
"Track pointer bounds", false, true);
}

View File

@ -0,0 +1,3 @@
# Load the llvm.exp support library, then pass RunLLVMTests the sorted list of
# .ll, .c and .cpp files found under $srcdir/$subdir (an empty list when there
# are none, because of -nocomplain).
load_lib llvm.exp
RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]

View File

@ -0,0 +1,84 @@
; RUN: llvm-as < %s | opt -pointertracking -analyze | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
@.str = internal constant [5 x i8] c"1234\00" ; <[5 x i8]*> [#uses=1]
@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8 ; <i8**> [#uses=1]
@test1a = global [5 x i8] c"1234\00", align 1 ; <[5 x i8]*> [#uses=1]
@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4 ; <[5 x i32]*> [#uses=2]
@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8 ; <i32**> [#uses=1]
@test0p = common global i32* null, align 8 ; <i32**> [#uses=1]
@test0i = common global i32 0, align 4 ; <i32*> [#uses=1]
; foo0 passes pointers loaded from globals, and constant expressions that refer
; to globals, to @bar. None of its instructions carries a FileCheck expectation.
define i32 @foo0() nounwind {
entry:
%tmp = load i32** @test0p ; <i32*> [#uses=1]
%conv = bitcast i32* %tmp to i8* ; <i8*> [#uses=1]
%call = tail call i32 @bar(i8* %conv) nounwind ; <i32> [#uses=1]
%tmp1 = load i8** @test1p ; <i8*> [#uses=1]
%call2 = tail call i32 @bar(i8* %tmp1) nounwind ; <i32> [#uses=1]
%call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind ; <i32> [#uses=1]
%call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind ; <i32> [#uses=1]
%tmp7 = load i32** @test2p ; <i32*> [#uses=1]
%conv8 = bitcast i32* %tmp7 to i8* ; <i8*> [#uses=1]
%call9 = tail call i32 @bar(i8* %conv8) nounwind ; <i32> [#uses=1]
%call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind ; <i32> [#uses=1]
%add = add i32 %call2, %call ; <i32> [#uses=1]
%add4 = add i32 %add, %call3 ; <i32> [#uses=1]
%add6 = add i32 %add4, %call5 ; <i32> [#uses=1]
%add10 = add i32 %add6, %call9 ; <i32> [#uses=1]
%add12 = add i32 %add10, %call11 ; <i32> [#uses=1]
ret i32 %add12
}
declare i32 @bar(i8*)
; foo1 covers stack allocations: two fixed-size array allocas and two
; variable-length i8 allocas. The expected element count and byte size of each
; alloca follow its definition.
define i32 @foo1(i32 %n) nounwind {
entry:
; CHECK: 'foo1':
%test4a = alloca [10 x i8], align 1 ; <[10 x i8]*> [#uses=1]
; CHECK: %test4a =
; CHECK: ==> 1 elements, 10 bytes allocated
%test6a = alloca [10 x i32], align 4 ; <[10 x i32]*> [#uses=1]
; CHECK: %test6a =
; CHECK: ==> 1 elements, 40 bytes allocated
%vla = alloca i8, i32 %n, align 1 ; <i8*> [#uses=1]
; CHECK: %vla =
; CHECK: ==> %n elements, %n bytes allocated
%0 = shl i32 %n, 2 ; <i32> [#uses=1]
%vla7 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
; CHECK: %vla7 =
; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated
%call = call i32 @bar(i8* %vla) nounwind ; <i32> [#uses=1]
%arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0 ; <i8*> [#uses=1]
%call10 = call i32 @bar(i8* %arraydecay) nounwind ; <i32> [#uses=1]
%call11 = call i32 @bar(i8* %vla7) nounwind ; <i32> [#uses=1]
%ptrconv14 = bitcast [10 x i32]* %test6a to i8* ; <i8*> [#uses=1]
%call15 = call i32 @bar(i8* %ptrconv14) nounwind ; <i32> [#uses=1]
%add = add i32 %call10, %call ; <i32> [#uses=1]
%add12 = add i32 %add, %call11 ; <i32> [#uses=1]
%add16 = add i32 %add12, %call15 ; <i32> [#uses=1]
ret i32 %add16
}
; foo2 covers heap allocations: a malloc instruction, a call to @calloc and a
; call to @realloc. The expected element count and byte size of each allocation
; follow its definition.
define i32 @foo2(i32 %n) nounwind {
entry:
%call = malloc i8, i32 %n ; <i8*> [#uses=1]
; CHECK: %call =
; CHECK: ==> %n elements, %n bytes allocated
%call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; <i8*> [#uses=1]
; CHECK: %call2 =
; CHECK: ==> 8 elements, 8 bytes allocated
%call4 = tail call i8* @realloc(i8* null, i64 16) nounwind ; <i8*> [#uses=1]
; CHECK: %call4 =
; CHECK: ==> 16 elements, 16 bytes allocated
%call6 = tail call i32 @bar(i8* %call) nounwind ; <i32> [#uses=1]
%call8 = tail call i32 @bar(i8* %call2) nounwind ; <i32> [#uses=1]
%call10 = tail call i32 @bar(i8* %call4) nounwind ; <i32> [#uses=1]
%add = add i32 %call8, %call6 ; <i32> [#uses=1]
%add11 = add i32 %add, %call10 ; <i32> [#uses=1]
ret i32 %add11
}
declare noalias i8* @calloc(i64, i64) nounwind
declare noalias i8* @realloc(i8* nocapture, i64) nounwind