add a new pass to instrument loads and stores for run-time bounds checking

move EmitGEPOffset from InstCombine to Transforms/Utils/Local.h

A draft of this patch was reviewed by Andrew; thanks.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157261 91177308-0d34-0410-b5e6-96231b3b80d8
Nuno Lopes 2012-05-22 17:19:09 +00:00
parent 23e75da7e0
commit 5c525b59d5
9 changed files with 357 additions and 62 deletions


@@ -71,6 +71,7 @@ void initializeBasicCallGraphPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
void initializeBlockFrequencyInfoPass(PassRegistry&);
void initializeBlockPlacementPass(PassRegistry&);
void initializeBoundsCheckingPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
void initializeBranchProbabilityInfoPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);


@@ -55,6 +55,7 @@ namespace {
      (void) llvm::createScalarEvolutionAliasAnalysisPass();
      (void) llvm::createTypeBasedAliasAnalysisPass();
      (void) llvm::createBlockPlacementPass();
      (void) llvm::createBoundsCheckingPass();
      (void) llvm::createBreakCriticalEdgesPass();
      (void) llvm::createCFGSimplificationPass();
      (void) llvm::createConstantMergePass();


@@ -325,6 +325,14 @@ Pass *createLowerAtomicPass();
//
Pass *createCorrelatedValuePropagationPass();
//===----------------------------------------------------------------------===//
//
// BoundsChecking - This pass instruments the code to perform run-time bounds
// checking on loads, stores, and other memory-accessing instructions.
// Penalty is the maximum run-time cost that is acceptable to the user.
//
FunctionPass *createBoundsCheckingPass(unsigned Penalty = 5);
//===----------------------------------------------------------------------===//
//
// ObjCARCAPElim - ObjC ARC autorelease pool elimination.
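
A note on usage: createBoundsCheckingPass is the only public entry point added by this commit; the rest is registration plumbing plus the pass implementation. Below is a minimal sketch of scheduling the pass with the legacy PassManager of this era; the wrapper name addBoundsChecks is illustrative, and the explicit TargetData registration mirrors the pass's getAnalysisUsage requirement:

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Scalar.h"

// Instrument every function in M with run-time bounds checks (sketch).
void addBoundsChecks(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(new llvm::TargetData(&M));                      // required analysis
  PM.add(llvm::createBoundsCheckingPass(/*Penalty=*/5)); // default penalty
  PM.run(M);
}

A higher Penalty allows the pass to emit more run-time size computations; as the implementation below shows, Penalty < 2 restricts checks to allocations whose size is a compile-time constant.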


@@ -15,6 +15,11 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
#define LLVM_TRANSFORMS_UTILS_LOCAL_H
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Operator.h"
namespace llvm {
class User;
@@ -160,6 +165,62 @@ static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) {
  return getOrEnforceKnownAlignment(V, 0, TD);
}

/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
template<typename IRBuilderTy>
Value *EmitGEPOffset(IRBuilderTy *Builder, const TargetData &TD, User *GEP) {
  gep_type_iterator GTI = gep_type_begin(GEP);
  Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // If the GEP is inbounds, we know that none of the addressing operations will
  // overflow in an unsigned sense.
  bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        if (Size)
          Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
                                      GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }

    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
                              GEP->getName()+".idx", isInBounds /*NUW*/);
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
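
Since EmitGEPOffset is now a header template parameterized on the builder type, any pass that holds an IRBuilder (or a TargetFolder-based builder, as the new BoundsChecking pass does) can reuse it instead of duplicating InstCombine's logic. A minimal usage sketch for the template above; the wrapper gepByteOffset is illustrative, not part of this commit:

#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"

// Emit code that computes GEP's byte offset from its base pointer, returned
// as an intptr-sized signed integer (the base address itself is not added).
llvm::Value *gepByteOffset(llvm::IRBuilder<> &B, const llvm::TargetData &TD,
                           llvm::User *GEP) {
  return llvm::EmitGEPOffset(&B, TD, GEP);
}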


@@ -226,7 +226,7 @@ private:
                              bool DoXform = true);
  Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
  bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
  Value *EmitGEPOffset(User *GEP, bool NoNUW = false);
  Value *EmitGEPOffset(User *GEP);
public:
  // InsertNewInstBefore - insert an instruction New before instruction Old


@@ -420,67 +420,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}

/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
/// If NoNUW is true, then the NUW flag is not used.
Value *InstCombiner::EmitGEPOffset(User *GEP, bool NoNUW) {
  TargetData &TD = *getTargetData();
  gep_type_iterator GTI = gep_type_begin(GEP);
  Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
  Value *Result = Constant::getNullValue(IntPtrTy);

  // If the GEP is inbounds, we know that none of the addressing operations will
  // overflow in an unsigned sense.
  bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoNUW;

  // Build a mask for high order bits.
  unsigned IntPtrWidth = TD.getPointerSizeInBits();
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
       ++i, ++GTI) {
    Value *Op = *i;
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
      if (OpC->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());

        if (Size)
          Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
                                      GEP->getName()+".offs");
        continue;
      }

      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
      Constant *OC =
              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
      Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
      // Emit an add instruction.
      Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
      continue;
    }

    // Convert to correct type.
    if (Op->getType() != IntPtrTy)
      Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
    if (Size != 1) {
      // We'll let instcombine(mul) convert this to a shl if possible.
      Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
                              GEP->getName()+".idx", isInBounds /*NUW*/);
    }

    // Emit an add instruction.
    Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs");
  }
  return Result;
}
/// Optimize pointer differences into the same array into a size. Consider:
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.


@@ -87,6 +87,10 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
}

Value *InstCombiner::EmitGEPOffset(User *GEP) {
  return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.


@@ -0,0 +1,280 @@
//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass that instruments the code to perform run-time
// bounds checking on loads, stores, and other memory-accessing instructions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "bounds-checking"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
using namespace llvm;
STATISTIC(ChecksAdded, "Bounds checks added");
STATISTIC(ChecksSkipped, "Bounds checks skipped");
STATISTIC(ChecksUnable, "Bounds checks unable to add");
typedef IRBuilder<true, TargetFolder> BuilderTy;
namespace {
  enum ConstTriState {
    NotConst, Const, Dunno
  };

  struct BoundsChecking : public FunctionPass {
    const TargetData *TD;
    BuilderTy *Builder;
    Function *Fn;
    BasicBlock *TrapBB;
    unsigned Penalty;
    static char ID;

    BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty) {
      initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
    }

    BasicBlock *getTrapBB();
    ConstTriState computeAllocSize(Value *Alloc, uint64_t &Size,
                                   Value* &SizeValue);
    bool instrument(Value *Ptr, Value *Val);

    virtual bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetData>();
    }
  };
}
char BoundsChecking::ID = 0;
INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
                false, false)
/// getTrapBB - create a basic block that traps. All failing bounds checks
/// branch to this block. There is only one trap block per function.
BasicBlock *BoundsChecking::getTrapBB() {
  if (TrapBB)
    return TrapBB;

  BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
  TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
  Builder->SetInsertPoint(TrapBB);

  llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
  CallInst *TrapCall = Builder->CreateCall(F);
  TrapCall->setDoesNotReturn();
  TrapCall->setDoesNotThrow();
  Builder->CreateUnreachable();

  Builder->SetInsertPoint(PrevInsertPoint);
  return TrapBB;
}
/// computeAllocSize - compute the size of the object allocated by an
/// allocation site. Returns Const if the size is a compile-time constant (in
/// Size), NotConst if it is known only at run time (in SizeValue), and Dunno
/// if the size could not be determined within the maximum run-time Penalty
/// the computation would incur.
ConstTriState BoundsChecking::computeAllocSize(Value *Alloc, uint64_t &Size,
                                               Value* &SizeValue) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Alloc)) {
    if (GV->hasDefinitiveInitializer()) {
      Constant *C = GV->getInitializer();
      Size = TD->getTypeAllocSize(C->getType());
      return Const;
    }
    return Dunno;

  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Alloc)) {
    if (!AI->getAllocatedType()->isSized())
      return Dunno;

    Size = TD->getTypeAllocSize(AI->getAllocatedType());
    if (!AI->isArrayAllocation())
      return Const; // we are done

    Value *ArraySize = AI->getArraySize();
    if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
      Size *= C->getZExtValue();
      return Const;
    }

    if (Penalty < 2)
      return Dunno;

    SizeValue = ConstantInt::get(ArraySize->getType(), Size);
    SizeValue = Builder->CreateMul(SizeValue, ArraySize);
    return NotConst;

  } else if (CallInst *MI = extractMallocCall(Alloc)) {
    SizeValue = MI->getArgOperand(0);
    if (ConstantInt *CI = dyn_cast<ConstantInt>(SizeValue)) {
      Size = CI->getZExtValue();
      return Const;
    }
    return Penalty >= 2 ? NotConst : Dunno;

  } else if (CallInst *MI = extractCallocCall(Alloc)) {
    Value *Arg1 = MI->getArgOperand(0);
    Value *Arg2 = MI->getArgOperand(1);
    if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Arg1)) {
      if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Arg2)) {
        Size = (CI1->getValue() * CI2->getValue()).getZExtValue();
        return Const;
      }
    }

    if (Penalty < 2)
      return Dunno;

    SizeValue = Builder->CreateMul(Arg1, Arg2);
    return NotConst;
  }

  DEBUG(dbgs() << "computeAllocSize failed:\n" << *Alloc);
  return Dunno;
}
bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
  uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType());
  DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
               << " bytes\n");

  Type *SizeTy = Type::getInt64Ty(Fn->getContext());

  // Get to the real allocated thing and offset as fast as possible.
  Ptr = Ptr->stripPointerCasts();
  GEPOperator *GEP;

  if ((GEP = dyn_cast<GEPOperator>(Ptr))) {
    // check if we will be able to get the offset
    if (!GEP->hasAllConstantIndices() && Penalty < 2) {
      ++ChecksUnable;
      return false;
    }
    Ptr = GEP->getPointerOperand()->stripPointerCasts();
  }

  uint64_t Size = 0;
  Value *SizeValue = 0;
  ConstTriState ConstAlloc = computeAllocSize(Ptr, Size, SizeValue);
  if (ConstAlloc == Dunno) {
    ++ChecksUnable;
    return false;
  }
  assert(ConstAlloc == Const || SizeValue);

  uint64_t Offset = 0;
  Value *OffsetValue = 0;

  if (GEP) {
    if (GEP->hasAllConstantIndices()) {
      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
      assert(GEP->getPointerOperandType()->isPointerTy());
      Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
    } else {
      OffsetValue = EmitGEPOffset(Builder, *TD, GEP);
    }
  }

  if (!OffsetValue && ConstAlloc == Const) {
    if (Size < Offset || (Size - Offset) < NeededSize) {
      // Out of bounds
      Builder->CreateBr(getTrapBB());
      ++ChecksAdded;
      return true;
    }
    // in bounds
    ++ChecksSkipped;
    return false;
  }

  if (OffsetValue)
    OffsetValue = Builder->CreateZExt(OffsetValue, SizeTy);
  else
    OffsetValue = ConstantInt::get(SizeTy, Offset);

  if (SizeValue)
    SizeValue = Builder->CreateZExt(SizeValue, SizeTy);
  else
    SizeValue = ConstantInt::get(SizeTy, Size);

  Value *NeededSizeVal = ConstantInt::get(SizeTy, NeededSize);
  Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue);
  Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue);
  Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
  Value *Or = Builder->CreateOr(Cmp1, Cmp2);

  // FIXME: add unlikely branch taken metadata?
  Instruction *Inst = Builder->GetInsertPoint();
  BasicBlock *OldBB = Inst->getParent();
  BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
  OldBB->getTerminator()->eraseFromParent();
  BranchInst::Create(getTrapBB(), Cont, Or, OldBB);
  ++ChecksAdded;
  return true;
}
bool BoundsChecking::runOnFunction(Function &F) {
  TD = &getAnalysis<TargetData>();

  TrapBB = 0;
  Fn = &F;
  BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
  Builder = &TheBuilder;

  // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
  // touching instructions
  std::vector<Instruction*> WorkList;
  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
    Instruction *I = &*i;
    if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
        isa<AtomicRMWInst>(I))
      WorkList.push_back(I);
  }

  bool MadeChange = false;
  while (!WorkList.empty()) {
    Instruction *I = WorkList.back();
    WorkList.pop_back();

    Builder->SetInsertPoint(I);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      MadeChange |= instrument(LI->getPointerOperand(), LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
      MadeChange |= instrument(AI->getPointerOperand(), AI->getCompareOperand());
    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I)) {
      MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
    } else {
      llvm_unreachable("unknown Instruction type");
    }
  }

  return MadeChange;
}

FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) {
  return new BoundsChecking(Penalty);
}
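
A note on the guard that instrument() emits: the obvious check Offset + NeededSize <= Size can itself overflow in the addition, so the pass subtracts instead and uses a second comparison to catch the case where Size - Offset wraps. Below is a C++ rendering of the emitted condition; the names mirror the values above, and __builtin_trap() stands in for the branch to getTrapBB():

#include <cstdint>

// All three quantities are unsigned 64-bit, matching the CreateZExt to i64.
void boundsGuard(uint64_t Size, uint64_t Offset, uint64_t NeededSize) {
  bool Cmp1 = Size < Offset;                 // access starts past the object;
                                             // also guards the wrap below
  bool Cmp2 = (Size - Offset) < NeededSize;  // too few bytes left
  if (Cmp1 | Cmp2)
    __builtin_trap();                        // i.e. branch to the trap block
  // otherwise control falls through to the split-off continuation block
}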


@@ -29,6 +29,7 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
  initializeADCEPass(Registry);
  initializeBlockPlacementPass(Registry);
  initializeBoundsCheckingPass(Registry);
  initializeCodeGenPreparePass(Registry);
  initializeConstantPropagationPass(Registry);
  initializeCorrelatedValuePropagationPass(Registry);