llvm-6502/lib/Transforms/Instrumentation/SanitizerCoverage.cpp

328 lines
13 KiB
C++
Raw Normal View History

//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Coverage instrumentation that works with AddressSanitizer
// and potentially with other Sanitizers.
//
// We create a module-wide array of 32-bit guards, one slot per instrumented
// block, and inject this code into the entry block (CoverageLevel=1)
// or all blocks (CoverageLevel>=2):
// if (*Guard <= 0) {
// __sanitizer_cov(Guard);
// }
// The accesses to Guard are atomic. The rest of the logic is
// in __sanitizer_cov (it's fine to call it more than once).
//
// With CoverageLevel>=3 we also split critical edges, thereby effectively
// instrumenting all edges.
//
// CoverageLevel>=4 adds indirect call profiling implemented as a function call.
//
// This coverage implementation provides very limited data:
// it only tells if a given function (block) was ever executed. No counters.
// But for many use cases this is what we need and the added slowdown is small.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "sancov"
// Symbol names of the run-time callbacks that this pass declares in the
// instrumented module.
// Called once from the generated module constructor with the guard array and
// the number of guards.
static const char *const kSanCovModuleInitName = "__sanitizer_cov_module_init";
// Called from instrumented blocks with a pointer to the block's guard.
static const char *const kSanCovName = "__sanitizer_cov";
// Called before indirect calls; the "16" suffix matches the callee-cache size
// used in InjectCoverageForIndirectCalls.
static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
// Experimental tracing callbacks: one for the function's entry block, one for
// every other basic block.
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
// Name of the internal constructor function generated for each module.
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
// Priority with which the generated constructor is appended to the module's
// global constructors.
static const uint64_t kSanCtorAndDtorPriority = 1;
// Coverage level requested on the command line. The pass uses the maximum of
// this value and the level passed to its constructor.
static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
"3: all blocks and critical edges, "
"4: above plus indirect calls"),
cl::Hidden, cl::init(0));
// Functions with more basic blocks than this threshold get only their entry
// block instrumented, regardless of the coverage level.
static cl::opt<int> ClCoverageBlockThreshold(
"sanitizer-coverage-block-threshold",
cl::desc("Add coverage instrumentation only to the entry block if there "
"are more than this number of blocks."),
cl::Hidden, cl::init(1500));
// When set, the pass additionally declares the tracing callbacks and inserts
// a call to one of them at the start of every basic block.
static cl::opt<bool>
ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
cl::desc("Experimental basic-block tracing: insert "
"callbacks at every basic block"),
cl::Hidden, cl::init(false));
namespace {

/// Module pass that inserts sanitizer coverage callbacks into every function
/// that has a body.
///
/// The effective coverage level is the larger of the constructor argument and
/// the -sanitizer-coverage-level command-line option; a level of 0 turns the
/// pass into a no-op.
class SanitizerCoverageModule : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid

  SanitizerCoverageModule(int RequestedLevel = 0)
      : ModulePass(ID),
        CoverageLevel(
            std::max(RequestedLevel, static_cast<int>(ClCoverageLevel))) {}

  const char *getPassName() const override {
    return "SanitizerCoverageModule";
  }

  /// The pointer width used for IntptrTy is taken from the DataLayoutPass.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DataLayoutPass>();
  }

  /// Declares the run-time interface, instruments all functions and emits the
  /// guard array together with the module constructor.
  bool runOnModule(Module &M) override;

  /// Instruments a single function; invoked by runOnModule for each function.
  bool runOnFunction(Function &F);

private:
  // Instrumentation helpers.
  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
                      ArrayRef<Instruction *> IndirCalls);
  void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
  void InjectCoverageForIndirectCalls(Function &F,
                                      ArrayRef<Instruction *> IndirCalls);
  bool InjectTracing(Function &F, ArrayRef<BasicBlock *> AllBlocks);

  // Run-time callbacks, set up in runOnModule.
  Function *SanCovFunction;
  Function *SanCovIndirCallFunction;
  Function *SanCovModuleInit;
  // The tracing callbacks are only assigned when experimental tracing is on.
  Function *SanCovTraceEnter;
  Function *SanCovTraceBB;

  // Empty inline asm emitted after each coverage callback.
  InlineAsm *EmptyAsm;
  // Integer type as wide as a pointer on the target.
  Type *IntptrTy;
  LLVMContext *C;
  // Guard storage that the instrumentation indexes into.
  GlobalVariable *GuardArray;

  int CoverageLevel;
};

} // namespace
/// Returns \p FuncOrBitcast as a Function, or aborts compilation if it is
/// anything else.
///
/// \param FuncOrBitcast the constant obtained when declaring a run-time
///        interface function. It is not a Function when, for example, the
///        module already defines the symbol with a different prototype and a
///        bitcast constant expression is handed back instead.
/// \returns the declared Function; does not return on failure.
static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
  if (Function *F = dyn_cast<Function>(FuncOrBitcast))
    return F;
  std::string Err;
  raw_string_ostream Stream(Err);
  Stream << "SanitizerCoverage interface function redefined: "
         << *FuncOrBitcast;
  // raw_string_ostream buffers its output, so Err is only guaranteed to hold
  // the full message after a flush; str() flushes and returns the string.
  report_fatal_error(Stream.str());
}
/// Instruments every function of \p M and emits the supporting globals.
///
/// Steps, in order: create the module constructor, declare the run-time
/// callbacks, instrument all functions against a placeholder guard variable,
/// then replace the placeholder with a correctly sized guard array and make
/// the constructor pass that array to __sanitizer_cov_module_init.
///
/// \returns false if coverage is disabled (level 0), true otherwise.
bool SanitizerCoverageModule::runOnModule(Module &M) {
  if (CoverageLevel == 0)
    return false;

  C = &M.getContext();
  IntptrTy = Type::getIntNTy(
      *C, getAnalysis<DataLayoutPass>().getDataLayout().getPointerSizeInBits());

  IRBuilder<> Builder(*C);
  Type *VoidTy = Type::getVoidTy(*C);
  Type *Int32Ty = Builder.getInt32Ty();
  Type *Int32PtrTy = PointerType::getUnqual(Int32Ty);

  // Internal "void sancov.module_ctor()" consisting of a single return; the
  // call to the module-init callback is inserted before that return below.
  Function *CtorFunc =
      Function::Create(FunctionType::get(VoidTy, false),
                       GlobalValue::InternalLinkage, kSanCovModuleCtorName, &M);
  BasicBlock *CtorBody = BasicBlock::Create(*C, "", CtorFunc);
  ReturnInst::Create(*C, CtorBody);
  appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);

  // Declare the run-time interface.
  Constant *CovDecl =
      M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr);
  SanCovFunction = checkInterfaceFunction(CovDecl);

  Constant *IndirCallDecl = M.getOrInsertFunction(
      kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr);
  SanCovIndirCallFunction = checkInterfaceFunction(IndirCallDecl);

  Constant *ModuleInitDecl = M.getOrInsertFunction(
      kSanCovModuleInitName, VoidTy, Int32PtrTy, IntptrTy, nullptr);
  SanCovModuleInit = checkInterfaceFunction(ModuleInitDecl);
  SanCovModuleInit->setLinkage(Function::ExternalLinkage);

  // We insert an empty inline asm after cov callbacks to avoid callback merge.
  EmptyAsm = InlineAsm::get(FunctionType::get(VoidTy, false), StringRef(""),
                            StringRef(""), /*hasSideEffects=*/true);

  if (ClExperimentalTracing) {
    Constant *TraceEnterDecl =
        M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, IntptrTy, nullptr);
    SanCovTraceEnter = checkInterfaceFunction(TraceEnterDecl);

    Constant *TraceBBDecl =
        M.getOrInsertFunction(kSanCovTraceBB, VoidTy, IntptrTy, nullptr);
    SanCovTraceBB = checkInterfaceFunction(TraceBBDecl);
  }

  // The number of guards is unknown until all functions are instrumented, so
  // the instrumentation refers to a placeholder global for now.
  GuardArray =
      new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
                         nullptr, "__sancov_gen_cov_tmp");

  for (Function &F : M)
    runOnFunction(F);

  // Every use of SanCovFunction corresponds to one guard. Allocate one
  // additional leading element; slot 0 is never assigned to a block.
  const unsigned NumGuards = SanCovFunction->getNumUses();
  Type *GuardArrayTy = ArrayType::get(Int32Ty, NumGuards + 1);
  GlobalVariable *RealGuardArray = new GlobalVariable(
      M, GuardArrayTy, false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(GuardArrayTy), "__sancov_gen_cov");
  Value *RealGuardArrayPtr =
      Builder.CreatePointerCast(RealGuardArray, Int32PtrTy);

  // Redirect all guard accesses from the placeholder to the real array.
  GuardArray->replaceAllUsesWith(RealGuardArrayPtr);
  GuardArray->eraseFromParent();

  // Call __sanitizer_cov_module_init from the module constructor.
  Builder.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator());
  Builder.CreateCall2(SanCovModuleInit, RealGuardArrayPtr,
                      ConstantInt::get(IntptrTy, NumGuards));
  return true;
}
/// Collects the basic blocks (and, at level >= 4, the indirect call sites) of
/// \p F and hands them to the coverage and tracing instrumentation.
///
/// \returns false for declarations and for functions whose name contains
///          ".module_ctor"; true for every function that was processed.
bool SanitizerCoverageModule::runOnFunction(Function &F) {
  if (F.empty())
    return false;

  // Should not instrument sanitizer init functions.
  const bool IsModuleCtor =
      F.getName().find(".module_ctor") != std::string::npos;
  if (IsModuleCtor)
    return false;

  // Splitting critical edges first gives every edge a block of its own.
  if (CoverageLevel >= 3)
    SplitAllCriticalEdges(F, this);

  const bool WantIndirectCalls = CoverageLevel >= 4;
  SmallVector<BasicBlock *, 16> Blocks;
  SmallVector<Instruction *, 8> IndirectCallSites;
  for (BasicBlock &BB : F) {
    Blocks.push_back(&BB);
    if (!WantIndirectCalls)
      continue;
    for (Instruction &Inst : BB) {
      CallSite CS(&Inst);
      // Keep only call sites whose callee is not a directly named function.
      if (!CS || CS.getCalledFunction())
        continue;
      IndirectCallSites.push_back(&Inst);
    }
  }

  InjectCoverage(F, Blocks, IndirectCallSites);
  InjectTracing(F, Blocks);
  return true;
}
// Experimental support for tracing.
// Basically, insert a callback at the beginning of every basic block.
// Every callback gets a pointer to a unique global for internal storage.
// The entry block of the function calls the "function enter" callback, all
// other blocks call the "basic block" callback.
// Returns false (and does nothing) unless experimental tracing is enabled.
bool SanitizerCoverageModule::InjectTracing(Function &F,
                                            ArrayRef<BasicBlock *> AllBlocks) {
  if (!ClExperimentalTracing)
    return false;

  Type *CacheTy =
      ArrayType::get(IntptrTy, 1); // May need to use more words later.
  for (BasicBlock *Block : AllBlocks) {
    IRBuilder<> IRB(Block->getFirstInsertionPt());
    GlobalVariable *TraceCache = new GlobalVariable(
        *F.getParent(), CacheTy, false, GlobalValue::PrivateLinkage,
        Constant::getNullValue(CacheTy), "__sancov_gen_trace_cache");

    Function *Callback = SanCovTraceBB;
    if (&F.getEntryBlock() == Block)
      Callback = SanCovTraceEnter;

    IRB.CreateCall(Callback, IRB.CreatePointerCast(TraceCache, IntptrTy));
  }
  return true;
}
/// Adds guard checks to \p F and profiling calls to its indirect call sites.
///
/// Only the entry block is instrumented when the coverage level is 1 or when
/// the function has more blocks than -sanitizer-coverage-block-threshold;
/// otherwise every block in \p AllBlocks is instrumented.
///
/// \returns false if coverage is disabled (level 0), true otherwise.
bool SanitizerCoverageModule::InjectCoverage(
    Function &F, ArrayRef<BasicBlock *> AllBlocks,
    ArrayRef<Instruction *> IndirCalls) {
  if (CoverageLevel == 0)
    return false;

  const bool EntryBlockOnly =
      CoverageLevel == 1 ||
      static_cast<unsigned>(ClCoverageBlockThreshold) < AllBlocks.size();

  if (EntryBlockOnly) {
    InjectCoverageAtBlock(F, F.getEntryBlock());
  } else {
    for (BasicBlock *Block : AllBlocks)
      InjectCoverageAtBlock(F, *Block);
  }

  InjectCoverageForIndirectCalls(F, IndirCalls);
  return true;
}
// Before every indirect call we emit a call to the run-time function
// __sanitizer_cov_indir_call* with two arguments:
// - the callee address,
// - the address of a zero-initialized global cache array of kCacheSize
//   pointer-sized words, private to that call site.
// The cache is used to speed up recording the caller-callee pairs.
// The address of the caller is passed implicitly via caller PC.
// kCacheSize is encoded in the name of the run-time function.
// Call sites whose callee is inline asm are left alone.
void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
    Function &F, ArrayRef<Instruction *> IndirCalls) {
  if (IndirCalls.empty())
    return;

  const int kCacheSize = 16;
  const int kCacheAlignment = 64; // Align for better performance.
  Type *CacheTy = ArrayType::get(IntptrTy, kCacheSize);

  for (Instruction *Call : IndirCalls) {
    Value *Callee = CallSite(Call).getCalledValue();
    if (isa<InlineAsm>(Callee))
      continue;

    GlobalVariable *CalleeCache = new GlobalVariable(
        *F.getParent(), CacheTy, false, GlobalValue::PrivateLinkage,
        Constant::getNullValue(CacheTy), "__sancov_gen_callee_cache");
    CalleeCache->setAlignment(kCacheAlignment);

    // The callback is placed immediately before the indirect call.
    IRBuilder<> IRB(Call);
    Value *CalleeAddr = IRB.CreatePointerCast(Callee, IntptrTy);
    Value *CacheAddr = IRB.CreatePointerCast(CalleeCache, IntptrTy);
    IRB.CreateCall2(SanCovIndirCallFunction, CalleeAddr, CacheAddr);
  }
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
BasicBlock &BB) {
BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
// Skip static allocas at the top of the entry block so they don't become
// dynamic when we split the block. If we used our optimized stack layout,
// then there will only be one alloca and it will come first.
for (; IP != BE; ++IP) {
AllocaInst *AI = dyn_cast<AllocaInst>(IP);
if (!AI || !AI->isStaticAlloca())
break;
}
DebugLoc EntryLoc = &BB == &F.getEntryBlock()
? IP->getDebugLoc().getFnDebugLoc(*C)
: IP->getDebugLoc();
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
LoadInst *Load = IRB.CreateLoad(GuardP);
Load->setAtomic(Monotonic);
Load->setAlignment(4);
Load->setMetadata(F.getParent()->getMDKindID("nosanitize"),
IR: Split Metadata from Value Split `Metadata` away from the `Value` class hierarchy, as part of PR21532. Assembly and bitcode changes are in the wings, but this is the bulk of the change for the IR C++ API. I have a follow-up patch prepared for `clang`. If this breaks other sub-projects, I apologize in advance :(. Help me compile it on Darwin I'll try to fix it. FWIW, the errors should be easy to fix, so it may be simpler to just fix it yourself. This breaks the build for all metadata-related code that's out-of-tree. Rest assured the transition is mechanical and the compiler should catch almost all of the problems. Here's a quick guide for updating your code: - `Metadata` is the root of a class hierarchy with three main classes: `MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from the `Value` class hierarchy. It is typeless -- i.e., instances do *not* have a `Type`. - `MDNode`'s operands are all `Metadata *` (instead of `Value *`). - `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively. If you're referring solely to resolved `MDNode`s -- post graph construction -- just use `MDNode*`. - `MDNode` (and the rest of `Metadata`) have only limited support for `replaceAllUsesWith()`. As long as an `MDNode` is pointing at a forward declaration -- the result of `MDNode::getTemporary()` -- it maintains a side map of its uses and can RAUW itself. Once the forward declarations are fully resolved RAUW support is dropped on the ground. This means that uniquing collisions on changing operands cause nodes to become "distinct". (This already happened fairly commonly, whenever an operand went to null.) If you're constructing complex (non self-reference) `MDNode` cycles, you need to call `MDNode::resolveCycles()` on each node (or on a top-level node that somehow references all of the nodes). Also, don't do that. Metadata cycles (and the RAUW machinery needed to construct them) are expensive. 
- An `MDNode` can only refer to a `Constant` through a bridge called `ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`). As a side effect, accessing an operand of an `MDNode` that is known to be, e.g., `ConstantInt`, takes three steps: first, cast from `Metadata` to `ConstantAsMetadata`; second, extract the `Constant`; third, cast down to `ConstantInt`. The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have metadata schema owners transition away from using `Constant`s when the type isn't important (and they don't care about referring to `GlobalValue`s). In the meantime, I've added transitional API to the `mdconst` namespace that matches semantics with the old code, in order to avoid adding the error-prone three-step equivalent to every call site. If your old code was: MDNode *N = foo(); bar(isa <ConstantInt>(N->getOperand(0))); baz(cast <ConstantInt>(N->getOperand(1))); bak(cast_or_null <ConstantInt>(N->getOperand(2))); bat(dyn_cast <ConstantInt>(N->getOperand(3))); bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4))); you can trivially match its semantics with: MDNode *N = foo(); bar(mdconst::hasa <ConstantInt>(N->getOperand(0))); baz(mdconst::extract <ConstantInt>(N->getOperand(1))); bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2))); bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3))); bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4))); and when you transition your metadata schema to `MDInt`: MDNode *N = foo(); bar(isa <MDInt>(N->getOperand(0))); baz(cast <MDInt>(N->getOperand(1))); bak(cast_or_null <MDInt>(N->getOperand(2))); bat(dyn_cast <MDInt>(N->getOperand(3))); bay(dyn_cast_or_null<MDInt>(N->getOperand(4))); - A `CallInst` -- specifically, intrinsic instructions -- can refer to metadata through a bridge called `MetadataAsValue`. This is a subclass of `Value` where `getType()->isMetadataTy()`. 
`MetadataAsValue` is the *only* class that can legally refer to a `LocalAsMetadata`, which is a bridged form of non-`Constant` values like `Argument` and `Instruction`. It can also refer to any other `Metadata` subclass. (I'll break all your testcases in a follow-up commit, when I propagate this change to assembly.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223802 91177308-0d34-0410-b5e6-96231b3b80d8
2014-12-09 18:38:53 +00:00
MDNode::get(*C, None));
Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
Instruction *Ins = SplitBlockAndInsertIfThen(
Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(Ins);
IRB.SetCurrentDebugLocation(EntryLoc);
// __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
IRB.CreateCall(SanCovFunction, GuardP);
IRB.CreateCall(EmptyAsm); // Avoids callback merge.
}
// Storage for the pass identifier declared in the class.
char SanitizerCoverageModule::ID = 0;
// Registers the pass under the command-line name "sancov".
// NOTE(review): the two adjacent string literals concatenate to
// "SanitizerCoverage: TODO.ModulePass" (no separating space) -- confirm
// whether that description text is intended.
INITIALIZE_PASS(SanitizerCoverageModule, "sancov",
"SanitizerCoverage: TODO."
"ModulePass", false, false)
/// Creates a new sanitizer coverage pass for the given coverage level.
///
/// \param CoverageLevel requested level; the pass uses the larger of this
///        value and the -sanitizer-coverage-level option.
/// \returns a newly allocated pass object.
ModulePass *llvm::createSanitizerCoverageModulePass(int CoverageLevel) {
  ModulePass *Pass = new SanitizerCoverageModule(CoverageLevel);
  return Pass;
}