Implement categories for special case lists.

A special case list can now specify categories for specific globals,
which can be used to instruct an instrumentation pass to treat certain
functions or global variables in a specific way, such as by omitting
certain aspects of instrumentation while keeping others, or informing
the instrumentation pass that a specific uninstrumentable function
has certain semantics, thus allowing the pass to instrument callers
according to those semantics.

For example, AddressSanitizer now uses the "init" category instead of
global-init prefixes for globals whose initializers should not be
instrumented, but which in all other respects should be instrumented.

The motivating use case is DataFlowSanitizer, which will have a
number of different categories for uninstrumentable functions, such
as "functional", which specifies that a function has pure functional
semantics, or "discard", which indicates that a function's return
value should not be labelled.

Differential Revision: http://llvm-reviews.chandlerc.com/D1092

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185978 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Peter Collingbourne 2013-07-09 22:03:17 +00:00
parent c7087f8e42
commit 46e11c4c97
4 changed files with 182 additions and 53 deletions

View File

@ -1,4 +1,4 @@
//===-- SpecialCaseList.h - blacklist for sanitizers ------------*- C++ -*-===//
//===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -8,19 +8,34 @@
//
// This is a utility class for instrumentation passes (like AddressSanitizer
// or ThreadSanitizer) to avoid instrumenting some functions or global
// variables based on a user-supplied blacklist.
// variables based on a user-supplied list.
//
// The blacklist disables instrumentation of various functions and global
// variables. Each line contains a prefix, followed by a wild card expression.
// Empty lines and lines starting with "#" are ignored.
// The list can also specify categories for specific globals, which can be used
// to instruct an instrumentation pass to treat certain functions or global
// variables in a specific way, such as by omitting certain aspects of
// instrumentation while keeping others, or informing the instrumentation pass
// that a specific uninstrumentable function has certain semantics, thus
// allowing the pass to instrument callers according to those semantics.
//
// For example, AddressSanitizer uses the "init" category for globals whose
// initializers should not be instrumented, but which in all other respects
// should be instrumented.
//
// Each line contains a prefix, followed by a colon and a wild card expression,
// followed optionally by an equals sign and an instrumentation-specific
// category. Empty lines and lines starting with "#" are ignored.
// ---
// # Blacklisted items:
// fun:*_ZN4base6subtle*
// global:*global_with_bad_access_or_initialization*
// global-init:*global_with_initialization_issues*
// global-init-type:*Namespace::ClassName*
// global:*global_with_initialization_issues*=init
// type:*Namespace::ClassName*=init
// src:file_with_tricky_code.cc
// global-init-src:ignore-global-initializers-issues.cc
// src:ignore-global-initializers-issues.cc=init
//
// # Functions with pure functional semantics:
// fun:cos=functional
// fun:sin=functional
// ---
// Note that the wild card is in fact an llvm::Regex, but * is automatically
// replaced with .*
@ -44,20 +59,43 @@ class SpecialCaseList {
public:
SpecialCaseList(const StringRef Path);
SpecialCaseList(const MemoryBuffer *MB);
~SpecialCaseList();
/// Returns whether either this function or its source file are listed in the
/// given category, which may be omitted to search the empty category.
bool isIn(const Function &F, const StringRef Category = StringRef()) const;
/// Returns whether this global, its type or its source file are listed in the
/// given category, which may be omitted to search the empty category.
bool isIn(const GlobalVariable &G,
const StringRef Category = StringRef()) const;
/// Returns whether this module is listed in the given category, which may be
/// omitted to search the empty category.
bool isIn(const Module &M, const StringRef Category = StringRef()) const;
/// Returns whether either this function or its source file are listed in any
/// category. Category will contain the name of an arbitrary category in
/// which this function is listed.
bool findCategory(const Function &F, StringRef &Category) const;
/// Returns whether this global, its type or its source file are listed in any
/// category. Category will contain the name of an arbitrary category in
/// which this global is listed.
bool findCategory(const GlobalVariable &G, StringRef &Category) const;
/// Returns whether this module is listed in any category. Category will
/// contain the name of an arbitrary category in which this module is listed.
bool findCategory(const Module &M, StringRef &Category) const;
// Returns whether either this function or its source file are blacklisted.
bool isIn(const Function &F) const;
// Returns whether either this global or its source file are blacklisted.
bool isIn(const GlobalVariable &G) const;
// Returns whether this module is blacklisted by filename.
bool isIn(const Module &M) const;
// Returns whether a global should be excluded from initialization checking.
bool isInInit(const GlobalVariable &G) const;
private:
StringMap<Regex*> Entries;
StringMap<StringMap<Regex*> > Entries;
void init(const MemoryBuffer *MB);
bool inSection(const StringRef Section, const StringRef Query) const;
bool findCategory(const StringRef Section, const StringRef Query,
StringRef &Category) const;
bool inSectionCategory(const StringRef Section, const StringRef Query,
const StringRef Category) const;
};
} // namespace llvm

View File

@ -948,7 +948,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
bool GlobalHasDynamicInitializer =
DynamicallyInitializedGlobals.Contains(G);
// Don't check initialization order if this global is blacklisted.
GlobalHasDynamicInitializer &= !BL->isInInit(*G);
GlobalHasDynamicInitializer &= !BL->isIn(*G, "init");
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(

View File

@ -1,4 +1,4 @@
//===-- SpecialCaseList.cpp - blacklist for sanitizers --------------------===//
//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
//
// The LLVM Compiler Infrastructure
//
@ -9,13 +9,15 @@
//
// This is a utility class for instrumentation passes (like AddressSanitizer
// or ThreadSanitizer) to avoid instrumenting some functions or global
// variables based on a user-supplied blacklist.
// variables, or to instrument some functions or global variables in a specific
// way, based on a user-supplied list.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SpecialCaseList.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@ -50,7 +52,7 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
// Iterate through each line in the blacklist file.
SmallVector<StringRef, 16> Lines;
SplitString(MB->getBuffer(), Lines, "\n\r");
StringMap<std::string> Regexps;
StringMap<StringMap<std::string> > Regexps;
for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
I != E; ++I) {
// Ignore empty lines and lines starting with "#"
@ -59,12 +61,27 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
// Get our prefix and unparsed regexp.
std::pair<StringRef, StringRef> SplitLine = I->split(":");
StringRef Prefix = SplitLine.first;
std::string Regexp = SplitLine.second;
if (Regexp.empty()) {
if (SplitLine.second.empty()) {
// Missing ':' in the line.
report_fatal_error("malformed blacklist line: " + SplitLine.first);
}
std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
std::string Regexp = SplitRegexp.first;
StringRef Category = SplitRegexp.second;
// Backwards compatibility.
if (Prefix == "global-init") {
Prefix = "global";
Category = "init";
} else if (Prefix == "global-init-type") {
Prefix = "type";
Category = "init";
} else if (Prefix == "global-init-src") {
Prefix = "src";
Category = "init";
}
// Replace * with .*
for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
pos += strlen(".*")) {
@ -80,28 +97,40 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
}
// Add this regexp into the proper group by its prefix.
if (!Regexps[Prefix].empty())
Regexps[Prefix] += "|";
Regexps[Prefix] += Regexp;
if (!Regexps[Prefix][Category].empty())
Regexps[Prefix][Category] += "|";
Regexps[Prefix][Category] += Regexp;
}
// Iterate through each of the prefixes, and create Regexs for them.
for (StringMap<std::string>::const_iterator I = Regexps.begin(),
E = Regexps.end(); I != E; ++I) {
Entries[I->getKey()] = new Regex(I->getValue());
for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
E = Regexps.end();
I != E; ++I) {
for (StringMap<std::string>::const_iterator II = I->second.begin(),
IE = I->second.end();
II != IE; ++II) {
Entries[I->getKey()][II->getKey()] = new Regex(II->getValue());
}
}
}
bool SpecialCaseList::isIn(const Function &F) const {
return isIn(*F.getParent()) || inSection("fun", F.getName());
/// Frees every Regex stored in Entries.
///
/// Entries maps a section name to a per-category map of Regex pointers that
/// init() allocates with new, so each inner map has to be released explicitly.
SpecialCaseList::~SpecialCaseList() {
  typedef StringMap<StringMap<Regex*> >::iterator SectionIterator;

  SectionIterator Section = Entries.begin();
  const SectionIterator SectionsEnd = Entries.end();
  while (Section != SectionsEnd) {
    // Deletes the Regex held by each category entry of this section.
    DeleteContainerSeconds(Section->second);
    ++Section;
  }
}
bool SpecialCaseList::isIn(const GlobalVariable &G) const {
return isIn(*G.getParent()) || inSection("global", G.getName());
/// Looks for any category that lists function \p F.
///
/// The enclosing module is tried first through the Module overload; only when
/// that lookup fails is the function's name matched against the "fun" section.
///
/// \param F the function to look up.
/// \param Category passed through to the underlying lookups, which assign it
///        when they find a match.
/// \returns true if either lookup succeeds.
bool SpecialCaseList::findCategory(const Function &F,
                                   StringRef &Category) const {
  if (findCategory(*F.getParent(), Category))
    return true;
  return findCategory("fun", F.getName(), Category);
}
bool SpecialCaseList::isIn(const Module &M) const {
return inSection("src", M.getModuleIdentifier());
/// Tests whether function \p F is listed under \p Category.
///
/// \param F the function to test.
/// \param Category the category whose entries are consulted.
/// \returns true if F's parent module is listed in Category, or otherwise if
///          F's name matches the "fun" section entry for Category.
bool SpecialCaseList::isIn(const Function &F, const StringRef Category) const {
  // A listed module covers every function it contains, so check it first.
  if (isIn(*F.getParent(), Category))
    return true;
  return inSectionCategory("fun", F.getName(), Category);
}
static StringRef GetGVTypeString(const GlobalVariable &G) {
@ -115,19 +144,56 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
return "<unknown type>";
}
bool SpecialCaseList::isInInit(const GlobalVariable &G) const {
return (isIn(*G.getParent()) ||
inSection("global-init", G.getName()) ||
inSection("global-init-type", GetGVTypeString(G)) ||
inSection("global-init-src", G.getParent()->getModuleIdentifier()));
/// Looks for any category that lists global variable \p G.
///
/// Lookups are attempted in this order, stopping at the first success:
/// the parent module, then G's name in the "global" section, then the string
/// produced by GetGVTypeString(G) in the "type" section.
///
/// \param G the global variable to look up.
/// \param Category passed through to the underlying lookups, which assign it
///        when they find a match.
/// \returns true if any of the lookups succeeds.
bool SpecialCaseList::findCategory(const GlobalVariable &G,
                                   StringRef &Category) const {
  if (findCategory(*G.getParent(), Category))
    return true;
  if (findCategory("global", G.getName(), Category))
    return true;
  return findCategory("type", GetGVTypeString(G), Category);
}
bool SpecialCaseList::inSection(const StringRef Section,
const StringRef Query) const {
StringMap<Regex*>::const_iterator I = Entries.find(Section);
/// Tests whether global variable \p G is listed under \p Category.
///
/// \param G the global variable to test.
/// \param Category the category whose entries are consulted.
/// \returns true if G's parent module is listed in Category, or G's name
///          matches the "global" section, or the string returned by
///          GetGVTypeString(G) matches the "type" section (checked in that
///          order, stopping at the first match).
bool SpecialCaseList::isIn(const GlobalVariable &G,
                           const StringRef Category) const {
  bool Listed = isIn(*G.getParent(), Category);
  if (!Listed)
    Listed = inSectionCategory("global", G.getName(), Category);
  if (!Listed)
    Listed = inSectionCategory("type", GetGVTypeString(G), Category);
  return Listed;
}
bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const {
return findCategory("src", M.getModuleIdentifier(), Category);
}
bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
return inSectionCategory("src", M.getModuleIdentifier(), Category);
}
/// Searches every category of \p Section for a regex matching \p Query.
///
/// \param Section the section (line prefix such as "fun", "global", "type"
///        or "src") whose categories are searched.
/// \param Query the string matched against each category's regex.
/// \param Category set to the key of the first category, in the map's
///        iteration order, whose regex matches Query; left untouched when
///        nothing matches.
/// \returns true if some category in Section matches Query, false if the
///          section is absent or no category matches.
bool SpecialCaseList::findCategory(const StringRef Section,
                                   const StringRef Query,
                                   StringRef &Category) const {
  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
  if (I == Entries.end()) return false;

  // Each section holds one Regex per category, so the per-section value is a
  // map and must be walked rather than used as a single Regex.
  for (StringMap<Regex *>::const_iterator II = I->second.begin(),
                                          IE = I->second.end();
       II != IE; ++II) {
    Regex *FunctionRegex = II->getValue();
    if (FunctionRegex->match(Query)) {
      Category = II->first();
      return true;
    }
  }

  return false;
}
bool SpecialCaseList::inSectionCategory(const StringRef Section,
const StringRef Query,
const StringRef Category) const {
StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
StringMap<Regex *>::const_iterator II = I->second.find(Category);
if (II == I->second.end()) return false;
Regex *FunctionRegex = II->getValue();
return FunctionRegex->match(Query);
}

View File

@ -82,6 +82,13 @@ TEST_F(SpecialCaseListTest, FunctionIsIn) {
"fun:bar\n"));
EXPECT_TRUE(SCL->isIn(*Foo));
EXPECT_TRUE(SCL->isIn(*Bar));
SCL.reset(makeSpecialCaseList("fun:foo=functional\n"));
EXPECT_TRUE(SCL->isIn(*Foo, "functional"));
StringRef Category;
EXPECT_TRUE(SCL->findCategory(*Foo, Category));
EXPECT_EQ("functional", Category);
EXPECT_FALSE(SCL->isIn(*Bar, "functional"));
}
TEST_F(SpecialCaseListTest, GlobalIsIn) {
@ -92,26 +99,44 @@ TEST_F(SpecialCaseListTest, GlobalIsIn) {
OwningPtr<SpecialCaseList> SCL(makeSpecialCaseList("global:foo\n"));
EXPECT_TRUE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_FALSE(SCL->isInInit(*Foo));
EXPECT_FALSE(SCL->isInInit(*Bar));
EXPECT_FALSE(SCL->isIn(*Foo, "init"));
EXPECT_FALSE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("global:foo=init\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_TRUE(SCL->isIn(*Foo, "init"));
EXPECT_FALSE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("global-init:foo\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_TRUE(SCL->isInInit(*Foo));
EXPECT_FALSE(SCL->isInInit(*Bar));
EXPECT_TRUE(SCL->isIn(*Foo, "init"));
EXPECT_FALSE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("type:t2=init\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_FALSE(SCL->isIn(*Foo, "init"));
EXPECT_TRUE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("global-init-type:t2\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_FALSE(SCL->isInInit(*Foo));
EXPECT_TRUE(SCL->isInInit(*Bar));
EXPECT_FALSE(SCL->isIn(*Foo, "init"));
EXPECT_TRUE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("src:hello=init\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_TRUE(SCL->isIn(*Foo, "init"));
EXPECT_TRUE(SCL->isIn(*Bar, "init"));
SCL.reset(makeSpecialCaseList("global-init-src:hello\n"));
EXPECT_FALSE(SCL->isIn(*Foo));
EXPECT_FALSE(SCL->isIn(*Bar));
EXPECT_TRUE(SCL->isInInit(*Foo));
EXPECT_TRUE(SCL->isInInit(*Bar));
EXPECT_TRUE(SCL->isIn(*Foo, "init"));
EXPECT_TRUE(SCL->isIn(*Bar, "init"));
}
}