[asan] add code to detect global initialization fiasco in C/C++. The sub-pass is off by default for now. Patch by Reid Watson. Note: this patch changes the interface between LLVM and compiler-rt parts of asan. The corresponding patch to compiler-rt will follow.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162268 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kostya Serebryany 2012-08-21 08:24:25 +00:00
parent a182367e59
commit 9b9f87a87a
2 changed files with 207 additions and 63 deletions

View File

@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -fasan-blacklist.
@ -171,6 +175,8 @@ struct AddressSanitizer : public ModulePass {
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool handleFunction(Module &M, Function &F);
void createInitializerPoisonCalls(Module &M,
Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool poisonStackInFunction(Module &M, Function &F);
virtual bool runOnModule(Module &M);
@ -178,7 +184,6 @@ struct AddressSanitizer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
private:
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@ -194,9 +199,12 @@ struct AddressSanitizer : public ModulePass {
}
Function *checkInterfaceFunction(Constant *FuncOrBitcast);
bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
bool HasDynamicInitializer(GlobalVariable *G);
LLVMContext *C;
TargetData *TD;
@ -213,6 +221,7 @@ struct AddressSanitizer : public ModulePass {
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
};
} // namespace
@ -358,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
void AddressSanitizer::FindDynamicInitializers(Module& M) {
// Clang generates metadata identifying all dynamically initialized globals.
NamedMDNode *DynamicGlobals =
M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
if (!DynamicGlobals)
return;
for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
MDNode *MDN = DynamicGlobals->getOperand(i);
assert(MDN->getNumOperands() == 1);
Value *VG = MDN->getOperand(0);
// The optimizer may optimize away a global entirely, in which case we
// cannot instrument access to it.
if (!VG)
continue;
GlobalVariable *G = cast<GlobalVariable>(VG);
DynamicallyInitializedGlobals.insert(G);
}
}
// Returns true if a global variable is initialized dynamically in this TU.
bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
return DynamicallyInitializedGlobals.count(G);
}
void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
bool IsWrite;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
// We are accessing a global scalar variable. Nothing to catch here.
return;
if (ClOpt && ClOptGlobals) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
if (!ClInitializers)
return;
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global has external linkage, we
// assume it has dynamic initialization, as it may have an initializer
// in a different TU.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
!HasDynamicInitializer(G))
return;
}
}
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
@ -462,68 +507,106 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
void AddressSanitizer::createInitializerPoisonCalls(Module &M,
Value *FirstAddr,
Value *LastAddr) {
// We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
// If that function is not present, this TU contains no globals, or they have
// all been optimized away
if (!GlobalInit)
return;
// Set up the arguments to our poison/unpoison functions.
IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
// Declare our poisoning and unpoisoning functions.
Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Add a call to poison all external globals before the given function starts.
IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
I != E; ++I) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
CallInst::Create(AsanUnpoisonGlobals, "", RI);
}
}
}
bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G);
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
// Touch only those globals that will not be defined in other modules.
// Don't handle ODR type linkages since other modules may be built w/o asan.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
if (G->isThreadLocal())
return false;
// For now, just ignore this Alloca if the alignment is large.
if (G->getAlignment() > RedzoneSize) return false;
// Ignore all the globals with the names starting with "\01L_OBJC_".
// Many of those are put into the .cstring section. The linker compresses
// that section by removing the spare \0s after the string terminator, so
// our redzones get broken.
if ((G->getName().find("\01L_OBJC_") == 0) ||
(G->getName().find("\01l_OBJC_") == 0)) {
DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
return false;
}
if (G->hasSection()) {
StringRef Section(G->getSection());
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
// them.
if ((Section.find("__OBJC,") == 0) ||
(Section.find("__DATA, __objc_") == 0)) {
DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
return false;
}
// See http://code.google.com/p/address-sanitizer/issues/detail?id=32
// Constant CFString instances are compiled in the following way:
// -- the string buffer is emitted into
// __TEXT,__cstring,cstring_literals
// -- the constant NSConstantString structure referencing that buffer
// is placed into __DATA,__cfstring
// Therefore there's no point in placing redzones into __DATA,__cfstring.
// Moreover, it causes the linker to crash on OS X 10.7
if (Section.find("__DATA,__cfstring") == 0) {
DEBUG(dbgs() << "Ignoring CFString: " << *G);
return false;
}
}
return true;
}
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizer::insertGlobalRedzones(Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
E = M.getGlobalList().end(); G != E; ++G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G);
if (!Ty->isSized()) continue;
if (!G->hasInitializer()) continue;
// Touch only those globals that will not be defined in other modules.
// Don't handle ODR type linkages since other modules may be built w/o asan.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
continue;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
if (G->isThreadLocal())
continue;
// For now, just ignore this Alloca if the alignment is large.
if (G->getAlignment() > RedzoneSize) continue;
// Ignore all the globals with the names starting with "\01L_OBJC_".
// Many of those are put into the .cstring section. The linker compresses
// that section by removing the spare \0s after the string terminator, so
// our redzones get broken.
if ((G->getName().find("\01L_OBJC_") == 0) ||
(G->getName().find("\01l_OBJC_") == 0)) {
DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
continue;
}
if (G->hasSection()) {
StringRef Section(G->getSection());
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
// them.
if ((Section.find("__OBJC,") == 0) ||
(Section.find("__DATA, __objc_") == 0)) {
DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
continue;
}
// See http://code.google.com/p/address-sanitizer/issues/detail?id=32
// Constant CFString instances are compiled in the following way:
// -- the string buffer is emitted into
// __TEXT,__cstring,cstring_literals
// -- the constant NSConstantString structure referencing that buffer
// is placed into __DATA,__cfstring
// Therefore there's no point in placing redzones into __DATA,__cfstring.
// Moreover, it causes the linker to crash on OS X 10.7
if (Section.find("__DATA,__cfstring") == 0) {
DEBUG(dbgs() << "Ignoring CFString: " << *G);
continue;
}
}
GlobalsToChange.push_back(G);
for (Module::GlobalListType::iterator G = M.global_begin(),
E = M.global_end(); G != E; ++G) {
if (ShouldInstrumentGlobal(G))
GlobalsToChange.push_back(G);
}
size_t n = GlobalsToChange.size();
@ -534,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n);
IntptrTy, IntptrTy,
IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
IRBuilder<> IRB(CtorInsertBefore);
if (ClInitializers)
FindDynamicInitializers(M);
// The addresses of the first and last dynamically initialized globals in
// this TU. Used in initialization order checking.
Value *FirstDynamic = 0, *LastDynamic = 0;
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
@ -549,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
// Determine whether this global should be poisoned in initialization.
bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@ -583,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
// Populate the first and last globals declared in this TU.
if (ClInitializers && GlobalHasDynamicInitializer) {
LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
if (FirstDynamic == 0)
FirstDynamic = LastDynamic;
}
DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
}
@ -592,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
if (ClInitializers && FirstDynamic && LastDynamic)
createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
kAsanRegisterGlobalsName, IRB.getVoidTy(),
IntptrTy, IntptrTy, NULL));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
IRB.CreateCall2(AsanRegisterGlobals,

View File

@ -0,0 +1,36 @@
; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@xxx = global i32 0, align 4
; Clang will emit the following metadata identifying @xxx as dynamically
; initialized.
!0 = metadata !{i32* @xxx}
!llvm.asan.dynamically_initialized_globals = !{!0}
define i32 @initializer() uwtable {
entry:
ret i32 42
}
define internal void @__cxx_global_var_init() section ".text.startup" {
entry:
%call = call i32 @initializer()
store i32 %call, i32* @xxx, align 4
ret void
}
define internal void @_GLOBAL__I_a() address_safety section ".text.startup" {
entry:
call void @__cxx_global_var_init()
ret void
}
; Clang indicated that @xxx was dynamically initailized.
; __asan_{before,after}_dynamic_init should be called from _GLOBAL__I_a
; CHECK: define internal void @_GLOBAL__I_a
; CHECK-NOT: ret
; CHECK: call void @__asan_before_dynamic_init
; CHECK: call void @__cxx_global_var_init
; CHECK: call void @__asan_after_dynamic_init
; CHECK: ret