diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6dbcde03cf2..290efe2ec00 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" @@ -79,7 +80,7 @@ static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanInitName = "__asan_init_v4"; static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; @@ -215,28 +216,86 @@ STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); namespace { -/// A set of dynamically initialized globals extracted from metadata. -class SetOfDynamicallyInitializedGlobals { +/// Frontend-provided metadata for global variables. +class GlobalsMetadata { public: - void Init(Module& M) { - // Clang generates metadata identifying all dynamically initialized globals. 
- NamedMDNode *DynamicGlobals = - M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); - if (!DynamicGlobals) + void init(Module& M) { + assert(!inited_); + inited_ = true; + NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); + if (!Globals) return; - for (const auto MDN : DynamicGlobals->operands()) { - assert(MDN->getNumOperands() == 1); - Value *VG = MDN->getOperand(0); - // The optimizer may optimize away a global entirely, in which case we - // cannot instrument access to it. - if (!VG) + for (auto MDN : Globals->operands()) { + // Format of the metadata node for the global: + // { + // global, + // source_location, + // i1 is_dynamically_initialized, + // i1 is_blacklisted + // } + assert(MDN->getNumOperands() == 4); + Value *V = MDN->getOperand(0); + // The optimizer may optimize away a global entirely. + if (!V) continue; - DynInitGlobals.insert(cast<GlobalVariable>(VG)); + GlobalVariable *GV = cast<GlobalVariable>(V); + if (Value *Loc = MDN->getOperand(1)) { + GlobalVariable *GVLoc = cast<GlobalVariable>(Loc); + // We may already know the source location for GV, if it was merged + // with another global. + if (SourceLocation.insert(std::make_pair(GV, GVLoc)).second) + addSourceLocationGlobal(GVLoc); + } + ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2)); + if (IsDynInit->isOne()) + DynInitGlobals.insert(GV); + ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3)); + if (IsBlacklisted->isOne()) + BlacklistedGlobals.insert(GV); } } - bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; } + + GlobalVariable *getSourceLocation(GlobalVariable *G) const { + auto Pos = SourceLocation.find(G); + return (Pos != SourceLocation.end()) ? Pos->second : nullptr; + } + + /// Check if the global is dynamically initialized. + bool isDynInit(GlobalVariable *G) const { + return DynInitGlobals.count(G); + } + + /// Check if the global was blacklisted. 
+ bool isBlacklisted(GlobalVariable *G) const { + return BlacklistedGlobals.count(G); + } + + /// Check if the global was generated to describe source location of another + /// global (we don't want to instrument them). + bool isSourceLocationGlobal(GlobalVariable *G) const { + return LocationGlobals.count(G); + } + private: - SmallSet<GlobalValue*, 32> DynInitGlobals; + bool inited_ = false; + DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation; + DenseSet<GlobalVariable*> DynInitGlobals; + DenseSet<GlobalVariable*> BlacklistedGlobals; + DenseSet<GlobalVariable*> LocationGlobals; + + void addSourceLocationGlobal(GlobalVariable *SourceLocGV) { + // Source location global is a struct with layout: + // { + // filename, + // i32 line_number, + // i32 column_number, + // } + LocationGlobals.insert(SourceLocGV); + ConstantStruct *Contents = + cast<ConstantStruct>(SourceLocGV->getInitializer()); + GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0)); + LocationGlobals.insert(FilenameGV); + } }; /// This struct defines the shadow mapping using the rule: @@ -351,7 +410,7 @@ struct AddressSanitizer : public FunctionPass { *AsanMemoryAccessCallbackSized[2]; Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; - SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; + GlobalsMetadata GlobalsMD; friend struct FunctionStackPoisoner; }; @@ -381,7 +440,7 @@ class AddressSanitizerModule : public ModulePass { SmallString<64> BlacklistFile; std::unique_ptr<SpecialCaseList> BL; - SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; + GlobalsMetadata GlobalsMD; Type *IntptrTy; LLVMContext *C; const DataLayout *DL; @@ -659,7 +718,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { // If a global variable does not have dynamic initialization we don't // have to instrument it. However, if a global does not have initializer // at all, we assume it has dynamic initializer (in other TU). 
- return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G); + return G->hasInitializer() && !GlobalsMD.isDynInit(G); } void @@ -866,7 +925,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); + // FIXME: Don't use the blacklist here, all the data should be collected + // by the frontend and passed in globals metadata. if (BL->isIn(*G)) return false; + if (GlobalsMD.isBlacklisted(G)) return false; + if (GlobalsMD.isSourceLocationGlobal(G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; if (GlobalWasGeneratedByAsan(G)) return false; // Our own global. @@ -967,7 +1030,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { - DynamicallyInitializedGlobals.Init(M); + GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -986,10 +1049,11 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // const char *name; // const char *module_name; // size_t has_dynamic_init; + // void *source_location; // We initialize an array of such structures and pass it to a run-time call. 
- StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); + StructType *GlobalStructTy = + StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n); bool HasDynamicallyInitializedGlobals = false; @@ -1017,9 +1081,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); - // Determine whether this global should be poisoned in initialization. - bool GlobalHasDynamicInitializer = - DynamicallyInitializedGlobals.Contains(G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -1048,17 +1109,20 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { NewGlobal->takeName(G); + // Look up G's metadata while G is still alive; it is erased just below. + bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G); + GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G); G->eraseFromParent(); Initializers[i] = ConstantStruct::get( - GlobalStructTy, - ConstantExpr::getPointerCast(NewGlobal, IntptrTy), + GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy), ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), + SourceLoc ? ConstantExpr::getPointerCast(SourceLoc, IntptrTy) + : ConstantInt::get(IntptrTy, 0), NULL); - // Populate the first and last globals declared in this TU. 
if (ClInitializers && GlobalHasDynamicInitializer) HasDynamicallyInitializedGlobals = true; @@ -1186,7 +1250,7 @@ bool AddressSanitizer::doInitialization(Module &M) { report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); - DynamicallyInitializedGlobals.Init(M); + GlobalsMD.init(M); C = &(M.getContext()); LongSize = DL->getPointerSizeInBits(); diff --git a/test/Instrumentation/AddressSanitizer/global_metadata.ll b/test/Instrumentation/AddressSanitizer/global_metadata.ll new file mode 100644 index 00000000000..9641c3ed4d7 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -asan -asan-module -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Globals: +@global = global i32 0, align 4 +@dyn_init_global = global i32 0, align 4 +@blacklisted_global = global i32 0, align 4 +@_ZZ4funcvE10static_var = internal global i32 0, align 4 +@.str = private unnamed_addr constant [14 x i8] c"Hello, world!\00", align 1 +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }] + +; Sanitizer location descriptors: +@.str1 = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1 +@.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 } +@.asan_loc_descr1 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 7, i32 5 } +@.asan_loc_descr2 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 12, i32 14 } +@.asan_loc_descr4 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 14, i32 25 } + +; Check that globals were instrumented, but sanitizer location descriptors weren't: +; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32 +; CHECK: @.str = internal 
unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32 +; CHECK: @.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 } + +; Check that location descriptors were passed into __asan_register_globals: +; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* @.asan_loc_descr to i64) + +; Function Attrs: nounwind sanitize_address +define internal void @__cxx_global_var_init() #0 section ".text.startup" { +entry: + %0 = load i32* @global, align 4 + store i32 %0, i32* @dyn_init_global, align 4 + ret void +} + +; Function Attrs: nounwind sanitize_address +define void @_Z4funcv() #1 { +entry: + %literal = alloca i8*, align 8 + store i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0), i8** %literal, align 8 + ret void +} + +; Function Attrs: nounwind sanitize_address +define internal void @_GLOBAL__sub_I_asan_globals.cpp() #0 section ".text.startup" { +entry: + call void @__cxx_global_var_init() + ret void +} + +attributes #0 = { nounwind sanitize_address } +attributes #1 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.asan.globals = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = metadata !{i32* @global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr, i1 false, i1 false} +!1 = metadata !{i32* @dyn_init_global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr1, i1 true, i1 false} +!2 = metadata !{i32* @blacklisted_global, null, i1 false, i1 true} +!3 = metadata !{i32* @_ZZ4funcvE10static_var, { [22 x i8]*, i32, i32 }* @.asan_loc_descr2, i1 false, i1 false} +!4 = metadata !{[14 x i8]* @.str, { [22 x i8]*, i32, i32 }* @.asan_loc_descr4, i1 false, i1 false} +!5 = metadata !{metadata !"clang version 3.5.0 (211282)"} diff --git 
a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll index 7945e816ca3..816ab291566 100644 --- a/test/Instrumentation/AddressSanitizer/instrument_global.ll +++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll @@ -68,8 +68,8 @@ entry: } -!llvm.asan.dynamically_initialized_globals = !{!0} -!0 = metadata !{[10 x i32]* @GlobDy} +!llvm.asan.globals = !{!0} +!0 = metadata !{[10 x i32]* @GlobDy, null, i1 true, i1 false} ; CHECK-LABEL: define internal void @asan.module_ctor ; CHECK-NOT: ret diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll index 05e18b5a01b..83ff53f6f51 100644 --- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll +++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -7,9 +7,11 @@ target triple = "x86_64-unknown-linux-gnu" @YYY = global i32 0, align 4 ; W/o dynamic initializer. ; Clang will emit the following metadata identifying @xxx as dynamically ; initialized. -!0 = metadata !{i32* @xxx} -!1 = metadata !{i32* @XXX} -!llvm.asan.dynamically_initialized_globals = !{!0, !1} +!0 = metadata !{i32* @xxx, null, i1 true, i1 false} +!1 = metadata !{i32* @XXX, null, i1 true, i1 false} +!2 = metadata !{i32* @yyy, null, i1 false, i1 false} +!3 = metadata !{i32* @YYY, null, i1 false, i1 false} +!llvm.asan.globals = !{!0, !1, !2, !3} define i32 @initializer() uwtable { entry: