From f891336a25215a6d0720689ccce8bc31dbe3e9ac Mon Sep 17 00:00:00 2001 From: Anna Zaks Date: Fri, 27 Feb 2015 03:12:36 +0000 Subject: [PATCH] [asan] Skip promotable allocas to improve performance at -O0 Currently, the ASan executables built with -O0 are unnecessarily slow. The main reason is that ASan instrumentation pass inserts redundant checks around promotable allocas. These allocas do not get instrumented under -O1 because they get converted to virtual registered by mem2reg. With this patch, ASan instrumentation pass will only instrument non promotable allocas, giving us a speedup of 39% on a collection of benchmarks with -O0. (There is no measurable speedup at -O1.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230724 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 82 ++++++++++++------- .../AddressSanitizer/debug_info.ll | 4 +- .../do-not-instrument-promotable-allocas.ll | 21 +++++ .../instrument-dynamic-allocas.ll | 3 +- .../AddressSanitizer/lifetime-uar.ll | 4 +- .../AddressSanitizer/lifetime.ll | 5 ++ .../AddressSanitizer/stack_dynamic_alloca.ll | 4 + .../AddressSanitizer/stack_layout.ll | 18 ++++ 8 files changed, 105 insertions(+), 36 deletions(-) create mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 882aab0a70d..1a2cc55c60e 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -49,6 +49,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include #include @@ -165,6 +166,9 @@ static cl::opt ClMemoryAccessCallbackPrefix( cl::init("__asan_")); static cl::opt ClInstrumentAllocas("asan-instrument-allocas", cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false)); +static cl::opt ClSkipPromotableAllocas("asan-skip-promotable-allocas", + cl::desc("Do not instrument promotable allocas"), + cl::Hidden, cl::init(true)); // These flags allow to change the shadow mapping. // The shadow mapping looks like @@ -372,6 +376,17 @@ struct AddressSanitizer : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { + Type *Ty = AI->getAllocatedType(); + uint64_t SizeInBytes = DL->getTypeAllocSize(Ty); + return SizeInBytes; + } + /// Check if we want (and can) handle this alloca. + bool isInterestingAlloca(AllocaInst &AI) const; + /// If it is an interesting memory access, return the PointerOperand + /// and set IsWrite/Alignment. Otherwise return nullptr. + Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + unsigned *Alignment) const; void instrumentMop(Instruction *I, bool UseCalls); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, @@ -599,7 +614,7 @@ struct FunctionStackPoisoner : public InstVisitor { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) return; StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (isDynamicAlloca(AI)) @@ -653,19 +668,6 @@ struct FunctionStackPoisoner : public InstVisitor { bool isDynamicAlloca(AllocaInst &AI) const { return AI.isArrayAllocation() || !AI.isStaticAlloca(); } - - // Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0); - } - - uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { - Type *Ty = AI->getAllocatedType(); - uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty); - return SizeInBytes; - } /// Finds alloca where the value comes from. AllocaInst *findAllocaForValue(Value *V); void poisonRedZones(ArrayRef ShadowBytes, IRBuilder<> &IRB, @@ -775,38 +777,56 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { MI->eraseFromParent(); } -// If I is an interesting memory access, return the PointerOperand -// and set IsWrite/Alignment. Otherwise return nullptr. -static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - unsigned *Alignment) { +/// Check if we want (and can) handle this alloca. +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { + return (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +} + +/// If I is an interesting memory access, return the PointerOperand +/// and set IsWrite/Alignment. Otherwise return nullptr. +Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, + bool *IsWrite, + unsigned *Alignment) const { // Skip memory accesses inserted by another instrumentation. if (I->getMetadata("nosanitize")) return nullptr; + + Value *PtrOperand = nullptr; if (LoadInst *LI = dyn_cast(I)) { if (!ClInstrumentReads) return nullptr; *IsWrite = false; *Alignment = LI->getAlignment(); - return LI->getPointerOperand(); - } - if (StoreInst *SI = dyn_cast(I)) { + PtrOperand = LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(I)) { if (!ClInstrumentWrites) return nullptr; *IsWrite = true; *Alignment = SI->getAlignment(); - return SI->getPointerOperand(); - } - if (AtomicRMWInst *RMW = dyn_cast(I)) { + PtrOperand = SI->getPointerOperand(); + } else if (AtomicRMWInst *RMW = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; *Alignment = 0; - return RMW->getPointerOperand(); - } - if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { + PtrOperand = RMW->getPointerOperand(); + } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; *Alignment = 0; - return XCHG->getPointerOperand(); + PtrOperand = XCHG->getPointerOperand(); } - return nullptr; + + // Treat memory accesses to promotable allocas as non-interesting since they + // will not cause memory violations. This greatly speeds up the instrumented + // executable at -O0. + if (ClSkipPromotableAllocas) + if (auto AI = dyn_cast_or_null(PtrOperand)) + return isInterestingAlloca(*AI) ? AI : nullptr; + + return PtrOperand; } static bool isPointerOperand(Value *V) { @@ -1665,7 +1685,7 @@ void FunctionStackPoisoner::poisonStack() { SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { ASanStackVariableDescription D = { AI->getName().data(), - getAllocaSizeInBytes(AI), + ASan.getAllocaSizeInBytes(AI), AI->getAlignment(), AI, 0}; SVD.push_back(D); } @@ -1856,7 +1876,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (AllocaInst *AI = dyn_cast(V)) // We're intested only in allocas we can handle. - return isInterestingAlloca(*AI) ? AI : nullptr; + return ASan.isInterestingAlloca(*AI) ? AI : nullptr; // See if we've already calculated (or started to calculate) alloca for a // given value. AllocaForValueMapTy::iterator I = AllocaForValue.find(V); diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll index c0939c5815d..9bfa8c1e06c 100644 --- a/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -10,12 +10,12 @@ define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address { entry: %p.addr = alloca i32, align 4 %r = alloca i32, align 4 - store i32 %p, i32* %p.addr, align 4 + store volatile i32 %p, i32* %p.addr, align 4 call void @llvm.dbg.declare(metadata i32* %p.addr, metadata !10, metadata !{!"0x102"}), !dbg !11 call void @llvm.dbg.declare(metadata i32* %r, metadata !12, metadata !{!"0x102"}), !dbg !14 %0 = load i32* %p.addr, align 4, !dbg !14 %add = add nsw i32 %0, 1, !dbg !14 - store i32 %add, i32* %r, align 4, !dbg !14 + store volatile i32 %add, i32* %r, align 4, !dbg !14 %1 = load i32* %r, align 4, !dbg !15 ret i32 %1, !dbg !15 } diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll new file mode 100644 index 00000000000..be73248b466 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s --check-prefix=CHECK + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +define i32 @test_promotable_allocas() sanitize_address { +entry: +; CHECK: %0 = alloca i32, align 4 +; CHECK: store i32 0, i32* %0, align 4 +; CHECK: %1 = load i32* %0, align 4 +; CHECK: ret i32 %1 + +; CHECK-NOT: __asan_stack_malloc_0 +; CHECK-NOT: icmp +; CHECK-NOT: call void @__asan_report_store4 + + %0 = alloca i32, align 4 + store i32 0, i32* %0, align 4 + %1 = load i32* %0, align 4 + ret i32 %1 +} diff --git a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll index 25807bb05a7..1234bc0e1ce 100644 --- a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll +++ b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll @@ -15,10 +15,11 @@ entry: ; CHECK-NOALLOCA-NOT: store i32 -875836469 %0 = alloca i32, align 4 %1 = alloca i8* - store i32 %len, i32* %0, align 4 + store volatile i32 %len, i32* %0, align 4 %2 = load i32* %0, align 4 %3 = zext i32 %2 to i64 %4 = alloca i8, i64 %3, align 32 + store volatile i8 0, i8* %4 ret void } diff --git a/test/Instrumentation/AddressSanitizer/lifetime-uar.ll b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll index 25577de445b..efba8ce4e8c 100644 --- a/test/Instrumentation/AddressSanitizer/lifetime-uar.ll +++ b/test/Instrumentation/AddressSanitizer/lifetime-uar.ll @@ -17,8 +17,8 @@ entry: ; Memory is unpoisoned at llvm.lifetime.start ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 1) - store i32 0, i32* %retval - store i8 0, i8* %c, align 1 + store volatile i32 0, i32* %retval + store volatile i8 0, i8* %c, align 1 call void @llvm.lifetime.end(i64 1, i8* %c) ; Memory is poisoned at llvm.lifetime.end diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll index 175a07d51e6..ac324a98b98 100644 --- a/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -12,6 +12,7 @@ entry: %i = alloca i32, align 4 %i.ptr = bitcast i32* %i to i8* call void @llvm.lifetime.start(i64 -1, i8* %i.ptr) + store volatile i8 0, i8* %i.ptr call void @llvm.lifetime.end(i64 -1, i8* %i.ptr) ; Check that lifetime with no size are ignored. @@ -30,6 +31,7 @@ define void @lifetime() sanitize_address { %i = alloca i32, align 4 %i.ptr = bitcast i32* %i to i8* call void @llvm.lifetime.start(i64 3, i8* %i.ptr) + store volatile i8 0, i8* %i.ptr ; Memory is unpoisoned at llvm.lifetime.start ; CHECK: %[[VAR:[^ ]*]] = ptrtoint i32* %{{[^ ]+}} to i64 ; CHECK-NEXT: call void @__asan_unpoison_stack_memory(i64 %[[VAR]], i64 3) @@ -43,12 +45,14 @@ define void @lifetime() sanitize_address { %arr = alloca [10 x i32], align 16 %arr.ptr = bitcast [10 x i32]* %arr to i8* call void @llvm.lifetime.start(i64 40, i8* %arr.ptr) + store volatile i8 0, i8* %arr.ptr ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 40) call void @llvm.lifetime.end(i64 40, i8* %arr.ptr) ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 40) ; One more lifetime start/end for the same variable %i. call void @llvm.lifetime.start(i64 4, i8* %i.ptr) + store volatile i8 0, i8* %i.ptr ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 4) call void @llvm.lifetime.end(i64 4, i8* %i.ptr) ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4) @@ -68,6 +72,7 @@ entry: %i = alloca i64, align 4 %i.ptr = bitcast i64* %i to i8* call void @llvm.lifetime.start(i64 8, i8* %i.ptr) + store volatile i8 0, i8* %i.ptr ; CHECK: __asan_unpoison_stack_memory br i1 %x, label %bb0, label %bb1 diff --git a/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll index 43711b7a1f2..a738f72843c 100644 --- a/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll +++ b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll @@ -26,6 +26,8 @@ entry: ; CHECK: ret void %XXX = alloca [20 x i8], align 1 + %arr.ptr = bitcast [20 x i8]* %XXX to i8* + store volatile i8 0, i8* %arr.ptr ret void } @@ -37,6 +39,8 @@ entry: ; CHECK: ret void %XXX = alloca [20 x i8], align 1 + %arr.ptr = bitcast [20 x i8]* %XXX to i8* + store volatile i8 0, i8* %arr.ptr call void asm sideeffect "mov %%rbx, %%rcx", "~{dirflag},~{fpsr},~{flags}"() nounwind ret void } diff --git a/test/Instrumentation/AddressSanitizer/stack_layout.ll b/test/Instrumentation/AddressSanitizer/stack_layout.ll index 97e3bbb5872..6575dd62f87 100644 --- a/test/Instrumentation/AddressSanitizer/stack_layout.ll +++ b/test/Instrumentation/AddressSanitizer/stack_layout.ll @@ -26,6 +26,12 @@ entry: %XXX = alloca [10 x i8], align 1 %YYY = alloca [20 x i8], align 1 %ZZZ = alloca [30 x i8], align 1 + %arr1.ptr = bitcast [10 x i8]* %XXX to i8* + store volatile i8 0, i8* %arr1.ptr + %arr2.ptr = bitcast [20 x i8]* %YYY to i8* + store volatile i8 0, i8* %arr2.ptr + %arr3.ptr = bitcast [30 x i8]* %ZZZ to i8* + store volatile i8 0, i8* %arr3.ptr ret void } @@ -41,6 +47,12 @@ entry: %AAA = alloca [5 x i8], align 1 %BBB = alloca [55 x i8], align 1 %CCC = alloca [555 x i8], align 1 + %arr1.ptr = bitcast [5 x i8]* %AAA to i8* + store volatile i8 0, i8* %arr1.ptr + %arr2.ptr = bitcast [55 x i8]* %BBB to i8* + store volatile i8 0, i8* %arr2.ptr + %arr3.ptr = bitcast [555 x i8]* %CCC to i8* + store volatile i8 0, i8* %arr3.ptr ret void } @@ -57,5 +69,11 @@ entry: %AAA = alloca [128 x i8], align 16 %BBB = alloca [128 x i8], align 64 %CCC = alloca [128 x i8], align 256 + %arr1.ptr = bitcast [128 x i8]* %AAA to i8* + store volatile i8 0, i8* %arr1.ptr + %arr2.ptr = bitcast [128 x i8]* %BBB to i8* + store volatile i8 0, i8* %arr2.ptr + %arr3.ptr = bitcast [128 x i8]* %CCC to i8* + store volatile i8 0, i8* %arr3.ptr ret void }