From c0d18b669674d3b173e6a3eca6ada98871bb808f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 7 Apr 2012 19:22:18 +0000 Subject: [PATCH] Fix ValueTracking to conclude that debug intrinsics are safe to speculate. Without this, loop rotate (among many other places) would suddenly stop working in the presence of debug info. I found this looking at loop rotate, and have augmented its tests with a reduction out of a very hot loop in yacr2 where failing to do this rotation costs sometimes more than 10% in runtime performance, perturbing numerous downstream optimizations. This should have no impact on performance without debug info, but the change in performance when debug info is enabled can be extreme. As a consequence (and this how I got to this yak) any profiling of performance problems should be treated with deep suspicion -- they may have been wildly innacurate of debug info was enabled for profiling. =/ Just a heads up. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154263 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 8 +++++ test/Transforms/LoopRotate/dbgvalue.ll | 48 +++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c6b53a927db..a430f6281ef 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1854,6 +1854,14 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { switch (II->getIntrinsicID()) { + // These synthetic intrinsics have no side-effects, and just mark + // information about their operands. + // FIXME: There are other no-op synthetic instructions that potentially + // should be considered at least *safe* to speculate... + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return true; + case Intrinsic::bswap: case Intrinsic::ctlz: case Intrinsic::ctpop: diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll index 92871780a4d..b32ee82d3a5 100644 --- a/test/Transforms/LoopRotate/dbgvalue.ll +++ b/test/Transforms/LoopRotate/dbgvalue.ll @@ -1,11 +1,13 @@ ; RUN: opt -S -loop-rotate %s | FileCheck %s +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp { +; CHECK: define i32 @tak ; CHECK: entry ; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x} -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp { entry: br label %tailrecurse @@ -35,7 +37,45 @@ return: ; preds = %if.end ret i32 %z.tr, !dbg !17 } -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +@channelColumns = external global i64 +@horzPlane = external global i8*, align 8 + +define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) { +; Ensure that the loop increment basic block is rotated into the tail of the +; body, even though it contains a debug intrinsic call. +; CHECK: define void @FindFreeHorzSeg +; CHECK: %dec = add +; CHECK-NEXT: tail call void @llvm.dbg.value +; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end + +entry: + br label %for.cond + +for.cond: + %i.0 = phi i64 [ %startCol, %entry ], [ %dec, %for.inc ] + %cmp = icmp eq i64 %i.0, 0 + br i1 %cmp, label %for.end, label %for.body + +for.body: + %0 = load i64* @channelColumns, align 8 + %mul = mul i64 %0, %row + %add = add i64 %mul, %i.0 + %1 = load i8** @horzPlane, align 8 + %arrayidx = getelementptr inbounds i8* %1, i64 %add + %2 = load i8* %arrayidx, align 1 + %tobool = icmp eq i8 %2, 0 + br i1 %tobool, label %for.inc, label %for.end + +for.inc: + %dec = add i64 %i.0, -1 + tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata undef) + br label %for.cond + +for.end: + %add1 = add i64 %i.0, 1 + store i64 %add1, i64* %rowStart, align 8 + ret void +} !llvm.dbg.sp = !{!0}