LoopVectorize: Preserve debug location info

radar://14169017

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185122 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Arnold Schwaighofer 2013-06-28 00:38:54 +00:00
parent 1bde770ccd
commit 57a7da8b23
2 changed files with 166 additions and 1 deletions

View File

@ -326,6 +326,49 @@ private:
EdgeMaskCache MaskCache;
};
/// \brief Scoped override of an IRBuilder's current debug location.
///
/// Constructing an object records the builder's current debug location and,
/// when an instruction is supplied, switches the builder to that instruction's
/// debug location. Destroying the object puts the recorded location back.
class DebugLocSetter {
  IRBuilder<> &Builder;
  DebugLoc OldDL;

  // Copy construction and copy assignment are declared private and given no
  // body here, so a setter cannot be duplicated by accident.
  DebugLocSetter(const DebugLocSetter &);
  DebugLocSetter &operator=(const DebugLocSetter &);

public:
  /// \brief Remember the current debug location of \p B, then point \p B at
  /// the debug location of \p Inst.
  ///
  /// \p Inst may be null; in that case the builder's location is left as it
  /// is. This lets callers hand in the result of a dyn_cast on a value without
  /// first checking that the value is an instruction.
  DebugLocSetter(IRBuilder<> &B, Instruction *Inst)
      : Builder(B), OldDL(B.getCurrentDebugLocation()) {
    if (!Inst)
      return;
    Builder.SetCurrentDebugLocation(Inst->getDebugLoc());
  }

  /// \brief Put back the debug location that was current at construction.
  ~DebugLocSetter() { Builder.SetCurrentDebugLocation(OldDL); }
};
/// \brief Choose the instruction whose debug location is worth reusing.
///
/// Returns \p I unchanged when it is null or already carries a non-empty debug
/// location. Otherwise returns the first operand of \p I that is itself an
/// instruction with a non-empty debug location. If no operand qualifies,
/// \p I is returned as a fallback.
static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
  if (!I)
    return I;

  // A default-constructed DebugLoc is the "nothing recorded" value that every
  // candidate is compared against.
  DebugLoc Unset;
  if (I->getDebugLoc() != Unset)
    return I;

  // The instruction has no location of its own: scan its operands in order.
  for (unsigned Idx = 0, NumOps = I->getNumOperands(); Idx != NumOps; ++Idx) {
    Instruction *Def = dyn_cast<Instruction>(I->getOperand(Idx));
    if (Def && Def->getDebugLoc() != Unset)
      return Def;
  }

  return I;
}
/// \brief Check if conditionally executed loads are hoistable.
///
/// This class has two functions: isHoistableLoad and canHoistAllLoads.
@ -1195,6 +1238,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// Handle consecutive loads/stores.
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
DebugLocSetter SetDL(Builder, Gep);
Value *PtrOperand = Gep->getPointerOperand();
Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
@ -1205,6 +1249,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Gep2->setName("gep.indvar.base");
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
DebugLocSetter SetDL(Builder, Gep);
assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
OrigLoop) && "Base ptr must be invariant");
@ -1237,6 +1282,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
} else {
// Use the induction element ptr.
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
DebugLocSetter SetDL(Builder, cast<Instruction>(Ptr));
VectorParts &PtrVal = getVectorValue(Ptr);
Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
}
@ -1245,6 +1291,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
if (SI) {
assert(!Legal->isUniform(SI->getPointerOperand()) &&
"We do not allow storing to uniform addresses");
DebugLocSetter SetDL(Builder, SI);
// We don't want to update the value in the map as it might be used in
// another expression. So don't use a reference type for "StoredVal".
VectorParts StoredVal = getVectorValue(SI->getValueOperand());
@ -1269,6 +1316,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
return;
}
// Handle loads.
assert(LI && "Must have a load instruction");
DebugLocSetter SetDL(Builder, LI);
for (unsigned Part = 0; Part < UF; ++Part) {
// Calculate the pointer for the specific unroll-part.
Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
@ -1292,6 +1342,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
DebugLocSetter SetDL(Builder, Instr);
// Find all of the vectorized parameters.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *SrcOp = Instr->getOperand(op);
@ -1519,6 +1571,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
// Generate the induction variable.
DebugLocSetter SetDL(Builder, getDebugLocFromInstOrOperands(OldInduction));
Induction = Builder.CreatePHI(IdxTy, 2, "index");
// The loop step is equal to the vectorization factor (num of SIMD elements)
// times the unroll factor (num of SIMD instructions).
@ -1527,6 +1580,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// This is the IR builder that we use to add all of the logic for bypassing
// the new vector loop.
IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
DebugLocSetter SetDLByPass(BypassBuilder,
getDebugLocFromInstOrOperands(OldInduction));
// We may need to extend the index in case there is a type mismatch.
// We know that the count starts at zero and does not overflow.
@ -2066,6 +2121,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
for (unsigned part = 0; part < UF; ++part) {
// This PHINode contains the vectorized reduction variable, or
// the initial value vector, if we bypass the vector loop.
DebugLocSetter SetDL(Builder, RdxDesc.LoopExitInstr);
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
@ -2079,6 +2136,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *ReducedPartRdx = RdxParts[0];
unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(RdxParts[part]));
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
RdxParts[part], ReducedPartRdx,
@ -2096,6 +2155,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *TmpVec = ReducedPartRdx;
SmallVector<Constant*, 32> ShuffleMask(VF, 0);
for (unsigned i = VF; i != 1; i >>= 1) {
DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(ReducedPartRdx));
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
ShuffleMask[j] = Builder.getInt32(i/2 + j);
@ -2118,7 +2178,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
}
// The result is in the first element of the vector.
Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
Value *Scalar0;
{
DebugLocSetter SetDL(Builder, dyn_cast<Instruction>(ReducedPartRdx));
Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@ -2253,6 +2317,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// Check for PHI nodes that are lowered to vector selects.
if (P->getParent() != OrigLoop->getHeader()) {
DebugLocSetter SetDL(Builder, P);
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
@ -2295,6 +2360,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P);
DebugLocSetter SetDL(Builder, P);
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
@ -2402,6 +2469,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::Xor: {
// Just widen binops.
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
DebugLocSetter SetDL(Builder, BinOp);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
@ -2428,6 +2496,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// instruction with a scalar condition. Otherwise, use vector-select.
bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
OrigLoop);
DebugLocSetter SetDL(Builder, it);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@ -2452,6 +2521,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// Widen compares. Generate vector compares.
bool FCmp = (it->getOpcode() == Instruction::FCmp);
CmpInst *Cmp = dyn_cast<CmpInst>(it);
DebugLocSetter SetDL(Builder, it);
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
@ -2482,6 +2552,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::FPTrunc:
case Instruction::BitCast: {
CastInst *CI = dyn_cast<CastInst>(it);
DebugLocSetter SetDL(Builder, it);
/// Optimize the special case where the source is the induction
/// variable. Notice that we can only optimize the 'trunc' case
/// because: a. FP conversions lose precision, b. sext/zext may wrap,
@ -2509,6 +2580,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
if (isa<DbgInfoIntrinsic>(it))
break;
DebugLocSetter SetDL(Builder, it);
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);

View File

@ -0,0 +1,92 @@
; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Make sure we are preserving debug info in the vectorized code.
; CHECK: for.body.lr.ph
; CHECK: cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !21
; CHECK: vector.body
; CHECK: index {{.*}}, !dbg !21
; CHECK: getelementptr inbounds i32* %a, {{.*}}, !dbg !22
; CHECK: load <2 x i32>* {{.*}}, !dbg !22
; CHECK: add <2 x i32> {{.*}}, !dbg !22
; CHECK: add i64 %index, 2, !dbg !21
; CHECK: icmp eq i64 %index.next, %end.idx.rnd.down, !dbg !21
; CHECK: middle.block
; CHECK: add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg !22
; CHECK: extractelement <2 x i32> %bin.rdx, i32 0, !dbg !22
; Test input function: adds up the i32 elements %a[0] .. %a[%size-1] in one
; counted loop and returns the total (0 when %size is 0). The instructions
; carry !dbg attachments so the directives at the top of this file can verify
; that the vectorized output keeps them.
define i32 @f(i32* nocapture %a, i32 %size) #0 {
entry:
tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19
tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19
tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20
tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21
; Skip the loop entirely when %size is zero.
%cmp4 = icmp eq i32 %size, 0, !dbg !21
br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21
for.body.lr.ph: ; preds = %entry
br label %for.body, !dbg !21
for.body: ; preds = %for.body.lr.ph, %for.body
; %indvars.iv is the 64-bit loop counter; %sum.05 is the running total.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
; Load a[i] and accumulate it; these three instructions use !dbg !22.
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
%0 = load i32* %arrayidx, align 4, !dbg !22, !tbaa !23
%add = add i32 %0, %sum.05, !dbg !22
tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
; Advance the counter and loop again until its truncated value equals %size;
; the counter update and exit test use !dbg !21.
%indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
%exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21
br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
for.cond.for.end_crit_edge: ; preds = %for.body
; Single-entry phi that carries the final sum out of the loop.
%add.lcssa = phi i32 [ %add, %for.body ]
br label %for.end, !dbg !21
for.end: ; preds = %entry, %for.cond.for.end_crit_edge
; Result is the loop's sum, or 0 when the loop was bypassed from %entry.
%sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
ret i32 %sum.0.lcssa, !dbg !26
}
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata) #1
; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata) #1
attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18}
!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
!1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
!2 = metadata !{i32 0}
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
!5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9, metadata !10, metadata !11}
!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16}
!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3]
!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4]
!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
!19 = metadata !{i32 3, i32 0, metadata !4, null}
!20 = metadata !{i32 4, i32 0, metadata !4, null}
!21 = metadata !{i32 5, i32 0, metadata !17, null}
!22 = metadata !{i32 6, i32 0, metadata !17, null}
!23 = metadata !{metadata !"int", metadata !24}
!24 = metadata !{metadata !"omnipotent char", metadata !25}
!25 = metadata !{metadata !"Simple C/C++ TBAA"}
!26 = metadata !{i32 7, i32 0, metadata !4, null}