AsmPrinter: Create a unified .debug_loc stream

This commit removes `DebugLocList` and replaces it with
`DebugLocStream`.

  - `DebugLocEntry` no longer contains its byte/comment streams.
  - The `DebugLocEntry` list for a variable/inlined-at pair is allocated
    on the stack, and released right after `DebugLocEntry::finalize()`
    (possible because of the refactoring in r231023).  Now, only one
    list is in memory at a time.
  - There's a single unified stream for the `.debug_loc` section that
    persists, stored in the new `DebugLocStream` data structure.

The last point is important: this collapses the nested `SmallVector<>`s
from `DebugLocList` into unified streams.  We previously had something
like the following:

    vec<tuple<Label, CU,
              vec<tuple<BeginSym, EndSym,
                        vec<Value>,
                        vec<char>,
                        vec<string>>>>>

A `SmallVector` can avoid allocations, but is statically fairly large
for a vector: three pointers plus the size of the small storage (which
is the number of elements in small mode times the element size).
Nesting these is expensive, since an inner vector's size contributes to
the element size of an outer one.  (Nesting any vector is expensive...)

In the old data structure, the outer vector's *element* size was 632B,
excluding allocation costs for when the middle and inner vectors
exceeded their small sizes.  312B of this was for the "three" pointers
in the vector-tree beneath it.  If you assume 1M functions with an
average of 10 variable/inlined-at pairs each (in an LTO scenario),
that's almost 6GB (besides inner allocations), with almost 3GB for the
"three" pointers.

This came up in a heap profile a little while ago of a `clang -flto -g`
bootstrap, with `DwarfDebug::collectVariableInfo()` using something like
10-15% of the total memory.

With this commit, we have:

    tuple<vec<tuple<Label, CU, Offset>>,
          vec<tuple<BeginSym, EndSym, Offset, Offset>>,
          vec<char>,
          vec<string>>

The offsets are used to create `ArrayRef` slices of adjacent
`SmallVector`s.  This reduces the number of vectors to four (unrelated
to the number of variable/inlined-at pairs), and caps the number of
allocations at the same number.

Besides saving memory and limiting allocations, this is NFC.

I don't know my way around this code very well yet, but I wonder if we
could go further: why stream to a side-table, instead of directly to the
output stream?

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235229 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Duncan P. N. Exon Smith
2015-04-17 21:34:47 +00:00
parent 12a57fe1d3
commit 1186e7ae9e
8 changed files with 182 additions and 86 deletions

View File

@ -14,6 +14,7 @@
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DebugLocEntry.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
@ -910,15 +911,12 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
continue;
// Handle multiple DBG_VALUE instructions describing one variable.
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
DebugLocList &LocList = DotDebugLocEntries.back();
LocList.CU = &TheCU;
LocList.Label = Asm->createTempSymbol("debug_loc");
RegVar->setDebugLocListIndex(
DebugLocs.startList(&TheCU, Asm->createTempSymbol("debug_loc")));
// Build the location list for this variable.
buildLocationList(LocList.List, Ranges);
SmallVector<DebugLocEntry, 8> Entries;
buildLocationList(Entries, Ranges);
// If the variable has an MDBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
@ -927,8 +925,8 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
static_cast<const Metadata *>(IV.first->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : LocList.List)
Entry.finalize(*Asm, BT);
for (auto &Entry : Entries)
Entry.finalize(*Asm, DebugLocs, BT);
}
// Collect info for variables that were optimized out.
@ -1465,12 +1463,12 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
auto Comment = Entry.getComments().begin();
auto End = Entry.getComments().end();
for (uint8_t Byte : Entry.getDWARFBytes())
const DebugLocStream::Entry &Entry) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
for (uint8_t Byte : DebugLocs.getBytes(Entry))
Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
}
@ -1510,9 +1508,11 @@ static void emitDebugLocValue(const AsmPrinter &AP, const MDBasicType *BT,
// FIXME: ^
}
void DebugLocEntry::finalize(const AsmPrinter &AP, const MDBasicType *BT) {
BufferByteStreamer Streamer(DWARFBytes, Comments);
const DebugLocEntry::Value Value = Values[0];
void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream &Locs,
const MDBasicType *BT) {
Locs.startEntry(Begin, End);
BufferByteStreamer Streamer = Locs.getStreamer();
const DebugLocEntry::Value &Value = Values[0];
if (Value.isBitPiece()) {
// Emit all pieces that belong to the same variable and range.
assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
@ -1545,8 +1545,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, const MDBasicType *BT) {
}
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
Asm->OutStreamer.AddComment("Loc expr size");
MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
@ -1565,19 +1564,19 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
for (const auto &DebugLoc : DotDebugLocEntries) {
Asm->OutStreamer.EmitLabel(DebugLoc.Label);
const DwarfCompileUnit *CU = DebugLoc.CU;
for (const auto &Entry : DebugLoc.List) {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer.EmitLabel(List.Label);
const DwarfCompileUnit *CU = List.CU;
for (const auto &Entry : DebugLocs.getEntries(List)) {
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
if (auto *Base = CU->getBaseAddress()) {
Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size);
Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size);
Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
} else {
Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size);
Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size);
Asm->OutStreamer.EmitSymbolValue(Entry.BeginSym, Size);
Asm->OutStreamer.EmitSymbolValue(Entry.EndSym, Size);
}
emitDebugLocEntryLocation(Entry);
@ -1590,17 +1589,17 @@ void DwarfDebug::emitDebugLoc() {
void DwarfDebug::emitDebugLocDWO() {
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
for (const auto &DebugLoc : DotDebugLocEntries) {
Asm->OutStreamer.EmitLabel(DebugLoc.Label);
for (const auto &Entry : DebugLoc.List) {
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer.EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// Just always use start_length for now - at least that's one address
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
unsigned idx = AddrPool.getIndex(Entry.getBeginSym());
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}