[DWARF parser] Fix broken address ranges construction.

The previous algorithm for constructing the [Address ranges]->[Compile Units]
mapping was wrong. It relied on the assumption that address ranges
of different compile units never overlap, which does not hold.
For example, two compile units may contain the definition of the same
linkonce_odr function. These definitions will be merged at link-time,
resulting in equivalent .debug_ranges entries for both of these units.

Instead of sorting and merging the original address ranges (from .debug_ranges
and .debug_aranges), implement a different approach: save the endpoints
of all ranges, and then use a sweep-line approach to construct
the desired mapping. If we find that a certain address maps to
several compilation units, we just pick any of them.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210860 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Alexey Samsonov
2014-06-12 23:58:49 +00:00
parent 9a81e28056
commit 740a75968a
5 changed files with 87 additions and 61 deletions

View File

@ -15,6 +15,7 @@
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <set>
using namespace llvm; using namespace llvm;
void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
@ -30,6 +31,7 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
uint64_t HighPC = Desc.getEndAddress(); uint64_t HighPC = Desc.getEndAddress();
appendRange(CUOffset, LowPC, HighPC); appendRange(CUOffset, LowPC, HighPC);
} }
ParsedCUOffsets.insert(CUOffset);
} }
} }
@ -56,69 +58,55 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
} }
} }
sortAndMinimize(); construct();
} }
void DWARFDebugAranges::clear() { void DWARFDebugAranges::clear() {
Endpoints.clear();
Aranges.clear(); Aranges.clear();
ParsedCUOffsets.clear(); ParsedCUOffsets.clear();
} }
void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
uint64_t HighPC) { uint64_t HighPC) {
if (!Aranges.empty()) { if (LowPC >= HighPC)
if (Aranges.back().CUOffset == CUOffset && return;
Aranges.back().HighPC() == LowPC) { Endpoints.emplace_back(LowPC, CUOffset, true);
Aranges.back().setHighPC(HighPC); Endpoints.emplace_back(HighPC, CUOffset, false);
return;
}
}
Aranges.push_back(Range(LowPC, HighPC, CUOffset));
} }
void DWARFDebugAranges::sortAndMinimize() { void DWARFDebugAranges::construct() {
const size_t orig_arange_size = Aranges.size(); std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing
// Size of one? If so, no sorting is needed // a current address range.
if (orig_arange_size <= 1) std::sort(Endpoints.begin(), Endpoints.end());
return; uint64_t PrevAddress = -1ULL;
// Sort our address range entries for (const auto &E : Endpoints) {
std::stable_sort(Aranges.begin(), Aranges.end()); if (PrevAddress < E.Address && ValidCUs.size() > 0) {
// If the address range between two endpoints is described by some
// Most address ranges are contiguous from function to function // CU, first try to extend the last range in Aranges. If we can't
// so our new ranges will likely be smaller. We calculate the size // do it, start a new range.
// of the new ranges since although std::vector objects can be resized, if (!Aranges.empty() && Aranges.back().HighPC() == PrevAddress &&
// the will never reduce their allocated block size and free any excesss ValidCUs.find(Aranges.back().CUOffset) != ValidCUs.end()) {
// memory, so we might as well start a brand new collection so it is as Aranges.back().setHighPC(E.Address);
// small as possible. } else {
Aranges.emplace_back(PrevAddress, E.Address, *ValidCUs.begin());
// First calculate the size of the new minimal arange vector }
// so we don't have to do a bunch of re-allocations as we
// copy the new minimal stuff over to the new collection.
size_t minimal_size = 1;
for (size_t i = 1; i < orig_arange_size; ++i) {
if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i]))
++minimal_size;
}
// Else, make a new RangeColl that _only_ contains what we need.
RangeColl minimal_aranges;
minimal_aranges.resize(minimal_size);
uint32_t j = 0;
minimal_aranges[j] = Aranges[0];
for (size_t i = 1; i < orig_arange_size; ++i) {
if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) {
minimal_aranges[j].setHighPC(Aranges[i].HighPC());
} else {
// Only increment j if we aren't merging
minimal_aranges[++j] = Aranges[i];
} }
// Update the set of valid CUs.
if (E.IsRangeStart) {
ValidCUs.insert(E.CUOffset);
} else {
auto CUPos = ValidCUs.find(E.CUOffset);
assert(CUPos != ValidCUs.end());
ValidCUs.erase(CUPos);
}
PrevAddress = E.Address;
} }
assert(j+1 == minimal_size); assert(ValidCUs.empty());
// Now swap our new minimal aranges into place. The local // Endpoints are not needed now.
// minimal_aranges will then contian the old big collection std::vector<RangeEndpoint> EmptyEndpoints;
// which will get freed. EmptyEndpoints.swap(Endpoints);
minimal_aranges.swap(Aranges);
} }
uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const { uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {

View File

@ -27,9 +27,9 @@ private:
void clear(); void clear();
void extract(DataExtractor DebugArangesData); void extract(DataExtractor DebugArangesData);
// Use appendRange multiple times and then call sortAndMinimize. // Call appendRange multiple times and then call construct.
void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
void sortAndMinimize(); void construct();
struct Range { struct Range {
explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL,
@ -47,31 +47,39 @@ private:
return LowPC + Length; return LowPC + Length;
return -1ULL; return -1ULL;
} }
bool containsAddress(uint64_t Address) const { bool containsAddress(uint64_t Address) const {
return LowPC <= Address && Address < HighPC(); return LowPC <= Address && Address < HighPC();
} }
bool operator<(const Range &other) const {
bool operator <(const Range &other) const {
return LowPC < other.LowPC; return LowPC < other.LowPC;
} }
static bool SortedOverlapCheck(const Range &Left, const Range &Right) {
if (Left.CUOffset != Right.CUOffset)
return false;
return Left.HighPC() >= Right.LowPC;
}
uint64_t LowPC; // Start of address range. uint64_t LowPC; // Start of address range.
uint32_t Length; // End of address range (not including this address). uint32_t Length; // End of address range (not including this address).
uint32_t CUOffset; // Offset of the compile unit or die. uint32_t CUOffset; // Offset of the compile unit or die.
}; };
/// One boundary of an address range: either the address where a range
/// attributed to a compile unit begins, or the address where it ends.
struct RangeEndpoint {
  uint64_t Address;  // Address of this boundary.
  uint32_t CUOffset; // Offset of the compile unit the range belongs to.
  bool IsRangeStart; // True for a range start, false for a range end.

  RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart)
      : Address{Address}, CUOffset{CUOffset}, IsRangeStart{IsRangeStart} {}

  /// Orders endpoints by address only; CUOffset and IsRangeStart do not
  /// participate, so endpoints sharing an address compare as equivalent.
  bool operator<(const RangeEndpoint &Other) const {
    return Other.Address > Address;
  }
};
typedef std::vector<Range> RangeColl; typedef std::vector<Range> RangeColl;
typedef RangeColl::const_iterator RangeCollIterator; typedef RangeColl::const_iterator RangeCollIterator;
typedef DenseSet<uint32_t> ParsedCUOffsetColl;
std::vector<RangeEndpoint> Endpoints;
RangeColl Aranges; RangeColl Aranges;
ParsedCUOffsetColl ParsedCUOffsets; DenseSet<uint32_t> ParsedCUOffsets;
}; };
} }

View File

@ -0,0 +1,26 @@
void call(); // Declared only; no definition is visible in this file.
struct S { // Groups the inline static member functions called by func1/func2 below.
static void foo() { call(); call(); } // Invokes call() twice.
static void bar() { call(); call(); } // Same body as foo(); the only member called by both func1 and func2.
static void baz() {} // Empty body; called only from func1.
};
#ifdef FILE1 // Defined via -DFILE1 for obj1.o (see the build instructions below).
# define FUNC_NAME func1
# define FUNC_BODY \
S::foo(); S::bar(); S::baz(); // func1 calls all three members of S.
#else // Built without FILE1 (obj2.o in the build instructions below).
# define FUNC_NAME func2
# define FUNC_BODY \
S::bar(); // func2 calls only S::bar(), which func1 calls as well.
#endif
void FUNC_NAME() { // Expands to func1() or func2(), depending on FILE1.
FUNC_BODY
}
// Build instructions:
// $ clang -g -fPIC -c -DFILE1 arange-overlap.cc -o obj1.o
// $ clang -g -fPIC -c arange-overlap.cc -o obj2.o
// $ clang -shared obj1.o obj2.o -o <output>

Binary file not shown.

View File

@ -18,6 +18,7 @@ RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input
RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input
RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input
RUN: echo "%p/Inputs/fission-ranges.elf-x86_64 0x720" >> %t.input RUN: echo "%p/Inputs/fission-ranges.elf-x86_64 0x720" >> %t.input
RUN: echo "%p/Inputs/arange-overlap.elf-x86_64 0x714" >> %t.input
RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \
RUN: --default-arch=i386 < %t.input | FileCheck %s RUN: --default-arch=i386 < %t.input | FileCheck %s
@ -94,6 +95,9 @@ CHECK-NEXT: llvm-symbolizer-dwo-test.cc:11
CHECK: main CHECK: main
CHECK-NEXT: {{.*}}fission-ranges.cc:6 CHECK-NEXT: {{.*}}fission-ranges.cc:6
CHECK: _ZN1S3bazEv
CHECK-NEXT: {{.*}}arange-overlap.cc:6
RUN: echo "unexisting-file 0x1234" > %t.input2 RUN: echo "unexisting-file 0x1234" > %t.input2
RUN: llvm-symbolizer < %t.input2 RUN: llvm-symbolizer < %t.input2