ELFObjectWriter: deduplicate suffixes in strtab

We already do this for shstrtab, so might as well do it for strtab. This
extracts the string table building code into a separate class. The idea
is to use it for other object formats too.

I mostly wanted to do this for the general principle, but it does save a
little bit on object file size. I tried this on a clang bootstrap and
saved 0.54% on the sum of object file sizes (1.14 MB out of 212 MB for
a release build).

Differential Revision: http://reviews.llvm.org/D3533

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hans Wennborg
2014-04-30 16:25:02 +00:00
parent b1c1b8a78d
commit a8febf2283
22 changed files with 282 additions and 165 deletions
+32 -86
View File
@@ -28,6 +28,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/StringTableBuilder.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
@@ -132,11 +133,11 @@ class ELFObjectWriter : public MCObjectWriter {
MCSymbolData *SymbolData;
uint64_t StringIndex;
uint32_t SectionIndex;
StringRef Name;
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
return SymbolData->getSymbol().getName() <
RHS.SymbolData->getSymbol().getName();
return Name < RHS.Name;
}
};
@@ -149,13 +150,13 @@ class ELFObjectWriter : public MCObjectWriter {
llvm::DenseMap<const MCSectionData *, std::vector<ELFRelocationEntry>>
Relocations;
DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
StringTableBuilder ShStrTabBuilder;
/// @}
/// @name Symbol Table Data
/// @{
SmallString<256> StringTable;
StringTableBuilder StrTabBuilder;
std::vector<uint64_t> FileSymbolData;
std::vector<ELFSymbolData> LocalSymbolData;
std::vector<ELFSymbolData> ExternalSymbolData;
@@ -676,7 +677,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
assert(StringTable.size() && "Missing string table");
// FIXME: Make sure the start of the symbol table is aligned.
@@ -1031,27 +1031,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
MCELF::SetBinding(Data, ELF::STB_GLOBAL);
}
// Index 0 is always the empty string.
StringMap<uint64_t> StringIndexMap;
StringTable += '\x00';
// FIXME: We could optimize suffixes in strtab in the same way we
// optimize them in shstrtab.
for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(),
ie = Asm.file_names_end();
it != ie;
++it) {
StringRef Name = *it;
uint64_t &Entry = StringIndexMap[Name];
if (!Entry) {
Entry = StringTable.size();
StringTable += Name;
StringTable += '\x00';
}
FileSymbolData.push_back(Entry);
}
// Add the data for the symbols.
for (MCSymbolData &SD : Asm.symbols()) {
const MCSymbol &Symbol = SD.getSymbol();
@@ -1102,7 +1081,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
// @@ in defined ones.
StringRef Name = Symbol.getName();
SmallString<32> Buf;
size_t Pos = Name.find("@@@");
if (Pos != StringRef::npos) {
Buf += Name.substr(0, Pos);
@@ -1110,14 +1088,8 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
Buf += Name.substr(Pos + Skip);
Name = Buf;
}
MSD.Name = StrTabBuilder.add(Name);
uint64_t &Entry = StringIndexMap[Name];
if (!Entry) {
Entry = StringTable.size();
StringTable += Name;
StringTable += '\x00';
}
MSD.StringIndex = Entry;
if (MSD.SectionIndex == ELF::SHN_UNDEF)
UndefinedSymbolData.push_back(MSD);
else if (Local)
@@ -1126,6 +1098,21 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
ExternalSymbolData.push_back(MSD);
}
for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
StrTabBuilder.add(*i);
StrTabBuilder.finalize();
for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
FileSymbolData.push_back(StrTabBuilder.getOffset(*i));
for (ELFSymbolData& MSD : LocalSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData& MSD : ExternalSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData& MSD : UndefinedSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
// Symbols are required to be in lexicographic order.
array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
@@ -1436,23 +1423,6 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
}
}
/// Three-way comparator over section names that compares them back-to-front.
///
/// Both arguments are pointers to array elements (each element being a
/// `const MCSectionELF *`), which is the shape expected by the
/// array_pod_sort call that uses this function as its comparator.
///
/// The two names are walked from their last character toward the front over
/// the length of the shorter name. At the first position where they differ the
/// result is `cb - ca`. If no difference is found (one name is a suffix of the
/// other, or they are equal) the result is `sizeB - sizeA`, which is zero for
/// equal names and otherwise depends only on which name is longer.
///
/// NOTE(review): presumably follows the qsort convention (negative means *a
/// orders first), in which case a name that is a suffix of a longer name sorts
/// after that longer name -- confirm against array_pod_sort.
static int compareBySuffix(const MCSectionELF *const *a,
const MCSectionELF *const *b) {
const StringRef &NameA = (*a)->getSectionName();
const StringRef &NameB = (*b)->getSectionName();
const unsigned sizeA = NameA.size();
const unsigned sizeB = NameB.size();
// Only the trailing `len` characters of each name can be compared pairwise.
const unsigned len = std::min(sizeA, sizeB);
for (unsigned int i = 0; i < len; ++i) {
// Index from the end: i == 0 selects the last character of each name.
char ca = NameA[sizeA - i - 1];
char cb = NameB[sizeB - i - 1];
if (ca != cb)
return cb - ca;
}
// All compared characters matched, so the result depends on length alone.
// The subtraction is done on unsigned values and converted to the int return
// type.
return sizeB - sizeA;
}
void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
MCAsmLayout &Layout,
SectionIndexMapTy &SectionIndexMap,
@@ -1493,45 +1463,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
WriteSymbolTable(F, Asm, Layout, SectionIndexMap);
F = new MCDataFragment(&StrtabSD);
F->getContents().append(StringTable.begin(), StringTable.end());
F->getContents().append(StrTabBuilder.data().begin(),
StrTabBuilder.data().end());
F = new MCDataFragment(&ShstrtabSD);
std::vector<const MCSectionELF*> Sections;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
// Section header string table.
for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(it->getSection());
Sections.push_back(&Section);
}
array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
// Section header string table.
//
// The first entry of a string table holds a null character so skip
// section 0.
uint64_t Index = 1;
F->getContents().push_back('\x00');
for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
const MCSectionELF &Section = *Sections[I];
StringRef Name = Section.getSectionName();
if (I != 0) {
StringRef PreviousName = Sections[I - 1]->getSectionName();
if (PreviousName.endswith(Name)) {
SectionStringTableIndex[&Section] = Index - Name.size() - 1;
continue;
}
}
// Remember the index into the string table so we can write it
// into the sh_name field of the section header table.
SectionStringTableIndex[&Section] = Index;
Index += Name.size() + 1;
F->getContents().append(Name.begin(), Name.end());
F->getContents().push_back('\x00');
ShStrTabBuilder.add(Section.getSectionName());
}
ShStrTabBuilder.finalize();
F->getContents().append(ShStrTabBuilder.data().begin(),
ShStrTabBuilder.data().end());
}
void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
@@ -1599,7 +1544,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
switch(Section.getType()) {
case ELF::SHT_DYNAMIC:
sh_link = SectionStringTableIndex[&Section];
sh_link = ShStrTabBuilder.getOffset(Section.getSectionName());
sh_info = 0;
break;
@@ -1680,7 +1625,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
}
}
WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()),
Section.getType(),
Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
Alignment, Section.getEntrySize());
}