LowerBitSets: Introduce global layout builder.

The builder is based on a layout algorithm that tries to keep members of small
bit sets together. The new layout compresses Chromium's bit sets to around 15%
of their original size.

Differential Revision: http://reviews.llvm.org/D7796

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230394 91177308-0d34-0410-b5e6-96231b3b80d8

parent 7a1721e7c3
commit 0bf03cb473

@@ -17,8 +17,10 @@ global variable.
 This will cause a link-time optimization pass to generate bitsets from the
 memory addresses referenced from the elements of the bitset metadata. The pass
 will lay out the referenced globals consecutively, so their definitions must
-be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`,
-generates code to test whether a given pointer is a member of a bitset.
+be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for
+laying out the globals efficiently to minimize the sizes of the underlying
+bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code
+to test whether a given pointer is a member of a bitset.
 
 :Example:
 
@@ -64,3 +66,5 @@ generates code to test whether a given pointer is a member of a bitset.
   %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
   ret void
 }
+
+.. _GlobalLayoutBuilder: http://llvm.org/klaus/llvm/blob/master/include/llvm/Transforms/IPO/LowerBitSets.h
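For orientation, the membership test the documentation describes boils down to a range check plus a bit lookup against the combined global. The following is only a conceptual C++ sketch added here for illustration; the struct and function names are not LLVM API, and the code the pass actually emits also verifies pointer alignment rather than simply shifting it away.

#include <cstdint>

struct BitSetLayout {     // illustrative only, not the pass's data structures
  uintptr_t Base;         // address of the combined (consecutively laid out) global
  uint64_t BitSize;       // number of member slots covered by the bitset
  unsigned AlignLog2;     // log2 of the spacing between possible members
  const uint8_t *Bits;    // one bit per slot
};

// Roughly what a lowered llvm.bitset.test call amounts to: is Ptr one of the
// laid-out members whose bit is set?
inline bool testBitSetMembership(const BitSetLayout &BS, uintptr_t Ptr) {
  uint64_t Index = (Ptr - BS.Base) >> BS.AlignLog2;
  if (Index >= BS.BitSize)
    return false;                                  // outside the laid-out range
  return (BS.Bits[Index / 8] >> (Index % 8)) & 1;  // bit lookup
}
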
@@ -20,6 +20,7 @@
 
 #include <stdint.h>
 #include <limits>
+#include <set>
 #include <vector>
 
 namespace llvm {
@@ -73,6 +74,69 @@ struct BitSetBuilder {
   BitSetInfo build();
 };
 
+/// This class implements a layout algorithm for globals referenced by bit sets
+/// that tries to keep members of small bit sets together. This can
+/// significantly reduce bit set sizes in many cases.
+///
+/// It works by assembling fragments of layout from sets of referenced globals.
+/// Each set of referenced globals causes the algorithm to create a new
+/// fragment, which is assembled by appending each referenced global in the set
+/// into the fragment. If a referenced global has already been referenced by a
+/// fragment created earlier, we instead delete that fragment and append its
+/// contents into the fragment we are assembling.
+///
+/// By starting with the smallest fragments, we minimize the size of the
+/// fragments that are copied into larger fragments. This is most intuitively
+/// thought about when considering the case where the globals are virtual tables
+/// and the bit sets represent their derived classes: in a single inheritance
+/// hierarchy, the optimum layout would involve a depth-first search of the
+/// class hierarchy (and in fact the computed layout ends up looking a lot like
+/// a DFS), but a naive DFS would not work well in the presence of multiple
+/// inheritance. This aspect of the algorithm ends up fitting smaller
+/// hierarchies inside larger ones where that would be beneficial.
+///
+/// For example, consider this class hierarchy:
+///
+///  A   B
+///   \ / | \
+///    C  D  E
+///
+/// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
+/// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
+/// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
+/// cover only the 4 objects in its hierarchy, but not for bsA as it needs to
+/// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
+///
+///   Add bsC, fragments {{C}}
+///   Add bsD, fragments {{C}, {D}}
+///   Add bsE, fragments {{C}, {D}, {E}}
+///   Add bsA, fragments {{A, C}, {D}, {E}}
+///   Add bsB, fragments {{B, A, C, D, E}}
+///
+/// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
+/// fewer) objects, at the cost of bsB needing to cover 1 more object.
+///
+/// The bit set lowering pass assigns an object index to each object that needs
+/// to be laid out, and calls addFragment for each bit set, passing the object
+/// indices of its referenced globals. It then assembles a layout from the
+/// computed layout in the Fragments field.
+struct GlobalLayoutBuilder {
+  /// The computed layout. Each element of this vector contains a fragment of
+  /// layout (which may be empty) consisting of object indices.
+  std::vector<std::vector<uint64_t>> Fragments;
+
+  /// Mapping from object index to fragment index.
+  std::vector<uint64_t> FragmentMap;
+
+  GlobalLayoutBuilder(uint64_t NumObjects)
+      : Fragments(1), FragmentMap(NumObjects) {}
+
+  /// Add \param F to the layout while trying to keep its indices contiguous.
+  /// If a previously seen fragment uses any of \param F's indices, that
+  /// fragment will be laid out inside \param F.
+  void addFragment(const std::set<uint64_t> &F);
+};
+
 } // namespace llvm
 
 #endif
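The walkthrough in the new comment can be replayed directly against this interface. Below is a minimal sketch; the index assignment B=0, A=1, C=2, D=3, E=4 is an assumption made here so that the std::set iteration order matches the walkthrough (the pass assigns indices itself), and the function name is illustrative.

#include "llvm/Transforms/IPO/LowerBitSets.h"
#include <cassert>
#include <vector>
using namespace llvm;

// Replaying the bsC, bsD, bsE, bsA, bsB sequence from the comment above,
// with the assumed indices B=0, A=1, C=2, D=3, E=4.
void replayWalkthrough() {
  GlobalLayoutBuilder GLB(5);
  GLB.addFragment({2});             // bsC -> {{C}}
  GLB.addFragment({3});             // bsD -> {{C}, {D}}
  GLB.addFragment({4});             // bsE -> {{C}, {D}, {E}}
  GLB.addFragment({1, 2});          // bsA -> {{A, C}, {D}, {E}}
  GLB.addFragment({0, 1, 2, 3, 4}); // bsB -> {{B, A, C, D, E}}

  // Flatten the (possibly empty) fragments into a single layout, as the pass
  // does after calling addFragment for every bit set.
  std::vector<uint64_t> Layout;
  for (auto &F : GLB.Fragments)
    Layout.insert(Layout.end(), F.begin(), F.end());
  assert((Layout == std::vector<uint64_t>{0, 1, 2, 3, 4})); // B, A, C, D, E
}
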
@@ -118,6 +118,35 @@ BitSetInfo BitSetBuilder::build() {
   return BSI;
 }
 
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+  // Create a new fragment to hold the layout for F.
+  Fragments.emplace_back();
+  std::vector<uint64_t> &Fragment = Fragments.back();
+  uint64_t FragmentIndex = Fragments.size() - 1;
+
+  for (auto ObjIndex : F) {
+    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+    if (OldFragmentIndex == 0) {
+      // We haven't seen this object index before, so just add it to the current
+      // fragment.
+      Fragment.push_back(ObjIndex);
+    } else {
+      // This index belongs to an existing fragment. Copy the elements of the
+      // old fragment into this one and clear the old fragment. We don't update
+      // the fragment map just yet; this ensures that any further references to
+      // indices from the old fragment in this fragment do not insert any more
+      // indices.
+      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+      Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+      OldFragment.clear();
+    }
+  }
+
+  // Update the fragment map to point our object indices to this fragment.
+  for (uint64_t ObjIndex : Fragment)
+    FragmentMap[ObjIndex] = FragmentIndex;
+}
+
 namespace {
 
 struct LowerBitSets : public ModulePass {
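To make the deferred FragmentMap update above concrete, here is a small hand trace. The two calls and the index values are illustrative, not taken from the pass.

#include "llvm/Transforms/IPO/LowerBitSets.h"
using namespace llvm;

void traceAddFragment() {
  GlobalLayoutBuilder GLB(3);

  GLB.addFragment({0, 1});
  // Fragments   == {{}, {0, 1}}   (fragment 0 is never filled, so a
  // FragmentMap == {1, 1, 0}       FragmentMap value of 0 means "not seen yet")

  GLB.addFragment({0, 1, 2});
  // Index 0: found in fragment 1, so {0, 1} is copied into the new fragment
  //          and fragment 1 is cleared; FragmentMap is deliberately not
  //          updated yet.
  // Index 1: FragmentMap still points at fragment 1, which is now empty, so
  //          nothing is inserted twice -- the point of the deferred update.
  // Index 2: unseen, appended.
  // Fragments   == {{}, {}, {0, 1, 2}}
  // FragmentMap == {2, 2, 2}
}
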
@@ -485,27 +514,66 @@ bool LowerBitSets::buildBitSets(Module &M) {
     // Build the list of bitsets and referenced globals in this disjoint set.
     std::vector<MDString *> BitSets;
     std::vector<GlobalVariable *> Globals;
+    llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+    llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
     for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
          MI != GlobalClasses.member_end(); ++MI) {
-      if ((*MI).is<MDString *>())
+      if ((*MI).is<MDString *>()) {
+        BitSetIndices[MI->get<MDString *>()] = BitSets.size();
         BitSets.push_back(MI->get<MDString *>());
-      else
+      } else {
+        GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
         Globals.push_back(MI->get<GlobalVariable *>());
+      }
     }
 
-    // Order bitsets and globals by name for determinism. TODO: We may later
-    // want to use a more sophisticated ordering that lays out globals so as to
-    // minimize the sizes of the bitsets.
+    // For each bitset, build a set of indices that refer to globals referenced
+    // by the bitset.
+    std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+    if (BitSetNM) {
+      for (MDNode *Op : BitSetNM->operands()) {
+        // Op = { bitset name, global, offset }
+        if (!Op->getOperand(1))
+          continue;
+        auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+        if (I == BitSetIndices.end())
+          continue;
+
+        auto OpGlobal = cast<GlobalVariable>(
+            cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+        BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+      }
+    }
+
+    // Order the sets of indices by size. The GlobalLayoutBuilder works best
+    // when given small index sets first.
+    std::stable_sort(
+        BitSetMembers.begin(), BitSetMembers.end(),
+        [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+          return O1.size() < O2.size();
+        });
+
+    // Create a GlobalLayoutBuilder and provide it with index sets as layout
+    // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+    // as close together as possible.
+    GlobalLayoutBuilder GLB(Globals.size());
+    for (auto &&MemSet : BitSetMembers)
+      GLB.addFragment(MemSet);
+
+    // Build a vector of globals with the computed layout.
+    std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+    auto OGI = OrderedGlobals.begin();
+    for (auto &&F : GLB.Fragments)
+      for (auto &&Offset : F)
+        *OGI++ = Globals[Offset];
+
+    // Order bitsets by name for determinism.
     std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
       return S1->getString() < S2->getString();
     });
-    std::sort(Globals.begin(), Globals.end(),
-              [](GlobalVariable *GV1, GlobalVariable *GV2) {
-                return GV1->getName() < GV2->getName();
-              });
 
     // Build the bitsets from this disjoint set.
-    buildBitSetsFromGlobals(M, BitSets, Globals);
+    buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);
   }
 
   return true;
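Putting the pieces together, the steps above (collect member index sets, sort them by size, feed them to the GlobalLayoutBuilder, flatten the fragments) can be sketched in isolation on the globals from the new layout.ll test below. The index assignment a=0, b=1, c=2, d=3, e=4 and the function name are assumptions made for illustration; the pass derives indices from its own iteration order.

#include "llvm/Transforms/IPO/LowerBitSets.h"
#include <algorithm>
#include <cassert>
#include <set>
#include <vector>
using namespace llvm;

void layoutExample() {
  // bitset1 = {a, b, c}, bitset2 = {b, d}, bitset3 = {a, e}, with the assumed
  // indices a=0, b=1, c=2, d=3, e=4 (as in the layout.ll test below).
  std::vector<std::set<uint64_t>> BitSetMembers = {{0, 1, 2}, {1, 3}, {0, 4}};

  // Hand the builder the small sets first, as the pass does.
  std::stable_sort(BitSetMembers.begin(), BitSetMembers.end(),
                   [](const std::set<uint64_t> &O1,
                      const std::set<uint64_t> &O2) {
                     return O1.size() < O2.size();
                   });

  GlobalLayoutBuilder GLB(5);
  for (auto &MemSet : BitSetMembers)
    GLB.addFragment(MemSet);

  // Flatten the fragments into the final order of the combined global.
  std::vector<uint64_t> Layout;
  for (auto &F : GLB.Fragments)
    Layout.insert(Layout.end(), F.begin(), F.end());

  // a, e, b, d, c -- the layout the new test checks for.
  assert((Layout == std::vector<uint64_t>{0, 4, 1, 3, 2}));
}
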
test/Transforms/LowerBitSets/layout.ll (new file, 35 lines)
@@ -0,0 +1,35 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; Tests that this set of globals is laid out according to our layout algorithm
+; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
+; The chosen layout in this case is a, e, b, d, c.
+
+; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 }
+@a = constant i32 1
+@b = constant i32 2
+@c = constant i32 3
+@d = constant i32 4
+@e = constant i32 5
+
+!0 = !{!"bitset1", i32* @a, i32 0}
+!1 = !{!"bitset1", i32* @b, i32 0}
+!2 = !{!"bitset1", i32* @c, i32 0}
+
+!3 = !{!"bitset2", i32* @b, i32 0}
+!4 = !{!"bitset2", i32* @d, i32 0}
+
+!5 = !{!"bitset3", i32* @a, i32 0}
+!6 = !{!"bitset3", i32* @e, i32 0}
+
+!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define void @foo() {
+  %x = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset1")
+  %y = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset2")
+  %z = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset3")
+  ret void
+}
@@ -62,3 +62,30 @@ TEST(LowerBitSets, BitSetBuilder) {
     }
   }
 }
+
+TEST(LowerBitSets, GlobalLayoutBuilder) {
+  struct {
+    uint64_t NumObjects;
+    std::vector<std::set<uint64_t>> Fragments;
+    std::vector<uint64_t> WantLayout;
+  } GLBTests[] = {
+    {0, {}, {}},
+    {4, {{0, 1}, {2, 3}}, {0, 1, 2, 3}},
+    {3, {{0, 1}, {1, 2}}, {0, 1, 2}},
+    {4, {{0, 1}, {1, 2}, {2, 3}}, {0, 1, 2, 3}},
+    {4, {{0, 1}, {2, 3}, {1, 2}}, {0, 1, 2, 3}},
+    {6, {{2, 5}, {0, 1, 2, 3, 4, 5}}, {0, 1, 2, 5, 3, 4}},
+  };
+
+  for (auto &&T : GLBTests) {
+    GlobalLayoutBuilder GLB(T.NumObjects);
+    for (auto &&F : T.Fragments)
+      GLB.addFragment(F);
+
+    std::vector<uint64_t> ComputedLayout;
+    for (auto &&F : GLB.Fragments)
+      ComputedLayout.insert(ComputedLayout.end(), F.begin(), F.end());
+
+    EXPECT_EQ(T.WantLayout, ComputedLayout);
+  }
+}