mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 04:33:05 +00:00
LowerBitSets: Introduce global layout builder.
The builder is based on a layout algorithm that tries to keep members of small bit sets together. The new layout compresses Chromium's bit sets to around 15% of their original size. Differential Revision: http://reviews.llvm.org/D7796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230394 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7a1721e7c3
commit
0bf03cb473
@ -17,8 +17,10 @@ global variable.
|
|||||||
This will cause a link-time optimization pass to generate bitsets from the
|
This will cause a link-time optimization pass to generate bitsets from the
|
||||||
memory addresses referenced from the elements of the bitset metadata. The pass
|
memory addresses referenced from the elements of the bitset metadata. The pass
|
||||||
will lay out the referenced globals consecutively, so their definitions must
|
will lay out the referenced globals consecutively, so their definitions must
|
||||||
be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`,
|
be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for
|
||||||
generates code to test whether a given pointer is a member of a bitset.
|
laying out the globals efficiently to minimize the sizes of the underlying
|
||||||
|
bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code
|
||||||
|
to test whether a given pointer is a member of a bitset.
|
||||||
|
|
||||||
:Example:
|
:Example:
|
||||||
|
|
||||||
@ -64,3 +66,5 @@ generates code to test whether a given pointer is a member of a bitset.
|
|||||||
%d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
|
%d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.. _GlobalLayoutBuilder: http://llvm.org/klaus/llvm/blob/master/include/llvm/Transforms/IPO/LowerBitSets.h
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
@ -73,6 +74,69 @@ struct BitSetBuilder {
|
|||||||
BitSetInfo build();
|
BitSetInfo build();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// This class implements a layout algorithm for globals referenced by bit sets
|
||||||
|
/// that tries to keep members of small bit sets together. This can
|
||||||
|
/// significantly reduce bit set sizes in many cases.
|
||||||
|
///
|
||||||
|
/// It works by assembling fragments of layout from sets of referenced globals.
|
||||||
|
/// Each set of referenced globals causes the algorithm to create a new
|
||||||
|
/// fragment, which is assembled by appending each referenced global in the set
|
||||||
|
/// into the fragment. If a referenced global has already been referenced by a
|
||||||
|
/// fragment created earlier, we instead delete that fragment and append its
|
||||||
|
/// contents into the fragment we are assembling.
|
||||||
|
///
|
||||||
|
/// By starting with the smallest fragments, we minimize the size of the
|
||||||
|
/// fragments that are copied into larger fragments. This is most intuitively
|
||||||
|
/// thought about when considering the case where the globals are virtual tables
|
||||||
|
/// and the bit sets represent their derived classes: in a single inheritance
|
||||||
|
/// hierarchy, the optimum layout would involve a depth-first search of the
|
||||||
|
/// class hierarchy (and in fact the computed layout ends up looking a lot like
|
||||||
|
/// a DFS), but a naive DFS would not work well in the presence of multiple
|
||||||
|
/// inheritance. This aspect of the algorithm ends up fitting smaller
|
||||||
|
/// hierarchies inside larger ones where that would be beneficial.
|
||||||
|
///
|
||||||
|
/// For example, consider this class hierarchy:
|
||||||
|
///
|
||||||
|
/// A B
|
||||||
|
/// \ / | \
|
||||||
|
/// C D E
|
||||||
|
///
|
||||||
|
/// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
|
||||||
|
/// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
|
||||||
|
/// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
|
||||||
|
/// cover the only 4 objects in its hierarchy, but not for bsA as it needs to
|
||||||
|
/// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
|
||||||
|
///
|
||||||
|
/// Add bsC, fragments {{C}}
|
||||||
|
/// Add bsD, fragments {{C}, {D}}
|
||||||
|
/// Add bsE, fragments {{C}, {D}, {E}}
|
||||||
|
/// Add bsA, fragments {{A, C}, {D}, {E}}
|
||||||
|
/// Add bsB, fragments {{B, A, C, D, E}}
|
||||||
|
///
|
||||||
|
/// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
|
||||||
|
/// fewer) objects, at the cost of bsB needing to cover 1 more object.
|
||||||
|
///
|
||||||
|
/// The bit set lowering pass assigns an object index to each object that needs
|
||||||
|
/// to be laid out, and calls addFragment for each bit set passing the object
|
||||||
|
/// indices of its referenced globals. It then assembles a layout from the
|
||||||
|
/// computed layout in the Fragments field.
|
||||||
|
struct GlobalLayoutBuilder {
|
||||||
|
/// The computed layout. Each element of this vector contains a fragment of
|
||||||
|
/// layout (which may be empty) consisting of object indices.
/// Concatenating the fragments in vector order yields the final layout.
|
||||||
|
std::vector<std::vector<uint64_t>> Fragments;
|
||||||
|
|
||||||
|
/// Mapping from object index to fragment index.
/// A value of 0 means the object has not been added to any fragment yet;
/// Fragments[0] is a placeholder that addFragment never appends to.
|
||||||
|
std::vector<uint64_t> FragmentMap;
|
||||||
|
|
||||||
|
/// Create a builder for \p NumObjects objects, none of which belongs to a
/// fragment yet (every FragmentMap entry starts out as 0).
GlobalLayoutBuilder(uint64_t NumObjects)
|
||||||
|
: Fragments(1), FragmentMap(NumObjects) {}
|
||||||
|
|
||||||
|
/// Add \p F to the layout while trying to keep its indices contiguous.
|
||||||
|
/// If a previously seen fragment uses any of \p F's indices, that
|
||||||
|
/// fragment will be laid out inside \p F.
|
||||||
|
void addFragment(const std::set<uint64_t> &F);
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -118,6 +118,35 @@ BitSetInfo BitSetBuilder::build() {
|
|||||||
return BSI;
|
return BSI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
|
||||||
|
// Create a new fragment to hold the layout for F.
|
||||||
|
Fragments.emplace_back();
|
||||||
|
std::vector<uint64_t> &Fragment = Fragments.back();
|
||||||
|
uint64_t FragmentIndex = Fragments.size() - 1;
|
||||||
|
|
||||||
|
for (auto ObjIndex : F) {
|
||||||
|
uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
|
||||||
|
if (OldFragmentIndex == 0) {
|
||||||
|
// We haven't seen this object index before, so just add it to the current
|
||||||
|
// fragment.
|
||||||
|
Fragment.push_back(ObjIndex);
|
||||||
|
} else {
|
||||||
|
// This index belongs to an existing fragment. Copy the elements of the
|
||||||
|
// old fragment into this one and clear the old fragment. We don't update
|
||||||
|
// the fragment map just yet, this ensures that any further references to
|
||||||
|
// indices from the old fragment in this fragment do not insert any more
|
||||||
|
// indices.
|
||||||
|
std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
|
||||||
|
Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
|
||||||
|
OldFragment.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the fragment map to point our object indices to this fragment.
|
||||||
|
for (uint64_t ObjIndex : Fragment)
|
||||||
|
FragmentMap[ObjIndex] = FragmentIndex;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct LowerBitSets : public ModulePass {
|
struct LowerBitSets : public ModulePass {
|
||||||
@ -485,27 +514,66 @@ bool LowerBitSets::buildBitSets(Module &M) {
|
|||||||
// Build the list of bitsets and referenced globals in this disjoint set.
|
// Build the list of bitsets and referenced globals in this disjoint set.
|
||||||
std::vector<MDString *> BitSets;
|
std::vector<MDString *> BitSets;
|
||||||
std::vector<GlobalVariable *> Globals;
|
std::vector<GlobalVariable *> Globals;
|
||||||
|
llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
|
||||||
|
llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
|
||||||
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
|
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
|
||||||
MI != GlobalClasses.member_end(); ++MI) {
|
MI != GlobalClasses.member_end(); ++MI) {
|
||||||
if ((*MI).is<MDString *>())
|
if ((*MI).is<MDString *>()) {
|
||||||
|
BitSetIndices[MI->get<MDString *>()] = BitSets.size();
|
||||||
BitSets.push_back(MI->get<MDString *>());
|
BitSets.push_back(MI->get<MDString *>());
|
||||||
else
|
} else {
|
||||||
|
GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
|
||||||
Globals.push_back(MI->get<GlobalVariable *>());
|
Globals.push_back(MI->get<GlobalVariable *>());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Order bitsets and globals by name for determinism. TODO: We may later
|
// For each bitset, build a set of indices that refer to globals referenced
|
||||||
// want to use a more sophisticated ordering that lays out globals so as to
|
// by the bitset.
|
||||||
// minimize the sizes of the bitsets.
|
std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
|
||||||
|
if (BitSetNM) {
|
||||||
|
for (MDNode *Op : BitSetNM->operands()) {
|
||||||
|
// Op = { bitset name, global, offset }
|
||||||
|
if (!Op->getOperand(1))
|
||||||
|
continue;
|
||||||
|
auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
|
||||||
|
if (I == BitSetIndices.end())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto OpGlobal = cast<GlobalVariable>(
|
||||||
|
cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
|
||||||
|
BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Order the sets of indices by size. The GlobalLayoutBuilder works best
|
||||||
|
// when given small index sets first.
|
||||||
|
std::stable_sort(
|
||||||
|
BitSetMembers.begin(), BitSetMembers.end(),
|
||||||
|
[](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
|
||||||
|
return O1.size() < O2.size();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create a GlobalLayoutBuilder and provide it with index sets as layout
|
||||||
|
// fragments. The GlobalLayoutBuilder tries to lay out members of fragments
|
||||||
|
// as close together as possible.
|
||||||
|
GlobalLayoutBuilder GLB(Globals.size());
|
||||||
|
for (auto &&MemSet : BitSetMembers)
|
||||||
|
GLB.addFragment(MemSet);
|
||||||
|
|
||||||
|
// Build a vector of globals with the computed layout.
|
||||||
|
std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
|
||||||
|
auto OGI = OrderedGlobals.begin();
|
||||||
|
for (auto &&F : GLB.Fragments)
|
||||||
|
for (auto &&Offset : F)
|
||||||
|
*OGI++ = Globals[Offset];
|
||||||
|
|
||||||
|
// Order bitsets by name for determinism.
|
||||||
std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
|
std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
|
||||||
return S1->getString() < S2->getString();
|
return S1->getString() < S2->getString();
|
||||||
});
|
});
|
||||||
std::sort(Globals.begin(), Globals.end(),
|
|
||||||
[](GlobalVariable *GV1, GlobalVariable *GV2) {
|
|
||||||
return GV1->getName() < GV2->getName();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Build the bitsets from this disjoint set.
|
// Build the bitsets from this disjoint set.
|
||||||
buildBitSetsFromGlobals(M, BitSets, Globals);
|
buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
35
test/Transforms/LowerBitSets/layout.ll
Normal file
35
test/Transforms/LowerBitSets/layout.ll
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
; RUN: opt -S -lowerbitsets < %s | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32"
|
||||||
|
|
||||||
|
; Tests that this set of globals is laid out according to our layout algorithm
|
||||||
|
; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
|
||||||
|
; The chosen layout in this case is a, e, b, d, c.
|
||||||
|
|
||||||
|
; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 }
|
||||||
|
@a = constant i32 1
|
||||||
|
@b = constant i32 2
|
||||||
|
@c = constant i32 3
|
||||||
|
@d = constant i32 4
|
||||||
|
@e = constant i32 5
|
||||||
|
|
||||||
|
!0 = !{!"bitset1", i32* @a, i32 0}
|
||||||
|
!1 = !{!"bitset1", i32* @b, i32 0}
|
||||||
|
!2 = !{!"bitset1", i32* @c, i32 0}
|
||||||
|
|
||||||
|
!3 = !{!"bitset2", i32* @b, i32 0}
|
||||||
|
!4 = !{!"bitset2", i32* @d, i32 0}
|
||||||
|
|
||||||
|
!5 = !{!"bitset3", i32* @a, i32 0}
|
||||||
|
!6 = !{!"bitset3", i32* @e, i32 0}
|
||||||
|
|
||||||
|
!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6 }
|
||||||
|
|
||||||
|
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
|
||||||
|
|
||||||
|
; Calls llvm.bitset.test once for each of bitset1, bitset2 and bitset3; the
; results are unused. NOTE(review): presumably these calls exist only so the
; pass sees a use of every bitset and lays out the globals - confirm against
; LowerBitSets::buildBitSets.
define void @foo() {
|
||||||
|
%x = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset1")
|
||||||
|
%y = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset2")
|
||||||
|
%z = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset3")
|
||||||
|
ret void
|
||||||
|
}
|
@ -62,3 +62,30 @@ TEST(LowerBitSets, BitSetBuilder) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Table-driven check of GlobalLayoutBuilder: feed each case's fragments to
// addFragment in order, then compare the concatenation of GLB.Fragments with
// the expected layout.
TEST(LowerBitSets, GlobalLayoutBuilder) {
  struct {
    uint64_t NumObjects;
    std::vector<std::set<uint64_t>> Fragments;
    std::vector<uint64_t> WantLayout;
  } GLBTests[] = {
      // No objects and no fragments: empty layout.
      {0, {}, {}},
      // Disjoint fragments stay in insertion order.
      {4, {{0, 1}, {2, 3}}, {0, 1, 2, 3}},
      // Overlapping fragments are merged into the later one.
      {3, {{0, 1}, {1, 2}}, {0, 1, 2}},
      {4, {{0, 1}, {1, 2}, {2, 3}}, {0, 1, 2, 3}},
      {4, {{0, 1}, {2, 3}, {1, 2}}, {0, 1, 2, 3}},
      // A small fragment is kept contiguous inside a larger one.
      {6, {{2, 5}, {0, 1, 2, 3, 4, 5}}, {0, 1, 2, 5, 3, 4}},
  };

  for (auto &&Case : GLBTests) {
    GlobalLayoutBuilder Builder(Case.NumObjects);
    for (const std::set<uint64_t> &Members : Case.Fragments)
      Builder.addFragment(Members);

    // Flatten the fragments, in order, into a single layout vector.
    std::vector<uint64_t> Actual;
    for (const std::vector<uint64_t> &Piece : Builder.Fragments)
      Actual.insert(Actual.end(), Piece.begin(), Piece.end());

    EXPECT_EQ(Case.WantLayout, Actual);
  }
}
|
||||||
|
Loading…
Reference in New Issue
Block a user