mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	LowerBitSets: Introduce global layout builder.
The builder is based on a layout algorithm that tries to keep members of small bit sets together. The new layout compresses Chromium's bit sets to around 15% of their original size. Differential Revision: http://reviews.llvm.org/D7796 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230394 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -17,8 +17,10 @@ global variable. | ||||
| This will cause a link-time optimization pass to generate bitsets from the | ||||
| memory addresses referenced from the elements of the bitset metadata. The pass | ||||
| will lay out the referenced globals consecutively, so their definitions must | ||||
| be available at LTO time. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, | ||||
| generates code to test whether a given pointer is a member of a bitset. | ||||
| be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for | ||||
| laying out the globals efficiently to minimize the sizes of the underlying | ||||
| bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code | ||||
| to test whether a given pointer is a member of a bitset. | ||||
|  | ||||
| :Example: | ||||
|  | ||||
| @@ -64,3 +66,5 @@ generates code to test whether a given pointer is a member of a bitset. | ||||
|       %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1 | ||||
|       ret void | ||||
|     } | ||||
|  | ||||
| .. _GlobalLayoutBuilder: http://llvm.org/klaus/llvm/blob/master/include/llvm/Transforms/IPO/LowerBitSets.h | ||||
|   | ||||
| @@ -20,6 +20,7 @@ | ||||
|  | ||||
| #include <stdint.h> | ||||
| #include <limits> | ||||
| #include <set> | ||||
| #include <vector> | ||||
|  | ||||
| namespace llvm { | ||||
| @@ -73,6 +74,69 @@ struct BitSetBuilder { | ||||
|   BitSetInfo build(); | ||||
| }; | ||||
|  | ||||
/// Layout algorithm for the globals referenced by bit sets. Its goal is to
/// place the members of small bit sets next to each other, which in many cases
/// shrinks the resulting bit sets considerably.
///
/// The layout is assembled out of fragments. Every set of referenced globals
/// handed to the builder produces a new fragment, built by appending each
/// global of the set in turn. When a global already belongs to a fragment
/// created earlier, that earlier fragment is emptied and its whole contents
/// are appended to the fragment under construction instead.
///
/// Feeding the smallest sets first keeps the fragments that get copied into
/// larger ones as small as possible. An intuitive way to picture this is the
/// case where the globals are virtual tables and each bit set holds the
/// classes derived from some class: for a single inheritance hierarchy the
/// optimum layout is a depth-first walk of the hierarchy (and the computed
/// layout does end up resembling a DFS), whereas a naive DFS behaves poorly
/// once multiple inheritance is involved. The effect of the algorithm is to
/// nest smaller hierarchies inside larger ones where that is beneficial.
///
/// As an example, take this class hierarchy:
///
/// A       B
///   \   / | \
///     C   D   E
///
/// There are five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
/// bsE (E). A DFS that visits B and then A yields the layout {B, C, D, E, A}.
/// That is optimal for bsB, which only has to cover the 4 objects of its own
/// hierarchy, but not for bsA, which has to cover 5 objects, i.e. the entire
/// layout. This algorithm instead proceeds like so:
///
/// Add bsC, fragments {{C}}
/// Add bsD, fragments {{C}, {D}}
/// Add bsE, fragments {{C}, {D}, {E}}
/// Add bsA, fragments {{A, C}, {D}, {E}}
/// Add bsB, fragments {{B, A, C, D, E}}
///
/// The result is optimal for bsA, which now covers just two objects (3 fewer
/// than before), and costs bsB one additional object to cover.
///
/// The bit set lowering pass gives every object to be laid out an object
/// index, calls addFragment once per bit set with the object indices of the
/// globals that bit set references, and finally reads the layout back out of
/// the Fragments field.
struct GlobalLayoutBuilder {
  /// The layout computed so far, as a sequence of fragments. A fragment is a
  /// list of object indices and may be empty.
  std::vector<std::vector<uint64_t>> Fragments;

  /// Fragment index for each object index. Entries start out as 0, which is
  /// the index of the initial, always-empty fragment.
  std::vector<uint64_t> FragmentMap;

  /// Creates a builder for \p NumObjects objects. Fragments starts with one
  /// empty fragment and every FragmentMap entry starts at 0.
  GlobalLayoutBuilder(uint64_t NumObjects)
      : Fragments(1),
        FragmentMap(NumObjects) {
  }

  /// Adds the index set \p F to the layout, attempting to keep its indices
  /// contiguous. Any fragment seen earlier that shares an index with \p F is
  /// laid out inside the fragment created for \p F.
  void addFragment(const std::set<uint64_t> &F);
};
|  | ||||
| } // namespace llvm | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -118,6 +118,35 @@ BitSetInfo BitSetBuilder::build() { | ||||
|   return BSI; | ||||
| } | ||||
|  | ||||
| void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { | ||||
|   // Create a new fragment to hold the layout for F. | ||||
|   Fragments.emplace_back(); | ||||
|   std::vector<uint64_t> &Fragment = Fragments.back(); | ||||
|   uint64_t FragmentIndex = Fragments.size() - 1; | ||||
|  | ||||
|   for (auto ObjIndex : F) { | ||||
|     uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; | ||||
|     if (OldFragmentIndex == 0) { | ||||
|       // We haven't seen this object index before, so just add it to the current | ||||
|       // fragment. | ||||
|       Fragment.push_back(ObjIndex); | ||||
|     } else { | ||||
|       // This index belongs to an existing fragment. Copy the elements of the | ||||
|       // old fragment into this one and clear the old fragment. We don't update | ||||
|       // the fragment map just yet, this ensures that any further references to | ||||
|       // indices from the old fragment in this fragment do not insert any more | ||||
|       // indices. | ||||
|       std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex]; | ||||
|       Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end()); | ||||
|       OldFragment.clear(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Update the fragment map to point our object indices to this fragment. | ||||
|   for (uint64_t ObjIndex : Fragment) | ||||
|     FragmentMap[ObjIndex] = FragmentIndex; | ||||
| } | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| struct LowerBitSets : public ModulePass { | ||||
| @@ -485,27 +514,66 @@ bool LowerBitSets::buildBitSets(Module &M) { | ||||
|     // Build the list of bitsets and referenced globals in this disjoint set. | ||||
|     std::vector<MDString *> BitSets; | ||||
|     std::vector<GlobalVariable *> Globals; | ||||
|     llvm::DenseMap<MDString *, uint64_t> BitSetIndices; | ||||
|     llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices; | ||||
|     for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); | ||||
|          MI != GlobalClasses.member_end(); ++MI) { | ||||
|       if ((*MI).is<MDString *>()) | ||||
|       if ((*MI).is<MDString *>()) { | ||||
|         BitSetIndices[MI->get<MDString *>()] = BitSets.size(); | ||||
|         BitSets.push_back(MI->get<MDString *>()); | ||||
|       else | ||||
|       } else { | ||||
|         GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size(); | ||||
|         Globals.push_back(MI->get<GlobalVariable *>()); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Order bitsets and globals by name for determinism. TODO: We may later | ||||
|     // want to use a more sophisticated ordering that lays out globals so as to | ||||
|     // minimize the sizes of the bitsets. | ||||
|     // For each bitset, build a set of indices that refer to globals referenced | ||||
|     // by the bitset. | ||||
|     std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size()); | ||||
|     if (BitSetNM) { | ||||
|       for (MDNode *Op : BitSetNM->operands()) { | ||||
|         // Op = { bitset name, global, offset } | ||||
|         if (!Op->getOperand(1)) | ||||
|           continue; | ||||
|         auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0))); | ||||
|         if (I == BitSetIndices.end()) | ||||
|           continue; | ||||
|  | ||||
|         auto OpGlobal = cast<GlobalVariable>( | ||||
|             cast<ConstantAsMetadata>(Op->getOperand(1))->getValue()); | ||||
|         BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Order the sets of indices by size. The GlobalLayoutBuilder works best | ||||
|     // when given small index sets first. | ||||
|     std::stable_sort( | ||||
|         BitSetMembers.begin(), BitSetMembers.end(), | ||||
|         [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) { | ||||
|           return O1.size() < O2.size(); | ||||
|         }); | ||||
|  | ||||
|     // Create a GlobalLayoutBuilder and provide it with index sets as layout | ||||
|     // fragments. The GlobalLayoutBuilder tries to lay out members of fragments | ||||
|     // as close together as possible. | ||||
|     GlobalLayoutBuilder GLB(Globals.size()); | ||||
|     for (auto &&MemSet : BitSetMembers) | ||||
|       GLB.addFragment(MemSet); | ||||
|  | ||||
|     // Build a vector of globals with the computed layout. | ||||
|     std::vector<GlobalVariable *> OrderedGlobals(Globals.size()); | ||||
|     auto OGI = OrderedGlobals.begin(); | ||||
|     for (auto &&F : GLB.Fragments) | ||||
|       for (auto &&Offset : F) | ||||
|         *OGI++ = Globals[Offset]; | ||||
|  | ||||
|     // Order bitsets by name for determinism. | ||||
|     std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) { | ||||
|       return S1->getString() < S2->getString(); | ||||
|     }); | ||||
|     std::sort(Globals.begin(), Globals.end(), | ||||
|               [](GlobalVariable *GV1, GlobalVariable *GV2) { | ||||
|                 return GV1->getName() < GV2->getName(); | ||||
|               }); | ||||
|  | ||||
|     // Build the bitsets from this disjoint set. | ||||
|     buildBitSetsFromGlobals(M, BitSets, Globals); | ||||
|     buildBitSetsFromGlobals(M, BitSets, OrderedGlobals); | ||||
|   } | ||||
|  | ||||
|   return true; | ||||
|   | ||||
							
								
								
									
										35
									
								
								test/Transforms/LowerBitSets/layout.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								test/Transforms/LowerBitSets/layout.ll
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| ; RUN: opt -S -lowerbitsets < %s | FileCheck %s | ||||
|  | ||||
| target datalayout = "e-p:32:32" | ||||
|  | ||||
| ; Tests that this set of globals is laid out according to our layout algorithm | ||||
| ; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h). | ||||
| ; The chosen layout in this case is a, e, b, d, c. | ||||
|  | ||||
| ; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 } | ||||
| @a = constant i32 1 | ||||
| @b = constant i32 2 | ||||
| @c = constant i32 3 | ||||
| @d = constant i32 4 | ||||
| @e = constant i32 5 | ||||
|  | ||||
| !0 = !{!"bitset1", i32* @a, i32 0} | ||||
| !1 = !{!"bitset1", i32* @b, i32 0} | ||||
| !2 = !{!"bitset1", i32* @c, i32 0} | ||||
|  | ||||
| !3 = !{!"bitset2", i32* @b, i32 0} | ||||
| !4 = !{!"bitset2", i32* @d, i32 0} | ||||
|  | ||||
| !5 = !{!"bitset3", i32* @a, i32 0} | ||||
| !6 = !{!"bitset3", i32* @e, i32 0} | ||||
|  | ||||
| !llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6 } | ||||
|  | ||||
| declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone | ||||
|  | ||||
; Calls llvm.bitset.test once for each of bitset1, bitset2 and bitset3 with an
; undef pointer; the results are not used.
define void @foo() {
  %t1 = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset1")
  %t2 = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset2")
  %t3 = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset3")
  ret void
}
| @@ -62,3 +62,30 @@ TEST(LowerBitSets, BitSetBuilder) { | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| TEST(LowerBitSets, GlobalLayoutBuilder) { | ||||
|   struct { | ||||
|     uint64_t NumObjects; | ||||
|     std::vector<std::set<uint64_t>> Fragments; | ||||
|     std::vector<uint64_t> WantLayout; | ||||
|   } GLBTests[] = { | ||||
|     {0, {}, {}}, | ||||
|     {4, {{0, 1}, {2, 3}}, {0, 1, 2, 3}}, | ||||
|     {3, {{0, 1}, {1, 2}}, {0, 1, 2}}, | ||||
|     {4, {{0, 1}, {1, 2}, {2, 3}}, {0, 1, 2, 3}}, | ||||
|     {4, {{0, 1}, {2, 3}, {1, 2}}, {0, 1, 2, 3}}, | ||||
|     {6, {{2, 5}, {0, 1, 2, 3, 4, 5}}, {0, 1, 2, 5, 3, 4}}, | ||||
|   }; | ||||
|  | ||||
|   for (auto &&T : GLBTests) { | ||||
|     GlobalLayoutBuilder GLB(T.NumObjects); | ||||
|     for (auto &&F : T.Fragments) | ||||
|       GLB.addFragment(F); | ||||
|  | ||||
|     std::vector<uint64_t> ComputedLayout; | ||||
|     for (auto &&F : GLB.Fragments) | ||||
|       ComputedLayout.insert(ComputedLayout.end(), F.begin(), F.end()); | ||||
|  | ||||
|     EXPECT_EQ(T.WantLayout, ComputedLayout); | ||||
|   } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user