Bitcode: Serialize (and recover) use-list order

Predict and serialize use-list order in bitcode.  This makes the option
`-preserve-bc-use-list-order` work *most* of the time, but this is still
experimental.

  - Builds a full value-table up front in the writer, sets up a list of
    use-list orders to write out, and discards the table.  This is a
    simpler first step than determining the order from the various
    overlapping IDs of values on-the-fly.

  - The shuffles stored in the use-list order list have an unnecessarily
    large memory footprint.

  - `blockaddress` expressions cause functions to be materialized
    out-of-order.  For now I've ignored this problem, so use-list orders
    will be wrong for constants used by functions that have block
    addresses taken.  There are a couple of ways to fix this, but I
    don't have a concrete plan yet.

  - When materializing functions lazily, the use-lists for constants
    will not be correct.  This use case is out of scope: what should the
    use-list order be, if it's incomplete?

This is part of PR5680.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Duncan P. N. Exon Smith 2014-07-28 21:19:41 +00:00
parent 0aed6e729d
commit bd24fe8c7e
9 changed files with 287 additions and 103 deletions

View File

@ -330,7 +330,8 @@ namespace bitc {
};
/// Record codes that appear inside a USELIST_BLOCK_ID block.
///
/// Both the DEFAULT and BB records carry a list of indexes followed by one
/// trailing ID; they differ only in what that ID refers to (a value or a
/// basic block).
// NOTE(review): USELIST_CODE_ENTRY and USELIST_CODE_DEFAULT both carry the
// value 1 here, and the ENTRY line has no trailing comma. ENTRY looks like the
// old placeholder that DEFAULT supersedes -- confirm only one is meant to
// remain.
enum UseListCodes {
USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD.
USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id]
USELIST_CODE_BB = 2 // BB: [index..., bb-id]
};
enum AttributeKindCodes {

View File

@ -16,10 +16,23 @@
#define LLVM_IR_USELISTORDER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>
namespace llvm {
class Module;
class Function;
class Value;
/// \brief Structure to hold a use-list order.
///
/// One instance describes the shuffle to serialize for a single value. The
/// bitcode writer emits the contents of \c Shuffle followed by the ID of \c V
/// as one use-list record.
struct UseListOrder {
/// Function whose use-list block this order is written in. The writer uses
/// nullptr for orders that belong to the module-level use-list block.
const Function *F;
/// Value whose use-list the shuffle applies to.
const Value *V;
/// Shuffle indexes, one per serialized use of \c V. The writer expects at
/// least two entries.
SmallVector<unsigned, 8> Shuffle;
};
typedef std::vector<UseListOrder> UseListOrderStack;
/// \brief Whether to preserve use-list ordering.
bool shouldPreserveBitcodeUseListOrder();

View File

@ -1620,9 +1620,8 @@ std::error_code BitcodeReader::ParseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@ -1639,14 +1638,42 @@ std::error_code BitcodeReader::ParseUseLists() {
// Read a use list record.
Record.clear();
bool IsBB = false;
switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: unknown type.
break;
case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
case bitc::USELIST_CODE_BB:
IsBB = true;
// fallthrough
case bitc::USELIST_CODE_DEFAULT: {
unsigned RecordLength = Record.size();
if (RecordLength < 1)
if (RecordLength < 3)
// Records should have at least an ID and two indexes.
return Error(InvalidRecord);
UseListRecords.push_back(Record);
unsigned ID = Record.back();
Record.pop_back();
Value *V;
if (IsBB) {
assert(ID < FunctionBBs.size() && "Basic block not found");
V = FunctionBBs[ID];
} else
V = ValueList[ID];
// Pair every current use of V with the sort key stored at the same position
// in the record.
unsigned NumUses = 0;
SmallDenseMap<const Use *, unsigned, 16> Order;
for (const Use &U : V->uses()) {
  // Count the use *before* indexing: when V has more uses than the record
  // has indexes we must stop without reading Record[Record.size()], which
  // would be out of bounds.  NumUses then ends up greater than
  // Record.size(), which the mismatch check that follows relies on.
  if (++NumUses > Record.size())
    break;
  Order[&U] = Record[NumUses - 1];
}
if (Order.size() != Record.size() || NumUses > Record.size())
// Mismatches can happen if the functions are being materialized lazily
// (out-of-order), or a value has been upgraded.
break;
V->sortUseList([&](const Use &L, const Use &R) {
return Order.lookup(&L) < Order.lookup(&R);
});
break;
}
}
@ -2298,6 +2325,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (std::error_code EC = ParseMetadata())
return EC;
break;
case bitc::USELIST_BLOCK_ID:
if (std::error_code EC = ParseUseLists())
return EC;
break;
}
continue;

View File

@ -138,7 +138,6 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderMDValueList MDValueList;
std::vector<Comdat *> ComdatList;
SmallVector<Instruction *, 64> InstructionList;
SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;

View File

@ -1602,6 +1602,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
Stream.ExitBlock();
}
/// Emit a single use-list record for \p Order.
///
/// The record holds every shuffle index followed by the enumerated ID of the
/// value.  Basic blocks get USELIST_CODE_BB; every other value gets
/// USELIST_CODE_DEFAULT.
static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
                         BitstreamWriter &Stream) {
  assert(Order.Shuffle.size() >= 2 && "Shuffle too small");

  const unsigned Code = isa<BasicBlock>(Order.V) ? bitc::USELIST_CODE_BB
                                                 : bitc::USELIST_CODE_DEFAULT;

  // Indexes first, value ID last.
  SmallVector<uint64_t, 64> Record(Order.Shuffle.begin(),
                                   Order.Shuffle.end());
  Record.push_back(VE.getValueID(Order.V));
  Stream.EmitRecord(Code, Record);
}
static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
auto hasMore = [&]() {
return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
};
if (!hasMore())
// Nothing to do.
return;
Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
while (hasMore()) {
WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream);
VE.UseListOrders.pop_back();
}
Stream.ExitBlock();
}
/// WriteFunction - Emit a function body to the module stream.
static void WriteFunction(const Function &F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
@ -1670,6 +1703,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
if (NeedsMetadataAttachment)
WriteMetadataAttachment(F, VE, Stream);
if (shouldPreserveBitcodeUseListOrder())
WriteUseListBlock(&F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
}
@ -1835,98 +1870,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
// Sort the Users based on the order in which the reader parses the bitcode
// file.
//
// Until the real ordering is implemented this treats every pair of users as
// equivalent.  It must not return true unconditionally: std::sort requires a
// strict weak ordering (in particular comp(a, a) == false), and a comparator
// that always answers "less" gives it undefined behaviour.
static bool bitcodereader_order(const User *lhs, const User *rhs) {
  // TODO: Implement.
  return false;
}
/// Compute the use-list record for \p V (work in progress: nothing is
/// written to \p Stream yet).
static void WriteUseList(const Value *V, const ValueEnumerator &VE,
                         BitstreamWriter &Stream) {
  // A use-list with fewer than two entries has only one possible order.
  const bool AtMostOneUse = V->use_empty() || V->hasNUses(1);
  if (AtMostOneUse)
    return;

  // Work on a private copy of the in-memory use-list...
  SmallVector<const User*, 8> UserList(V->user_begin(), V->user_end());

  // ...arranged in the order the BitcodeReader would see the users.
  std::sort(UserList.begin(), UserList.end(), bitcodereader_order);

  // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
  // sorted list (i.e., the expected BitcodeReader in-memory use-list).

  // TODO: Emit the USELIST_CODE_ENTRYs.
}
static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
VE.incorporateFunction(*F);
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI)
WriteUseList(AI, VE, Stream);
for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
++BB) {
WriteUseList(BB, VE, Stream);
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
++II) {
WriteUseList(II, VE, Stream);
for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
OI != E; ++OI) {
if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
isa<InlineAsm>(*OI))
WriteUseList(*OI, VE, Stream);
}
}
}
VE.purgeFunction();
}
// Emit use-lists.
static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
// XXX: this modifies the module, but in a way that should never change the
// behavior of any pass or codegen in LLVM. The problem is that GVs may
// contain entries in the use_list that do not exist in the Module and are
// not stored in the .bc file.
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I)
I->removeDeadConstantUsers();
// Write the global variables.
for (Module::const_global_iterator GI = M->global_begin(),
GE = M->global_end(); GI != GE; ++GI) {
WriteUseList(GI, VE, Stream);
// Write the global variable initializers.
if (GI->hasInitializer())
WriteUseList(GI->getInitializer(), VE, Stream);
}
// Write the functions.
for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
WriteUseList(FI, VE, Stream);
if (!FI->isDeclaration())
WriteFunctionUseList(FI, VE, Stream);
if (FI->hasPrefixData())
WriteUseList(FI->getPrefixData(), VE, Stream);
}
// Write the aliases.
for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
AI != AE; ++AI) {
WriteUseList(AI, VE, Stream);
WriteUseList(AI->getAliasee(), VE, Stream);
}
Stream.ExitBlock();
}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
@ -1969,9 +1912,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
// Emit use-lists.
// Emit module-level use-lists.
if (shouldPreserveBitcodeUseListOrder())
WriteModuleUseLists(M, VE, Stream);
WriteUseListBlock(nullptr, VE, Stream);
// Emit function bodies.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)

View File

@ -25,14 +25,207 @@
#include <algorithm>
using namespace llvm;
namespace {
/// Per-value bookkeeping used while predicting use-list orders.
///
/// The pair holds (ID, predicted):
///  - \c first is the sequential ID handed out by orderValue(); it is non-zero
///    for every value that has been ordered, so 0 doubles as "not mapped".
///  - \c second is set by predictValueUseListOrder() once the value has been
///    processed, so that each value is predicted at most once.
typedef DenseMap<const Value *, std::pair<unsigned, bool>> OrderMap;
}
static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V).first)
return;
if (const Constant *C = dyn_cast<Constant>(V))
if (C->getNumOperands() && !isa<GlobalValue>(C))
for (const Value *Op : C->operands())
if (!isa<BasicBlock>(Op))
orderValue(Op, OM);
// Note: we cannot cache this lookup above, since inserting into the map
// changes the map's size, and thus affects the ID.
OM[V].first = OM.size() + 1;
}
/// Build the value -> ID map for every value of \p M that this file orders.
///
/// Values are handed to orderValue() in a fixed sequence, so IDs increase in
/// visitation order: module-level values first, then, for each function
/// definition, its basic blocks, arguments, instruction operands that are
/// non-global constants or inline asm, and finally its instructions.
///
/// \returns the populated map; the "predicted" flag of every entry is left at
/// its default value.
static OrderMap orderModule(const Module *M) {
// This needs to match the order used by ValueEnumerator::ValueEnumerator()
// and ValueEnumerator::incorporateFunction().
OrderMap OM;
// Module-level values: global variables, functions, then aliases.
for (const GlobalVariable &G : M->globals())
orderValue(&G, OM);
for (const Function &F : *M)
orderValue(&F, OM);
for (const GlobalAlias &A : M->aliases())
orderValue(&A, OM);
// Module-level constants: initializers, aliasees, then prefix data.
for (const GlobalVariable &G : M->globals())
if (G.hasInitializer())
orderValue(G.getInitializer(), OM);
for (const GlobalAlias &A : M->aliases())
orderValue(A.getAliasee(), OM);
for (const Function &F : *M)
if (F.hasPrefixData())
orderValue(F.getPrefixData(), OM);
// Function-local values; declarations have no body to walk.
for (const Function &F : *M) {
if (F.isDeclaration())
continue;
// Here we need to match the union of ValueEnumerator::incorporateFunction()
// and WriteFunction(). Basic blocks are implicitly declared before
// anything else (by declaring their size).
for (const BasicBlock &BB : F)
orderValue(&BB, OM);
for (const Argument &A : F.args())
orderValue(&A, OM);
// Constants (other than globals) and inline asm used as operands are
// ordered before any instruction of the function.
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
for (const Value *Op : I.operands())
if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
isa<InlineAsm>(*Op))
orderValue(Op, OM);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
orderValue(&I, OM);
}
return OM;
}
/// Predict the use-list order of \p V and record a shuffle if it differs from
/// the current in-memory order.
///
/// Only uses whose user has an ID in \p OM are considered.  Each such use is
/// tagged with its position among them, the list is sorted into the predicted
/// order, and -- unless the positions are still ascending -- the resulting
/// sequence of positions is pushed onto \p Stack as a UseListOrder.
///
/// \param V      Value whose use-list is examined.
/// \param F      Function recorded in the pushed UseListOrder (may be null).
/// \param ID     ID of \p V in \p OM.
/// \param OM     Value -> (ID, predicted) map; only read here.
/// \param Stack  Receives the shuffle, if one is needed.
static void predictValueUseListOrderImpl(const Value *V, const Function *F,
                                         unsigned ID, const OrderMap &OM,
                                         UseListOrderStack &Stack) {
  // Predict use-list order for this one.
  typedef std::pair<const Use *, unsigned> Entry;
  SmallVector<Entry, 64> List;
  for (const Use &U : V->uses())
    // Check if this user will be serialized.
    if (OM.lookup(U.getUser()).first)
      List.push_back(std::make_pair(&U, List.size()));

  if (List.size() < 2)
    // We may have lost some users.
    return;

  std::sort(List.begin(), List.end(),
            [&OM, ID](const Entry &L, const Entry &R) {
    const Use *LU = L.first;
    const Use *RU = R.first;
    // std::sort requires a strict weak ordering, so an entry must never
    // compare less than itself.  Without this guard the same-user branch
    // below would answer `ID < LID` for identical uses.
    if (LU == RU)
      return false;

    auto LID = OM.lookup(LU->getUser()).first;
    auto RID = OM.lookup(RU->getUser()).first;
    // If ID is 4, then expect: 7 6 5 1 2 3.
    if (LID < RID) {
      if (RID < ID)
        return true;
      return false;
    }
    if (RID < LID) {
      if (LID < ID)
        return false;
      return true;
    }
    // LID and RID are equal, so we have different operands of the same user.
    // Assume operands are added in order for all instructions.
    if (LU->getOperandNo() < RU->getOperandNo())
      return LID < ID;
    return ID < LID;
  });

  if (std::is_sorted(
          List.begin(), List.end(),
          [](const Entry &L, const Entry &R) { return L.second < R.second; }))
    // Order is already correct.
    return;

  // Store the shuffle.
  UseListOrder O;
  O.V = V;
  O.F = F;
  for (auto &I : List)
    O.Shuffle.push_back(I.second);
  // O is not used again, so hand its shuffle storage over instead of copying.
  Stack.push_back(std::move(O));
}
static void predictValueUseListOrder(const Value *V, const Function *F,
OrderMap &OM, UseListOrderStack &Stack) {
auto &IDPair = OM[V];
assert(IDPair.first && "Unmapped value");
if (IDPair.second)
// Already predicted.
return;
// Do the actual prediction.
IDPair.second = true;
if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
// Recursive descent into constants.
if (const Constant *C = dyn_cast<Constant>(V))
if (C->getNumOperands() && !isa<GlobalValue>(C))
for (const Value *Op : C->operands())
if (isa<Constant>(Op) && !isa<GlobalValue>(Op))
predictValueUseListOrder(Op, F, OM, Stack);
}
/// Predict the use-list shuffles to serialize for \p M.
///
/// Builds the ID map with orderModule(), then runs predictValueUseListOrder()
/// over function-local values (functions visited last-to-first) and finally
/// over module-level values.  Function-local entries are tagged with their
/// function, module-level entries with nullptr.
///
/// \returns the stack of shuffles; module-level entries are pushed last, and
/// entries of earlier functions are pushed after those of later functions.
static UseListOrderStack predictUseListOrder(const Module *M) {
OrderMap OM = orderModule(M);
// Use-list orders need to be serialized after all the users have been added
// to a value, or else the shuffles will be incomplete. Store them per
// function in a stack.
//
// Aside from function order, the order of values doesn't matter much here.
UseListOrderStack Stack;
// We want to visit the functions backward now so we can list function-local
// constants in the last Function they're used in. Module-level constants
// have already been visited above.
// NOTE(review): the range-for variables named `I` in the instruction loops
// below shadow this reverse iterator `I`.
for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
const Function &F = *I;
if (F.isDeclaration())
continue;
// Same categories as in orderModule(): blocks, arguments, constant and
// inline-asm operands, then instructions.
for (const BasicBlock &BB : F)
predictValueUseListOrder(&BB, &F, OM, Stack);
for (const Argument &A : F.args())
predictValueUseListOrder(&A, &F, OM, Stack);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
for (const Value *Op : I.operands())
if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
isa<InlineAsm>(*Op))
predictValueUseListOrder(Op, &F, OM, Stack);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
predictValueUseListOrder(&I, &F, OM, Stack);
}
// Visit globals last, since the module-level use-list block will be seen
// before the function bodies are processed.
for (const GlobalVariable &G : M->globals())
predictValueUseListOrder(&G, nullptr, OM, Stack);
for (const Function &F : *M)
predictValueUseListOrder(&F, nullptr, OM, Stack);
for (const GlobalAlias &A : M->aliases())
predictValueUseListOrder(&A, nullptr, OM, Stack);
// Module-level constants: initializers, aliasees, then prefix data.
for (const GlobalVariable &G : M->globals())
if (G.hasInitializer())
predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
for (const GlobalAlias &A : M->aliases())
predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
for (const Function &F : *M)
if (F.hasPrefixData())
predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
return Stack;
}
/// Predicate over (value, count) pairs: true when the value's type is an
/// integer or a vector of integers.
static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
  const Value *Val = V.first;
  return Val->getType()->isIntOrIntVectorTy();
}
/// ValueEnumerator - Enumerate module-level information.
ValueEnumerator::ValueEnumerator(const Module *M) {
if (shouldPreserveBitcodeUseListOrder())
UseListOrders = predictUseListOrder(M);
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
EnumerateValue(I);

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/UseListOrder.h"
#include <vector>
namespace llvm {
@ -42,6 +43,9 @@ public:
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
UseListOrderStack UseListOrders;
private:
typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;

View File

@ -1,5 +1,4 @@
; RUN: llvm-uselistorder < %s -preserve-bc-use-list-order
; XFAIL: *
@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
@b = alias i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)

View File

@ -271,7 +271,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::USELIST_BLOCK_ID:
switch(CodeID) {
default:return nullptr;
case bitc::USELIST_CODE_ENTRY: return "USELIST_CODE_ENTRY";
case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT";
case bitc::USELIST_CODE_BB: return "USELIST_CODE_BB";
}
}
}