Bitcode: Serialize (and recover) use-list order

Predict and serialize use-list order in bitcode. This makes the option `-preserve-bc-use-list-order` work *most* of the time, but this is still experimental. - Builds a full value-table up front in the writer, sets up a list of use-list orders to write out, and discards the table. This is a simpler first step than determining the order from the various overlapping IDs of values on-the-fly. - The shuffles stored in the use-list order list have an unnecessarily large memory footprint. - `blockaddress` expressions cause functions to be materialized out-of-order. For now I've ignored this problem, so use-list orders will be wrong for constants used by functions that have block addresses taken. There are a couple of ways to fix this, but I don't have a concrete plan yet. - When materializing functions lazily, the use-lists for constants will not be correct. This use case is out of scope: what should the use-list order be, if it's incomplete? This is part of PR5680. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214125 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-24 22:24:54 +00:00 · 2014-07-28 21:19:41 +00:00
parent 0aed6e729d
commit bd24fe8c7e
9 changed files with 287 additions and 103 deletions
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -330,7 +330,8 @@ namespace bitc {
  };

  enum UseListCodes {
-    USELIST_CODE_ENTRY = 1   // USELIST_CODE_ENTRY: TBD.
+    USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id]
+    USELIST_CODE_BB      = 2  // BB: [index..., bb-id]
  };

  enum AttributeKindCodes {
--- a/include/llvm/IR/UseListOrder.h
+++ b/include/llvm/IR/UseListOrder.h
@@ -16,10 +16,23 @@
 #define LLVM_IR_USELISTORDER_H

 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include <vector>

 namespace llvm {

 class Module;
+class Function;
+class Value;
+
+/// \brief Structure to hold a use-list order.
+struct UseListOrder {
+  const Function *F;
+  const Value *V;
+  SmallVector<unsigned, 8> Shuffle;
+};
+
+typedef std::vector<UseListOrder> UseListOrderStack;

 /// \brief Whether to preserve use-list ordering.
 bool shouldPreserveBitcodeUseListOrder();
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1620,9 +1620,8 @@ std::error_code BitcodeReader::ParseUseLists() {
  if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
    return Error(InvalidRecord);

-  SmallVector<uint64_t, 64> Record;
-
  // Read all the records.
+  SmallVector<uint64_t, 64> Record;
  while (1) {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();

@@ -1639,14 +1638,42 @@ std::error_code BitcodeReader::ParseUseLists() {

    // Read a use list record.
    Record.clear();
+    bool IsBB = false;
    switch (Stream.readRecord(Entry.ID, Record)) {
    default:  // Default behavior: unknown type.
      break;
-    case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+    case bitc::USELIST_CODE_BB:
+      IsBB = true;
+      // fallthrough
+    case bitc::USELIST_CODE_DEFAULT: {
      unsigned RecordLength = Record.size();
-      if (RecordLength < 1)
+      if (RecordLength < 3)
+        // Records should have at least an ID and two indexes.
        return Error(InvalidRecord);
-      UseListRecords.push_back(Record);
+      unsigned ID = Record.back();
+      Record.pop_back();
+
+      Value *V;
+      if (IsBB) {
+        assert(ID < FunctionBBs.size() && "Basic block not found");
+        V = FunctionBBs[ID];
+      } else
+        V = ValueList[ID];
+      unsigned NumUses = 0;
+      SmallDenseMap<const Use *, unsigned, 16> Order;
+      for (const Use &U : V->uses()) {
+        if (NumUses > Record.size())
+          break;
+        Order[&U] = Record[NumUses++];
+      }
+      if (Order.size() != Record.size() || NumUses > Record.size())
+        // Mismatches can happen if the functions are being materialized lazily
+        // (out-of-order), or a value has been upgraded.
+        break;
+
+      V->sortUseList([&](const Use &L, const Use &R) {
+        return Order.lookup(&L) < Order.lookup(&R);
+      });
      break;
    }
    }
@@ -2298,6 +2325,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
        if (std::error_code EC = ParseMetadata())
          return EC;
        break;
+      case bitc::USELIST_BLOCK_ID:
+        if (std::error_code EC = ParseUseLists())
+          return EC;
+        break;
      }
      continue;

--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -138,7 +138,6 @@ class BitcodeReader : public GVMaterializer {
  BitcodeReaderMDValueList MDValueList;
  std::vector<Comdat *> ComdatList;
  SmallVector<Instruction *, 64> InstructionList;
-  SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;

  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
  std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1602,6 +1602,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
  Stream.ExitBlock();
 }

+static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
+                         BitstreamWriter &Stream) {
+  assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
+  unsigned Code;
+  if (isa<BasicBlock>(Order.V))
+    Code = bitc::USELIST_CODE_BB;
+  else
+    Code = bitc::USELIST_CODE_DEFAULT;
+
+  SmallVector<uint64_t, 64> Record;
+  for (unsigned I : Order.Shuffle)
+    Record.push_back(I);
+  Record.push_back(VE.getValueID(Order.V));
+  Stream.EmitRecord(Code, Record);
+}
+
+static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
+                              BitstreamWriter &Stream) {
+  auto hasMore = [&]() {
+    return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
+  };
+  if (!hasMore())
+    // Nothing to do.
+    return;
+
+  Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+  while (hasMore()) {
+    WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream);
+    VE.UseListOrders.pop_back();
+  }
+  Stream.ExitBlock();
+}
+
 /// WriteFunction - Emit a function body to the module stream.
 static void WriteFunction(const Function &F, ValueEnumerator &VE,
                          BitstreamWriter &Stream) {
@@ -1670,6 +1703,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,

  if (NeedsMetadataAttachment)
    WriteMetadataAttachment(F, VE, Stream);
+  if (shouldPreserveBitcodeUseListOrder())
+    WriteUseListBlock(&F, VE, Stream);
  VE.purgeFunction();
  Stream.ExitBlock();
 }
@@ -1835,98 +1870,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
  Stream.ExitBlock();
 }

-// Sort the Users based on the order in which the reader parses the bitcode
-// file.
-static bool bitcodereader_order(const User *lhs, const User *rhs) {
-  // TODO: Implement.
-  return true;
-}
-
-static void WriteUseList(const Value *V, const ValueEnumerator &VE,
-                         BitstreamWriter &Stream) {
-
-  // One or zero uses can't get out of order.
-  if (V->use_empty() || V->hasNUses(1))
-    return;
-
-  // Make a copy of the in-memory use-list for sorting.
-  SmallVector<const User*, 8> UserList(V->user_begin(), V->user_end());
-
-  // Sort the copy based on the order read by the BitcodeReader.
-  std::sort(UserList.begin(), UserList.end(), bitcodereader_order);
-
-  // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
-  // sorted list (i.e., the expected BitcodeReader in-memory use-list).
-
-  // TODO: Emit the USELIST_CODE_ENTRYs.
-}
-
-static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
-                                 BitstreamWriter &Stream) {
-  VE.incorporateFunction(*F);
-
-  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-       AI != AE; ++AI)
-    WriteUseList(AI, VE, Stream);
-  for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
-       ++BB) {
-    WriteUseList(BB, VE, Stream);
-    for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
-         ++II) {
-      WriteUseList(II, VE, Stream);
-      for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
-           OI != E; ++OI) {
-        if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
-            isa<InlineAsm>(*OI))
-          WriteUseList(*OI, VE, Stream);
-      }
-    }
-  }
-  VE.purgeFunction();
-}
-
-// Emit use-lists.
-static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
-                                BitstreamWriter &Stream) {
-  Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
-
-  // XXX: this modifies the module, but in a way that should never change the
-  // behavior of any pass or codegen in LLVM. The problem is that GVs may
-  // contain entries in the use_list that do not exist in the Module and are
-  // not stored in the .bc file.
-  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
-       I != E; ++I)
-    I->removeDeadConstantUsers();
-
-  // Write the global variables.
-  for (Module::const_global_iterator GI = M->global_begin(),
-         GE = M->global_end(); GI != GE; ++GI) {
-    WriteUseList(GI, VE, Stream);
-
-    // Write the global variable initializers.
-    if (GI->hasInitializer())
-      WriteUseList(GI->getInitializer(), VE, Stream);
-  }
-
-  // Write the functions.
-  for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
-    WriteUseList(FI, VE, Stream);
-    if (!FI->isDeclaration())
-      WriteFunctionUseList(FI, VE, Stream);
-    if (FI->hasPrefixData())
-      WriteUseList(FI->getPrefixData(), VE, Stream);
-  }
-
-  // Write the aliases.
-  for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
-       AI != AE; ++AI) {
-    WriteUseList(AI, VE, Stream);
-    WriteUseList(AI->getAliasee(), VE, Stream);
-  }
-
-  Stream.ExitBlock();
-}
-
 /// WriteModule - Emit the specified module to the bitstream.
 static void WriteModule(const Module *M, BitstreamWriter &Stream) {
  Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
@@ -1969,9 +1912,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
  // Emit names for globals/functions etc.
  WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);

-  // Emit use-lists.
+  // Emit module-level use-lists.
  if (shouldPreserveBitcodeUseListOrder())
-    WriteModuleUseLists(M, VE, Stream);
+    WriteUseListBlock(nullptr, VE, Stream);

  // Emit function bodies.
  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -25,14 +25,207 @@
 #include <algorithm>
 using namespace llvm;

+namespace {
+typedef DenseMap<const Value *, std::pair<unsigned, bool>> OrderMap;
+}
+
+static void orderValue(const Value *V, OrderMap &OM) {
+  if (OM.lookup(V).first)
+    return;
+
+  if (const Constant *C = dyn_cast<Constant>(V))
+    if (C->getNumOperands() && !isa<GlobalValue>(C))
+      for (const Value *Op : C->operands())
+        if (!isa<BasicBlock>(Op))
+          orderValue(Op, OM);
+
+  // Note: we cannot cache this lookup above, since inserting into the map
+  // changes the map's size, and thus affects the ID.
+  OM[V].first = OM.size() + 1;
+}
+
+static OrderMap orderModule(const Module *M) {
+  // This needs to match the order used by ValueEnumerator::ValueEnumerator()
+  // and ValueEnumerator::incorporateFunction().
+  OrderMap OM;
+
+  for (const GlobalVariable &G : M->globals())
+    orderValue(&G, OM);
+  for (const Function &F : *M)
+    orderValue(&F, OM);
+  for (const GlobalAlias &A : M->aliases())
+    orderValue(&A, OM);
+  for (const GlobalVariable &G : M->globals())
+    if (G.hasInitializer())
+      orderValue(G.getInitializer(), OM);
+  for (const GlobalAlias &A : M->aliases())
+    orderValue(A.getAliasee(), OM);
+  for (const Function &F : *M)
+    if (F.hasPrefixData())
+      orderValue(F.getPrefixData(), OM);
+
+  for (const Function &F : *M) {
+    if (F.isDeclaration())
+      continue;
+    // Here we need to match the union of ValueEnumerator::incorporateFunction()
+    // and WriteFunction().  Basic blocks are implicitly declared before
+    // anything else (by declaring their size).
+    for (const BasicBlock &BB : F)
+      orderValue(&BB, OM);
+    for (const Argument &A : F.args())
+      orderValue(&A, OM);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        for (const Value *Op : I.operands())
+          if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+              isa<InlineAsm>(*Op))
+            orderValue(Op, OM);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        orderValue(&I, OM);
+  }
+  return OM;
+}
+
+static void predictValueUseListOrderImpl(const Value *V, const Function *F,
+                                         unsigned ID, const OrderMap &OM,
+                                         UseListOrderStack &Stack) {
+  // Predict use-list order for this one.
+  typedef std::pair<const Use *, unsigned> Entry;
+  SmallVector<Entry, 64> List;
+  for (const Use &U : V->uses())
+    // Check if this user will be serialized.
+    if (OM.lookup(U.getUser()).first)
+      List.push_back(std::make_pair(&U, List.size()));
+
+  if (List.size() < 2)
+    // We may have lost some users.
+    return;
+
+  std::sort(List.begin(), List.end(),
+            [&OM, ID](const Entry &L, const Entry &R) {
+    const Use *LU = L.first;
+    const Use *RU = R.first;
+    auto LID = OM.lookup(LU->getUser()).first;
+    auto RID = OM.lookup(RU->getUser()).first;
+    // If ID is 4, then expect: 7 6 5 1 2 3.
+    if (LID < RID) {
+      if (RID < ID)
+        return true;
+      return false;
+    }
+    if (RID < LID) {
+      if (LID < ID)
+        return false;
+      return true;
+    }
+    // LID and RID are equal, so we have different operands of the same user.
+    // Assume operands are added in order for all instructions.
+    if (LU->getOperandNo() < RU->getOperandNo())
+      return LID < ID;
+    return ID < LID;
+  });
+
+  if (std::is_sorted(
+          List.begin(), List.end(),
+          [](const Entry &L, const Entry &R) { return L.second < R.second; }))
+    // Order is already correct.
+    return;
+
+  // Store the shuffle.
+  UseListOrder O;
+  O.V = V;
+  O.F = F;
+  for (auto &I : List)
+    O.Shuffle.push_back(I.second);
+  Stack.push_back(O);
+}
+
+static void predictValueUseListOrder(const Value *V, const Function *F,
+                                     OrderMap &OM, UseListOrderStack &Stack) {
+  auto &IDPair = OM[V];
+  assert(IDPair.first && "Unmapped value");
+  if (IDPair.second)
+    // Already predicted.
+    return;
+
+  // Do the actual prediction.
+  IDPair.second = true;
+  if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
+    predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
+
+  // Recursive descent into constants.
+  if (const Constant *C = dyn_cast<Constant>(V))
+    if (C->getNumOperands() && !isa<GlobalValue>(C))
+      for (const Value *Op : C->operands())
+        if (isa<Constant>(Op) && !isa<GlobalValue>(Op))
+          predictValueUseListOrder(Op, F, OM, Stack);
+}
+
+static UseListOrderStack predictUseListOrder(const Module *M) {
+  OrderMap OM = orderModule(M);
+
+  // Use-list orders need to be serialized after all the users have been added
+  // to a value, or else the shuffles will be incomplete.  Store them per
+  // function in a stack.
+  //
+  // Aside from function order, the order of values doesn't matter much here.
+  UseListOrderStack Stack;
+
+  // We want to visit the functions backward now so we can list function-local
+  // constants in the last Function they're used in.  Module-level constants
+  // have already been visited above.
+  for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
+    const Function &F = *I;
+    if (F.isDeclaration())
+      continue;
+    for (const BasicBlock &BB : F)
+      predictValueUseListOrder(&BB, &F, OM, Stack);
+    for (const Argument &A : F.args())
+      predictValueUseListOrder(&A, &F, OM, Stack);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        for (const Value *Op : I.operands())
+          if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+              isa<InlineAsm>(*Op))
+            predictValueUseListOrder(Op, &F, OM, Stack);
+    for (const BasicBlock &BB : F)
+      for (const Instruction &I : BB)
+        predictValueUseListOrder(&I, &F, OM, Stack);
+  }
+
+  // Visit globals last, since the module-level use-list block will be seen
+  // before the function bodies are processed.
+  for (const GlobalVariable &G : M->globals())
+    predictValueUseListOrder(&G, nullptr, OM, Stack);
+  for (const Function &F : *M)
+    predictValueUseListOrder(&F, nullptr, OM, Stack);
+  for (const GlobalAlias &A : M->aliases())
+    predictValueUseListOrder(&A, nullptr, OM, Stack);
+  for (const GlobalVariable &G : M->globals())
+    if (G.hasInitializer())
+      predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
+  for (const GlobalAlias &A : M->aliases())
+    predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
+  for (const Function &F : *M)
+    if (F.hasPrefixData())
+      predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+
+  return Stack;
+}
+
 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
  return V.first->getType()->isIntOrIntVectorTy();
 }

 /// ValueEnumerator - Enumerate module-level information.
 ValueEnumerator::ValueEnumerator(const Module *M) {
+  if (shouldPreserveBitcodeUseListOrder())
+    UseListOrders = predictUseListOrder(M);
+
  // Enumerate the global variables.
  for (Module::const_global_iterator I = M->global_begin(),
+
         E = M->global_end(); I != E; ++I)
    EnumerateValue(I);

--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/UniqueVector.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/UseListOrder.h"
 #include <vector>

 namespace llvm {
@@ -42,6 +43,9 @@ public:

  // For each value, we remember its Value* and occurrence frequency.
  typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+
+  UseListOrderStack UseListOrders;
+
 private:
  typedef DenseMap<Type*, unsigned> TypeMapType;
  TypeMapType TypeMap;
--- a/test/Bitcode/use-list-order.ll
+++ b/test/Bitcode/use-list-order.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-uselistorder < %s -preserve-bc-use-list-order
-; XFAIL: *

@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
@b = alias i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -271,7 +271,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
  case bitc::USELIST_BLOCK_ID:
    switch(CodeID) {
    default:return nullptr;
-    case bitc::USELIST_CODE_ENTRY:   return "USELIST_CODE_ENTRY";
+    case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT";
+    case bitc::USELIST_CODE_BB:      return "USELIST_CODE_BB";
    }
  }
 }