Chris Lattner e17b658c79 Implement support for globally associating abbrevs with block IDs, which
relieves us from having to emit the abbrevs into each instance of the block.
This shrinks kc.bit from 3368K to 3333K, but will be a more significant win
once instructions are abbreviated.

The VST went from:

  Block ID #14 (VALUE_SYMTAB):
      Num Instances: 2345
         Total Size: 1.29508e+07b/1.61885e+06B/404713W
       Average Size: 5522.73b/690.342B/172.585W
          % of file: 48.0645
  Tot/Avg SubBlocks: 0/0
    Tot/Avg Abbrevs: 7035/3
    Tot/Avg Records: 120924/51.5667
      % Abbrev Recs: 100

to:

  Block ID #14 (VALUE_SYMTAB):
      Num Instances: 2345
         Total Size: 1.26713e+07b/1.58391e+06B/395978W
       Average Size: 5403.53b/675.442B/168.86W
          % of file: 47.5198
  Tot/Avg SubBlocks: 0/0
    Tot/Avg Abbrevs: 0/0
    Tot/Avg Records: 120924/51.5667
      % Abbrev Recs: 100

because we didn't emit the same 3 abbrevs 2345 times :)


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36767 91177308-0d34-0410-b5e6-96231b3b80d8
2007-05-05 00:17:00 +00:00

149 lines
5.2 KiB
C++

//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This header defines Bitcode enum values.
//
// The enum values defined in this file should be considered permanent. If
// new features are added, they should have values added at the end of the
// respective lists.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BITCODE_BITCODES_H
#define LLVM_BITCODE_BITCODES_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
namespace llvm {
namespace bitc {
/// StandardWidths - Bit widths used for the standard fields of the bitcode
/// format.
enum StandardWidths {
  /// Block IDs are emitted as VBR-8.
  BlockIDWidth = 8,

  /// Code lengths are emitted as VBR-4.
  CodeLenWidth = 4,

  /// Block sizes are counted in 32-bit words using a 32-bit field, which
  /// allows blocks of up to roughly 16GB (2^32 words * 4 bytes per word).
  BlockSizeWidth = 32
};
/// FixedAbbrevIDs - The abbrev IDs that exist in every abbrev namespace.
/// Together they always provide a way to exit a block, enter a nested block,
/// define abbrevs, and define an unabbreviated record.
enum FixedAbbrevIDs {
  /// END_BLOCK - Must be zero to guarantee termination for broken bitcode.
  END_BLOCK = 0,

  /// ENTER_SUBBLOCK - Begins a nested block.
  ENTER_SUBBLOCK = 1,

  /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
  /// of a vbr5 for # operand infos.  Each operand info is emitted with a
  /// single bit to indicate if it is a literal encoding.  If so, the value is
  /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
  /// by the info value as a vbr5 if needed.
  DEFINE_ABBREV = 2,

  /// UNABBREV_RECORD - Emitted with a vbr6 for the record code, followed by
  /// a vbr6 for the # operands, followed by vbr6's for each operand.
  UNABBREV_RECORD = 3,

  /// This is not a code; it is a marker for the first abbrev assignment.
  FIRST_APPLICATION_ABBREV = 4
};
/// StandardBlockIDs - Block IDs with a predefined meaning.  All bitcode files
/// can optionally include a BLOCKINFO block, which contains metadata about
/// other blocks in the file.
enum StandardBlockIDs {
  /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
  /// standard abbrevs that should be available to all blocks of a specified
  /// ID.
  BLOCKINFO_BLOCK_ID = 0,

  // Block IDs 1-7 are reserved for future expansion.

  /// Marker for the first block ID after the reserved range.
  FIRST_APPLICATION_BLOCKID = 8
};
/// BlockInfoCodes - Record codes used inside the blockinfo block, which
/// contains metadata about user-defined blocks.
enum BlockInfoCodes {
  /// SETBID: [blockid#]
  BLOCKINFO_CODE_SETBID = 1

  // DEFINE_ABBREV has magic semantics here: it applies to the block most
  // recently selected with SETBID, instead of to the BlockInfo block itself.

  // BLOCKNAME (give a string name to a block, if desired) is noted here but
  // has no enumerator yet.
};
} // End bitc namespace
/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
/// Which of the two forms an instance takes is chosen by the constructor used:
///   1. A literal integer value ("the operand is always 17").
///   2. An encoding specification ("this operand is encoded like so").
///
class BitCodeAbbrevOp {
  uint64_t Val;           // A literal value or data for an encoding.
  bool IsLiteral : 1;     // Indicate whether this is a literal value or not.
  unsigned Enc   : 3;     // The encoding to use; 0 when this is a literal.
public:
  enum Encoding {
    Fixed = 1,  // A fixed width field, Val specifies number of bits.
    VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
    Array = 3   // A sequence of fields, next field specifies elt encoding.
  };

  /// Create a literal operand whose value is always V.  Enc is explicitly
  /// zeroed (0 is not a valid Encoding) so that copying the object never
  /// reads an uninitialized bit-field.
  BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true), Enc(0) {}

  /// Create an encoding operand using encoding E.  Data is the extra
  /// parameter of the encoding (see the Encoding enumerators) and defaults
  /// to 0.
  BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
    : Val(Data), IsLiteral(false), Enc(E) {}

  bool isLiteral() const  { return IsLiteral; }
  bool isEncoding() const { return !IsLiteral; }

  // Accessors for literals.

  /// Return the literal value.  Asserts that this operand is a literal.
  uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }

  // Accessors for encoding info.

  /// Return the encoding.  Asserts that this operand is an encoding.
  Encoding getEncoding() const {
    assert(isEncoding());
    return static_cast<Encoding>(Enc);
  }

  /// Return the data attached to the encoding.  Asserts that this operand is
  /// an encoding and that its encoding carries data.
  uint64_t getEncodingData() const {
    assert(isEncoding() && hasEncodingData());
    return Val;
  }

  /// Return true if this operand's encoding carries a data value.
  bool hasEncodingData() const { return hasEncodingData(getEncoding()); }

  /// Return true if encoding E carries a data value: Fixed and VBR do, Array
  /// does not.
  static bool hasEncodingData(Encoding E) {
    switch (E) {
    // An unknown encoding asserts; with assertions disabled it falls through
    // to the Fixed/VBR case and reports true.
    default: assert(0 && "Unknown encoding");
    case Fixed:
    case VBR:
      return true;
    case Array:
      return false;
    }
  }
};
/// BitCodeAbbrev - This class represents an abbreviation record.  An
/// abbreviation allows a complex record that has redundancy to be stored in a
/// specialized format instead of the fully-general, fully-vbr, format.
///
/// Instances are reference counted: a new object starts with one reference,
/// and the object deletes itself when dropRef() releases the last one.
class BitCodeAbbrev {
  SmallVector<BitCodeAbbrevOp, 8> OperandList;

  // Number of things using this.  Kept as a full unsigned so that the count
  // cannot wrap back to zero after 256 simultaneous references, which would
  // make a later dropRef() delete an object that is still in use.
  unsigned RefCount;

  // Private so that code outside the class cannot destroy an instance
  // directly; destruction goes through dropRef().
  ~BitCodeAbbrev() {}
public:
  /// Create an abbreviation with no operands and a reference count of one.
  BitCodeAbbrev() : RefCount(1) {}

  /// Take an additional reference.
  void addRef() { ++RefCount; }

  /// Release one reference, deleting this object when none remain.
  void dropRef() { if (--RefCount == 0) delete this; }

  /// Return the number of operand infos added so far.
  unsigned getNumOperandInfos() const { return OperandList.size(); }

  /// Return the N'th operand info.  N is not range checked here.
  const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
    return OperandList[N];
  }

  /// Append OpInfo to the end of the operand list.
  void Add(const BitCodeAbbrevOp &OpInfo) {
    OperandList.push_back(OpInfo);
  }
};
} // End llvm namespace
#endif