From cb3d91b05b36a144dd0f3103ce4fda4cab356ad6 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 6 May 2007 00:53:07 +0000 Subject: [PATCH] add a denser encoding for null terminated strings, add a 6-bit abbrev as well. This shrinks kc++ from 2724088 to 2717360 bytes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@36821 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bitcode/LLVMBitCodes.h | 17 ++++----- lib/Bitcode/Reader/BitcodeReader.cpp | 16 ++++++++- lib/Bitcode/Writer/BitcodeWriter.cpp | 44 ++++++++++++++++++----- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 2 ++ 4 files changed, 62 insertions(+), 17 deletions(-) diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 9850db4fcd2..5c586140119 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -107,14 +107,15 @@ namespace bitc { CST_CODE_FLOAT = 6, // FLOAT: [fpval] CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] CST_CODE_STRING = 8, // STRING: [values] - CST_CODE_CE_BINOP = 9, // CE_BINOP: [opcode, opval, opval] - CST_CODE_CE_CAST = 10, // CE_CAST: [opcode, opty, opval] - CST_CODE_CE_GEP = 11, // CE_GEP: [n x operands] - CST_CODE_CE_SELECT = 12, // CE_SELECT: [opval, opval, opval] - CST_CODE_CE_EXTRACTELT = 13, // CE_EXTRACTELT: [opty, opval, opval] - CST_CODE_CE_INSERTELT = 14, // CE_INSERTELT: [opval, opval, opval] - CST_CODE_CE_SHUFFLEVEC = 15, // CE_SHUFFLEVEC: [opval, opval, opval] - CST_CODE_CE_CMP = 16 // CE_CMP: [opty, opval, opval, pred] + CST_CODE_CSTRING = 9, // CSTRING: [values] + CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] + CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] + CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] + CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] + CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] + CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] + CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] + CST_CODE_CE_CMP = 17 // CE_CMP: [opty, opval, opval, pred] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index b1a001e1af4..c4e221fd369 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -651,12 +651,26 @@ bool BitcodeReader::ParseConstants() { unsigned Size = Record.size(); std::vector Elts; - for (unsigned i = 0; i != Size; ++i) Elts.push_back(ConstantInt::get(EltTy, Record[i])); V = ConstantArray::get(ATy, Elts); break; } + case bitc::CST_CODE_CSTRING: { // CSTRING: [values] + if (Record.empty()) + return Error("Invalid CST_AGGREGATE record"); + + const ArrayType *ATy = cast(CurTy); + const Type *EltTy = ATy->getElementType(); + + unsigned Size = Record.size(); + std::vector Elts; + for (unsigned i = 0; i != Size; ++i) + Elts.push_back(ConstantInt::get(EltTy, Record[i])); + Elts.push_back(Constant::getNullValue(EltTy)); + V = ConstantArray::get(ATy, Elts); + break; + } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) return Error("Invalid CE_BINOP record"); int Opc = GetDecodedBinaryOpcode(Record[0], CurTy); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 6cfa25ab74e..92475fc9e81 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -411,7 +411,9 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4); unsigned AggregateAbbrev = 0; - unsigned String7Abbrev = 0; + unsigned String8Abbrev = 0; + unsigned CString7Abbrev = 0; + unsigned CString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. @@ -425,8 +427,20 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + String8Abbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for CST_CODE_CSTRING. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - String7Abbrev = Stream.EmitAbbrev(Abbv); + CString7Abbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for CST_CODE_CSTRING. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + CString6Abbrev = Stream.EmitAbbrev(Abbv); } // FIXME: Install and use abbrevs to reduce size. Install them globally so @@ -493,15 +507,29 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } } else if (isa(C) && cast(C)->isString()) { // Emit constant strings specially. - Code = bitc::CST_CODE_STRING; - bool isStr7 = true; - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + unsigned NumOps = C->getNumOperands(); + // If this is a null-terminated string, use the denser CSTRING encoding. + if (C->getOperand(NumOps-1)->isNullValue()) { + Code = bitc::CST_CODE_CSTRING; + --NumOps; // Don't encode the null, which isn't allowed by char6. + } else { + Code = bitc::CST_CODE_STRING; + AbbrevToUse = String8Abbrev; + } + bool isCStr7 = Code == bitc::CST_CODE_CSTRING; + bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING; + for (unsigned i = 0; i != NumOps; ++i) { unsigned char V = cast(C->getOperand(i))->getZExtValue(); Record.push_back(V); - isStr7 &= (V & 128) == 0; + isCStr7 &= (V & 128) == 0; + if (isCStrChar6) + isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } - if (isStr7) - AbbrevToUse = String7Abbrev; + + if (isCStrChar6) + AbbrevToUse = CString6Abbrev; + else if (isCStr7) + AbbrevToUse = CString7Abbrev; } else if (isa(C) || isa(V) || isa(V)) { Code = bitc::CST_CODE_AGGREGATE; diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 3bbf5f8394e..8e10418e21f 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -163,6 +163,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID) { case bitc::CST_CODE_WIDE_INTEGER: return "WIDE_INTEGER"; case bitc::CST_CODE_FLOAT: return "FLOAT"; case bitc::CST_CODE_AGGREGATE: return "AGGREGATE"; + case bitc::CST_CODE_STRING: return "STRING"; + case bitc::CST_CODE_CSTRING: return "CSTRING"; case bitc::CST_CODE_CE_BINOP: return "CE_BINOP"; case bitc::CST_CODE_CE_CAST: return "CE_CAST"; case bitc::CST_CODE_CE_GEP: return "CE_GEP";