diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index b6073a44bb3..87489befc0e 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -206,12 +206,17 @@ enum class HashT : uint32_t; /// Trait for lookups into the on-disk hash table for the binary instrprof /// format. class InstrProfLookupTrait { - std::vector CountBuffer; + std::vector DataBuffer; IndexedInstrProf::HashT HashType; public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {} - typedef InstrProfRecord data_type; + struct data_type { + data_type(StringRef Name, ArrayRef Data) + : Name(Name), Data(Data) {} + StringRef Name; + ArrayRef Data; + }; typedef StringRef internal_key_type; typedef StringRef external_key_type; typedef uint64_t hash_value_type; @@ -234,25 +239,20 @@ public: return StringRef((const char *)D, N); } - InstrProfRecord ReadData(StringRef K, const unsigned char *D, offset_type N) { - if (N < 2 * sizeof(uint64_t) || N % sizeof(uint64_t)) { + data_type ReadData(StringRef K, const unsigned char *D, offset_type N) { + DataBuffer.clear(); + if (N % sizeof(uint64_t)) // The data is corrupt, don't try to read it. - CountBuffer.clear(); - return InstrProfRecord("", 0, CountBuffer); - } + return data_type("", DataBuffer); using namespace support; - - // The first stored value is the hash. - uint64_t Hash = endian::readNext(D); - // Each counter follows. - unsigned NumCounters = N / sizeof(uint64_t) - 1; - CountBuffer.clear(); - CountBuffer.reserve(NumCounters - 1); - for (unsigned I = 0; I < NumCounters; ++I) - CountBuffer.push_back(endian::readNext(D)); - - return InstrProfRecord(K, Hash, CountBuffer); + // We just treat the data as opaque here. It's simpler to handle in + // IndexedInstrProfReader. + unsigned NumEntries = N / sizeof(uint64_t); + DataBuffer.reserve(NumEntries); + for (unsigned I = 0; I < NumEntries; ++I) + DataBuffer.push_back(endian::readNext(D)); + return data_type(K, DataBuffer); } }; typedef OnDiskIterableChainedHashTable @@ -267,7 +267,11 @@ private: std::unique_ptr Index; /// Iterator over the profile data. InstrProfReaderIndex::data_iterator RecordIterator; - /// The maximal execution count among all fucntions. + /// Offset into our current data set. + size_t CurrentOffset; + /// The file format version of the profile data. + uint64_t FormatVersion; + /// The maximal execution count among all functions. uint64_t MaxFunctionCount; IndexedInstrProfReader(const IndexedInstrProfReader &) LLVM_DELETED_FUNCTION; @@ -275,7 +279,7 @@ private: LLVM_DELETED_FUNCTION; public: IndexedInstrProfReader(std::unique_ptr DataBuffer) - : DataBuffer(std::move(DataBuffer)), Index(nullptr) {} + : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {} /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); @@ -286,7 +290,7 @@ public: std::error_code readNextRecord(InstrProfRecord &Record) override; /// Fill Counts with the profile data for the given function name. - std::error_code getFunctionCounts(StringRef FuncName, uint64_t &FuncHash, + std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); /// Return the maximum of all known function counts. uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index 6e68bee30eb..93f4128c073 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -16,6 +16,7 @@ #define LLVM_PROFILEDATA_INSTRPROF_WRITER_H_ #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/DataTypes.h" @@ -28,13 +29,13 @@ namespace llvm { /// Writer for instrumentation based profile data. class InstrProfWriter { public: - struct CounterData { - uint64_t Hash; - std::vector Counts; - }; + typedef SmallDenseMap, 1> CounterData; private: StringMap FunctionData; + uint64_t MaxFunctionCount; public: + InstrProfWriter() : MaxFunctionCount(0) {} + /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h index 776170407bc..792863d0707 100644 --- a/lib/ProfileData/InstrProfIndexed.h +++ b/lib/ProfileData/InstrProfIndexed.h @@ -46,7 +46,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) { } const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 1; +const uint64_t Version = 2; const HashT HashType = HashT::MD5; } diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index e8f64614df5..5c1993766aa 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -307,8 +307,8 @@ std::error_code IndexedInstrProfReader::readHeader() { return error(instrprof_error::bad_magic); // Read the version. - uint64_t Version = endian::readNext(Cur); - if (Version != IndexedInstrProf::Version) + FormatVersion = endian::readNext(Cur); + if (FormatVersion > IndexedInstrProf::Version) return error(instrprof_error::unsupported_version); // Read the maximal function count. @@ -331,18 +331,31 @@ std::error_code IndexedInstrProfReader::readHeader() { } std::error_code IndexedInstrProfReader::getFunctionCounts( - StringRef FuncName, uint64_t &FuncHash, std::vector &Counts) { - const auto &Iter = Index->find(FuncName); + StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { + auto Iter = Index->find(FuncName); if (Iter == Index->end()) return error(instrprof_error::unknown_function); - // Found it. Make sure it's valid before giving back a result. - const InstrProfRecord &Record = *Iter; - if (Record.Name.empty()) - return error(instrprof_error::malformed); - FuncHash = Record.Hash; - Counts = Record.Counts; - return success(); + // Found it. Look for counters with the right hash. + ArrayRef Data = (*Iter).Data; + uint64_t NumCounts; + for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) { + // The function hash comes first. + uint64_t FoundHash = Data[I++]; + // In v1, we have at least one count. Later, we have the number of counts. + if (I == E) + return error(instrprof_error::malformed); + NumCounts = FormatVersion == 1 ? E - I : Data[I++]; + // If we have more counts than data, this is bogus. + if (I + NumCounts > E) + return error(instrprof_error::malformed); + // Check for a match and fill the vector if there is one. + if (FoundHash == FuncHash) { + Counts = Data.slice(I, NumCounts); + return success(); + } + } + return error(instrprof_error::hash_mismatch); } std::error_code @@ -351,10 +364,30 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { if (RecordIterator == Index->data_end()) return error(instrprof_error::eof); - // Read the next one. - Record = *RecordIterator; - ++RecordIterator; - if (Record.Name.empty()) + // Record the current function name. + Record.Name = (*RecordIterator).Name; + + ArrayRef Data = (*RecordIterator).Data; + // Valid data starts with a hash and either a count or the number of counts. + if (CurrentOffset + 1 > Data.size()) return error(instrprof_error::malformed); + // First we have a function hash. + Record.Hash = Data[CurrentOffset++]; + // In version 1 we knew the number of counters implicitly, but in newer + // versions we store the number of counters next. + uint64_t NumCounts = + FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++]; + if (CurrentOffset + NumCounts > Data.size()) + return error(instrprof_error::malformed); + // And finally the counts themselves. + Record.Counts = Data.slice(CurrentOffset, NumCounts); + + // If we've exhausted this function's data, increment the record. + CurrentOffset += NumCounts; + if (CurrentOffset == Data.size()) { + ++RecordIterator; + CurrentOffset = 0; + } + return success(); } diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index e55c2991813..1c4a4fede28 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -45,7 +45,9 @@ public: offset_type N = K.size(); LE.write(N); - offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t); + offset_type M = 0; + for (const auto &Counts : *V) + M += (2 + Counts.second.size()) * sizeof(uint64_t); LE.write(M); return std::make_pair(N, M); @@ -59,9 +61,13 @@ public: offset_type) { using namespace llvm::support; endian::Writer LE(Out); - LE.write(V->Hash); - for (uint64_t I : V->Counts) - LE.write(I); + + for (const auto &Counts : *V) { + LE.write(Counts.first); + LE.write(Counts.second.size()); + for (uint64_t I : Counts.second) + LE.write(I); + } } }; } @@ -70,41 +76,44 @@ std::error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash, ArrayRef Counters) { - auto Where = FunctionData.find(FunctionName); - if (Where == FunctionData.end()) { - // If this is the first time we've seen this function, just add it. - auto &Data = FunctionData[FunctionName]; - Data.Hash = FunctionHash; - Data.Counts = Counters; + auto &CounterData = FunctionData[FunctionName]; + + auto Where = CounterData.find(FunctionHash); + if (Where == CounterData.end()) { + // We've never seen a function with this name and hash, add it. + CounterData[FunctionHash] = Counters; + // We keep track of the max function count as we go for simplicity. + if (Counters[0] > MaxFunctionCount) + MaxFunctionCount = Counters[0]; return instrprof_error::success; } - auto &Data = Where->getValue(); - // We can only add to existing functions if they match, so we check the hash - // and number of counters. - if (Data.Hash != FunctionHash) - return instrprof_error::hash_mismatch; - if (Data.Counts.size() != Counters.size()) + // We're updating a function we've seen before. + auto &FoundCounters = Where->second; + // If the number of counters doesn't match we either have bad data or a hash + // collision. + if (FoundCounters.size() != Counters.size()) return instrprof_error::count_mismatch; - // These match, add up the counters. + for (size_t I = 0, E = Counters.size(); I < E; ++I) { - if (Data.Counts[I] + Counters[I] < Data.Counts[I]) + if (FoundCounters[I] + Counters[I] < FoundCounters[I]) return instrprof_error::counter_overflow; - Data.Counts[I] += Counters[I]; + FoundCounters[I] += Counters[I]; } + // We keep track of the max function count as we go for simplicity. + if (FoundCounters[0] > MaxFunctionCount) + MaxFunctionCount = FoundCounters[0]; + return instrprof_error::success; } void InstrProfWriter::write(raw_fd_ostream &OS) { OnDiskChainedHashTableGenerator Generator; - uint64_t MaxFunctionCount = 0; // Populate the hash table generator. - for (const auto &I : FunctionData) { + std::vector CounterBuffer; + for (const auto &I : FunctionData) Generator.insert(I.getKey(), &I.getValue()); - if (I.getValue().Counts[0] > MaxFunctionCount) - MaxFunctionCount = I.getValue().Counts[0]; - } using namespace llvm::support; endian::Writer LE(OS); diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v1 b/test/tools/llvm-profdata/Inputs/compat.profdata.v1 new file mode 100644 index 00000000000..fd17459d091 Binary files /dev/null and b/test/tools/llvm-profdata/Inputs/compat.profdata.v1 differ diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext new file mode 100644 index 00000000000..14da3374b5e --- /dev/null +++ b/test/tools/llvm-profdata/compat.proftext @@ -0,0 +1,47 @@ +# Compatibility tests for older profile format versions. These ensure +# that we don't break compatibility with an older profile version +# without noticing it. + +# The input file at %S/Inputs/compat.profdata.v1 was generated with +# llvm-profdata merge from r214548. + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function function_count_only --counts | FileCheck %s -check-prefix=FUNC_COUNT_ONLY +function_count_only +0 +1 +97531 +# FUNC_COUNT_ONLY: Hash: 0x{{0+$}} +# FUNC_COUNT_ONLY-NEXT: Counters: 1 +# FUNC_COUNT_ONLY-NEXT: Function count: 97531 +# FUNC_COUNT_ONLY-NEXT: Block counts: [] + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES +name with spaces +1024 +2 +0 +0 +# SPACES: Hash: 0x{{0+}}400 +# SPACES-NEXT: Counters: 2 +# SPACES-NEXT: Function count: 0 +# SPACES-NEXT: Block counts: [0] + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function large_numbers --counts | FileCheck %s -check-prefix=LARGENUM +large_numbers +4611686018427387903 +6 +2305843009213693952 +1152921504606846976 +576460752303423488 +288230376151711744 +144115188075855872 +72057594037927936 +# LARGENUM: Hash: 0x3fffffffffffffff +# LARGENUM-NEXT: Counters: 6 +# LARGENUM-NEXT: Function count: 2305843009213693952 +# LARGENUM-NEXT: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936] + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 | FileCheck %s -check-prefix=SUMMARY +# SUMMARY: Total functions: 3 +# SUMMARY: Maximum function count: 2305843009213693952 +# SUMMARY: Maximum internal block count: 1152921504606846976 diff --git a/test/tools/llvm-profdata/hash-mismatch.proftext b/test/tools/llvm-profdata/hash-mismatch.proftext index e4f1a4ca431..fe0d4fb4f6b 100644 --- a/test/tools/llvm-profdata/hash-mismatch.proftext +++ b/test/tools/llvm-profdata/hash-mismatch.proftext @@ -1,6 +1,18 @@ -# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s -# CHECK: hash-mismatch.proftext: foo: Function hash mismatch +# If we see the same function name, but with different hashes, make +# sure we keep both. +# RUN: llvm-profdata merge %s -o %t 2>&1 +# RUN: llvm-profdata show %t -all-functions -counts > %t.out + +# The function ordering is non-deterministic, so we need to do our +# checks in multiple runs. +# RUN: FileCheck -check-prefix=FOO3 -check-prefix=BOTH %s -input-file %t.out +# RUN: FileCheck -check-prefix=FOO4 -check-prefix=BOTH %s -input-file %t.out + +# FOO3: Hash: 0x{{0+}}3 +# FOO3-NEXT: Counters: 3 +# FOO3-NEXT: Function count: 1 +# FOO3-NEXT: Block counts: [2, 3] foo 3 3 @@ -8,6 +20,10 @@ foo 2 3 +# FOO4: Hash: 0x{{0+}}4 +# FOO4-NEXT: Counters: 4 +# FOO4-NEXT: Function count: 11 +# FOO4-NEXT: Block counts: [22, 33, 44] foo 4 4 @@ -15,3 +31,7 @@ foo 22 33 44 + +# BOTH: Total functions: 2 +# BOTH: Maximum function count: 11 +# BOTH: Maximum internal block count: 44