From 1a4b19ef9b870d8c914bcd5ceb520a64a9a2cc52 Mon Sep 17 00:00:00 2001 From: Talin Date: Sat, 18 Feb 2012 21:00:49 +0000 Subject: [PATCH] Hashing.h - utilities for hashing various data types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150890 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/Hashing.h | 180 ++++++++++++++++++++++++++++++++++ lib/Support/CMakeLists.txt | 1 + lib/Support/Hashing.cpp | 46 +++++++++ unittests/ADT/HashingTest.cpp | 57 +++++++++++ unittests/CMakeLists.txt | 1 + 5 files changed, 285 insertions(+) create mode 100644 include/llvm/ADT/Hashing.h create mode 100644 lib/Support/Hashing.cpp create mode 100644 unittests/ADT/HashingTest.cpp diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h new file mode 100644 index 00000000000..07c62efe779 --- /dev/null +++ b/include/llvm/ADT/Hashing.h @@ -0,0 +1,180 @@ +//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for computing hash values for various data types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_HASHING_H +#define LLVM_ADT_HASHING_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +/// Class to compute a hash value from multiple data fields of arbitrary +/// types. Note that if you are hashing a single data type, such as a +/// string, it may be cheaper to use a hash algorithm that is tailored +/// for that specific data type. +/// Typical Usage: +/// GeneralHash Hash; +/// Hash.add(someValue); +/// Hash.add(someOtherValue); +/// return Hash.finish(); +/// Adapted from MurmurHash2 by Austin Appleby +class GeneralHash { +private: + enum { + M = 0x5bd1e995 + }; + unsigned Hash; + unsigned Count; +public: + GeneralHash(unsigned Seed = 0) : Hash(Seed), Count(0) {} + + /// Add a pointer value. + /// Note: this adds pointers to the hash using sizes and endianness that + /// depend on the host. It doesn't matter however, because hashing on + /// pointer values is inherently unstable. + template + GeneralHash& add(const T *PtrVal) { + addBits(&PtrVal, &PtrVal + 1); + return *this; + } + + /// Add an ArrayRef of arbitrary data. + template + GeneralHash& add(ArrayRef ArrayVal) { + addBits(ArrayVal.begin(), ArrayVal.end()); + return *this; + } + + /// Add a string + GeneralHash& add(StringRef StrVal) { + addBits(StrVal.begin(), StrVal.end()); + return *this; + } + + /// Add an signed 32-bit integer. + GeneralHash& add(int32_t Data) { + addInt(uint32_t(Data)); + return *this; + } + + /// Add an unsigned 32-bit integer. + GeneralHash& add(uint32_t Data) { + addInt(Data); + return *this; + } + + /// Add an signed 64-bit integer. + GeneralHash& add(int64_t Data) { + addInt(uint64_t(Data)); + return *this; + } + + /// Add an unsigned 64-bit integer. + GeneralHash& add(uint64_t Data) { + addInt(Data); + return *this; + } + + /// Add a float + GeneralHash& add(float Data) { + union { + float D; uint32_t I; + }; + D = Data; + addInt(I); + return *this; + } + + /// Add a double + GeneralHash& add(double Data) { + union { + double D; uint64_t I; + }; + D = Data; + addInt(I); + return *this; + } + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + unsigned finish() { + mix(Count); + Hash ^= Hash >> 13; + Hash *= M; + Hash ^= Hash >> 15; + return Hash; + } + +private: + void mix(uint32_t Data) { + ++Count; + Data *= M; + Data ^= Data >> 24; + Data *= M; + Hash *= M; + Hash ^= Data; + } + + // Add a single uint32 value + void addInt(uint32_t Val) { + mix(Val); + } + + // Add a uint64 value + void addInt(uint64_t Val) { + mix(uint32_t(Val >> 32)); + mix(uint32_t(Val)); + } + + template + struct addBitsImpl { + static void add(GeneralHash &Hash, const T *I, const T *E) { + Hash.addUnaligned( + reinterpret_cast(I), + reinterpret_cast(E)); + } + }; + + template + struct addBitsImpl { + static void add(GeneralHash &Hash, const T *I, const T *E) { + Hash.addAligned( + reinterpret_cast(I), + reinterpret_cast(E)); + } + }; + + // Add a range of bits from I to E. + template + void addBits(const T *I, const T *E) { + addBitsImpl::Alignment_GreaterEqual_4Bytes>::add(*this, I, E); + } + + // Add a range of uint32s + void addAligned(const uint32_t *I, const uint32_t *E) { + while (I < E) { + mix(*I++); + } + } + + // Add a possibly unaligned sequence of bytes. + void addUnaligned(const uint8_t *I, const uint8_t *E); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 6cec47df67e..0b69238274e 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_library(LLVMSupport FoldingSet.cpp FormattedStream.cpp GraphWriter.cpp + Hashing.cpp IntEqClasses.cpp IntervalMap.cpp IntrusiveRefCntPtr.cpp diff --git a/lib/Support/Hashing.cpp b/lib/Support/Hashing.cpp new file mode 100644 index 00000000000..89b84530afa --- /dev/null +++ b/lib/Support/Hashing.cpp @@ -0,0 +1,46 @@ +//===-- llvm/ADT/Hashing.cpp - Utilities for hashing ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Hashing.h" + +namespace llvm { + +// Add a possibly unaligned sequence of bytes. +void GeneralHash::addUnaligned(const uint8_t *I, const uint8_t *E) { + ptrdiff_t Length = E - I; + if (uintptr_t(I) & 3 == 0) { + while (Length > 3) { + mix(*reinterpret_cast(I)); + I += 4; + Length -= 4; + } + } else { + while (Length > 3) { + mix( + uint32_t(I[0]) + + (uint32_t(I[1]) << 8) + + (uint32_t(I[2]) << 16) + + (uint32_t(I[3]) << 24)); + I += 4; + Length -= 4; + } + } + + if (Length & 3) { + uint32_t Data = 0; + switch (Length & 3) { + case 3: Data |= uint32_t(I[2]) << 16; // fall through + case 2: Data |= uint32_t(I[1]) << 8; // fall through + case 1: Data |= uint32_t(I[0]); break; + } + mix(Data); + } +} + +} diff --git a/unittests/ADT/HashingTest.cpp b/unittests/ADT/HashingTest.cpp new file mode 100644 index 00000000000..18bfb722f4a --- /dev/null +++ b/unittests/ADT/HashingTest.cpp @@ -0,0 +1,57 @@ +//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hashing.h unit tests. +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/ADT/Hashing.h" + +using namespace llvm; + +namespace { + +TEST(HashingTest, EmptyHashTest) { + GeneralHash Hash; + ASSERT_EQ(0u, Hash.finish()); +} + +TEST(HashingTest, IntegerHashTest) { + ASSERT_TRUE(GeneralHash().add(1).finish() == GeneralHash().add(1).finish()); + ASSERT_TRUE(GeneralHash().add(1).finish() != GeneralHash().add(2).finish()); +} + +TEST(HashingTest, StringHashTest) { + ASSERT_TRUE( + GeneralHash().add("abc").finish() == GeneralHash().add("abc").finish()); + ASSERT_TRUE( + GeneralHash().add("abc").finish() != GeneralHash().add("abcd").finish()); +} + +TEST(HashingTest, FloatHashTest) { + ASSERT_TRUE( + GeneralHash().add(1.0f).finish() == GeneralHash().add(1.0f).finish()); + ASSERT_TRUE( + GeneralHash().add(1.0f).finish() != GeneralHash().add(2.0f).finish()); +} + +TEST(HashingTest, DoubleHashTest) { + ASSERT_TRUE(GeneralHash().add(1.).finish() == GeneralHash().add(1.).finish()); + ASSERT_TRUE(GeneralHash().add(1.).finish() != GeneralHash().add(2.).finish()); +} + +TEST(HashingTest, IntegerArrayHashTest) { + int a[] = { 1, 2 }; + int b[] = { 1, 3 }; + ASSERT_TRUE(GeneralHash().add(a).finish() == GeneralHash().add(a).finish()); + ASSERT_TRUE(GeneralHash().add(a).finish() != GeneralHash().add(b).finish()); +} + +} diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 977efd790b7..8420cd4e101 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -60,6 +60,7 @@ add_llvm_unittest(ADT ADT/DenseMapTest.cpp ADT/DenseSetTest.cpp ADT/FoldingSet.cpp + ADT/HashingTest.cpp ADT/ilistTest.cpp ADT/ImmutableSetTest.cpp ADT/IntEqClassesTest.cpp