diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html index 0b1a875da1a..f4c03222b20 100644 --- a/docs/ProgrammersManual.html +++ b/docs/ProgrammersManual.html @@ -81,6 +81,7 @@ option
  • "llvm/ADT/SmallSet.h"
  • "llvm/ADT/SmallPtrSet.h"
  • "llvm/ADT/DenseSet.h"
  • +
  • "llvm/ADT/SparseSet.h"
  • "llvm/ADT/FoldingSet.h"
  • <set>
  • "llvm/ADT/SetVector.h"
  • @@ -1488,6 +1489,24 @@ href="#dss_densemap">DenseMap has. + +

    + "llvm/ADT/SparseSet.h" +

    + +
    + +

    SparseSet holds a small number of objects identified by unsigned keys of +moderate size. It uses a lot of memory, but provides operations that are +almost as fast as a vector. Typical keys are physical registers, virtual +registers, or numbered basic blocks.

    + +

    SparseSet is useful for algorithms that need very fast clear/find/insert/erase +and fast iteration over small sets. It is not intended for building composite +data structures.

    + +
    +

    "llvm/ADT/FoldingSet.h" diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h new file mode 100644 index 00000000000..eba5cc0df53 --- /dev/null +++ b/include/llvm/ADT/SparseSet.h @@ -0,0 +1,259 @@ +//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SparseSet class derived from the version described in +// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters +// on Programming Languages and Systems, Volume 2 Issue 1-4, March–Dec. 1993. +// +// A sparse set holds a small number of objects identified by integer keys from +// a moderately sized universe. The sparse set uses more memory than other +// containers in order to provide faster operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SPARSESET_H +#define LLVM_ADT_SPARSESET_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// SparseSetFunctor - Objects in a SparseSet are identified by small integer +/// keys. A functor object is used to compute the key of an object. The +/// functor's operator() must return an unsigned smaller than the universe. +/// +/// The default functor implementation forwards to a getSparseSetKey() method +/// on the object. It is intended for sparse sets holding ad-hoc structs. +/// +template +struct SparseSetFunctor { + unsigned operator()(const ValueT &Val) { + return Val.getSparseSetKey(); + } +}; + +/// SparseSetFunctor - Provide a trivial identity functor for +/// SparseSet. +/// +template<> struct SparseSetFunctor { + unsigned operator()(unsigned Val) { return Val; } +}; + +/// SparseSet - Fast set implementation for objects that can be identified by +/// small unsigned keys. +/// +/// SparseSet allocates memory proportional to the size of the key universe, so +/// it is not recommended for building composite data structures. It is useful +/// for algorithms that require a single set with fast operations. +/// +/// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast +/// clear() and iteration as fast as a vector. The find(), insert(), and +/// erase() operations are all constant time, and typically faster than a hash +/// table. The iteration order doesn't depend on numerical key values, it only +/// depends on the order of insert() and erase() operations. When no elements +/// have been erased, the iteration order is the insertion order. +/// +/// Compared to BitVector, SparseSet uses 8x-40x more memory, but +/// offers constant-time clear() and size() operations as well as fast +/// iteration independent on the size of the universe. +/// +/// SparseSet contains a dense vector holding all the objects and a sparse +/// array holding indexes into the dense vector. Most of the memory is used by +/// the sparse array which is the size of the key universe. The SparseT +/// template parameter provides a space/speed tradeoff for sets holding many +/// elements. +/// +/// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse +/// array uses 4 x Universe bytes. +/// +/// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache +/// lines, but the sparse array is 4x smaller. N is the number of elements in +/// the set. +/// +/// For sets that may grow to thousands of elements, SparseT should be set to +/// uint16_t or uint32_t. +/// +/// @param ValueT The type of objects in the set. +/// @param SparseT An unsigned integer type. See above. +/// @param KeyFunctorT A functor that computes the unsigned key of a ValueT. +/// +template > +class SparseSet { + typedef SmallVector DenseT; + DenseT Dense; + SparseT *Sparse; + unsigned Universe; + KeyFunctorT KeyOf; + + // Disable copy construction and assignment. + // This data structure is not meant to be used that way. + SparseSet(const SparseSet&); // DO NOT IMPLEMENT. + SparseSet &operator=(const SparseSet&); // DO NOT IMPLEMENT. + +public: + typedef ValueT value_type; + typedef ValueT &reference; + typedef const ValueT &const_reference; + typedef ValueT *pointer; + typedef const ValueT *const_pointer; + + SparseSet() : Sparse(0), Universe(0) {} + ~SparseSet() { free(Sparse); } + + /// setUniverse - Set the universe size which determines the largest key the + /// set can hold. The universe must be sized before any elements can be + /// added. + /// + /// @param U Universe size. All object keys must be less than U. + /// + void setUniverse(unsigned U) { + // It's not hard to resize the universe on a non-empty set, but it doesn't + // seem like a likely use case, so we can add that code when we need it. + assert(empty() && "Can only resize universe on an empty map"); + // Hysteresis prevents needless reallocations. + if (U >= Universe/4 && U <= Universe) + return; + free(Sparse); + // The Sparse array doesn't actually need to be initialized, so malloc + // would be enough here, but that will cause tools like valgrind to + // complain about branching on uninitialized data. + Sparse = reinterpret_cast(calloc(U, sizeof(SparseT))); + Universe = U; + } + + // Import trivial vector stuff from DenseT. + typedef typename DenseT::iterator iterator; + typedef typename DenseT::const_iterator const_iterator; + + const_iterator begin() const { return Dense.begin(); } + const_iterator end() const { return Dense.end(); } + iterator begin() { return Dense.begin(); } + iterator end() { return Dense.end(); } + + /// empty - Returns true if the set is empty. + /// + /// This is not the same as BitVector::empty(). + /// + bool empty() const { return Dense.empty(); } + + /// size - Returns the number of elements in the set. + /// + /// This is not the same as BitVector::size() which returns the size of the + /// universe. + /// + unsigned size() const { return Dense.size(); } + + /// clear - Clears the set. This is a very fast constant time operation. + /// + void clear() { + // Sparse does not need to be cleared, see find(). + Dense.clear(); + } + + /// find - Find an element by its key. + /// + /// @param Key A valid key to find. + /// @returns An iterator to the element identified by key, or end(). + /// + iterator find(unsigned Key) { + assert(Key < Universe && "Key out of range"); + assert(std::numeric_limits::is_integer && + !std::numeric_limits::is_signed && + "SparseT must be an unsigned integer type"); + const unsigned Stride = std::numeric_limits::max() + 1u; + for (unsigned i = Sparse[Key], e = size(); i < e; i += Stride) { + const unsigned FoundKey = KeyOf(Dense[i]); + assert(FoundKey < Universe && "Invalid key in set. Did object mutate?"); + if (Key == FoundKey) + return begin() + i; + // Stride is 0 when SparseT >= unsigned. We don't need to loop. + if (!Stride) + break; + } + return end(); + } + + const_iterator find(unsigned Key) const { + return const_cast(this)->find(Key); + } + + /// count - Returns true if this set contains an element identified by Key. + /// + bool count(unsigned Key) const { + return find(Key) != end(); + } + + /// insert - Attempts to insert a new element. + /// + /// If Val is successfully inserted, return (I, true), where I is an iterator + /// pointing to the newly inserted element. + /// + /// If the set already contains an element with the same key as Val, return + /// (I, false), where I is an iterator pointing to the existing element. + /// + /// Insertion invalidates all iterators. + /// + std::pair insert(const ValueT &Val) { + unsigned Key = KeyOf(Val); + iterator I = find(Key); + if (I != end()) + return std::make_pair(I, false); + Sparse[Key] = size(); + Dense.push_back(Val); + return std::make_pair(end() - 1, true); + } + + /// erase - Erases an existing element identified by a valid iterator. + /// + /// This invalidates all iterators, but erase() returns an iterator pointing + /// to the next element. This makes it possible to erase selected elements + /// while iterating over the set: + /// + /// for (SparseSet::iterator I = Set.begin(); I != Set.end();) + /// if (test(*I)) + /// I = Set.erase(I); + /// else + /// ++I; + /// + /// Note that end() changes when elements are erased, unlike std::list. + /// + iterator erase(iterator I) { + assert(I - begin() < size() && "Invalid iterator"); + if (I != end() - 1) { + *I = Dense.back(); + unsigned BackKey = KeyOf(Dense.back()); + assert(BackKey < Universe && "Invalid key in set. Did object mutate?"); + Sparse[BackKey] = I - begin(); + } + // This depends on SmallVector::pop_back() not invalidating iterators. + // std::vector::pop_back() doesn't give that guarantee. + Dense.pop_back(); + return I; + } + + /// erase - Erases an element identified by Key, if it exists. + /// + /// @param Key The key identifying the element to erase. + /// @returns True when an element was erased, false if no element was found. + /// + bool erase(unsigned Key) { + iterator I = find(Key); + if (I == end()) + return false; + erase(I); + return true; + } + +}; + +} // end namespace llvm + +#endif diff --git a/unittests/ADT/SparseSetTest.cpp b/unittests/ADT/SparseSetTest.cpp new file mode 100644 index 00000000000..981d52e3a26 --- /dev/null +++ b/unittests/ADT/SparseSetTest.cpp @@ -0,0 +1,186 @@ +//===------ ADT/SparseSetTest.cpp - SparseSet unit tests - -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SparseSet.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +typedef SparseSet USet; + +// Empty set tests +TEST(SparseSetTest, EmptySet) { + USet Set; + EXPECT_TRUE(Set.empty()); + EXPECT_TRUE(Set.begin() == Set.end()); + EXPECT_EQ(0u, Set.size()); + + Set.setUniverse(10); + + // Lookups on empty set. + EXPECT_TRUE(Set.find(0) == Set.end()); + EXPECT_TRUE(Set.find(9) == Set.end()); + + // Same thing on a const reference. + const USet &CSet = Set; + EXPECT_TRUE(CSet.empty()); + EXPECT_TRUE(CSet.begin() == CSet.end()); + EXPECT_EQ(0u, CSet.size()); + EXPECT_TRUE(CSet.find(0) == CSet.end()); + USet::const_iterator I = CSet.find(5); + EXPECT_TRUE(I == CSet.end()); +} + +// Single entry set tests +TEST(SparseSetTest, SingleEntrySet) { + USet Set; + Set.setUniverse(10); + std::pair IP = Set.insert(5); + EXPECT_TRUE(IP.second); + EXPECT_TRUE(IP.first == Set.begin()); + + EXPECT_FALSE(Set.empty()); + EXPECT_FALSE(Set.begin() == Set.end()); + EXPECT_TRUE(Set.begin() + 1 == Set.end()); + EXPECT_EQ(1u, Set.size()); + + EXPECT_TRUE(Set.find(0) == Set.end()); + EXPECT_TRUE(Set.find(9) == Set.end()); + + EXPECT_FALSE(Set.count(0)); + EXPECT_TRUE(Set.count(5)); + + // Redundant insert. + IP = Set.insert(5); + EXPECT_FALSE(IP.second); + EXPECT_TRUE(IP.first == Set.begin()); + + // Erase non-existant element. + EXPECT_FALSE(Set.erase(1)); + EXPECT_EQ(1u, Set.size()); + EXPECT_EQ(5u, *Set.begin()); + + // Erase iterator. + USet::iterator I = Set.find(5); + EXPECT_TRUE(I == Set.begin()); + I = Set.erase(I); + EXPECT_TRUE(I == Set.end()); + EXPECT_TRUE(Set.empty()); +} + +// Multiple entry set tests +TEST(SparseSetTest, MultipleEntrySet) { + USet Set; + Set.setUniverse(10); + + Set.insert(5); + Set.insert(3); + Set.insert(2); + Set.insert(1); + Set.insert(4); + EXPECT_EQ(5u, Set.size()); + + // Without deletions, iteration order == insertion order. + USet::const_iterator I = Set.begin(); + EXPECT_EQ(5u, *I); + ++I; + EXPECT_EQ(3u, *I); + ++I; + EXPECT_EQ(2u, *I); + ++I; + EXPECT_EQ(1u, *I); + ++I; + EXPECT_EQ(4u, *I); + ++I; + EXPECT_TRUE(I == Set.end()); + + // Redundant insert. + std::pair IP = Set.insert(3); + EXPECT_FALSE(IP.second); + EXPECT_TRUE(IP.first == Set.begin() + 1); + + // Erase last element by key. + EXPECT_TRUE(Set.erase(4)); + EXPECT_EQ(4u, Set.size()); + EXPECT_FALSE(Set.count(4)); + EXPECT_FALSE(Set.erase(4)); + EXPECT_EQ(4u, Set.size()); + EXPECT_FALSE(Set.count(4)); + + // Erase first element by key. + EXPECT_TRUE(Set.count(5)); + EXPECT_TRUE(Set.find(5) == Set.begin()); + EXPECT_TRUE(Set.erase(5)); + EXPECT_EQ(3u, Set.size()); + EXPECT_FALSE(Set.count(5)); + EXPECT_FALSE(Set.erase(5)); + EXPECT_EQ(3u, Set.size()); + EXPECT_FALSE(Set.count(5)); + + Set.insert(6); + Set.insert(7); + EXPECT_EQ(5u, Set.size()); + + // Erase last element by iterator. + I = Set.erase(Set.end() - 1); + EXPECT_TRUE(I == Set.end()); + EXPECT_EQ(4u, Set.size()); + + // Erase second element by iterator. + I = Set.erase(Set.begin() + 1); + EXPECT_TRUE(I == Set.begin() + 1); + + // Clear and resize the universe. + Set.clear(); + EXPECT_FALSE(Set.count(5)); + Set.setUniverse(1000); + + // Add more than 256 elements. + for (unsigned i = 100; i != 800; ++i) + Set.insert(i); + + for (unsigned i = 0; i != 10; ++i) + Set.erase(i); + + for (unsigned i = 100; i != 800; ++i) + EXPECT_TRUE(Set.count(i)); + + EXPECT_FALSE(Set.count(99)); + EXPECT_FALSE(Set.count(800)); + EXPECT_EQ(700u, Set.size()); +} + +struct Alt { + unsigned Value; + explicit Alt(unsigned x) : Value(x) {} + unsigned getSparseSetKey() const { return Value - 1000; } +}; + +TEST(SparseSetTest, AltStructSet) { + typedef SparseSet ASet; + ASet Set; + Set.setUniverse(10); + Set.insert(Alt(1005)); + + ASet::iterator I = Set.find(5); + ASSERT_TRUE(I == Set.begin()); + EXPECT_EQ(1005u, I->Value); + + Set.insert(Alt(1006)); + Set.insert(Alt(1006)); + I = Set.erase(Set.begin()); + ASSERT_TRUE(I == Set.begin()); + EXPECT_EQ(1006u, I->Value); + + EXPECT_FALSE(Set.erase(5)); + EXPECT_TRUE(Set.erase(6)); +} +} // namespace diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 8420cd4e101..c6a904ee45d 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -71,6 +71,7 @@ add_llvm_unittest(ADT ADT/SmallStringTest.cpp ADT/SmallVectorTest.cpp ADT/SparseBitVectorTest.cpp + ADT/SparseSetTest.cpp ADT/StringMapTest.cpp ADT/StringRefTest.cpp ADT/TripleTest.cpp