//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This class defines a generic trie structure. The trie structure // is immutable after creation, but the payload contained within it is not. // //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_TRIE_H #define LLVM_ADT_TRIE_H #include "llvm/ADT/GraphTraits.h" #include "llvm/Support/DOTGraphTraits.h" #include #include namespace llvm { // FIXME: // - Labels are usually small, maybe it's better to use SmallString // - Should we use char* during construction? // - Should we templatize Empty with traits-like interface? template class Trie { friend class GraphTraits >; friend class DOTGraphTraits >; public: class Node { friend class Trie; public: typedef std::vector NodeVectorType; typedef typename NodeVectorType::iterator iterator; typedef typename NodeVectorType::const_iterator const_iterator; private: enum QueryResult { Same = -3, StringIsPrefix = -2, LabelIsPrefix = -1, DontMatch = 0, HaveCommonPart }; struct NodeCmp { bool operator() (Node* N1, Node* N2) { return (N1->Label[0] < N2->Label[0]); } bool operator() (Node* N, char Id) { return (N->Label[0] < Id); } }; std::string Label; Payload Data; NodeVectorType Children; // Do not implement Node(const Node&); Node& operator=(const Node&); inline void addEdge(Node* N) { if (Children.empty()) Children.push_back(N); else { iterator I = std::lower_bound(Children.begin(), Children.end(), N, NodeCmp()); // FIXME: no dups are allowed Children.insert(I, N); } } inline void setEdge(Node* N) { char Id = N->Label[0]; iterator I = std::lower_bound(Children.begin(), Children.end(), Id, NodeCmp()); assert(I != Children.end() && "Node does not exists!"); *I = N; } QueryResult query(const std::string& s) const { unsigned i, l; unsigned l1 = s.length(); unsigned l2 = Label.length(); // Find the length of common part l = std::min(l1, l2); i = 0; while ((i < l) && (s[i] == Label[i])) ++i; if (i == l) { // One is prefix of another, find who is who if (l1 == l2) return Same; else if (i == l1) return StringIsPrefix; else return LabelIsPrefix; } else // s and Label have common (possible empty) part, return its length return (QueryResult)i; } public: inline explicit Node(const Payload& data, const std::string& label = ""): Label(label), Data(data) { } inline const Payload& data() const { return Data; } inline void setData(const Payload& data) { Data = data; } inline const std::string& label() const { return Label; } #if 0 inline void dump() { llvm::cerr << "Node: " << this << "\n" << "Label: " << Label << "\n" << "Children:\n"; for (iterator I = Children.begin(), E = Children.end(); I != E; ++I) llvm::cerr << (*I)->Label << "\n"; } #endif inline Node* getEdge(char Id) { Node* fNode = NULL; iterator I = std::lower_bound(Children.begin(), Children.end(), Id, NodeCmp()); if (I != Children.end() && (*I)->Label[0] == Id) fNode = *I; return fNode; } inline iterator begin() { return Children.begin(); } inline const_iterator begin() const { return Children.begin(); } inline iterator end () { return Children.end(); } inline const_iterator end () const { return Children.end(); } inline size_t size () const { return Children.size(); } inline bool empty() const { return Children.empty(); } inline const Node* &front() const { return Children.front(); } inline Node* &front() { return Children.front(); } inline const Node* &back() const { return Children.back(); } inline Node* &back() { return Children.back(); } }; private: std::vector Nodes; Payload Empty; inline Node* addNode(const Payload& data, const std::string label = "") { Node* N = new Node(data, label); Nodes.push_back(N); return N; } inline Node* splitEdge(Node* N, char Id, size_t index) { Node* eNode = N->getEdge(Id); assert(eNode && "Node doesn't exist"); const std::string &l = eNode->Label; assert(index > 0 && index < l.length() && "Trying to split too far!"); std::string l1 = l.substr(0, index); std::string l2 = l.substr(index); Node* nNode = addNode(Empty, l1); N->setEdge(nNode); eNode->Label = l2; nNode->addEdge(eNode); return nNode; } // Do not implement Trie(const Trie&); Trie& operator=(const Trie&); public: inline explicit Trie(const Payload& empty):Empty(empty) { addNode(Empty); } inline ~Trie() { for (unsigned i = 0, e = Nodes.size(); i != e; ++i) delete Nodes[i]; } inline Node* getRoot() const { return Nodes[0]; } bool addString(const std::string& s, const Payload& data); const Payload& lookup(const std::string& s) const; }; // Define this out-of-line to dissuade the C++ compiler from inlining it. template bool Trie::addString(const std::string& s, const Payload& data) { Node* cNode = getRoot(); Node* tNode = NULL; std::string s1(s); while (tNode == NULL) { char Id = s1[0]; if (Node* nNode = cNode->getEdge(Id)) { typename Node::QueryResult r = nNode->query(s1); switch (r) { case Node::Same: case Node::StringIsPrefix: // Currently we don't allow to have two strings in the trie one // being a prefix of another. This should be fixed. assert(0 && "FIXME!"); return false; case Node::DontMatch: assert(0 && "Impossible!"); return false; case Node::LabelIsPrefix: s1 = s1.substr(nNode->label().length()); cNode = nNode; break; default: nNode = splitEdge(cNode, Id, r); tNode = addNode(data, s1.substr(r)); nNode->addEdge(tNode); } } else { tNode = addNode(data, s1); cNode->addEdge(tNode); } } return true; } template const Payload& Trie::lookup(const std::string& s) const { Node* cNode = getRoot(); Node* tNode = NULL; std::string s1(s); while (tNode == NULL) { char Id = s1[0]; if (Node* nNode = cNode->getEdge(Id)) { typename Node::QueryResult r = nNode->query(s1); switch (r) { case Node::Same: tNode = nNode; break; case Node::StringIsPrefix: return Empty; case Node::DontMatch: assert(0 && "Impossible!"); return Empty; case Node::LabelIsPrefix: s1 = s1.substr(nNode->label().length()); cNode = nNode; break; default: return Empty; } } else return Empty; } return tNode->data(); } template struct GraphTraits > { typedef Trie TrieType; typedef typename TrieType::Node NodeType; typedef typename NodeType::iterator ChildIteratorType; static inline NodeType *getEntryNode(const TrieType& T) { return T.getRoot(); } static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } typedef typename std::vector::const_iterator nodes_iterator; static inline nodes_iterator nodes_begin(const TrieType& G) { return G.Nodes.begin(); } static inline nodes_iterator nodes_end(const TrieType& G) { return G.Nodes.end(); } }; template struct DOTGraphTraits > : public DefaultDOTGraphTraits { typedef typename Trie::Node NodeType; typedef typename GraphTraits >::ChildIteratorType EdgeIter; static std::string getGraphName(const Trie& T) { return "Trie"; } static std::string getNodeLabel(NodeType* Node, const Trie& T, bool ShortNames) { if (T.getRoot() == Node) return ""; else return Node->label(); } static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) { NodeType* N = *I; return N->label().substr(0, 1); } static std::string getNodeAttributes(const NodeType* Node, const Trie& T) { if (Node->data() != T.Empty) return "color=blue"; return ""; } }; } // end of llvm namespace #endif // LLVM_ADT_TRIE_H