| //===--- PrefixMap.h - A ternary tree ---------------------------*- C++ -*-===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines a data structure which stores a map whose keys are |
| // sequences of comparable values. Specifically, it implements the trie |
| // variant known as a ternary search tree. In performance, it is similar |
| // to a binary tree; however, it has two properties specific to the use of |
| // homogeneous sequences as keys: |
| // |
| // - Individual entries do not necessarily store the entire key; instead, |
| // the key data may be spread over a sequence of nodes. This causes the |
| // tree to be much more space-compact when keys share common prefixes. |
| // This does require an extra pointer of storage in each node. |
| // |
| // Unlike some traditional presentations of ternary trees, this |
| // implementation allows more than one key element per node. |
| // |
| // - It is efficient to find entries that share a common prefix with |
| // a given key. |
| // |
| // FIXME: The current implementation doesn't rebalance siblings. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef SWIFT_BASIC_PREFIXMAP_H |
| #define SWIFT_BASIC_PREFIXMAP_H |
| |
| #include "swift/Basic/Debug.h" |
| #include "swift/Basic/LLVM.h" |
| #include "swift/Basic/type_traits.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/PointerIntPair.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <algorithm> |
| #include <iterator> |
| |
| namespace swift { |
| |
| void printOpaquePrefixMap(raw_ostream &out, void *root, |
| void (*printNode)(raw_ostream &, void*)); |
| template <class KeyElementType> class PrefixMapKeyPrinter; |
| |
| /// A map whose keys are sequences of comparable values, optimized for |
| /// finding a mapped value for the longest matching initial subsequence. |
| template <class KeyElementType, class ValueType, |
| size_t InlineKeyCapacity = std::max( |
| (sizeof(void *) - 1) / sizeof(KeyElementType), size_t(1))> |
| class PrefixMap { |
| public: |
| using KeyType = ArrayRef<KeyElementType>; |
| |
| static_assert(IsTriviallyCopyable<KeyElementType>::value, |
| "key element type must be trivially copyable"); |
| static_assert(IsTriviallyConstructible<KeyElementType>::value, |
| "key element type must be trivially default-initializable"); |
| |
| private: |
| template <typename T> |
| union UninitializedStorage { |
| alignas(T) char Storage[sizeof(T)]; |
| |
| template <typename... A> |
| void emplace(A && ...value) { |
| ::new((void*) &Storage) T(std::forward<A>(value)...); |
| } |
| |
| T &get() { return *reinterpret_cast<T*>(Storage); } |
| const T &get() const { return *reinterpret_cast<T*>(Storage); } |
| }; |
| |
| // We expect to see a lot of entries for keys like: |
| // ab |
| // abcdef |
| // abcdefg |
| // abcdefh |
| // corresponding to closely-related paths and different prefixes |
| // of the same path. This basically demands something like a trie. |
| // |
| // The rules of our data structure are: |
| // - A node's complete key is the concatenation of its non-local prefix |
| // and its local key. The non-local prefix is the concatenation of |
| // of the local keys of the Further links followed from the root. |
| // - A node's local key contains no common prefix with the local keys of |
| // its left or right siblings. |
| |
| struct Node; |
| struct NodeBase { |
| // The initial layout of this struct is assumed in the out-of-line |
| // printing code; you'll need to modify it. |
| |
| // Left and right siblings: nodes which share the same non-local |
| // prefix as this one, but which share no common local prefix with it. |
| Node *Left = nullptr; |
| Node *Right = nullptr; |
| |
| // Further children: nodes whose non-local prefix is the concatenation |
| // of the non-local prefix of this node and its local key. |
| Node *Further = nullptr; |
| |
| KeyElementType Key[InlineKeyCapacity]; |
| unsigned char KeyLength : 7; |
| unsigned char HasValue : 1; |
| static_assert(InlineKeyCapacity < (1 << 7), |
| "can't store inline key length in bit-field"); |
| |
| NodeBase() : KeyLength(0), HasValue(false) {} |
| |
| KeyType getLocalKey() const { return { Key, KeyLength }; } |
| }; |
| struct Node : NodeBase { |
| private: |
| UninitializedStorage<ValueType> Value; |
| |
| public: |
| Node() { /* leave Value uninitialized */ } |
| |
| // We split NodeBase out so that we can just delegate to something that |
| // copies all the other fields. |
| Node(const Node &other) : NodeBase(other) { |
| if (this->HasValue) { |
| Value.emplace(other.Value.get()); |
| } |
| } |
| |
| ValueType &get() { |
| assert(this->HasValue); |
| return Value.get(); |
| } |
| const ValueType &get() const { |
| assert(this->HasValue); |
| return Value.get(); |
| } |
| |
| template <typename... A> |
| void emplace(A && ...value) { |
| assert(!this->HasValue); |
| Value.emplace(std::forward<A>(value)...); |
| this->HasValue = true; |
| } |
| }; |
| |
| /// Splits a node in two. The second part must always be non-empty. |
| /// |
| /// ref -> cur 'abcdef' -> ... |
| /// => |
| /// ref -> split 'abc' -> cur 'def' -> ... |
| /// |
| /// Returns the node that stores the common prefix as its key. |
| /// |
| /// TODO: consider just reorganizing cur and its Further node if |
| /// cur doesn't have a value or left/right children, e.g. |
| /// ref -> cur 'abcdefg' -> further 'hij' |
| /// => |
| /// ref -> cur 'abc' -> further 'defghij' |
| static Node *splitNode(Node **ref, size_t splitIndex) { |
| auto cur = *ref; |
| assert(splitIndex < cur->KeyLength && |
| "split index would leave second node with empty key"); |
| |
| auto split = new Node(); |
| split->Further = cur; |
| |
| // Move the sibling links of cur onto split unless we're giving split |
| // an empty local key, which is the only case where the siblings will |
| // have split's key as a prefix. |
| if (splitIndex != 0) { |
| split->Left = cur->Left; |
| split->Right = cur->Right; |
| cur->Left = nullptr; |
| cur->Right = nullptr; |
| } |
| |
| // Initialize the key of the split node. |
| split->KeyLength = splitIndex; |
| memcpy(split->Key, cur->Key, splitIndex * sizeof(KeyElementType)); |
| |
| // Slide cur's key if the split point wasn't the start. |
| if (splitIndex != 0) { |
| cur->KeyLength -= splitIndex; |
| memmove(cur->Key, cur->Key + splitIndex, |
| cur->KeyLength * sizeof(KeyElementType)); |
| } |
| |
| *ref = split; |
| return split; |
| } |
| |
| /// Try to find a node corresponding to a prefix of the given key. |
| /// |
| /// If 'remainingLookupKey' is non-null, the returned node will correspond |
| /// to the longest prefix that had a value set, and 'remainingLookupKey' |
| /// will be set to the part of the key that was not matched. The tree |
| /// will not be modified. |
| /// |
| /// If 'remainingLookupKey' is null, the returned node will correspond |
| /// to the complete lookup key. This node will be created if necessary. |
| Node *getOrCreatePrefixNode(KeyType lookupKey, |
| KeyType *remainingLookupKey) { |
| // Invariant: 'best' contains the last node we followed the Further |
| // link of that had a value. This is used only when not creating |
| // an exact match. |
| Node *best = nullptr; |
| |
| Node **next = &Root; |
| while (Node * const cur = *next) { |
| KeyType curKey = cur->getLocalKey(); |
| |
| // Compare the lookup key with the stored key in the node. |
| size_t len = std::min(curKey.size(), lookupKey.size()); |
| size_t i = 0; |
| for (; i != len; ++i) { |
| if (lookupKey[i] != curKey[i]) break; |
| } |
| |
| // If we didn't reach the end of the common length, then we have two |
| // basic cases: |
| // looking up 'def' in 'abc' or 'ghi' |
| // looking up 'abd' in 'abc' or 'abce' |
| if (i != len) { |
| assert(len != 0); |
| bool goLeft = (lookupKey[i] < curKey[i]); |
| |
| // If there's no common prefix, just go to the appropriate side. |
| if (i == 0) { |
| next = (goLeft ? &cur->Left : &cur->Right); |
| continue; |
| } |
| |
| // Otherwise, there's a common prefix, but it's not cur's entire |
| // key, so there's no node at that common prefix. That means that |
| // there can't be an exact match. So if we don't need to return |
| // one, we can just stop here. |
| if (remainingLookupKey) |
| return best; |
| |
| // Otherwise, we need to split cur at the appropriate place. |
| Node *split = splitNode(next, i); |
| |
| // Continue from a sibling of its Further link. |
| lookupKey = lookupKey.slice(i); |
| next = (goLeft ? &split->Further->Left : &split->Further->Right); |
| assert(*next == nullptr); |
| break; |
| } |
| |
| // If we did reach the end of the common length, then we have three |
| // cases: |
| // looking up 'abc' in 'abc' |
| // looking up 'abc' in 'abcdef' |
| // looking up 'abc' in 'ab' |
| |
| // We might have exhausted the node's local key. |
| // (This could be empty.) |
| if (len == curKey.size()) { |
| lookupKey = lookupKey.slice(len); |
| |
| // Remember this as the best mapped match if it has a value. |
| if (remainingLookupKey && cur->HasValue) { |
| best = cur; |
| *remainingLookupKey = lookupKey; |
| } |
| |
| // If we've exhausted the lookup key, too, we have an exact match. |
| if (lookupKey.empty()) { |
| return (remainingLookupKey ? best : cur); |
| } |
| |
| // Otherwise, we have a suffix match; continue along the Further |
| // path. |
| next = &cur->Further; |
| continue; |
| } |
| |
| // Otherwise, we matched a prefix of the node's key. |
| assert(lookupKey.size() < curKey.size()); |
| |
| // If we don't need to create an exact node, don't claim anything |
| // more from the key and just use our deepest match. |
| if (remainingLookupKey) { |
| return best; |
| } |
| |
| // Okay, we're supposed to return a node that exactly matches the key. |
| // Split 'cur'. |
| Node *split = splitNode(next, len); |
| return split; |
| } |
| |
| // Okay, we reached the end of our search. If we don't need an exact |
| // match, return our best match so far. |
| if (remainingLookupKey) { |
| return best; |
| } |
| |
| // Otherwise, create nodes until we're out of lookup key. |
| // TODO: balance the subtree when creating nodes to the left or right. |
| do { |
| best = new Node(); |
| *next = best; |
| next = &best->Further; |
| |
| auto len = std::min(InlineKeyCapacity, lookupKey.size()); |
| best->KeyLength = len; |
| memcpy(best->Key, lookupKey.data(), len * sizeof(KeyElementType)); |
| lookupKey = lookupKey.slice(len); |
| } while (!lookupKey.empty()); |
| |
| return best; |
| } |
| |
| /// Delete all the nodes in a subtree. |
| static void deleteTree(Node *root) { |
| if (!root) return; |
| |
| SmallVector<Node *, 8> stack; |
| auto pushChildrenAndDelete = [&](Node *node) { |
| if (node->Left) stack.push_back(node->Left); |
| if (node->Right) stack.push_back(node->Right); |
| if (node->Further) stack.push_back(node->Further); |
| delete node; |
| }; |
| pushChildrenAndDelete(root); |
| while (!stack.empty()) { |
| pushChildrenAndDelete(stack.pop_back_val()); |
| } |
| } |
| |
| /// Copy all the nodes in a subtree. |
| static Node *cloneTree(Node *root) { |
| if (!root) return nullptr; |
| |
| SmallVector<Node **, 8> stack; |
| auto copyAndPushChildren = [&](Node **ptr) { |
| assert(*ptr); |
| Node *copy = new Node(**ptr); |
| *ptr = copy; |
| if (copy->Left) stack.push_back(©->Left); |
| if (copy->Right) stack.push_back(©->Right); |
| if (copy->Further) stack.push_back(©->Further); |
| }; |
| copyAndEnqueueChildren(&root); |
| while (!stack.empty()) { |
| copyAndEnqueueChildren(stack.pop_back_val()); |
| } |
| return root; |
| } |
| |
| Node *Root = nullptr; |
| |
| public: |
| PrefixMap() {} |
| |
| PrefixMap(const PrefixMap &other) : Root(cloneTree(other.Root)) {} |
| PrefixMap(PrefixMap &&other) : Root(other.Root) { |
| other.Root = nullptr; |
| } |
| PrefixMap &operator=(const PrefixMap &other) { |
| deleteTree(Root); |
| Root = cloneTree(other.Root); |
| return *this; |
| } |
| PrefixMap &operator=(PrefixMap &&other) { |
| deleteTree(Root); |
| Root = other.Root; |
| other.Root = nullptr; |
| return *this; |
| } |
| |
| ~PrefixMap() { |
| deleteTree(Root); |
| } |
| |
| /// Are there any entries in this map? |
| bool empty() const { |
| // The only way to create nodes is to insert an entry, and we don't |
| // yet support delete, so having any nodes means we're non-empty. |
| return Root == nullptr; |
| } |
| |
| /// Return the number of entries in this map. |
| size_t size() const { |
| if (!Root) return 0; |
| |
| size_t result = 0; |
| SmallVector<Node *, 16> stack; |
| auto handleNode = [&](Node *node) { |
| if (node->HasValue) result++; |
| if (node->Left) stack.push_back(node->Left); |
| if (node->Right) stack.push_back(node->Right); |
| if (node->Further) stack.push_back(node->Further); |
| }; |
| handleNode(Root); |
| while (!stack.empty()) |
| handleNode(stack.pop_back_val()); |
| return result; |
| } |
| |
| /// An input iterator over the entries in the map. |
| /// |
| /// This iterator stores a stack of the access path to the entry and |
| /// is therefore expensive to copy, and its operator* returns a proxy |
| /// object with a reference to its internal storage. These are both |
| /// legal for C++ input iterators, but still, be careful. |
| class const_iterator { |
| protected: |
| enum Position { |
| /// We are visiting the node's left subtree. |
| Left, |
| /// If the node is on the top of the stack, we are visiting its value. |
| /// Otherwise, we are visiting its further subtree. |
| Further, |
| // We remove the node from the stack when visiting its right subtree. |
| }; |
| using NodeAndPosition = llvm::PointerIntPair<Node*, 1, Position>; |
| SmallVector<NodeAndPosition, 8> Stack; |
| public: |
| const_iterator() {} |
| explicit const_iterator(Node *node) { enter(node); } |
| |
| /// A proxy object referencing a valid entry in the map. |
| class ConstEntryProxy { |
| ArrayRef<NodeAndPosition> Path; |
| public: |
| explicit ConstEntryProxy(ArrayRef<NodeAndPosition> path) : Path(path) { |
| assert(!Path.empty() && Path.back().getPointer()->HasValue); |
| } |
| |
| /// Return the value of the entry. The returned reference is valid |
| /// as long as the entry remains in the map. |
| const ValueType &getValue() const { |
| return Path.back().getPointer()->get(); |
| } |
| |
| /// Read the value's key into the given buffer. |
| KeyType getKey(SmallVectorImpl<KeyElementType> &buffer) const { |
| buffer.clear(); |
| for (auto &entry : Path.drop_back()) { |
| if (entry.getInt() != Further) continue; |
| auto key = entry.getPointer()->getLocalKey(); |
| buffer.append(key.begin(), key.end()); |
| } |
| auto key = Path.back().getPointer()->getLocalKey(); |
| buffer.append(key.begin(), key.end()); |
| return buffer; |
| } |
| }; |
| |
| /// Return a proxy value for the entry. The returned proxy is invalidated |
| /// by any change to the underlying iterator. |
| ConstEntryProxy operator*() const { |
| assert(!Stack.empty()); |
| assert(Stack.back().getPointer()->HasValue); |
| return ConstEntryProxy(Stack); |
| } |
| |
| /// Advance the iterator. |
| const_iterator &operator++() { |
| assert(!Stack.empty()); |
| |
| while (true) { |
| if (Stack.back().getInt() == Left) { |
| Stack.back().setInt(Further); |
| if (Stack.back().getPointer()->HasValue) return *this; |
| } |
| if (enter(Stack.back().getPointer()->Further)) return *this; |
| |
| do { |
| Node *top = Stack.pop_back_val().getPointer(); |
| if (enter(top->Right)) return *this; |
| if (Stack.empty()) return *this; |
| } while (Stack.back().getInt() == Further); |
| } |
| } |
| const_iterator operator++(int) { |
| const_iterator copy = *this; |
| operator++(); |
| return copy; |
| } |
| |
| bool operator==(const const_iterator &other) const { |
| return Stack == other.Stack; |
| } |
| bool operator!=(const const_iterator &other) const { |
| return !operator==(other); |
| } |
| |
| using difference_type = ptrdiff_t; |
| using iterator_category = std::input_iterator_tag; |
| using value_type = ConstEntryProxy; |
| using pointer = ConstEntryProxy; |
| using reference = ConstEntryProxy; |
| |
| private: |
| bool enter(Node *node) { |
| if (!node) return false; |
| Stack.push_back({node, Left}); |
| if (enter(node->Left)) return true; |
| Stack.back().setInt(Further); |
| if (node->HasValue) return true; |
| if (enter(node->Further)) return true; |
| Stack.pop_back(); |
| return enter(node->Right); |
| } |
| }; |
| |
| /// An input iterator over the entries in this map. |
| /// |
| /// This iterator stores a stack of the access path to the entry and |
| /// is therefore expensive to copy, and its operator* returns a proxy |
| /// object with a reference to its internal storage. These are both |
| /// legal for C++ input iterators, but still, be careful. |
| struct iterator : const_iterator { |
| iterator() : const_iterator() {} |
| explicit iterator(Node *node) : const_iterator(node) {} |
| |
| struct EntryProxy : const_iterator::ConstEntryProxy { |
| explicit EntryProxy( |
| ArrayRef<typename const_iterator::NodeAndPosition> path) |
| : const_iterator::ConstEntryProxy(path) {} |
| ValueType &getValue() const { |
| return const_cast<ValueType &>( |
| const_iterator::ConstEntryProxy::getValue()); |
| } |
| }; |
| |
| EntryProxy operator*() const { |
| return EntryProxy(this->Stack); |
| } |
| |
| iterator &operator++() { |
| return static_cast<iterator&>(const_iterator::operator++()); |
| } |
| iterator operator++(int) { |
| iterator copy = *this; |
| operator++(); |
| return copy; |
| } |
| |
| // Base implementations of operator== are fine. |
| |
| using value_type = EntryProxy; |
| using pointer = EntryProxy; |
| using reference = EntryProxy; |
| }; |
| |
| iterator begin() { return iterator(Root); } |
| iterator end() { return iterator(); } |
| |
| const_iterator begin() const { return const_iterator(Root); } |
| const_iterator end() const { return const_iterator(); } |
| |
| /// A handle to the mapping for a given key. Only invalidated by |
| /// changes that remove the mapping. |
| class Handle { |
| Node *Ptr; |
| public: |
| Handle() : Ptr(nullptr) {} |
| explicit Handle(Node *ptr) : Ptr(ptr) {} |
| |
| explicit operator bool() const { return Ptr != nullptr; } |
| ValueType &operator*() const { |
| return Ptr->get(); |
| } |
| }; |
| |
| using KeyIterator = typename KeyType::iterator; |
| |
| /// Find the longest prefix of the given encoded sequence which has |
| /// an entry in this map, and return an iterator corresponding to the |
| /// end of the prefix. |
| std::pair<Handle, KeyIterator> |
| findPrefix(KeyType key) const { |
| KeyType remainingKey; |
| auto node = const_cast<PrefixMap*>(this) |
| ->getOrCreatePrefixNode(key, &remainingKey); |
| assert(!node || node->HasValue); |
| return { Handle(node), remainingKey.begin() }; |
| } |
| |
| /// Remove all entries in the map. |
| void clear() { |
| deleteTree(Root); |
| Root = nullptr; |
| } |
| |
| /// Get or create an entry in the map. |
| /// |
| /// \return a handle to the entry and a bool indicating (if true) |
| /// that the map was modified to insert the mapping. |
| template <typename Fn> |
| std::pair<Handle, bool> |
| insertLazy(KeyType key, const Fn &create) { |
| auto node = getOrCreatePrefixNode(key, nullptr); |
| assert(node); |
| if (node->HasValue) { |
| return { Handle(node), false }; |
| } else { |
| node->emplace(create()); |
| return { Handle(node), true }; |
| } |
| } |
| |
| std::pair<Handle, bool> |
| insert(KeyType key, ValueType &&value) { |
| return insertLazy(key, |
| [&]() -> ValueType && { return std::move(value); }); |
| } |
| |
| std::pair<Handle, bool> |
| insert(KeyType key, const ValueType &value) { |
| return insertLazy(key, |
| [&]() -> const ValueType & { return value; }); |
| } |
| |
| /// Insert a new entry into the map, asserting that it doesn't |
| /// already exist. |
| template <typename Fn> |
| Handle insertNewLazy(KeyType key, const Fn &create) { |
| auto node = getOrCreatePrefixNode(key, nullptr); |
| assert(node); |
| node->emplace(create()); |
| return Handle(node); |
| } |
| |
| Handle insertNew(KeyType key, ValueType &&value) { |
| return insertNewLazy(key, |
| [&]() -> ValueType && { return std::move(value); }); |
| } |
| |
| Handle insertNew(KeyType key, const ValueType &value) { |
| return insertNewLazy(key, |
| [&]() -> const ValueType & { return value; }); |
| } |
| |
| SWIFT_DEBUG_DUMP { print(llvm::errs()); } |
| void print(raw_ostream &out) const { |
| printOpaquePrefixMap(out, Root, |
| [](raw_ostream &out, void *_node) { |
| Node *node = reinterpret_cast<Node*>(_node); |
| PrefixMapKeyPrinter<KeyElementType>::print(out, node->getLocalKey()); |
| if (node->HasValue) { |
| out << " (" << node->get() << ')'; |
| } |
| }); |
| } |
| }; |
| |
| /// The standard implementation of a key printer: |
| /// {0,1,2,3} |
| template <class KeyElementType> |
| class PrefixMapKeyPrinter { |
| public: |
| static void print(raw_ostream &out, ArrayRef<KeyElementType> key) { |
| out << '{'; |
| for (size_t i = 0; i != key.size(); ++i) { |
| if (i != 0) out << ','; |
| out << key[i]; |
| } |
| out << '}'; |
| } |
| }; |
| |
| /// A key printer for char sequences that prints as a quoted string: |
| /// "hello, \"world\"" |
| template <> |
| class PrefixMapKeyPrinter<char> { |
| public: |
| static void print(raw_ostream &out, ArrayRef<char> key); |
| }; |
| |
| /// A key printer for byte sequences that prints as a byte-string: |
| // '0F346E' |
| template <> |
| class PrefixMapKeyPrinter<unsigned char> { |
| public: |
| static void print(raw_ostream &out, ArrayRef<unsigned char> key); |
| }; |
| |
| } // end namespace swift |
| |
| #endif // SWIFT_BASIC_PREFIXMAP_H |