| //===--- Concurrent.h - Concurrent Data Structures -------------*- C++ -*-===// |
| // |
| // This source file is part of the Swift.org open source project |
| // |
| // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors |
| // Licensed under Apache License v2.0 with Runtime Library Exception |
| // |
| // See https://swift.org/LICENSE.txt for license information |
| // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| // |
| //===----------------------------------------------------------------------===// |
| #ifndef SWIFT_RUNTIME_CONCURRENTUTILS_H |
| #define SWIFT_RUNTIME_CONCURRENTUTILS_H |
| #include <iterator> |
| #include <algorithm> |
| #include <atomic> |
| #include <functional> |
| #include <stdint.h> |
| #include <vector> |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/Support/Allocator.h" |
| #include "Atomic.h" |
| #include "Debug.h" |
| #include "Mutex.h" |
| |
| #if defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__HAIKU__) |
| #include <stdio.h> |
| #endif |
| |
| #if defined(__APPLE__) && defined(__MACH__) |
| #include <malloc/malloc.h> |
| #endif |
| |
| namespace swift { |
| |
| /// This is a node in a concurrent linked list. |
| template <class ElemTy> struct ConcurrentListNode { |
| ConcurrentListNode(ElemTy Elem) : Payload(Elem), Next(nullptr) {} |
| ConcurrentListNode(const ConcurrentListNode &) = delete; |
| ConcurrentListNode &operator=(const ConcurrentListNode &) = delete; |
| |
| /// The element. |
| ElemTy Payload; |
| /// Points to the next link in the chain. |
| ConcurrentListNode<ElemTy> *Next; |
| }; |
| |
| /// This is a concurrent linked list. It supports insertion at the beginning |
| /// of the list and traversal using iterators. |
| /// This is a very simple implementation that uses atomic operations. The |
| /// 'push_front' method allocates a new link and attempts to compare-and-swap |
| /// the old head pointer with a pointer to the new link. This operation may |
| /// fail many times if there are other contending threads, but eventually the |
| /// head pointer is set to the new link, which already points to the old head |
| /// value. Notice that the more difficult feature of removing links is not |
| /// supported. |
| /// See 'push_front' for more details. |
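| /// |
| /// A minimal usage sketch (illustrative only; `int` stands in for any element |
| /// type): |
| /// |
| /// \code |
| /// ConcurrentList<int> list; |
| /// list.push_front(1); // push_front may be called from multiple threads. |
| /// list.push_front(2); |
| /// // Iteration walks from the most recently pushed element to the oldest. |
| /// for (int value : list) |
| /// printf("%d\n", value); |
| /// \endcode |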
| template <class ElemTy> struct ConcurrentList { |
| ConcurrentList() : First(nullptr) {} |
| ~ConcurrentList() { |
| clear(); |
| } |
| |
| /// Remove all of the links in the chain. This method leaves |
| /// the list in a usable state, and new links can be added. |
| /// Notice that this operation is non-concurrent because |
| /// we have no way of ensuring that no one is currently |
| /// traversing the list. |
| void clear() { |
| // Iterate over the list and delete all the nodes. |
| auto Ptr = First.load(std::memory_order_acquire); |
| First.store(nullptr, std::memory_order_release); |
| |
| while (Ptr) { |
| auto N = Ptr->Next; |
| delete Ptr; |
| Ptr = N; |
| } |
| } |
| |
| ConcurrentList(const ConcurrentList &) = delete; |
| ConcurrentList &operator=(const ConcurrentList &) = delete; |
| |
| /// A list iterator. |
| struct ConcurrentListIterator : |
| public std::iterator<std::forward_iterator_tag, ElemTy> { |
| |
| /// Points to the current link. |
| ConcurrentListNode<ElemTy> *Ptr; |
| /// Constructor. |
| ConcurrentListIterator(ConcurrentListNode<ElemTy> *P) : Ptr(P) {} |
| /// Move to the next element. |
| ConcurrentListIterator &operator++() { |
| Ptr = Ptr->Next; |
| return *this; |
| } |
| /// Access the element. |
| ElemTy &operator*() { return Ptr->Payload; } |
| /// Equality comparison. |
| bool operator==(const ConcurrentListIterator &o) const { |
| return o.Ptr == Ptr; |
| } |
| /// Inequality comparison. |
| bool operator!=(const ConcurrentListIterator &o) const { |
| return o.Ptr != Ptr; |
| } |
| }; |
| |
| /// Iterator entry point. |
| typedef ConcurrentListIterator iterator; |
| /// Marks the beginning of the list. |
| iterator begin() const { |
| return ConcurrentListIterator(First.load(std::memory_order_acquire)); |
| } |
| /// Marks the end of the list. |
| iterator end() const { return ConcurrentListIterator(nullptr); } |
| |
| /// Add a new item to the list. |
| void push_front(ElemTy Elem) { |
| // Allocate a new node. |
| ConcurrentListNode<ElemTy> *N = new ConcurrentListNode<ElemTy>(Elem); |
| // Point to the current first element in the list. |
| N->Next = First.load(std::memory_order_acquire); |
| auto OldFirst = N->Next; |
| // Try to replace the current First with the new node. |
| while (!std::atomic_compare_exchange_weak_explicit(&First, &OldFirst, N, |
| std::memory_order_release, |
| std::memory_order_relaxed)) { |
| // If we fail, update the new node to point to the new head and try to |
| // insert before the new first element. |
| N->Next = OldFirst; |
| } |
| } |
| |
| /// Points to the first link in the list. |
| std::atomic<ConcurrentListNode<ElemTy> *> First; |
| }; |
| |
| /// A utility function for ordering two integers, which is useful |
| /// for implementing compareWithKey. |
| template <class T> |
| static inline int compareIntegers(T left, T right) { |
| return (left == right ? 0 : left < right ? -1 : 1); |
| } |
| |
| /// A utility function for ordering two pointers, which is useful |
| /// for implementing compareWithKey. |
| template <class T> |
| static inline int comparePointers(const T *left, const T *right) { |
| return (left == right ? 0 : std::less<const T *>()(left, right) ? -1 : 1); |
| } |
| |
| template <class EntryTy, bool ProvideDestructor, class Allocator> |
| class ConcurrentMapBase; |
| |
| /// The partial specialization of ConcurrentMapBase whose destructor is |
| /// trivial. The other implementation inherits from this, so this is a |
| /// base for all ConcurrentMaps. |
| template <class EntryTy, class Allocator> |
| class ConcurrentMapBase<EntryTy, false, Allocator> : protected Allocator { |
| protected: |
| struct Node { |
| std::atomic<Node*> Left; |
| std::atomic<Node*> Right; |
| EntryTy Payload; |
| |
| template <class... Args> |
| Node(Args &&... args) |
| : Left(nullptr), Right(nullptr), Payload(std::forward<Args>(args)...) {} |
| |
| Node(const Node &) = delete; |
| Node &operator=(const Node &) = delete; |
| |
| #ifndef NDEBUG |
| void dump() const { |
| auto L = Left.load(std::memory_order_acquire); |
| auto R = Right.load(std::memory_order_acquire); |
| printf("\"%p\" [ label = \" {<f0> %08lx | {<f1> | <f2>}}\" " |
| "style=\"rounded\" shape=\"record\"];\n", |
| this, (long) Payload.getKeyIntValueForDump()); |
| |
| if (L) { |
| L->dump(); |
| printf("\"%p\":f1 -> \"%p\":f0;\n", this, L); |
| } |
| if (R) { |
| R->dump(); |
| printf("\"%p\":f2 -> \"%p\":f0;\n", this, R); |
| } |
| } |
| #endif |
| }; |
| |
| std::atomic<Node*> Root; |
| |
| constexpr ConcurrentMapBase() : Root(nullptr) {} |
| |
| // Implicitly trivial destructor. |
| ~ConcurrentMapBase() = default; |
| |
| void destroyNode(Node *node) { |
| assert(node && "destroying null node"); |
| auto allocSize = sizeof(Node) + node->Payload.getExtraAllocationSize(); |
| |
| // Destroy the node's payload. |
| node->~Node(); |
| |
| // Deallocate the node. The static_cast here is required |
| // because LLVM's allocator API is insane. |
| this->Deallocate(static_cast<void*>(node), allocSize, alignof(Node)); |
| } |
| }; |
| |
| /// The partial specialization of ConcurrentMapBase which provides a |
| /// non-trivial destructor. |
| template <class EntryTy, class Allocator> |
| class ConcurrentMapBase<EntryTy, true, Allocator> |
| : protected ConcurrentMapBase<EntryTy, false, Allocator> { |
| protected: |
| using super = ConcurrentMapBase<EntryTy, false, Allocator>; |
| using Node = typename super::Node; |
| |
| constexpr ConcurrentMapBase() {} |
| |
| ~ConcurrentMapBase() { |
| destroyTree(this->Root); |
| } |
| |
| private: |
| void destroyTree(const std::atomic<Node*> &edge) { |
| // This can be a relaxed load because destruction is not allowed to race |
| // with other operations. |
| auto node = edge.load(std::memory_order_relaxed); |
| if (!node) return; |
| |
| // Destroy the node's children. |
| destroyTree(node->Left); |
| destroyTree(node->Right); |
| |
| // Destroy the node itself. |
| this->destroyNode(node); |
| } |
| }; |
| |
| /// A concurrent map that is implemented using a binary tree. It supports |
| /// concurrent insertions but does not support removals or rebalancing of |
| /// the tree. |
| /// |
| /// The entry type must provide the following operations: |
| /// |
| /// /// For debugging purposes only. Summarize this key as an integer value. |
| /// intptr_t getKeyIntValueForDump() const; |
| /// |
| /// /// A ternary comparison. KeyTy is the type of the key provided |
| /// /// to find or getOrInsert. |
| /// int compareWithKey(KeyTy key) const; |
| /// |
| /// /// Return the amount of extra trailing space required by an entry, |
| /// /// where KeyTy is the type of the first argument to getOrInsert and |
| /// /// ArgTys are the types of the remaining arguments. |
| /// static size_t getExtraAllocationSize(KeyTy key, ArgTys...); |
| /// |
| /// /// Return the amount of extra trailing space that was requested for |
| /// /// this entry. This method is only used to compute the size of the |
| /// /// object during node deallocation; it does not need to return a |
| /// /// correct value so long as the allocator's Deallocate implementation |
| /// /// ignores this argument. |
| /// size_t getExtraAllocationSize() const; |
| /// |
| /// If ProvideDestructor is false, the destructor will be trivial. This |
| /// can be appropriate when the object is declared at global scope. |
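| /// |
| /// A minimal sketch of a conforming entry type (illustrative only; `IntEntry` |
| /// and its members are hypothetical): |
| /// |
| /// \code |
| /// struct IntEntry { |
| /// int Key; |
| /// IntEntry(int key) : Key(key) {} |
| /// intptr_t getKeyIntValueForDump() const { return Key; } |
| /// int compareWithKey(int key) const { return compareIntegers(Key, key); } |
| /// static size_t getExtraAllocationSize(int) { return 0; } |
| /// size_t getExtraAllocationSize() const { return 0; } |
| /// }; |
| /// |
| /// ConcurrentMap<IntEntry> Map; |
| /// auto result = Map.getOrInsert(42); // {&entry, true} on first insertion. |
| /// \endcode |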
| template <class EntryTy, bool ProvideDestructor = true, |
| class Allocator = llvm::MallocAllocator> |
| class ConcurrentMap |
| : private ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator> { |
| using super = ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator>; |
| |
| using Node = typename super::Node; |
| |
| /// Inherited from base class: |
| /// std::atomic<Node*> Root; |
| using super::Root; |
| |
| /// This member stores the address of the last node that was found by the |
| /// search procedure. We cache the last search to accelerate code that |
| /// searches the same value in a loop. |
| std::atomic<Node*> LastSearch; |
| |
| public: |
| constexpr ConcurrentMap() : LastSearch(nullptr) {} |
| |
| ConcurrentMap(const ConcurrentMap &) = delete; |
| ConcurrentMap &operator=(const ConcurrentMap &) = delete; |
| |
| // ConcurrentMap<T, false> must have a trivial destructor. |
| ~ConcurrentMap() = default; |
| |
| public: |
| |
| Allocator &getAllocator() { |
| return *this; |
| } |
| |
| #ifndef NDEBUG |
| void dump() const { |
| auto R = Root.load(std::memory_order_acquire); |
| printf("digraph g {\n" |
| "graph [ rankdir = \"TB\"];\n" |
| "node [ fontsize = \"16\" ];\n" |
| "edge [ ];\n"); |
| if (R) { |
| R->dump(); |
| } |
| printf("\n}\n"); |
| } |
| #endif |
| |
| /// Search for a value by key \p key. |
| /// \returns a pointer to the value or null if the value is not in the map. |
| template <class KeyTy> |
| EntryTy *find(const KeyTy &key) { |
| // Check if we are looking for the same key that we looked for the last |
| // time we called this function. |
| if (Node *last = LastSearch.load(std::memory_order_acquire)) { |
| if (last->Payload.compareWithKey(key) == 0) |
| return &last->Payload; |
| } |
| |
| // Search the tree, starting from the root. |
| Node *node = Root.load(std::memory_order_acquire); |
| while (node) { |
| int comparisonResult = node->Payload.compareWithKey(key); |
| if (comparisonResult == 0) { |
| LastSearch.store(node, std::memory_order_release); |
| return &node->Payload; |
| } else if (comparisonResult < 0) { |
| node = node->Left.load(std::memory_order_acquire); |
| } else { |
| node = node->Right.load(std::memory_order_acquire); |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| /// Get or create an entry in the map. |
| /// |
| /// \returns the entry in the map and whether a new node was added (true) |
| /// or already existed (false). |
| template <class KeyTy, class... ArgTys> |
| std::pair<EntryTy*, bool> getOrInsert(KeyTy key, ArgTys &&... args) { |
| // Check if we are looking for the same key that we looked for the |
| // last time we called this function. |
| if (Node *last = LastSearch.load(std::memory_order_acquire)) { |
| if (last->Payload.compareWithKey(key) == 0) |
| return { &last->Payload, false }; |
| } |
| |
| // The node we allocated. |
| Node *newNode = nullptr; |
| |
| // Start from the root. |
| auto edge = &Root; |
| |
| while (true) { |
| // Load the edge. |
| Node *node = edge->load(std::memory_order_acquire); |
| |
| // If there's a node there, it's either a match or we're going to |
| // one of its children. |
| if (node) { |
| searchFromNode: |
| |
| // Compare our key against the node's key. |
| int comparisonResult = node->Payload.compareWithKey(key); |
| |
| // If it's equal, we can use this node. |
| if (comparisonResult == 0) { |
| // Destroy the node we allocated before if we're carrying one around. |
| if (newNode) this->destroyNode(newNode); |
| |
| // Cache and report that we found an existing node. |
| LastSearch.store(node, std::memory_order_release); |
| return { &node->Payload, false }; |
| } |
| |
| // Otherwise, select the appropriate child edge and descend. |
| edge = (comparisonResult < 0 ? &node->Left : &node->Right); |
| continue; |
| } |
| |
| // Create a new node. |
| if (!newNode) { |
| size_t allocSize = |
| sizeof(Node) + EntryTy::getExtraAllocationSize(key, args...); |
| void *memory = this->Allocate(allocSize, alignof(Node)); |
| newNode = ::new (memory) Node(key, std::forward<ArgTys>(args)...); |
| } |
| |
| // Try to set the edge to the new node. |
| if (std::atomic_compare_exchange_strong_explicit(edge, &node, newNode, |
| std::memory_order_acq_rel, |
| std::memory_order_acquire)) { |
| // If that succeeded, cache and report that we created a new node. |
| LastSearch.store(newNode, std::memory_order_release); |
| return { &newNode->Payload, true }; |
| } |
| |
| // Otherwise, we lost the race because some other thread initialized |
| // the edge before us. node will be set to the current value; |
| // repeat the search from there. |
| assert(node && "spurious failure from compare_exchange_strong?"); |
| goto searchFromNode; |
| } |
| } |
| }; |
| |
| |
| /// An append-only array that can be read without taking locks. Writes |
| /// are still locked and serialized, but only with respect to other |
| /// writes. |
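| /// |
| /// A brief usage sketch (illustrative only): |
| /// |
| /// \code |
| /// ConcurrentReadableArray<int> array; |
| /// array.push_back(17); // Writers are serialized by an internal lock. |
| /// // Readers take a snapshot; the elements it refers to remain valid until |
| /// // the snapshot is destroyed. |
| /// auto snapshot = array.snapshot(); |
| /// for (int value : snapshot) |
| /// printf("%d\n", value); |
| /// \endcode |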
| template <class ElemTy> struct ConcurrentReadableArray { |
| private: |
| /// The struct used for the array's storage. The `Elem` member is |
| /// considered to be the first element of a variable-length array, |
| /// whose size is determined by the allocation. The `Capacity` member |
| /// from `ConcurrentReadableArray` indicates how large it can be. |
| struct Storage { |
| std::atomic<size_t> Count; |
| typename std::aligned_storage<sizeof(ElemTy), alignof(ElemTy)>::type Elem; |
| |
| static Storage *allocate(size_t capacity) { |
| auto size = sizeof(Storage) + (capacity - 1) * sizeof(Storage().Elem); |
| auto *ptr = reinterpret_cast<Storage *>(malloc(size)); |
| if (!ptr) swift::crash("Could not allocate memory."); |
| ptr->Count.store(0, std::memory_order_relaxed); |
| return ptr; |
| } |
| |
| void deallocate() { |
| for (size_t i = 0; i < Count; ++i) { |
| data()[i].~ElemTy(); |
| } |
| free(this); |
| } |
| |
| ElemTy *data() { |
| return reinterpret_cast<ElemTy *>(&Elem); |
| } |
| }; |
| |
| size_t Capacity; |
| std::atomic<size_t> ReaderCount; |
| std::atomic<Storage *> Elements; |
| Mutex WriterLock; |
| std::vector<Storage *> FreeList; |
| |
| void incrementReaders() { |
| ReaderCount.fetch_add(1, std::memory_order_acquire); |
| } |
| |
| void decrementReaders() { |
| ReaderCount.fetch_sub(1, std::memory_order_release); |
| } |
| |
| void deallocateFreeList() { |
| for (Storage *storage : FreeList) |
| storage->deallocate(); |
| FreeList.clear(); |
| FreeList.shrink_to_fit(); |
| } |
| |
| public: |
| struct Snapshot { |
| ConcurrentReadableArray *Array; |
| const ElemTy *Start; |
| size_t Count; |
| |
| Snapshot(ConcurrentReadableArray *array, const ElemTy *start, size_t count) |
| : Array(array), Start(start), Count(count) {} |
| |
| Snapshot(const Snapshot &other) |
| : Array(other.Array), Start(other.Start), Count(other.Count) { |
| Array->incrementReaders(); |
| } |
| |
| ~Snapshot() { |
| Array->decrementReaders(); |
| } |
| |
| const ElemTy *begin() { return Start; } |
| const ElemTy *end() { return Start + Count; } |
| size_t count() { return Count; } |
| }; |
| |
| // This type cannot be safely copied or moved. |
| ConcurrentReadableArray(const ConcurrentReadableArray &) = delete; |
| ConcurrentReadableArray(ConcurrentReadableArray &&) = delete; |
| ConcurrentReadableArray &operator=(const ConcurrentReadableArray &) = delete; |
| |
| ConcurrentReadableArray() : Capacity(0), ReaderCount(0), Elements(nullptr) {} |
| |
| ~ConcurrentReadableArray() { |
| assert(ReaderCount.load(std::memory_order_acquire) == 0 && |
| "deallocating ConcurrentReadableArray with outstanding snapshots"); |
| deallocateFreeList(); |
| } |
| |
| void push_back(const ElemTy &elem) { |
| Mutex::ScopedLock guard(WriterLock); |
| |
| auto *storage = Elements.load(std::memory_order_relaxed); |
| auto count = storage ? storage->Count.load(std::memory_order_relaxed) : 0; |
| if (count >= Capacity) { |
| auto newCapacity = std::max((size_t)16, count * 2); |
| auto *newStorage = Storage::allocate(newCapacity); |
| if (storage) { |
| std::copy(storage->data(), storage->data() + count, newStorage->data()); |
| newStorage->Count.store(count, std::memory_order_release); |
| FreeList.push_back(storage); |
| } |
| |
| storage = newStorage; |
| Capacity = newCapacity; |
| Elements.store(storage, std::memory_order_release); |
| } |
| |
| new(&storage->data()[count]) ElemTy(elem); |
| storage->Count.store(count + 1, std::memory_order_release); |
| |
| if (ReaderCount.load(std::memory_order_acquire) == 0) |
| deallocateFreeList(); |
| } |
| |
| Snapshot snapshot() { |
| incrementReaders(); |
| auto *storage = Elements.load(SWIFT_MEMORY_ORDER_CONSUME); |
| if (storage == nullptr) { |
| return Snapshot(this, nullptr, 0); |
| } |
| |
| auto count = storage->Count.load(std::memory_order_acquire); |
| const auto *ptr = storage->data(); |
| return Snapshot(this, ptr, count); |
| } |
| }; |
| |
| using llvm::hash_value; |
| |
| /// A hash table that can be queried without taking any locks. Writes are still |
| /// locked and serialized, but only with respect to other writes. Writers can add |
| /// elements and clear the table, but they cannot remove individual elements. |
| /// Readers work by taking a snapshot of the table and then querying that |
| /// snapshot. |
| /// |
| /// The basic structure of the table consists of two arrays. Elements are stored |
| /// in a contiguous array, with new elements appended to the end. The second |
| /// array is the actual hash table, and it contains indices into the elements |
| /// array. This scheme cuts down on wasted space when the elements are larger |
| /// than a few bytes: instead of wasting `(1 - loadFactor) * sizeof(element)` |
| /// bytes on unused space in the hash table, we only waste `(1 - loadFactor) * |
| /// sizeof(index)`. This scheme also avoids readers seeing partially constructed |
| /// elements. |
| /// |
| /// Reader/writer synchronization for new elements is handled by keeping an |
| /// element count which is only incremented when the element has been fully |
| /// constructed. A reader which sees an index beyond its view of the current |
| /// count will ignore it and treat that as if there was no entry. |
| /// |
| /// Reader/writer synchronization for resizing the arrays is handled by tracking |
| /// the current number of active readers. When resizing, the new array is |
| /// allocated, the data copied, and then the old array is placed in a free list. |
| /// The free list is only deallocated if there are no readers, otherwise freeing |
| /// is deferred. |
| /// |
| /// Reader/writer synchronization for clearing the table is a combination of the |
| /// above. By keeping the old arrays around until all readers are finished, we |
| /// ensure that readers which started before the clear see valid (pre-clear) |
| /// data. Readers which see any array as empty will produce no results, thus |
| /// providing valid post-clear data. |
| /// |
| /// This is intended to be used for tables that exist for the life of the |
| /// process. It has no destructor, to avoid generating useless global destructor |
| /// calls. The memory it allocates can be freed by calling clear() with no |
| /// outstanding readers, but this won't destroy the static mutex it uses. |
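| /// |
| /// A minimal sketch of an element type and a lookup (illustrative only; |
| /// `IntElem` is hypothetical): |
| /// |
| /// \code |
| /// struct IntElem { |
| /// int Key; |
| /// int Value; |
| /// bool matchesKey(int key) const { return Key == key; } |
| /// friend llvm::hash_code hash_value(const IntElem &elem) { |
| /// return llvm::hash_value(elem.Key); |
| /// } |
| /// }; |
| /// |
| /// static ConcurrentReadableHashMap<IntElem> Table; |
| /// auto snapshot = Table.snapshot(); |
| /// if (const IntElem *found = snapshot.find(17)) |
| /// printf("%d\n", found->Value); |
| /// \endcode |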
| template <class ElemTy, class MutexTy = StaticMutex> |
| struct ConcurrentReadableHashMap { |
| // We use memcpy and don't call destructors. Make sure the elements will put |
| // up with this. |
| static_assert(std::is_trivially_copyable<ElemTy>::value, |
| "Elements must be trivially copyable."); |
| static_assert(std::is_trivially_destructible<ElemTy>::value, |
| "Elements must not have destructors (they won't be called)."); |
| |
| private: |
| /// The reciprocal of the fraction of empty index slots at which we expand the |
| /// table. A value of 4 means that we resize when only 1/4 of the slots remain |
| /// empty, i.e. at 75% load factor. |
| static const size_t ResizeProportion = 4; |
| |
| /// Get the "good size" for a given allocation size. When available, this |
| /// rounds up to the next allocation quantum by calling `malloc_good_size`. |
| /// Otherwise, just return the passed-in size, which is always valid even if |
| /// not necessarily optimal. |
| static size_t goodSize(size_t size) { |
| #if defined(__APPLE__) && defined(__MACH__) |
| return malloc_good_size(size); |
| #else |
| return size; |
| #endif |
| } |
| |
| /// A private class representing the storage of the indices. In order to |
| /// ensure that readers can get a consistent view of the indices with a single |
| /// atomic read, we store the size of the indices array inline, as the first |
| /// element in the array. |
| /// |
| /// We want the number of indices to be a power of two so that we can use a |
| /// bitwise AND to convert a hash code to an index. We want the entire array |
| /// to be a power of two in size to be friendly to the allocator, but the size |
| /// is stored inline. We work around this contradiction by considering the |
| /// first index to always be occupied with a value that never matches any key. |
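| /// |
| /// A sketch of the resulting slot computation (`hash` and `capacityLog2` are |
| /// placeholders): |
| /// |
| /// \code |
| /// size_t mask = (1UL << capacityLog2) - 1; |
| /// size_t slot = hash & mask; |
| /// if (slot == 0) slot = 1; // Slot 0 holds the capacity, never an index. |
| /// \endcode |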
| struct IndexStorage { |
| using RawType = uintptr_t; |
| |
| RawType Value; |
| |
| static constexpr uintptr_t log2(uintptr_t x) { |
| return x <= 1 ? 0 : log2(x >> 1) + 1; |
| } |
| |
| static constexpr uintptr_t InlineIndexBits = 4; |
| static constexpr uintptr_t InlineIndexMask = 0xF; |
| static constexpr uintptr_t InlineCapacity = |
| sizeof(RawType) * CHAR_BIT / InlineIndexBits; |
| static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity); |
| |
| // Indices can be stored in different ways, depending on how big they need |
| // to be. The index mode is stored in the bottom two bits of Value. The |
| // meaning of the rest of Value depends on the mode. |
| enum class IndexMode { |
| // Value is treated as an array of four-bit integers, storing the indices. |
| // The first element overlaps with the mode, and is never used. |
| Inline, |
| |
| // The rest of Value holds a pointer to storage. The first byte of this |
| // storage holds the log2 of the storage capacity. The storage is treated |
| // as an array of 8, 16, or 32-bit integers. The first element overlaps |
| // with the capacity, and is never used. |
| Array8, |
| Array16, |
| Array32, |
| }; |
| |
| IndexStorage() : Value(0) {} |
| IndexStorage(RawType value) : Value(value) {} |
| IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) { |
| assert(capacityLog2 > InlineCapacityLog2); |
| IndexMode mode; |
| switch (indexSize) { |
| case sizeof(uint8_t): |
| mode = IndexMode::Array8; |
| break; |
| case sizeof(uint16_t): |
| mode = IndexMode::Array16; |
| break; |
| case sizeof(uint32_t): |
| mode = IndexMode::Array32; |
| break; |
| default: |
| swift_unreachable("unknown index size"); |
| } |
| Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode); |
| *reinterpret_cast<uint8_t *>(ptr) = capacityLog2; |
| } |
| |
| bool valueIsPointer() { return Value & 3; } |
| |
| void *pointer() { |
| if (valueIsPointer()) |
| return (void *)(Value & (RawType)~3); |
| return nullptr; |
| } |
| |
| IndexMode indexMode() { return IndexMode(Value & 3); } |
| |
| // Index size is variable based on capacity, either 8, 16, or 32 bits. |
| // |
| // This is somewhat conservative. We could have, for example, a capacity of |
| // 512 but a maximum index of only 200, which would still allow for 8-bit |
| // indices. However, taking advantage of this would require reallocating |
| // the index storage when the element count crossed a threshold, which is |
| // more complex, and the advantages are minimal. This keeps it simple. |
| |
| // Get the size, in bytes, of the index needed for the given capacity. |
| static unsigned indexSize(uint8_t capacityLog2) { |
| if (capacityLog2 <= sizeof(uint8_t) * CHAR_BIT) |
| return sizeof(uint8_t); |
| if (capacityLog2 <= sizeof(uint16_t) * CHAR_BIT) |
| return sizeof(uint16_t); |
| return sizeof(uint32_t); |
| } |
| |
| uint8_t getCapacityLog2() { |
| if (auto *ptr = pointer()) |
| return *reinterpret_cast<uint8_t *>(ptr); |
| return InlineCapacityLog2; |
| } |
| |
| static IndexStorage allocate(size_t capacityLog2) { |
| assert(capacityLog2 > 0); |
| size_t capacity = 1UL << capacityLog2; |
| unsigned size = indexSize(capacityLog2); |
| auto *ptr = calloc(capacity, size); |
| if (!ptr) |
| swift::crash("Could not allocate memory."); |
| return IndexStorage(ptr, size, capacityLog2); |
| } |
| |
| unsigned loadIndexAt(size_t i, std::memory_order order) { |
| assert(i > 0 && "index zero is off-limits, used to store capacity"); |
| assert(i < (1 << getCapacityLog2()) && |
| "index is off the end of the indices"); |
| |
| switch (indexMode()) { |
| case IndexMode::Inline: |
| return (Value >> (i * InlineIndexBits)) & InlineIndexMask; |
| case IndexMode::Array8: |
| return ((std::atomic<uint8_t> *)pointer())[i].load(order); |
| case IndexMode::Array16: |
| return ((std::atomic<uint16_t> *)pointer())[i].load(order); |
| case IndexMode::Array32: |
| return ((std::atomic<uint32_t> *)pointer())[i].load(order); |
| } |
| } |
| |
| void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value, |
| size_t i, std::memory_order order) { |
| assert(i > 0 && "index zero is off-limits, used to store capacity"); |
| assert(i < (1 << getCapacityLog2()) && |
| "index is off the end of the indices"); |
| |
| switch (indexMode()) { |
| case IndexMode::Inline: { |
| assert(value == (value & InlineIndexMask) && "value is too big to fit"); |
| auto shift = i * InlineIndexBits; |
| assert((Value & (InlineIndexMask << shift)) == 0 && |
| "can't overwrite an existing index"); |
| assert(Value == inlineStorage->load(std::memory_order_relaxed) && |
| "writing with a stale IndexStorage"); |
| auto newStorage = Value | ((RawType)value << shift); |
| inlineStorage->store(newStorage, order); |
| break; |
| } |
| case IndexMode::Array8: |
| ((std::atomic<uint8_t> *)pointer())[i].store(value, order); |
| break; |
| case IndexMode::Array16: |
| ((std::atomic<uint16_t> *)pointer())[i].store(value, order); |
| break; |
| case IndexMode::Array32: |
| ((std::atomic<uint32_t> *)pointer())[i].store(value, order); |
| break; |
| } |
| } |
| }; |
| |
| /// The struct used for element storage. The `Elem` member is considered to be |
| /// the first element of a variable-length array, whose size is determined by |
| /// the allocation. |
| struct ElementStorage { |
| uint32_t Capacity; |
| ElemTy Elem; |
| |
| static ElementStorage *allocate(size_t capacity) { |
| auto headerSize = offsetof(ElementStorage, Elem); |
| auto size = goodSize(headerSize + capacity * sizeof(ElemTy)); |
| |
| auto *ptr = reinterpret_cast<ElementStorage *>(malloc(size)); |
| if (!ptr) |
| swift::crash("Could not allocate memory."); |
| |
| ptr->Capacity = (size - headerSize) / sizeof(ElemTy); |
| |
| return ptr; |
| } |
| |
| ElemTy *data() { return &Elem; } |
| }; |
| |
| /// A simple linked list representing pointers that need to be freed. |
| struct FreeListNode { |
| FreeListNode *Next; |
| void *Ptr; |
| |
| static void add(FreeListNode **head, void *ptr) { |
| auto *newNode = new FreeListNode{*head, ptr}; |
| *head = newNode; |
| } |
| |
| static void freeAll(FreeListNode **head) { |
| auto *node = *head; |
| while (node) { |
| auto *next = node->Next; |
| free(node->Ptr); |
| delete node; |
| node = next; |
| } |
| *head = nullptr; |
| } |
| }; |
| |
| /// The number of readers currently active, equal to the number of snapshot |
| /// objects currently alive. |
| std::atomic<uint32_t> ReaderCount{0}; |
| |
| /// The number of elements in the elements array. |
| std::atomic<uint32_t> ElementCount{0}; |
| |
| /// The array of elements. |
| std::atomic<ElementStorage *> Elements{nullptr}; |
| |
| /// The array of indices. |
| /// |
| /// This has to be stored as a IndexStorage::RawType instead of a IndexStorage |
| /// because some of our targets don't support interesting structs as atomic |
| /// types. See also MetadataCache::TrackingInfo which uses the same technique. |
| std::atomic<typename IndexStorage::RawType> Indices{0}; |
| |
| /// The writer lock, which must be taken before any mutation of the table. |
| MutexTy WriterLock; |
| |
| /// The list of pointers to be freed once no readers are active. |
| FreeListNode *FreeList{nullptr}; |
| |
| void incrementReaders() { |
| ReaderCount.fetch_add(1, std::memory_order_acquire); |
| } |
| |
| void decrementReaders() { |
| ReaderCount.fetch_sub(1, std::memory_order_release); |
| } |
| |
| /// Free all the arrays in the free list if there are no active readers. If |
| /// there are active readers, do nothing. |
| void deallocateFreeListIfSafe() { |
| if (ReaderCount.load(std::memory_order_acquire) == 0) |
| FreeListNode::freeAll(&FreeList); |
| } |
| |
| /// Grow the elements array, adding the old array to the free list and |
| /// returning the new array with all existing elements copied into it. |
| ElementStorage *resize(ElementStorage *elements, size_t elementCount) { |
| // Grow capacity by 25%, making sure we grow by at least 1. |
| size_t newCapacity = |
| std::max(elementCount + (elementCount >> 2), elementCount + 1); |
| auto *newElements = ElementStorage::allocate(newCapacity); |
| |
| if (elements) { |
| memcpy(newElements->data(), elements->data(), |
| elementCount * sizeof(ElemTy)); |
| FreeListNode::add(&FreeList, elements); |
| } |
| |
| Elements.store(newElements, std::memory_order_release); |
| return newElements; |
| } |
| |
| /// Grow the indices array, adding the old array to the free list and |
| /// returning the new array with all existing indices copied into it. This |
| /// operation performs a rehash, so that the indices are in the correct |
| /// location in the new array. |
| IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2, |
| ElemTy *elements) { |
| // Double the size. |
| size_t newCapacityLog2 = indicesCapacityLog2 + 1; |
| size_t newMask = (1UL << newCapacityLog2) - 1; |
| |
| IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2); |
| |
| size_t indicesCount = 1UL << indicesCapacityLog2; |
| for (size_t i = 1; i < indicesCount; i++) { |
| unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed); |
| if (index == 0) |
| continue; |
| |
| auto *element = &elements[index - 1]; |
| auto hash = hash_value(*element); |
| |
| size_t newI = hash & newMask; |
| // Index 0 is unusable (occupied by the capacity), so always skip it. |
| while (newI == 0 || |
| newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) { |
| newI = (newI + 1) & newMask; |
| } |
| newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed); |
| } |
| |
| Indices.store(newIndices.Value, std::memory_order_release); |
| |
| if (auto *ptr = indices.pointer()) |
| FreeListNode::add(&FreeList, ptr); |
| |
| return newIndices; |
| } |
| |
| /// Search for the given key within the given indices and elements arrays. If |
| /// an entry already exists for that key, return a pointer to the element. If |
| /// no entry exists, return the location in the indices array where the index |
| /// of the new element would be stored. |
| template <class KeyTy> |
| static std::pair<ElemTy *, unsigned> |
| find(const KeyTy &key, IndexStorage indices, size_t elementCount, |
| ElemTy *elements) { |
| auto hash = hash_value(key); |
| auto indicesMask = (1UL << indices.getCapacityLog2()) - 1; |
| |
| auto i = hash & indicesMask; |
| while (true) { |
| // Index 0 is used to store the capacity and is not actually an index. |
| if (i == 0) |
| i++; |
| |
| auto index = indices.loadIndexAt(i, std::memory_order_acquire); |
| // Element indices are 1-based, 0 means no entry. |
| if (index == 0) |
| return {nullptr, i}; |
| if (index - 1 < elementCount) { |
| auto *candidate = &elements[index - 1]; |
| if (candidate->matchesKey(key)) |
| return {candidate, 0}; |
| } |
| |
| i = (i + 1) & indicesMask; |
| } |
| } |
| |
| public: |
| // Implicitly trivial constructor/destructor. |
| ConcurrentReadableHashMap() = default; |
| ~ConcurrentReadableHashMap() = default; |
| |
| // This type cannot be safely copied or moved. |
| ConcurrentReadableHashMap(const ConcurrentReadableHashMap &) = delete; |
| ConcurrentReadableHashMap(ConcurrentReadableHashMap &&) = delete; |
| ConcurrentReadableHashMap & |
| operator=(const ConcurrentReadableHashMap &) = delete; |
| |
| /// Returns whether there are outstanding readers. For testing purposes only. |
| bool hasActiveReaders() { |
| return ReaderCount.load(std::memory_order_relaxed) > 0; |
| } |
| |
| /// Readers take a snapshot of the hash map, then work with the snapshot. |
| class Snapshot { |
| ConcurrentReadableHashMap *Map; |
| IndexStorage Indices; |
| ElemTy *Elements; |
| size_t ElementCount; |
| |
| public: |
| Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices, |
| ElemTy *elements, size_t elementCount) |
| : Map(map), Indices(indices), Elements(elements), |
| ElementCount(elementCount) {} |
| |
| Snapshot(const Snapshot &other) |
| : Map(other.Map), Indices(other.Indices), Elements(other.Elements), |
| ElementCount(other.ElementCount) { |
| Map->incrementReaders(); |
| } |
| |
| ~Snapshot() { Map->decrementReaders(); } |
| |
| /// Search for an element matching the given key. Returns a pointer to the |
| /// found element, or nullptr if no matching element exists. |
| template <class KeyTy> const ElemTy *find(const KeyTy &key) { |
| if (!Indices.Value || !ElementCount || !Elements) |
| return nullptr; |
| return ConcurrentReadableHashMap::find(key, Indices, ElementCount, |
| Elements) |
| .first; |
| } |
| }; |
| |
| /// Take a snapshot of the current state of the hash map. |
| Snapshot snapshot() { |
| incrementReaders(); |
| |
| // Carefully loading the indices, element count, and elements pointer in |
| // order ensures a consistent view of the table with respect to concurrent |
| // inserts. However, this is not sufficient to avoid an inconsistent view |
| // with respect to concurrent clears. The danger scenario is: |
| // |
| // 1. Read indices and elementCount from a table with N entries. |
| // 2. Another thread clears the table. |
| // 3. Another thread inserts M entries, where M < N. |
| // 4. The reader thread reads elements. |
| // 5. The reader thread performs a find. The key's hash leads us to an index |
| // I, where I > M. |
| // 6. The reader thread reads from element I, which is off the end of the |
| // elements array. |
| // |
| // To avoid this, read the elements pointer twice, at the beginning and end. |
| // If the values are not the same then there may have been a clear in the |
| // middle, so we retry. This will have false positives: a new element |
| // pointer can just mean a concurrent insert that triggered a resize of the |
| // elements array. This is harmless aside from a small performance hit, and |
| // should not happen often. |
| IndexStorage indices; |
| size_t elementCount; |
| ElementStorage *elements; |
| ElementStorage *elements2; |
| do { |
| elements = Elements.load(std::memory_order_acquire); |
| indices = Indices.load(std::memory_order_acquire); |
| elementCount = ElementCount.load(std::memory_order_acquire); |
| elements2 = Elements.load(std::memory_order_acquire); |
| } while (elements != elements2); |
| |
| ElemTy *elementsPtr = elements ? elements->data() : nullptr; |
| return Snapshot(this, indices, elementsPtr, elementCount); |
| } |
| |
| /// Get an element by key, or insert a new element for that key if one is not |
| /// already present. Invoke `call` with the pointer to the element. BEWARE: |
| /// `call` is invoked with the internal writer lock held, keep work to a |
| /// minimum. |
| /// |
| /// `call` is passed the following parameters: |
| /// - `element`: the pointer to the element corresponding to `key` |
| /// - `created`: true if the element is newly created, false if it already |
| /// exists |
| /// `call` returns a `bool`. When `created` is `true`, the return value means: |
| /// - `true`: the new entry is to be kept |
| /// - `false`: the new entry is discarded |
| /// If the new entry is kept, then the new element MUST be initialized, and |
| /// have a hash value that matches the hash value of `key`. |
| /// |
| /// The return value is ignored when `created` is `false`. |
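| /// |
| /// A sketch of a typical call, reusing the hypothetical `IntElem` element type |
| /// from the class documentation (`computeValue` is a placeholder): |
| /// |
| /// \code |
| /// Table.getOrInsert(17, [&](IntElem *elem, bool created) { |
| /// if (created) { |
| /// elem->Key = 17; // Must hash identically to the key. |
| /// elem->Value = computeValue(); // Keep this cheap; the lock is held. |
| /// } |
| /// return true; // Keep the new entry. |
| /// }); |
| /// \endcode |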
| template <class KeyTy, typename Call> |
| void getOrInsert(KeyTy key, const Call &call) { |
| typename MutexTy::ScopedLock guard(WriterLock); |
| |
| auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)}; |
| auto indicesCapacityLog2 = indices.getCapacityLog2(); |
| auto elementCount = ElementCount.load(std::memory_order_relaxed); |
| auto *elements = Elements.load(std::memory_order_relaxed); |
| auto *elementsPtr = elements ? elements->data() : nullptr; |
| |
| auto found = this->find(key, indices, elementCount, elementsPtr); |
| if (found.first) { |
| call(found.first, false); |
| deallocateFreeListIfSafe(); |
| return; |
| } |
| |
| auto indicesCapacity = 1UL << indicesCapacityLog2; |
| |
| // The number of slots in use is elementCount + 1, since the capacity also |
| // takes a slot. |
| auto emptyCount = indicesCapacity - (elementCount + 1); |
| auto proportion = indicesCapacity / emptyCount; |
| if (proportion >= ResizeProportion) { |
| indices = resize(indices, indicesCapacityLog2, elementsPtr); |
| found = find(key, indices, elementCount, elementsPtr); |
| assert(!found.first && "Shouldn't suddenly find the key after rehashing"); |
| } |
| |
| if (!elements || elementCount >= elements->Capacity) { |
| elements = resize(elements, elementCount); |
| } |
| auto *element = &elements->data()[elementCount]; |
| |
| // Order matters: fill out the element, then update the count, |
| // then update the index. |
| bool keep = call(element, true); |
| if (keep) { |
| assert(hash_value(key) == hash_value(*element) && |
| "Element must have the same hash code as its key."); |
| ElementCount.store(elementCount + 1, std::memory_order_release); |
| indices.storeIndexAt(&Indices, elementCount + 1, found.second, |
| std::memory_order_release); |
| } |
| |
| deallocateFreeListIfSafe(); |
| } |
| |
| /// Clear the hash table, freeing (when safe) all memory currently used for |
| /// indices and elements. |
| void clear() { |
| typename MutexTy::ScopedLock guard(WriterLock); |
| |
| IndexStorage indices = Indices.load(std::memory_order_relaxed); |
| auto *elements = Elements.load(std::memory_order_relaxed); |
| |
| // Order doesn't matter here, snapshots will gracefully handle any field |
| // being NULL/0 while the others are not. |
| Indices.store(0, std::memory_order_relaxed); |
| ElementCount.store(0, std::memory_order_relaxed); |
| Elements.store(nullptr, std::memory_order_relaxed); |
| |
| if (auto *ptr = indices.pointer()) |
| FreeListNode::add(&FreeList, ptr); |
| FreeListNode::add(&FreeList, elements); |
| |
| deallocateFreeListIfSafe(); |
| } |
| }; |
| |
| /// A wrapper type for indirect hash map elements. Stores a pointer to the real |
| /// element and forwards key matching and hashing. |
| template <class ElemTy> struct HashMapElementWrapper { |
| ElemTy *Ptr; |
| |
| template <class KeyTy> bool matchesKey(const KeyTy &key) { |
| return Ptr->matchesKey(key); |
| } |
| |
| friend llvm::hash_code hash_value(const HashMapElementWrapper &wrapper) { |
| return hash_value(*wrapper.Ptr); |
| } |
| }; |
| |
| /// A ConcurrentReadableHashMap that provides stable addresses for the elements |
| /// by allocating them separately and storing pointers to them. The elements of |
| /// the hash table are instances of HashMapElementWrapper. A new getOrInsert |
| /// method is provided that directly returns the stable element pointer. |
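| /// |
| /// Usage is analogous to ConcurrentReadableHashMap, except that getOrInsert |
| /// returns the stable element pointer directly (a sketch; `StringEntry` is a |
| /// hypothetical element type providing matchesKey, hash_value, a (key, args...) |
| /// constructor, and a static getExtraAllocationSize(key, args...)): |
| /// |
| /// \code |
| /// static StableAddressConcurrentReadableHashMap<StringEntry, |
| /// llvm::MallocAllocator> Names; |
| /// auto inserted = Names.getOrInsert("swift"); |
| /// StringEntry *entry = inserted.first; // Address remains stable thereafter. |
| /// \endcode |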
| template <class ElemTy, class Allocator, class MutexTy = StaticMutex> |
| struct StableAddressConcurrentReadableHashMap |
| : public ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy> { |
| // Implicitly trivial destructor. |
| ~StableAddressConcurrentReadableHashMap() = default; |
| |
| std::atomic<ElemTy *> LastFound{nullptr}; |
| |
| /// Get or insert an element for the given key and arguments. Returns the |
| /// pointer to the existing or new element, and a bool indicating whether the |
| /// element was created. When false, the element already existed before the |
| /// call. |
| template <class KeyTy, class... ArgTys> |
| std::pair<ElemTy *, bool> getOrInsert(KeyTy key, ArgTys &&...args) { |
| // See if this is a repeat of the previous search. |
| if (auto lastFound = LastFound.load(std::memory_order_acquire)) |
| if (lastFound->matchesKey(key)) |
| return {lastFound, false}; |
| |
| // Optimize for the case where the value already exists. |
| { |
| // Tightly scope the snapshot so it's gone before we call getOrInsert |
| // below, otherwise that call will always see an outstanding snapshot and |
| // never be able to collect garbage. |
| auto snapshot = this->snapshot(); |
| if (auto wrapper = snapshot.find(key)) { |
| LastFound.store(wrapper->Ptr, std::memory_order_relaxed); |
| return {wrapper->Ptr, false}; |
| } |
| } |
| |
| // No such element. Insert if needed. Note: another thread may have inserted |
| // it in the meantime, so we still have to handle both cases! |
| ElemTy *ptr = nullptr; |
| bool outerCreated = false; |
| ConcurrentReadableHashMap<HashMapElementWrapper<ElemTy>, MutexTy>:: |
| getOrInsert(key, [&](HashMapElementWrapper<ElemTy> *wrapper, |
| bool created) { |
| if (created) { |
| // Created the indirect entry. Allocate the actual storage. |
| size_t allocSize = |
| sizeof(ElemTy) + ElemTy::getExtraAllocationSize(key, args...); |
| void *memory = Allocator().Allocate(allocSize, alignof(ElemTy)); |
| new (memory) ElemTy(key, std::forward<ArgTys>(args)...); |
| wrapper->Ptr = reinterpret_cast<ElemTy *>(memory); |
| } |
| ptr = wrapper->Ptr; |
| outerCreated = created; |
| return true; // Keep the new entry. |
| }); |
| LastFound.store(ptr, std::memory_order_relaxed); |
| return {ptr, outerCreated}; |
| } |
| |
| template <class KeyTy> ElemTy *find(const KeyTy &key) { |
| auto result = this->snapshot().find(key); |
| if (!result) |
| return nullptr; |
| return result->Ptr; |
| } |
| |
| private: |
| // Clearing would require deallocating elements, which we don't support. |
| void clear() = delete; |
| }; |
| |
| } // end namespace swift |
| |
| #endif // SWIFT_RUNTIME_CONCURRENTUTILS_H |