include/swift/Runtime/Concurrent.h - third_party/swift - Git at Google

 //===--- Concurrent.h - Concurrent Data Structures  -------------*- C++ -*-===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//
 #ifndef SWIFT_RUNTIME_CONCURRENTUTILS_H
 #define SWIFT_RUNTIME_CONCURRENTUTILS_H
 #include <algorithm>
 #include <atomic>
 #include <functional>
 #include <iterator>
 #include <stddef.h>
 #include <stdint.h>
 #include <utility>
 #include <vector>
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Allocator.h"
 #include "Atomic.h"
 #include "Debug.h"
 #include "Mutex.h"

 #if defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__HAIKU__)
 #include <stdio.h>
 #endif

 #if defined(__APPLE__) && defined(__MACH__)
 #include <malloc/malloc.h>
 #endif

 namespace swift {

 /// A single link in a concurrent linked list: one stored element plus a
 /// pointer to the link that follows it. Links can be neither copied nor
 /// assigned.
 template <class ElemTy> struct ConcurrentListNode {
   /// Create an unlinked node (Next is null) holding \p Elem.
   ///
   /// \p Elem is received by value and moved into the payload, so building a
   /// node costs at most one copy and move-only element types are accepted.
   ConcurrentListNode(ElemTy Elem) : Payload(std::move(Elem)), Next(nullptr) {}
   ConcurrentListNode(const ConcurrentListNode &) = delete;
   ConcurrentListNode &operator=(const ConcurrentListNode &) = delete;

   /// The element.
   ElemTy Payload;
   /// Points to the next link in the chain.
   ConcurrentListNode<ElemTy> *Next;
 };

 /// A deliberately simple lock-free singly linked list. Elements can be pushed
 /// onto the front concurrently and the list can be walked with forward
 /// iterators; removing individual links is not supported.
 ///
 /// 'push_front' allocates a new link and publishes it by compare-and-swapping
 /// the head pointer. Under contention the swap may have to be retried several
 /// times, but every retry re-links the new node in front of the head that was
 /// just observed, so the head eventually points at a link that already points
 /// at the previous head. See 'push_front' for more details.
 template <class ElemTy> struct ConcurrentList {
   ConcurrentList() : First(nullptr) {}
   ~ConcurrentList() {
     clear();
   }

   /// Remove all of the links in the chain. This method leaves
   /// the list at a usable state and new links can be added.
   /// Notice that this operation is non-concurrent because
   /// we have no way of ensuring that no one is currently
   /// traversing the list.
   void clear() {
     // Detach the whole chain from the head in one step, then free it.
     auto Ptr = First.exchange(nullptr, std::memory_order_acq_rel);

     while (Ptr) {
       auto N = Ptr->Next;
       delete Ptr;
       Ptr = N;
     }
   }

   ConcurrentList(const ConcurrentList &) = delete;
   ConcurrentList &operator=(const ConcurrentList &) = delete;

   /// A forward iterator over the links of the list.
   ///
   /// The iterator traits are declared as member types rather than inherited
   /// from std::iterator, which is deprecated as of C++17.
   struct ConcurrentListIterator {
     using iterator_category = std::forward_iterator_tag;
     using value_type = ElemTy;
     using difference_type = ptrdiff_t;
     using pointer = ElemTy *;
     using reference = ElemTy &;

     /// Points to the current link.
     ConcurrentListNode<ElemTy> *Ptr;
     /// C'tor.
     ConcurrentListIterator(ConcurrentListNode<ElemTy> *P) : Ptr(P) {}
     /// Move to the next element.
     ConcurrentListIterator &operator++() {
       Ptr = Ptr->Next;
       return *this;
     }
     /// Move to the next element, returning the position held before the move.
     ConcurrentListIterator operator++(int) {
       ConcurrentListIterator Old = *this;
       Ptr = Ptr->Next;
       return Old;
     }
     /// Access the element.
     ElemTy &operator*() const { return Ptr->Payload; }
     /// Access a member of the element.
     ElemTy *operator->() const { return &Ptr->Payload; }
     /// Same?
     bool operator==(const ConcurrentListIterator &o) const {
       return o.Ptr == Ptr;
     }
     /// Not the same?
     bool operator!=(const ConcurrentListIterator &o) const {
       return o.Ptr != Ptr;
     }
   };

   /// Iterator entry point.
   typedef ConcurrentListIterator iterator;
   /// Marks the beginning of the list.
   iterator begin() const {
     return ConcurrentListIterator(First.load(std::memory_order_acquire));
   }
   /// Marks the end of the list.
   iterator end() const { return ConcurrentListIterator(nullptr); }

   /// Add a new item to the front of the list.
   void push_front(ElemTy Elem) {
     // Allocate a new node, handing the element over instead of copying it
     // a second time.
     ConcurrentListNode<ElemTy> *N =
         new ConcurrentListNode<ElemTy>(std::move(Elem));
     // Point to the first element in the list.
     N->Next = First.load(std::memory_order_acquire);
     // Try to replace the current First with the new node. When the exchange
     // fails it stores the head it actually observed into N->Next, so the new
     // node is already linked in front of that head for the next attempt.
     while (!First.compare_exchange_weak(N->Next, N,
                                         std::memory_order_release,
                                         std::memory_order_relaxed)) {
     }
   }

   /// Points to the first link in the list.
   std::atomic<ConcurrentListNode<ElemTy> *> First;
 };

 /// Three-way comparison of two integers, handy when implementing
 /// compareWithKey.
 ///
 /// \returns 0 when the values are equal, -1 when \p left is smaller than
 ///   \p right, and 1 otherwise.
 template <class T>
 static inline int compareIntegers(T left, T right) {
   if (left == right)
     return 0;
   return left < right ? -1 : 1;
 }

 /// Three-way comparison of two pointers, handy when implementing
 /// compareWithKey. The ordering is the one given by std::less.
 ///
 /// \returns 0 when the pointers are equal, -1 when \p left orders before
 ///   \p right, and 1 otherwise.
 template <class T>
 static inline int comparePointers(const T *left, const T *right) {
   if (left == right)
     return 0;
   const bool leftOrdersFirst = std::less<const T *>()(left, right);
   return leftOrdersFirst ? -1 : 1;
 }

 template <class EntryTy, bool ProvideDestructor, class Allocator>
 class ConcurrentMapBase;

 /// The partial specialization of ConcurrentMapBase whose destructor is
 /// trivial.  The other implementation inherits from this, so this is a
 /// base for all ConcurrentMaps.
 ///
 /// The allocator is a protected base class; destroyNode calls its
 /// Deallocate member directly.
 template <class EntryTy, class Allocator>
 class ConcurrentMapBase<EntryTy, false, Allocator> : protected Allocator {
 protected:
   /// A node of the binary tree: two atomically updated child edges and the
   /// entry itself.
   struct Node {
     std::atomic<Node*> Left;
     std::atomic<Node*> Right;
     EntryTy Payload;

     /// Create a leaf node, forwarding \p args to the entry's constructor.
     template <class... Args>
     Node(Args &&... args)
       : Left(nullptr), Right(nullptr), Payload(std::forward<Args>(args)...) {}

     Node(const Node &) = delete;
     Node &operator=(const Node &) = delete;

   #ifndef NDEBUG
     /// Print this node and, recursively, its subtrees as Graphviz records
     /// and edges on standard output.
     void dump() const {
       auto L = Left.load(std::memory_order_acquire);
       auto R = Right.load(std::memory_order_acquire);
       // The key summary comes from getKeyIntValueForDump(), the accessor the
       // ConcurrentMap entry requirements name.  %p requires a void pointer
       // and %lx an unsigned long, hence the casts.
       printf("\"%p\" [ label = \" {<f0> %08lx | {<f1> | <f2>}}\" "
              "style=\"rounded\" shape=\"record\"];\n",
              static_cast<const void *>(this),
              static_cast<unsigned long>(Payload.getKeyIntValueForDump()));

       if (L) {
         L->dump();
         printf("\"%p\":f1 -> \"%p\":f0;\n",
                static_cast<const void *>(this), static_cast<const void *>(L));
       }
       if (R) {
         R->dump();
         printf("\"%p\":f2 -> \"%p\":f0;\n",
                static_cast<const void *>(this), static_cast<const void *>(R));
       }
     }
   #endif
   };

   /// The root edge of the tree; null while the map is empty.
   std::atomic<Node*> Root;

   constexpr ConcurrentMapBase() : Root(nullptr) {}

   // Implicitly trivial destructor.
   ~ConcurrentMapBase() = default;

   /// Run the destructor of \p node and hand its memory back to the
   /// allocator.  \p node must not be null.
   void destroyNode(Node *node) {
     assert(node && "destroying null node");
     // The size must be read before the payload is destroyed.
     auto allocSize = sizeof(Node) + node->Payload.getExtraAllocationSize();

     // Destroy the node's payload.
     node->~Node();

     // Deallocate the node.  The static_cast here is required
     // because LLVM's allocator API is insane.
     this->Deallocate(static_cast<void*>(node), allocSize, alignof(Node));
   }
 };

 /// The partial specialization of ConcurrentMapBase which provides a
 /// non-trivial destructor.
 template <class EntryTy, class Allocator>
 class ConcurrentMapBase<EntryTy, true, Allocator>
     : protected ConcurrentMapBase<EntryTy, false, Allocator> {
 protected:
   using super = ConcurrentMapBase<EntryTy, false, Allocator>;
   using Node = typename super::Node;

   constexpr ConcurrentMapBase() {}

   ~ConcurrentMapBase() {
     destroyTree(this->Root);
   }

 private:
   void destroyTree(const std::atomic<Node*> &edge) {
     // This can be a relaxed load because destruction is not allowed to race
     // with other operations.
     auto node = edge.load(std::memory_order_relaxed);
     if (!node) return;

     // Destroy the node's children.
     destroyTree(node->Left);
     destroyTree(node->Right);

     // Destroy the node itself.
     this->destroyNode(node);
   }
 };

 /// A concurrent map built on an unbalanced binary tree.  Insertions may
 /// happen concurrently; removing entries and rebalancing the tree are not
 /// supported.
 ///
 /// The entry type must provide the following operations:
 ///
 ///   /// For debugging purposes only. Summarize this key as an integer value.
 ///   intptr_t getKeyIntValueForDump() const;
 ///
 ///   /// A ternary comparison.  KeyTy is the type of the key provided
 ///   /// to find or getOrInsert.
 ///   int compareWithKey(KeyTy key) const;
 ///
 ///   /// Return the amount of extra trailing space required by an entry,
 ///   /// where KeyTy is the type of the first argument to getOrInsert and
 ///   /// ArgTys is the type of the remaining arguments.
 ///   static size_t getExtraAllocationSize(KeyTy key, ArgTys...)
 ///
 ///   /// Return the amount of extra trailing space that was requested for
 ///   /// this entry.  This method is only used to compute the size of the
 ///   /// object during node deallocation; it does not need to return a
 ///   /// correct value so long as the allocator's Deallocate implementation
 ///   /// ignores this argument.
 ///   size_t getExtraAllocationSize() const;
 ///
 /// If ProvideDestructor is false, the destructor will be trivial.  This
 /// can be appropriate when the object is declared at global scope.
 template <class EntryTy, bool ProvideDestructor = true,
           class Allocator = llvm::MallocAllocator>
 class ConcurrentMap
       : private ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator> {
   using super = ConcurrentMapBase<EntryTy, ProvideDestructor, Allocator>;

   using Node = typename super::Node;

   /// Inherited from base class:
   ///   std::atomic<Node*> Root;
   using super::Root;

   /// The node most recently returned by a lookup or insertion.  Checking it
   /// first speeds up code that asks for the same key repeatedly.
   std::atomic<Node*> LastSearch;

 public:
   constexpr ConcurrentMap() : LastSearch(nullptr) {}

   ConcurrentMap(const ConcurrentMap &) = delete;
   ConcurrentMap &operator=(const ConcurrentMap &) = delete;

   // ConcurrentMap<T, false> must have a trivial destructor.
   ~ConcurrentMap() = default;

 public:

   /// Access the allocator this map was instantiated with.
   Allocator &getAllocator() {
     return *this;
   }

 #ifndef NDEBUG
   /// Print the whole tree as a Graphviz digraph on standard output.
   void dump() const {
     Node *rootNode = Root.load(std::memory_order_acquire);
     printf("digraph g {\n"
            "graph [ rankdir = \"TB\"];\n"
            "node  [ fontsize = \"16\" ];\n"
            "edge  [ ];\n");
     if (rootNode)
       rootNode->dump();
     printf("\n}\n");
   }
 #endif

   /// Look up the entry for \p key.
   /// \returns a pointer to the entry, or null if the key is not in the map.
   template <class KeyTy>
   EntryTy *find(const KeyTy &key) {
     // Fast path: the previous lookup may have been for this very key.
     Node *cached = LastSearch.load(std::memory_order_acquire);
     if (cached && cached->Payload.compareWithKey(key) == 0)
       return &cached->Payload;

     // Slow path: walk down from the root until we hit the key or fall off
     // the tree.
     for (Node *cursor = Root.load(std::memory_order_acquire); cursor;) {
       const int order = cursor->Payload.compareWithKey(key);
       if (order == 0) {
         LastSearch.store(cursor, std::memory_order_release);
         return &cursor->Payload;
       }

       std::atomic<Node*> &child = order < 0 ? cursor->Left : cursor->Right;
       cursor = child.load(std::memory_order_acquire);
     }

     return nullptr;
   }

   /// Get or create an entry in the map.
   ///
   /// \returns the entry in the map and whether a new node was added (true)
   ///   or already existed (false)
   template <class KeyTy, class... ArgTys>
   std::pair<EntryTy*, bool> getOrInsert(KeyTy key, ArgTys &&... args) {
     // Fast path: the previous lookup may have been for this very key.
     Node *cached = LastSearch.load(std::memory_order_acquire);
     if (cached && cached->Payload.compareWithKey(key) == 0)
       return { &cached->Payload, false };

     // The node we allocate if the key turns out to be missing.  It is
     // created at most once and reused across retries.
     Node *pending = nullptr;

     // `slot` is the edge under inspection and `occupant` the value most
     // recently observed in it.  Start from the root.
     std::atomic<Node*> *slot = &Root;
     Node *occupant = slot->load(std::memory_order_acquire);

     while (true) {
       // A node sits on this edge: it is either the match or we continue
       // into one of its children.
       if (occupant) {
         const int order = occupant->Payload.compareWithKey(key);

         if (order == 0) {
           // Throw away the node we allocated on an earlier iteration, if
           // any.
           if (pending)
             this->destroyNode(pending);

           // Cache and report that we found an existing node.
           LastSearch.store(occupant, std::memory_order_release);
           return { &occupant->Payload, false };
         }

         // Descend along the appropriate child edge.
         slot = order < 0 ? &occupant->Left : &occupant->Right;
         occupant = slot->load(std::memory_order_acquire);
         continue;
       }

       // The edge is empty, so the key is absent.  Build the node to insert
       // unless a previous attempt already did.
       if (!pending) {
         size_t allocSize =
           sizeof(Node) + EntryTy::getExtraAllocationSize(key, args...);
         void *memory = this->Allocate(allocSize, alignof(Node));
         pending = ::new (memory) Node(key, std::forward<ArgTys>(args)...);
       }

       // Try to hang the new node on the edge.
       if (slot->compare_exchange_strong(occupant, pending,
                                         std::memory_order_acq_rel,
                                         std::memory_order_acquire)) {
         // Success: cache and report that we created a new node.
         LastSearch.store(pending, std::memory_order_release);
         return { &pending->Payload, true };
       }

       // Another thread filled the edge first.  The failed exchange left the
       // winning node in `occupant`; resume the search from it.
       assert(occupant && "spurious failure from compare_exchange_strong?");
     }
   }
 };


 /// An append-only array that can be read without taking locks. Writes
 /// are still locked and serialized, but only with respect to other
 /// writes.
 ///
 /// Readers call snapshot() and iterate over the returned Snapshot. Every
 /// live Snapshot is counted in ReaderCount; storage that a resize has
 /// replaced is parked on a free list and only released by a later write that
 /// observes no readers, or by the destructor.
 template <class ElemTy> struct ConcurrentReadableArray {
 private:
   /// The struct used for the array's storage. The `Elem` member is
   /// considered to be the first element of a variable-length array,
   /// whose size is determined by the allocation. The `Capacity` member
   /// from `ConcurrentReadableArray` indicates how large it can be.
   struct Storage {
     std::atomic<size_t> Count;
     typename std::aligned_storage<sizeof(ElemTy), alignof(ElemTy)>::type Elem;

     /// Allocate storage with room for \p capacity elements and a count of
     /// zero. Crashes if the allocation fails.
     static Storage *allocate(size_t capacity) {
       // One element slot is already part of sizeof(Storage).
       auto size = sizeof(Storage) + (capacity - 1) * sizeof(Storage().Elem);
       auto *ptr = reinterpret_cast<Storage *>(malloc(size));
       if (!ptr) swift::crash("Could not allocate memory.");
       ptr->Count.store(0, std::memory_order_relaxed);
       return ptr;
     }

     /// Destroy the constructed elements and free the storage itself.
     void deallocate() {
       // Read the count once instead of reloading the atomic per iteration.
       const size_t count = Count.load(std::memory_order_acquire);
       for (size_t i = 0; i < count; ++i) {
         data()[i].~ElemTy();
       }
       free(this);
     }

     /// The address of the first element slot.
     ElemTy *data() {
       return reinterpret_cast<ElemTy *>(&Elem);
     }
   };

   /// The number of elements the current storage has room for.
   size_t Capacity;
   /// The number of Snapshot objects currently alive.
   std::atomic<size_t> ReaderCount;
   /// The current storage; null until the first push_back.
   std::atomic<Storage *> Elements;
   /// Serializes writers.
   Mutex WriterLock;
   /// Replaced storage that may still be referenced by snapshots.
   std::vector<Storage *> FreeList;

   void incrementReaders() {
     ReaderCount.fetch_add(1, std::memory_order_acquire);
   }

   void decrementReaders() {
     ReaderCount.fetch_sub(1, std::memory_order_release);
   }

   /// Release every storage block parked on the free list.
   void deallocateFreeList() {
     for (Storage *storage : FreeList)
       storage->deallocate();
     FreeList.clear();
     FreeList.shrink_to_fit();
   }

 public:
   /// A view of the elements that were present when the snapshot was taken.
   /// While a Snapshot is alive it is counted as a reader of its array.
   struct Snapshot {
     ConcurrentReadableArray *Array;
     const ElemTy *Start;
     size_t Count;

     /// Wrap an already-counted read of \p array. This constructor does not
     /// touch the reader count itself; snapshot() increments it beforehand.
     Snapshot(ConcurrentReadableArray *array, const ElemTy *start, size_t count)
       : Array(array), Start(start), Count(count) {}

     Snapshot(const Snapshot &other)
       : Array(other.Array), Start(other.Start), Count(other.Count) {
       Array->incrementReaders();
     }

     /// Copy assignment keeps the reader counts balanced: the array being
     /// adopted gains a reader and the array being dropped loses one. The
     /// implicitly generated operator would only copy the pointers, so the
     /// two destructors would later decrement the wrong counts.
     Snapshot &operator=(const Snapshot &other) {
       // Increment first so that self-assignment, or assignment between two
       // snapshots of the same array, never drops the count in between.
       other.Array->incrementReaders();
       Array->decrementReaders();
       Array = other.Array;
       Start = other.Start;
       Count = other.Count;
       return *this;
     }

     ~Snapshot() {
       Array->decrementReaders();
     }

     const ElemTy *begin() const { return Start; }
     const ElemTy *end() const { return Start + Count; }
     size_t count() const { return Count; }
   };

   // This type cannot be safely copied or moved.
   ConcurrentReadableArray(const ConcurrentReadableArray &) = delete;
   ConcurrentReadableArray(ConcurrentReadableArray &&) = delete;
   ConcurrentReadableArray &operator=(const ConcurrentReadableArray &) = delete;

   ConcurrentReadableArray() : Capacity(0), ReaderCount(0), Elements(nullptr) {}

   ~ConcurrentReadableArray() {
     assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
            "deallocating ConcurrentReadableArray with outstanding snapshots");
     deallocateFreeList();
     // The live storage is never on the free list, so it has to be released
     // here or it would leak together with its elements.
     if (auto *storage = Elements.load(std::memory_order_acquire))
       storage->deallocate();
   }

   /// Append a copy of \p elem, growing the storage if it is full.
   void push_back(const ElemTy &elem) {
     ScopedLock guard(WriterLock);

     auto *storage = Elements.load(std::memory_order_relaxed);
     auto count = storage ? storage->Count.load(std::memory_order_relaxed) : 0;
     if (count >= Capacity) {
       auto newCapacity = std::max((size_t)16, count * 2);
       auto *newStorage = Storage::allocate(newCapacity);
       if (storage) {
         // The new slots are raw memory, so the existing elements must be
         // copy-constructed into them; assigning (as std::copy does) would
         // operate on objects that were never constructed.
         for (size_t i = 0; i < count; ++i)
           new(&newStorage->data()[i]) ElemTy(storage->data()[i]);
         newStorage->Count.store(count, std::memory_order_release);
         // Snapshots may still be reading the old storage; defer freeing it.
         FreeList.push_back(storage);
       }

       storage = newStorage;
       Capacity = newCapacity;
       Elements.store(storage, std::memory_order_release);
     }

     // Construct the element first and publish the new count afterwards, so
     // a reader that sees the count also sees the element.
     new(&storage->data()[count]) ElemTy(elem);
     storage->Count.store(count + 1, std::memory_order_release);

     if (ReaderCount.load(std::memory_order_acquire) == 0)
       deallocateFreeList();
   }

   /// Take a snapshot of the elements currently in the array. An empty
   /// snapshot is returned if nothing has been pushed yet.
   Snapshot snapshot() {
     incrementReaders();
     auto *storage = Elements.load(SWIFT_MEMORY_ORDER_CONSUME);
     if (storage == nullptr) {
       return Snapshot(this, nullptr, 0);
     }

     auto count = storage->Count.load(std::memory_order_acquire);
     const auto *ptr = storage->data();
     return Snapshot(this, ptr, count);
   }
 };

 using llvm::hash_value;

 /// A hash table that can be queried without taking any locks. Writes are still
 /// locked and serialized, but only with respect to other writes. Writers can
 /// add elements and clear the table, but they cannot remove individual
 /// elements. Readers work by taking a snapshot of the table and then querying
 /// that snapshot.
 ///
 /// The basic structure of the table consists of two arrays. Elements are stored
 /// in a contiguous array, with new elements appended to the end. The second
 /// array is the actual hash table, and it contains indices into the elements
 /// array. This scheme cuts down on wasted space when the elements are larger
 /// than a few bytes: instead of wasting `(1 - loadFactor) * sizeof(element)`
 /// bytes on unused space in the hash table, we only waste `(1 - loadFactor) *
 /// sizeof(index)`. This scheme also avoids readers seeing partially constructed
 /// elements.
 ///
 /// Reader/writer synchronization for new elements is handled by keeping an
 /// element count which is only incremented when the element has been fully
 /// constructed. A reader which sees an index beyond its view of the current
 /// count will ignore it and treat that as if there was no entry.
 ///
 /// Reader/writer synchronization for resizing the arrays is handled by tracking
 /// the current number of active readers. When resizing, the new array is
 /// allocated, the data copied, and then the old array is placed in a free list.
 /// The free list is only deallocated if there are no readers, otherwise freeing
 /// is deferred.
 ///
 /// Reader/writer synchronization for clearing the table is a combination of the
 /// above. By keeping the old arrays around until all readers are finished, we
 /// ensure that readers which started before the clear see valid (pre-clear)
 /// data. Readers which see any array as empty will produce no results, thus
 /// providing valid post-clear data.
 template <class ElemTy> struct ConcurrentReadableHashMap {
   // We use memcpy and don't call destructors. Make sure the elements will put
   // up with this.
   static_assert(std::is_trivially_copyable<ElemTy>::value,
                 "Elements must be trivially copyable.");
   static_assert(std::is_trivially_destructible<ElemTy>::value,
                 "Elements must not have destructors (they won't be called).");

 private:
   /// The type of the elements of the indices array. TODO: use one or two byte
   /// indices for smaller tables to save more memory.
   using Index = unsigned;

   /// The reciprocal of the fraction of empty slots at which we expand the
   /// table. A value of 4 means that we resize once only 1/4 of the slots are
   /// still empty, i.e. at a 75% load factor.
   static const size_t ResizeProportion = 4;

   /// Get the "good size" for a given allocation size. When available, this
   /// rounds up to the next allocation quantum by calling `malloc_good_size`.
   /// Otherwise, just return the passed-in size, which is always valid even if
   /// not necessarily optimal.
   size_t goodSize(size_t size) {
 #if defined(__APPLE__) && defined(__MACH__)
     return malloc_good_size(size);
 #else
     return size;
 #endif
   }

   /// A private class representing the storage of the indices. In order to
   /// ensure that readers can get a consistent view of the indices with a single
   /// atomic read, we store the size of the indices array inline, as the first
   /// element in the array.
   ///
   /// We want the number of indices to be a power of two so that we can use a
   /// bitwise AND to convert a hash code to an index. We want the entire array
   /// to be a power of two in size to be friendly to the allocator, but the size
   /// is stored inline. We work around this contradiction by considering the
   /// first index to always be occupied with a value that never matches any key.
   struct IndexStorage {
     std::atomic<Index> Mask;

     /// Allocate a zero-filled indices array with \p capacity slots (slot 0
     /// holds the mask). Crashes if the allocation fails.
     static IndexStorage *allocate(size_t capacity) {
       assert((capacity & (capacity - 1)) == 0 &&
              "Capacity must be a power of 2");
       auto *ptr =
           reinterpret_cast<IndexStorage *>(calloc(capacity, sizeof(Mask)));
       if (!ptr)
         swift::crash("Could not allocate memory.");
       ptr->Mask.store(capacity - 1, std::memory_order_relaxed);
       return ptr;
     }

     /// Access slot \p i of the array; slot 0 is the mask itself.
     std::atomic<Index> &at(size_t i) { return (&Mask)[i]; }
   };

   /// A simple linked list representing pointers that need to be freed.
   struct FreeListNode {
     FreeListNode *Next;
     void *Ptr;

     /// Push \p ptr onto the list at \p head. Null pointers are ignored:
     /// there is nothing to free, so no list node is spent on them.
     static void add(FreeListNode **head, void *ptr) {
       if (!ptr)
         return;
       auto *newNode = new FreeListNode{*head, ptr};
       *head = newNode;
     }

     /// Free every pointer on the list at \p head, together with the list
     /// nodes, leaving the list empty.
     static void freeAll(FreeListNode **head) {
       auto *node = *head;
       while (node) {
         auto *next = node->Next;
         free(node->Ptr);
         delete node;
         node = next;
       }
       *head = nullptr;
     }
   };

   /// The number of readers currently active, equal to the number of snapshot
   /// objects currently alive.
   std::atomic<uint32_t> ReaderCount{0};

   /// The number of elements in the elements array.
   std::atomic<uint32_t> ElementCount{0};

   /// The array of elements.
   std::atomic<ElemTy *> Elements{nullptr};

   /// The array of indices.
   std::atomic<IndexStorage *> Indices{nullptr};

   /// The writer lock, which must be taken before any mutation of the table.
   Mutex WriterLock;

   /// The maximum number of elements that the current elements array can hold.
   uint32_t ElementCapacity{0};

   /// The list of pointers to be freed once no readers are active.
   FreeListNode *FreeList{nullptr};

   void incrementReaders() {
     ReaderCount.fetch_add(1, std::memory_order_acquire);
   }

   void decrementReaders() {
     ReaderCount.fetch_sub(1, std::memory_order_release);
   }

   /// Free all the arrays in the free lists if there are no active readers. If
   /// there are active readers, do nothing.
   void deallocateFreeListIfSafe() {
     if (ReaderCount.load(std::memory_order_acquire) == 0)
       FreeListNode::freeAll(&FreeList);
   }

   /// Grow the elements array, adding the old array to the free list and
   /// returning the new array with all existing elements copied into it.
   /// Crashes if the allocation fails.
   ElemTy *resize(ElemTy *elements, size_t elementCount) {
     // Grow capacity by 25%, making sure we grow by at least 1.
     size_t newCapacity =
         std::max(elementCount + (elementCount >> 2), elementCount + 1);
     size_t newSize = newCapacity * sizeof(ElemTy);

     // Use whatever slack the allocator would hand out anyway.
     newSize = goodSize(newSize);
     newCapacity = newSize / sizeof(ElemTy);

     ElemTy *newElements = static_cast<ElemTy *>(malloc(newSize));
     // Fail loudly, like the other allocations in this file, rather than
     // writing through a null pointer below.
     if (!newElements)
       swift::crash("Could not allocate memory.");
     if (elements) {
       memcpy(newElements, elements, elementCount * sizeof(ElemTy));
       FreeListNode::add(&FreeList, elements);
     }

     ElementCapacity = newCapacity;
     Elements.store(newElements, std::memory_order_release);
     return newElements;
   }

   /// Grow the indices array, adding the old array to the free list and
   /// returning the new array with all existing indices copied into it. This
   /// operation performs a rehash, so that the indices are in the correct
   /// location in the new array.
   IndexStorage *resize(IndexStorage *indices, Index indicesMask,
                        ElemTy *elements) {
     // Mask is size - 1. Double the size. Start with 4 (fits into 16-byte malloc
     // bucket).
     size_t newCount = indices ? 2 * (indicesMask + 1) : 4;
     size_t newMask = newCount - 1;

     IndexStorage *newIndices = IndexStorage::allocate(newCount);

     // Slot 0 is the mask, so the real entries start at 1.
     for (size_t i = 1; i <= indicesMask; i++) {
       Index index = indices->at(i).load(std::memory_order_relaxed);
       if (index == 0)
         continue;

       // Stored indices are 1-based.
       auto *element = &elements[index - 1];
       auto hash = hash_value(*element);

       // Linear probing. Slot 0 of the new array holds the non-zero mask, so
       // it always reads as occupied and is skipped.
       size_t newI = hash & newMask;
       while (newIndices->at(newI) != 0)
         newI = (newI + 1) & newMask;
       newIndices->at(newI).store(index, std::memory_order_relaxed);
     }

     Indices.store(newIndices, std::memory_order_release);

     FreeListNode::add(&FreeList, indices);

     return newIndices;
   }

   /// Search for the given key within the given indices and elements arrays. If
   /// an entry already exists for that key, return a pointer to the element. If
   /// no entry exists, return a pointer to the location in the indices array
   /// where the index of the new element would be stored.
   template <class KeyTy>
   static std::pair<ElemTy *, std::atomic<Index> *>
   find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
        ElemTy *elements) {
     if (!indices)
       return {nullptr, nullptr};
     auto hash = hash_value(key);
     auto indicesMask = indices->Mask.load(std::memory_order_relaxed);

     auto i = hash & indicesMask;
     while (true) {
       // Index 0 is used for the mask and is not actually an index.
       if (i == 0)
         i++;

       auto *indexPtr = &indices->at(i);
       auto index = indexPtr->load(std::memory_order_acquire);
       // Element indices are 1-based, 0 means no entry.
       if (index == 0)
         return {nullptr, indexPtr};
       // An index at or beyond elementCount is ignored: it is outside the
       // view of the table this search was given.
       if (index - 1 < elementCount) {
         auto *candidate = &elements[index - 1];
         if (candidate->matchesKey(key))
           return {candidate, nullptr};
       }

       i = (i + 1) & indicesMask;
     }
   }

 public:
   // This type cannot be safely copied or moved.
   ConcurrentReadableHashMap(const ConcurrentReadableHashMap &) = delete;
   ConcurrentReadableHashMap(ConcurrentReadableHashMap &&) = delete;
   ConcurrentReadableHashMap &
   operator=(const ConcurrentReadableHashMap &) = delete;

   ConcurrentReadableHashMap()
       : ReaderCount(0), ElementCount(0), Elements(nullptr), Indices(nullptr),
         ElementCapacity(0) {}

   ~ConcurrentReadableHashMap() {
     assert(ReaderCount.load(std::memory_order_acquire) == 0 &&
            "deallocating ConcurrentReadableHashMap with outstanding snapshots");
     FreeListNode::freeAll(&FreeList);
     // The live arrays are never on the free list, so they have to be
     // released here or they would leak. Elements are trivially destructible
     // (see the static_assert above), so freeing the memory is all it takes.
     free(Indices.load(std::memory_order_acquire));
     free(Elements.load(std::memory_order_acquire));
   }

   /// Readers take a snapshot of the hash map, then work with the snapshot.
   /// While a Snapshot is alive it is counted as a reader of its map.
   class Snapshot {
     ConcurrentReadableHashMap *Map;
     IndexStorage *Indices;
     ElemTy *Elements;
     size_t ElementCount;

   public:
     /// Wrap an already-counted read of \p map. This constructor does not
     /// touch the reader count itself; snapshot() increments it beforehand.
     Snapshot(ConcurrentReadableHashMap *map, IndexStorage *indices,
              ElemTy *elements, size_t elementCount)
         : Map(map), Indices(indices), Elements(elements),
           ElementCount(elementCount) {}

     Snapshot(const Snapshot &other)
         : Map(other.Map), Indices(other.Indices), Elements(other.Elements),
           ElementCount(other.ElementCount) {
       Map->incrementReaders();
     }

     /// Copy assignment keeps the reader counts balanced: the map being
     /// adopted gains a reader and the map being dropped loses one. The
     /// implicitly generated operator would only copy the pointers, so the
     /// two destructors would later decrement the wrong counts.
     Snapshot &operator=(const Snapshot &other) {
       // Increment first so that self-assignment, or assignment between two
       // snapshots of the same map, never drops the count in between.
       other.Map->incrementReaders();
       Map->decrementReaders();
       Map = other.Map;
       Indices = other.Indices;
       Elements = other.Elements;
       ElementCount = other.ElementCount;
       return *this;
     }

     ~Snapshot() { Map->decrementReaders(); }

     /// Search for an element matching the given key. Returns a pointer to the
     /// found element, or nullptr if no matching element exists.
     template <class KeyTy> const ElemTy *find(const KeyTy &key) const {
       if (!Indices || !ElementCount || !Elements)
         return nullptr;
       return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                              Elements)
           .first;
     }
   };

   /// Take a snapshot of the current state of the hash map.
   Snapshot snapshot() {
     incrementReaders();

     // Carefully loading the indices, element count, and elements pointer in
     // order ensures a consistent view of the table with respect to concurrent
     // inserts. However, this is not sufficient to avoid an inconsistent view
     // with respect to concurrent clears. The danger scenario is:
     //
     // 1. Read indices and elementCount from a table with N entries.
     // 2. Another thread clears the table.
     // 3. Another thread inserts M entries, where M < N.
     // 4. The reader thread reads elements.
     // 5. The reader thread performs a find. The key's hash leads us to an index
     //    I, where I > M.
     // 6. The reader thread reads from element I, which is off the end of the
     //    elements array.
     //
     // To avoid this, read the elements pointer twice, at the beginning and end.
     // If the values are not the same then there may have been a clear in the
     // middle, so we retry. This will have false positives: a new element
     // pointer can just mean a concurrent insert that triggered a resize of the
     // elements array. This is harmless aside from a small performance hit, and
     // should not happen often.
     IndexStorage *indices;
     size_t elementCount;
     ElemTy *elements;
     ElemTy *elements2;
     do {
       elements = Elements.load(std::memory_order_acquire);
       indices = Indices.load(std::memory_order_acquire);
       elementCount = ElementCount.load(std::memory_order_acquire);
       elements2 = Elements.load(std::memory_order_acquire);
     } while (elements != elements2);

     return Snapshot(this, indices, elements, elementCount);
   }

   /// Get an element by key, or insert a new element for that key if one is not
   /// already present. Invoke `call` with the pointer to the element. BEWARE:
   /// `call` is invoked with the internal writer lock held, keep work to a
   /// minimum.
   ///
   /// `call` is passed the following parameters:
   ///   - `element`: the pointer to the element corresponding to `key`
   ///   - `created`: true if the element is newly created, false if it already
   ///                exists
   /// `call` returns a `bool`. When `created` is `true`, the return values mean:
   ///   - `true` the new entry is to be kept
   ///   - `false` indicates that the new entry is discarded
   /// If the new entry is kept, then the new element MUST be initialized, and
   /// have a hash value that matches the hash value of `key`.
   ///
   /// The return value is ignored when `created` is `false`.
   template <class KeyTy, typename Call>
   void getOrInsert(KeyTy key, const Call &call) {
     ScopedLock guard(WriterLock);

     auto *indices = Indices.load(std::memory_order_relaxed);
     if (!indices)
       indices = resize(indices, 0, nullptr);

     auto indicesMask = indices->Mask.load(std::memory_order_relaxed);
     auto elementCount = ElementCount.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);

     auto found = find(key, indices, elementCount, elements);
     if (found.first) {
       call(found.first, false);
       deallocateFreeListIfSafe();
       return;
     }

     // The actual capacity is indicesMask + 1. The number of slots in use is
     // elementCount + 1, since the mask also takes a slot.
     auto emptyCount = (indicesMask + 1) - (elementCount + 1);
     auto proportion = (indicesMask + 1) / emptyCount;
     if (proportion >= ResizeProportion) {
       indices = resize(indices, indicesMask, elements);
       // The insertion slot found earlier belongs to the old indices array;
       // look it up again in the new one.
       found = find(key, indices, elementCount, elements);
       assert(!found.first && "Shouldn't suddenly find the key after rehashing");
     }

     if (elementCount >= ElementCapacity) {
       elements = resize(elements, elementCount);
     }
     auto *element = &elements[elementCount];

     // Order matters: fill out the element, then update the count,
     // then update the index.
     bool keep = call(element, true);
     if (keep) {
       assert(hash_value(key) == hash_value(*element) &&
              "Element must have the same hash code as its key.");
       ElementCount.store(elementCount + 1, std::memory_order_release);
       found.second->store(elementCount + 1, std::memory_order_release);
     }

     deallocateFreeListIfSafe();
   }

   /// Clear the hash table, freeing (when safe) all memory currently used for
   /// indices and elements.
   void clear() {
     ScopedLock guard(WriterLock);

     auto *indices = Indices.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);

     // Order doesn't matter here, snapshots will gracefully handle any field
     // being NULL/0 while the others are not.
     Indices.store(nullptr, std::memory_order_relaxed);
     ElementCount.store(0, std::memory_order_relaxed);
     Elements.store(nullptr, std::memory_order_relaxed);
     ElementCapacity = 0;

     // Readers may still hold the old arrays; park them on the free list.
     FreeListNode::add(&FreeList, indices);
     FreeListNode::add(&FreeList, elements);

     deallocateFreeListIfSafe();
   }
 };

 } // end namespace swift

 #endif // SWIFT_RUNTIME_CONCURRENTUTILS_H