// Copyright 2019 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#ifndef ZIRCON_KERNEL_LIB_FBL_INCLUDE_FBL_GPARENA_H_
#define ZIRCON_KERNEL_LIB_FBL_INCLUDE_FBL_GPARENA_H_

#include <align.h>

#include <fbl/auto_call.h>
#include <fbl/confine_array_index.h>
#include <kernel/mutex.h>
#include <vm/vm_address_region.h>
#include <vm/vm_aspace.h>
#include <vm/vm_object_paged.h>

namespace fbl {

// Growable Persistent Arena (GPArena) is an arena that allows for fast allocation and deallocation
// of a single kind of object. Compared to other arena style allocators it additionally guarantees
// that the first PersistSize bytes of an object's memory are preserved between a Free and the next
// Alloc of that slot.
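//
// Illustrative usage sketch (not taken from real callers; `Foo`, the arena name, and the counts
// below are hypothetical):
//
//   struct Foo {
//     uint64_t persisted_id;  // first 8 bytes survive Free/Alloc cycles
//     uint32_t other_state;
//   };
//
//   fbl::GPArena<8, sizeof(Foo)> arena;
//   zx_status_t status = arena.Init("foo-arena", 1024);
//   if (status == ZX_OK) {
//     void* mem = arena.Alloc();
//     if (mem != nullptr) {
//       Foo* foo = new (mem) Foo();
//       // ... use foo ...
//       foo->~Foo();
//       arena.Free(foo);
//     }
//   }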
template <size_t PersistSize, size_t ObjectSize>
class __OWNER(void) GPArena {
 public:
  GPArena() = default;
  ~GPArena() {
    DEBUG_ASSERT(count_ == 0);
    if (vmar_ != nullptr) {
      // Unmap all of our memory and free our resources.
      vmar_->Destroy();
      vmar_.reset();
    }
  }

  zx_status_t Init(const char* name, size_t max_count) {
    if (!max_count) {
      return ZX_ERR_INVALID_ARGS;
    }

    // Carve out some memory from the kernel root VMAR.
    const size_t mem_sz = ROUNDUP(max_count * ObjectSize, PAGE_SIZE);

    fbl::RefPtr<VmObjectPaged> vmo;
    zx_status_t status = VmObjectPaged::Create(PMM_ALLOC_FLAG_ANY, 0u, mem_sz, &vmo);
    if (status != ZX_OK) {
      return status;
    }

    auto kspace = VmAspace::kernel_aspace();
    DEBUG_ASSERT(kspace != nullptr);
    auto root_vmar = kspace->RootVmar();
    DEBUG_ASSERT(root_vmar != nullptr);

    char vname[32];
    snprintf(vname, sizeof(vname), "gparena:%s", name);
    vmo->set_name(vname, sizeof(vname));

    zx_status_t st = root_vmar->CreateSubVmar(
        0,  // offset (ignored)
        mem_sz, /*align_pow2=*/false,
        VMAR_FLAG_CAN_MAP_READ | VMAR_FLAG_CAN_MAP_WRITE | VMAR_FLAG_CAN_MAP_SPECIFIC, vname,
        &vmar_);
    if (st != ZX_OK || vmar_ == nullptr) {
      return ZX_ERR_NO_MEMORY;
    }
    // The VMAR's parent holds a ref, so it won't be destroyed
    // automatically when we return.
    auto destroy_vmar = fbl::MakeAutoCall([this]() { vmar_->Destroy(); });

    st = vmar_->CreateVmMapping(0,  // mapping_offset
                                mem_sz,
                                false,  // align_pow2
                                VMAR_FLAG_SPECIFIC, vmo,
                                0,  // vmo_offset
                                ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE, "gparena",
                                &mapping_);
    if (st != ZX_OK || mapping_ == nullptr) {
      return ZX_ERR_NO_MEMORY;
    }

    top_ = committed_ = start_ = mapping_->base();
    end_ = start_ + mem_sz;

    DEBUG_ASSERT(IS_PAGE_ALIGNED(start_));
    DEBUG_ASSERT(IS_PAGE_ALIGNED(end_));

    destroy_vmar.cancel();

    return ZX_OK;
  }

  // Returns a raw pointer, not a reference to a constructed object, so that the memory can be
  // inspected prior to construction taking place.
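  //
  // For example, a caller might read back persisted state before constructing (illustrative
  // sketch only; `Foo` and a constructor taking the persisted id are hypothetical):
  //
  //   void* mem = arena.Alloc();
  //   if (mem != nullptr) {
  //     uint64_t old_id;
  //     memcpy(&old_id, mem, sizeof(old_id));  // first PersistSize bytes survived any prior Free
  //     Foo* foo = new (mem) Foo(old_id);
  //   }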
  void* Alloc() {
    // Take a local copy/snapshot of the current head node.
    // Use an acquire to match with the release in Free.
    HeadNode head_node = head_node_.load(ktl::memory_order_acquire);
    while (head_node.head) {
      const HeadNode next_head_node(head_node.head->next, head_node.gen + 1);
      if (head_node_.compare_exchange_strong(head_node, next_head_node, ktl::memory_order_acquire,
                                             ktl::memory_order_acquire)) {
        count_.fetch_add(1, ktl::memory_order_relaxed);
        return reinterpret_cast<void*>(head_node.head);
      }
      // There is no pause here as we don't need to wait for anyone before trying again; rather,
      // the sooner we retry the *more* likely we are to succeed, given that we just received the
      // most up to date copy of head_node.
    }

    // Nothing in the free list, we need to grow.
    uintptr_t top = top_.load(ktl::memory_order_relaxed);
    uintptr_t next_top;
    do {
      // Every time the compare_exchange below fails top becomes the current value and so
      // we recalculate our potential next_top every iteration from it.
      next_top = top + ObjectSize;
      // See if we need to commit more memory.
      if (next_top > committed_.load(ktl::memory_order_relaxed)) {
        if (!Grow(next_top)) {
          return nullptr;
        }
      }
    } while (!top_.compare_exchange_strong(top, next_top, ktl::memory_order_relaxed,
                                           ktl::memory_order_relaxed));
    count_.fetch_add(1, ktl::memory_order_relaxed);
    return reinterpret_cast<void*>(top);
  }

  // Takes a raw pointer as the destructor is expected to have already been run.
  void Free(void* node) {
    FreeNode* free_node = reinterpret_cast<FreeNode*>(node);
    // Take a local copy/snapshot of the current head node.
    HeadNode head_node = head_node_.load(ktl::memory_order_relaxed);
    HeadNode next_head_node;
    do {
      // Every time the compare_exchange below fails head_node becomes the current value and so
      // we need to reset our intended next pointer every iteration.
      free_node->next = head_node.head;
      // Build our candidate next head node.
      next_head_node = HeadNode(free_node, head_node.gen + 1);
      // Use release semantics so that any writes to the Persist area, and our write to
      // free_node->next, are visible before the node can be seen in the free list and reused.
    } while (!head_node_.compare_exchange_strong(
        head_node, next_head_node, ktl::memory_order_release, ktl::memory_order_relaxed));
    count_.fetch_sub(1, ktl::memory_order_relaxed);
  }

  size_t DiagnosticCount() const { return count_.load(ktl::memory_order_relaxed); }

  bool Committed(void* node) const {
    uintptr_t n = reinterpret_cast<uintptr_t>(node);
    return n >= start_ && n < top_.load(ktl::memory_order_relaxed);
  }

  // Return |address| if it is within the valid range of the arena, or the base of the arena if
  // not. Hardened against Spectre V1 / bounds check bypass speculation attacks: it always returns
  // a safe value, even under speculation.
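  //
  // For example, a caller deriving a pointer from untrusted input might sanitize it before
  // dereferencing (sketch only; `untrusted_index` is hypothetical and `Foo` stands for the
  // caller's object type):
  //
  //   uintptr_t addr = reinterpret_cast<uintptr_t>(arena.Base()) + untrusted_index * ObjectSize;
  //   Foo* foo = reinterpret_cast<Foo*>(arena.Confine(addr));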
  uintptr_t Confine(uintptr_t address) const {
    const size_t size = top_.load(ktl::memory_order_relaxed) - start_;
    uintptr_t offset = address - start_;
    offset = fbl::confine_array_index(offset, /*size=*/size);
    return start_ + offset;
  }

  void* Base() const { return reinterpret_cast<void*>(start_); }

  void Dump() {
    // Take the mapping lock so we can safely dump the vmar_ without mappings being done in
    // parallel.
    Guard<Mutex> guard{&mapping_lock_};

    DEBUG_ASSERT(vmar_ != nullptr);
    printf("GPArena<%#zx,%#zx> %s mappings:\n", PersistSize, ObjectSize, vmar_->name());
    {
      Guard<Mutex> guard{vmar_->lock()};
      vmar_->DumpLocked(/* depth */ 1, /* verbose */ true);
    }

    // |top_|, |committed_|, and |count_| are all atomic variables that can change out from under
    // this method so it's not possible to guarantee a consistent snapshot. Instead, we aim to
    // provide output that's not "obviously wrong".
    //
    // Load (with sequential consistency) all three values into local variables to minimize the
    // chance of getting inconsistent data.
    const size_t top = top_.load();
    const size_t committed = committed_.load();
    const size_t count = count_.load();

    const size_t nslots = (top - start_) / ObjectSize;
    const size_t np = (committed - start_) / PAGE_SIZE;
    const size_t npmax = (end_ - start_) / PAGE_SIZE;
    const size_t tslots = static_cast<size_t>(end_ - start_) / ObjectSize;

    // If we didn't get a consistent read, at least make sure we don't underflow.
    const size_t free_list = (nslots > count) ? (nslots - count) : 0;

    printf("  start %#zx\n", start_);
    printf("  top %#zx (%zu slots allocated)\n", top, nslots);
    printf("  committed %#zx (%zu/%zu pages)\n", committed, np, npmax);
    printf("  end %#zx (%zu slots total)\n", end_, tslots);
    printf("  free list length %zu\n", free_list);
  }

 private:
  DISALLOW_COPY_ASSIGN_AND_MOVE(GPArena);

  // Attempts to grow the committed memory range such that next_top is included in the range.
  bool Grow(uintptr_t next_top) {
    // Take the mapping lock.
    Guard<Mutex> guard{&mapping_lock_};
    // Cache committed_; only we can change it since we hold the lock.
    uintptr_t committed = committed_.load(ktl::memory_order_relaxed);
    // Now that we have the lock, double check that we need to proceed.
    if (next_top > committed) {
      uintptr_t nc = committed + 4 * PAGE_SIZE;
      // Clip our commit attempt to the end of our mapping.
      if (nc > end_) {
        nc = end_;
      }
      if (nc == committed) {
        // If we aren't going to commit more than we already have then this
        // means we have completely filled the arena.
        return false;
      }
      const size_t offset = committed - start_;
      const size_t len = nc - committed;
      zx_status_t st = mapping_->MapRange(offset, len, /* commit */ true);
      if (st != ZX_OK) {
        // Try to clean up any committed pages, but don't require
        // that it succeeds.
        mapping_->DecommitRange(offset, len);
        return false;
      }
      committed_.store(nc, ktl::memory_order_relaxed);
    }
    return true;
  }

  struct FreeNode {
    char data[PersistSize];
    // This struct is explicitly not packed to allow the next field to be naturally aligned.
    // As a result we *may* preserve more than PersistSize bytes, but that is fine. This is not an
    // atomic as reads and writes are serialized by our updates to head_node_, which acts like a
    // lock.
    FreeNode* next;
  };
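  // Resulting slot layout while a node sits on the free list (sizes assume PersistSize = 8 and
  // ObjectSize = 32, purely for illustration):
  //
  //   [ 0 ..  7]  preserved object bytes (untouched by the free list)
  //   [ 8 .. 15]  FreeNode::next
  //   [16 .. 31]  remainder of the object (contents unspecified while free)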
  static_assert(sizeof(FreeNode) <= ObjectSize, "Not enough free space in object");
  static_assert((ObjectSize % alignof(FreeNode)) == 0,
                "ObjectSize must be a multiple of the FreeNode alignment");
  fbl::RefPtr<VmAddressRegion> vmar_;
  fbl::RefPtr<VmMapping> mapping_;

  uintptr_t start_ = 0;
  // top_ is the address of the next object to be allocated from the arena.
  ktl::atomic<uintptr_t> top_ = 0;
  // start_ .. committed_ is the committed and mapped portion of the arena.
  ktl::atomic<uintptr_t> committed_ = 0;
  // start_ .. end_ is the total virtual address reservation for the arena; committed_
  // may not grow past end_.
  uintptr_t end_ = 0;

  DECLARE_MUTEX(GPArena) mapping_lock_;

  ktl::atomic<size_t> count_ = 0;

  // Stores the current head pointer and a generation count. The generation count prevents ABA
  // races where one thread is modifying the list whilst another thread rapidly removes and
  // re-adds nodes. Every time a HeadNode is modified the generation count is incremented so that
  // the compare-exchange always sees a unique value.
  //
  // It is important that the count not wrap past an existing value that is still in use. The
  // generation is currently a 64-bit number and shouldn't ever wrap back to 0 to begin with, and
  // even if it did it is incredibly unlikely that a thread was stalled for the 2^64 operations
  // needed to cause a generation collision.
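  //
  // As a concrete (hypothetical) sequence of the race the generation guards against: thread A
  // snapshots head == X with X->next == Y and stalls; thread B allocates X and then Y, and later
  // frees X, making head == X again but with a different next pointer. Without the generation,
  // A's stale compare-exchange on the head pointer alone would succeed and push the still-live Y
  // back onto the free list, corrupting it.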
  struct alignas(16) HeadNode {
    FreeNode* head = nullptr;
    uint64_t gen = 0;
    HeadNode() = default;
    HeadNode(FreeNode* h, uint64_t g) : head(h), gen(g) {}
  };
  ktl::atomic<HeadNode> head_node_{};
#ifdef __clang__
  static_assert(ktl::atomic<HeadNode>::is_always_lock_free, "atomic<HeadNode> not lock free");
#else
  // For gcc we have a custom libatomic implementation that *is* lock free, but the compiler
  // doesn't know that at this point. Instead we ensure HeadNode is 16 bytes so that our atomics
  // will be the ones used.
  static_assert(sizeof(HeadNode) == 16, "HeadNode must be 16 bytes");
#endif
};

}  // namespace fbl

#endif  // ZIRCON_KERNEL_LIB_FBL_INCLUDE_FBL_GPARENA_H_