// Copyright 2021 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_PHYS_LIB_MEMALLOC_INCLUDE_LIB_MEMALLOC_POOL_H_
#define ZIRCON_KERNEL_PHYS_LIB_MEMALLOC_INCLUDE_LIB_MEMALLOC_POOL_H_
#include <lib/fit/function.h>
#include <lib/fitx/result.h>
#include <lib/memalloc/range.h>
#include <lib/stdcompat/span.h>
#include <stdio.h>
#include <zircon/assert.h>
#include <zircon/boot/image.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <string_view>
#include <utility>

#include <fbl/intrusive_double_list.h>
namespace memalloc {
// Pool is a container that tracks ranges of physical memory and allocates
// available regions of RAM from among them.
//
// One initializes a Pool with a variable number of arrays of memory ranges.
// Except among extended types (see Type documentation), the ranges are
// permitted to overlap with one another to an arbitrary degree. In practice,
// the main array of ranges would be supplied by a ZBI or synthesized from a
// legacy booting protocol, while the others would consist of other auxiliary
// reserved areas known to be in use at the time of initialization
// (e.g., physboot's load image and the data ZBI itself). Despite arbitrarily
// overlapping inputs, Pool gives normalized outputs: iteration yields ranges
// that are lexicographically ordered, mutually disjoint, and for which
// addr + size does not overflow.
//
// kReserved ranges merely exist for the convenience of bootloaders: they
// represent holes to be punched out of kFreeRam ranges during normalization
// and do not have intrinsic value in and of themselves. Accordingly, Pool does
// not explicitly track them after initialization.
//
// Pool dynamically uses ranges of the free RAM it encodes for bookkeeping
// space. On initialization, it will attempt to find initial chunks of space
// sufficient to track the first crop of normalized ranges. With further allocation,
// fragmentation will increase and Pool will internally allocate more such
// space to manage it. Bookkeeping memory will also avoid the zero(th) page.
//
// Pool is neither copyable nor movable.
//
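// Example usage (a non-normative sketch): it assumes memalloc::Range is an
// aggregate with `addr`, `size`, and `type` fields, and uses
// Type::kPoolTestPayload as a stand-in for whichever extended type the caller
// owns (see range.h for the authoritative definitions):
//
//   memalloc::Pool pool;
//
//   memalloc::Range ram[] = {
//       {.addr = 0x0, .size = 0x4000'0000, .type = memalloc::Type::kFreeRam},
//       {.addr = 0x0900'0000, .size = 0x10'0000, .type = memalloc::Type::kReserved},
//   };
//   std::array inputs{cpp20::span<memalloc::Range>{ram}};
//
//   if (pool.Init(inputs).is_error()) {
//     // Insufficient free RAM for Pool's initial bookkeeping.
//   }
//   if (auto result = pool.Allocate(memalloc::Type::kPoolTestPayload, 0x2000);
//       result.is_ok()) {
//     uint64_t payload = result.value();  // Guaranteed nonzero.
//   }
//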
class Pool {
private:
// Forward-declared; defined below.
struct NodeTraits;
// The internal doubly-linked list type whose iterators - which are public
// API - dereference as const Range&.
using List = fbl::DoublyLinkedList<Range*, fbl::DefaultObjectTag, fbl::SizeOrder::N, NodeTraits>;
public:
// Pool decouples the memory tracked by it from the dynamic memory it
// actually uses (i.e., bookkeeping) by way of a function of this type. A
// BookkeepingAddressToPointer takes a would-be bookkeeping range
// (start + size) and maps it to a corresponding region where the bookkeeping
// data structures will actually be stored. While the actual bookkeeping
// range will differ in general from the pre-translated range, Pool will only
// track the latter (after blindly using the former); the onus is on the
// caller to manage the translated ranges. There are two main uses for such a
// feature:
//
// (*) testing (unit- and fuzz-): the decoupling is imperative for fully
// testing Pool. Since bookkeeping space is carved out from among the input
// RAM ranges, Pool would otherwise constrain the input RAM to always being
// actually usable memory, which would make testing awkward and limit
// observable behaviour in a virtualized environment (e.g., that relating to
// the zero page). In this case, a function that translates bookkeeping space
// to heap would be simple and effective.
//
// (*) encapsulating MMU-like management: input ranges of memory could
// conceivably be inaccessible by default and require preparation by such a
// function. For example, perhaps pages of free RAM would need to be mapped
// before Pool could make use of them. Or, in a more specific case, take
// 64-bit addressable hardware in 32-bit mode: input ranges of memory could
// live above the first 2^32 addresses of the address space, but would be
// inaccessible until mapped into the lower 2^32.
//
using BookkeepingAddressToPointer =
fit::inline_function<std::byte*(uint64_t addr, uint64_t size)>;
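// As a sketch of the testing use-case above (non-normative): back bookkeeping
// with heap memory so that the input ranges need not describe real,
// accessible RAM. The `storage` vector is illustrative and must outlive the
// Pool:
//
//   std::vector<std::unique_ptr<std::byte[]>> storage;
//   memalloc::Pool pool([&storage](uint64_t /*addr*/, uint64_t size) {
//     storage.push_back(std::make_unique<std::byte[]>(size));
//     return storage.back().get();
//   });
//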
// The size of a chunk of free RAM reserved for internal Pool bookkeeping.
// The value is ultimately arbitrary, but is chosen with the expectation that
// it is sufficiently large to avoid fragmentation of the available memory in
// the pool.
static constexpr uint64_t kBookkeepingChunkSize = 0x1000;
// The first 64KiB of the address space is a region of memory we generally
// wish to discard so that accidental, relative access off of a null pointer
// is unlikely to touch any memory that might be allocated.
static constexpr uint64_t kDefaultMinAddr = 0x10000;
// Captures the maximum possible address, whether in 32-bit or 64-bit mode.
static constexpr uint64_t kDefaultMaxAddr = std::numeric_limits<uintptr_t>::max();
// Default-construction uses the identity mapping for a
// BookkeepingAddressToPointer.
Pool() = default;
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
explicit Pool(BookkeepingAddressToPointer bookkeeping_pointer)
: bookkeeping_pointer_(std::move(bookkeeping_pointer)) {}
~Pool() {
ranges_.clear_unsafe();
unused_.clear_unsafe();
}
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
// Initializes a Pool from a variable number of memory ranges, performing an
// allocation for its internal bookkeeping from among the free RAM
// encoded in the provided ranges. `default_min_addr` and `default_max_addr`
// prescribe default bounds on the addresses Pool is allowed to allocate; the
// placement of the internal bookkeeping must respect these.
//
// The provided ranges cannot feature overlap among different extended
// types, or between an extended type and either kReserved or kPeripheral;
// otherwise, arbitrary overlap is permitted.
//
// fitx::failed is returned if there is insufficient free RAM to use for
// Pool's initial bookkeeping.
//
template <size_t N>
fitx::result<fitx::failed> Init(std::array<cpp20::span<Range>, N> ranges,
uint64_t default_min_addr = kDefaultMinAddr,
uint64_t default_max_addr = kDefaultMaxAddr) {
return Init(ranges, std::make_index_sequence<N>(), default_min_addr, default_max_addr);
}
using iterator = typename List::const_iterator;
using const_iterator = iterator;
iterator begin() const { return ranges_.begin(); }
iterator end() const { return ranges_.end(); }
bool empty() const { return ranges_.is_empty(); }
const Range& front() const {
ZX_ASSERT(!empty());
return *begin();
}
const Range& back() const {
ZX_ASSERT(!empty());
return *std::prev(end());
}
// Returns the number of tracked, normalized ranges.
size_t size() const { return num_ranges_; }
// Returns a pointer to the tracked, normalized range containing the
// provided address, if one exists.
const Range* GetContainingRange(uint64_t addr);
// Attempts to allocate memory out of free RAM of the prescribed type, size,
// and alignment. Optional lower and upper address bounds may be passed; if
// unspecified, the corresponding default bounds passed to Init() will be
// respected. `type` must be an extended type.
//
// Any returned address is guaranteed to be nonzero.
//
// fitx::failed is returned if there is insufficient free RAM to track any
// new ranges or if there is no free RAM that meets the given constraints.
//
fitx::result<fitx::failed, uint64_t> Allocate(
Type type, uint64_t size, uint64_t alignment = __STDCPP_DEFAULT_NEW_ALIGNMENT__,
std::optional<uint64_t> min_addr = {}, std::optional<uint64_t> max_addr = {});
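// For example (a non-normative sketch; Type::kPoolTestPayload again stands in
// for a caller-owned extended type), given an initialized `pool`, a
// page-aligned allocation with an explicit upper address bound of 4GiB - 1:
//
//   auto result = pool.Allocate(memalloc::Type::kPoolTestPayload,
//                               /*size=*/0x4000, /*alignment=*/0x1000,
//                               /*min_addr=*/std::nullopt,
//                               /*max_addr=*/uint64_t{0xffff'ffff});
//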
// Attempts to perform a "weak allocation" of the given range, wherein all
// kFreeRam subranges are updated to `type`. The given range must be
// comprised of tracked subranges of extended type, kFreeRam, or
// kBookkeeping. `addr + size` cannot exceed UINT64_MAX.
//
// The utility of weak allocation lies in situations where there is a special
// range that we ultimately want reserved for "something" later on, but it is
// immaterial what occupies it in the meantime, so long as nothing is further
// allocated from there. For example, when loading a fixed-address kernel
// image, we would want to prevent page tables - which must persist
// across the boot - from being allocated out of that load range.
//
// fitx::failed is returned if there is insufficient bookkeeping to track any
// new ranges of memory.
//
fitx::result<fitx::failed> UpdateFreeRamSubranges(Type type, uint64_t addr, uint64_t size);
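// For example, to keep later allocations out of a fixed kernel load range
// (a non-normative sketch; the address, size, and Type::kFixedAddressKernel
// name are hypothetical):
//
//   if (pool.UpdateFreeRamSubranges(memalloc::Type::kFixedAddressKernel,
//                                   /*addr=*/0x10'0000, /*size=*/0x80'0000)
//           .is_error()) {
//     // Insufficient bookkeeping space to track the new ranges.
//   }
//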
// Attempts to free a subrange of a previously allocated range or one of
// an extended type that had previously been passed to Init(). This subrange
// is updated to have type kFreeRam.
//
// Freeing a range already tracked as kFreeRam is a no-op.
//
// fitx::failed is returned if there is insufficient memory to track the new
// (subdivided) ranges of memory that would result from freeing.
//
fitx::result<fitx::failed> Free(uint64_t addr, uint64_t size);
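// For example, returning the tail of a previously allocated range to the pool
// (a non-normative sketch; `payload` is an address returned by an earlier
// Allocate() of at least 0x3000 bytes):
//
//   ZX_ASSERT(pool.Free(payload + 0x1000, 0x2000).is_ok());
//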
// Attempts to resize a previously allocated range or one of the ranges of
// extended type originally passed to Init(). The resizing occurs only at
// the level of bookkeeping; the start of the resized region is returned and,
// if it differs from the one originally supplied, it is the responsibility
// of the caller to actually std::memmove the old contents to the new region
// before another allocation is performed. If the new size is smaller than
// the original, then this method effectively just frees the range's tail.
//
// The original range - as opposed to just its root address - is required to
// be passed in as testimony: since the original allocation or
// initialization, the pool may have coalesced neighbouring regions into it
// or freed subregions of it. This method asserts that the associated tracked
// range does at least still have `original` as a subrange with the
// appropriate type. The provided range must also be of extended type.
//
// `new_size` must be positive; this method is not a backdoor `Free()`. The
// original range must already be `min_alignment`-aligned.
//
// fitx::failed is returned if there is insufficient memory to track the new
// (subdivided) ranges of memory that would result from resizing, or if the
// range is untracked. No failure mode will leave the original range freed.
fitx::result<fitx::failed, uint64_t> Resize(const Range& original, uint64_t new_size,
uint64_t min_alignment);
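// For example (a non-normative sketch; `original` is the Range describing an
// earlier allocation - with `addr` and `size` fields - and kNewSize is
// hypothetical):
//
//   auto result = pool.Resize(original, kNewSize, /*min_alignment=*/0x1000);
//   if (result.is_ok() && result.value() != original.addr) {
//     // The caller must relocate the contents before the next allocation.
//     std::memmove(reinterpret_cast<void*>(result.value()),
//                  reinterpret_cast<void*>(original.addr),
//                  std::min(kNewSize, original.size));
//   }
//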
// Pretty-prints the memory ranges contained in the pool.
void PrintMemoryRanges(const char* prefix, FILE* f = stdout) const;
// These are the components of what PrintMemoryRanges does internally,
// for use on different kinds of containers of memalloc::Range objects.
static void PrintMemoryRangeHeader(const char* prefix, FILE* f = stdout);
static void PrintOneMemoryRange(const memalloc::Range& range, const char* prefix,
FILE* f = stdout);
private:
using mutable_iterator = typename List::iterator;
using TypeUpdateFunc = fit::inline_function<void(Type&)>;
// Custom node struct defined so that List's iterators dereference instead as
// const Range&.
struct Node : public Range {
using State = fbl::DoublyLinkedListNodeState<Range*, fbl::NodeOptions::AllowClearUnsafe>;
State node_;
};
struct NodeTraits {
using NodeState = typename Node::State;
static NodeState& node_state(Range& element) { return static_cast<Node&>(element).node_; }
};
// Ultimately deferred to as the actual initialization routine.
fitx::result<fitx::failed> Init(cpp20::span<internal::RangeIterationContext> state,
uint64_t min_addr, uint64_t max_addr);
template <size_t... I>
fitx::result<fitx::failed> Init(std::array<cpp20::span<Range>, sizeof...(I)> ranges,
std::index_sequence<I...> seq, uint64_t min_addr,
uint64_t max_addr) {
std::array state{internal::RangeIterationContext(ranges[I])...};
return Init({state}, min_addr, max_addr);
}
// With semantics similar to Allocate(), FindAllocatable() is its main
// allocation subroutine: it only finds a suitable address to allocate,
// without actually performing the allocation.
fitx::result<fitx::failed, uint64_t> FindAllocatable(Type type, uint64_t size, uint64_t alignment,
uint64_t min_addr, uint64_t max_addr);
// On success, returns a disconnected node with the given range information;
// fails if there is insufficient bookkeeping space for the new node.
fitx::result<fitx::failed, Node*> NewNode(const Range& range);
// Insert creates a new node for a range that is expected to already be a
// subrange of an existing node. No policy checking is done to determine
// whether inserting a subrange of type `range.type` is actually kosher;
// fails if there is insufficient bookkeeping space for the new node. As an
// optimization, the iterator pointing to the parent node may be provided if
// it is already known.
fitx::result<fitx::failed, mutable_iterator> InsertSubrange(
const Range& range, std::optional<mutable_iterator> parent_it = std::nullopt);
// Merges into `it` any neighboring nodes with directly adjacent ranges of
// the same type.
mutable_iterator Coalesce(mutable_iterator it);
// Returns an iterator pointing to the node whose range contains
// [addr, addr + size), returning ranges_.end() if no such node exists.
mutable_iterator GetContainingNode(uint64_t addr, uint64_t size);
// Converts as much of [addr, addr + size) to bookkeeping memory as possible,
// returning the address just after what it was able to convert.
std::byte* PopulateAsBookkeeping(std::byte* addr, uint64_t size);
// Attempts to ensure - through dynamic allocation of more space if need be -
// that at least two unused bookkeeping nodes are available. N = 2 here is
// the maximum number of new nodes other Pool methods might need in order to
// successfully complete. This method is used in a best-effort context: if
// there is insufficient space, the following operation will be permitted to
// complete and report exactly that.
void TryToEnsureTwoBookkeepingNodes();
// Thin wrappers around List methods that also update `num_ranges_`.
void AppendNode(Node* node);
mutable_iterator InsertNodeAt(Node* node, mutable_iterator it);
Node* RemoveNodeAt(mutable_iterator it);
BookkeepingAddressToPointer bookkeeping_pointer_ = [](uint64_t addr, uint64_t size) {
return reinterpret_cast<std::byte*>(addr);
};
// The list of unused nodes. We avoid the term "free" to disambiguate from
// "free memory", which is unrelated to this list. The nodes stored within
// should be treated as having garbage values for their fields; internal logic
// must fully populate these fields (e.g., via NewNode()) when transferring
// nodes to ranges_.
List unused_;
// The tracked, normalized ranges of memory. Normalization is an invariant:
// at any time, the ranges within this list are lexicographically sorted,
// mutually disjoint, maximally contiguous, and for which addr + size does not
// overflow.
List ranges_;
// Default bounds on allocatable addresses, configured during Init() - and
// overridable in Allocate().
uint64_t default_min_addr_ = 0;
uint64_t default_max_addr_ = 0;
// The number of tracked ranges.
size_t num_ranges_ = 0;
};
} // namespace memalloc
#endif // ZIRCON_KERNEL_PHYS_LIB_MEMALLOC_INCLUDE_LIB_MEMALLOC_POOL_H_