blob: 23e69171e28c9affbf34b1aec2aeaccc3f87f028 [file] [log] [blame] [edit]
// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_VM_PAGE_LIST_H_
#define ZIRCON_KERNEL_VM_INCLUDE_VM_VM_PAGE_LIST_H_
#include <align.h>
#include <bits.h>
#include <lib/fit/function.h>
#include <zircon/errors.h>
#include <zircon/types.h>
#include <fbl/canary.h>
#include <fbl/intrusive_wavl_tree.h>
#include <fbl/macros.h>
#include <ktl/algorithm.h>
#include <ktl/unique_ptr.h>
#include <vm/page.h>
#include <vm/pmm.h>
#include <vm/vm.h>
// RAII helper for representing content in a page list node. This supports being in one of four
// states
// * Empty - Contains nothing
// * Page p - Contains a vm_page 'p'. This 'p' is considered owned by this wrapper and
// `ReleasePage` must be called to give up ownership.
// * Reference r - Contains a reference 'r' to some content. This 'r' is considered owned by this
// wrapper and `ReleaseReference` must be called to give up ownership.
// * Marker - Indicates that whilst not a page, it is also not empty. Markers can be used to
// separate the distinction between "there's no page because we've deduped to the
// zero page" and "there's no page because our parent contains the content".
class VmPageOrMarker {
 public:
  // Default construction produces the Empty state, which is encoded as the page type carrying a
  // null pointer.
  VmPageOrMarker() : raw_(kPageType) {}
  // Any owned page or reference must have been released before the wrapper is destroyed.
  ~VmPageOrMarker() { DEBUG_ASSERT(!IsPageOrRef()); }
  // Move construction takes over the encoded content of |other| and leaves |other| Empty.
  VmPageOrMarker(VmPageOrMarker&& other) noexcept : raw_(other.Release()) {}
  VmPageOrMarker(const VmPageOrMarker&) = delete;
  VmPageOrMarker& operator=(const VmPageOrMarker&) = delete;

  // Thin strongly-typed wrapper over a uint64_t so that reference values are not accidentally
  // mixed up with unrelated integers. It also publishes the alignment a reference must have and
  // checks that alignment in debug builds.
  class ReferenceValue {
   public:
    // Number of low bits of a reference that have to be zero, leaving them free for internal
    // metadata. Declared here for convenience; a static_assert in VmPageOrMarker keeps it in sync
    // with the private kReferenceBits.
    static constexpr uint64_t kAlignBits = 4;

    explicit constexpr ReferenceValue(uint64_t raw) : value_(raw) {
      DEBUG_ASSERT((value_ & BIT_MASK(kAlignBits)) == 0);
    }

    uint64_t value() const { return value_; }

   private:
    uint64_t value_;
  };

  // Returns the stored vm_page*. Only valid when `IsPage` is true.
  vm_page* Page() const {
    DEBUG_ASSERT(IsPage());
    // The page type tag is all zeroes, so raw_ already is the pointer value.
    static_assert(kPageType == 0);
    return reinterpret_cast<vm_page*>(raw_);
  }

  // Returns the stored reference value with the type and split bits masked off. Only valid when
  // `IsReference` is true.
  ReferenceValue Reference() const {
    DEBUG_ASSERT(IsReference());
    const uint64_t payload = raw_ & ~BIT_MASK(kReferenceBits);
    return ReferenceValue(payload);
  }

  // Hands the stored vm_page* to the caller. Afterwards IsPage is false and IsEmpty is true.
  [[nodiscard]] vm_page* ReleasePage() {
    DEBUG_ASSERT(IsPage());
    // The page type tag is all zeroes, so the released raw value already is the pointer value.
    static_assert(kPageType == 0);
    return reinterpret_cast<vm_page*>(Release());
  }

  // Hands the stored reference value to the caller, stripping the type and split bits. The
  // wrapper is left Empty.
  [[nodiscard]] ReferenceValue ReleaseReference() {
    DEBUG_ASSERT(IsReference());
    const uint64_t released = Release();
    return ReferenceValue(released & ~BIT_MASK(kReferenceBits));
  }

  // Split bit accessors that work uniformly on pages and references: for a page the bit is read
  // from the vm_page itself, otherwise it is read from the encoded raw value.
  bool PageOrRefLeftSplit() const {
    DEBUG_ASSERT(IsPageOrRef());
    return IsPage() ? static_cast<bool>(Page()->object.cow_left_split)
                    : (raw_ & kReferenceLeftSplit) != 0;
  }
  bool PageOrRefRightSplit() const {
    DEBUG_ASSERT(IsPageOrRef());
    return IsPage() ? static_cast<bool>(Page()->object.cow_right_split)
                    : (raw_ & kReferenceRightSplit) != 0;
  }

  // Split bit mutators, mirroring the accessors above.
  void SetPageOrRefLeftSplit(bool value) {
    DEBUG_ASSERT(IsPageOrRef());
    if (IsPage()) {
      Page()->object.cow_left_split = value;
      return;
    }
    raw_ = value ? (raw_ | kReferenceLeftSplit) : (raw_ & ~kReferenceLeftSplit);
  }
  void SetPageOrRefRightSplit(bool value) {
    DEBUG_ASSERT(IsPageOrRef());
    if (IsPage()) {
      Page()->object.cow_right_split = value;
      return;
    }
    raw_ = value ? (raw_ | kReferenceRightSplit) : (raw_ & ~kReferenceRightSplit);
  }

  // Replaces the held reference with page |p| and returns the reference that was held. The split
  // bits are expected to be unchanged by the swap, so |p| must already carry the same split bits
  // as the current content.
  [[nodiscard]] VmPageOrMarker::ReferenceValue SwapReferenceForPage(vm_page_t* p) {
    DEBUG_ASSERT(p);
    DEBUG_ASSERT(p->object.cow_left_split == PageOrRefLeftSplit());
    DEBUG_ASSERT(p->object.cow_right_split == PageOrRefRightSplit());
    VmPageOrMarker::ReferenceValue previous = ReleaseReference();
    *this = VmPageOrMarker::Page(p);
    return previous;
  }

  // Replaces the held page with reference |ref| and returns the page that was held. The split
  // bits of the page are carried over into the new reference encoding.
  [[nodiscard]] vm_page_t* SwapPageForReference(VmPageOrMarker::ReferenceValue ref) {
    const bool keep_left = PageOrRefLeftSplit();
    const bool keep_right = PageOrRefRightSplit();
    vm_page_t* previous = ReleasePage();
    *this = VmPageOrMarker::Reference(ref, keep_left, keep_right);
    return previous;
  }

  // Replaces the held reference with a different reference |ref| and returns the one that was
  // held. The split bits are carried over.
  [[nodiscard]] VmPageOrMarker::ReferenceValue ChangeReferenceValue(
      VmPageOrMarker::ReferenceValue ref) {
    const bool keep_left = PageOrRefLeftSplit();
    const bool keep_right = PageOrRefRightSplit();
    const VmPageOrMarker::ReferenceValue previous = ReleaseReference();
    *this = VmPageOrMarker::Reference(ref, keep_left, keep_right);
    return previous;
  }

  // State queries. Empty shares the page type tag, so IsPage has to exclude it explicitly.
  bool IsEmpty() const { return raw_ == kPageType; }
  bool IsPage() const { return !IsEmpty() && (GetType() == kPageType); }
  bool IsMarker() const { return GetType() == kZeroMarkerType; }
  bool IsReference() const { return GetType() == kReferenceType; }
  bool IsPageOrRef() const { return IsPage() || IsReference(); }

  // Move assignment takes over the content of |other|, leaving it Empty. Assigning over an owned
  // page or reference is forbidden because that content would be leaked.
  VmPageOrMarker& operator=(VmPageOrMarker&& other) noexcept {
    DEBUG_ASSERT(!IsPageOrRef());
    raw_ = other.Release();
    return *this;
  }

  // Comparison is on the raw encoded value.
  bool operator==(const VmPageOrMarker& other) const { return raw_ == other.raw_; }
  bool operator!=(const VmPageOrMarker& other) const { return raw_ != other.raw_; }

  // Factory for the Empty state (page type with a null pointer).
  static VmPageOrMarker Empty() { return VmPageOrMarker{kPageType}; }
  // Factory for the Marker state.
  static VmPageOrMarker Marker() { return VmPageOrMarker{kZeroMarkerType}; }

  // Factory wrapping page |p|. A null |p| is rejected: it violates this method's API and would be
  // indistinguishable from Empty in the internal encoding.
  [[nodiscard]] static VmPageOrMarker Page(vm_page* p) {
    DEBUG_ASSERT(p);
    const uint64_t encoded = reinterpret_cast<uint64_t>(p);
    // The type tag lives in the low bits, so the pointer must have them clear. Checked in case
    // kTypeBits grows or an invalid pointer is supplied.
    DEBUG_ASSERT((encoded & BIT_MASK(kTypeBits)) == 0);
    return VmPageOrMarker{encoded | kPageType};
  }

  // Factory wrapping reference |ref| together with the given split bits.
  [[nodiscard]] static VmPageOrMarker Reference(ReferenceValue ref, bool left_split,
                                                bool right_split) {
    uint64_t encoded = ref.value();
    if (left_split) {
      encoded |= kReferenceLeftSplit;
    }
    if (right_split) {
      encoded |= kReferenceRightSplit;
    }
    return VmPageOrMarker(encoded | kReferenceType);
  }

 private:
  explicit VmPageOrMarker(uint64_t raw) : raw_(raw) {}

  // The two lowest bits of raw_ select the type; everything else must fit in the remaining high
  // bits. There is no dedicated Empty tag: a page type with a zero pointer means Empty.
  static constexpr uint64_t kTypeBits = 2;
  static constexpr uint64_t kPageType = 0b00;
  static constexpr uint64_t kZeroMarkerType = 0b01;
  static constexpr uint64_t kReferenceType = 0b10;

  // A reference additionally stores the left and right split bits directly above the type tag.
  // These bits normally live in the vm_page_t, where they are used for copy-on-write tracking in
  // hidden VMOs; keeping them here lets such pages be candidates for compression. The bits above
  // them hold the reference value itself. Unlike a page, a reference value of 0 is valid and may
  // be stored.
  static constexpr uint64_t kReferenceBits = kTypeBits + 2;
  // ReferenceValue::kAlignBits has to be declared separately because of declaration order and
  // visibility, so make sure the two agree.
  static_assert(ReferenceValue::kAlignBits == kReferenceBits);
  static constexpr uint64_t kReferenceLeftSplit = 0b10 << kTypeBits;
  static constexpr uint64_t kReferenceRightSplit = 0b01 << kTypeBits;

  // Extracts the type tag from the low bits of raw_.
  uint64_t GetType() const { return raw_ & BIT_MASK(kTypeBits); }

  // Returns the current raw encoding and resets this wrapper to the Empty encoding.
  uint64_t Release() {
    const uint64_t previous = raw_;
    raw_ = 0;
    return previous;
  }

  uint64_t raw_;
};
// Limited reference to a VmPageOrMarker. This reference provides unrestricted const access to the
// underlying VmPageOrMarker, but as it holds a non-const VmPageOrMarker* it has the ability to
// modify the underlying entry. However, the interface for modification is very limited.
//
// This allows for the majority of VmPageList iterations that are not intended to allow for clearing
// entries to the Empty state to allow limited mutation (such as between different content states),
// without being completely mutable.
class VmPageOrMarkerRef {
 public:
  // A default constructed ref points at nothing and evaluates to false.
  VmPageOrMarkerRef() = default;
  explicit VmPageOrMarkerRef(VmPageOrMarker* page_or_marker) : page_or_marker_(page_or_marker) {}
  ~VmPageOrMarkerRef() = default;

  // Read-only access to the referenced entry. The ref must be non-null.
  const VmPageOrMarker& operator*() const {
    DEBUG_ASSERT(page_or_marker_);
    return *page_or_marker_;
  }
  const VmPageOrMarker* operator->() const {
    DEBUG_ASSERT(page_or_marker_);
    return page_or_marker_;
  }

  // True when this ref points at an entry.
  explicit operator bool() const { return page_or_marker_ != nullptr; }

  // Updating the split bits is one of the permitted mutations; both calls simply forward to the
  // referenced entry.
  void SetPageOrRefLeftSplit(bool value) {
    DEBUG_ASSERT(page_or_marker_);
    page_or_marker_->SetPageOrRefLeftSplit(value);
  }
  void SetPageOrRefRightSplit(bool value) {
    DEBUG_ASSERT(page_or_marker_);
    page_or_marker_->SetPageOrRefRightSplit(value);
  }

  // Switching between kinds of content is also permitted. This variant takes ownership of page
  // |p| and gives back ownership of the reference that was stored.
  [[nodiscard]] VmPageOrMarker::ReferenceValue SwapReferenceForPage(vm_page_t* p) {
    DEBUG_ASSERT(page_or_marker_);
    VmPageOrMarker& entry = *page_or_marker_;
    return entry.SwapReferenceForPage(p);
  }

  // Counterpart of SwapReferenceForPage: takes ownership of |ref| and gives back ownership of the
  // page that was stored.
  [[nodiscard]] vm_page_t* SwapPageForReference(VmPageOrMarker::ReferenceValue ref) {
    DEBUG_ASSERT(page_or_marker_);
    VmPageOrMarker& entry = *page_or_marker_;
    return entry.SwapPageForReference(ref);
  }

  // Exchanges the stored reference for |ref| and gives back the reference that was stored.
  [[nodiscard]] VmPageOrMarker::ReferenceValue ChangeReferenceValue(
      VmPageOrMarker::ReferenceValue ref) {
    DEBUG_ASSERT(page_or_marker_);
    VmPageOrMarker& entry = *page_or_marker_;
    return entry.ChangeReferenceValue(ref);
  }

 private:
  VmPageOrMarker* page_or_marker_ = nullptr;
};
class VmPageListNode final : public fbl::WAVLTreeContainable<ktl::unique_ptr<VmPageListNode>> {
 public:
  explicit VmPageListNode(uint64_t offset);
  ~VmPageListNode();

  DISALLOW_COPY_ASSIGN_AND_MOVE(VmPageListNode);

  // Number of page slots held by a single node.
  static const size_t kPageFanOut = 16;

  // Offset of the first slot of this node; also used as the tree key.
  uint64_t offset() const { return obj_offset_; }
  uint64_t GetKey() const { return obj_offset_; }
  // One past the offset covered by the last slot of this node.
  uint64_t end_offset() const { return offset() + kPageFanOut * PAGE_SIZE; }

  // The offset may only be changed while the node is not in a container.
  void set_offset(uint64_t offset) {
    DEBUG_ASSERT(!InContainer());
    obj_offset_ = offset;
  }

  // Invokes |func| for each non-empty slot of the node. |skew| is subtracted from the offset
  // handed to |func|.
  template <typename PTR_TYPE, typename F>
  zx_status_t ForEveryPage(F func, uint64_t skew) {
    const uint64_t first = offset();
    const uint64_t last = end_offset();
    return ForEveryPageInRange<PTR_TYPE>(this, func, first, last, skew);
  }

  // Const flavour of ForEveryPage.
  template <typename PTR_TYPE, typename F>
  zx_status_t ForEveryPage(F func, uint64_t skew) const {
    const uint64_t first = offset();
    const uint64_t last = end_offset();
    return ForEveryPageInRange<PTR_TYPE>(this, func, first, last, skew);
  }

  // Invokes |func| for each non-empty slot in [start_offset, end_offset). The range has to lie
  // within the offsets covered by this node.
  template <typename PTR_TYPE, typename F>
  zx_status_t ForEveryPageInRange(F func, uint64_t start_offset, uint64_t end_offset,
                                  uint64_t skew) {
    return ForEveryPageInRange<PTR_TYPE>(this, func, start_offset, end_offset, skew);
  }

  // Const flavour of ForEveryPageInRange.
  template <typename PTR_TYPE, typename F>
  zx_status_t ForEveryPageInRange(F func, uint64_t start_offset, uint64_t end_offset,
                                  uint64_t skew) const {
    return ForEveryPageInRange<PTR_TYPE>(this, func, start_offset, end_offset, skew);
  }

  // Direct slot access by index; |index| must be below kPageFanOut.
  const VmPageOrMarker& Lookup(size_t index) const {
    canary_.Assert();
    DEBUG_ASSERT(index < kPageFanOut);
    return pages_[index];
  }

  VmPageOrMarker& Lookup(size_t index) {
    canary_.Assert();
    DEBUG_ASSERT(index < kPageFanOut);
    return pages_[index];
  }

  // Returns true when every slot is Empty, i.e. the node holds no pages, references or markers.
  bool IsEmpty() const {
    for (size_t slot = 0; slot < kPageFanOut; ++slot) {
      if (!pages_[slot].IsEmpty()) {
        return false;
      }
    }
    return true;
  }

  // Returns true when no slot holds a page or a reference. Markers do not count.
  bool HasNoPageOrRef() const {
    for (size_t slot = 0; slot < kPageFanOut; ++slot) {
      if (pages_[slot].IsPageOrRef()) {
        return false;
      }
    }
    return true;
  }

 private:
  // Shared implementation behind the const and non-const iteration entry points. |self| is the
  // node (const or not) being walked.
  template <typename PTR_TYPE, typename S, typename F>
  static zx_status_t ForEveryPageInRange(S self, F func, uint64_t start_offset, uint64_t end_offset,
                                         uint64_t skew) {
    // The requested range has to be well formed and contained in this node's offset range.
    DEBUG_ASSERT(end_offset >= start_offset);
    DEBUG_ASSERT(start_offset >= self->obj_offset_);
    DEBUG_ASSERT(end_offset <= self->end_offset());

    const size_t first_slot = (start_offset - self->obj_offset_) / PAGE_SIZE;
    const size_t slot_limit = (end_offset - self->obj_offset_) / PAGE_SIZE;
    for (size_t slot = first_slot; slot < slot_limit; ++slot) {
      if (self->pages_[slot].IsEmpty()) {
        continue;
      }
      const zx_status_t status =
          func(PTR_TYPE{&self->pages_[slot]}, self->obj_offset_ + slot * PAGE_SIZE - skew);
      // Anything other than ZX_ERR_NEXT ends the walk and is handed back to the caller.
      if (unlikely(status != ZX_ERR_NEXT)) {
        return status;
      }
    }
    return ZX_ERR_NEXT;
  }

  fbl::Canary<fbl::magic("PLST")> canary_;
  uint64_t obj_offset_ = 0;
  VmPageOrMarker pages_[kPageFanOut];
};
class VmPageList;
// Class which holds the list of vm_page structs removed from a VmPageList
// by TakePages. The list includes information about uncommitted pages and markers.
class VmPageSpliceList final {
public:
// Special members are declared here and defined out of line. Move construction and move
// assignment are provided; copying is presumably disabled by the
// DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE macro below (going by its name).
VmPageSpliceList();
VmPageSpliceList(VmPageSpliceList&& other);
VmPageSpliceList& operator=(VmPageSpliceList&& other_tree);
~VmPageSpliceList();
// For use by PhysicalPageProvider. The user-pager path doesn't use this.
// Builds a splice list from the raw page list |pages|; see the note on raw_pages_ below for why
// this path does not use head_/middle_/tail_.
static VmPageSpliceList CreateFromPageList(uint64_t offset, uint64_t length, list_node* pages);
// Pops the next page off of the splice.
VmPageOrMarker Pop();
// Returns true after the whole collection has been processed by Pop, i.e. once pos_ has reached
// (or passed) length_.
bool IsDone() const { return pos_ >= length_; }
DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE(VmPageSpliceList);
private:
// Private constructor taking the offset and length of the splice; VmPageList is a friend (see
// below) and so can reach it.
VmPageSpliceList(uint64_t offset, uint64_t length);
// NOTE(review): presumably releases every page still held by the splice list; the definition is
// not visible here, confirm against the implementation.
void FreeAllPages();
// Offset and length the splice list was created with.
uint64_t offset_;
uint64_t length_;
// Progress marker compared against length_ by IsDone().
uint64_t pos_ = 0;
// Storage for the spliced content: a head node, a tree of middle nodes and a tail node.
VmPageListNode head_ = VmPageListNode(0);
fbl::WAVLTree<uint64_t, ktl::unique_ptr<VmPageListNode>> middle_;
VmPageListNode tail_ = VmPageListNode(0);
// To avoid the possibility of allocation failure, we don't use head_, middle_, tail_ for
// CreateFromPageList(). With CreateFromPageList() we know that all the pages are present, so
// we can just keep a list of pages, and create VmPageListNode on the stack as pages are Pop()ed.
list_node raw_pages_ = LIST_INITIAL_VALUE(raw_pages_);
// VmPageList needs access to the private members above.
friend VmPageList;
};
class VmPageList final {
 public:
  VmPageList();
  ~VmPageList();

  VmPageList& operator=(VmPageList&& other);
  VmPageList(VmPageList&& other);

  // Sets the skew applied to all offsets of this list. Must be called before any node has been
  // added to the list.
  void InitializeSkew(uint64_t parent_skew, uint64_t offset) {
    // Checking list_skew_ doesn't catch all instances of double-initialization, but
    // it should catch some of them.
    DEBUG_ASSERT(list_skew_ == 0);
    DEBUG_ASSERT(list_.is_empty());

    list_skew_ = (parent_skew + offset) % (PAGE_SIZE * VmPageListNode::kPageFanOut);
  }
  uint64_t GetSkew() const { return list_skew_; }

  DISALLOW_COPY_AND_ASSIGN_ALLOW_MOVE(VmPageList);

  // Walks the page tree, calling |per_page_func| on every page, reference or marker.
  // |per_page_func| returns ZX_ERR_NEXT to continue, ZX_ERR_STOP to end the walk successfully, or
  // any other status to abort the walk with that status.
  template <typename F>
  zx_status_t ForEveryPage(F per_page_func) const {
    return ForEveryPage<const VmPageOrMarker*>(this, per_page_func);
  }

  // Similar to ForEveryPage, but the per_page_func gets called with a VmPageOrMarkerRef instead of
  // a const VmPageOrMarker*, allowing for limited mutation.
  template <typename F>
  zx_status_t ForEveryPageMutable(F per_page_func) {
    return ForEveryPage<VmPageOrMarkerRef>(this, per_page_func);
  }

  // Walks the page tree, calling |per_page_func| on every page, reference or marker in the range
  // [start_offset, end_offset).
  template <typename F>
  zx_status_t ForEveryPageInRange(F per_page_func, uint64_t start_offset,
                                  uint64_t end_offset) const {
    return ForEveryPageInRange<const VmPageOrMarker*>(this, per_page_func, start_offset,
                                                      end_offset);
  }

  // Similar to ForEveryPageInRange, but the per_page_func gets called with a VmPageOrMarkerRef
  // instead of a const VmPageOrMarker*, allowing for limited mutation.
  template <typename F>
  zx_status_t ForEveryPageInRangeMutable(F per_page_func, uint64_t start_offset,
                                         uint64_t end_offset) {
    return ForEveryPageInRange<VmPageOrMarkerRef>(this, per_page_func, start_offset, end_offset);
  }

  // Walks the page tree, calling |per_page_func| on every page/marker and |per_gap_func| on every
  // gap.
  template <typename PAGE_FUNC, typename GAP_FUNC>
  zx_status_t ForEveryPageAndGapInRange(PAGE_FUNC per_page_func, GAP_FUNC per_gap_func,
                                        uint64_t start_offset, uint64_t end_offset) const {
    return ForEveryPageAndGapInRange<const VmPageOrMarker*>(this, per_page_func, per_gap_func,
                                                            start_offset, end_offset);
  }

  // Walks the page tree, calling |per_page_func| on every page/marker that fulfills (returns true)
  // the |compare_func|. Also calls |contiguous_run_func| on every contiguous range of such
  // pages/markers encountered. Each contiguous range is reported at most once.
  template <typename COMPARE_FUNC, typename PAGE_FUNC, typename CONTIGUOUS_RUN_FUNC>
  zx_status_t ForEveryPageAndContiguousRunInRange(COMPARE_FUNC compare_func,
                                                  PAGE_FUNC per_page_func,
                                                  CONTIGUOUS_RUN_FUNC contiguous_run_func,
                                                  uint64_t start_offset,
                                                  uint64_t end_offset) const {
    return ForEveryPageAndContiguousRunInRange<const VmPageOrMarker*>(
        this, compare_func, per_page_func, contiguous_run_func, start_offset, end_offset);
  }

  // Returns true if any pages or markers are in the given range.
  bool AnyPagesInRange(uint64_t start_offset, uint64_t end_offset) const {
    bool found_page = false;
    ForEveryPageInRange(
        [&found_page](const VmPageOrMarker* page, uint64_t offset) {
          found_page = true;
          return ZX_ERR_STOP;
        },
        start_offset, end_offset);
    return found_page;
  }

  // Attempts to return a reference to the VmPageOrMarker at the specified offset. The returned
  // pointer is valid until the VmPageList is destroyed or any of the Remove*/Take/Merge etc
  // functions are called.
  //
  // Lookup may return 'nullptr' if there is no slot allocated for the given offset. If non-null
  // is returned it may still be the case that IsEmpty() on the returned PageOrMarker is true.
  const VmPageOrMarker* Lookup(uint64_t offset) const;

  // Similar to `Lookup` but returns a VmPageOrMarkerRef that allows for limited mutation of the
  // slot. General mutation requires calling `LookupOrAllocate`.
  VmPageOrMarkerRef LookupMutable(uint64_t offset);

  // Similar to `Lookup` but only returns `nullptr` if a slot cannot be allocated either due to out
  // of memory or due to offset being invalid.
  //
  // The returned slot, if not a `nullptr`, may generally be freely manipulated with the exception
  // that if it started !Empty, then it is an error to set it to Empty. In this case the
  // `RemovePage` method must be used.
  //
  // If the returned slot started Empty, and is not made !Empty, then the slot must be returned
  // with ReturnEmptySlot, to ensure no empty nodes are retained.
  VmPageOrMarker* LookupOrAllocate(uint64_t offset);

  // Returns a slot that was empty after LookupOrAllocate, and that the caller did not end up
  // filling.
  // This ensures that if LookupOrAllocate allocated a new underlying list node, then that list node
  // needs to be free'd otherwise it might not get cleaned up for the lifetime of the page list.
  //
  // This is only correct to call on an offset for which LookupOrAllocate had just returned a non
  // null slot, and that slot was Empty and is still Empty.
  void ReturnEmptySlot(uint64_t offset);

  // Removes any item at |offset| from the list and returns it, or VmPageOrMarker::Empty() if none.
  VmPageOrMarker RemoveContent(uint64_t offset);

  // Releases every item in the page list and calls free_content_fn on any content, giving it
  // ownership. Any markers are cleared.
  template <typename T>
  void RemoveAllContent(T free_content_fn) {
    // per page get a reference to the page pointer inside the page list node
    auto per_page_func = [&free_content_fn](VmPageOrMarker* p, uint64_t offset) {
      if (p->IsPageOrRef()) {
        free_content_fn(ktl::move(*p));
      }
      *p = VmPageOrMarker::Empty();
      return ZX_ERR_NEXT;
    };

    // walk the tree in order, freeing all the pages on every node
    ForEveryPage<VmPageOrMarker*>(this, per_page_func);

    // empty the tree
    list_.clear();
  }

  // Calls the provided callback for every page or marker in the range [start_offset, end_offset).
  // The callback can modify the VmPageOrMarker and take ownership of any pages, or leave them in
  // place. The difference between this and ForEveryPage is as this allows for modifying the
  // underlying pages any intermediate data structures can be checked and potentially freed if no
  // longer needed.
  template <typename T>
  void RemovePages(T per_page_fn, uint64_t start_offset, uint64_t end_offset) {
    ForEveryPageInRange<VmPageOrMarker*, NodeCheck::CleanupEmpty>(this, per_page_fn, start_offset,
                                                                  end_offset);
  }

  // Similar to RemovePages but also takes a |per_gap_fn| callback to allow for iterating over any
  // gaps encountered as well. This can be used when the intent is to modify the underlying pages
  // and/or gaps, while checking any intermediate data structures to potentially free ones that are
  // no longer needed.
  template <typename P, typename G>
  zx_status_t RemovePagesAndIterateGaps(P per_page_fn, G per_gap_fn, uint64_t start_offset,
                                        uint64_t end_offset) {
    return ForEveryPageAndGapInRange<VmPageOrMarker*, NodeCheck::CleanupEmpty>(
        this, per_page_fn, per_gap_fn, start_offset, end_offset);
  }

  // Returns true if there are no pages, references or markers in the page list.
  bool IsEmpty() const;

  // Returns true if the page list does not own any pages or references.
  bool HasNoPageOrRef() const;

  // Merges the pages in |other| in the range [|offset|, |end_offset|) into |this|
  // page list, starting at offset 0 in this list.
  //
  // For every page in |other| in the given range, if there is no corresponding page or marker
  // in |this|, then they will be passed to |migrate_fn|. If |migrate_fn| leaves the page in the
  // VmPageOrMarker it will be migrated into |this|, otherwise the migrate_fn is assumed to now own
  // the page. For any pages or markers in |other| outside the given range or which conflict with a
  // page in |this|, they will be released given ownership to |release_fn|.
  //
  // The |offset| values passed to |release_fn| and |migrate_fn| are the original offsets
  // in |other|, not the adapted offsets in |this|.
  //
  // **NOTE** unlike MergeOnto, |other| will be empty at the end of this method.
  void MergeFrom(
      VmPageList& other, uint64_t offset, uint64_t end_offset,
      fit::inline_function<void(VmPageOrMarker&&, uint64_t offset), 3 * sizeof(void*)> release_fn,
      fit::inline_function<void(VmPageOrMarker*, uint64_t offset)> migrate_fn);

  // Merges the pages in |this| onto |other|.
  //
  // For every page (or marker) in |this|, checks the same offset in |other|. If there is no
  // page or marker, then it inserts the page into |other|. Otherwise, it releases the page (or
  // marker) and gives ownership to |release_fn|.
  //
  // **NOTE** unlike MergeFrom, |this| will be empty at the end of this method.
  void MergeOnto(VmPageList& other, fit::inline_function<void(VmPageOrMarker&&)> release_fn);

  // Takes the pages, references and markers in the range described by |offset| and |length| out
  // of this page list.
  VmPageSpliceList TakePages(uint64_t offset, uint64_t length);

  // Bytes of heap used by the list nodes currently in the tree.
  uint64_t HeapAllocationBytes() const { return list_.size() * sizeof(VmPageListNode); }

  // Allow the implementation to use a one-past-the-end for VmPageListNode offsets,
  // plus to account for skew_.
  static constexpr uint64_t MAX_SIZE =
      ROUNDDOWN(UINT64_MAX, 2 * VmPageListNode::kPageFanOut * PAGE_SIZE);

 private:
  // Shared implementation of the whole-list walks. ZX_ERR_STOP from |per_page_func| ends the walk
  // and is reported as ZX_OK; any other status that is not ZX_ERR_NEXT is returned as is.
  template <typename PTR_TYPE, typename S, typename F>
  static zx_status_t ForEveryPage(S self, F per_page_func) {
    for (auto& pl : self->list_) {
      zx_status_t status = pl.template ForEveryPage<PTR_TYPE, F>(per_page_func, self->list_skew_);
      if (unlikely(status != ZX_ERR_NEXT)) {
        if (status == ZX_ERR_STOP) {
          break;
        }
        return status;
      }
    }
    return ZX_OK;
  }

  // Selects whether the range walks below check nodes for emptiness after visiting them. With
  // CleanupEmpty it is assumed the per_page_func may remove pages, and nodes found to be empty
  // after being visited are erased from the tree.
  enum class NodeCheck : bool {
    Skip = false,
    CleanupEmpty = true,
  };

  // Calls the provided callback for every page in the given range. See NodeCheck for the meaning
  // of the NODE_CHECK template argument.
  template <typename PTR_TYPE, NodeCheck NODE_CHECK = NodeCheck::Skip, typename S, typename F>
  static zx_status_t ForEveryPageInRange(S self, F per_page_func, uint64_t start_offset,
                                         uint64_t end_offset) {
    start_offset += self->list_skew_;
    end_offset += self->list_skew_;

    // Find the first node (if any) that will contain our starting offset.
    auto cur =
        self->list_.lower_bound(ROUNDDOWN(start_offset, VmPageListNode::kPageFanOut * PAGE_SIZE));
    if (!cur) {
      return ZX_OK;
    }

    // Handle scenario where start_offset begins not aligned to a node.
    if (cur->offset() < start_offset) {
      zx_status_t status = cur->template ForEveryPageInRange<PTR_TYPE, F>(
          per_page_func, start_offset, ktl::min(end_offset, cur->end_offset()), self->list_skew_);
      // Advance before a potential erase so that |cur| stays valid.
      auto prev = cur++;
      if constexpr (NODE_CHECK == NodeCheck::CleanupEmpty) {
        if (prev->IsEmpty()) {
          self->list_.erase(prev);
        }
      }
      if (unlikely(status != ZX_ERR_NEXT)) {
        if (status == ZX_ERR_STOP) {
          return ZX_OK;
        }
        return status;
      }
    }
    // Iterate through all full nodes contained in the range.
    while (cur && cur->end_offset() < end_offset) {
      DEBUG_ASSERT(start_offset <= cur->offset());
      zx_status_t status = cur->template ForEveryPage<PTR_TYPE, F>(per_page_func, self->list_skew_);
      // Advance before a potential erase so that |cur| stays valid.
      auto prev = cur++;
      if constexpr (NODE_CHECK == NodeCheck::CleanupEmpty) {
        if (prev->IsEmpty()) {
          self->list_.erase(prev);
        }
      }
      if (unlikely(status != ZX_ERR_NEXT)) {
        if (status == ZX_ERR_STOP) {
          return ZX_OK;
        }
        return status;
      }
    }
    // Handle scenario where the end_offset is not aligned to the end of a node.
    if (cur && cur->offset() < end_offset) {
      DEBUG_ASSERT(cur->end_offset() >= end_offset);
      zx_status_t status = cur->template ForEveryPageInRange<PTR_TYPE, F>(
          per_page_func, cur->offset(), end_offset, self->list_skew_);
      if constexpr (NODE_CHECK == NodeCheck::CleanupEmpty) {
        if (cur->IsEmpty()) {
          self->list_.erase(cur);
        }
      }
      if (unlikely(status != ZX_ERR_NEXT)) {
        if (status == ZX_ERR_STOP) {
          return ZX_OK;
        }
        return status;
      }
    }

    return ZX_OK;
  }

  // Walks [start_offset, end_offset) calling |per_page_func| on content and |per_gap_func| on the
  // gaps between content. Either callback returning ZX_ERR_STOP ends the walk successfully without
  // reporting the remainder of the range as a gap.
  template <typename PTR_TYPE, NodeCheck NODE_CHECK = NodeCheck::Skip, typename S,
            typename PAGE_FUNC, typename GAP_FUNC>
  static zx_status_t ForEveryPageAndGapInRange(S self, PAGE_FUNC per_page_func,
                                               GAP_FUNC per_gap_func, uint64_t start_offset,
                                               uint64_t end_offset) {
    // Offset at which the next piece of content is expected if there is no gap.
    uint64_t expected_next_off = start_offset;
    auto per_page_wrapper_fn = [&expected_next_off, end_offset, per_page_func, &per_gap_func](
                                   auto* p, uint64_t off) {
      zx_status_t status = ZX_ERR_NEXT;
      if (expected_next_off != off) {
        status = per_gap_func(expected_next_off, off);
      }
      if (status == ZX_ERR_NEXT) {
        status = per_page_func(p, off);
      }
      expected_next_off = off + PAGE_SIZE;
      // Prevent the last call to per_gap_func
      if (status == ZX_ERR_STOP) {
        expected_next_off = end_offset;
      }
      return status;
    };

    zx_status_t status = ForEveryPageInRange<PTR_TYPE, NODE_CHECK>(self, per_page_wrapper_fn,
                                                                   start_offset, end_offset);
    if (status != ZX_OK) {
      return status;
    }

    // Report the gap (if any) between the last visited content and the end of the range.
    if (expected_next_off != end_offset) {
      status = per_gap_func(expected_next_off, end_offset);
      if (status != ZX_ERR_NEXT && status != ZX_ERR_STOP) {
        return status;
      }
    }

    return ZX_OK;
  }

  // Walks [start_offset, end_offset), calling |per_page_func| on every page/marker accepted by
  // |compare_func| and |contiguous_run_func| once for every maximal contiguous run of such
  // pages/markers.
  //
  // Callbacks return ZX_ERR_NEXT to continue and ZX_ERR_STOP to end the walk successfully; any
  // other status aborts the walk and is returned. When |per_page_func| returns ZX_ERR_STOP the
  // page it was called on is not added to the run, but the run accumulated before it is still
  // reported. When |contiguous_run_func| returns ZX_ERR_STOP nothing further is reported.
  template <typename PTR_TYPE, typename S, typename COMPARE_FUNC, typename PAGE_FUNC,
            typename CONTIGUOUS_RUN_FUNC>
  static zx_status_t ForEveryPageAndContiguousRunInRange(S self, COMPARE_FUNC compare_func,
                                                         PAGE_FUNC per_page_func,
                                                         CONTIGUOUS_RUN_FUNC contiguous_run_func,
                                                         uint64_t start_offset,
                                                         uint64_t end_offset) {
    // Track contiguous range of pages fulfilling compare_func.
    uint64_t contiguous_run_start = start_offset;
    uint64_t contiguous_run_len = 0;

    // Reports the run tracked so far (if any) to contiguous_run_func and stops tracking it. The
    // run is cleared *before* the callback result is looked at, so that a run is never reported
    // a second time when the callback asks to stop.
    auto flush_run = [&]() -> zx_status_t {
      if (contiguous_run_len == 0) {
        return ZX_ERR_NEXT;
      }
      const uint64_t run_end = contiguous_run_start + contiguous_run_len;
      contiguous_run_len = 0;
      return contiguous_run_func(contiguous_run_start, run_end);
    };

    zx_status_t status = ForEveryPageAndGapInRange<PTR_TYPE>(
        self,
        [&](const VmPageOrMarker* p, uint64_t off) -> zx_status_t {
          if (compare_func(p, off)) {
            const zx_status_t st = per_page_func(p, off);
            if (st != ZX_ERR_NEXT) {
              // Propagate ZX_ERR_STOP as is (rather than as ZX_OK) so the gap iteration knows the
              // walk was stopped on purpose and does not report a trailing gap. Any pending run
              // is reported by the final flush below.
              return st;
            }
            // Start tracking a new range first if no range is being tracked yet.
            if (contiguous_run_len == 0) {
              contiguous_run_start = off;
            }
            // Append this page to the contiguous range being tracked.
            contiguous_run_len += PAGE_SIZE;
            return ZX_ERR_NEXT;
          }
          // This page does not fulfill compare_func and so terminates any range being tracked.
          // Report the range so far; a new one may start after this page.
          return flush_run();
        },
        [&](uint64_t start, uint64_t end) -> zx_status_t {
          // A gap terminates any range being tracked. Report the range so far; a new one may
          // start after this gap.
          return flush_run();
        },
        start_offset, end_offset);

    if (status != ZX_OK) {
      return status;
    }

    // Process the last contiguous range if there is one.
    status = flush_run();
    if (status != ZX_ERR_NEXT && status != ZX_ERR_STOP) {
      return status;
    }
    return ZX_OK;
  }

  fbl::WAVLTree<uint64_t, ktl::unique_ptr<VmPageListNode>> list_;
  // A skew added to offsets provided as arguments to VmPageList functions before
  // interfacing with list_. This allows all VmPageLists within a clone tree
  // to place individual vm_page_t entries at the same offsets within their nodes, so
  // that the nodes can be moved between different lists without having to worry
  // about needing to split up a node.
  uint64_t list_skew_ = 0;
};
#endif // ZIRCON_KERNEL_VM_INCLUDE_VM_VM_PAGE_LIST_H_