// Copyright 2022 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vm/content_size_manager.h"

#include <ktl/algorithm.h>
#include <ktl/limits.h>
#include <ktl/move.h>

#include <ktl/enforce.h>

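// ContentSizeManager serializes operations that observe or mutate the content
// size. Writes and appends queue on `write_q_`, reads queue on `read_q_`, and
// set size operations queue on both; an operation that may change the visible
// content size blocks until it reaches the head of its queue(s).
//
// A rough usage sketch for an append (hypothetical caller; `csm`, `len`, and the
// `lock()` accessor are illustrative and not defined in this file):
//
//   ContentSizeManager::Operation op;
//   Guard<Mutex> guard{csm->lock()};
//   if (csm->BeginAppendLocked(len, &guard, &op) == ZX_OK) {
//     // ... copy `len` bytes into the backing memory ...
//     op.CommitLocked();  // Publish the new content size.
//   }
//   // On overflow, BeginAppendLocked has already dequeued `op`; on any other
//   // failure after a successful Begin, the caller would use `op.CancelLocked()`.
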
// static
zx_status_t ContentSizeManager::Create(uint64_t content_size,
                                       fbl::RefPtr<ContentSizeManager>* content_size_manager) {
  fbl::AllocChecker ac;
  fbl::RefPtr<ContentSizeManager> csm = fbl::AdoptRef(new (&ac) ContentSizeManager(content_size));
  if (!ac.check()) {
    return ZX_ERR_NO_MEMORY;
  }

  *content_size_manager = ktl::move(csm);
  return ZX_OK;
}

uint64_t ContentSizeManager::Operation::GetSizeLocked() const {
  DEBUG_ASSERT(IsValid());
  // The size of an `Append` operation should only be read after it has been initialized in
  // `BeginAppendLocked`.
  DEBUG_ASSERT(type_ != OperationType::Append || size_ > 0);
  return size_;
}

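// Shrinks a pending write or append to end at `new_size` instead of the size it was queued with,
// e.g. when only a prefix of the data could be copied. As an illustration (values assumed): a
// write queued with a target size of 16384 that completes only up to offset 4096 could call
// `ShrinkSizeLocked(4096)` before committing, so that only the completed prefix is published.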
void ContentSizeManager::Operation::ShrinkSizeLocked(uint64_t new_size) {
  DEBUG_ASSERT(IsValid());
  // If `new_size` is 0, the operation should be cancelled instead.
  DEBUG_ASSERT(new_size > 0);
  // This function may only be called on `Append` or `Write` operations.
  ASSERT(type_ == OperationType::Append || type_ == OperationType::Write);
  ASSERT(new_size <= size_);

  size_ = new_size;
}

void ContentSizeManager::Operation::CommitLocked() {
  DEBUG_ASSERT(IsValid());
  parent()->CommitAndDequeueOperationLocked(this);
}

void ContentSizeManager::Operation::CancelLocked() {
  DEBUG_ASSERT(IsValid());
  parent()->DequeueOperationLocked(this);
}

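// Publishes partial progress of an in-flight write or append by advancing the visible content
// size to `new_content_size` without dequeueing the operation, allowing concurrent readers to
// observe data that has already been written while the remainder of the operation is still in
// flight.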
void ContentSizeManager::Operation::UpdateContentSizeFromProgress(uint64_t new_content_size) {
  DEBUG_ASSERT(IsValid());
  DEBUG_ASSERT(type_ == OperationType::Write || type_ == OperationType::Append);
  DEBUG_ASSERT(new_content_size <= size_);
  DEBUG_ASSERT(new_content_size > parent_->GetContentSize());

  parent_->SetContentSize(new_content_size);
}

void ContentSizeManager::Operation::Initialize(ContentSizeManager* parent, uint64_t size,
                                               OperationType type) {
  DEBUG_ASSERT(!IsValid());
  DEBUG_ASSERT(parent != nullptr);

  parent_ = parent;
  size_ = size;
  type_ = type;
}

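// Queues an append of `append_size` bytes past the current end of content. The final size of an
// append is unknown until every preceding content size mutating operation has resolved, so the
// operation is enqueued with a placeholder size of zero and only computes
// `content size + append_size` once it is safe to do so.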
zx_status_t ContentSizeManager::BeginAppendLocked(uint64_t append_size, Guard<Mutex>* lock_guard,
                                                  Operation* out_op) {
  DEBUG_ASSERT(out_op);
  DEBUG_ASSERT(append_size > 0);

  // Temporarily initialize an `Append` operation with zero size until the size is known.
  out_op->Initialize(this, 0, OperationType::Append);
  write_q_.push_back(out_op);

  // Block until head if any of the following operations precede this one:
  //  * Appends or writes that exceed the current content size.
  //  * Set size
  //
  // Effectively, this checks for any content size modifying operations.
  bool should_block = false;
  auto iter = --write_q_.make_iterator(*out_op);

  // It's okay to read the content size once here, since the lock is held. This means that content
  // size can only be increased if the front-most content size modifying operation is an expanding
  // write or append. Not re-reading content size and seeing a potentially smaller content size
  // here is valid, since it will only pessimize (i.e. block until head) this operation in a very
  // small number of cases within an extremely narrow timing window. There are no correctness
  // issues with pessimization. Since the pessimizing case is so rare, prefer reading once over
  // continuously re-reading the atomic in a loop.
  uint64_t cur_content_size = GetContentSize();
  while (iter.IsValid()) {
    iter->AssertParentLockHeld();
    if (iter->GetType() == OperationType::SetSize || iter->GetType() == OperationType::Append ||
        (iter->GetType() == OperationType::Write && iter->GetSizeLocked() > cur_content_size)) {
      should_block = true;
      break;
    }

    --iter;
  }

  if (should_block) {
    BlockUntilHeadLocked(out_op, lock_guard);

    // Must re-read the content size here, since `BlockUntilHeadLocked` will have dropped the
    // lock, and content size may have been modified by the operations in front of this one.
    cur_content_size = GetContentSize();
  }

  // Use the previously read content size. In the case where this operation blocked until it was
  // the head, content size was re-read with the lock held and with this operation at the head of
  // the queue (no other content size mutating operations can proceed before this). In all other
  // cases, the previous loop verified that no content size mutating operations are in front of
  // this operation.
  if (add_overflow(cur_content_size, append_size, &out_op->size_)) {
    // Dequeue the operation, since this change should not be committed.
    DequeueOperationLocked(out_op);
    return ZX_ERR_OUT_OF_RANGE;
  }

  return ZX_OK;
}

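// Queues a write ending at offset `target_size`. A write only needs to serialize against
// operations that could change whether it expands content: it blocks until it is the head of the
// write queue if a preceding set size shrinks content below `target_size` or if `target_size`
// already exceeds the current content size. For an expanding write, `*out_prev_content_size` is
// set to the content size being expanded from; otherwise it is left as `ktl::nullopt`.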
void ContentSizeManager::BeginWriteLocked(uint64_t target_size, Guard<Mutex>* lock_guard,
                                          ktl::optional<uint64_t>* out_prev_content_size,
                                          Operation* out_op) {
  DEBUG_ASSERT(out_prev_content_size);
  DEBUG_ASSERT(out_op);

  *out_prev_content_size = ktl::nullopt;
  out_op->Initialize(this, target_size, OperationType::Write);
  write_q_.push_back(out_op);

  // Check if there are any set size operations in front of this one that set the content size
  // smaller than `target_size`.
  bool block_due_to_set = false;
  auto iter = --write_q_.make_iterator(*out_op);
  while (iter.IsValid()) {
    iter->AssertParentLockHeld();
    if (iter->GetType() == OperationType::SetSize && iter->GetSizeLocked() < target_size) {
      block_due_to_set = true;
      break;
    }

    --iter;
  }

  // If this write may end up expanding content, block until it is at the head of the queue.
  if (block_due_to_set || target_size > GetContentSize()) {
    BlockUntilHeadLocked(out_op, lock_guard);

    // Must re-read the content size here, since `BlockUntilHeadLocked` will have dropped the
    // lock, and content size may have been modified by the operations in front of this one.
    uint64_t cur_content_size = GetContentSize();
    if (target_size > cur_content_size) {
      *out_prev_content_size = cur_content_size;
    }
  }
}

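// Queues a read extending up to `target_size`. Reads never block; instead,
// `*out_content_size_limit` clamps the readable range to the smallest size that is guaranteed
// valid, accounting for any queued set size operations that will shrink content.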
void ContentSizeManager::BeginReadLocked(uint64_t target_size, uint64_t* out_content_size_limit,
                                         Operation* out_op) {
  DEBUG_ASSERT(out_content_size_limit);
  DEBUG_ASSERT(out_op);

  // Allow reads up to the smallest outstanding size.
  // Other concurrent, in-flight operations may or may not complete before this read, so it is
  // okay to be more conservative here and only read up to the guaranteed valid region.
  *out_content_size_limit = GetContentSize();
  for (auto& op : read_q_) {
    if (op.GetType() != OperationType::SetSize) {
      continue;
    }

    op.AssertParentLockHeld();
    *out_content_size_limit = ktl::min(op.GetSizeLocked(), *out_content_size_limit);
  }
  *out_content_size_limit = ktl::min(target_size, *out_content_size_limit);

  out_op->Initialize(this, *out_content_size_limit, OperationType::Read);
  read_q_.push_back(out_op);
}

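// Queues a set size to `target_size`. A set size participates in both queues: it must serialize
// against writers touching the region it will zero or invalidate, and against readers that would
// otherwise observe content beyond the new size.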
void ContentSizeManager::BeginSetContentSizeLocked(uint64_t target_size, Operation* out_op,
                                                   Guard<Mutex>* lock_guard) {
  out_op->Initialize(this, target_size, OperationType::SetSize);
  write_q_.push_back(out_op);
  read_q_.push_back(out_op);

  // Block until head if any of the following operations precede this one:
  //  * Appends or writes that exceed either the current content size or the target size.
  //    - If it exceeds the current content size, the overlap is in the region in which the set
  //      size will zero content and the write will commit data.
  //    - If it exceeds the target size, the overlap is in the region in which the set size will
  //      invalidate pages/data and the write will commit data.
  //  * Reads that read at or beyond the target size.
  //  * Set size
  bool should_block = false;
  auto write_iter = --write_q_.make_iterator(*out_op);

  // It's okay to read the content size once here, since the lock is held. This means that content
  // size can only be increased if the front-most content size modifying operation is an expanding
  // write or append. Not re-reading content size and seeing a potentially smaller content size
  // here is valid, since it will only pessimize (i.e. block until head) this operation in a very
  // small number of cases within an extremely narrow timing window. There are no correctness
  // issues with pessimization. Since the pessimizing case is so rare, prefer reading once over
  // continuously re-reading the atomic in a loop.
  uint64_t cur_content_size = GetContentSize();
  while (write_iter.IsValid()) {
    write_iter->AssertParentLockHeld();
    if (write_iter->GetType() == OperationType::SetSize ||
        write_iter->GetType() == OperationType::Append ||
        (write_iter->GetType() == OperationType::Write &&
         write_iter->GetSizeLocked() > ktl::min(cur_content_size, target_size))) {
      should_block = true;
      break;
    }

    --write_iter;
  }

  auto read_iter = --read_q_.make_iterator(*out_op);
  while (read_iter.IsValid() && !should_block) {
    read_iter->AssertParentLockHeld();
    if (read_iter->GetType() == OperationType::Read && read_iter->GetSizeLocked() > target_size) {
      should_block = true;
      break;
    }

    --read_iter;
  }

  if (should_block) {
    BlockUntilHeadLocked(out_op, lock_guard);
  }
}

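// Blocks until `op` reaches the head of every queue it is enqueued on, dropping the lock around
// each wait on the operation's ready event and re-checking the queue position after every wakeup.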
void ContentSizeManager::BlockUntilHeadLocked(Operation* op, Guard<Mutex>* lock_guard) {
  DEBUG_ASSERT(op->parent_ == this);

  if (fbl::InContainer<WriteQueueTag>(*op)) {
    while (op->IsValid() && &write_q_.front() != op) {
      lock_guard->CallUnlocked([op] { op->ready_event_.Wait(); });
    }
  }

  if (fbl::InContainer<ReadQueueTag>(*op)) {
    while (op->IsValid() && &read_q_.front() != op) {
      lock_guard->CallUnlocked([op] { op->ready_event_.Wait(); });
    }
  }
}

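// Applies the size effect of `op` and removes it from its queue(s). A committed write never
// shrinks content, so the larger of its end offset and the current content size is kept; appends
// and set size operations publish their recorded size directly; reads have no size effect.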
void ContentSizeManager::CommitAndDequeueOperationLocked(Operation* op) {
  if (!op->IsValid()) {
    DEBUG_ASSERT(!fbl::InContainer<WriteQueueTag>(*op));
    DEBUG_ASSERT(!fbl::InContainer<ReadQueueTag>(*op));
    return;
  }

  op->AssertParentLockHeld();
  switch (op->type_) {
    case OperationType::Write:
      SetContentSize(ktl::max(op->GetSizeLocked(), GetContentSize()));
      break;
    case OperationType::Append:
    case OperationType::SetSize:
      SetContentSize(op->GetSizeLocked());
      break;
    case OperationType::Read:
      // No-op.
      break;
  }

  DequeueOperationLocked(op);
}

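// Removes `op` from the queue(s) matching its type and, if it was at the head, wakes the
// operation that becomes the new head so a blocked `BlockUntilHeadLocked` caller can make
// progress.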
void ContentSizeManager::DequeueOperationLocked(Operation* op) {
  DEBUG_ASSERT(op->IsValid());
  DEBUG_ASSERT(op->parent_ == this);

  auto dequeue_from_list = [op](auto& list) {
    const bool is_head = &list.front() == op;
    auto next = ++list.make_iterator(*op);
    list.erase(*op);

    // If the erased operation was at the head of the list, signal the next operation so that it
    // wakes and completes its task now that this one has been dequeued.
    if (is_head && next.IsValid()) {
      next->ready_event_.Signal();
    }
  };

  switch (op->type_) {
    case OperationType::Write:
    case OperationType::Append:
      DEBUG_ASSERT(fbl::InContainer<WriteQueueTag>(*op));
      dequeue_from_list(write_q_);
      break;
    case OperationType::Read:
      DEBUG_ASSERT(fbl::InContainer<ReadQueueTag>(*op));
      dequeue_from_list(read_q_);
      break;
    case OperationType::SetSize:
      DEBUG_ASSERT(fbl::InContainer<ReadQueueTag>(*op));
      DEBUG_ASSERT(fbl::InContainer<WriteQueueTag>(*op));
      dequeue_from_list(write_q_);
      dequeue_from_list(read_q_);
      break;
  }

  // Signal the ready event of `op`, in case another thread is blocked on it.
  //
  // Note that this should not normally occur, since only the owning thread of an operation should
  // block on or dequeue it.
  op->ready_event_.Signal();
  op->Reset();
}