// Copyright 2022 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vm/content_size_manager.h"

#include <ktl/algorithm.h>
#include <ktl/limits.h>
#include <ktl/move.h>
#include <ktl/optional.h>

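// A minimal caller-side usage sketch (hypothetical caller; error handling abbreviated):
//
//   fbl::RefPtr<ContentSizeManager> csm;
//   zx_status_t status = ContentSizeManager::Create(/*content_size=*/0u, &csm);
//   if (status != ZX_OK) {
//     return status;
//   }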
// static
zx_status_t ContentSizeManager::Create(uint64_t content_size,
                                       fbl::RefPtr<ContentSizeManager>* content_size_manager) {
  fbl::AllocChecker ac;
  fbl::RefPtr<ContentSizeManager> csm = fbl::AdoptRef(new (&ac) ContentSizeManager(content_size));
  if (!ac.check()) {
    return ZX_ERR_NO_MEMORY;
  }
  *content_size_manager = ktl::move(csm);
  return ZX_OK;
}

uint64_t ContentSizeManager::Operation::GetSizeLocked() const {
  DEBUG_ASSERT(IsValid());
  // Reading the size for `Append` operations should only occur after it's been initialized.
  DEBUG_ASSERT(type_ != OperationType::Append || size_ > 0);

  return size_;
}

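// Typical usage (an assumption): shrink a pending write or append that completed only partially,
// so that only the successfully written prefix is committed.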
void ContentSizeManager::Operation::ShrinkSizeLocked(uint64_t new_size) {
  DEBUG_ASSERT(IsValid());
  // If `new_size` is 0, the operation should be cancelled instead.
  DEBUG_ASSERT(new_size > 0);
  // This function may only be called on content write operations (`Append` or `Write`).
  ASSERT(type_ == OperationType::Append || type_ == OperationType::Write);
  ASSERT(new_size <= size_);

  size_ = new_size;
}

void ContentSizeManager::Operation::CommitLocked() {
  DEBUG_ASSERT(IsValid());

  parent()->CommitAndDequeueOperationLocked(this);
}

void ContentSizeManager::Operation::CancelLocked() {
  DEBUG_ASSERT(IsValid());

  parent()->DequeueOperationLocked(this);
}

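// Publishes intermediate progress of an in-flight write or append by raising the visible content
// size before the operation commits; the asserts require growth to stay monotonic and within the
// operation's reserved size.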
void ContentSizeManager::Operation::UpdateContentSizeFromProgress(uint64_t new_content_size) {
  DEBUG_ASSERT(IsValid());
  DEBUG_ASSERT(type_ == OperationType::Write || type_ == OperationType::Append);
  DEBUG_ASSERT(new_content_size <= size_);
  DEBUG_ASSERT(new_content_size > parent_->GetContentSize());

  parent_->SetContentSize(new_content_size);
}

void ContentSizeManager::Operation::Initialize(ContentSizeManager* parent, uint64_t size,
                                               OperationType type) {
  DEBUG_ASSERT(!IsValid());
  DEBUG_ASSERT(parent != nullptr);

  parent_ = parent;
  size_ = size;
  type_ = type;
}

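// Queues an append operation. The operation is enqueued immediately, blocks until it reaches the
// head of the write queue if any content size mutating operation (set size, append, or expanding
// write) precedes it, and only then resolves its final size to `content size + append_size`,
// failing with ZX_ERR_OUT_OF_RANGE on overflow.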
zx_status_t ContentSizeManager::BeginAppendLocked(uint64_t append_size, Guard<Mutex>* lock_guard,
                                                  Operation* out_op) {
  DEBUG_ASSERT(out_op);
  DEBUG_ASSERT(append_size > 0);

  // Temporarily initialize an `Append` operation with zero size until the size is known.
  out_op->Initialize(this, 0, OperationType::Append);
  write_q_.push_back(out_op);

  // Block until head if there are any of the following operations preceding this one:
  //  * Appends or writes that exceed the current content size.
  //  * Set size
  //
  // Effectively, this checks for any content size modifying operations.
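  // For example (illustrative): with a current content size of 100, a preceding queue of
  // [Write(50), Write(120)] forces this append to block, because Write(120) extends past the
  // current content size; a preceding queue of only [Write(50)] would not.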
  bool should_block = false;
  auto iter = --write_q_.make_iterator(*out_op);

  // It's okay to read the content size once here, since the lock is held. This means that content
  // size can only be increased if the front-most content size modifying operation is an expanding
  // write or append. Not re-reading content size and seeing a potentially smaller content size
  // here is valid, since it will only pessimize (i.e. blocking until head) this operation for a
  // very small number of cases within an extremely narrow timing window. There are no correctness
  // issues with pessimization. Since the pessimizing case is so rare, prefer reading once over
  // continuously re-reading the atomic in a loop.
  uint64_t cur_content_size = GetContentSize();
  while (iter.IsValid()) {
    iter->AssertParentLockHeld();
    if (iter->GetType() == OperationType::SetSize || iter->GetType() == OperationType::Append ||
        (iter->GetType() == OperationType::Write && iter->GetSizeLocked() > cur_content_size)) {
      should_block = true;
      break;
    }
    --iter;
  }

  if (should_block) {
    BlockUntilHeadLocked(out_op, lock_guard);

    // Must re-read the content size here, since `BlockUntilHeadLocked` would have dropped the
    // lock, and content size may have been modified by the operations in front of this one.
    cur_content_size = GetContentSize();
  }

  // Use the previously read content size. In the case where this operation blocked until it was
  // the head, the content size was re-read with the lock held and with this operation at the head
  // of the queue (no other content size mutating operations can proceed before this). In all
  // other cases, the previous loop verified that no content size mutating operations are in front
  // of this operation.
  if (add_overflow(cur_content_size, append_size, &out_op->size_)) {
    // Dequeue the operation, since this change should not be committed.
    DequeueOperationLocked(out_op);
    return ZX_ERR_OUT_OF_RANGE;
  }

  return ZX_OK;
}

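// Queues a write operation whose end offset is `target_size`. If the write may expand the visible
// content (either directly or because a preceding set size shrinks below `target_size`), it
// blocks until it is the head of the write queue; if, once at the head, `target_size` still
// exceeds the content size, `*out_prev_content_size` is set to that previous content size, and it
// remains `ktl::nullopt` otherwise.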
void ContentSizeManager::BeginWriteLocked(uint64_t target_size, Guard<Mutex>* lock_guard,
                                          ktl::optional<uint64_t>* out_prev_content_size,
                                          Operation* out_op) {
  DEBUG_ASSERT(out_prev_content_size);
  DEBUG_ASSERT(out_op);

  *out_prev_content_size = ktl::nullopt;

  out_op->Initialize(this, target_size, OperationType::Write);
  write_q_.push_back(out_op);

  // Check whether any set size operations in front of this one set the content size smaller than
  // `target_size`.
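  // For example (illustrative): if [SetSize(5)] precedes this operation and `target_size` is 10,
  // this write would repopulate content that the set size truncates, so it must wait until the
  // set size completes.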
  bool block_due_to_set = false;
  auto iter = --write_q_.make_iterator(*out_op);
  while (iter.IsValid()) {
    iter->AssertParentLockHeld();
    if (iter->GetType() == OperationType::SetSize && iter->GetSizeLocked() < target_size) {
      block_due_to_set = true;
      break;
    }
    --iter;
  }

  // If this write can potentially expand the content size, block until it is the head of the
  // queue.
  if (block_due_to_set || target_size > GetContentSize()) {
    BlockUntilHeadLocked(out_op, lock_guard);

    // Must re-read the content size here, since `BlockUntilHeadLocked` would have dropped the
    // lock, and content size may have been modified by the operations in front of this one.
    uint64_t cur_content_size = GetContentSize();
    if (target_size > cur_content_size) {
      *out_prev_content_size = cur_content_size;
    }
  }
}

void ContentSizeManager::BeginReadLocked(uint64_t target_size, uint64_t* out_content_size_limit,
                                         Operation* out_op) {
  DEBUG_ASSERT(out_content_size_limit);
  DEBUG_ASSERT(out_op);

  // Allow reads up to the smallest outstanding size.
  // Other concurrent, in-flight operations may or may not complete before this read, so it is
  // okay to be more conservative here and only read up to the guaranteed valid region.
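  // For example (illustrative): with a content size of 100, a pending SetSize(50), and a
  // `target_size` of 80, the limit resolves to min(80, min(50, 100)) = 50.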
  *out_content_size_limit = GetContentSize();
  for (auto& op : read_q_) {
    if (op.GetType() != OperationType::SetSize) {
      continue;
    }

    op.AssertParentLockHeld();
    *out_content_size_limit = ktl::min(op.GetSizeLocked(), *out_content_size_limit);
  }
  *out_content_size_limit = ktl::min(target_size, *out_content_size_limit);

  out_op->Initialize(this, *out_content_size_limit, OperationType::Read);
  read_q_.push_back(out_op);
}

void ContentSizeManager::BeginSetContentSizeLocked(uint64_t target_size, Operation* out_op,
                                                   Guard<Mutex>* lock_guard) {
  out_op->Initialize(this, target_size, OperationType::SetSize);

  write_q_.push_back(out_op);
  read_q_.push_back(out_op);

  // Block until head if there are any of the following operations preceding this one:
  //  * Appends or writes that exceed either the current content size or the target size.
  //     - If it exceeds the current content size, the overlap is in the region in which the set
  //       size will zero content and the write will commit data.
  //     - If it exceeds the target size, the overlap is in the region in which the set size will
  //       invalidate pages/data and the write will commit data.
  //  * Reads that are reading at or beyond target size.
  //  * Set size
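  // For example (illustrative): shrinking content from 100 to a target size of 50 must wait for a
  // preceding Write(80) (it commits data inside the truncated region) and for a preceding
  // Read(60) (it reads past the new end), but not for a preceding Read(40).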
  bool should_block = false;
  auto write_iter = --write_q_.make_iterator(*out_op);

  // It's okay to read the content size once here, since the lock is held. This means that content
  // size can only be increased if the front-most content size modifying operation is an expanding
  // write or append. Not re-reading content size and seeing a potentially smaller content size
  // here is valid, since it will only pessimize (i.e. blocking until head) this operation for a
  // very small number of cases within an extremely narrow timing window. There are no correctness
  // issues with pessimization. Since the pessimizing case is so rare, prefer reading once over
  // continuously re-reading the atomic in a loop.
  uint64_t cur_content_size = GetContentSize();
  while (write_iter.IsValid()) {
    write_iter->AssertParentLockHeld();
    if (write_iter->GetType() == OperationType::SetSize ||
        write_iter->GetType() == OperationType::Append ||
        (write_iter->GetType() == OperationType::Write &&
         write_iter->GetSizeLocked() > ktl::min(cur_content_size, target_size))) {
      should_block = true;
      break;
    }
    --write_iter;
  }

  auto read_iter = --read_q_.make_iterator(*out_op);
  while (read_iter.IsValid() && !should_block) {
    read_iter->AssertParentLockHeld();
    if (read_iter->GetType() == OperationType::Read && read_iter->GetSizeLocked() > target_size) {
      should_block = true;
      break;
    }
    --read_iter;
  }

  if (should_block) {
    BlockUntilHeadLocked(out_op, lock_guard);
  }
}

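// Blocks until `op` is at the head of every queue that contains it, dropping the lock while
// waiting. `SetSize` operations live in both queues, so they must reach the head of the write
// queue as well as the read queue before proceeding.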
void ContentSizeManager::BlockUntilHeadLocked(Operation* op, Guard<Mutex>* lock_guard) {
  DEBUG_ASSERT(op->parent_ == this);

  if (fbl::InContainer<WriteQueueTag>(*op)) {
    while (op->IsValid() && &write_q_.front() != op) {
      lock_guard->CallUnlocked([op] { op->ready_event_.Wait(); });
    }
  }

  if (fbl::InContainer<ReadQueueTag>(*op)) {
    while (op->IsValid() && &read_q_.front() != op) {
      lock_guard->CallUnlocked([op] { op->ready_event_.Wait(); });
    }
  }
}

void ContentSizeManager::CommitAndDequeueOperationLocked(Operation* op) {
  if (!op->IsValid()) {
    DEBUG_ASSERT(!fbl::InContainer<WriteQueueTag>(*op));
    DEBUG_ASSERT(!fbl::InContainer<ReadQueueTag>(*op));
    return;
  }

  op->AssertParentLockHeld();
  switch (op->type_) {
    case OperationType::Write:
      SetContentSize(ktl::max(op->GetSizeLocked(), GetContentSize()));
      break;
    case OperationType::Append:
    case OperationType::SetSize:
      SetContentSize(op->GetSizeLocked());
      break;
    case OperationType::Read:
      // No-op
      break;
  }

  DequeueOperationLocked(op);
}

void ContentSizeManager::DequeueOperationLocked(Operation* op) {
  DEBUG_ASSERT(op->IsValid());
  DEBUG_ASSERT(op->parent_ == this);

  auto dequeue_from_list = [op](auto& list) {
    const bool is_head = &list.front() == op;
    auto next = ++list.make_iterator(*op);

    list.erase(*op);

    // If the erased operation was at the head of the list, the next operation (if any) is now the
    // new head; signal it so that it can wake and proceed.
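    // For example (illustrative): erasing A from [A, B, C] signals B; when B later dequeues, it
    // signals C in turn, so blocked operations are released strictly in queue order.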
    if (is_head && next.IsValid()) {
      next->ready_event_.Signal();
    }
  };

  switch (op->type_) {
    case OperationType::Write:
    case OperationType::Append:
      DEBUG_ASSERT(fbl::InContainer<WriteQueueTag>(*op));
      dequeue_from_list(write_q_);
      break;
    case OperationType::Read:
      DEBUG_ASSERT(fbl::InContainer<ReadQueueTag>(*op));
      dequeue_from_list(read_q_);
      break;
    case OperationType::SetSize:
      DEBUG_ASSERT(fbl::InContainer<ReadQueueTag>(*op));
      DEBUG_ASSERT(fbl::InContainer<WriteQueueTag>(*op));
      dequeue_from_list(write_q_);
      dequeue_from_list(read_q_);
      break;
  }

  // Signal the ready event of `op` in case another thread is blocking on it.
  //
  // Note that this should not normally occur, since only the owning thread of the operation
  // should be blocking on or dequeueing it.
  op->ready_event_.Signal();

  op->Reset();
}