blob: 0bb7677b86939d492f76f71d482fe03a538926df [file] [log] [blame]
// Copyright 2022 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_
#define ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_
#include <lib/zx/result.h>
#include <fbl/intrusive_container_utils.h>
#include <fbl/intrusive_double_list.h>
#include <fbl/ref_counted.h>
#include <kernel/event.h>
#include <kernel/mutex.h>
#include <ktl/algorithm.h>
#include <ktl/atomic.h>
#include <ktl/limits.h>
#include <ktl/optional.h>
#include <ktl/variant.h>
// `ContentSizeManager` is a class that helps coordinate multiple, potentially concurrent changes
// to a VMO's content size without needing to serialize the I/O of those operations. This is done by
// maintaining queues of outstanding operations, allowing concurrent execution of the operations,
// and then committing the content size effects of those operations in a particular order. This idea
// is similar to the re-order buffer in Tomasulo's algorithm.
//
// There are 2 ordering queues: the read queue and the write queue. Both queues hold their
// respective namesake operations as well as shrink operations.
//
// Read operations are permitted to read up to the smallest outstanding content size, which can be
// found as the minimum of the current content size and all shrink operations. Upon completion,
// reads will always commit without blocking behind other operations.
//
// Write operations may extend content size. Upon completion, a write will block until it is the
// head of the write queue if the smallest outstanding content size is less than its target size.
//
// Set size operations are treated differently, depending on whether the operation will expand or
// shrink the content size. When expanding, set size ops are treated as write operations of the same
// target size (see above). When shrinking, set size ops are treated as shrink operations and will
// block until it is the head if any read or write operations that operate beyond the target size
// are queued in front of the set size.
class ContentSizeManager : public fbl::RefCounted<ContentSizeManager> {
private:
// Forward declarations
struct WriteQueueTag {};
struct ReadQueueTag {};
public:
enum class OperationType {
Write,
Read,
SetSize,
Append,
};
// `ContentSizeManager::Operation` is a structure to ensure operations related to content size are
// committed in order. `Operation` is intended to be used as a stack-allocated structure.
//
// Currently, an operation maps 1:1 with the thread it is executing on and thus, can be
// considered owned by that thread.
//
// Notes:
// * The initialization, destruction, and immutable properties of this type are only
// thread-compatible.
// * The type must either be committed or cancelled before destruction. Otherwise, the destructor
// will panic.
class Operation : public fbl::ContainableBaseClasses<
fbl::TaggedDoublyLinkedListable<Operation*, WriteQueueTag>,
fbl::TaggedDoublyLinkedListable<Operation*, ReadQueueTag>> {
public:
Operation() = default;
~Operation() {
DEBUG_ASSERT_MSG(!IsValid(), "Operation destructed without cancelling or committing!");
}
// Disallow copy and move
Operation(const Operation&) = delete;
Operation& operator=(const Operation&) = delete;
Operation(Operation&&) = delete;
Operation& operator=(Operation&&) = delete;
ContentSizeManager* parent() const { return parent_; }
OperationType GetType() const { return type_; }
// This function exists to satisfy Clang thread safety analysis since there are many
// circumstances where the parent lock is not acquired through the parent pointer
// (i.e. initialization). As of writing, Clang is unable to follow pointer aliasing.
void AssertParentLockHeld() const TA_ASSERT(parent()->lock()) {
DEBUG_ASSERT(IsValid());
parent()->lock()->capability().AssertHeld();
}
// Gets the content size that the operation will expand to once it is completed.
//
// Note:
// * This may only be called on a valid operation.
// * This must only be called when holding the parent `ContentSizeManager` lock.
uint64_t GetSizeLocked() const TA_REQ(parent()->lock());
// Shrinks the size of the operation.
//
// Only size shrinks are allowed, since the concurrency of other operations are gated on the
// largest potential size of operations in front of it.
//
// Note:
// * This may only be called on a valid operation.
// * This must only be called when holding the parent `ContentSizeManager` lock.
// * The `new_size` passed in must be greater than 0.
// * The `new_size` passed in must be less than or equal to the current size.
// * This must only be called for `OperationType::Append` and `OperationType::Write` ops.
void ShrinkSizeLocked(uint64_t new_size) TA_REQ(parent()->lock());
// Commits the operation's effects on the content size.
//
// Note:
// * This may only be called on a valid operation.
// * This must only be called when holding the parent `ContentSizeManager` lock.
void CommitLocked() TA_REQ(parent()->lock());
// Cancels the operation and does not commit any changes to the content size.
//
// Note:
// * This may only be called on a valid operation.
// * This must only be called when holding the parent `ContentSizeManager` lock.
void CancelLocked() TA_REQ(parent()->lock());
// Updates the content size when progress is made from the operation.
//
// Note:
// * This may only be called on a valid `Append` or `Write` operation.
// * The content size must be larger than the current content size.
void UpdateContentSizeFromProgress(uint64_t new_content_size);
private:
friend class ContentSizeManager;
// Indicates whether the operation is valid.
bool IsValid() const { return parent_ != nullptr; }
void Reset() { parent_ = nullptr; }
void Initialize(ContentSizeManager* parent, uint64_t size, OperationType type);
ContentSizeManager* parent_ = nullptr;
OperationType type_;
// Holds the target size. For appends, this will only be valid once the operation is at the head
// of the queue.
uint64_t size_;
Event ready_event_;
};
// Create a ContentSizeManager with its initial content size set to |content_size|. Returns a
// RefPtr to the newly created ContentSizeManager in |content_size_manager| on success.
static zx_status_t Create(uint64_t content_size,
fbl::RefPtr<ContentSizeManager>* content_size_manager);
Lock<Mutex>* lock() const TA_RET_CAP(lock_) { return &lock_; }
// Returns the current content size.
uint64_t GetContentSize() const {
// Loads from the operation the content size must be ordered with the acquire ordering to ensure
// that all memory operations from the VMO (i.e. reads) after the load are not reordered before
// reading the content size. Otherwise, reads from the VMO before acquiring content size may not
// see data that was written to the VMO just before content size was updated (via
// `SetContentSize`).
return content_size_.load(ktl::memory_order_acquire);
}
// Marks and registers the beginning of an append operation.
//
//
// Notes:
// * This function may block until other conflicting operations complete.
// * This function may drop and reacquire the lock guarded by `lock_guard`.
// * `append_size` must be greater than 0.
zx_status_t BeginAppendLocked(uint64_t append_size, Guard<Mutex>* lock_guard, Operation* out_op)
TA_REQ(lock_);
// Marks and registers the beginning of a write operation.
//
// If the write is results in an expansion of the content size, returns the previous content size
// from which the write expands in `out_prev_content_size`. The gap from the previous content size
// to where the write begins likely needs to be zeroed out.
//
// Notes:
// * This function may block until other conflicting operations complete.
// * This function may drop and reacquire the lock guarted by `lock_guard`.
void BeginWriteLocked(uint64_t target_size, Guard<Mutex>* lock_guard,
ktl::optional<uint64_t>* out_prev_content_size, Operation* out_op)
TA_REQ(lock_);
// Marks and registers the beginning of a read operation.
//
// Returns the maximum size of the content that should be read in `out_content_size_limit`.
void BeginReadLocked(uint64_t target_size, uint64_t* out_content_size_limit, Operation* out_op)
TA_REQ(lock_);
// Marks and registers the beginning of an operation to set the content size to a target size.
//
// Note that this function may drop and reacquire the lock guarded by `lock_guard`.
void BeginSetContentSizeLocked(uint64_t target_size, Operation* out_op, Guard<Mutex>* lock_guard)
TA_REQ(lock_);
private:
// Private constructor. External callers should use ContentSizeManager::Create.
explicit ContentSizeManager(uint64_t content_size) : content_size_(content_size) {}
ContentSizeManager() = delete;
// Updates the content size to a new value.
//
// Note that this function should only be called by internal functions, as content size should
// only be modified by one operation at a time. This is enforced by the queues.
void SetContentSize(uint64_t new_content_size) {
// Stores to the content size must be ordered with release ordering to ensure that all memory
// operations (i.e. writes) to the VMO are visible *before* updating content size. Readers must
// see valid data in the VMO if the region being read is within content size. See
// `GetContentSize` as well.
content_size_.store(new_content_size, ktl::memory_order_release);
}
// Blocks until the provided operation is at the head of the queue.
//
// Note that this function will drop the lock guarded by `lock_guard` while blocking and
// reacquires the lock after.
void BlockUntilHeadLocked(Operation* op, Guard<Mutex>* lock_guard) TA_REQ(lock_);
void CommitAndDequeueOperationLocked(Operation* op) TA_REQ(lock_);
// Dequeues an `Operation`. This must only be called internally, once an `Operation` is committed
// or cancelled.
void DequeueOperationLocked(Operation* op) TA_REQ(lock_);
mutable DECLARE_MUTEX(ContentSizeManager) lock_;
// These queues are usually very shallow, unless stream clients call many operations concurrently.
fbl::DoublyLinkedList<Operation*, WriteQueueTag> write_q_ TA_GUARDED(lock_);
fbl::DoublyLinkedList<Operation*, ReadQueueTag> read_q_ TA_GUARDED(lock_);
// `content_size_` is not guarded by a lock because the queues above maintains that only one
// operation can ever be mutating `content_size_` at any given point.
//
// Accessing this value should be done via `GetContentSize` and `SetContentSize`.
ktl::atomic<uint64_t> content_size_ = 0;
};
#endif // ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_