zircon/kernel/vm/include/vm/content_size_manager.h - fuchsia - Git at Google

 // Copyright 2022 The Fuchsia Authors
 //
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file or at
 // https://opensource.org/licenses/MIT

 #ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_
 #define ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_

 #include <lib/zx/result.h>

 #include <fbl/intrusive_container_utils.h>
 #include <fbl/intrusive_double_list.h>
 #include <fbl/ref_counted.h>
 #include <kernel/event.h>
 #include <kernel/mutex.h>
 #include <ktl/algorithm.h>
 #include <ktl/atomic.h>
 #include <ktl/limits.h>
 #include <ktl/optional.h>
 #include <ktl/variant.h>

 // `ContentSizeManager` is a class that helps coordinate multiple, potentially concurrent changes
 // to a VMO's content size without needing to serialize the I/O of those operations. This is done by
 // maintaining queues of outstanding operations, allowing concurrent execution of the operations,
 // and then committing the content size effects of those operations in a particular order. This idea
 // is similar to the re-order buffer in Tomasulo's algorithm.
 //
 // There are 2 ordering queues: the read queue and the write queue. Both queues hold their
 // respective namesake operations as well as shrink operations.
 //
 // Read operations are permitted to read up to the smallest outstanding content size, which can be
 // found as the minimum of the current content size and all shrink operations. Upon completion,
 // reads will always commit without blocking behind other operations.
 //
 // Write operations may extend content size. Upon completion, a write will block until it is the
 // head of the write queue if the smallest outstanding content size is less than its target size.
 //
 // Set size operations are treated differently, depending on whether the operation will expand or
 // shrink the content size. When expanding, set size ops are treated as write operations of the same
 // target size (see above). When shrinking, set size ops are treated as shrink operations and will
 // block until it is the head if any read or write operations that operate beyond the target size
 // are queued in front of the set size.
 class ContentSizeManager : public fbl::RefCounted<ContentSizeManager> {
  private:
   // Forward declarations
   struct WriteQueueTag {};
   struct ReadQueueTag {};

  public:
   enum class OperationType {
     Write,
     Read,
     SetSize,
     Append,
   };

   // `ContentSizeManager::Operation` is a structure to ensure operations related to content size are
   // committed in order. `Operation` is intended to be used as a stack-allocated structure.
   //
   // Currently, an operation maps 1:1 with the thread it is executing on and thus, can be
   // considered owned by that thread.
   //
   // Notes:
   //  * The initialization, destruction, and immutable properties of this type are only
   //    thread-compatible.
   //  * The type must either be committed or cancelled before destruction. Otherwise, the destructor
   //    will panic.
   class Operation : public fbl::ContainableBaseClasses<
                         fbl::TaggedDoublyLinkedListable<Operation*, WriteQueueTag>,
                         fbl::TaggedDoublyLinkedListable<Operation*, ReadQueueTag>> {
    public:
     Operation() = default;

     ~Operation() {
       DEBUG_ASSERT_MSG(!IsValid(), "Operation destructed without cancelling or committing!");
     }

     // Disallow copy and move
     Operation(const Operation&) = delete;
     Operation& operator=(const Operation&) = delete;
     Operation(Operation&&) = delete;
     Operation& operator=(Operation&&) = delete;

     ContentSizeManager* parent() const { return parent_; }

     OperationType GetType() const { return type_; }

     // This function exists to satisfy Clang thread safety analysis since there are many
     // circumstances where the parent lock is not acquired through the parent pointer
     // (i.e. initialization). As of writing, Clang is unable to follow pointer aliasing.
     void AssertParentLockHeld() const TA_ASSERT(parent()->lock()) {
       DEBUG_ASSERT(IsValid());
       parent()->lock()->capability().AssertHeld();
     }

     // Gets the content size that the operation will expand to once it is completed.
     //
     // Note:
     //  * This may only be called on a valid operation.
     //  * This must only be called when holding the parent `ContentSizeManager` lock.
     uint64_t GetSizeLocked() const TA_REQ(parent()->lock());

     // Shrinks the size of the operation.
     //
     // Only size shrinks are allowed, since the concurrency of other operations are gated on the
     // largest potential size of operations in front of it.
     //
     // Note:
     //  * This may only be called on a valid operation.
     //  * This must only be called when holding the parent `ContentSizeManager` lock.
     //  * The `new_size` passed in must be greater than 0.
     //  * The `new_size` passed in must be less than or equal to the current size.
     //  * This must only be called for `OperationType::Append` and `OperationType::Write` ops.
     void ShrinkSizeLocked(uint64_t new_size) TA_REQ(parent()->lock());

     // Commits the operation's effects on the content size.
     //
     // Note:
     //  * This may only be called on a valid operation.
     //  * This must only be called when holding the parent `ContentSizeManager` lock.
     void CommitLocked() TA_REQ(parent()->lock());

     // Cancels the operation and does not commit any changes to the content size.
     //
     // Note:
     //  * This may only be called on a valid operation.
     //  * This must only be called when holding the parent `ContentSizeManager` lock.
     void CancelLocked() TA_REQ(parent()->lock());

     // Updates the content size when progress is made from the operation.
     //
     // Note:
     //  * This may only be called on a valid `Append` or `Write` operation.
     //  * The content size must be larger than the current content size.
     void UpdateContentSizeFromProgress(uint64_t new_content_size);

    private:
     friend class ContentSizeManager;

     // Indicates whether the operation is valid.
     bool IsValid() const { return parent_ != nullptr; }

     void Reset() { parent_ = nullptr; }

     void Initialize(ContentSizeManager* parent, uint64_t size, OperationType type);

     ContentSizeManager* parent_ = nullptr;
     OperationType type_;

     // Holds the target size. For appends, this will only be valid once the operation is at the head
     // of the queue.
     uint64_t size_;

     Event ready_event_;
   };

   // Create a ContentSizeManager with its initial content size set to |content_size|. Returns a
   // RefPtr to the newly created ContentSizeManager in |content_size_manager| on success.
   static zx_status_t Create(uint64_t content_size,
                             fbl::RefPtr<ContentSizeManager>* content_size_manager);

   Lock<Mutex>* lock() const TA_RET_CAP(lock_) { return &lock_; }

   // Returns the current content size.
   uint64_t GetContentSize() const {
     // Loads from the operation the content size must be ordered with the acquire ordering to ensure
     // that all memory operations from the VMO (i.e. reads) after the load are not reordered before
     // reading the content size. Otherwise, reads from the VMO before acquiring content size may not
     // see data that was written to the VMO just before content size was updated (via
     // `SetContentSize`).
     return content_size_.load(ktl::memory_order_acquire);
   }

   // Marks and registers the beginning of an append operation.
   //
   //
   // Notes:
   //  * This function may block until other conflicting operations complete.
   //  * This function may drop and reacquire the lock guarded by `lock_guard`.
   //  * `append_size` must be greater than 0.
   zx_status_t BeginAppendLocked(uint64_t append_size, Guard<Mutex>* lock_guard, Operation* out_op)
       TA_REQ(lock_);

   // Marks and registers the beginning of a write operation.
   //
   // If the write is results in an expansion of the content size, returns the previous content size
   // from which the write expands in `out_prev_content_size`. The gap from the previous content size
   // to where the write begins likely needs to be zeroed out.
   //
   // Notes:
   //  * This function may block until other conflicting operations complete.
   //  * This function may drop and reacquire the lock guarted by `lock_guard`.
   void BeginWriteLocked(uint64_t target_size, Guard<Mutex>* lock_guard,
                         ktl::optional<uint64_t>* out_prev_content_size, Operation* out_op)
       TA_REQ(lock_);

   // Marks and registers the beginning of a read operation.
   //
   // Returns the maximum size of the content that should be read in `out_content_size_limit`.
   void BeginReadLocked(uint64_t target_size, uint64_t* out_content_size_limit, Operation* out_op)
       TA_REQ(lock_);

   // Marks and registers the beginning of an operation to set the content size to a target size.
   //
   // Note that this function may drop and reacquire the lock guarded by `lock_guard`.
   void BeginSetContentSizeLocked(uint64_t target_size, Operation* out_op, Guard<Mutex>* lock_guard)
       TA_REQ(lock_);

  private:
   // Private constructor. External callers should use ContentSizeManager::Create.
   explicit ContentSizeManager(uint64_t content_size) : content_size_(content_size) {}
   ContentSizeManager() = delete;

   // Updates the content size to a new value.
   //
   // Note that this function should only be called by internal functions, as content size should
   // only be modified by one operation at a time. This is enforced by the queues.
   void SetContentSize(uint64_t new_content_size) {
     // Stores to the content size must be ordered with release ordering to ensure that all memory
     // operations (i.e. writes) to the VMO are visible *before* updating content size. Readers must
     // see valid data in the VMO if the region being read is within content size. See
     // `GetContentSize` as well.
     content_size_.store(new_content_size, ktl::memory_order_release);
   }

   // Blocks until the provided operation is at the head of the queue.
   //
   // Note that this function will drop the lock guarded by `lock_guard` while blocking and
   // reacquires the lock after.
   void BlockUntilHeadLocked(Operation* op, Guard<Mutex>* lock_guard) TA_REQ(lock_);

   void CommitAndDequeueOperationLocked(Operation* op) TA_REQ(lock_);

   // Dequeues an `Operation`. This must only be called internally, once an `Operation` is committed
   // or cancelled.
   void DequeueOperationLocked(Operation* op) TA_REQ(lock_);

   mutable DECLARE_MUTEX(ContentSizeManager) lock_;
   // These queues are usually very shallow, unless stream clients call many operations concurrently.
   fbl::DoublyLinkedList<Operation*, WriteQueueTag> write_q_ TA_GUARDED(lock_);
   fbl::DoublyLinkedList<Operation*, ReadQueueTag> read_q_ TA_GUARDED(lock_);

   // `content_size_` is not guarded by a lock because the queues above maintains that only one
   // operation can ever be mutating `content_size_` at any given point.
   //
   // Accessing this value should be done via `GetContentSize` and `SetContentSize`.
   ktl::atomic<uint64_t> content_size_ = 0;
 };

 #endif  // ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_
	// Copyright 2022 The Fuchsia Authors
	//
	// Use of this source code is governed by a MIT-style
	// license that can be found in the LICENSE file or at
	// https://opensource.org/licenses/MIT

	#ifndef ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_
	#define ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_

	#include <lib/zx/result.h>

	#include <fbl/intrusive_container_utils.h>
	#include <fbl/intrusive_double_list.h>
	#include <fbl/ref_counted.h>
	#include <kernel/event.h>
	#include <kernel/mutex.h>
	#include <ktl/algorithm.h>
	#include <ktl/atomic.h>
	#include <ktl/limits.h>
	#include <ktl/optional.h>
	#include <ktl/variant.h>

	// `ContentSizeManager` is a class that helps coordinate multiple, potentially concurrent changes
	// to a VMO's content size without needing to serialize the I/O of those operations. This is done by
	// maintaining queues of outstanding operations, allowing concurrent execution of the operations,
	// and then committing the content size effects of those operations in a particular order. This idea
	// is similar to the re-order buffer in Tomasulo's algorithm.
	//
	// There are 2 ordering queues: the read queue and the write queue. Both queues hold their
	// respective namesake operations as well as shrink operations.
	//
	// Read operations are permitted to read up to the smallest outstanding content size, which can be
	// found as the minimum of the current content size and all shrink operations. Upon completion,
	// reads will always commit without blocking behind other operations.
	//
	// Write operations may extend content size. Upon completion, a write will block until it is the
	// head of the write queue if the smallest outstanding content size is less than its target size.
	//
	// Set size operations are treated differently, depending on whether the operation will expand or
	// shrink the content size. When expanding, set size ops are treated as write operations of the same
	// target size (see above). When shrinking, set size ops are treated as shrink operations and will
	// block until it is the head if any read or write operations that operate beyond the target size
	// are queued in front of the set size.
	class ContentSizeManager : public fbl::RefCounted<ContentSizeManager> {
	private:
	// Forward declarations
	struct WriteQueueTag {};
	struct ReadQueueTag {};

	public:
	enum class OperationType {
	Write,
	Read,
	SetSize,
	Append,
	};

	// `ContentSizeManager::Operation` is a structure to ensure operations related to content size are
	// committed in order. `Operation` is intended to be used as a stack-allocated structure.
	//
	// Currently, an operation maps 1:1 with the thread it is executing on and thus, can be
	// considered owned by that thread.
	//
	// Notes:
	// * The initialization, destruction, and immutable properties of this type are only
	// thread-compatible.
	// * The type must either be committed or cancelled before destruction. Otherwise, the destructor
	// will panic.
	class Operation : public fbl::ContainableBaseClasses<
	fbl::TaggedDoublyLinkedListable<Operation*, WriteQueueTag>,
	fbl::TaggedDoublyLinkedListable<Operation*, ReadQueueTag>> {
	public:
	Operation() = default;

	~Operation() {
	DEBUG_ASSERT_MSG(!IsValid(), "Operation destructed without cancelling or committing!");
	}

	// Disallow copy and move
	Operation(const Operation&) = delete;
	Operation& operator=(const Operation&) = delete;
	Operation(Operation&&) = delete;
	Operation& operator=(Operation&&) = delete;

	ContentSizeManager* parent() const { return parent_; }

	OperationType GetType() const { return type_; }

	// This function exists to satisfy Clang thread safety analysis since there are many
	// circumstances where the parent lock is not acquired through the parent pointer
	// (i.e. initialization). As of writing, Clang is unable to follow pointer aliasing.
	void AssertParentLockHeld() const TA_ASSERT(parent()->lock()) {
	DEBUG_ASSERT(IsValid());
	parent()->lock()->capability().AssertHeld();
	}

	// Gets the content size that the operation will expand to once it is completed.
	//
	// Note:
	// * This may only be called on a valid operation.
	// * This must only be called when holding the parent `ContentSizeManager` lock.
	uint64_t GetSizeLocked() const TA_REQ(parent()->lock());

	// Shrinks the size of the operation.
	//
	// Only size shrinks are allowed, since the concurrency of other operations are gated on the
	// largest potential size of operations in front of it.
	//
	// Note:
	// * This may only be called on a valid operation.
	// * This must only be called when holding the parent `ContentSizeManager` lock.
	// * The `new_size` passed in must be greater than 0.
	// * The `new_size` passed in must be less than or equal to the current size.
	// * This must only be called for `OperationType::Append` and `OperationType::Write` ops.
	void ShrinkSizeLocked(uint64_t new_size) TA_REQ(parent()->lock());

	// Commits the operation's effects on the content size.
	//
	// Note:
	// * This may only be called on a valid operation.
	// * This must only be called when holding the parent `ContentSizeManager` lock.
	void CommitLocked() TA_REQ(parent()->lock());

	// Cancels the operation and does not commit any changes to the content size.
	//
	// Note:
	// * This may only be called on a valid operation.
	// * This must only be called when holding the parent `ContentSizeManager` lock.
	void CancelLocked() TA_REQ(parent()->lock());

	// Updates the content size when progress is made from the operation.
	//
	// Note:
	// * This may only be called on a valid `Append` or `Write` operation.
	// * The content size must be larger than the current content size.
	void UpdateContentSizeFromProgress(uint64_t new_content_size);

	private:
	friend class ContentSizeManager;

	// Indicates whether the operation is valid.
	bool IsValid() const { return parent_ != nullptr; }

	void Reset() { parent_ = nullptr; }

	void Initialize(ContentSizeManager* parent, uint64_t size, OperationType type);

	ContentSizeManager* parent_ = nullptr;
	OperationType type_;

	// Holds the target size. For appends, this will only be valid once the operation is at the head
	// of the queue.
	uint64_t size_;

	Event ready_event_;
	};

	// Create a ContentSizeManager with its initial content size set to \|content_size\|. Returns a
	// RefPtr to the newly created ContentSizeManager in \|content_size_manager\| on success.
	static zx_status_t Create(uint64_t content_size,
	fbl::RefPtr<ContentSizeManager>* content_size_manager);

	Lock<Mutex>* lock() const TA_RET_CAP(lock_) { return &lock_; }

	// Returns the current content size.
	uint64_t GetContentSize() const {
	// Loads from the operation the content size must be ordered with the acquire ordering to ensure
	// that all memory operations from the VMO (i.e. reads) after the load are not reordered before
	// reading the content size. Otherwise, reads from the VMO before acquiring content size may not
	// see data that was written to the VMO just before content size was updated (via
	// `SetContentSize`).
	return content_size_.load(ktl::memory_order_acquire);
	}

	// Marks and registers the beginning of an append operation.
	//
	//
	// Notes:
	// * This function may block until other conflicting operations complete.
	// * This function may drop and reacquire the lock guarded by `lock_guard`.
	// * `append_size` must be greater than 0.
	zx_status_t BeginAppendLocked(uint64_t append_size, Guard<Mutex>* lock_guard, Operation* out_op)
	TA_REQ(lock_);

	// Marks and registers the beginning of a write operation.
	//
	// If the write is results in an expansion of the content size, returns the previous content size
	// from which the write expands in `out_prev_content_size`. The gap from the previous content size
	// to where the write begins likely needs to be zeroed out.
	//
	// Notes:
	// * This function may block until other conflicting operations complete.
	// * This function may drop and reacquire the lock guarted by `lock_guard`.
	void BeginWriteLocked(uint64_t target_size, Guard<Mutex>* lock_guard,
	ktl::optional<uint64_t>* out_prev_content_size, Operation* out_op)
	TA_REQ(lock_);

	// Marks and registers the beginning of a read operation.
	//
	// Returns the maximum size of the content that should be read in `out_content_size_limit`.
	void BeginReadLocked(uint64_t target_size, uint64_t* out_content_size_limit, Operation* out_op)
	TA_REQ(lock_);

	// Marks and registers the beginning of an operation to set the content size to a target size.
	//
	// Note that this function may drop and reacquire the lock guarded by `lock_guard`.
	void BeginSetContentSizeLocked(uint64_t target_size, Operation* out_op, Guard<Mutex>* lock_guard)
	TA_REQ(lock_);

	private:
	// Private constructor. External callers should use ContentSizeManager::Create.
	explicit ContentSizeManager(uint64_t content_size) : content_size_(content_size) {}
	ContentSizeManager() = delete;

	// Updates the content size to a new value.
	//
	// Note that this function should only be called by internal functions, as content size should
	// only be modified by one operation at a time. This is enforced by the queues.
	void SetContentSize(uint64_t new_content_size) {
	// Stores to the content size must be ordered with release ordering to ensure that all memory
	// operations (i.e. writes) to the VMO are visible before updating content size. Readers must
	// see valid data in the VMO if the region being read is within content size. See
	// `GetContentSize` as well.
	content_size_.store(new_content_size, ktl::memory_order_release);
	}

	// Blocks until the provided operation is at the head of the queue.
	//
	// Note that this function will drop the lock guarded by `lock_guard` while blocking and
	// reacquires the lock after.
	void BlockUntilHeadLocked(Operation* op, Guard<Mutex>* lock_guard) TA_REQ(lock_);

	void CommitAndDequeueOperationLocked(Operation* op) TA_REQ(lock_);

	// Dequeues an `Operation`. This must only be called internally, once an `Operation` is committed
	// or cancelled.
	void DequeueOperationLocked(Operation* op) TA_REQ(lock_);

	mutable DECLARE_MUTEX(ContentSizeManager) lock_;
	// These queues are usually very shallow, unless stream clients call many operations concurrently.
	fbl::DoublyLinkedList<Operation*, WriteQueueTag> write_q_ TA_GUARDED(lock_);
	fbl::DoublyLinkedList<Operation*, ReadQueueTag> read_q_ TA_GUARDED(lock_);

	// `content_size_` is not guarded by a lock because the queues above maintains that only one
	// operation can ever be mutating `content_size_` at any given point.
	//
	// Accessing this value should be done via `GetContentSize` and `SetContentSize`.
	ktl::atomic<uint64_t> content_size_ = 0;
	};

	#endif // ZIRCON_KERNEL_VM_INCLUDE_VM_CONTENT_SIZE_MANAGER_H_