// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains Vnodes which back a Blobfs filesystem.

#ifndef SRC_STORAGE_BLOBFS_BLOB_H_
#define SRC_STORAGE_BLOBFS_BLOB_H_

#ifndef __Fuchsia__
#error Fuchsia-only Header
#endif

#include <fuchsia/io/llcpp/fidl.h>
#include <lib/async/cpp/wait.h>
#include <lib/fit/promise.h>
#include <lib/zx/event.h>
#include <string.h>

#include <memory>

#include <fbl/algorithm.h>
#include <fbl/intrusive_wavl_tree.h>
#include <fbl/macros.h>
#include <fbl/ref_ptr.h>
#include <fbl/vector.h>
#include <storage/buffer/owned_vmoid.h>

#include "src/lib/digest/digest.h"
#include "src/lib/storage/vfs/cpp/journal/data_streamer.h"
#include "src/lib/storage/vfs/cpp/vfs.h"
#include "src/lib/storage/vfs/cpp/vfs_types.h"
#include "src/lib/storage/vfs/cpp/vnode.h"
#include "src/storage/blobfs/allocator/allocator.h"
#include "src/storage/blobfs/allocator/extent_reserver.h"
#include "src/storage/blobfs/allocator/node_reserver.h"
#include "src/storage/blobfs/blob_cache.h"
#include "src/storage/blobfs/blob_layout.h"
#include "src/storage/blobfs/common.h"
#include "src/storage/blobfs/compression/blob_compressor.h"
#include "src/storage/blobfs/compression/compressor.h"
#include "src/storage/blobfs/format.h"
#include "src/storage/blobfs/format_assertions.h"
#include "src/storage/blobfs/metrics.h"
#include "src/storage/blobfs/pager/page_watcher.h"
#include "src/storage/blobfs/transaction.h"

namespace blobfs {

class Blobfs;
class BlobDataProducer;

using digest::Digest;

// clang-format off
enum class BlobState : uint8_t {
  // After Open:
  kEmpty,
  // After space is reserved (but the allocation is not yet persisted):
  kDataWrite,
  // After Writing:
  kReadable,
  // After Unlink:
  kPurged,
  // Unrecoverable error state:
  kError,
};
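
// A rough sketch of the typical state transitions, inferred from the per-state comments above and
// from SpaceAllocate()/MarkReadable()/Purge() declared on Blob below (the implementation is
// authoritative):
//
//   kEmpty --(space reserved)--> kDataWrite --(write completes)--> kReadable --(unlink)--> kPurged
//
// kError is an unrecoverable state entered when, for example, a failure occurs along the write
// path.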

// clang-format on

class Blob final : public CacheNode, fbl::Recyclable<Blob> {
 public:
  // Constructs a blob, reads in data, verifies the contents, then destroys the in-memory copy.
  static zx_status_t LoadAndVerifyBlob(Blobfs* bs, uint32_t node_index);

  // Creates a new blob, identified by |digest|, whose data has not yet been written.
  Blob(Blobfs* bs, const Digest& digest);

  // Creates a readable blob from existing data.
  Blob(Blobfs* bs, uint32_t node_index, const Inode& inode);

  virtual ~Blob();

  ////////////////
  // fs::Vnode interface.

  zx_status_t GetNodeInfoForProtocol(fs::VnodeProtocol protocol, fs::Rights rights,
                                     fs::VnodeRepresentation* info) final;
  fs::VnodeProtocolSet GetProtocols() const final;
  bool ValidateRights(fs::Rights rights) final;
  zx_status_t Read(void* data, size_t len, size_t off, size_t* out_actual) final;
  zx_status_t Write(const void* data, size_t len, size_t offset, size_t* out_actual) final;
  zx_status_t Append(const void* data, size_t len, size_t* out_end, size_t* out_actual) final;
  zx_status_t GetAttributes(fs::VnodeAttributes* a) final;
  zx_status_t Truncate(size_t len) final;
  zx_status_t QueryFilesystem(fuchsia_io::wire::FilesystemInfo* out) final;
  zx_status_t GetDevicePath(size_t buffer_len, char* out_name, size_t* out_len) final;
  zx_status_t GetVmo(int flags, zx::vmo* out_vmo, size_t* out_size) final;
  void Sync(SyncCallback on_complete) final;
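
  // As a hypothetical usage sketch (not part of this interface's contract): a caller that needs to
  // know when a written blob has been durably persisted might invoke Sync() with a callback,
  // roughly as follows, assuming SyncCallback is callable with a zx_status_t:
  //
  //   blob->Sync([](zx_status_t status) {
  //     // ZX_OK here means the blob's data and metadata have reached the underlying device.
  //   });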

#if defined(ENABLE_BLOBFS_NEW_PAGER)
  // fs::PagedVnode implementation.
  void VmoRead(uint64_t offset, uint64_t length) override;
#endif

  ////////////////
  // fbl::Recyclable interface.

  void fbl_recycle() final { CacheNode::fbl_recycle(); }

  ////////////////
  // Other methods.

  // Returns a digest::Digest containing the blob's merkle root.
  // Equivalent to digest::Digest(GetKey()).
  digest::Digest MerkleRoot() const;

  // Returns whether there are any external references to the blob.
  //
  // Threading: Currently this *must* be called on the main dispatch thread; otherwise the
  // underlying state of the blob could change after (or during) the call. It currently locks
  // to read the Vnode's open_count_. When making Blobfs more threadsafe this should be changed to
  // require that the mutex be held; then the main-dispatch-thread requirement can be removed (the
  // caller can lock to ensure consistency over whatever it's doing).
  bool HasReferences() const FS_TA_EXCLUDES(mutex_);

  // Identifies whether we can safely remove all on-disk and in-memory storage used by this blob.
  // Note that this *must* be called on the main dispatch thread; otherwise the underlying state of
  // the blob could change after (or during) the call, and the blob might not really be purgeable.
  bool Purgeable() const {
    return !HasReferences() && (DeletionQueued() || state() != BlobState::kReadable);
  }

  bool DeletionQueued() const { return deletable_; }

  uint32_t GetMapIndex() const { return map_index_; }

  // Returns a unique identifier for this blob.
  uint32_t Ino() const { return map_index_; }

  uint64_t SizeData() const;

  const Inode& GetNode() const { return inode_; }

  void CompleteSync();

  // When blob VMOs are cloned and returned to clients, blobfs watches the original VMO handle for
  // the signal |ZX_VMO_ZERO_CHILDREN|. While this signal is not set, the blob's Vnode keeps an
  // extra reference to itself to prevent teardown while clients are using this VMO. This reference
  // is internally called the "clone watcher".
  //
  // This function may be called on a blob to tell it to forcefully release the "reference to
  // itself" that is kept when the blob is mapped.
  //
  // Returns this reference, if it exists, to provide control over when the Vnode destructor is
  // executed.
  //
  // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the
  // transition is complete.
  fbl::RefPtr<Blob> CloneWatcherTeardown();
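
  // For illustration only: a "clone watcher" of the kind described above is typically armed by
  // pointing an async::WaitMethod at the VMO and waiting for ZX_VMO_ZERO_CHILDREN, e.g. (names are
  // placeholders, not members of this class):
  //
  //   watcher.set_object(vmo.get());
  //   watcher.set_trigger(ZX_VMO_ZERO_CHILDREN);
  //   watcher.Begin(dispatcher);
  //
  // See clone_watcher_ below for the member that plays this role.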

  // Marks the blob as deletable and attempts to purge it.
  zx_status_t QueueUnlink();

  // PrepareWrite should be called after allocating a vnode and before writing any data to the
  // blob. The function sets the blob size, allocates the VMOs needed for the data and merkle tree,
  // initializes the structures needed for compression, and reserves an inode for the blob. It is
  // not meant to be called multiple times on a given vnode. This is public only for testing.
  zx_status_t PrepareWrite(uint64_t size_data, bool compress);
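
  // For example, a test might drive the write path roughly as follows (an illustrative sketch
  // only; |blobfs|, |digest|, and |payload| are placeholders supplied by the test):
  //
  //   auto blob = fbl::MakeRefCounted<Blob>(blobfs, digest);
  //   ASSERT_EQ(blob->PrepareWrite(payload.size(), /*compress=*/true), ZX_OK);
  //   size_t actual = 0;
  //   ASSERT_EQ(blob->Write(payload.data(), payload.size(), 0, &actual), ZX_OK);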

  // If this is part of a migration that involves writing a new blob to replace an old blob, this
  // can be called so that the old blob is deleted in the same transaction that writes the new
  // blob. The old blob *must* not be currently in use. It is designed to be used for mount-time
  // migrations.
  void SetOldBlob(Blob& blob);

  // Sets the target_compression_size in write_info to |size|.
  // Setter made public for testing.
  void SetTargetCompressionSize(uint64_t size);

  // Reads in and verifies the contents of this Blob.
  zx_status_t Verify();

  // Exposed for testing.
  const zx::vmo& DataVmo() const { return data_mapping_.vmo(); }

  // Returns whether the blob's contents are pager-backed or not.
  bool IsPagerBacked() const;

 private:
  friend class BlobLoaderTest;
  DISALLOW_COPY_ASSIGN_AND_MOVE(Blob);

  // Vnode protected overrides:
  zx_status_t OpenNode(ValidatedOptions options, fbl::RefPtr<Vnode>* out_redirect) override;
  zx_status_t CloseNode() override;

#if defined(ENABLE_BLOBFS_NEW_PAGER)
  // PagedVnode protected overrides:
  void OnNoClones() override FS_TA_REQUIRES(mutex_);
#endif

  // blobfs::CacheNode implementation:
  BlobCache& Cache() final;
  bool ShouldCache() const final;
  void ActivateLowMemory() final FS_TA_EXCLUDES(mutex_);

  void set_state(BlobState new_state) { state_ = new_state; }
  BlobState state() const { return state_; }

  // After writing the blob, marks the blob as readable.
  [[nodiscard]] zx_status_t MarkReadable();

  // Returns a handle to an event which will be signalled when the blob is readable.
  //
  // Returns "ZX_OK" if successful; otherwise the error code will indicate the failure status.
  zx_status_t GetReadableEvent(zx::event* out);
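
  // As an illustration (an assumption, not verified here): a client that obtains this event, for
  // example through the blob's fuchsia.io file description, could block until the blob becomes
  // readable by waiting for ZX_USER_SIGNAL_0 on it:
  //
  //   zx_signals_t observed = 0;
  //   zx_status_t status = event.wait_one(ZX_USER_SIGNAL_0, zx::time::infinite(), &observed);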

  // Returns a clone of the blobfs VMO.
  //
  // Monitors the current VMO, keeping a reference to the Vnode alive while the |out| VMO (and any
  // clones it may have) are open.
  zx_status_t CloneDataVmo(zx_rights_t rights, zx::vmo* out_vmo, size_t* out_size);

  // Receives notifications when all clones vended by CloneDataVmo() are released.
  //
  // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the
  // transition is complete.
  void HandleNoClones(async_dispatcher_t* dispatcher, async::WaitBase* wait, zx_status_t status,
                      const zx_packet_signal_t* signal);

  // Invokes |Purge()| if the vnode is purgeable.
  zx_status_t TryPurge();

  // Removes all traces of the vnode from blobfs.
  // The blob is not expected to be accessed again after this is called.
  zx_status_t Purge();

  // Schedules the journal transaction prepared by PrepareWrite for the null blob. The null blob
  // doesn't have any data to write, so it doesn't go through the regular Write()/WriteInternal()
  // path; instead we explicitly issue a journaled write that commits the inode allocation and
  // creation.
  zx_status_t WriteNullBlob();

  // If successful, allocates the blob's node and blocks (in-memory).
  // BlobState::kEmpty --> BlobState::kDataWrite
  zx_status_t SpaceAllocate(uint32_t block_count);

  // Writes to either the merkle tree or the data section, depending on the state.
  zx_status_t WriteInternal(const void* data, size_t len, size_t* actual);

  // Reads from a blob.
  // Requires: state() == BlobState::kReadable
  zx_status_t ReadInternal(void* data, size_t len, size_t off, size_t* actual);

  // Loads the blob's data and merkle tree from disk, and initializes the data/merkle VMOs.
  // If paging is enabled, the data VMO will be pager-backed and lazily loaded and verified as the
  // client accesses the pages.
  // If paging is disabled, the entire data VMO is loaded in and verified.
  //
  // Idempotent.
  zx_status_t LoadVmosFromDisk();

  // Initializes the data VMO for writing.  Idempotent.
  zx_status_t PrepareDataVmoForWriting();

  // Commits all the data pages of the blob into memory, i.e. reads them from disk.
  zx_status_t CommitDataBuffer();

  // Verifies the integrity of the null blob (i.e. that its name is correct). Can only be called on
  // the null blob and will assert otherwise.
  zx_status_t VerifyNullBlob() const;

  // Called by the Vnode once the last write has completed, updating the on-disk metadata.
  zx_status_t WriteMetadata(BlobTransaction& transaction);

  // Returns whether the data or merkle tree bytes are mapped and resident in memory.
  bool IsDataLoaded() const;
  bool IsMerkleTreeLoaded() const;

  // Acquires a pointer to the mapped data or merkle tree.
  // May return nullptr if the mappings have not been initialized.
  void* GetDataBuffer() const;
  void* GetMerkleTreeBuffer(const BlobLayout& blob_layout) const;

  // Commits the blob to persistent storage.
  zx_status_t Commit();

  // Returns the block size used by blobfs.
  uint32_t GetBlockSize() const;

  // Writes |block_count| blocks using the data from |producer| into |streamer|.
  zx_status_t WriteData(uint32_t block_count, BlobDataProducer& producer,
                        fs::DataStreamer& streamer);

  Blobfs* const blobfs_;
  BlobState state_ = BlobState::kEmpty;
  // True if this node should be unlinked when closed.
  bool deletable_ = false;

#if defined(ENABLE_BLOBFS_NEW_PAGER)
  // When paging we can dynamically notice that a blob is corrupt. The read operation will set this
  // flag if a corruption is encountered at runtime, and future operations will fail.
  //
  // This is used only in the new pager. In the old pager, this state is kept in
  // PageWatcher::is_corrupt_.
  bool is_corrupt_ FS_TA_GUARDED(mutex_) = false;

  // In the old pager this is kept in the PageWatcher.
  pager::UserPagerInfo pager_info_ FS_TA_GUARDED(mutex_);
#endif

  bool tearing_down_ = false;

  enum class SyncingState : char {
    // The blob is being streamed, and it is not possible to generate the merkle root and metadata
    // at this point.
    kDataIncomplete,

    // The blob's merkle root is complete but the metadata write has not yet been submitted to the
    // underlying media.
    kSyncing,

    // The blob exists on the underlying media.
    kDone,
  };
  // This value is marked kDone on the journal's background thread but read on the main thread, so
  // it is protected by the mutex.
  SyncingState syncing_state_ __TA_GUARDED(mutex_) = SyncingState::kDataIncomplete;
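
  // Informally (a summary of the states above, not a guarantee): syncing_state_ moves from
  // kDataIncomplete to kSyncing once the blob's data has been fully written, and from kSyncing to
  // kDone once the journaled metadata has reached the underlying media.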

  uint32_t map_index_ = 0;

  // VMO mappings for the blob's merkle tree and data.
  // Data is stored in a separate VMO from the merkle tree for several reasons:
  //   - While data may be paged, the merkle tree (i.e. verification metadata) should always be
  //     retained.
  //   - VMO cloning when handing out a copy to read clients is simpler and requires no arithmetic.
  //   - Makes memory accounting more granular.
  // For small blobs, merkle_mapping_ may be absent, since small blobs may not have any stored
  // merkle tree.
  fzl::OwnedVmoMapper merkle_mapping_;
  fzl::OwnedVmoMapper data_mapping_;

#if !defined(ENABLE_BLOBFS_NEW_PAGER)
  // In the new pager the PagedVnode base class provides this function. Defining an identical
  // function for the old pager allows fewer divergences in this class' logic.
  bool has_clones() const { return !!clone_ref_; }
#endif

  // Watches any clones of "vmo_" provided to clients. Observes the ZX_VMO_ZERO_CHILDREN signal.
  //
  // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the
  // transition is complete.
  async::WaitMethod<Blob, &Blob::HandleNoClones> clone_watcher_;

  // Keeps a reference to the blob alive (from within itself) until there are no cloned VMOs in
  // use.
  //
  // This RefPtr is only non-null while a client is using a cloned VMO; otherwise there would be a
  // clear leak of Blob.
  //
  // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the
  // transition is complete.
  fbl::RefPtr<Blob> clone_ref_;

  zx::event readable_event_;

  // TODO(smklein): We are only using a few of these fields, such as:
  // - blob_size
  // - block_count
  // To save space, we could avoid holding onto the entire inode.
  Inode inode_ = {};

  // Data used exclusively during writeback.
  struct WriteInfo;
  std::unique_ptr<WriteInfo> write_info_;

#if !defined(ENABLE_BLOBFS_NEW_PAGER)
  // Reads in the blob's pages on demand. Used only in the old pager path.
  std::unique_ptr<pager::PageWatcher> page_watcher_;
#endif
};

// Returns true if the given inode supports paging.
bool SupportsPaging(const Inode& inode);

}  // namespace blobfs

#endif  // SRC_STORAGE_BLOBFS_BLOB_H_