| // Copyright 2016 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // This file describes the in-memory structures which construct |
| // a MinFS filesystem. |
| |
| #pragma once |
| |
| #include <utility> |
| |
| #include <inttypes.h> |
| |
| #ifdef __Fuchsia__ |
| #include <fbl/auto_lock.h> |
| #include <fs/managed-vfs.h> |
| #include <fs/remote.h> |
| #include <fs/watcher.h> |
| #include <fuchsia/io/c/fidl.h> |
| #include <fuchsia/minfs/c/fidl.h> |
| #include <lib/fzl/resizeable-vmo-mapper.h> |
| #include <lib/sync/completion.h> |
| #include <lib/zx/vmo.h> |
| #include <minfs/metrics.h> |
| #include <minfs/writeback-async.h> |
| #endif |
| |
| #include <fbl/algorithm.h> |
| #include <fbl/function.h> |
| #include <fbl/intrusive_hash_table.h> |
| #include <fbl/intrusive_single_list.h> |
| #include <fbl/macros.h> |
| #include <fbl/ref_ptr.h> |
| #include <fbl/unique_ptr.h> |
| #include <fs/block-txn.h> |
| #include <fs/locking.h> |
| #include <fs/ticker.h> |
| #include <fs/trace.h> |
| #include <fs/vfs.h> |
| #include <fs/vnode.h> |
| #include <lib/zircon-internal/fnv1hash.h> |
| #include <minfs/format.h> |
| #include <minfs/minfs.h> |
| #include <minfs/superblock.h> |
| #include <minfs/transaction-limits.h> |
| #include <minfs/writeback.h> |
| |
| #ifdef __Fuchsia__ |
| #include "vnode-allocation.h" |
| #include "work-queue.h" |
| #endif |
| |
| #include "allocator/allocator.h" |
| #include "allocator/inode-manager.h" |
| #include "vnode.h" |
| |
| constexpr uint32_t kExtentCount = 6; |
| |
| // A compile-time debug check, which, if enabled, causes |
| // inline functions to be expanded to error checking code. |
| // Since this may be expensive, it is typically turned |
| // off, except for debugging. |
| // #define MINFS_PARANOID_MODE |
| |
| namespace minfs { |
| #ifdef __Fuchsia__ |
| using BlockRegion = fuchsia_minfs_BlockRegion; |
| |
| // Validate that |vmo| is large enough to access block |blk|, |
| // relative to the start of the vmo. |
| inline void ValidateVmoSize(zx_handle_t vmo, blk_t blk) { |
| #ifdef MINFS_PARANOID_MODE |
| uint64_t size; |
| size_t min = (blk + 1) * kMinfsBlockSize; |
| ZX_ASSERT(zx_vmo_get_size(vmo, &size) == ZX_OK); |
| ZX_ASSERT_MSG(size >= min, "VMO size %" PRIu64 " too small for access at block %u\n", |
| size, blk); |
| #endif // MINFS_PARANOID_MODE |
| } |
| #endif // __Fuchsia__ |
| |
| // SyncVnode flags |
| constexpr uint32_t kMxFsSyncDefault = 0; // default: no implicit time update |
| constexpr uint32_t kMxFsSyncMtime = (1 << 0); |
| constexpr uint32_t kMxFsSyncCtime = (1 << 1); |
| |
| constexpr uint32_t kMinfsBlockCacheSize = 64; |
| |
| // Used by fsck |
| class MinfsChecker; |
| class VnodeMinfs; |
| |
| using SyncCallback = fs::Vnode::SyncCallback; |
| |
| #ifndef __Fuchsia__ |
| // Store start block + length for all extents. These may differ from info block for |
| // sparse files. |
| class BlockOffsets { |
| public: |
| BlockOffsets(const Bcache& bc, const SuperblockManager& sb); |
| |
| blk_t IbmStartBlock() const { return ibm_start_block_; } |
| blk_t IbmBlockCount() const { return ibm_block_count_; } |
| |
| blk_t AbmStartBlock() const { return abm_start_block_; } |
| blk_t AbmBlockCount() const { return abm_block_count_; } |
| |
| blk_t InoStartBlock() const { return ino_start_block_; } |
| blk_t InoBlockCount() const { return ino_block_count_; } |
| |
| blk_t JournalStartBlock() const { return journal_start_block_; } |
| blk_t JournalBlockCount() const { return journal_block_count_; } |
| |
| blk_t DatStartBlock() const { return dat_start_block_; } |
| blk_t DatBlockCount() const { return dat_block_count_; } |
| |
| private: |
| blk_t ibm_start_block_; |
| blk_t ibm_block_count_; |
| |
| blk_t abm_start_block_; |
| blk_t abm_block_count_; |
| |
| blk_t ino_start_block_; |
| blk_t ino_block_count_; |
| |
| blk_t journal_start_block_; |
| blk_t journal_block_count_; |
| |
| blk_t dat_start_block_; |
| blk_t dat_block_count_; |
| }; |
| #endif |
| |
| class TransactionalFs { |
| public: |
| #ifdef __Fuchsia__ |
| virtual fbl::Mutex* GetLock() const = 0; |
| |
| void EnqueueCallback(SyncCallback callback) { |
| fbl::unique_ptr<WritebackWork> work(new WritebackWork(GetMutableBcache())); |
| work->SetSyncCallback(std::move(callback)); |
| EnqueueWork(std::move(work)); |
| } |
| #endif |
| // Begin a transaction with |reserve_inodes| inodes and |reserve_blocks| blocks reserved. |
| virtual zx_status_t BeginTransaction(size_t reserve_inodes, size_t reserve_blocks, |
| fbl::unique_ptr<Transaction>* transaction_out) = 0; |
| |
| // Enqueues a WritebackWork for processing. |
| virtual zx_status_t EnqueueWork(fbl::unique_ptr<WritebackWork> work) = 0; |
| |
| // Enqueues a metadata transaction by persisting its contents to disk. |
| virtual zx_status_t CommitTransaction(fbl::unique_ptr<Transaction> transaction) = 0; |
| |
| virtual Bcache* GetMutableBcache() = 0; |
| }; |
| |
| class InspectableFilesystem { |
| public: |
| virtual ~InspectableFilesystem() {} |
| |
| // Returns an immutable reference to the superblock. |
| virtual const Superblock& Info() const = 0; |
| |
| // Gets an immutable reference to the InodeManager. |
| virtual const InspectableInodeManager* GetInodeManager() const = 0; |
| |
| // Gets an immutable reference to the block_allocator. |
| virtual const Allocator* GetBlockAllocator() const = 0; |
| |
| // Reads a block at the |start_block_num| location. |
| virtual zx_status_t ReadBlock(blk_t start_block_num, void* out_data) const = 0; |
| |
| #ifndef __Fuchsia__ |
| // Gets an immutable copy of offsets_. |
| virtual const BlockOffsets GetBlockOffsets() const = 0; |
| #endif |
| }; |
| |
| class Minfs : |
| #ifdef __Fuchsia__ |
| public fs::ManagedVfs, |
| #else |
| public fs::Vfs, |
| #endif |
| public fbl::RefCounted<Minfs>, public TransactionalFs, public InspectableFilesystem { |
| public: |
| DISALLOW_COPY_ASSIGN_AND_MOVE(Minfs); |
| |
| ~Minfs(); |
| |
| static zx_status_t Create(fbl::unique_ptr<Bcache> bc, const Superblock* info, |
| fbl::unique_ptr<Minfs>* out, IntegrityCheck checks); |
| |
| #ifdef __Fuchsia__ |
| // Initializes the Minfs writeback queue and resolves any pending disk state (e.g., resolving |
| // unlinked nodes). |
| zx_status_t InitializeWriteback(); |
| |
| // Queries the underlying FVM, if it exists. |
| zx_status_t FVMQuery(fuchsia_hardware_block_volume_VolumeInfo* info) const; |
| #endif |
| |
| // instantiate a vnode from an inode |
| // the inode must exist in the file system |
| zx_status_t VnodeGet(fbl::RefPtr<VnodeMinfs>* out, ino_t ino); |
| |
| // instantiate a vnode with a new inode |
| zx_status_t VnodeNew(Transaction* transaction, fbl::RefPtr<VnodeMinfs>* out, uint32_t type); |
| |
| // Insert, lookup, and remove vnode from hash map. |
| void VnodeInsert(VnodeMinfs* vn) FS_TA_EXCLUDES(hash_lock_); |
| fbl::RefPtr<VnodeMinfs> VnodeLookup(uint32_t ino) FS_TA_EXCLUDES(hash_lock_); |
| void VnodeRelease(VnodeMinfs* vn) FS_TA_EXCLUDES(hash_lock_); |
| |
| // Allocate a new data block. |
| void BlockNew(Transaction* transaction, blk_t* out_bno); |
| |
| // Set/Unset the flags. |
| void UpdateFlags(Transaction* transaction, uint32_t flags, bool set); |
| |
| // Mark |in_bno| for de-allocation (if it is > 0), and return a new block |*out_bno|. |
| // The swap will not be persisted until the transaction is commited. |
| void BlockSwap(Transaction* transaction, blk_t in_bno, blk_t* out_bno); |
| |
| // Free a data block. |
| void BlockFree(Transaction* transaction, blk_t bno); |
| |
| // Free ino in inode bitmap, release all blocks held by inode. |
| zx_status_t InoFree(Transaction* transaction, VnodeMinfs* vn); |
| |
| // Mark |vn| to be unlinked. |
| void AddUnlinked(Transaction* transaction, VnodeMinfs* vn); |
| |
| // Remove |vn| from the list of unlinked vnodes. |
| void RemoveUnlinked(Transaction* transaction, VnodeMinfs* vn); |
| |
| // Free resources of all vnodes marked unlinked. |
| zx_status_t PurgeUnlinked(); |
| |
| // Writes back an inode into the inode table on persistent storage. |
| // Does not modify inode bitmap. |
| void InodeUpdate(WriteTxn* transaction, ino_t ino, const Inode* inode) { |
| inodes_->Update(transaction, ino, inode); |
| } |
| |
| // Reads an inode from the inode table into memory. |
| void InodeLoad(ino_t ino, Inode* out) const { |
| inodes_->Load(ino, out); |
| } |
| |
| void ValidateBno(blk_t bno) const { |
| ZX_DEBUG_ASSERT(bno != 0); |
| ZX_DEBUG_ASSERT(bno < Info().block_count); |
| } |
| |
| zx_status_t BeginTransaction(size_t reserve_inodes, size_t reserve_blocks, |
| fbl::unique_ptr<Transaction>* transaction) __WARN_UNUSED_RESULT; |
| |
| zx_status_t EnqueueWork(fbl::unique_ptr<WritebackWork> work) final __WARN_UNUSED_RESULT; |
| |
| void EnqueueAllocation(fbl::unique_ptr<Transaction> transaction); |
| |
| // Complete a transaction by enqueueing its WritebackWork to the WritebackQueue. |
| zx_status_t CommitTransaction(fbl::unique_ptr<Transaction> transaction) final |
| __WARN_UNUSED_RESULT; |
| |
| #ifdef __Fuchsia__ |
| // Hands off a work unit to be completed by the "data assigner" thread. |
| void EnqueueDataTask(TaskCallback callback) { |
| assigner_->EnqueueCallback(std::move(callback)); |
| } |
| |
| // Returns the capacity of the writeback buffer, in blocks. |
| size_t WritebackCapacity() const { |
| ZX_DEBUG_ASSERT(writeback_ != nullptr); |
| return writeback_->GetCapacity(); |
| } |
| |
| void SetUnmountCallback(fbl::Closure closure) { on_unmount_ = std::move(closure); } |
| void Shutdown(fs::Vfs::ShutdownCallback cb) final; |
| |
| // Returns a unique identifier for this instance. |
| uint64_t GetFsId() const { return fs_id_; } |
| |
| // Signals the completion object as soon as... |
| // (1) A sync probe has entered and exited the writeback queue, and |
| // (2) The block cache has sync'd with the underlying block device. |
| void Sync(SyncCallback closure); |
| #endif |
| |
| // The following methods are used to read one block from the specified extent, |
| // from relative block |bno|. |
| // |data| is an out parameter that must be a block in size, provided by the caller |
| // These functions are single-block and synchronous. On Fuchsia, using the batched read |
| // functions is preferred. |
| zx_status_t ReadDat(blk_t bno, void* data); |
| |
| void SetMetrics(bool enable) { |
| #ifdef __Fuchsia__ |
| metrics_.SetEnable(enable); |
| #endif |
| } |
| fs::Ticker StartTicker() { |
| #ifdef __Fuchsia__ |
| return fs::Ticker(metrics_.Enabled()); |
| #endif |
| return fs::Ticker(true); |
| } |
| |
| // Update aggregate information about VMO initialization. |
| void UpdateInitMetrics(uint32_t dnum_count, uint32_t inum_count, |
| uint32_t dinum_count, uint64_t user_data_size, |
| const fs::Duration& duration); |
| // Update aggregate information about looking up vnodes by name. |
| void UpdateLookupMetrics(bool success, const fs::Duration& duration); |
| // Update aggregate information about looking up vnodes by inode. |
| void UpdateOpenMetrics(bool cache_hit, const fs::Duration& duration); |
| // Update aggregate information about inode creation. |
| void UpdateCreateMetrics(bool success, const fs::Duration& duration); |
| // Update aggregate information about reading from Vnodes. |
| void UpdateReadMetrics(uint64_t size, const fs::Duration& duration); |
| // Update aggregate information about writing to Vnodes. |
| void UpdateWriteMetrics(uint64_t size, const fs::Duration& duration); |
| // Update aggregate information about truncating Vnodes. |
| void UpdateTruncateMetrics(const fs::Duration& duration); |
| // Update aggregate information about unlinking Vnodes. |
| void UpdateUnlinkMetrics(bool success, const fs::Duration& duration); |
| // Update aggregate information about renaming Vnodes. |
| void UpdateRenameMetrics(bool success, const fs::Duration& duration); |
| |
| #ifdef __Fuchsia__ |
| // Acquire a copy of the collected metrics. |
| zx_status_t GetMetrics(fuchsia_minfs_Metrics* out) const { |
| #ifdef __Fuchsia__ |
| if (metrics_.Enabled()) { |
| metrics_.CopyToFidl(out); |
| return ZX_OK; |
| } |
| #endif |
| return ZX_ERR_UNAVAILABLE; |
| } |
| |
| // Record the location, size, and number of all non-free block regions. |
| fbl::Vector<BlockRegion> GetAllocatedRegions() const; |
| #endif |
| |
| // InspectableFilesystem interface. |
| const Superblock& Info() const final { |
| return sb_->Info(); |
| } |
| |
| const InspectableInodeManager* GetInodeManager() const final { |
| return inodes_.get(); |
| } |
| |
| const Allocator* GetBlockAllocator() const final { |
| return block_allocator_.get(); |
| } |
| |
| #ifndef __Fuchsia__ |
| const BlockOffsets GetBlockOffsets() const final { |
| return offsets_; |
| } |
| #endif |
| |
| zx_status_t ReadBlock(blk_t start_block_num, void* data) const final; |
| |
| const TransactionLimits& Limits() const { |
| return limits_; |
| } |
| |
| #ifdef __Fuchsia__ |
| fbl::Mutex* GetLock() const final { return &txn_lock_; } |
| #endif |
| |
| Bcache* GetMutableBcache() final { return bc_.get(); } |
| |
| // TODO(rvargas): Make private. |
| fbl::unique_ptr<Bcache> bc_; |
| |
| private: |
| using HashTable = fbl::HashTable<ino_t, VnodeMinfs*>; |
| |
| #ifdef __Fuchsia__ |
| Minfs(fbl::unique_ptr<Bcache> bc, fbl::unique_ptr<SuperblockManager> sb, |
| fbl::unique_ptr<Allocator> block_allocator, |
| fbl::unique_ptr<InodeManager> inodes, |
| uint64_t fs_id); |
| #else |
| Minfs(fbl::unique_ptr<Bcache> bc, fbl::unique_ptr<SuperblockManager> sb, |
| fbl::unique_ptr<Allocator> block_allocator, |
| fbl::unique_ptr<InodeManager> inodes, BlockOffsets offsets); |
| #endif |
| |
| // Internal version of VnodeLookup which may also return unlinked vnodes. |
| fbl::RefPtr<VnodeMinfs> VnodeLookupInternal(uint32_t ino) FS_TA_EXCLUDES(hash_lock_); |
| |
| // Check if filesystem is readonly. |
| bool IsReadonly() FS_TA_EXCLUDES(vfs_lock_); |
| |
| // Find a free inode, allocate it in the inode bitmap, and write it back to disk |
| void InoNew(Transaction* transaction, const Inode* inode, ino_t* out_ino); |
| |
| // Enqueues an update to the super block. |
| void WriteInfo(WriteTxn* transaction); |
| |
| // Find an unallocated and unreserved block in the block bitmap starting from block |start| |
| zx_status_t FindBlock(size_t start, size_t* blkno_out); |
| |
| // Creates an unique identifier for this instance. This is to be called only during |
| // "construction". |
| static zx_status_t CreateFsId(uint64_t* out); |
| |
| #ifndef __Fuchsia__ |
| zx_status_t ReadBlk(blk_t bno, blk_t start, blk_t soft_max, blk_t hard_max, void* data); |
| #endif |
| |
| // Global information about the filesystem. |
| // While Allocator is thread-safe, it is recommended that a valid Transaction object be held |
| // while any metadata fields are modified until the time they are enqueued for writeback. This |
| // is to avoid modifications from other threads potentially jeopardizing the metadata integrity |
| // before it is safely persisted to disk. |
| fbl::unique_ptr<SuperblockManager> sb_; |
| fbl::unique_ptr<Allocator> block_allocator_; |
| fbl::unique_ptr<InodeManager> inodes_; |
| |
| #ifdef __Fuchsia__ |
| mutable fbl::Mutex txn_lock_; // Lock required to start a new Transaction. |
| fbl::Mutex hash_lock_; // Lock required to access the vnode_hash_. |
| #endif |
| // Vnodes exist in the hash table as long as one or more reference exists; |
| // when the Vnode is deleted, it is immediately removed from the map. |
| HashTable vnode_hash_ FS_TA_GUARDED(hash_lock_){}; |
| |
| #ifdef __Fuchsia__ |
| fbl::Closure on_unmount_{}; |
| MinfsMetrics metrics_ = {}; |
| fbl::unique_ptr<WritebackQueue> writeback_; |
| fbl::unique_ptr<WorkQueue> assigner_; |
| uint64_t fs_id_ = 0; |
| #else |
| // Store start block + length for all extents. These may differ from info block for |
| // sparse files. |
| BlockOffsets offsets_; |
| #endif |
| |
| TransactionLimits limits_; |
| }; |
| |
| // Return the block offset in vmo_indirect_ of indirect blocks pointed to by the doubly indirect |
| // block at dindex |
| constexpr uint32_t GetVmoOffsetForIndirect(uint32_t dibindex) { |
| return kMinfsIndirect + kMinfsDoublyIndirect + (dibindex * kMinfsDirectPerIndirect); |
| } |
| |
| // Return the required vmo size (in bytes) to store indirect blocks pointed to by doubly indirect |
| // block dibindex |
| constexpr size_t GetVmoSizeForIndirect(uint32_t dibindex) { |
| return GetVmoOffsetForIndirect(dibindex + 1) * kMinfsBlockSize; |
| } |
| |
| // Return the block offset of doubly indirect blocks in vmo_indirect_ |
| constexpr uint32_t GetVmoOffsetForDoublyIndirect(uint32_t dibindex) { |
| ZX_DEBUG_ASSERT(dibindex < kMinfsDoublyIndirect); |
| return kMinfsIndirect + dibindex; |
| } |
| |
| // Return the required vmo size (in bytes) to store doubly indirect blocks in vmo_indirect_ |
| constexpr size_t GetVmoSizeForDoublyIndirect() { |
| return (kMinfsIndirect + kMinfsDoublyIndirect) * kMinfsBlockSize; |
| } |
| |
| // write the inode data of this vnode to disk (default does not update time values) |
| void SyncVnode(fbl::RefPtr<VnodeMinfs> vn, uint32_t flags); |
| void DumpInfo(const Superblock* info); |
| void DumpInode(const Inode* inode, ino_t ino); |
| zx_time_t GetTimeUTC(); |
| void InitializeDirectory(void* bdata, ino_t ino_self, ino_t ino_parent); |
| |
| // Given an input bcache, initialize the filesystem and return a reference to the |
| // root node. |
| zx_status_t Mount(fbl::unique_ptr<minfs::Bcache> bc, const MountOptions& options, |
| fbl::RefPtr<VnodeMinfs>* root_out); |
| } // namespace minfs |