// blob: 7ffdce7f8b5907dc479d1f1a0c334ec5b1ce4ef7 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef ZIRCON_SYSTEM_ULIB_MINFS_VNODE_H_
#define ZIRCON_SYSTEM_ULIB_MINFS_VNODE_H_
#include <inttypes.h>
#include <memory>
#include <utility>
#ifdef __Fuchsia__
#include <fuchsia/io/llcpp/fidl.h>
#include <fuchsia/minfs/llcpp/fidl.h>
#include <lib/fzl/resizeable-vmo-mapper.h>
#include <lib/zx/vmo.h>
#include <fbl/auto_lock.h>
#include <fs/remote.h>
#include <fs/watcher.h>
#include "vmo_indirect.h"
#include "vnode_allocation.h"
#endif
#include <lib/zircon-internal/fnv1hash.h>
#include <fbl/algorithm.h>
#include <fbl/macros.h>
#include <fbl/ref_ptr.h>
#include <fs/locking.h>
#include <fs/ticker.h>
#include <fs/trace.h>
#include <fs/vfs.h>
#include <fs/vnode.h>
#include <minfs/format.h>
#include <minfs/minfs.h>
#include <minfs/transaction_limits.h>
#include <minfs/writeback.h>
#include "lazy_buffer.h"
#include "vnode_mapper.h"
namespace minfs {
// Forward declarations.
// Used by fsck
class MinfsChecker;
// Referenced by pointer from VnodeMinfs below.
class Minfs;
// An abstract Vnode class contains the following:
//
// - A VMO, holding the in-memory representation of data stored persistently.
// - An inode, holding the root of this node's metadata.
//
// This class is capable of writing, reading, and truncating the node's data
// in a linear block-address space.
//
// On Fuchsia builds the class additionally inherits (privately, since no access
// specifier is given) from the Minfs FIDL interface, whose methods are declared
// further below.
#ifdef __Fuchsia__
class VnodeMinfs : public fs::Vnode,
                   public fbl::SinglyLinkedListable<VnodeMinfs*>,
                   public fbl::Recyclable<VnodeMinfs>,
                   llcpp::fuchsia::minfs::Minfs::Interface {
#else
class VnodeMinfs : public fs::Vnode,
                   public fbl::SinglyLinkedListable<VnodeMinfs*>,
                   public fbl::Recyclable<VnodeMinfs> {
#endif
 public:
  virtual ~VnodeMinfs();

  // Allocates a new Vnode and initializes the in-memory inode structure given the type, where
  // type is one of:
  // - kMinfsTypeFile
  // - kMinfsTypeDir
  //
  // Sets create / modify times of the new node.
  // Does not allocate an inode number for the Vnode.
  static void Allocate(Minfs* fs, uint32_t type, fbl::RefPtr<VnodeMinfs>* out);

  // Allocates a Vnode, loading |ino| from storage.
  //
  // Doesn't update create / modify times of the node.
  static void Recreate(Minfs* fs, ino_t ino, fbl::RefPtr<VnodeMinfs>* out);

  // Returns true when the in-memory inode's link count is zero.
  bool IsUnlinked() const { return inode_.link_count == 0; }

  // Accessors for the in-memory inode.
  const Inode* GetInode() const { return &inode_; }
  Inode* GetMutableInode() { return &inode_; }

  // Both return the inode number; |GetKey| pairs with |GetHash| below.
  ino_t GetIno() const { return ino_; }
  ino_t GetKey() const { return ino_; }

  // Should only be called once for the VnodeMinfs lifecycle.
  void SetIno(ino_t ino);

  // Setters for the |next_inode| / |last_inode| fields of the in-memory inode.
  void SetNextInode(ino_t ino) { inode_.next_inode = ino; }
  void SetLastInode(ino_t ino) { inode_.last_inode = ino; }

  // Increments the in-memory link count. Does not write the inode anywhere by itself.
  void AddLink() { inode_.link_count++; }

  // Stamps the in-memory inode with the "purged" magic value.
  void MarkPurged() { inode_.magic = kMinfsMagicPurged; }

  // Hashes an inode number down to |kMinfsHashBits| bits.
  static size_t GetHash(ino_t key) { return fnv1a_tiny(key, kMinfsHashBits); }

  // fs::Vnode interface (invoked publicly).
#ifdef __Fuchsia__
  void HandleFsSpecificMessage(fidl_msg_t* msg, fidl::Transaction* txn) final;
#endif
  using fs::Vnode::Open;
  zx_status_t Open(ValidatedOptions options, fbl::RefPtr<Vnode>* out_redirect) final;
  zx_status_t Close() final;

  // fbl::Recyclable interface.
  void fbl_recycle() override;

  // Queries the underlying vnode to ask if it may be unlinked.
  //
  // If the response is not ZX_OK, operations to unlink (or rename on top of) this
  // vnode will fail.
  virtual zx_status_t CanUnlink() const = 0;

  // Returns the current block count of the vnode.
  virtual blk_t GetBlockCount() const = 0;

  // Returns the total size of the vnode.
  virtual uint64_t GetSize() const = 0;

  // Returns if the node is a directory.
  // TODO(fxb/39864): This function is used only within minfs to implement unlinking and renaming.
  // Consider replacing this with the more general |Vnode::GetProtocols|.
  virtual bool IsDirectory() const = 0;

  // Sets the new size of the vnode.
  // Should update the in-memory representation of the Vnode, but not necessarily
  // write it out to persistent storage.
  //
  // TODO: Upgrade internal size to 64-bit integer.
  virtual void SetSize(uint32_t new_size) = 0;

  // Accesses a block in the vnode at |vmo_offset| relative to the start of the file,
  // which was previously at the device offset |dev_offset|.
  //
  // If the block was not previously allocated, |dev_offset| is zero.
  // |*out_dev_offset| must contain the new value of the device offset to use when writing
  // to this part of the Vnode. By default, it is set to |dev_offset|.
  //
  // |*out_dev_offset| may be passed to |IssueWriteback| as |dev_offset|.
  virtual void AcquireWritableBlock(Transaction* transaction, blk_t vmo_offset, blk_t dev_offset,
                                    blk_t* out_dev_offset) = 0;

  // Deletes the block at |vmo_offset| within the file, corresponding to on-disk block |dev_offset|
  // (zero if unallocated). |indirect| specifies whether the block is a direct or indirect block.
  virtual void DeleteBlock(PendingWork* transaction, blk_t vmo_offset, blk_t dev_offset,
                           bool indirect) = 0;

#ifdef __Fuchsia__
  // Instructs the Vnode to write out |count| blocks of the vnode, starting at local
  // offset |vmo_offset|, corresponding to on-disk offset |dev_offset|.
  virtual void IssueWriteback(Transaction* transaction, blk_t vmo_offset, blk_t dev_offset,
                              blk_t count) = 0;

  // Queries the node, returning |true| if the node has an in-flight operation on |vmo_offset|
  // that has not yet been enqueued to the writeback pipeline.
  virtual bool HasPendingAllocation(blk_t vmo_offset) = 0;

  // Instructs the node to cancel all pending writeback operations that have not yet been
  // enqueued to the writeback pipeline.
  //
  // This method is used exclusively when deleting nodes.
  virtual void CancelPendingWriteback() = 0;

  // Minfs FIDL interface.
  void GetMetrics(GetMetricsCompleter::Sync completer) final;
  void ToggleMetrics(bool enabled, ToggleMetricsCompleter::Sync completer) final;
  void GetAllocatedRegions(GetAllocatedRegionsCompleter::Sync completer) final;
  void GetMountState(GetMountStateCompleter::Sync completer) final;
#endif

  // Returns the filesystem pointer held by this vnode.
  Minfs* Vfs() { return fs_; }

  // Local implementations of read, write, and truncate functions which
  // may operate on either files or directories.
  zx_status_t ReadInternal(PendingWork* transaction, void* data, size_t len, size_t off,
                           size_t* actual);
  zx_status_t ReadExactInternal(PendingWork* transaction, void* data, size_t len, size_t off);
  zx_status_t WriteInternal(Transaction* transaction, const void* data, size_t len, size_t off,
                            size_t* actual);
  zx_status_t WriteExactInternal(Transaction* transaction, const void* data, size_t len,
                                 size_t off);
  zx_status_t TruncateInternal(Transaction* transaction, size_t len);

  // TODO: The following methods should be made private:
#ifndef __Fuchsia__
  // Reads the block at |bno| on disk.
  void ReadIndirectBlock(blk_t bno, uint32_t* entry);
#endif

  // Update the vnode's inode and write it to disk.
  void InodeSync(PendingWork* transaction, uint32_t flags);

  // Decrements the inode link count to a vnode.
  // Writes the inode back to |transaction|.
  //
  // If the link count becomes zero, the node either:
  // 1) Calls |Purge()| (if no open fds exist), or
  // 2) Adds itself to the "unlinked list", to be purged later.
  [[nodiscard]] zx_status_t RemoveInodeLink(Transaction* transaction);

#ifdef __Fuchsia__
  VmoIndirect& vmo_indirect() { return vmo_indirect_; }
#endif

  // Allocates an indirect block.
  void AllocateIndirect(PendingWork* transaction, blk_t* block);

  // Initializes (if necessary) and returns the indirect file.
  [[nodiscard]] zx::status<LazyBuffer*> GetIndirectFile();

  // TODO(smklein): These operations and members are protected as a historical artifact
  // of "File + Directory + Vnode" being a single class. They should be transitioned to
  // private.
 protected:
  // Marked explicit so that a bare |Minfs*| can never be implicitly converted into a vnode.
  explicit VnodeMinfs(Minfs* fs);

  // fs::Vnode interface.
  zx_status_t GetAttributes(fs::VnodeAttributes* a) final;
  zx_status_t SetAttributes(fs::VnodeAttributesUpdate a) final;
#ifdef __Fuchsia__
  zx_status_t QueryFilesystem(llcpp::fuchsia::io::FilesystemInfo* out) final;
  zx_status_t GetDevicePath(size_t buffer_len, char* out_name, size_t* out_len) final;
#endif

  // Although file sizes don't need to be block-aligned, the underlying VMO is
  // always kept at a size which is a multiple of |kMinfsBlockSize|.
  //
  // When a Vnode is truncated to a size larger than |inode_.size|, it is
  // assumed that any space between |inode_.size| and the nearest block is
  // filled with zeroes in the internal VMO. This function validates that
  // assumption.
  void ValidateVmoTail(uint64_t inode_size) const;

  enum class BlockOp {
    // Read skips unallocated indirect blocks, setting all output |bno| values to zero.
    kRead,
    // Delete avoids accessing indirect blocks, but additionally releases indirect blocks
    // (and doubly indirect blocks) if all contained blocks have been freed.
    //
    // |out_dev_offset| must be zero for all callbacks invoked via this operation.
    kDelete,
    // Write ensures all indirect blocks are allocated before accessing the underlying |bno|.
    // Acquiring a block via "kWrite" may cause additional writeback traffic to update
    // the metadata itself.
    kWrite,
    // Swap is identical to write: It ensures all indirect blocks are allocated
    // before being accessed.
    kSwap,
  };

  // Callback for block operations. Called exclusively on "leaf node" blocks: indirect blocks
  // are considered metadata, and handled internally by the "BlockOp" functions.
  //
  // |vmo_offset|: Block address relative to start of Vnode.
  // |dev_offset|: Previous absolute block address at this node. Zero if unallocated.
  // |out_dev_offset|: A new, optional output value. Set to |dev_offset| by default.
  //                   Will alter the results of |bno| returned via |ApplyOperation|.
  using BlockOpCallback =
      fbl::Function<void(blk_t vmo_offset, blk_t dev_offset, blk_t* out_dev_offset)>;

  // Arguments to invoke |callback| on all local nodes of the file in [start, start + count).
  //
  // Collects result blocks in |bnos|.
  struct BlockOpArgs {
    // |bnos| may be null; when it is non-null it must point at storage for at least |count|
    // entries, since the constructor zeroes that many.
    BlockOpArgs(Transaction* transaction, BlockOp op, BlockOpCallback callback, blk_t start,
                blk_t count, blk_t* bnos)
        : transaction(transaction),
          op(op),
          callback(std::move(callback)),
          start(start),
          count(count),
          bnos(bnos) {
      // Initialize output array to 0 in case the indirect block(s)
      // containing these bnos do not exist.
      if (bnos) {
        memset(bnos, 0, sizeof(blk_t) * count);
      }
    }

    Transaction* transaction;
    BlockOp op;
    BlockOpCallback callback;
    blk_t start;
    blk_t count;
    blk_t* bnos;
  };

  // Parameters describing an operation over an array of direct block pointers.
  class DirectArgs {
   public:
    DirectArgs(BlockOp op, blk_t* array, blk_t count, blk_t rel_bno, blk_t* bnos)
        : array_(array), bnos_(bnos), count_(count), rel_bno_(rel_bno), op_(op), dirty_(false) {}

    BlockOp GetOp() const { return op_; }

    // Returns the entry of the block array at |index|. No bounds check is performed.
    blk_t GetBno(blk_t index) const { return array_[index]; }

    // Stores |value| at |index| of the block array.
    //
    // When an output array was supplied, it receives |value|, or the entry currently in the
    // block array if |value| is zero. The block array is only written (and the args marked
    // dirty) when |value| differs from the current entry.
    void SetBno(blk_t index, blk_t value) {
      ZX_DEBUG_ASSERT(index < GetCount());
      if (bnos_ != nullptr) {
        // Must read array_[index] before it is overwritten below.
        bnos_[index] = value ? value : array_[index];
      }
      if (array_[index] != value) {
        array_[index] = value;
        dirty_ = true;
      }
    }

    blk_t GetCount() const { return count_; }
    blk_t GetRelativeBlock() const { return rel_bno_; }
    bool IsDirty() const { return dirty_; }

   protected:
    blk_t* const array_;   // array containing blocks to be operated on
    blk_t* const bnos_;    // array of |count| bnos returned to the user
    const blk_t count_;    // number of direct blocks to operate on
    const blk_t rel_bno_;  // The relative bno of the first direct block we are op'ing.
    const BlockOp op_;     // determines what operation to perform on blocks
    bool dirty_;           // true if blocks have successfully been op'd
  };

  // Parameters describing an operation that reaches direct blocks through indirect blocks.
  //
  // NOTE: |SetBno| and |GetCount| are not virtual; the versions below hide the ones in
  // |DirectArgs|, so which one runs is decided by the static type of the object.
  class IndirectArgs : public DirectArgs {
   public:
    IndirectArgs(BlockOp op, blk_t* array, blk_t count, blk_t rel_bno, blk_t* bnos, blk_t bindex,
                 blk_t ib_vmo_offset)
        : DirectArgs(op, array, count, rel_bno, bnos),
          bindex_(bindex),
          ib_vmo_offset_(ib_vmo_offset) {}

    void SetDirty() { dirty_ = true; }

    // Unconditionally stores |value| at |index| and marks the args dirty. Unlike
    // |DirectArgs::SetBno|, this does not touch the output |bnos_| array.
    void SetBno(blk_t index, blk_t value) {
      ZX_DEBUG_ASSERT(index < GetCount());
      array_[index] = value;
      SetDirty();
    }

    // Number of indirect blocks we need to iterate through to touch all |count| direct blocks.
    // (Rounds |bindex_ + count_| up to a whole number of indirect blocks.)
    blk_t GetCount() const {
      return (bindex_ + count_ + kMinfsDirectPerIndirect - 1) / kMinfsDirectPerIndirect;
    }

    blk_t GetOffset() const { return ib_vmo_offset_; }

    // Generate parameters for direct blocks in indirect block |ibindex|, which are contained
    // in |barray|
    DirectArgs GetDirect(blk_t* barray, unsigned ibindex) const;

   protected:
    const blk_t bindex_;  // relative index of the first direct block within the first indirect
                          // block
    const blk_t ib_vmo_offset_;  // index of the first indirect block
  };

  // Parameters describing an operation that reaches direct blocks through doubly indirect
  // blocks.
  //
  // NOTE: |GetCount| and |GetOffset| hide (non-virtually) the versions in |IndirectArgs|.
  class DindirectArgs : public IndirectArgs {
   public:
    DindirectArgs(BlockOp op, blk_t* array, blk_t count, blk_t rel_bno, blk_t* bnos, blk_t bindex,
                  blk_t ib_vmo_offset, blk_t ibindex, blk_t dib_vmo_offset)
        : IndirectArgs(op, array, count, rel_bno, bnos, bindex, ib_vmo_offset),
          ibindex_(ibindex),
          dib_vmo_offset_(dib_vmo_offset) {}

    // Number of doubly indirect blocks we need to iterate through to touch all |count| direct
    // blocks.
    blk_t GetCount() const {
      return (ibindex_ + count_ + kMinfsDirectPerDindirect - 1) / kMinfsDirectPerDindirect;
    }

    blk_t GetOffset() const { return dib_vmo_offset_; }

    // Generate parameters for indirect blocks in doubly indirect block |dibindex|, which are
    // contained in |iarray|
    IndirectArgs GetIndirect(blk_t* iarray, unsigned dibindex) const;

   protected:
    const blk_t ibindex_;  // relative index of the first indirect block within the first
                           // doubly indirect block
    const blk_t dib_vmo_offset_;  // index of the first doubly indirect block
  };

  // Allocate an indirect or doubly indirect block at |offset| within the indirect vmo and clear
  // the in-memory block array
  // Assumes that vmo_indirect_ has already been initialized
  void AllocateIndirect(Transaction* transaction, blk_t index, IndirectArgs* args);

  // Perform operation |op| on blocks as specified by |params|
  // The BlockOp methods should not be called directly
  // All BlockOp methods assume that vmo_indirect_ has been grown to the required size
  zx_status_t ApplyOperation(BlockOpArgs* params);
  zx_status_t BlockOpDirect(BlockOpArgs* op_args, DirectArgs* params);
  zx_status_t BlockOpIndirect(BlockOpArgs* op_args, IndirectArgs* params);
  zx_status_t BlockOpDindirect(BlockOpArgs* op_args, DindirectArgs* params);

  // Ensures that the indirect vmo is large enough to reference a block at
  // relative block address |n| within the file.
  zx_status_t EnsureIndirectVmoSize(blk_t n);

  // Get the disk block 'bno' corresponding to the 'n' block
  //
  // May or may not allocate |bno|; certain Vnodes (like File) delay allocation
  // until writeback, and will return a sentinel value of zero.
  //
  // TODO: Use types to represent that |bno|, as an output, is optional.
  zx_status_t BlockGetWritable(Transaction* transaction, blk_t n, blk_t* bno);

  // Get the disk block 'bno' corresponding to relative block address |n| within the file.
  // Does not allocate any blocks, direct or indirect, to acquire this block.
  zx_status_t BlockGetReadable(blk_t n, blk_t* bno);

  // Deletes all blocks (relative to a file) from "start" (inclusive) to the end
  // of the file. Does not update mtime/atime.
  // This can be extended to return indices of deleted bnos, or to delete a specific number of
  // bnos
  zx_status_t BlocksShrink(Transaction* transaction, blk_t start);

  // Deletes this Vnode from disk, freeing the inode and blocks.
  //
  // Must only be called on Vnodes which
  // - Have no open fds
  // - Are fully unlinked (link count == 0)
  void Purge(Transaction* transaction);

#ifdef __Fuchsia__
  zx_status_t GetNodeInfoForProtocol(fs::VnodeProtocol protocol, fs::Rights rights,
                                     fs::VnodeRepresentation* info) final;
  void Sync(SyncCallback closure) final;
  zx_status_t AttachRemote(fs::MountChannel h) final;
  zx_status_t InitVmo(PendingWork* transaction);

  // Use the watcher container to implement a directory watcher
  void Notify(fbl::StringPiece name, unsigned event) final;
  zx_status_t WatchDir(fs::Vfs* vfs, uint32_t mask, uint32_t options, zx::channel watcher) final;
#else  // !__Fuchsia__
  // Clears the block at |bno| on disk.
  void ClearIndirectBlock(blk_t bno);
#endif

  // Returns the current value of |fd_count_| (see below).
  uint32_t FdCount() const { return fd_count_; }

  // The pointer itself is const, so it cannot be reseated after construction.
  Minfs* const fs_;

#ifdef __Fuchsia__
  // TODO(smklein): When we can register MinFS as a pager service, and
  // it can properly handle page faults on a vnode's contents, then we can
  // avoid reading the entire file up-front. Until then, read the contents of
  // a VMO into memory when it is read/written.
  zx::vmo vmo_{};
  uint64_t vmo_size_ = 0;
  VmoIndirect vmo_indirect_;
  storage::Vmoid vmoid_;
  fs::WatcherContainer watcher_{};
#endif

  // vnode_mapper.cc explains what this is and the code there is responsible for manipulating it. It
  // is created on-demand.
  std::unique_ptr<LazyBuffer> indirect_file_;

  ino_t ino_{};

  // DataBlockAssigner may modify this field asynchronously, so a valid Transaction object must
  // be held before accessing it.
  Inode inode_{};

  // This field tracks the current number of file descriptors with
  // an open reference to this Vnode. Notably, this is distinct from the
  // VnodeMinfs's own refcount, since there may still be filesystem
  // work to do after the last file descriptor has been closed.
  uint32_t fd_count_{};
};
// Given a vnode block offset, returns the indirect vmo size needed to hold the
// vnode block map.
// Note: Vnode block offset 0 covers vnode sizes from 0 to 8192 bytes, offset 1
// covers vnode sizes from 8193 to 16384 bytes, and so on.
// A vnode block offset is different from the vnode size in blocks.
// TODO(43586).
uint64_t VnodeBlockOffsetToIndirectVmoSize(blk_t vnode_block_offset);
} // namespace minfs
#endif // ZIRCON_SYSTEM_ULIB_MINFS_VNODE_H_