blob: dfa98d8d3142db030b7146624394e28df8c788f2 [file] [log] [blame]
// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fbl/algorithm.h>
#include <fbl/auto_call.h>
#include <fbl/string_piece.h>
#include <fs/block-txn.h>
#include <safemath/checked_math.h>
#include <zircon/device/vfs.h>
#include <zircon/time.h>
#ifdef __Fuchsia__
#include <lib/fdio/vfs.h>
#include <lib/fidl-utils/bind.h>
#include <fbl/auto_lock.h>
#include <zircon/syscalls.h>
#include <utility>
#endif
#include "file.h"
#include "minfs-private.h"
#include "vnode.h"
namespace minfs {
// Constructs a File vnode backed by |fs|; |fs| must outlive this vnode.
File::File(Minfs* fs) : VnodeMinfs(fs) {}
File::~File() {
#ifdef __Fuchsia__
// A file must not be destroyed while a size update is still pending resolution
// by AllocateData(); destroying it now would silently drop the size change.
ZX_DEBUG_ASSERT_MSG(allocation_state_.GetNodeSize() == inode_.size,
"File being destroyed with pending updates to the inode size");
#endif
}
#ifdef __Fuchsia__
// Resolves all pending block allocations for this file: swaps in reserved
// blocks (copy-on-write), enqueues the corresponding user data, and updates
// the on-disk inode size in chunks. Runs until the allocation state has no
// pending ranges left. Data is always flushed before the metadata describing
// it, and large updates are split across multiple transactions so that each
// transaction stays within the writeback-buffer and indirect-block limits.
void File::AllocateData() {
// Calculate the maximum number of data blocks we can update within one transaction. This is
// the smallest between half the capacity of the writeback buffer, and the number of direct
// blocks needed to touch the maximum allowed number of indirect blocks.
const uint32_t max_direct_blocks =
kMinfsDirect + (kMinfsDirectPerIndirect * fs_->Limits().GetMaximumMetaDataBlocks());
const uint32_t max_writeback_blocks = static_cast<blk_t>(fs_->WritebackCapacity() / 2);
const uint32_t max_blocks = fbl::min(max_direct_blocks, max_writeback_blocks);
// Scratch buffer receiving the absolute block numbers produced by BlocksSwap.
fbl::Array<blk_t> allocated_blocks(new blk_t[max_blocks], max_blocks);
// Iterate through all relative block ranges and acquire absolute blocks for each of them.
while (true) {
fbl::unique_ptr<Transaction> transaction;
// No new reservations are needed: all blocks were reserved when the writes
// were originally staged.
ZX_ASSERT(fs_->BeginTransaction(0, 0, &transaction) == ZX_OK);
blk_t expected_blocks = allocation_state_.GetTotalPending();
if (expected_blocks == 0) {
// No data blocks remain; flush a final inode-size-only update if the
// recorded size still lags the allocated size (e.g. after a truncate).
if (inode_.size != allocation_state_.GetNodeSize()) {
inode_.size = allocation_state_.GetNodeSize();
ValidateVmoTail(inode_.size);
InodeSync(transaction->GetWork(), kMxFsSyncMtime);
__UNUSED zx_status_t status = fs_->CommitTransaction(std::move(transaction));
}
// Since we may have pending reservations from an expected update, reset the allocation
// state. This may happen if the same block range is allocated and de-allocated (e.g.
// written and truncated) before the state is resolved.
ZX_ASSERT(allocation_state_.GetNodeSize() == inode_.size);
allocation_state_.Reset(allocation_state_.GetNodeSize());
ZX_DEBUG_ASSERT(allocation_state_.IsEmpty());
// Stop processing if we have not found any data blocks to update.
break;
}
blk_t bno_start, bno_count;
ZX_ASSERT(allocation_state_.GetNextRange(&bno_start, &bno_count) == ZX_OK);
// Transfer reserved blocks from the vnode's allocation state to the current Transaction.
transaction->MergeBlockPromise(allocation_state_.GetPromise());
// Write to data blocks must be done in a separate transaction from the metadata updates to
// ensure that all user data goes out to disk before associated metadata.
transaction->InitDataWork();
if (bno_start + bno_count >= kMinfsDirect) {
// This range reaches into the indirect-block region; clamp it so we do not
// touch more indirect blocks than one transaction is allowed to.
// Calculate the number of pre-indirect blocks. These will not factor into the number
// of indirect blocks being touched, and can be added back at the end.
blk_t pre_indirect = bno_start < kMinfsDirect ? kMinfsDirect - bno_start : 0;
// First direct block managed by an indirect block.
blk_t indirect_start = bno_start - fbl::min(bno_start, kMinfsDirect);
// Index of that direct block within the indirect block.
blk_t indirect_index = indirect_start % kMinfsDirectPerIndirect;
// The maximum number of direct blocks that can be updated without touching beyond the
// maximum indirect blocks. This includes any direct blocks prior to the indirect
// section.
blk_t relative_direct_max = max_direct_blocks - kMinfsDirect - indirect_index
+ pre_indirect;
// Determine actual max count between the indirect and writeback constraints.
blk_t max_count = fbl::min(relative_direct_max, max_writeback_blocks);
// Subtract direct blocks contained within the same indirect block before our starting
// point to ensure that we do not go beyond the maximum number of indirect blocks.
bno_count = fbl::min(bno_count, max_count);
}
ZX_ASSERT(bno_count <= max_blocks);
// Since we reserved enough space ahead of time, this should not fail.
ZX_ASSERT(BlocksSwap(transaction.get(), bno_start, bno_count, &allocated_blocks[0]) ==
ZX_OK);
// Enqueue each data block one at a time, as they may not be contiguous on disk.
for (blk_t i = 0; i < bno_count; i++) {
transaction->GetDataWork()->Enqueue(vmo_.get(), bno_start + i,
allocated_blocks[i] + fs_->Info().dat_block, 1);
}
transaction->GetDataWork()->PinVnode(fbl::WrapRefPtr(this));
// Enqueue may fail if we are in a readonly state, but we should continue resolving all
// pending allocations.
__UNUSED zx_status_t status = fs_->EnqueueWork(transaction->RemoveDataWork());
// Since we are updating the file in "chunks", only update the on-disk inode size
// with the portion we've written so far.
blk_t last_byte = (bno_start + bno_count) * kMinfsBlockSize;
ZX_ASSERT(last_byte <= fbl::round_up(allocation_state_.GetNodeSize(), kMinfsBlockSize));
if (last_byte > inode_.size && last_byte < allocation_state_.GetNodeSize()) {
// If we have written past the end of the recorded size but have not yet reached the
// allocated size, update the recorded size to the last byte written.
inode_.size = last_byte;
} else if (allocation_state_.GetNodeSize() <= last_byte) {
// If we have just written to the allocated inode size, update the recorded size
// accordingly.
inode_.size = allocation_state_.GetNodeSize();
}
ValidateVmoTail(inode_.size);
InodeSync(transaction->GetWork(), kMxFsSyncMtime);
// In the future we could resolve on a per state (i.e. promise) basis, but since swaps are
// currently only made within a single thread, for now it is okay to resolve everything.
transaction->GetWork()->PinVnode(fbl::WrapRefPtr(this));
transaction->Resolve();
// Return remaining reserved blocks back to the allocation state.
blk_t bno_remaining = expected_blocks - bno_count;
transaction->GiveBlocksToPromise(bno_remaining, allocation_state_.GetPromise());
// Commit may fail if we are in a readonly state, but we should continue resolving all
// pending allocations.
status = fs_->CommitTransaction(std::move(transaction));
}
}
// Swaps in newly-allocated blocks for |count| file-relative blocks starting at
// |start| (copy-on-write), writing the resulting absolute block numbers to
// |bnos|. Every block in the range must already be marked pending in
// |allocation_state_|; each swap clears that pending bit.
zx_status_t File::BlocksSwap(Transaction* transaction, blk_t start, blk_t count, blk_t* bnos) {
auto block_callback = [this, transaction](blk_t local_bno, blk_t old_bno, blk_t* out_bno) {
ZX_DEBUG_ASSERT(allocation_state_.IsPending(local_bno));
// An old_bno of zero means this file block was never backed by disk before,
// so the swap grows the file's block count.
if (old_bno == 0) {
inode_.block_count++;
}
// For copy-on-write, swap the block out if it's a data block.
fs_->BlockSwap(transaction, old_bno, out_bno);
bool cleared = allocation_state_.ClearPending(local_bno, old_bno != 0);
ZX_DEBUG_ASSERT(cleared);
};
BlockOpArgs op_args(transaction, BlockOp::kSwap, std::move(block_callback), start, count, bnos);
return ApplyOperation(&op_args);
}
#endif
// Returns the number of blocks attributed to this file. On Fuchsia this
// includes blocks reserved for pending writes that have not yet been
// allocated on disk.
blk_t File::GetBlockCount() const {
#ifdef __Fuchsia__
return inode_.block_count + allocation_state_.GetNewPending();
#else
return inode_.block_count;
#endif
}
// Returns the current logical size of the file. On Fuchsia the authoritative
// size lives in the allocation state, which may run ahead of the on-disk inode
// while writes are pending; on host the inode size is used directly.
uint64_t File::GetSize() const {
#ifdef __Fuchsia__
    return allocation_state_.GetNodeSize();
#else
    return inode_.size;
#endif
}
// Sets the in-memory size of the file. On Fuchsia the new size is recorded in
// the allocation state and only applied to the inode once pending data is
// resolved (see AllocateData()); on host it is applied to the inode directly.
// NOTE(review): |new_size| is uint32_t while GetSize() returns uint64_t —
// presumably callers never pass sizes above 4GiB; confirm against VnodeMinfs.
void File::SetSize(uint32_t new_size) {
#ifdef __Fuchsia__
allocation_state_.SetNodeSize(new_size);
#else
inode_.size = new_size;
#endif
}
// Makes file-relative block |local_bno| writable. On Fuchsia the work is
// deferred: the block is only marked pending, and the real allocation/swap
// happens later in AllocateData(). On host, a fresh block is allocated
// immediately when |old_bno| is 0; otherwise the existing block is reused
// in place.
void File::AcquireWritableBlock(Transaction* transaction, blk_t local_bno, blk_t old_bno,
blk_t* out_bno) {
bool using_new_block = (old_bno == 0);
#ifdef __Fuchsia__
// The second argument records whether an on-disk block already backs this
// offset (i.e. whether the eventual swap is a copy-on-write replacement).
allocation_state_.SetPending(local_bno, !using_new_block);
#else
if (using_new_block) {
fs_->BlockNew(transaction, out_bno);
inode_.block_count++;
} else {
*out_bno = old_bno;
}
#endif
}
// Releases file-relative block |local_bno|: frees the on-disk block |old_bno|
// if one was allocated, and (on Fuchsia) clears any pending allocation for
// that offset so AllocateData() will not allocate a replacement.
void File::DeleteBlock(Transaction* transaction, blk_t local_bno, blk_t old_bno) {
// If we found a block that was previously allocated, delete it.
if (old_bno != 0) {
fs_->BlockFree(transaction, old_bno);
inode_.block_count--;
}
#ifdef __Fuchsia__
// Remove this block from the pending allocation map in case it's set so we do not
// proceed to allocate a new block.
allocation_state_.ClearPending(local_bno, old_bno != 0);
#endif
}
#ifdef __Fuchsia__
// Instead of enqueueing data immediately, transfers |block_count| reserved
// blocks from |transaction| into this file's allocation state, deferring the
// actual allocation and writeback to AllocateData().
// NOTE(review): |vmo_offset| and |dev_offset| are unused here — presumably
// required by the VnodeMinfs interface; confirm against the base declaration.
void File::IssueWriteback(Transaction* transaction, blk_t vmo_offset, blk_t dev_offset,
blk_t block_count) {
ZX_ASSERT(transaction != nullptr);
// Route the blocks through a local promise and then hand them to the vnode's
// allocation state.
AllocatorPromise block_promise;
transaction->GiveBlocksToPromise(block_count, &block_promise);
block_promise.GiveBlocks(block_count, allocation_state_.GetPromise());
}
// Returns true if file-relative block |vmo_offset| has a block allocation
// pending resolution by AllocateData().
bool File::HasPendingAllocation(blk_t vmo_offset) {
return allocation_state_.IsPending(vmo_offset);
}
void File::CancelPendingWriteback() {
// Drop all pending writes, revert the size of the inode to the "pre-pending-write" size.
allocation_state_.Reset(inode_.size);
}
#endif
// Regular files can always be unlinked (unlike directories, which must be
// empty first).
zx_status_t File::CanUnlink() const {
return ZX_OK;
}
// Validates open flags for this vnode: a regular file cannot be opened as a
// directory; every other flag combination is accepted here.
zx_status_t File::ValidateFlags(uint32_t flags) {
    FS_TRACE_DEBUG("File::ValidateFlags(0x%x) vn=%p(#%u)\n", flags, this, GetIno());
    return (flags & ZX_FS_FLAG_DIRECTORY) ? ZX_ERR_NOT_DIR : ZX_OK;
}
// Reads up to |len| bytes at byte offset |off| into |data|, storing the number
// of bytes actually read in |*out_actual|. Read metrics are recorded on every
// exit path via the AutoCall below.
zx_status_t File::Read(void* data, size_t len, size_t off, size_t* out_actual) {
TRACE_DURATION("minfs", "File::Read", "ino", GetIno(), "len", len, "off", off);
ZX_DEBUG_ASSERT_MSG(FdCount() > 0, "Reading from ino with no fds open");
FS_TRACE_DEBUG("minfs_read() vn=%p(#%u) len=%zd off=%zd\n", this, GetIno(), len, off);
fs::Ticker ticker(fs_->StartTicker());
// Update metrics when this scope exits, regardless of success or failure.
auto get_metrics = fbl::MakeAutoCall([&ticker, &out_actual, this]() {
fs_->UpdateReadMetrics(*out_actual, ticker.End());
});
// Reads require no reservations; a stack transaction suffices.
Transaction transaction(fs_);
return ReadInternal(&transaction, data, len, off, out_actual);
}
// Writes |len| bytes from |data| at byte |offset|, storing the number of bytes
// actually written in |*out_actual|. Blocks are reserved up front; metadata is
// committed synchronously, while the data itself is allocated and flushed
// asynchronously by AllocateData() on the data task queue (Fuchsia only).
// Write metrics are recorded on every exit path via the AutoCall below.
zx_status_t File::Write(const void* data, size_t len, size_t offset,
size_t* out_actual) {
TRACE_DURATION("minfs", "File::Write", "ino", GetIno(), "len", len, "off", offset);
ZX_DEBUG_ASSERT_MSG(FdCount() > 0, "Writing to ino with no fds open");
FS_TRACE_DEBUG("minfs_write() vn=%p(#%u) len=%zd off=%zd\n", this, GetIno(), len, offset);
*out_actual = 0;
fs::Ticker ticker(fs_->StartTicker());
auto get_metrics = fbl::MakeAutoCall([&ticker, &out_actual, this]() {
fs_->UpdateWriteMetrics(*out_actual, ticker.End());
});
blk_t reserve_blocks;
// Calculate maximum number of blocks to reserve for this write operation.
zx_status_t status = GetRequiredBlockCount(offset, len, &reserve_blocks);
if (status != ZX_OK) {
return status;
}
fbl::unique_ptr<Transaction> transaction;
if ((status = fs_->BeginTransaction(0, reserve_blocks, &transaction)) != ZX_OK) {
return status;
}
status = WriteInternal(transaction.get(), data, len, offset, out_actual);
if (status != ZX_OK) {
return status;
}
// Only commit if something was actually written; a zero-length result leaves
// the reservation to be released with the dropped transaction.
if (*out_actual != 0) {
// Enqueue metadata allocated via write.
InodeSync(transaction->GetWork(), kMxFsSyncMtime); // Successful writes updates mtime
transaction->GetWork()->PinVnode(fbl::WrapRefPtr(this));
status = fs_->CommitTransaction(std::move(transaction));
#ifdef __Fuchsia__
// Enqueue data allocated via write.
fs_->EnqueueDataTask([file = fbl::WrapRefPtr(this)](TransactionalFs*) mutable {
file->AllocateData();
});
#endif
}
return status;
}
// Appends |len| bytes from |data| at the current end of the file. The number
// of bytes written goes to |*out_actual|, and |*out_end| receives the file
// size after the write (queried even when the write partially fails).
zx_status_t File::Append(const void* data, size_t len, size_t* out_end,
size_t* out_actual) {
    const zx_status_t write_status = Write(data, len, GetSize(), out_actual);
    *out_end = GetSize();
    return write_status;
}
// Truncates (or extends) the file to |len| bytes. The inode and any indirect
// blocks touched by the truncate are committed synchronously; on Fuchsia the
// data-side cleanup is deferred to AllocateData() on the data task queue.
// Truncate metrics are recorded on every exit path via the AutoCall below.
zx_status_t File::Truncate(size_t len) {
TRACE_DURATION("minfs", "File::Truncate");
fs::Ticker ticker(fs_->StartTicker());
auto get_metrics = fbl::MakeAutoCall([&ticker, this] {
fs_->UpdateTruncateMetrics(ticker.End());
});
fbl::unique_ptr<Transaction> transaction;
// Due to file copy-on-write, up to 1 new (data) block may be required.
size_t reserve_blocks = 1;
zx_status_t status;
if ((status = fs_->BeginTransaction(0, reserve_blocks, &transaction)) != ZX_OK) {
return status;
}
if ((status = TruncateInternal(transaction.get(), len)) != ZX_OK) {
return status;
}
#ifdef __Fuchsia__
// Shortcut case: If we don't have any data blocks to update, we may as well just update
// the inode by itself.
//
// This allows us to avoid "only setting inode_.size" in the data task responsible for
// calling "AllocateData()".
if (allocation_state_.IsEmpty()) {
inode_.size = allocation_state_.GetNodeSize();
}
#endif
// Sync the inode to persistent storage: although our data blocks will be allocated
// later, the act of truncating may have allocated indirect blocks.
//
// Ensure our inode is consistent with that metadata.
InodeSync(transaction->GetWork(), kMxFsSyncMtime);
transaction->GetWork()->PinVnode(fbl::WrapRefPtr(this));
status = fs_->CommitTransaction(std::move(transaction));
#ifdef __Fuchsia__
// Enqueue data allocated via write.
// If the recorded inode size already matches |len|, there is no deferred
// data work to schedule.
if (len != inode_.size) {
fs_->EnqueueDataTask([file = fbl::WrapRefPtr(this)](TransactionalFs*) mutable {
file->AllocateData();
});
}
#endif
return status;
}
} // namespace minfs