blob: ed61afb07e2fb2413bc7fc3dbd401dbb679ceb06 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fcntl.h>
#include <inttypes.h>
#include <limits>
#include <safemath/checked_math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <digest/digest.h>
#include <digest/merkle-tree.h>
#include <fs/block-txn.h>
#include <fs/trace.h>
#ifdef __Fuchsia__
#include <fuchsia/hardware/block/c/fidl.h>
#include <fuchsia/hardware/block/volume/c/fidl.h>
#include <fvm/client.h>
#include <lib/fdio/directory.h>
#include <lib/fzl/fdio.h>
#endif
#include <blobfs/common.h>
using digest::Digest;
using digest::MerkleTree;
namespace blobfs {
namespace {
// Dumps the content of superblock to |out|. Does nothing if |out| is nullptr.
// Dumps the content of superblock to |out|. Does nothing if |out| is nullptr.
void DumpSuperblock(const Superblock& info, FILE* out) {
    if (out == nullptr) {
        return;
    }
    // One field per call; the emitted byte stream is identical to a single
    // concatenated fprintf of all fields.
    fprintf(out, "info.magic0: %" PRIu64 "\n", info.magic0);
    fprintf(out, "info.magic1: %" PRIu64 "\n", info.magic1);
    fprintf(out, "info.version: %" PRIu32 "\n", info.version);
    fprintf(out, "info.flags: %" PRIu32 "\n", info.flags);
    fprintf(out, "info.block_size: %" PRIu32 "\n", info.block_size);
    fprintf(out, "info.data_block_count: %" PRIu64 "\n", info.data_block_count);
    fprintf(out, "info.journal_block_count: %" PRIu64 "\n", info.journal_block_count);
    fprintf(out, "info.inode_count: %" PRIu64 "\n", info.inode_count);
    fprintf(out, "info.alloc_block_count: %" PRIu64 "\n", info.alloc_block_count);
    fprintf(out, "info.alloc_inode_count: %" PRIu64 "\n", info.alloc_inode_count);
    fprintf(out, "info.blob_header_next: %" PRIu64 "\n", info.blob_header_next);
    fprintf(out, "info.slice_size: %" PRIu64 "\n", info.slice_size);
    fprintf(out, "info.vslice_count: %" PRIu64 "\n", info.vslice_count);
    fprintf(out, "info.abm_slices: %" PRIu32 "\n", info.abm_slices);
    fprintf(out, "info.ino_slices: %" PRIu32 "\n", info.ino_slices);
    fprintf(out, "info.dat_slices: %" PRIu32 "\n", info.dat_slices);
    fprintf(out, "info.journal_slices: %" PRIu32 "\n", info.journal_slices);
}
} // namespace
// Number of blocks reserved for the Merkle Tree
// Number of blocks reserved for the Merkle Tree of |blobNode|'s payload.
uint32_t MerkleTreeBlocks(const Inode& blobNode) {
    // Byte length of the Merkle tree for a blob of this size.
    const uint64_t merkle_bytes = MerkleTree::GetTreeLength(blobNode.blob_size);
    // The tree length must fit in 32 bits before the narrowing cast below.
    ZX_DEBUG_ASSERT(merkle_bytes <= std::numeric_limits<uint32_t>::max());
    // Round up to whole blobfs blocks.
    return fbl::round_up(static_cast<uint32_t>(merkle_bytes), kBlobfsBlockSize) /
           kBlobfsBlockSize;
}
// Sanity check the metadata for the blobfs, given a maximum number of
// available blocks.
// Sanity check the metadata for the blobfs, given a maximum number of
// available blocks (|max|, in kBlobfsBlockSize units).
//
// Returns ZX_OK when |info| is internally consistent and fits the device,
// ZX_ERR_INVALID_ARGS otherwise (dumping the superblock to stderr for every
// failure except a bad magic).
zx_status_t CheckSuperblock(const Superblock* info, uint64_t max) {
    // Magic check first; an arbitrary block is not worth dumping.
    if (info->magic0 != kBlobfsMagic0 || info->magic1 != kBlobfsMagic1) {
        FS_TRACE_ERROR("blobfs: bad magic\n");
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->version != kBlobfsVersion) {
        FS_TRACE_ERROR("blobfs: FS Version: %08x. Driver version: %08x\n", info->version,
                       kBlobfsVersion);
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->block_size != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: bsz %u unsupported\n", info->block_size);
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }

    if ((info->flags & kBlobFlagFVM) == 0) {
        // Non-FVM: the whole filesystem must fit on the raw device.
        if (TotalBlocks(*info) > max) {
            FS_TRACE_ERROR("blobfs: too large for device\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
    } else {
        // FVM: each metadata region must fit in its allocated slices, and the
        // regions must not collide. block_size was validated above, so the
        // division is safe.
        const size_t blocks_per_slice = info->slice_size / info->block_size;

        // Block allocation bitmap.
        const size_t abm_allocated = info->abm_slices * blocks_per_slice;
        if (BlockMapBlocks(*info) > abm_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for block bitmap\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (abm_allocated + BlockMapStartBlock(*info) >= NodeMapStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Block bitmap collides into node map\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }

        // Inode (node) map.
        const size_t ino_allocated = info->ino_slices * blocks_per_slice;
        if (NodeMapBlocks(*info) > ino_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for node map\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (ino_allocated + NodeMapStartBlock(*info) >= DataStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Node bitmap collides into data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }

        // Data region.
        const size_t dat_needed = DataBlocks(*info);
        const size_t dat_allocated = info->dat_slices * blocks_per_slice;
        if (dat_needed < kStartBlockMinimum) {
            FS_TRACE_ERROR("blobfs: Partition too small; no space left for data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (dat_needed > dat_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (dat_allocated + DataStartBlock(*info) > std::numeric_limits<uint32_t>::max()) {
            FS_TRACE_ERROR("blobfs: Data blocks overflow uint32\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
    }

    if (info->blob_header_next != 0) {
        FS_TRACE_ERROR("blobfs: linked blob headers not yet supported\n");
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }
    return ZX_OK;
}
// Stores the number of kBlobfsBlockSize-sized blocks backing |fd| in |*out|.
//
// On Fuchsia this queries the block device over FIDL; on the host it uses the
// file size reported by fstat(). Returns ZX_OK on success.
zx_status_t GetBlockCount(int fd, uint64_t* out) {
#ifdef __Fuchsia__
    fzl::UnownedFdioCaller caller(fd);
    fuchsia_hardware_block_BlockInfo block_info;
    zx_status_t op_status;
    const zx_status_t io_status = fuchsia_hardware_block_BlockGetInfo(
        caller.borrow_channel(), &op_status, &block_info);
    // Transport failure takes precedence over the operation's own status.
    if (io_status != ZX_OK) {
        return io_status;
    }
    if (op_status != ZX_OK) {
        return op_status;
    }
    // Convert the device's native block geometry into blobfs blocks.
    *out = (block_info.block_size * block_info.block_count) / kBlobfsBlockSize;
#else
    struct stat stats;
    if (fstat(fd, &stats) < 0) {
        return ZX_ERR_BAD_STATE;
    }
    *out = stats.st_size / kBlobfsBlockSize;
#endif
    return ZX_OK;
}
// Reads block |bno| (kBlobfsBlockSize bytes) from |fd| into |data|.
// Returns ZX_OK on success, ZX_ERR_IO on a seek or short/failed read.
zx_status_t readblk(int fd, uint64_t bno, void* data) {
    if (lseek(fd, static_cast<off_t>(bno * kBlobfsBlockSize), SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    const ssize_t bytes_read = read(fd, data, kBlobfsBlockSize);
    if (bytes_read != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot read block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}
// Writes block |bno| (kBlobfsBlockSize bytes) from |data| to |fd|.
// Returns ZX_OK on success, ZX_ERR_IO on a seek or short/failed write.
zx_status_t writeblk(int fd, uint64_t bno, const void* data) {
    if (lseek(fd, static_cast<off_t>(bno * kBlobfsBlockSize), SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    const ssize_t bytes_written = write(fd, data, kBlobfsBlockSize);
    if (bytes_written != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot write block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}
// Number of inode-table blocks required to hold |inode_count| inodes.
uint32_t BlocksRequiredForInode(uint64_t inode_count) {
    // Ceiling division: round up to a whole block of inodes.
    const uint64_t blocks =
        fbl::round_up(inode_count, kBlobfsInodesPerBlock) / kBlobfsInodesPerBlock;
    return safemath::checked_cast<uint32_t>(blocks);
}
// Number of bitmap blocks required to hold |bit_count| bits.
uint32_t BlocksRequiredForBits(uint64_t bit_count) {
    // Ceiling division: round up to a whole block of bits.
    const uint64_t blocks = fbl::round_up(bit_count, kBlobfsBlockBits) / kBlobfsBlockBits;
    return safemath::checked_cast<uint32_t>(blocks);
}
// Suggests a journal size: grant the journal every |available| spare block on
// top of its |current| allocation. Unsigned arithmetic, so the sum wraps on
// overflow (same as the original).
uint32_t SuggestJournalBlocks(uint32_t current, uint32_t available) {
    uint32_t suggested = current;
    suggested += available;
    return suggested;
}
// Formats the device backing |fd| as an empty blobfs.
//
// |block_count| is the device capacity in kBlobfsBlockSize-sized blocks. On
// Fuchsia, if the device speaks the FVM volume protocol, the minimum slices
// for the block map, node map, journal, and data regions are allocated before
// metadata is written. Returns 0 on success; -1 or a negative zx_status_t on
// failure.
int Mkfs(int fd, uint64_t block_count) {
    uint64_t inodes = kBlobfsDefaultInodeCount;

    Superblock info;
    memset(&info, 0x00, sizeof(info));
    info.magic0 = kBlobfsMagic0;
    info.magic1 = kBlobfsMagic1;
    info.version = kBlobfsVersion;
    info.flags = kBlobFlagClean;
    info.block_size = kBlobfsBlockSize;
    // TODO(planders): Consider modifying the inode count if we are low on space.
    //                 It doesn't make sense to have fewer data blocks than inodes.
    info.inode_count = inodes;
    info.alloc_block_count = 0;
    info.alloc_inode_count = 0;
    info.blob_header_next = 0; // TODO(smklein): Allow chaining

    // Temporarily set the data_block_count to the total block_count so we can
    // estimate the number of pre-data blocks.
    info.data_block_count = block_count;

    // The result of DataStartBlock(info) is based on the current value of
    // info.data_block_count. As a result, the block bitmap may have slightly
    // more space allocated than is necessary.
    size_t usable_blocks = JournalStartBlock(info) < block_count
                           ? block_count - JournalStartBlock(info)
                           : 0;

    // Determine allocation for the journal vs. data blocks based on the number
    // of blocks remaining.
    if (usable_blocks >= kDefaultJournalBlocks * 2) {
        // Regular-sized partition, capable of fitting a data region
        // at least as large as the journal. Give all excess blocks
        // to the data region.
        info.journal_block_count = kDefaultJournalBlocks;
        info.data_block_count = usable_blocks - kDefaultJournalBlocks;
    } else if (usable_blocks >= kMinimumDataBlocks + kMinimumJournalBlocks) {
        // On smaller partitions, give both regions the minimum amount of space,
        // and split the remainder. The choice of where to allocate the
        // "remainder" is arbitrary.
        const size_t remainder_blocks =
            usable_blocks - (kMinimumDataBlocks + kMinimumJournalBlocks);
        const size_t remainder_for_journal = remainder_blocks / 2;
        const size_t remainder_for_data = remainder_blocks - remainder_for_journal;
        info.journal_block_count = kMinimumJournalBlocks + remainder_for_journal;
        info.data_block_count = kMinimumDataBlocks + remainder_for_data;
    } else {
        // Partition too small; reported below by the minimum-size checks.
        info.journal_block_count = 0;
        info.data_block_count = 0;
    }

    zx_status_t status;
#ifdef __Fuchsia__
    fuchsia_hardware_block_volume_VolumeInfo fvm_info;
    fzl::UnownedFdioCaller caller(fd);
    // Querying may be used to confirm if the underlying connection is capable of
    // communicating the FVM protocol. Clone the connection, since if the block
    // device does NOT speak the Volume protocol, the connection is terminated.
    zx::channel connection(fdio_service_clone(caller.borrow_channel()));
    zx_status_t io_status;
    io_status = fuchsia_hardware_block_volume_VolumeQuery(connection.get(), &status, &fvm_info);
    if (io_status == ZX_OK && status == ZX_OK) {
        info.slice_size = fvm_info.slice_size;
        info.flags |= kBlobFlagFVM;

        if (info.slice_size % kBlobfsBlockSize) {
            FS_TRACE_ERROR("blobfs mkfs: Slice size not multiple of blobfs block\n");
            return -1;
        }

        // Start from a clean volume: release any previously allocated slices.
        if (fvm::ResetAllSlices(fd) != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to reset slices\n");
            return -1;
        }

        const size_t kBlocksPerSlice = info.slice_size / kBlobfsBlockSize;

        uint64_t offset = kFVMBlockMapStart / kBlocksPerSlice;
        uint64_t length = 1;
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate block map\n");
            return -1;
        }

        offset = kFVMNodeMapStart / kBlocksPerSlice;
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate node map\n");
            return -1;
        }

        // Allocate the minimum number of journal blocks in FVM.
        offset = kFVMJournalStart / kBlocksPerSlice;
        length = fbl::round_up(kDefaultJournalBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.journal_slices = static_cast<uint32_t>(length);
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate journal blocks\n");
            return -1;
        }

        // Allocate the minimum number of data blocks in the FVM.
        offset = kFVMDataStart / kBlocksPerSlice;
        length = fbl::round_up(kMinimumDataBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.dat_slices = static_cast<uint32_t>(length);
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate data blocks\n");
            return -1;
        }

        info.abm_slices = 1;
        info.ino_slices = 1;
        // +1 accounts for the superblock slice.
        info.vslice_count = info.abm_slices + info.ino_slices + info.dat_slices +
                            info.journal_slices + 1;
        // Recompute counts from the slices actually allocated.
        info.inode_count = static_cast<uint32_t>(info.ino_slices * info.slice_size
                                                 / kBlobfsInodeSize);
        info.data_block_count = static_cast<uint32_t>(info.dat_slices * info.slice_size
                                                      / kBlobfsBlockSize);
        info.journal_block_count = static_cast<uint32_t>(info.journal_slices * info.slice_size
                                                         / kBlobfsBlockSize);
    }
#endif

    FS_TRACE_DEBUG("Blobfs Mkfs\n");
    FS_TRACE_DEBUG("Disk size : %" PRIu64 "\n", block_count * kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Size : %u\n", kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Count: %" PRIu64 "\n", TotalBlocks(info));
    FS_TRACE_DEBUG("Inode Count: %" PRIu64 "\n", inodes);
    FS_TRACE_DEBUG("FVM-aware: %s\n", (info.flags & kBlobFlagFVM) ? "YES" : "NO");

    if (info.data_block_count < kMinimumDataBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum data partition\n");
        return -1;
    }
    if (info.journal_block_count < kMinimumJournalBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum journal partition\n");
        return -1;
    }

    // Determine the number of blocks necessary for the block map and node map.
    uint64_t bbm_blocks = BlockMapBlocks(info);
    uint64_t nbm_blocks = NodeMapBlocks(info);

    RawBitmap abm;
    if (abm.Reset(bbm_blocks * kBlobfsBlockBits) != ZX_OK) {
        FS_TRACE_ERROR("Couldn't allocate blobfs block map\n");
        return -1;
    } else if (abm.Shrink(info.data_block_count) != ZX_OK) {
        FS_TRACE_ERROR("Couldn't shrink blobfs block map\n");
        return -1;
    }

    // Reserve first |kStartBlockMinimum| data blocks
    abm.Set(0, kStartBlockMinimum);
    info.alloc_block_count += kStartBlockMinimum;

    if (info.inode_count * sizeof(Inode) != nbm_blocks * kBlobfsBlockSize) {
        FS_TRACE_ERROR("For simplicity, inode table block must be entirely filled\n");
        return -1;
    }

    // All in-memory structures have been created successfully. Dump everything
    // to disk.
    char block[kBlobfsBlockSize];

    // Write the journal info block.
    memset(block, 0, sizeof(block));
    JournalInfo* journal_info = reinterpret_cast<JournalInfo*>(block);
    journal_info->magic = kJournalMagic;
    if ((status = writeblk(fd, JournalStartBlock(info), block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write journal block\n");
        return status;
    }

    // Write the root (super)block to disk.
    // Fix: the original cleared only sizeof(journal_info) bytes — the size of a
    // POINTER (8 bytes), not the block — leaving stale journal bytes in the
    // scratch buffer past the copied superblock. Clear the whole block.
    memset(block, 0, sizeof(block));
    memcpy(block, &info, sizeof(info));
    if ((status = writeblk(fd, 0, block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write root block\n");
        return status;
    }

    // Write allocation bitmap to disk.
    for (uint64_t n = 0; n < bbm_blocks; n++) {
        void* bmdata = GetRawBitmapData(abm, n);
        // Compare against ZX_OK explicitly (the original's `< 0` worked only
        // because zx error codes are negative).
        if ((status = writeblk(fd, BlockMapStartBlock(info) + n, bmdata)) != ZX_OK) {
            FS_TRACE_ERROR("Failed to write blockmap block %" PRIu64 "\n", n);
            return status;
        }
    }

    // Write node map (zeroed inode table) to disk.
    for (uint64_t n = 0; n < nbm_blocks; n++) {
        memset(block, 0, sizeof(block));
        if (writeblk(fd, NodeMapStartBlock(info) + n, block) != ZX_OK) {
            FS_TRACE_ERROR("blobfs: failed writing inode map\n");
            return ZX_ERR_IO;
        }
    }

    FS_TRACE_DEBUG("BLOBFS: mkfs success\n");
    return 0;
}
} // namespace blobfs