blob: ed61afb07e2fb2413bc7fc3dbd401dbb679ceb06 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fcntl.h>
#include <inttypes.h>
#include <limits>
#include <safemath/checked_math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <digest/digest.h>
#include <digest/merkle-tree.h>
#include <fs/block-txn.h>
#include <fs/trace.h>
#ifdef __Fuchsia__
#include <fuchsia/hardware/block/c/fidl.h>
#include <fuchsia/hardware/block/volume/c/fidl.h>
#include <fvm/client.h>
#include <lib/fdio/directory.h>
#include <lib/fzl/fdio.h>
#endif
#include <blobfs/common.h>
using digest::Digest;
using digest::MerkleTree;
namespace blobfs {
namespace {
// Dumps the content of superblock to |out|. Does nothing if |out| is nullptr.
// Dumps the content of superblock to |out|. Does nothing if |out| is nullptr.
void DumpSuperblock(const Superblock& info, FILE* out) {
    if (out == nullptr) {
        return;
    }
    // One field per call; the emitted byte stream is identical to a single
    // concatenated fprintf of all fields.
    fprintf(out, "info.magic0: %" PRIu64 "\n", info.magic0);
    fprintf(out, "info.magic1: %" PRIu64 "\n", info.magic1);
    fprintf(out, "info.version: %" PRIu32 "\n", info.version);
    fprintf(out, "info.flags: %" PRIu32 "\n", info.flags);
    fprintf(out, "info.block_size: %" PRIu32 "\n", info.block_size);
    fprintf(out, "info.data_block_count: %" PRIu64 "\n", info.data_block_count);
    fprintf(out, "info.journal_block_count: %" PRIu64 "\n", info.journal_block_count);
    fprintf(out, "info.inode_count: %" PRIu64 "\n", info.inode_count);
    fprintf(out, "info.alloc_block_count: %" PRIu64 "\n", info.alloc_block_count);
    fprintf(out, "info.alloc_inode_count: %" PRIu64 "\n", info.alloc_inode_count);
    fprintf(out, "info.blob_header_next: %" PRIu64 "\n", info.blob_header_next);
    fprintf(out, "info.slice_size: %" PRIu64 "\n", info.slice_size);
    fprintf(out, "info.vslice_count: %" PRIu64 "\n", info.vslice_count);
    fprintf(out, "info.abm_slices: %" PRIu32 "\n", info.abm_slices);
    fprintf(out, "info.ino_slices: %" PRIu32 "\n", info.ino_slices);
    fprintf(out, "info.dat_slices: %" PRIu32 "\n", info.dat_slices);
    fprintf(out, "info.journal_slices: %" PRIu32 "\n", info.journal_slices);
}
} // namespace
// Number of blocks reserved for the Merkle Tree
// Number of blocks reserved for the Merkle Tree of |blobNode|'s payload.
uint32_t MerkleTreeBlocks(const Inode& blobNode) {
    // Byte length of the Merkle tree for a blob of this size.
    const uint64_t merkle_bytes = MerkleTree::GetTreeLength(blobNode.blob_size);
    // The tree length must fit in 32 bits before the narrowing cast below.
    ZX_DEBUG_ASSERT(merkle_bytes <= std::numeric_limits<uint32_t>::max());
    // Round up to whole blobfs blocks.
    return fbl::round_up(static_cast<uint32_t>(merkle_bytes), kBlobfsBlockSize) /
           kBlobfsBlockSize;
}
// Sanity check the metadata for the blobfs, given a maximum number of
// available blocks.
// Sanity check the metadata for the blobfs, given a maximum number of
// available blocks (|max|, in kBlobfsBlockSize units).
//
// Returns ZX_OK when |info| is internally consistent and fits the device,
// ZX_ERR_INVALID_ARGS otherwise (dumping the superblock to stderr for every
// failure except a bad magic).
zx_status_t CheckSuperblock(const Superblock* info, uint64_t max) {
    // Magic check first; an arbitrary block is not worth dumping.
    if (info->magic0 != kBlobfsMagic0 || info->magic1 != kBlobfsMagic1) {
        FS_TRACE_ERROR("blobfs: bad magic\n");
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->version != kBlobfsVersion) {
        FS_TRACE_ERROR("blobfs: FS Version: %08x. Driver version: %08x\n", info->version,
                       kBlobfsVersion);
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }
    if (info->block_size != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: bsz %u unsupported\n", info->block_size);
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }

    if ((info->flags & kBlobFlagFVM) == 0) {
        // Non-FVM: the whole filesystem must fit on the raw device.
        if (TotalBlocks(*info) > max) {
            FS_TRACE_ERROR("blobfs: too large for device\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
    } else {
        // FVM: each metadata region must fit in its allocated slices, and the
        // regions must not collide. block_size was validated above, so the
        // division is safe.
        const size_t blocks_per_slice = info->slice_size / info->block_size;

        // Block allocation bitmap.
        const size_t abm_allocated = info->abm_slices * blocks_per_slice;
        if (BlockMapBlocks(*info) > abm_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for block bitmap\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (abm_allocated + BlockMapStartBlock(*info) >= NodeMapStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Block bitmap collides into node map\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }

        // Inode (node) map.
        const size_t ino_allocated = info->ino_slices * blocks_per_slice;
        if (NodeMapBlocks(*info) > ino_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for node map\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (ino_allocated + NodeMapStartBlock(*info) >= DataStartBlock(*info)) {
            FS_TRACE_ERROR("blobfs: Node bitmap collides into data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }

        // Data region.
        const size_t dat_needed = DataBlocks(*info);
        const size_t dat_allocated = info->dat_slices * blocks_per_slice;
        if (dat_needed < kStartBlockMinimum) {
            FS_TRACE_ERROR("blobfs: Partition too small; no space left for data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (dat_needed > dat_allocated) {
            FS_TRACE_ERROR("blobfs: Not enough slices for data blocks\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
        if (dat_allocated + DataStartBlock(*info) > std::numeric_limits<uint32_t>::max()) {
            FS_TRACE_ERROR("blobfs: Data blocks overflow uint32\n");
            DumpSuperblock(*info, stderr);
            return ZX_ERR_INVALID_ARGS;
        }
    }

    if (info->blob_header_next != 0) {
        FS_TRACE_ERROR("blobfs: linked blob headers not yet supported\n");
        DumpSuperblock(*info, stderr);
        return ZX_ERR_INVALID_ARGS;
    }
    return ZX_OK;
}
// Stores the number of kBlobfsBlockSize-sized blocks backing |fd| in |*out|.
//
// On Fuchsia this queries the block device over FIDL; on the host it uses the
// file size reported by fstat(). Returns ZX_OK on success.
zx_status_t GetBlockCount(int fd, uint64_t* out) {
#ifdef __Fuchsia__
    fzl::UnownedFdioCaller caller(fd);
    fuchsia_hardware_block_BlockInfo block_info;
    zx_status_t op_status;
    const zx_status_t io_status = fuchsia_hardware_block_BlockGetInfo(
        caller.borrow_channel(), &op_status, &block_info);
    // Transport failure takes precedence over the operation's own status.
    if (io_status != ZX_OK) {
        return io_status;
    }
    if (op_status != ZX_OK) {
        return op_status;
    }
    // Convert the device's native block geometry into blobfs blocks.
    *out = (block_info.block_size * block_info.block_count) / kBlobfsBlockSize;
#else
    struct stat stats;
    if (fstat(fd, &stats) < 0) {
        return ZX_ERR_BAD_STATE;
    }
    *out = stats.st_size / kBlobfsBlockSize;
#endif
    return ZX_OK;
}
// Reads block |bno| (kBlobfsBlockSize bytes) from |fd| into |data|.
// Returns ZX_OK on success, ZX_ERR_IO on a seek or short/failed read.
zx_status_t readblk(int fd, uint64_t bno, void* data) {
    if (lseek(fd, static_cast<off_t>(bno * kBlobfsBlockSize), SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    const ssize_t bytes_read = read(fd, data, kBlobfsBlockSize);
    if (bytes_read != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot read block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}
// Writes block |bno| (kBlobfsBlockSize bytes) from |data| to |fd|.
// Returns ZX_OK on success, ZX_ERR_IO on a seek or short/failed write.
zx_status_t writeblk(int fd, uint64_t bno, const void* data) {
    if (lseek(fd, static_cast<off_t>(bno * kBlobfsBlockSize), SEEK_SET) < 0) {
        FS_TRACE_ERROR("blobfs: cannot seek to block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    const ssize_t bytes_written = write(fd, data, kBlobfsBlockSize);
    if (bytes_written != kBlobfsBlockSize) {
        FS_TRACE_ERROR("blobfs: cannot write block %" PRIu64 "\n", bno);
        return ZX_ERR_IO;
    }
    return ZX_OK;
}
// Number of inode-table blocks required to hold |inode_count| inodes.
uint32_t BlocksRequiredForInode(uint64_t inode_count) {
    // Ceiling division: round up to a whole block of inodes.
    const uint64_t blocks =
        fbl::round_up(inode_count, kBlobfsInodesPerBlock) / kBlobfsInodesPerBlock;
    return safemath::checked_cast<uint32_t>(blocks);
}
// Number of bitmap blocks required to hold |bit_count| bits.
uint32_t BlocksRequiredForBits(uint64_t bit_count) {
    // Ceiling division: round up to a whole block of bits.
    const uint64_t blocks = fbl::round_up(bit_count, kBlobfsBlockBits) / kBlobfsBlockBits;
    return safemath::checked_cast<uint32_t>(blocks);
}
// Suggests a journal size: grant the journal every |available| spare block on
// top of its |current| allocation. Unsigned arithmetic, so the sum wraps on
// overflow (same as the original).
uint32_t SuggestJournalBlocks(uint32_t current, uint32_t available) {
    uint32_t suggested = current;
    suggested += available;
    return suggested;
}
// Formats the device backing |fd| as an empty blobfs.
//
// |block_count| is the device capacity in kBlobfsBlockSize-sized blocks. On
// Fuchsia, if the device speaks the FVM volume protocol, the minimum slices
// for the block map, node map, journal, and data regions are allocated before
// metadata is written. Returns 0 on success; -1 or a negative zx_status_t on
// failure.
int Mkfs(int fd, uint64_t block_count) {
    uint64_t inodes = kBlobfsDefaultInodeCount;

    Superblock info;
    memset(&info, 0x00, sizeof(info));
    info.magic0 = kBlobfsMagic0;
    info.magic1 = kBlobfsMagic1;
    info.version = kBlobfsVersion;
    info.flags = kBlobFlagClean;
    info.block_size = kBlobfsBlockSize;
    // TODO(planders): Consider modifying the inode count if we are low on space.
    //                 It doesn't make sense to have fewer data blocks than inodes.
    info.inode_count = inodes;
    info.alloc_block_count = 0;
    info.alloc_inode_count = 0;
    info.blob_header_next = 0; // TODO(smklein): Allow chaining

    // Temporarily set the data_block_count to the total block_count so we can
    // estimate the number of pre-data blocks.
    info.data_block_count = block_count;

    // The result of DataStartBlock(info) is based on the current value of
    // info.data_block_count. As a result, the block bitmap may have slightly
    // more space allocated than is necessary.
    size_t usable_blocks = JournalStartBlock(info) < block_count
                           ? block_count - JournalStartBlock(info)
                           : 0;

    // Determine allocation for the journal vs. data blocks based on the number
    // of blocks remaining.
    if (usable_blocks >= kDefaultJournalBlocks * 2) {
        // Regular-sized partition, capable of fitting a data region
        // at least as large as the journal. Give all excess blocks
        // to the data region.
        info.journal_block_count = kDefaultJournalBlocks;
        info.data_block_count = usable_blocks - kDefaultJournalBlocks;
    } else if (usable_blocks >= kMinimumDataBlocks + kMinimumJournalBlocks) {
        // On smaller partitions, give both regions the minimum amount of space,
        // and split the remainder. The choice of where to allocate the
        // "remainder" is arbitrary.
        const size_t remainder_blocks =
            usable_blocks - (kMinimumDataBlocks + kMinimumJournalBlocks);
        const size_t remainder_for_journal = remainder_blocks / 2;
        const size_t remainder_for_data = remainder_blocks - remainder_for_journal;
        info.journal_block_count = kMinimumJournalBlocks + remainder_for_journal;
        info.data_block_count = kMinimumDataBlocks + remainder_for_data;
    } else {
        // Partition too small; reported below by the minimum-size checks.
        info.journal_block_count = 0;
        info.data_block_count = 0;
    }

    zx_status_t status;
#ifdef __Fuchsia__
    fuchsia_hardware_block_volume_VolumeInfo fvm_info;
    fzl::UnownedFdioCaller caller(fd);
    // Querying may be used to confirm if the underlying connection is capable of
    // communicating the FVM protocol. Clone the connection, since if the block
    // device does NOT speak the Volume protocol, the connection is terminated.
    zx::channel connection(fdio_service_clone(caller.borrow_channel()));
    zx_status_t io_status;
    io_status = fuchsia_hardware_block_volume_VolumeQuery(connection.get(), &status, &fvm_info);
    if (io_status == ZX_OK && status == ZX_OK) {
        info.slice_size = fvm_info.slice_size;
        info.flags |= kBlobFlagFVM;

        if (info.slice_size % kBlobfsBlockSize) {
            FS_TRACE_ERROR("blobfs mkfs: Slice size not multiple of blobfs block\n");
            return -1;
        }

        // Start from a clean volume: release any previously allocated slices.
        if (fvm::ResetAllSlices(fd) != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to reset slices\n");
            return -1;
        }

        const size_t kBlocksPerSlice = info.slice_size / kBlobfsBlockSize;

        uint64_t offset = kFVMBlockMapStart / kBlocksPerSlice;
        uint64_t length = 1;
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate block map\n");
            return -1;
        }

        offset = kFVMNodeMapStart / kBlocksPerSlice;
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate node map\n");
            return -1;
        }

        // Allocate the minimum number of journal blocks in FVM.
        offset = kFVMJournalStart / kBlocksPerSlice;
        length = fbl::round_up(kDefaultJournalBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.journal_slices = static_cast<uint32_t>(length);
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate journal blocks\n");
            return -1;
        }

        // Allocate the minimum number of data blocks in the FVM.
        offset = kFVMDataStart / kBlocksPerSlice;
        length = fbl::round_up(kMinimumDataBlocks, kBlocksPerSlice) / kBlocksPerSlice;
        info.dat_slices = static_cast<uint32_t>(length);
        io_status = fuchsia_hardware_block_volume_VolumeExtend(caller.borrow_channel(), offset,
                                                               length, &status);
        if (io_status != ZX_OK || status != ZX_OK) {
            FS_TRACE_ERROR("blobfs mkfs: Failed to allocate data blocks\n");
            return -1;
        }

        info.abm_slices = 1;
        info.ino_slices = 1;
        // +1 accounts for the superblock slice.
        info.vslice_count = info.abm_slices + info.ino_slices + info.dat_slices +
                            info.journal_slices + 1;
        // Recompute counts from the slices actually allocated.
        info.inode_count = static_cast<uint32_t>(info.ino_slices * info.slice_size
                                                 / kBlobfsInodeSize);
        info.data_block_count = static_cast<uint32_t>(info.dat_slices * info.slice_size
                                                      / kBlobfsBlockSize);
        info.journal_block_count = static_cast<uint32_t>(info.journal_slices * info.slice_size
                                                         / kBlobfsBlockSize);
    }
#endif

    FS_TRACE_DEBUG("Blobfs Mkfs\n");
    FS_TRACE_DEBUG("Disk size : %" PRIu64 "\n", block_count * kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Size : %u\n", kBlobfsBlockSize);
    FS_TRACE_DEBUG("Block Count: %" PRIu64 "\n", TotalBlocks(info));
    FS_TRACE_DEBUG("Inode Count: %" PRIu64 "\n", inodes);
    FS_TRACE_DEBUG("FVM-aware: %s\n", (info.flags & kBlobFlagFVM) ? "YES" : "NO");

    if (info.data_block_count < kMinimumDataBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum data partition\n");
        return -1;
    }
    if (info.journal_block_count < kMinimumJournalBlocks) {
        FS_TRACE_ERROR("blobfs mkfs: Not enough space for minimum journal partition\n");
        return -1;
    }

    // Determine the number of blocks necessary for the block map and node map.
    uint64_t bbm_blocks = BlockMapBlocks(info);
    uint64_t nbm_blocks = NodeMapBlocks(info);

    RawBitmap abm;
    if (abm.Reset(bbm_blocks * kBlobfsBlockBits) != ZX_OK) {
        FS_TRACE_ERROR("Couldn't allocate blobfs block map\n");
        return -1;
    } else if (abm.Shrink(info.data_block_count) != ZX_OK) {
        FS_TRACE_ERROR("Couldn't shrink blobfs block map\n");
        return -1;
    }

    // Reserve first |kStartBlockMinimum| data blocks
    abm.Set(0, kStartBlockMinimum);
    info.alloc_block_count += kStartBlockMinimum;

    if (info.inode_count * sizeof(Inode) != nbm_blocks * kBlobfsBlockSize) {
        FS_TRACE_ERROR("For simplicity, inode table block must be entirely filled\n");
        return -1;
    }

    // All in-memory structures have been created successfully. Dump everything
    // to disk.
    char block[kBlobfsBlockSize];

    // Write the journal info block.
    memset(block, 0, sizeof(block));
    JournalInfo* journal_info = reinterpret_cast<JournalInfo*>(block);
    journal_info->magic = kJournalMagic;
    if ((status = writeblk(fd, JournalStartBlock(info), block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write journal block\n");
        return status;
    }

    // Write the root (super)block to disk.
    // Fix: the original cleared only sizeof(journal_info) bytes — the size of a
    // POINTER (8 bytes), not the block — leaving stale journal bytes in the
    // scratch buffer past the copied superblock. Clear the whole block.
    memset(block, 0, sizeof(block));
    memcpy(block, &info, sizeof(info));
    if ((status = writeblk(fd, 0, block)) != ZX_OK) {
        FS_TRACE_ERROR("Failed to write root block\n");
        return status;
    }

    // Write allocation bitmap to disk.
    for (uint64_t n = 0; n < bbm_blocks; n++) {
        void* bmdata = GetRawBitmapData(abm, n);
        // Compare against ZX_OK explicitly (the original's `< 0` worked only
        // because zx error codes are negative).
        if ((status = writeblk(fd, BlockMapStartBlock(info) + n, bmdata)) != ZX_OK) {
            FS_TRACE_ERROR("Failed to write blockmap block %" PRIu64 "\n", n);
            return status;
        }
    }

    // Write node map (zeroed inode table) to disk.
    for (uint64_t n = 0; n < nbm_blocks; n++) {
        memset(block, 0, sizeof(block));
        if (writeblk(fd, NodeMapStartBlock(info) + n, block) != ZX_OK) {
            FS_TRACE_ERROR("blobfs: failed writing inode map\n");
            return ZX_ERR_IO;
        }
    }

    FS_TRACE_DEBUG("BLOBFS: mkfs success\n");
    return 0;
}
} // namespace blobfs