// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <threads.h>

#include <algorithm>
#include <limits>
#include <memory>
#include <new>

#include <ddk/binding.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/metadata.h>
#include <ddktl/device.h>
#include <ddktl/protocol/block.h>
#include <ddktl/protocol/block/partition.h>
#include <ddktl/protocol/block/volume.h>
#include <fbl/auto_lock.h>
#include <fbl/mutex.h>
#include <lib/sync/completion.h>
#include <lib/zx/fifo.h>
#include <lib/zx/vmo.h>
#include <zircon/assert.h>
#include <zircon/boot/image.h>
#include <zircon/device/block.h>
#include <zircon/process.h>
#include <zircon/thread_annotations.h>

#include "server.h"
#include "server-manager.h"
class BlockDevice;
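
// DDK mixins: the device implements the get_protocol, ioctl, unbind, read,
// write, and get_size hooks.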
using BlockDeviceType = ddk::Device<BlockDevice,
ddk::GetProtocolable,
ddk::Ioctlable,
ddk::Unbindable,
ddk::Readable,
ddk::Writable,
ddk::GetSizable>;
class BlockDevice : public BlockDeviceType,
public ddk::BlockProtocol<BlockDevice, ddk::base_protocol> {
public:
BlockDevice(zx_device_t* parent)
: BlockDeviceType(parent),
parent_protocol_(parent),
parent_partition_protocol_(parent),
parent_volume_protocol_(parent) {
block_protocol_t self { &block_protocol_ops_, this };
self_protocol_ = ddk::BlockProtocolClient(&self);
}
static zx_status_t Bind(void* ctx, zx_device_t* dev);
void DdkUnbind();
void DdkRelease();
zx_status_t DdkGetProtocol(uint32_t proto_id, void* out_protocol);
zx_status_t DdkIoctl(uint32_t op, const void* cmd, size_t cmd_len,
void* reply, size_t reply_len, size_t* out_actual);
zx_status_t DdkRead(void* buf, size_t buf_len, zx_off_t off, size_t* actual);
zx_status_t DdkWrite(const void* buf, size_t buf_len, zx_off_t off, size_t* actual);
zx_off_t DdkGetSize();
void BlockQuery(block_info_t* block_info, size_t* op_size);
void BlockQueue(block_op_t* op, block_impl_queue_callback completion_cb, void* cookie);
zx_status_t GetStats(const void* cmd, size_t cmd_len, void* reply, size_t reply_len,
size_t* out_actual);
private:
static int ServerThread(void* arg);
zx_status_t GetFifos(zx_handle_t* out_buf, size_t out_len, size_t* out_actual);
zx_status_t AttachVmo(const void* in_buf, size_t in_len, vmoid_t* out_buf,
size_t out_len, size_t* out_actual);
zx_status_t Rebind();
zx_status_t DoIo(void* buf, size_t buf_len, zx_off_t off, bool write);
// The block protocol of the device we are binding against.
ddk::BlockImplProtocolClient parent_protocol_;
// An optional partition protocol, if supported by the parent device.
ddk::BlockPartitionProtocolClient parent_partition_protocol_;
// An optional volume protocol, if supported by the parent device.
ddk::BlockVolumeProtocolClient parent_volume_protocol_;
// The block protocol for ourselves, which redirects to the parent protocol,
// but may also collect auxiliary information like statistics.
ddk::BlockProtocolClient self_protocol_;
block_info_t info_ = {};
size_t block_op_size_ = 0;
// True if we have metadata for a ZBI partition map.
bool has_bootpart_ = false;
// Manages the background FIFO server.
ServerManager server_manager_;
fbl::Mutex io_lock_;
zx::vmo io_vmo_ TA_GUARDED(io_lock_);
zx_status_t io_status_ = ZX_OK;
sync_completion_t io_signal_;
std::unique_ptr<uint8_t[]> io_op_;
fbl::Mutex stat_lock_;
// TODO(kmerrick): have this start as false and add an ioctl to toggle it.
bool enable_stats_ TA_GUARDED(stat_lock_) = true;
block_stats_t stats_ TA_GUARDED(stat_lock_) = {};
};
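
// IOCTL_BLOCK_GET_FIFOS: starts the background FIFO server and returns the
// client end of its FIFO to the caller.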
zx_status_t BlockDevice::GetFifos(zx_handle_t* out_buf, size_t out_len, size_t* out_actual) {
if (out_len < sizeof(zx_handle_t)) {
return ZX_ERR_INVALID_ARGS;
}
zx::fifo fifo;
zx_status_t status = server_manager_.StartServer(&self_protocol_, &fifo);
if (status != ZX_OK) {
return status;
}
*out_buf = fifo.release();
*out_actual = sizeof(zx_handle_t);
return ZX_OK;
}
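
// IOCTL_BLOCK_ATTACH_VMO: registers a client VMO with the FIFO server and
// returns the vmoid used to reference it in FIFO requests.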
zx_status_t BlockDevice::AttachVmo(const void* in_buf, size_t in_len, vmoid_t* out_buf,
size_t out_len, size_t* out_actual) {
if ((in_len < sizeof(zx_handle_t)) || (out_len < sizeof(vmoid_t))) {
return ZX_ERR_INVALID_ARGS;
}
zx::vmo vmo(*reinterpret_cast<const zx_handle_t*>(in_buf));
zx_status_t status = server_manager_.AttachVmo(std::move(vmo),
reinterpret_cast<vmoid_t*>(out_buf));
if (status != ZX_OK) {
return status;
}
*out_actual = sizeof(vmoid_t);
return ZX_OK;
}
zx_status_t BlockDevice::Rebind() {
// Remove our existing children and request that new ones be bound.
return device_rebind(zxdev());
}
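
// Serves ZX_PROTOCOL_BLOCK from this device; the optional partition and
// volume protocols are forwarded to the parent when it supports them.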
zx_status_t BlockDevice::DdkGetProtocol(uint32_t proto_id, void* out_protocol) {
switch (proto_id) {
case ZX_PROTOCOL_BLOCK: {
self_protocol_.GetProto(static_cast<block_protocol_t*>(out_protocol));
return ZX_OK;
}
case ZX_PROTOCOL_BLOCK_PARTITION: {
if (!parent_partition_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
parent_partition_protocol_.GetProto(static_cast<block_partition_protocol_t*>(out_protocol));
return ZX_OK;
}
case ZX_PROTOCOL_BLOCK_VOLUME: {
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
parent_volume_protocol_.GetProto(static_cast<block_volume_protocol_t*>(out_protocol));
return ZX_OK;
}
default:
return ZX_ERR_NOT_SUPPORTED;
}
}
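
// Dispatches the block device ioctls. Partition and FVM operations require
// the corresponding optional parent protocol and fail with
// ZX_ERR_NOT_SUPPORTED otherwise.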
zx_status_t BlockDevice::DdkIoctl(uint32_t op, const void* cmd, size_t cmd_len, void* reply,
size_t reply_len, size_t* out_actual) {
switch (op) {
case IOCTL_BLOCK_GET_FIFOS:
return GetFifos(reinterpret_cast<zx_handle_t*>(reply), reply_len, out_actual);
case IOCTL_BLOCK_ATTACH_VMO:
return AttachVmo(cmd, cmd_len, reinterpret_cast<vmoid_t*>(reply), reply_len, out_actual);
case IOCTL_BLOCK_FIFO_CLOSE: {
return server_manager_.CloseFifoServer();
}
case IOCTL_BLOCK_RR_PART:
return Rebind();
case IOCTL_BLOCK_GET_INFO: {
block_info_t* info = static_cast<block_info_t*>(reply);
if (reply_len < sizeof(*info)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
size_t block_op_size = 0;
parent_protocol_.Query(info, &block_op_size);
// Set or clear BLOCK_FLAG_BOOTPART depending on whether ZBI partition map
// metadata was found at bind time.
if (has_bootpart_) {
info->flags |= BLOCK_FLAG_BOOTPART;
} else {
info->flags &= ~BLOCK_FLAG_BOOTPART;
}
*out_actual = sizeof(block_info_t);
return ZX_OK;
}
case IOCTL_BLOCK_GET_STATS: {
return GetStats(cmd, cmd_len, reply, reply_len, out_actual);
}
case IOCTL_BLOCK_GET_TYPE_GUID: {
if (!parent_partition_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
guid_t* guid = static_cast<guid_t*>(reply);
if (reply_len < GUID_LENGTH) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
zx_status_t status = parent_partition_protocol_.GetGuid(GUIDTYPE_TYPE, guid);
if (status != ZX_OK) {
return status;
}
*out_actual = GUID_LENGTH;
return ZX_OK;
}
case IOCTL_BLOCK_GET_PARTITION_GUID: {
if (!parent_partition_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
guid_t* guid = static_cast<guid_t*>(reply);
if (reply_len < GUID_LENGTH) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
zx_status_t status = parent_partition_protocol_.GetGuid(GUIDTYPE_INSTANCE, guid);
if (status != ZX_OK) {
return status;
}
*out_actual = GUID_LENGTH;
return ZX_OK;
}
case IOCTL_BLOCK_GET_NAME: {
if (!parent_partition_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
char* name = static_cast<char*>(reply);
zx_status_t status = parent_partition_protocol_.GetName(name, reply_len);
if (status != ZX_OK) {
return status;
}
*out_actual = strlen(name);
return status;
}
case IOCTL_BLOCK_FVM_EXTEND: {
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
if (cmd_len < sizeof(extend_request_t)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
auto request = static_cast<const extend_request_t*>(cmd);
slice_extent_t extent;
extent.offset = request->offset;
extent.length = request->length;
return parent_volume_protocol_.Extend(&extent);
}
case IOCTL_BLOCK_FVM_SHRINK: {
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
if (cmd_len < sizeof(extend_request_t)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
auto request = static_cast<const extend_request_t*>(cmd);
slice_extent_t extent;
extent.offset = request->offset;
extent.length = request->length;
return parent_volume_protocol_.Shrink(&extent);
}
case IOCTL_BLOCK_FVM_QUERY: {
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
if (reply_len < sizeof(fvm_info_t)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
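// The reply length was validated against fvm_info_t above; this assumes the
// banjo-generated parent_volume_info_t shares that layout.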
auto info = static_cast<parent_volume_info_t*>(reply);
zx_status_t status = parent_volume_protocol_.Query(info);
if (status != ZX_OK) {
return status;
}
*out_actual = sizeof(*info);
return ZX_OK;
}
case IOCTL_BLOCK_FVM_VSLICE_QUERY: {
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
if (cmd_len < sizeof(query_request_t)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
if (reply_len < sizeof(query_response_t)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
auto request = static_cast<const query_request_t*>(cmd);
if (request->count > MAX_FVM_VSLICE_REQUESTS) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
static_assert(sizeof(vslice_range_t) == sizeof(slice_region_t), "Size mismatch");
auto response = static_cast<query_response_t*>(reply);
response->count = 0;
slice_region_t* out_regions = reinterpret_cast<slice_region_t*>(response->vslice_range);
zx_status_t status = parent_volume_protocol_.QuerySlices(request->vslice_start,
request->count,
out_regions,
MAX_FVM_VSLICE_REQUESTS,
&response->count);
if (status != ZX_OK) {
return status;
}
*out_actual = sizeof(query_response_t);
return ZX_OK;
}
case IOCTL_BLOCK_FVM_DESTROY_PARTITION:
if (!parent_volume_protocol_.is_valid()) {
return ZX_ERR_NOT_SUPPORTED;
}
return parent_volume_protocol_.Destroy();
default:
return ZX_ERR_NOT_SUPPORTED;
}
}
// Adapter from read/write to block_op_t.
//
// This is technically incorrect because the read/write hooks should not block,
// but the old adapter in devhost was *also* blocking, so we're no worse off
// than before; the problem is now at least localized to the block middle
// layer.
// TODO(swetland): plumb devhosts to support deferred replies.

// Defines the maximum I/O possible for the midlayer; this is arbitrarily
// set to the size of RIO's max payload.
//
// If the parent device reports a smaller "max_transfer_size", that value is
// used instead.
constexpr uint32_t kMaxMidlayerIO = 8192;
zx_status_t BlockDevice::DoIo(void* buf, size_t buf_len, zx_off_t off, bool write) {
fbl::AutoLock lock(&io_lock_);
const size_t block_size = info_.block_size;
const size_t max_xfer = std::min(info_.max_transfer_size, kMaxMidlayerIO);
if (buf_len == 0) {
return ZX_OK;
}
if ((buf_len % block_size) || (off % block_size)) {
return ZX_ERR_INVALID_ARGS;
}
if (!io_vmo_) {
if (zx::vmo::create(std::max(max_xfer, static_cast<size_t>(PAGE_SIZE)),
0, &io_vmo_) != ZX_OK) {
return ZX_ERR_INTERNAL;
}
}
// TODO(smklein): These requests can be queued simultaneously without
// blocking. However, as the comment above mentions, this code probably
// shouldn't be blocking at all.
uint64_t sub_txn_offset = 0;
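// Split the request into synchronous sub-transactions of at most max_xfer
// bytes, each bounced through io_vmo_.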
while (sub_txn_offset < buf_len) {
void* sub_buf = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(buf) + sub_txn_offset);
size_t sub_txn_length = std::min(buf_len - sub_txn_offset, max_xfer);
if (write) {
if (io_vmo_.write(sub_buf, 0, sub_txn_length) != ZX_OK) {
return ZX_ERR_INTERNAL;
}
}
block_op_t* op = reinterpret_cast<block_op_t*>(io_op_.get());
op->command = write ? BLOCK_OP_WRITE : BLOCK_OP_READ;
ZX_DEBUG_ASSERT(sub_txn_length / block_size < std::numeric_limits<uint32_t>::max());
op->rw.length = static_cast<uint32_t>(sub_txn_length / block_size);
op->rw.vmo = io_vmo_.get();
op->rw.offset_dev = (off + sub_txn_offset) / block_size;
op->rw.offset_vmo = 0;
sync_completion_reset(&io_signal_);
auto completion_cb = [](void* cookie, zx_status_t status, block_op_t* op) {
BlockDevice* bdev = reinterpret_cast<BlockDevice*>(cookie);
bdev->io_status_ = status;
sync_completion_signal(&bdev->io_signal_);
};
BlockQueue(op, completion_cb, this);
sync_completion_wait(&io_signal_, ZX_TIME_INFINITE);
if (io_status_ != ZX_OK) {
return io_status_;
}
if (!write) {
if (io_vmo_.read(sub_buf, 0, sub_txn_length) != ZX_OK) {
return ZX_ERR_INTERNAL;
}
}
sub_txn_offset += sub_txn_length;
}
return io_status_;
}
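
// Synchronous read/write hooks layered on DoIo(); on failure nothing is
// reported as transferred.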
zx_status_t BlockDevice::DdkRead(void* buf, size_t buf_len, zx_off_t off, size_t* actual) {
zx_status_t status = DoIo(buf, buf_len, off, false);
*actual = (status == ZX_OK) ? buf_len : 0;
return status;
}
zx_status_t BlockDevice::DdkWrite(const void* buf, size_t buf_len, zx_off_t off, size_t* actual) {
zx_status_t status = DoIo(const_cast<void*>(buf), buf_len, off, true);
*actual = (status == ZX_OK) ? buf_len : 0;
return status;
}
zx_off_t BlockDevice::DdkGetSize() {
return device_get_size(parent());
}
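
// Device lifecycle: unbind schedules removal, and release frees the object
// once the DDK drops its last reference.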
void BlockDevice::DdkUnbind() {
DdkRemove();
}
void BlockDevice::DdkRelease() {
delete this;
}
void BlockDevice::BlockQuery(block_info_t* block_info, size_t* op_size) {
// It is important that all devices sitting on top of the volume protocol avoid
// caching a copy of block info for query. The "block_count" field is dynamic,
// and may change during the lifetime of the volume.
parent_protocol_.Query(block_info, op_size);
}
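
// Accumulates per-command statistics, then forwards the operation to the
// parent driver.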
void BlockDevice::BlockQueue(block_op_t* op, block_impl_queue_callback completion_cb,
void* cookie) {
uint64_t command = op->command & BLOCK_OP_MASK;
{
fbl::AutoLock lock(&stat_lock_);
stats_.total_ops++;
if (command == BLOCK_OP_READ) {
stats_.total_reads++;
stats_.total_blocks_read += op->rw.length;
stats_.total_blocks += op->rw.length;
} else if (command == BLOCK_OP_WRITE) {
stats_.total_writes++;
stats_.total_blocks_written += op->rw.length;
stats_.total_blocks += op->rw.length;
}
}
parent_protocol_.Queue(op, completion_cb, cookie);
}
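
// IOCTL_BLOCK_GET_STATS: copies out the accumulated statistics. The command
// buffer carries a bool which, when true, resets the counters.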
zx_status_t BlockDevice::GetStats(const void* cmd, size_t cmd_len, void* reply,
size_t reply_len, size_t* out_actual) {
if (cmd_len != sizeof(bool)) {
return ZX_ERR_INVALID_ARGS;
}
block_stats_t* out = reinterpret_cast<block_stats_t*>(reply);
if (reply_len < sizeof(*out)) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
fbl::AutoLock lock(&stat_lock_);
if (enable_stats_) {
out->total_ops = stats_.total_ops;
out->total_blocks = stats_.total_blocks;
out->total_reads = stats_.total_reads;
out->total_blocks_read = stats_.total_blocks_read;
out->total_writes = stats_.total_writes;
out->total_blocks_written = stats_.total_blocks_written;
bool clear = *static_cast<const bool*>(cmd);
if (clear) {
stats_.total_ops = 0;
stats_.total_blocks = 0;
stats_.total_reads = 0;
stats_.total_blocks_read = 0;
stats_.total_writes = 0;
stats_.total_blocks_written = 0;
}
*out_actual = sizeof(*out);
return ZX_OK;
} else {
return ZX_ERR_NOT_SUPPORTED;
}
}
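
// Binds a new BlockDevice above a parent that implements
// ZX_PROTOCOL_BLOCK_IMPL, validating the parent's reported geometry before
// publishing the "block" device.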
zx_status_t BlockDevice::Bind(void* ctx, zx_device_t* dev) {
auto bdev = std::make_unique<BlockDevice>(dev);
// The Block Implementation Protocol is required.
if (!bdev->parent_protocol_.is_valid()) {
printf("ERROR: block device '%s': does not support block protocol\n",
device_get_name(dev));
return ZX_ERR_NOT_SUPPORTED;
}
bdev->parent_protocol_.Query(&bdev->info_, &bdev->block_op_size_);
if (bdev->info_.max_transfer_size < bdev->info_.block_size) {
printf("ERROR: block device '%s': has smaller max xfer (0x%x) than block size (0x%x)\n",
device_get_name(dev), bdev->info_.max_transfer_size, bdev->info_.block_size);
return ZX_ERR_NOT_SUPPORTED;
}
zx_status_t status;
bdev->io_op_ = std::make_unique<uint8_t[]>(bdev->block_op_size_);
size_t block_size = bdev->info_.block_size;
if ((block_size < 512) || (block_size & (block_size - 1))) {
printf("block: device '%s': invalid block size: %zu\n",
device_get_name(dev), block_size);
return ZX_ERR_NOT_SUPPORTED;
}
// Check whether we have a ZBI partition map and set BLOCK_FLAG_BOOTPART
// accordingly.
uint8_t buffer[METADATA_PARTITION_MAP_MAX];
size_t actual;
status = device_get_metadata(dev, DEVICE_METADATA_PARTITION_MAP, buffer, sizeof(buffer),
&actual);
if (status == ZX_OK && actual >= sizeof(zbi_partition_map_t)) {
bdev->has_bootpart_ = true;
}
// We implement |ZX_PROTOCOL_BLOCK|, not |ZX_PROTOCOL_BLOCK_IMPL|. This is the
// "core driver" protocol for block device drivers.
status = bdev->DdkAdd("block");
if (status != ZX_OK) {
return status;
}
// The device has been added; the DDK now owns it and will free it in
// DdkRelease().
__UNUSED auto r = bdev.release();
return ZX_OK;
}
static constexpr zx_driver_ops_t block_driver_ops = []() {
zx_driver_ops_t ops = {};
ops.version = DRIVER_OPS_VERSION;
ops.bind = &BlockDevice::Bind;
return ops;
}();
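
// Bind this driver to any device publishing ZX_PROTOCOL_BLOCK_IMPL.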
ZIRCON_DRIVER_BEGIN(block, block_driver_ops, "zircon", "0.1", 1)
BI_MATCH_IF(EQ, BIND_PROTOCOL, ZX_PROTOCOL_BLOCK_IMPL),
ZIRCON_DRIVER_END(block)