// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <limits>
#include <memory>
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <threads.h>
#include <ddk/binding.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/protocol/block.h>
#include <ddk/protocol/block/partition.h>
#include <fbl/auto_lock.h>
#include <fbl/mutex.h>
#include <lib/fzl/owned-vmo-mapper.h>
#include <lib/sync/completion.h>
#include <lib/zx/vmo.h>
#include <zircon/assert.h>
#include <zircon/boot/image.h>
#include <zircon/device/block.h>
#include <zircon/device/ramdisk.h>
#include <zircon/listnode.h>
#include <zircon/process.h>
#include <zircon/syscalls.h>
#include <zircon/thread_annotations.h>
#include <zircon/types.h>
namespace {
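// Maximum bytes moved by a single block operation; reported to clients via
// ramdisk_query() and enforced per transaction by the worker thread.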
constexpr uint64_t kMaxTransferSize = 1LLU << 19;
typedef struct {
zx_device_t* zxdev;
} ramctl_device_t;
typedef struct ramdisk_device {
zx_device_t* zxdev;
fzl::OwnedVmoMapper mapping;
uint64_t block_size;
uint64_t block_count;
uint8_t type_guid[ZBI_PARTITION_GUID_LEN];
// |signal| identifies when the worker thread should stop sleeping.
// This may occur when the device:
// - Is unbound,
// - Receives a message on a queue,
// - Has |asleep| set to false.
sync_completion_t signal;
// Guards fields of the ramdisk which may be accessed concurrently
// from a background worker thread.
fbl::Mutex lock_;
list_node_t txn_list TA_GUARDED(lock_);
list_node_t deferred_list TA_GUARDED(lock_);
// Identifies if the device has been unbound.
bool dead TA_GUARDED(lock_);
// Flags modified by IOCTL_RAMDISK_SET_FLAGS.
//
// Supported flags:
// - RAMDISK_FLAG_RESUME_ON_WAKE: This flag identifies if requests which are
// sent to the ramdisk while it is considered "asleep" should be processed
// when the ramdisk wakes up. This is implemented by utilizing a "deferred
// list" of requests, which are immediately re-issued on wakeup.
uint32_t flags TA_GUARDED(lock_);
// True if the ramdisk is "sleeping": incoming write requests are deferred, or
// dropped if |RAMDISK_FLAG_RESUME_ON_WAKE| is not set. Reads are still serviced.
bool asleep TA_GUARDED(lock_);
// The number of write blocks that may still be processed before the ramdisk goes
// to sleep. When this count reaches zero, the worker sets |asleep| to true.
uint64_t pre_sleep_write_block_count TA_GUARDED(lock_);
ramdisk_blk_counts_t block_counts TA_GUARDED(lock_);
thrd_t worker;
char name[ZBI_PARTITION_NAME_LEN];
} ramdisk_device_t;
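// A single queued block operation. Block ops handed to ramdisk_queue() are sized via
// the |bopsz| value returned from ramdisk_query(), so a block_op_t can be converted
// back into its enclosing ramdisk_txn_t with containerof().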
typedef struct {
block_op_t op;
list_node_t node;
block_impl_queue_callback completion_cb;
void* cookie;
} ramdisk_txn_t;
// The worker thread processes queued block transactions in the background.
int worker_thread(void* arg) {
zx_status_t status = ZX_OK;
ramdisk_device_t* dev = (ramdisk_device_t*)arg;
ramdisk_txn_t* txn = nullptr;
bool dead, asleep, defer;
uint64_t blocks = 0;
for (;;) {
for (;;) {
{
fbl::AutoLock lock(&dev->lock_);
txn = nullptr;
dead = dev->dead;
asleep = dev->asleep;
defer = (dev->flags & RAMDISK_FLAG_RESUME_ON_WAKE) != 0;
blocks = dev->pre_sleep_write_block_count;
if (!asleep) {
// If we are awake, try grabbing pending transactions from the deferred list.
txn = list_remove_head_type(&dev->deferred_list, ramdisk_txn_t, node);
}
if (txn == nullptr) {
// If no transactions were available in the deferred list (or we are asleep),
// grab one from the regular txn_list.
txn = list_remove_head_type(&dev->txn_list, ramdisk_txn_t, node);
}
}
if (dead) {
goto goodbye;
}
if (txn == nullptr) {
sync_completion_wait(&dev->signal, ZX_TIME_INFINITE);
} else {
sync_completion_reset(&dev->signal);
break;
}
}
uint64_t txn_blocks = txn->op.rw.length;
if (txn->op.command == BLOCK_OP_READ || blocks == 0 || blocks > txn_blocks) {
// Use the full transaction length if this is a read, if the ramdisk is not
// configured to sleep after a fixed number of written blocks, or if the
// remaining pre-sleep count covers the entire transaction.
blocks = txn_blocks;
}
size_t length = blocks * dev->block_size;
size_t dev_offset = txn->op.rw.offset_dev * dev->block_size;
size_t vmo_offset = txn->op.rw.offset_vmo * dev->block_size;
void* addr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(dev->mapping.start()) +
dev_offset);
if (length > kMaxTransferSize) {
status = ZX_ERR_OUT_OF_RANGE;
} else if (txn->op.command == BLOCK_OP_READ) {
// A read operation should always succeed, even if the ramdisk is "asleep".
status = zx_vmo_write(txn->op.rw.vmo, addr, vmo_offset, length);
} else if (asleep) {
if (defer) {
// If we are asleep but resuming on wake, add txn to the deferred_list.
// deferred_list is only accessed by the worker_thread, so a lock is not needed.
list_add_tail(&dev->deferred_list, &txn->node);
continue;
} else {
status = ZX_ERR_UNAVAILABLE;
}
} else { // BLOCK_OP_WRITE
status = zx_vmo_read(txn->op.rw.vmo, addr, vmo_offset, length);
if (status == ZX_OK && blocks < txn->op.rw.length && defer) {
// If the first part of the transaction succeeded but the entire transaction is not
// complete, we need to address the remainder.
// If we are deferring after this block count, update the transaction to
// reflect the blocks that have already been written, and add it to the
// deferred queue.
ZX_DEBUG_ASSERT_MSG(blocks <= std::numeric_limits<uint32_t>::max(),
"Block count overflow");
txn->op.rw.length -= static_cast<uint32_t>(blocks);
txn->op.rw.offset_vmo += blocks;
txn->op.rw.offset_dev += blocks;
// Add the remaining blocks to the deferred list.
list_add_tail(&dev->deferred_list, &txn->node);
}
}
if (txn->op.command == BLOCK_OP_WRITE) {
{
// Update the ramdisk block counts. Since we aren't failing read transactions,
// only include write transaction counts.
fbl::AutoLock lock(&dev->lock_);
// Increment the count based on the result of the last transaction.
if (status == ZX_OK) {
dev->block_counts.successful += blocks;
if (blocks != txn_blocks && !defer) {
// If we are not deferring, then any excess blocks have failed.
dev->block_counts.failed += txn_blocks - blocks;
status = ZX_ERR_UNAVAILABLE;
}
} else {
dev->block_counts.failed += txn_blocks;
}
// Put the ramdisk to sleep if we have reached the required # of blocks.
if (dev->pre_sleep_write_block_count > 0) {
dev->pre_sleep_write_block_count -= blocks;
dev->asleep = (dev->pre_sleep_write_block_count == 0);
}
}
if (defer && blocks != txn_blocks && status == ZX_OK) {
// If we deferred partway through a transaction, hold off on returning the
// result until the remainder of the transaction is completed.
continue;
}
}
if (txn->completion_cb) {
txn->completion_cb(txn->cookie, status, &txn->op);
}
}
goodbye:
while (txn != nullptr) {
txn->completion_cb(txn->cookie, ZX_ERR_BAD_STATE, &txn->op);
txn = list_remove_head_type(&dev->deferred_list, ramdisk_txn_t, node);
if (txn == nullptr) {
fbl::AutoLock lock(&dev->lock_);
txn = list_remove_head_type(&dev->txn_list, ramdisk_txn_t, node);
}
}
return 0;
}
uint64_t sizebytes(ramdisk_device_t* rdev) {
return rdev->block_size * rdev->block_count;
}
// implement device protocol:
void ramdisk_unbind(void* ctx) {
ramdisk_device_t* ramdev = static_cast<ramdisk_device_t*>(ctx);
{
fbl::AutoLock lock(&ramdev->lock_);
ramdev->dead = true;
}
sync_completion_signal(&ramdev->signal);
device_remove(ramdev->zxdev);
}
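// Ramdisk control ioctls: unlink the device, set flags, wake the ramdisk, put it to
// sleep after a given number of written blocks, or report per-state block counts.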
zx_status_t ramdisk_ioctl(void* ctx, uint32_t op, const void* cmd, size_t cmd_len,
void* reply, size_t max, size_t* out_actual) {
ramdisk_device_t* ramdev = static_cast<ramdisk_device_t*>(ctx);
switch (op) {
case IOCTL_RAMDISK_UNLINK: {
ramdisk_unbind(ramdev);
return ZX_OK;
}
case IOCTL_RAMDISK_SET_FLAGS: {
if (cmd_len < sizeof(uint32_t)) {
return ZX_ERR_INVALID_ARGS;
}
const uint32_t flags = *static_cast<const uint32_t*>(cmd);
fbl::AutoLock lock(&ramdev->lock_);
ramdev->flags = flags;
return ZX_OK;
}
case IOCTL_RAMDISK_WAKE_UP: {
// Reset state and transaction counts
fbl::AutoLock lock(&ramdev->lock_);
ramdev->asleep = false;
memset(&ramdev->block_counts, 0, sizeof(ramdev->block_counts));
ramdev->pre_sleep_write_block_count = 0;
sync_completion_signal(&ramdev->signal);
return ZX_OK;
}
case IOCTL_RAMDISK_SLEEP_AFTER: {
if (cmd_len < sizeof(uint64_t)) {
return ZX_ERR_INVALID_ARGS;
}
const uint64_t block_count = *static_cast<const uint64_t*>(cmd);
fbl::AutoLock lock(&ramdev->lock_);
ramdev->asleep = false;
memset(&ramdev->block_counts, 0, sizeof(ramdev->block_counts));
ramdev->pre_sleep_write_block_count = block_count;
if (block_count == 0) {
ramdev->asleep = true;
}
return ZX_OK;
}
case IOCTL_RAMDISK_GET_BLK_COUNTS: {
if (max < sizeof(ramdisk_blk_counts_t)) {
return ZX_ERR_INVALID_ARGS;
}
fbl::AutoLock lock(&ramdev->lock_);
memcpy(reply, &ramdev->block_counts, sizeof(ramdisk_blk_counts_t));
*out_actual = sizeof(ramdisk_blk_counts_t);
return ZX_OK;
}
default:
return ZX_ERR_NOT_SUPPORTED;
}
}
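// block_impl_protocol: validate the requested range, record received write blocks,
// and queue the transaction for the worker thread. Flushes are a no-op and complete
// immediately.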
void ramdisk_queue(void* ctx, block_op_t* bop, block_impl_queue_callback completion_cb,
void* cookie) {
ramdisk_device_t* ramdev = static_cast<ramdisk_device_t*>(ctx);
ramdisk_txn_t* txn = containerof(bop, ramdisk_txn_t, op);
bool dead;
bool read = false;
switch ((txn->op.command &= BLOCK_OP_MASK)) {
case BLOCK_OP_READ:
read = true;
__FALLTHROUGH;
case BLOCK_OP_WRITE:
if ((txn->op.rw.offset_dev >= ramdev->block_count) ||
((ramdev->block_count - txn->op.rw.offset_dev) < txn->op.rw.length)) {
completion_cb(cookie, ZX_ERR_OUT_OF_RANGE, bop);
return;
}
{
fbl::AutoLock lock(&ramdev->lock_);
if (!(dead = ramdev->dead)) {
if (!read) {
ramdev->block_counts.received += txn->op.rw.length;
}
txn->completion_cb = completion_cb;
txn->cookie = cookie;
list_add_tail(&ramdev->txn_list, &txn->node);
}
}
if (dead) {
completion_cb(cookie, ZX_ERR_BAD_STATE, bop);
} else {
sync_completion_signal(&ramdev->signal);
}
break;
case BLOCK_OP_FLUSH:
completion_cb(cookie, ZX_OK, bop);
break;
default:
completion_cb(cookie, ZX_ERR_NOT_SUPPORTED, bop);
break;
}
}
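// block_impl_protocol: report block geometry, maximum transfer size, current flags,
// and the per-operation allocation size (|bopsz|).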
void ramdisk_query(void* ctx, block_info_t* info, size_t* bopsz) {
ramdisk_device_t* ramdev = static_cast<ramdisk_device_t*>(ctx);
memset(info, 0, sizeof(*info));
info->block_size = static_cast<uint32_t>(ramdev->block_size);
info->block_count = ramdev->block_count;
// Arbitrarily set, but matches the SATA driver for testing
info->max_transfer_size = kMaxTransferSize;
fbl::AutoLock lock(&ramdev->lock_);
info->flags = ramdev->flags;
*bopsz = sizeof(ramdisk_txn_t);
}
zx_off_t ramdisk_getsize(void* ctx) {
return sizebytes(static_cast<ramdisk_device_t*>(ctx));
}
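// Joins the worker thread before freeing the device state. Called once the device
// has been removed, or directly if device_add() fails during configuration.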
void ramdisk_release(void* ctx) {
ramdisk_device_t* ramdev = static_cast<ramdisk_device_t*>(ctx);
// Wake up the worker thread, in case it is sleeping
sync_completion_signal(&ramdev->signal);
thrd_join(ramdev->worker, nullptr);
delete ramdev;
}
static block_impl_protocol_ops_t block_ops = {
.query = ramdisk_query,
.queue = ramdisk_queue,
};
static_assert(ZBI_PARTITION_GUID_LEN == GUID_LENGTH, "GUID length mismatch");
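// block_partition_protocol: return the type GUID supplied at configuration time
// (all zeroes if none was provided).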
zx_status_t ramdisk_get_guid(void* ctx, guidtype_t guidtype, guid_t* out_guid) {
if (guidtype != GUIDTYPE_TYPE) {
return ZX_ERR_NOT_SUPPORTED;
}
ramdisk_device_t* device = static_cast<ramdisk_device_t*>(ctx);
memcpy(out_guid, device->type_guid, ZBI_PARTITION_GUID_LEN);
return ZX_OK;
}
static_assert(ZBI_PARTITION_NAME_LEN <= MAX_PARTITION_NAME_LENGTH, "Name length mismatch");
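// block_partition_protocol: copy out the auto-generated "ramdisk-N" device name.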
zx_status_t ramdisk_get_name(void* ctx, char* out_name, size_t capacity) {
if (capacity < ZBI_PARTITION_NAME_LEN) {
return ZX_ERR_BUFFER_TOO_SMALL;
}
ramdisk_device_t* device = static_cast<ramdisk_device_t*>(ctx);
strlcpy(out_name, device->name, ZBI_PARTITION_NAME_LEN);
return ZX_OK;
}
static block_partition_protocol_ops_t partition_ops = {
.get_guid = ramdisk_get_guid,
.get_name = ramdisk_get_name,
};
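// Instances answer queries for both the block and partition protocols.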
zx_status_t ramdisk_get_protocol(void* ctx, uint32_t proto_id, void* out) {
ramdisk_device_t* device = static_cast<ramdisk_device_t*>(ctx);
switch (proto_id) {
case ZX_PROTOCOL_BLOCK_IMPL: {
block_impl_protocol_t* protocol = static_cast<block_impl_protocol_t*>(out);
protocol->ctx = device;
protocol->ops = &block_ops;
return ZX_OK;
}
case ZX_PROTOCOL_BLOCK_PARTITION: {
block_partition_protocol_t* protocol = static_cast<block_partition_protocol_t*>(out);
protocol->ctx = device;
protocol->ops = &partition_ops;
return ZX_OK;
}
default:
return ZX_ERR_NOT_SUPPORTED;
}
}
static zx_protocol_device_t ramdisk_instance_proto = []() {
zx_protocol_device_t protocol = {};
protocol.version = DEVICE_OPS_VERSION;
protocol.get_protocol = ramdisk_get_protocol;
protocol.unbind = ramdisk_unbind;
protocol.release = ramdisk_release;
protocol.get_size = ramdisk_getsize;
protocol.ioctl = ramdisk_ioctl;
return protocol;
}();
// implement ramctl device protocol:
static uint64_t ramdisk_count = 0;
constexpr size_t kMaxRamdiskNameLength = 32;
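// Creates a ramdisk device backed by |vmo|, starts its worker thread, publishes it
// under ramctl, and writes the generated device name into |reply|.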
zx_status_t ramctl_config(ramctl_device_t* ramctl, zx::vmo vmo,
uint64_t block_size, uint64_t block_count,
uint8_t* type_guid, void* reply, size_t max,
size_t* out_actual) {
if (max < kMaxRamdiskNameLength) {
return ZX_ERR_INVALID_ARGS;
}
auto ramdev = std::make_unique<ramdisk_device_t>();
ramdev->block_size = block_size;
ramdev->block_count = block_count;
if (type_guid) {
memcpy(ramdev->type_guid, type_guid, ZBI_PARTITION_GUID_LEN);
} else {
memset(ramdev->type_guid, 0, ZBI_PARTITION_GUID_LEN);
}
snprintf(ramdev->name, sizeof(ramdev->name),
"ramdisk-%" PRIu64, ramdisk_count++);
zx_status_t status = ramdev->mapping.Map(std::move(vmo), sizebytes(ramdev.get()));
if (status != ZX_OK) {
return status;
}
list_initialize(&ramdev->txn_list);
list_initialize(&ramdev->deferred_list);
if (thrd_create(&ramdev->worker, worker_thread, ramdev.get()) != thrd_success) {
return ZX_ERR_NO_MEMORY;
}
device_add_args_t args = {};
args.version = DEVICE_ADD_ARGS_VERSION;
args.name = ramdev->name;
args.ctx = ramdev.get();
args.ops = &ramdisk_instance_proto;
args.props = nullptr;
args.prop_count = 0;
args.proto_id = ZX_PROTOCOL_BLOCK_IMPL;
args.proto_ops = &block_ops;
args.proxy_args = nullptr;
args.flags = 0;
char* name = static_cast<char*>(reply);
strcpy(name, ramdev->name);
size_t namelen = strlen(name);
if ((status = device_add(ramctl->zxdev, &args, &ramdev->zxdev)) != ZX_OK) {
ramdisk_release(ramdev.release());
return status;
}
__UNUSED auto ptr = ramdev.release();
*out_actual = namelen;
return ZX_OK;
}
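// ramctl ioctls: IOCTL_RAMDISK_CONFIG allocates a new VMO of blk_size * blk_count
// bytes, while IOCTL_RAMDISK_CONFIG_VMO takes ownership of a caller-supplied VMO
// and exposes it with page-sized blocks.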
zx_status_t ramctl_ioctl(void* ctx, uint32_t op, const void* cmd,
size_t cmdlen, void* reply, size_t max, size_t* out_actual) {
ramctl_device_t* ramctl = static_cast<ramctl_device_t*>(ctx);
switch (op) {
case IOCTL_RAMDISK_CONFIG: {
if (cmdlen != sizeof(ramdisk_ioctl_config_t)) {
return ZX_ERR_INVALID_ARGS;
}
ramdisk_ioctl_config_t* config = (ramdisk_ioctl_config_t*)cmd;
zx::vmo vmo;
zx_status_t status = zx::vmo::create(config->blk_size * config->blk_count, 0, &vmo);
if (status == ZX_OK) {
status = ramctl_config(ramctl, std::move(vmo),
config->blk_size, config->blk_count,
config->type_guid,
reply, max, out_actual);
}
return status;
}
case IOCTL_RAMDISK_CONFIG_VMO: {
if (cmdlen != sizeof(zx_handle_t)) {
return ZX_ERR_INVALID_ARGS;
}
zx::vmo vmo(*reinterpret_cast<const zx_handle_t*>(cmd));
// Ensure this is the last handle to this VMO; otherwise, the size
// may change from underneath us.
zx_info_handle_count_t info;
zx_status_t status = vmo.get_info(ZX_INFO_HANDLE_COUNT, &info, sizeof(info), nullptr,
nullptr);
if (status != ZX_OK || info.handle_count != 1) {
return ZX_ERR_INVALID_ARGS;
}
uint64_t vmo_size;
status = vmo.get_size(&vmo_size);
if (status != ZX_OK) {
return status;
}
return ramctl_config(ramctl, std::move(vmo),
PAGE_SIZE, (vmo_size + PAGE_SIZE - 1) / PAGE_SIZE,
nullptr, reply, max, out_actual);
}
default:
return ZX_ERR_NOT_SUPPORTED;
}
}
zx_protocol_device_t ramdisk_ctl_proto = []() {
zx_protocol_device_t protocol = {};
protocol.version = DEVICE_OPS_VERSION;
protocol.ioctl = ramctl_ioctl;
return protocol;
}();
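// Driver entry point: publishes the "ramctl" control device under |parent|.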
zx_status_t ramdisk_driver_bind(void* ctx, zx_device_t* parent) {
auto ramctl = std::make_unique<ramctl_device_t>();
device_add_args_t args = {};
args.version = DEVICE_ADD_ARGS_VERSION;
args.name = "ramctl";
args.ops = &ramdisk_ctl_proto;
args.ctx = ramctl.get();
zx_status_t status = device_add(parent, &args, &ramctl->zxdev);
if (status != ZX_OK) {
return status;
}
__UNUSED auto ptr = ramctl.release();
return ZX_OK;
}
static zx_driver_ops_t ramdisk_driver_ops = []() {
zx_driver_ops_t ops = {};
ops.version = DRIVER_OPS_VERSION;
ops.bind = ramdisk_driver_bind;
return ops;
}();
} // namespace
ZIRCON_DRIVER_BEGIN(ramdisk, ramdisk_driver_ops, "zircon", "0.1", 1)
BI_MATCH_IF(EQ, BIND_PROTOCOL, ZX_PROTOCOL_MISC_PARENT),
ZIRCON_DRIVER_END(ramdisk)