| // Copyright 2016 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "block.h" |
| |
| #include <ddk/debug.h> |
| #include <fbl/algorithm.h> |
| #include <fbl/auto_lock.h> |
| #include <inttypes.h> |
| #include <pretty/hexdump.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/param.h> |
| #include <zircon/compiler.h> |
| |
| #include "trace.h" |
| |
| #define LOCAL_TRACE 0 |
| |
| // 1MB max transfer (unless further restricted by ring size) |
| #define MAX_SCATTER 257 |
| #define MAX_MAX_XFER ((MAX_SCATTER - 1) * PAGE_SIZE) |
| |
| #define PAGE_MASK (PAGE_SIZE - 1) |
| |
| namespace virtio { |
| |
| // Finish a transaction: release any pages pinned on its behalf, then report |
| // the result to the requester through the op's completion callback. |
| // |
| //   txn    - transaction being finished |
| //   status - result handed to the completion callback |
| void BlockDevice::txn_complete(block_txn_t* txn, zx_status_t status) { |
|     // a zero pin_base is treated as "nothing was pinned for this txn" |
|     const bool has_pinned_pages = (txn->pin_base != 0); |
|     if (has_pinned_pages) { |
|         bti_.unpin(txn->pin_base); |
|     } |
| |
|     auto* op = &txn->op; |
|     op->completion_cb(op, status); |
| } |
| |
| // DDK level ops |
| |
| // DDK get_size hook (optional): reports the size, in bytes, of the |
| // readable/writable space of the device. When a driver leaves this hook |
| // unimplemented the size defaults to 0 (non-seekable). |
| // |
| //   ctx - the BlockDevice instance, passed as an opaque pointer |
| zx_off_t BlockDevice::virtio_block_get_size(void* ctx) { |
|     LTRACEF("ctx %p\n", ctx); |
| |
|     return static_cast<BlockDevice*>(ctx)->GetSize(); |
| } |
| |
| // Describe this device in *info. |
| // |
| // The structure is zeroed, then the block size, the block count (device size |
| // divided by block size) and the largest single transfer are filled in. |
| void BlockDevice::GetInfo(block_info_t* info) { |
|     memset(info, 0, sizeof(*info)); |
| |
|     info->block_size = GetBlockSize(); |
|     info->block_count = GetSize() / GetBlockSize(); |
| |
|     // one page per data descriptor; QueueTxn() spends two further descriptors |
|     // per request on top of the data pages |
|     uint32_t max_xfer = (uint32_t)(PAGE_SIZE * (ring_size - 2)); |
| |
|     // limit max transfer to our worst case scatter list size |
|     if (max_xfer > MAX_MAX_XFER) { |
|         max_xfer = MAX_MAX_XFER; |
|     } |
|     info->max_transfer_size = max_xfer; |
| } |
| |
| // Block protocol query hook: fills *info via GetInfo() and reports in *bopsz |
| // the size of block_txn_t, the per-operation structure this driver casts |
| // incoming block ops to. |
| void BlockDevice::virtio_block_query(void* ctx, block_info_t* info, size_t* bopsz) { |
|     static_cast<BlockDevice*>(ctx)->GetInfo(info); |
|     *bopsz = sizeof(block_txn_t); |
| } |
| |
| // Block protocol queue hook: dispatches one block operation. |
| // |
| // Reads and writes are forwarded to QueueReadWriteTxn(); a flush is completed |
| // immediately with ZX_OK; any other command is completed with |
| // ZX_ERR_NOT_SUPPORTED. |
| void BlockDevice::virtio_block_queue(void* ctx, block_op_t* bop) { |
|     auto* self = static_cast<BlockDevice*>(ctx); |
|     auto* txn = static_cast<block_txn_t*>((void*) bop); |
| |
|     // nothing pinned yet; txn_complete() only unpins a non-zero pin_base |
|     txn->pin_base = 0; |
| |
|     const auto command = txn->op.command & BLOCK_OP_MASK; |
|     if (command == BLOCK_OP_READ) { |
|         self->QueueReadWriteTxn(txn, false); |
|     } else if (command == BLOCK_OP_WRITE) { |
|         self->QueueReadWriteTxn(txn, true); |
|     } else if (command == BLOCK_OP_FLUSH) { |
|         //TODO: this should complete after any in-flight IO and before |
|         //      any later IO begins |
|         self->txn_complete(txn, ZX_OK); |
|     } else { |
|         self->txn_complete(txn, ZX_ERR_NOT_SUPPORTED); |
|     } |
| } |
| |
| // DDK ioctl hook. |
| // |
| //   IOCTL_BLOCK_GET_INFO - writes a block_info_t into reply and sets |
| //                          *out_actual; ZX_ERR_BUFFER_TOO_SMALL if max is |
| //                          smaller than the structure |
| //   IOCTL_DEVICE_SYNC    - accepted, returns ZX_OK without further action |
| //   anything else        - ZX_ERR_NOT_SUPPORTED |
| // |
| // in_buf and in_len are not used by any of the handled ops. |
| zx_status_t BlockDevice::virtio_block_ioctl(void* ctx, uint32_t op, const void* in_buf, size_t in_len, |
|                                             void* reply, size_t max, size_t* out_actual) { |
|     LTRACEF("ctx %p, op %u\n", ctx, op); |
| |
|     auto* self = static_cast<BlockDevice*>(ctx); |
| |
|     if (op == IOCTL_BLOCK_GET_INFO) { |
|         if (max < sizeof(block_info_t)) { |
|             return ZX_ERR_BUFFER_TOO_SMALL; |
|         } |
|         self->GetInfo(reinterpret_cast<block_info_t*>(reply)); |
|         *out_actual = sizeof(block_info_t); |
|         return ZX_OK; |
|     } |
| |
|     if (op == IOCTL_DEVICE_SYNC) { |
|         return ZX_OK; |
|     } |
| |
|     return ZX_ERR_NOT_SUPPORTED; |
| } |
| |
| // Construct the block device on top of the generic virtio Device base. |
| // Hardware setup is deferred to Init(); here only local state is prepared. |
| BlockDevice::BlockDevice(zx_device_t* bus_device, zx::bti bti, fbl::unique_ptr<Backend> backend) |
|     : Device(bus_device, fbl::move(bti), fbl::move(backend)) { |
|     // zero the request buffer descriptor; Init() fills it in later |
|     memset(&blk_req_buf_, 0, sizeof(blk_req_buf_)); |
| |
|     // make sure the "resources available" completion starts out unsignaled |
|     completion_reset(&txn_signal_); |
| } |
| |
| // Release the io_buffer that holds the block request headers and the |
| // per-request status bytes. |
| BlockDevice::~BlockDevice() { |
|     io_buffer_release(&blk_req_buf_); |
| } |
| |
| // Bring the device up and publish it. |
| // |
| // Sequence: reset, read config, acknowledge, allocate the vring, allocate the |
| // request/response buffer, start the IRQ thread, set DRIVER_OK, device_add(). |
| // Returns the first failing status, or ZX_OK once the device is published. |
| zx_status_t BlockDevice::Init() { |
|     LTRACE_ENTRY; |
| |
|     // reset the device |
|     DeviceReset(); |
| |
|     // read our configuration |
|     CopyDeviceConfig(&config_, sizeof(config_)); |
| |
|     // TODO(cja): The blk_size provided in the device configuration is only |
|     // populated if a specific feature bit has been negotiated during |
|     // initialization, otherwise it is 0, at least in Virtio 0.9.5. Use 512 |
|     // as a default as a stopgap for now until proper feature negotiation |
|     // is supported. |
|     if (config_.blk_size == 0) { |
|         config_.blk_size = 512; |
|     } |
| |
|     LTRACEF("capacity %#" PRIx64 "\n", config_.capacity); |
|     LTRACEF("size_max %#x\n", config_.size_max); |
|     LTRACEF("seg_max %#x\n", config_.seg_max); |
|     LTRACEF("blk_size %#x\n", config_.blk_size); |
| |
|     // ack and set the driver status bit |
|     DriverStatusAck(); |
| |
|     // XXX check features bits and ack/nak them |
| |
|     // allocate the main vring |
|     auto ring_status = vring_.Init(0, ring_size); |
|     if (ring_status < 0) { |
|         zxlogf(ERROR, "failed to allocate vring\n"); |
|         return ring_status; |
|     } |
| |
|     // one contiguous buffer: blk_req_count request headers followed by |
|     // blk_req_count single-byte response slots |
|     const size_t req_area_size = sizeof(virtio_blk_req_t) * blk_req_count; |
|     const size_t res_area_size = sizeof(uint8_t) * blk_req_count; |
| |
|     zx_status_t status = io_buffer_init_with_bti(&blk_req_buf_, bti_.get(), |
|                                                  req_area_size + res_area_size, |
|                                                  IO_BUFFER_RW | IO_BUFFER_CONTIG); |
|     if (status != ZX_OK) { |
|         zxlogf(ERROR, "cannot alloc blk_req buffers %d\n", status); |
|         return status; |
|     } |
|     blk_req_ = static_cast<virtio_blk_req_t*>(io_buffer_virt(&blk_req_buf_)); |
| |
|     LTRACEF("allocated blk request at %p, physical address %#" PRIxPTR "\n", blk_req_, |
|             io_buffer_phys(&blk_req_buf_)); |
| |
|     // the response bytes sit right after the request headers |
|     blk_res_pa_ = io_buffer_phys(&blk_req_buf_) + req_area_size; |
|     blk_res_ = (uint8_t*)((uintptr_t)blk_req_ + req_area_size); |
| |
|     LTRACEF("allocated blk responses at %p, physical address %#" PRIxPTR "\n", blk_res_, blk_res_pa_); |
| |
|     // start the interrupt thread |
|     StartIrqThread(); |
| |
|     // set DRIVER_OK |
|     DriverStatusOk(); |
| |
|     // wire up the DDK device hooks and the block protocol hooks |
|     device_ops_.get_size = &virtio_block_get_size; |
|     device_ops_.ioctl = &virtio_block_ioctl; |
| |
|     block_ops_.query = &virtio_block_query; |
|     block_ops_.queue = &virtio_block_queue; |
| |
|     // publish the device; its ctx points back at this object |
|     device_add_args_t args = {}; |
|     args.version = DEVICE_ADD_ARGS_VERSION; |
|     args.name = "virtio-block"; |
|     args.ctx = this; |
|     args.ops = &device_ops_; |
|     args.proto_id = ZX_PROTOCOL_BLOCK_IMPL; |
|     args.proto_ops = &block_ops_; |
| |
|     status = device_add(bus_device_, &args, &device_); |
|     if (status != ZX_OK) { |
|         // do not keep a handle to a device that was never added |
|         device_ = nullptr; |
|     } |
| |
|     return status; |
| } |
| |
| // Handle a used-ring update. |
| // |
| // For every chain the ring reports as used, the lambda below: |
| //   1. returns each descriptor of the chain to the ring (under ring_lock_), |
| //   2. finds the pending transaction whose head descriptor matches, reads the |
| //      status byte the device wrote for it, releases its request slot and |
| //      unlinks it from txn_list_ (under txn_lock_), |
| //   3. outside the locks, wakes a queuer waiting for resources (if any) and |
| //      completes the transaction with the status derived from the device. |
| // |
| // The device's status byte is mapped as: 0 (VIRTIO_BLK_S_OK) -> ZX_OK, |
| // 2 (VIRTIO_BLK_S_UNSUPP) -> ZX_ERR_NOT_SUPPORTED, anything else -> ZX_ERR_IO. |
| void BlockDevice::IrqRingUpdate() { |
|     LTRACE_ENTRY; |
| |
|     // parse our descriptor chain, add back to the free queue |
|     auto free_chain = [this](vring_used_elem* used_elem) { |
|         uint32_t i = (uint16_t)used_elem->id; |
|         struct vring_desc* desc = vring_.DescFromIndex((uint16_t)i); |
|         auto head_desc = desc; // save the first element |
|         { |
|             fbl::AutoLock lock(&ring_lock_); |
|             for (;;) { |
|                 int next; |
|                 LTRACE_DO(virtio_dump_desc(desc)); |
| |
|                 // capture the link before the descriptor is handed back |
|                 if (desc->flags & VRING_DESC_F_NEXT) { |
|                     next = desc->next; |
|                 } else { |
|                     /* end of chain */ |
|                     next = -1; |
|                 } |
| |
|                 vring_.FreeDesc((uint16_t)i); |
| |
|                 if (next < 0) |
|                     break; |
|                 i = next; |
|                 desc = vring_.DescFromIndex((uint16_t)i); |
|             } |
|         } |
| |
|         bool need_signal = false; |
|         bool need_complete = false; |
|         zx_status_t status = ZX_OK; |
|         block_txn_t* txn = nullptr; |
|         { |
|             fbl::AutoLock lock(&txn_lock_); |
| |
|             // search our pending txn list to see if this completes it |
| |
|             list_for_every_entry (&txn_list_, txn, block_txn_t, node) { |
|                 if (txn->desc == head_desc) { |
|                     LTRACEF("completes txn %p\n", txn); |
| |
|                     // read the device's verdict while the slot is still ours; |
|                     // once freed, the slot may be handed to another request |
|                     const uint8_t dev_status = blk_res_[txn->index]; |
|                     if (dev_status == 0) { |
|                         status = ZX_OK; // VIRTIO_BLK_S_OK |
|                     } else if (dev_status == 2) { |
|                         status = ZX_ERR_NOT_SUPPORTED; // VIRTIO_BLK_S_UNSUPP |
|                     } else { |
|                         status = ZX_ERR_IO; // VIRTIO_BLK_S_IOERR or unknown |
|                     } |
| |
|                     free_blk_req((unsigned int)txn->index); |
|                     list_delete(&txn->node); |
| |
|                     // we will do this outside of the lock |
|                     need_complete = true; |
| |
|                     // check to see if QueueTxn is waiting on |
|                     // resources becoming available |
|                     if ((need_signal = txn_wait_)) { |
|                         txn_wait_ = false; |
|                     } |
|                     break; |
|                 } |
|             } |
|         } |
| |
|         if (need_signal) { |
|             completion_signal(&txn_signal_); |
|         } |
|         // txn is only meaningful when a match was found above |
|         if (need_complete) { |
|             txn_complete(txn, status); |
|         } |
|     }; |
| |
|     // tell the ring to find free chains and hand it back to our lambda |
|     vring_.IrqRingUpdate(free_chain); |
| } |
| |
| // Hook for configuration-change notifications (judging by the name). |
| // Nothing is acted on; entry is only traced. |
| void BlockDevice::IrqConfigChange() { |
|     LTRACE_ENTRY; |
| } |
| |
| // Build the hardware request for one read/write transaction. |
| // |
| // Reserves a request slot and a descriptor chain of (2 + pagecount) entries: |
| // the request header, one descriptor per page, and the one-byte status. The |
| // chain is prepared but not submitted; that is left to the caller. |
| // |
| //   txn       - transaction; its desc and index fields are filled in here |
| //   write     - true for a device write, false for a device read |
| //   bytes     - total payload length in bytes |
| //   pages     - physical address of each page backing the payload |
| //   pagecount - number of entries in pages (must be > 0) |
| //   idx       - out: index of the chain's head descriptor |
| // |
| // Returns ZX_OK, or ZX_ERR_NO_RESOURCES when no request slot or descriptor |
| // chain is available (any slot taken is released again). |
| zx_status_t BlockDevice::QueueTxn(block_txn_t* txn, bool write, size_t bytes, |
|                                   uint64_t* pages, size_t pagecount, uint16_t* idx) { |
| |
|     // reserve a request slot |
|     size_t slot; |
|     { |
|         fbl::AutoLock lock(&txn_lock_); |
|         slot = alloc_blk_req(); |
|         if (slot >= blk_req_count) { |
|             LTRACEF("too many block requests queued (%zu)!\n", slot); |
|             return ZX_ERR_NO_RESOURCES; |
|         } |
|     } |
| |
|     // fill in the request header for that slot |
|     auto& req = blk_req_[slot]; |
|     req.type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN; |
|     req.ioprio = 0; |
|     req.sector = txn->op.rw.offset_dev; |
|     LTRACEF("blk_req type %u ioprio %u sector %" PRIu64 "\n", |
|             req.type, req.ioprio, req.sector); |
| |
|     // remember the slot so it can be released when the transfer completes |
|     txn->index = slot; |
| |
| #if LOCAL_TRACE |
|     LTRACEF("phys %p, phys_count %#lx\n", txn->phys, txn->phys_count); |
|     for (uint64_t i = 0; i < txn->phys_count; i++) { |
|         LTRACEF("phys %lu: %#lx\n", i, txn->phys[i]); |
|     } |
| #endif |
| |
|     LTRACEF("page count %lu\n", pagecount); |
|     assert(pagecount > 0); |
| |
|     /* put together a transfer */ |
|     uint16_t head_index; |
|     vring_desc* head; |
|     { |
|         fbl::AutoLock lock(&ring_lock_); |
|         head = vring_.AllocDescChain((uint16_t)(2u + pagecount), &head_index); |
|     } |
|     if (!head) { |
|         LTRACEF("failed to allocate descriptor chain of length %zu\n", 2u + pagecount); |
|         fbl::AutoLock lock(&txn_lock_); |
|         free_blk_req(slot); |
|         return ZX_ERR_NO_RESOURCES; |
|     } |
| |
|     LTRACEF("after alloc chain desc %p, i %u\n", head, head_index); |
| |
|     /* point the txn at this head descriptor */ |
|     txn->desc = head; |
| |
|     /* head descriptor: the request header */ |
|     head->addr = io_buffer_phys(&blk_req_buf_) + slot * sizeof(virtio_blk_req_t); |
|     head->len = sizeof(virtio_blk_req_t); |
|     head->flags = VRING_DESC_F_NEXT; |
|     LTRACE_DO(virtio_dump_desc(head)); |
| |
|     /* one data descriptor per page */ |
|     vring_desc* cur = head; |
|     for (size_t n = 0; n < pagecount; n++) { |
|         cur = vring_.DescFromIndex(cur->next); |
| |
|         uint64_t addr = pages[n]; |
|         uint32_t len = (uint32_t) ((bytes > PAGE_SIZE) ? PAGE_SIZE : bytes); |
|         if (n == 0) { |
|             // first entry may not be page aligned: start inside the page and |
|             // stop at its end |
|             size_t page0_offset = txn->op.rw.offset_vmo & PAGE_MASK; |
|             addr += page0_offset; |
| |
|             size_t room = PAGE_SIZE - page0_offset; |
|             if (len > room) { |
|                 len = (uint32_t) room; |
|             } |
|         } |
| |
|         cur->addr = addr; |
|         cur->len = len; |
|         cur->flags = VRING_DESC_F_NEXT; |
|         LTRACEF("pa %#lx, len %#x\n", cur->addr, cur->len); |
| |
|         if (!write) { |
|             /* mark buffer as write-only if its a block read */ |
|             cur->flags |= VRING_DESC_F_WRITE; |
|         } |
| |
|         bytes -= len; |
|     } |
|     LTRACE_DO(virtio_dump_desc(cur)); |
|     assert(bytes == 0); |
| |
|     /* tail descriptor: the one-byte response for this slot */ |
|     cur = vring_.DescFromIndex(cur->next); |
|     cur->addr = blk_res_pa_ + slot; |
|     cur->len = 1; |
|     cur->flags = VRING_DESC_F_WRITE; |
|     LTRACE_DO(virtio_dump_desc(cur)); |
| |
|     *idx = head_index; |
|     return ZX_OK; |
| } |
| |
| // Validate, pin and queue one read or write transaction. |
| // |
| //   txn   - the transaction; op.rw.offset_vmo is converted from blocks to |
| //           bytes in place |
| //   write - true for a device write, false for a device read |
| // |
| // The transaction is completed immediately (via txn_complete) when: |
| //   - it does not fit within the device      -> ZX_ERR_OUT_OF_RANGE |
| //   - its length is zero                     -> ZX_OK |
| //   - it needs more than MAX_SCATTER pages   -> ZX_ERR_INVALID_ARGS |
| //   - the VMO pages cannot be pinned         -> ZX_ERR_INTERNAL |
| //   - hardware resources cannot be obtained even with nothing in flight |
| //                                            -> the QueueTxn() status |
| // Otherwise it is submitted to the ring and completed later from the IRQ path. |
| // If resources are short while other transactions are in flight, this call |
| // blocks (holding lock_) until one of them completes, then retries. |
| void BlockDevice::QueueReadWriteTxn(block_txn_t* txn, bool write) { |
|     LTRACEF("txn %p, command %#x\n", txn, txn->op.command); |
| |
|     fbl::AutoLock lock(&lock_); |
| |
|     // from here on offset_vmo is a byte offset |
|     txn->op.rw.offset_vmo *= config_.blk_size; |
| |
|     // transaction must fit within device |
|     if ((txn->op.rw.offset_dev >= config_.capacity) || |
|         (config_.capacity - txn->op.rw.offset_dev < txn->op.rw.length)) { |
|         LTRACEF("request beyond the end of the device!\n"); |
|         txn_complete(txn, ZX_ERR_OUT_OF_RANGE); |
|         return; |
|     } |
| |
|     if (txn->op.rw.length == 0) { |
|         txn_complete(txn, ZX_OK); |
|         return; |
|     } |
| |
|     // widen before multiplying so a large block count cannot wrap around and |
|     // slip past the MAX_SCATTER check below |
|     size_t bytes = static_cast<size_t>(txn->op.rw.length) * config_.blk_size; |
| |
|     uint64_t suboffset = txn->op.rw.offset_vmo & PAGE_MASK; |
|     uint64_t aligned_offset = txn->op.rw.offset_vmo & ~PAGE_MASK; |
|     size_t pin_size = ROUNDUP(suboffset + bytes, PAGE_SIZE); |
|     size_t num_pages = pin_size / PAGE_SIZE; |
|     if (num_pages > MAX_SCATTER) { |
|         TRACEF("virtio: transaction too large\n"); |
|         txn_complete(txn, ZX_ERR_INVALID_ARGS); |
|         return; |
|     } |
| |
|     zx_handle_t vmo = txn->op.rw.vmo; |
|     uint64_t pages[MAX_SCATTER]; |
|     zx_status_t r; |
|     if ((r = zx_bti_pin(bti_.get(), ZX_BTI_PERM_READ | ZX_BTI_PERM_WRITE, vmo, |
|                         aligned_offset, pin_size, pages, num_pages)) != ZX_OK) { |
|         TRACEF("virtio: could not pin pages\n"); |
|         txn_complete(txn, ZX_ERR_INTERNAL); |
|         return; |
|     } |
| |
|     // remembered so txn_complete() can unpin |
|     txn->pin_base = pages[0]; |
| |
|     // pages[0] is deliberately left page-aligned: QueueTxn() applies the |
|     // in-page offset (offset_vmo & PAGE_MASK) to the first descriptor itself, |
|     // so adding suboffset here as well would offset the buffer twice |
| |
|     bool cannot_fail = false; |
| |
|     for (;;) { |
|         uint16_t idx; |
| |
|         // attempt to setup hw txn |
|         zx_status_t status = QueueTxn(txn, write, bytes, pages, num_pages, &idx); |
|         if (status == ZX_OK) { |
|             fbl::AutoLock txn_guard(&txn_lock_); |
| |
|             // save the txn in a list |
|             list_add_tail(&txn_list_, &txn->node); |
| |
|             /* submit the transfer */ |
|             vring_.SubmitChain(idx); |
| |
|             /* kick it off */ |
|             vring_.Kick(); |
| |
|             return; |
|         } else { |
|             if (cannot_fail) { |
|                 printf("virtio-block: failed to queue txn to hw: %d\n", status); |
|                 txn_complete(txn, status); |
|                 return; |
|             } |
| |
|             fbl::AutoLock txn_guard(&txn_lock_); |
| |
|             if (list_is_empty(&txn_list_)) { |
|                 // we hold the queue lock and the list is empty |
|                 // if we fail this time around, no point in trying again |
|                 cannot_fail = true; |
|                 continue; |
|             } else { |
|                 // let the completer know we need to wake up |
|                 txn_wait_ = true; |
|             } |
|         } |
| |
|         // wait for IrqRingUpdate() to retire a transaction, then retry |
|         completion_wait(&txn_signal_, ZX_TIME_INFINITE); |
|         completion_reset(&txn_signal_); |
|     } |
| } |
| |
| } // namespace virtio |