| // Copyright 2016 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/devices/block/drivers/virtio/block.h" |
| |
| #include <fidl/fuchsia.hardware.block.volume/cpp/fidl.h> |
| #include <fuchsia/hardware/block/c/banjo.h> |
| #include <fuchsia/hardware/block/driver/c/banjo.h> |
| #include <inttypes.h> |
| #include <lib/fit/defer.h> |
| #include <lib/virtio/driver_utils.h> |
| #include <lib/zircon-internal/align.h> |
| #include <lib/zx/clock.h> |
| #include <lib/zx/time.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/param.h> |
| #include <zircon/compiler.h> |
| |
| #include <algorithm> |
| #include <cstddef> |
| #include <cstdint> |
| #include <memory> |
| #include <mutex> |
| #include <utility> |
| |
| #include <fbl/algorithm.h> |
| |
| #include "src/devices/block/lib/common/include/common.h" |
| #include "src/storage/lib/block_server/block_server.h" |
| |
| #define LOCAL_TRACE 0 |
| |
| namespace virtio { |
| |
| namespace { |
| |
| // Cache some page size calculations that are used frequently. |
| const uint32_t kPageSize = zx_system_get_page_size(); |
| const uint32_t kPageMask = kPageSize - 1; |
| const uint32_t kMaxMaxXfer = (MAX_SCATTER - 1) * kPageSize; |
| |
| // See 5.2.2 in the virtio spec |
| const uint16_t kVirtioBlkRequestQueueIndex = 0; |
| |
| uint32_t VirtioRequestType(const block_txn_t& txn) { |
| switch (txn.op.command.opcode) { |
| case BLOCK_OPCODE_READ: |
| return VIRTIO_BLK_T_IN; |
| case BLOCK_OPCODE_WRITE: |
| return VIRTIO_BLK_T_OUT; |
| case BLOCK_OPCODE_TRIM: |
| return VIRTIO_BLK_T_DISCARD; |
| case BLOCK_OPCODE_FLUSH: |
| return VIRTIO_BLK_T_FLUSH; |
| default: |
| // Maintain legacy behaviour of defaulting to a read. Opcodes are untrusted. |
| return VIRTIO_BLK_T_IN; |
| } |
| } |
| |
| uint32_t VirtioRequestType(const block_server::Request& request) { |
| switch (request.operation.tag) { |
| case block_server::Operation::Tag::Read: |
| return VIRTIO_BLK_T_IN; |
| case block_server::Operation::Tag::Write: |
| return VIRTIO_BLK_T_OUT; |
| case block_server::Operation::Tag::Trim: |
| return VIRTIO_BLK_T_DISCARD; |
| case block_server::Operation::Tag::Flush: |
| return VIRTIO_BLK_T_FLUSH; |
| case block_server::Operation::Tag::CloseVmo: |
| __UNREACHABLE; |
| } |
| } |
| |
| } // namespace |
| |
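| // Completes |transaction|: unpins its pages, returns its request slot(s) to the pool, and |
| // reports |status| through either the Banjo completion callback or the block server reply. |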
| void BlockDevice::CompleteTxn(block_txn_t* transaction, zx_status_t status) { |
| FDF_LOGL(TRACE, logger(), "Complete txn %p %s", transaction, zx_status_get_string(status)); |
| if (transaction->pmt != ZX_HANDLE_INVALID) { |
| zx_pmt_unpin(transaction->pmt); |
| transaction->pmt = ZX_HANDLE_INVALID; |
| } |
| { |
| std::lock_guard<std::mutex> lock(watchdog_lock_); |
| blk_req_start_timestamps_[transaction->req_index] = zx::time::infinite(); |
| } |
| // Save the request ID before releasing the transaction's resources: for block server |
| // requests, req_index reserves the txn's slot in our pool, so once that is freed the txn |
| // may be reused. |
| std::optional<uint64_t> request_id = transaction->request; |
| { |
| std::lock_guard<std::mutex> lock(txn_lock_); |
| // NB: req_index might be invalid (>= kBlkReqCount) if transaction comes from Banjo but never |
| // even made it as far as allocating a req_index. That's tolerated by FreeBlkReqLocked. |
| FreeBlkReqLocked(transaction->req_index); |
| if (transaction->discard_req_index) { |
| FreeBlkReqLocked(*transaction->discard_req_index); |
| } |
| list_delete(&transaction->node); |
| } |
| txn_cond_.notify_all(); |
| if (transaction->completion_cb) { |
| transaction->completion_cb(transaction->cookie, status, &transaction->op); |
| } else { |
| ZX_DEBUG_ASSERT(request_id); |
| std::lock_guard lock(block_server_lock_); |
| if (block_server_) { |
| block_server_->SendReply(*request_id, zx::make_result(status)); |
| } |
| } |
| } |
| |
| uint32_t BlockDevice::GetMaxTransferSize() const { |
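| // Each request consumes one descriptor for the header and one for the status byte, |
| // leaving kRingSize - 2 descriptors for data pages. |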
| const uint32_t max_transfer_size = static_cast<uint32_t>(kPageSize * (kRingSize - 2)); |
| // Limit max transfer to our worst case scatter list size. |
| return std::min(max_transfer_size, kMaxMaxXfer); |
| } |
| |
| flag_t BlockDevice::GetFlags() const { |
| flag_t flags = 0; |
| if (supports_discard_) { |
| flags |= FLAG_TRIM_SUPPORT; |
| } |
| return flags; |
| } |
| |
| void BlockDevice::BlockImplQuery(block_info_t* info, size_t* bopsz) { |
| memset(info, 0, sizeof(*info)); |
| info->block_size = GetBlockSize(); |
| info->block_count = GetBlockCount(); |
| info->max_transfer_size = GetMaxTransferSize(); |
| info->flags = GetFlags(); |
| *bopsz = sizeof(block_txn_t); |
| } |
| |
| void BlockDevice::BlockImplQueue(block_op_t* bop, block_impl_queue_callback completion_cb, |
| void* cookie) { |
| block_txn_t* txn = reinterpret_cast<block_txn_t*>(bop); |
| txn->completion_cb = completion_cb; |
| txn->cookie = cookie; |
| switch (txn->op.command.opcode) { |
| case BLOCK_OPCODE_READ: |
| case BLOCK_OPCODE_WRITE: |
| case BLOCK_OPCODE_TRIM: |
| if (zx_status_t status = block::CheckIoRange(txn->op.rw, config_.capacity, logger()); |
| status != ZX_OK) { |
| completion_cb(cookie, status, bop); |
| return; |
| } |
| if (txn->op.command.flags & BLOCK_IO_FLAG_FORCE_ACCESS || |
| txn->op.command.flags & BLOCK_IO_FLAG_PRE_BARRIER) { |
| completion_cb(cookie, ZX_ERR_NOT_SUPPORTED, bop); |
| return; |
| } |
| FDF_LOG(TRACE, "txn %p, opcode %#x\n", txn, txn->op.command.opcode); |
| break; |
| case BLOCK_OPCODE_FLUSH: |
| FDF_LOG(TRACE, "txn %p, opcode FLUSH\n", txn); |
| break; |
| default: |
| completion_cb(cookie, ZX_ERR_NOT_SUPPORTED, bop); |
| return; |
| } |
| SignalWorker(txn); |
| } |
| |
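| // Block server callback: runs the server's main loop on a dedicated synchronized |
| // dispatcher, which destroys itself in its shutdown handler. |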
| void BlockDevice::StartThread(block_server::Thread thread) { |
| if (auto server_dispatcher = fdf::SynchronizedDispatcher::Create( |
| fdf::SynchronizedDispatcher::Options::kAllowSyncCalls, "Virtio Block Server", |
| [&](fdf_dispatcher_t* dispatcher) { fdf_dispatcher_destroy(dispatcher); }); |
| server_dispatcher.is_ok()) { |
| async::PostTask(server_dispatcher->async_dispatcher(), |
| [thread = std::move(thread)]() mutable { thread.Run(); }); |
| |
| // The dispatcher is destroyed in the shutdown handler. |
| server_dispatcher->release(); |
| } |
| } |
| |
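| // Block server callback: services a new client session on its own dispatcher, mirroring |
| // StartThread above. |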
| void BlockDevice::OnNewSession(block_server::Session session) { |
| if (auto server_dispatcher = fdf::SynchronizedDispatcher::Create( |
| fdf::SynchronizedDispatcher::Options::kAllowSyncCalls, "Block Server Session", |
| [&](fdf_dispatcher_t* dispatcher) { fdf_dispatcher_destroy(dispatcher); }); |
| server_dispatcher.is_ok()) { |
| async::PostTask(server_dispatcher->async_dispatcher(), |
| [session = std::move(session)]() mutable { session.Run(); }); |
| |
| // The dispatcher is destroyed in the shutdown handler. |
| server_dispatcher->release(); |
| } |
| } |
| |
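| // Block server callback: validates and submits each incoming request. When the device |
| // lacks barrier support, a write carrying a pre-barrier flag is preceded by an explicit |
| // synchronous flush. |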
| void BlockDevice::OnRequests(std::span<block_server::Request> requests) { |
| for (auto& request : requests) { |
| if (zx_status_t status = block_server::CheckIoRange(request, config_.capacity); |
| status != ZX_OK) { |
| FDF_LOGL(WARNING, logger(), "Invalid request range."); |
| std::lock_guard lock(block_server_lock_); |
| if (block_server_) { |
| block_server_->SendReply(request.request_id, zx::make_result(status)); |
| } |
| continue; |
| } |
| if (request.operation.tag == block_server::Operation::Tag::Write && |
| request.operation.write.options.flags.is_pre_barrier() && !supports_barriers_) { |
| zx::result status = FlushSync(request); |
| if (status.is_error()) { |
| FDF_LOGL(WARNING, logger(), "FlushSync failed: %s", status.status_string()); |
| std::lock_guard lock(block_server_lock_); |
| if (block_server_) { |
| block_server_->SendReply(request.request_id, status.take_error()); |
| } |
| continue; |
| } |
| } |
| zx::result status = SubmitBlockServerRequest(request); |
| if (status.is_error()) { |
| FDF_LOGL(WARNING, logger(), "Failed to submit request: %s", status.status_string()); |
| std::lock_guard lock(block_server_lock_); |
| if (block_server_) { |
| block_server_->SendReply(request.request_id, status.take_error()); |
| } |
| } |
| } |
| } |
| |
| void BlockDevice::ServeRequests(fidl::ServerEnd<fuchsia_hardware_block_volume::Volume> server_end) { |
| std::lock_guard lock(block_server_lock_); |
| if (block_server_) { |
| block_server_->Serve(std::move(server_end)); |
| } |
| } |
| |
| BlockDevice::BlockDevice(zx::bti bti, std::unique_ptr<Backend> backend, fdf::Logger& logger) |
| : virtio::Device(std::move(bti), std::move(backend)), logger_(logger) { |
| sync_completion_reset(&worker_signal_); |
| for (auto& time : blk_req_start_timestamps_) { |
| time = zx::time::infinite(); |
| } |
| } |
| |
| zx_status_t BlockDevice::Init() { |
| DeviceReset(); |
| CopyDeviceConfig(&config_, sizeof(config_)); |
| |
| // TODO(cja): The blk_size provided in the device configuration is only |
| // populated if a specific feature bit has been negotiated during |
| // initialization, otherwise it is 0, at least in Virtio 0.9.5. Use 512 |
| // as a default as a stopgap for now until proper feature negotiation |
| // is supported. |
| if (config_.blk_size == 0) |
| config_.blk_size = 512; |
| |
| FDF_LOG(DEBUG, "capacity %#" PRIx64 "", config_.capacity); |
| FDF_LOG(DEBUG, "size_max %#x", config_.size_max); |
| FDF_LOG(DEBUG, "seg_max %#x", config_.seg_max); |
| FDF_LOG(DEBUG, "blk_size %#x", config_.blk_size); |
| |
| DriverStatusAck(); |
| |
| uint64_t features = DeviceFeaturesSupported(); |
| if (!(features & VIRTIO_F_VERSION_1)) { |
| // The legacy (pre-1.0) interface is deliberately unsupported until there is a need. |
| FDF_LOG(ERROR, "Legacy virtio interface is not supported by this driver"); |
| return ZX_ERR_NOT_SUPPORTED; |
| } |
| if (features & VIRTIO_BLK_F_DISCARD) { |
| FDF_LOG(INFO, "virtio device supports discard"); |
| supports_discard_ = true; |
| } |
| |
| if (features & VIRTIO_BLK_F_BARRIER) { |
| FDF_LOG(INFO, "virtio device supports barriers"); |
| supports_barriers_ = true; |
| } |
| |
| features &= (VIRTIO_F_VERSION_1 | VIRTIO_BLK_F_DISCARD); |
| DriverFeaturesAck(features); |
| if (zx_status_t status = DeviceStatusFeaturesOk(); status != ZX_OK) { |
| FDF_LOG(ERROR, "Feature negotiation failed: %s", zx_status_get_string(status)); |
| return status; |
| } |
| |
| // Allocate the main vring. |
| auto err = vring_.Init(kVirtioBlkRequestQueueIndex, kRingSize); |
| if (err < 0) { |
| FDF_LOG(ERROR, "failed to allocate vring"); |
| return err; |
| } |
| |
| // Allocate one contiguous buffer holding kBlkReqCount request headers followed by one |
| // status byte per request. |
| size_t size = sizeof(virtio_blk_req_t) * kBlkReqCount + sizeof(uint8_t) * kBlkReqCount; |
| |
| auto buffer_factory = dma_buffer::CreateBufferFactory(); |
| const size_t buffer_size = fbl::round_up(size, zx_system_get_page_size()); |
| zx_status_t status = buffer_factory->CreateContiguous(bti_, buffer_size, 0, true, &blk_req_buf_); |
| if (status != ZX_OK) { |
| FDF_LOG(ERROR, "cannot alloc blk_req buffers: %s", zx_status_get_string(status)); |
| return status; |
| } |
| blk_req_ = static_cast<virtio_blk_req_t*>(blk_req_buf_->virt()); |
| |
| FDF_LOG(TRACE, "allocated blk request at %p, physical address %#" PRIxPTR "", blk_req_, |
| blk_req_buf_->phys()); |
| |
| // Status responses are one byte each (kBlkReqCount of them) at the end of the allocated block. |
| blk_res_pa_ = blk_req_buf_->phys() + sizeof(virtio_blk_req_t) * kBlkReqCount; |
| blk_res_ = reinterpret_cast<uint8_t*>( |
| (reinterpret_cast<uintptr_t>(blk_req_) + sizeof(virtio_blk_req_t) * kBlkReqCount)); |
| |
| FDF_LOG(TRACE, "allocated blk responses at %p, physical address %#" PRIxPTR "", blk_res_, |
| blk_res_pa_); |
| |
| StartIrqThread(); |
| DriverStatusOk(); |
| |
| { |
| std::lock_guard lock(block_server_lock_); |
| block_server_.emplace( |
| block_server::PartitionInfo{ |
| .device_flags = GetFlags(), |
| .block_count = GetBlockCount(), |
| .block_size = GetBlockSize(), |
| .max_transfer_size = GetMaxTransferSize(), |
| }, |
| this); |
| } |
| |
| auto worker_thread_entry = [](void* ctx) { |
| auto bd = static_cast<BlockDevice*>(ctx); |
| bd->WorkerThread(); |
| return ZX_OK; |
| }; |
| int ret = |
| thrd_create_with_name(&worker_thread_, worker_thread_entry, this, "virtio-block-worker"); |
| if (ret != thrd_success) { |
| return ZX_ERR_INTERNAL; |
| } |
| |
| auto watchdog_thread_entry = [](void* ctx) { |
| auto bd = static_cast<BlockDevice*>(ctx); |
| bd->WatchdogThread(); |
| return ZX_OK; |
| }; |
| ret = thrd_create_with_name(&watchdog_thread_, watchdog_thread_entry, this, |
| "virtio-block-watchdog"); |
| if (ret != thrd_success) { |
| return ZX_ERR_INTERNAL; |
| } |
| return ZX_OK; |
| } |
| |
| void BlockDevice::Release() { |
| watchdog_shutdown_.store(true); |
| sync_completion_signal(&watchdog_signal_); |
| thrd_join(watchdog_thread_, nullptr); |
| |
| { |
| std::lock_guard lock(block_server_lock_); |
| block_server_.reset(); |
| } |
| worker_shutdown_.store(true); |
| sync_completion_signal(&worker_signal_); |
| txn_cond_.notify_all(); |
| thrd_join(worker_thread_, nullptr); |
| virtio::Device::Release(); |
| } |
| |
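| // Walks the descriptor chain starting at |index|, returning each descriptor to the free |
| // list, and hands back the head descriptor so callers can match it against pending |
| // transactions. Requires ring_lock_. |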
| struct vring_desc* BlockDevice::FreeDescChainLocked(uint16_t index) { |
| struct vring_desc* desc = vring_.DescFromIndex(index); |
| auto head_desc = desc; // Save the first element. |
| { |
| for (;;) { |
| std::optional<uint16_t> next; |
| if (fdf::Logger::GlobalInstance()->GetSeverity() <= FUCHSIA_LOG_TRACE) { |
| virtio_dump_desc(desc); |
| } |
| if (desc->flags & VRING_DESC_F_NEXT) { |
| next = desc->next; |
| } |
| |
| vring_.FreeDesc(index); |
| |
| if (!next) { |
| // End of chain |
| break; |
| } |
| index = *next; |
| desc = vring_.DescFromIndex(index); |
| } |
| } |
| return head_desc; |
| } |
| |
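| // Called from the IRQ thread when the device returns used descriptors: reclaims each |
| // chain and completes the matching pending transaction. |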
| void BlockDevice::IrqRingUpdate() { |
| // Parse our descriptor chain and add back to the free queue. |
| auto free_chain = [this](vring_used_elem* used_elem) { |
| std::optional<uint8_t> status; |
| block_txn_t* txn = nullptr; |
| { |
| std::lock_guard<std::mutex> lock(txn_lock_); |
| struct vring_desc* head_desc; |
| { |
| std::lock_guard<std::mutex> lock2(ring_lock_); |
| head_desc = FreeDescChainLocked(static_cast<uint16_t>(used_elem->id)); |
| } |
| |
| // Search our pending txn list to see if this completes it. |
| list_for_every_entry (&pending_txn_list_, txn, block_txn_t, node) { |
| if (txn->desc == head_desc) { |
| FDF_LOG(TRACE, "completes txn %p", txn); |
| status = blk_res_[txn->req_index]; |
| // NB: We can't free the transaction's resources until we complete it, because the |
| // req_index is used to allocate requests out of the pool. |
| break; |
| } |
| } |
| } |
| |
| if (status) { |
| zx_status_t zx_status = ZX_ERR_IO; |
| switch (*status) { |
| case VIRTIO_BLK_S_OK: |
| zx_status = ZX_OK; |
| break; |
| case VIRTIO_BLK_S_IOERR: |
| break; |
| case VIRTIO_BLK_S_UNSUPP: |
| zx_status = ZX_ERR_NOT_SUPPORTED; |
| } |
| CompleteTxn(txn, zx_status); |
| } |
| }; |
| |
| // Tell the ring to find free chains and hand it back to our lambda. |
| vring_.IrqRingUpdate(free_chain); |
| } |
| |
| void BlockDevice::IrqConfigChange() {} |
| |
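| // Builds and submits the descriptor chain for |txn|: |
| //   [request header] -> [data pages...] -> [discard segment, discard only] -> [status byte] |
| // The header and status descriptors are the two slots reserved in GetMaxTransferSize(). |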
| void BlockDevice::QueueTxn(block_txn_t* txn, RequestContext context) { |
| ZX_DEBUG_ASSERT(txn); |
| |
| uint32_t type = VirtioRequestType(*txn); |
| txn->req_index = context.req_index(); |
| txn->discard_req_index = context.discard_req_index(); |
| txn->desc = context.desc(); |
| txn->pmt = context.pmt(); |
| |
| auto req = &blk_req_[txn->req_index]; |
| req->type = type; |
| req->ioprio = 0; |
| |
| // If the device does not support barriers, we issue a flush before the write instead. |
| if (txn->op.rw.command.flags & BLOCK_IO_FLAG_PRE_BARRIER && supports_barriers_) { |
| req->type |= VIRTIO_BLK_T_BARRIER; |
| } |
| |
| if (req->type == VIRTIO_BLK_T_FLUSH) { |
| req->sector = 0; |
| } else { |
| req->sector = txn->op.rw.offset_dev; |
| } |
| FDF_LOG(TRACE, "blk_req type %u ioprio %u sector %" PRIu64 "", req->type, req->ioprio, |
| req->sector); |
| |
| if (type == VIRTIO_BLK_T_DISCARD) { |
| // NOTE: if we decide to later send multiple virtio_blk_discard_write_zeroes at once, we must |
| // respect the max_discard_seg configuration of the device. |
| static_assert(sizeof(virtio_blk_discard_write_zeroes_t) <= sizeof(virtio_blk_req_t)); |
| virtio_blk_discard_write_zeroes_t* req = |
| reinterpret_cast<virtio_blk_discard_write_zeroes_t*>(&blk_req_[*txn->discard_req_index]); |
| req->sector = txn->op.trim.offset_dev; |
| req->num_sectors = txn->op.trim.length; |
| req->flags = 0; |
| FDF_LOG(TRACE, "blk_dwz_req sector %" PRIu64 " num_sectors %" PRIu32, req->sector, |
| req->num_sectors); |
| } |
| FDF_LOG(TRACE, "page count %lu", context.num_pages()); |
| |
| // Set up the head descriptor. |
| struct vring_desc* desc = context.desc(); |
| desc->addr = blk_req_buf_->phys() + txn->req_index * sizeof(virtio_blk_req_t); |
| desc->len = sizeof(virtio_blk_req_t); |
| desc->flags = VRING_DESC_F_NEXT; |
| if (fdf::Logger::GlobalInstance()->GetSeverity() <= FUCHSIA_LOG_TRACE) { |
| virtio_dump_desc(txn->desc); |
| } |
| |
| size_t bytes = type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT |
| ? static_cast<size_t>(txn->op.rw.length) * config_.blk_size |
| : 0; |
| for (size_t n = 0; n < context.num_pages(); n++) { |
| desc = vring_.DescFromIndex(desc->next); |
| desc->addr = context.pages()[n]; // |pages| are all page-aligned addresses. |
| desc->len = static_cast<uint32_t>((bytes > kPageSize) ? kPageSize : bytes); |
| if (n == 0) { |
| // First entry may not be page aligned. |
| size_t page0_offset = (txn->op.rw.offset_vmo * config_.blk_size) & kPageMask; |
| |
| // Adjust starting address. |
| desc->addr += page0_offset; |
| |
| // Trim length if necessary. |
| size_t max = kPageSize - page0_offset; |
| if (desc->len > max) { |
| desc->len = static_cast<uint32_t>(max); |
| } |
| } |
| desc->flags = VRING_DESC_F_NEXT; |
| FDF_LOG(TRACE, "pa %#lx, len %#x", desc->addr, desc->len); |
| |
| // Mark the buffer as device-writable if it's a block read. |
| if (type == VIRTIO_BLK_T_IN) { |
| desc->flags |= VRING_DESC_F_WRITE; |
| } |
| |
| bytes -= desc->len; |
| } |
| ZX_DEBUG_ASSERT(bytes == 0); |
| |
| if (type == VIRTIO_BLK_T_DISCARD) { |
| desc = vring_.DescFromIndex(desc->next); |
| desc->addr = blk_req_buf_->phys() + *context.discard_req_index() * sizeof(virtio_blk_req_t); |
| desc->len = sizeof(virtio_blk_discard_write_zeroes_t); |
| desc->flags = VRING_DESC_F_NEXT; |
| if (fdf::Logger::GlobalInstance()->GetSeverity() <= FUCHSIA_LOG_TRACE) { |
| virtio_dump_desc(desc); |
| } |
| } |
| |
| // Set up the descriptor pointing to the response. |
| desc = vring_.DescFromIndex(desc->next); |
| desc->addr = blk_res_pa_ + context.req_index(); |
| desc->len = 1; |
| desc->flags = VRING_DESC_F_WRITE; |
| if (fdf::Logger::GlobalInstance()->GetSeverity() <= FUCHSIA_LOG_TRACE) { |
| virtio_dump_desc(desc); |
| } |
| |
| { |
| std::lock_guard<std::mutex> lock(watchdog_lock_); |
| blk_req_start_timestamps_[context.req_index()] = zx::clock::get_monotonic(); |
| } |
| |
| std::lock_guard<std::mutex> lock(txn_lock_); |
| list_add_tail(&pending_txn_list_, &txn->node); |
| vring_.SubmitChain(context.desc_index()); |
| vring_.Kick(); |
| FDF_LOG(TRACE, "Submitted txn %p (desc %u)", txn, context.desc_index()); |
| context.Release(); |
| } |
| |
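| // Pins the pages backing |num_blocks| blocks of |vmo| starting at |vmo_offset_blocks| and |
| // fills |pages| with their physical addresses. On success the caller owns the returned PMT |
| // and must eventually unpin it. |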
| zx::result<zx_handle_t> BlockDevice::PinPages(zx_handle_t bti, zx_handle_t vmo, |
| uint64_t vmo_offset_blocks, uint32_t num_blocks, |
| std::array<zx_paddr_t, MAX_SCATTER>* pages, |
| size_t* num_pages) const { |
| uint64_t suboffset = (vmo_offset_blocks * config_.blk_size) & kPageMask; |
| uint64_t aligned_offset = (vmo_offset_blocks * config_.blk_size) & ~kPageMask; |
| size_t pin_size = |
| ZX_ROUNDUP(suboffset + (static_cast<uint64_t>(num_blocks) * config_.blk_size), kPageSize); |
| *num_pages = pin_size / kPageSize; |
| if (*num_pages > pages->size()) { |
| FDF_LOG(ERROR, "transaction too large"); |
| return zx::error(ZX_ERR_INVALID_ARGS); |
| } |
| |
| zx_handle_t pmt; |
| zx_status_t status = zx_bti_pin(bti, ZX_BTI_PERM_READ | ZX_BTI_PERM_WRITE, vmo, aligned_offset, |
| pin_size, pages->data(), *num_pages, &pmt); |
| if (status != ZX_OK) { |
| FDF_LOG(ERROR, "could not pin pages: %s", zx_status_get_string(status)); |
| return zx::error(status); |
| } |
| |
| return zx::ok(pmt); |
| } |
| |
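| // Issues a synchronous VIRTIO_BLK_T_FLUSH; used to emulate pre-barrier semantics for |
| // writes when the device does not support barriers. |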
| zx::result<> BlockDevice::FlushSync(const block_server::Request& request) { |
| struct Context { |
| zx_status_t status; |
| sync_completion_t completion; |
| } cookie_context; |
| |
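| // No pages are pinned for a flush, so the vmo/offset/length arguments below are unused; |
| // the |read| view of the operation union is passed only as a placeholder. |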
| zx::result<RequestContext> request_context = AllocateRequestContext( |
| VIRTIO_BLK_T_FLUSH, request.vmo->get(), request.operation.read.vmo_offset / config_.blk_size, |
| request.operation.read.block_count, nullptr); |
| if (request_context.is_error()) { |
| return request_context.take_error(); |
| } |
| block_txn_t* txn = &block_server_request_pool_[request_context->req_index()]; |
| |
| txn->request = request.request_id; |
| txn->completion_cb = +[](void* cookie, zx_status_t status, block_op_t* op) { |
| Context* context = reinterpret_cast<Context*>(cookie); |
| context->status = status; |
| sync_completion_signal(&context->completion); |
| }; |
| txn->cookie = &cookie_context; |
| txn->op.command.opcode = BLOCK_OPCODE_FLUSH; |
| |
| FlushPendingTxns(); |
| if (worker_shutdown_.load()) { |
| std::lock_guard lock1(txn_lock_); |
| std::lock_guard lock2(ring_lock_); |
| FreeRequestContext(*request_context); |
| return zx::error(ZX_ERR_IO_NOT_PRESENT); |
| } |
| |
| QueueTxn(txn, std::move(*request_context)); |
| |
| sync_completion_wait(&cookie_context.completion, ZX_TIME_INFINITE); |
| |
| return zx::make_result(cookie_context.status); |
| } |
| |
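| // Translates a block server request into a pooled block_txn_t and submits it to the |
| // device, synchronizing around flushes since the device may service requests in any order. |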
| zx::result<> BlockDevice::SubmitBlockServerRequest(const block_server::Request& request) { |
| uint32_t type = VirtioRequestType(request); |
| std::array<zx_paddr_t, MAX_SCATTER> pages; |
| zx::result<RequestContext> context = AllocateRequestContext( |
| type, request.vmo->get(), request.operation.read.vmo_offset / config_.blk_size, |
| request.operation.read.block_count, &pages); |
| if (context.is_error()) { |
| return context.take_error(); |
| } |
| block_txn_t* txn = &block_server_request_pool_[context->req_index()]; |
| |
| txn->request = request.request_id; |
| txn->completion_cb = nullptr; // Not used for block_server requests |
| |
| switch (request.operation.tag) { |
| case block_server::Operation::Tag::Read: |
| txn->op.rw.command.opcode = BLOCK_OPCODE_READ; |
| txn->op.rw.vmo = request.vmo->get(); |
| txn->op.rw.length = request.operation.read.block_count; |
| txn->op.rw.offset_dev = request.operation.read.device_block_offset; |
| txn->op.rw.offset_vmo = request.operation.read.vmo_offset / config_.blk_size; |
| break; |
| case block_server::Operation::Tag::Write: |
| txn->op.rw.command.opcode = BLOCK_OPCODE_WRITE; |
| if (request.operation.write.options.flags.is_force_access()) { |
| txn->op.rw.command.flags |= BLOCK_IO_FLAG_FORCE_ACCESS; |
| } |
| if (request.operation.write.options.flags.is_pre_barrier()) { |
| txn->op.rw.command.flags |= BLOCK_IO_FLAG_PRE_BARRIER; |
| } |
| txn->op.rw.vmo = request.vmo->get(); |
| txn->op.rw.length = request.operation.write.block_count; |
| txn->op.rw.offset_dev = request.operation.write.device_block_offset; |
| txn->op.rw.offset_vmo = request.operation.write.vmo_offset / config_.blk_size; |
| break; |
| case block_server::Operation::Tag::Trim: |
| txn->op.trim.command.opcode = BLOCK_OPCODE_TRIM; |
| txn->op.trim.length = request.operation.trim.block_count; |
| txn->op.trim.offset_dev = request.operation.trim.device_block_offset; |
| break; |
| case block_server::Operation::Tag::Flush: |
| txn->op.command.opcode = BLOCK_OPCODE_FLUSH; |
| break; |
| case block_server::Operation::Tag::CloseVmo: |
| // The Rust block server will never send CloseVmo to its C interface. |
| __UNREACHABLE; |
| } |
| |
| // A flush operation should complete after any in-flight transactions, so wait for all |
| // pending txns to complete before submitting a flush txn. This is necessary because a virtio |
| // block device may service requests in any order. |
| if (type == VIRTIO_BLK_T_FLUSH) { |
| FlushPendingTxns(); |
| if (worker_shutdown_.load()) { |
| std::lock_guard lock1(txn_lock_); |
| std::lock_guard lock2(ring_lock_); |
| FreeRequestContext(*context); |
| return zx::error(ZX_ERR_IO_NOT_PRESENT); |
| } |
| } |
| |
| QueueTxn(txn, std::move(*context)); |
| |
| // A flush operation should complete before any subsequent transactions. So, we wait |
| // for all pending transactions (including the flush) to complete before continuing. |
| if (type == VIRTIO_BLK_T_FLUSH) { |
| FlushPendingTxns(); |
| } |
| |
| return zx::ok(); |
| } |
| |
| BlockDevice::RequestContext::RequestContext(BlockDevice::RequestContext&& other) { |
| *this = std::move(other); |
| } |
| |
| BlockDevice::RequestContext& BlockDevice::RequestContext::operator=(RequestContext&& other) { |
| if (this != &other) { |
| released_ = other.released_; |
| req_index_ = other.req_index_; |
| discard_req_index_ = other.discard_req_index_; |
| desc_index_ = other.desc_index_; |
| desc_ = other.desc_; |
| pages_ = other.pages_; |
| num_pages_ = other.num_pages_; |
| pmt_ = other.pmt_; |
| other.Release(); |
| } |
| return *this; |
| } |
| |
| BlockDevice::RequestContext::~RequestContext() { |
| // RAII-style cleanup isn't a good option, because the destructor would need to take locks (see |
| // BlockDevice::FreeRequestContext), which could cause difficult-to-spot deadlocks. |
| ZX_ASSERT_MSG(released_, "Did you forget to call Release/FreeRequestContext?"); |
| } |
| |
| void BlockDevice::FreeRequestContext(BlockDevice::RequestContext& context) { |
| if (context.pmt() != ZX_HANDLE_INVALID) { |
| zx_pmt_unpin(context.pmt()); |
| } |
| if (context.desc() != nullptr) { |
| FreeDescChainLocked(context.desc_index()); |
| } |
| if (context.req_index() < kBlkReqCount) { |
| FreeBlkReqLocked(context.req_index()); |
| if (context.discard_req_index()) { |
| FreeBlkReqLocked(*context.discard_req_index()); |
| } |
| } |
| context.Release(); |
| txn_cond_.notify_all(); |
| } |
| |
| void BlockDevice::RequestContext::Release() { |
| ZX_ASSERT_MSG(!released_, "Release called twice"); |
| released_ = true; |
| } |
| |
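| // Banjo path: hands |txn| to the worker thread, failing fast if we are shutting down. |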
| void BlockDevice::SignalWorker(block_txn_t* txn) { |
| std::lock_guard lock(lock_); |
| if (worker_shutdown_.load()) { |
| CompleteTxn(txn, ZX_ERR_IO_NOT_PRESENT); |
| return; |
| } |
| list_add_tail(&worker_txn_list_, &txn->node); |
| sync_completion_signal(&worker_signal_); |
| } |
| |
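| // Drains worker_txn_list_, submitting each txn to the device, and sleeps on |
| // worker_signal_ when the list is empty. |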
| void BlockDevice::WorkerThread() { |
| auto cleanup = fit::defer([this]() { CleanupPendingTxns(); }); |
| block_txn_t* txn = nullptr; |
| for (;;) { |
| if (worker_shutdown_.load()) { |
| return; |
| } |
| |
| // Pull a txn off the list or wait to be signaled. |
| { |
| std::lock_guard lock(lock_); |
| txn = list_remove_head_type(&worker_txn_list_, block_txn_t, node); |
| } |
| if (!txn) { |
| sync_completion_wait(&worker_signal_, ZX_TIME_INFINITE); |
| sync_completion_reset(&worker_signal_); |
| continue; |
| } |
| |
| FDF_LOG(TRACE, "WorkerThread handling txn %p", txn); |
| if (zx::result result = SubmitBanjoRequest(txn); result.is_error()) { |
| if (txn->completion_cb) { |
| txn->completion_cb(txn->cookie, result.status_value(), &txn->op); |
| } |
| } |
| } |
| } |
| |
| zx::result<> BlockDevice::SubmitBanjoRequest(block_txn_t* transaction) { |
| uint32_t type = VirtioRequestType(*transaction); |
| std::array<zx_paddr_t, MAX_SCATTER> pages; |
| zx::result<RequestContext> context = |
| AllocateRequestContext(type, transaction->op.rw.vmo, transaction->op.rw.offset_vmo, |
| transaction->op.rw.length, &pages); |
| if (context.is_error()) { |
| FDF_LOG(ERROR, "failed to queue txn to hw: %s", context.status_string()); |
| return context.take_error(); |
| } |
| |
| // A flush operation should complete after any in-flight transactions, so wait for all pending |
| // txns to complete before submitting a flush txn. This is necessary because a virtio block |
| // device may service requests in any order. |
| if (type == VIRTIO_BLK_T_FLUSH) { |
| FlushPendingTxns(); |
| if (worker_shutdown_.load()) { |
| std::lock_guard lock1(txn_lock_); |
| std::lock_guard lock2(ring_lock_); |
| FreeRequestContext(*context); |
| return zx::error(ZX_ERR_IO_NOT_PRESENT); |
| } |
| } |
| |
| QueueTxn(transaction, std::move(*context)); |
| |
| // A flush operation should complete before any subsequent transactions. So, we wait for all |
| // pending transactions (including the flush) to complete before continuing. |
| if (type == VIRTIO_BLK_T_FLUSH) { |
| FlushPendingTxns(); |
| } |
| |
| return zx::ok(); |
| } |
| |
| // Thread safety: Disable thread safety analysis because TA doesn't understand std::unique_lock, |
| // which is required by std::condition_variable. |
| zx::result<BlockDevice::RequestContext> BlockDevice::AllocateRequestContext( |
| uint32_t type, zx_handle_t vmo, uint64_t vmo_offset_blocks, uint32_t num_blocks, |
| std::array<zx_paddr_t, MAX_SCATTER>* pages) TA_NO_THREAD_SAFETY_ANALYSIS { |
| for (;;) { |
| std::unique_lock lock(txn_lock_); |
| if (worker_shutdown_.load()) { |
| return zx::error(ZX_ERR_IO_NOT_PRESENT); |
| } |
| zx::result<std::optional<RequestContext>> result; |
| { |
| std::unique_lock lock2(ring_lock_); |
| result = TryAllocateRequestContextLocked(type, vmo, vmo_offset_blocks, num_blocks, pages); |
| } |
| if (result.is_error()) { |
| FDF_LOG(ERROR, "failed to allocate virtio resources: %s", result.status_string()); |
| return result.take_error(); |
| } |
| if (result.value().has_value()) { |
| return zx::ok(std::move(*result).value()); |
| } |
| |
| // No resources; try again. |
| txn_cond_.wait(lock); |
| } |
| } |
| |
| zx::result<std::optional<BlockDevice::RequestContext>> BlockDevice::TryAllocateRequestContextLocked( |
| uint32_t type, zx_handle_t vmo, uint64_t vmo_offset_blocks, uint32_t num_blocks, |
| std::array<zx_paddr_t, MAX_SCATTER>* pages) { |
| RequestContext out; |
| // Thread safety: the caller already holds txn_lock_ and ring_lock_, which FreeRequestContext requires. |
| auto cleanup = fit::defer([&]() TA_NO_THREAD_SAFETY_ANALYSIS { FreeRequestContext(out); }); |
| std::optional<size_t> idx = AllocateBlkReqLocked(); |
| if (!idx) { |
| FDF_LOG(TRACE, "too many block requests queued!"); |
| return zx::ok(std::nullopt); |
| } |
| out.set_req_index(*idx); |
| |
| if (type == VIRTIO_BLK_T_DISCARD) { |
| // A second descriptor needs to be allocated for discard requests. |
| idx = AllocateBlkReqLocked(); |
| if (!idx) { |
| FDF_LOG(TRACE, "too many block requests queued!"); |
| return zx::ok(std::nullopt); |
| } |
| out.set_discard_req_index(*idx); |
| } |
| |
| if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT) { |
| size_t num_pages; |
| zx::result result = PinPages(bti_.get(), vmo, vmo_offset_blocks, num_blocks, pages, &num_pages); |
| if (result.is_error()) { |
| return result.take_error(); |
| } |
| out.set_pages(pages->data(), num_pages); |
| out.set_pmt(result.value()); |
| } |
| |
| uint16_t num_descriptors = |
| (type == VIRTIO_BLK_T_DISCARD ? 3u : 2u) + static_cast<uint16_t>(out.num_pages()); |
| uint16_t desc_index; |
| struct vring_desc* desc = nullptr; |
| desc = vring_.AllocDescChain(num_descriptors, &desc_index); |
| if (!desc) { |
| FDF_LOG(TRACE, "failed to allocate descriptor chain of length %zu", 2u + out.num_pages()); |
| return zx::ok(std::nullopt); |
| } |
| out.set_desc(desc, desc_index); |
| cleanup.cancel(); |
| return zx::ok(std::move(out)); |
| } |
| |
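| // Wakes every kWatchdogInterval and warns about any request that has been outstanding |
| // for at least one full interval. |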
| void BlockDevice::WatchdogThread() { |
| for (;;) { |
| sync_completion_wait(&watchdog_signal_, kWatchdogInterval.get()); |
| if (watchdog_shutdown_.load()) { |
| return; |
| } |
| zx::time now = zx::clock::get_monotonic(); |
| { |
| std::lock_guard<std::mutex> lock(watchdog_lock_); |
| int idx = 0; |
| for (const auto& start_time : blk_req_start_timestamps_) { |
| if (now - kWatchdogInterval >= start_time) { |
| // Round down to the interval |
| zx::duration latency = ((now - start_time) / kWatchdogInterval) * kWatchdogInterval; |
| // LINT.IfChange(watchdog_tefmo) |
| FDF_LOG(WARNING, "txn %d has not completed after %" PRIu64 "s!", idx, latency.to_secs()); |
| // LINT.ThenChange(/tools/testing/tefmocheck/string_in_log_check.go:watchdog_tefmo) |
| } |
| idx += 1; |
| } |
| } |
| } |
| } |
| |
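| // Blocks until every pending transaction has completed (or shutdown begins). |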
| void BlockDevice::FlushPendingTxns() { |
| std::unique_lock lock(txn_lock_); |
| txn_cond_.wait(lock, |
| [&]() { return worker_shutdown_.load() || list_is_empty(&pending_txn_list_); }); |
| } |
| |
| void BlockDevice::CleanupPendingTxns() { |
| // Virtio specification 3.3.1 Driver Requirements: Device Cleanup |
| // A driver MUST ensure a virtqueue isn't live (by device reset) before removing exposed |
| // buffers. |
| DeviceReset(); |
| block_txn_t* txn = nullptr; |
| block_txn_t* temp_entry = nullptr; |
| { |
| std::lock_guard lock(lock_); |
| list_for_every_entry_safe (&worker_txn_list_, txn, temp_entry, block_txn_t, node) { |
| list_delete(&txn->node); |
| CompleteTxn(txn, ZX_ERR_IO_NOT_PRESENT); |
| } |
| } |
| std::lock_guard<std::mutex> lock(txn_lock_); |
| list_for_every_entry_safe (&pending_txn_list_, txn, temp_entry, block_txn_t, node) { |
| FreeBlkReqLocked(txn->req_index); |
| if (txn->discard_req_index) { |
| FreeBlkReqLocked(*txn->discard_req_index); |
| } |
| list_delete(&txn->node); |
| CompleteTxn(txn, ZX_ERR_IO_NOT_PRESENT); |
| } |
| } |
| |
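| // Driver entry point: publishes the Banjo compat server, initializes the virtio device, |
| // adds a child node, and exposes the Volume FIDL service. |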
| zx::result<> BlockDriver::Start() { |
| { |
| compat::DeviceServer::BanjoConfig banjo_config; |
| banjo_config.callbacks[ZX_PROTOCOL_BLOCK_IMPL] = block_impl_server_.callback(); |
| zx::result<> result = |
| compat_server_.Initialize(incoming(), outgoing(), node_name(), name(), |
| compat::ForwardMetadata::None(), std::move(banjo_config)); |
| if (result.is_error()) { |
| return result.take_error(); |
| } |
| } |
| |
| zx::result device = CreateBlockDevice(); |
| if (device.is_error()) { |
| return device.take_error(); |
| } |
| block_device_ = std::move(*device); |
| |
| zx_status_t status = block_device_->Init(); |
| if (status != ZX_OK) { |
| return zx::error(status); |
| } |
| |
| parent_node_.Bind(std::move(node())); |
| |
| auto [controller_client_end, controller_server_end] = |
| fidl::Endpoints<fuchsia_driver_framework::NodeController>::Create(); |
| |
| node_controller_.Bind(std::move(controller_client_end)); |
| |
| fidl::Arena arena; |
| |
| fidl::VectorView<fuchsia_driver_framework::wire::NodeProperty2> properties(arena, 1); |
| properties[0] = fdf::MakeProperty2(arena, bind_fuchsia::PROTOCOL, |
| static_cast<uint32_t>(ZX_PROTOCOL_BLOCK_IMPL)); |
| |
| std::vector<fuchsia_driver_framework::wire::Offer> offers = compat_server_.CreateOffers2(arena); |
| |
| const auto args = fuchsia_driver_framework::wire::NodeAddArgs::Builder(arena) |
| .name(arena, name()) |
| .offers2(arena, std::move(offers)) |
| .properties2(properties) |
| .Build(); |
| |
| auto result = parent_node_->AddChild(args, std::move(controller_server_end), {}); |
| if (!result.ok()) { |
| FDF_LOG(ERROR, "Failed to add child: %s", result.status_string()); |
| return zx::error(result.status()); |
| } |
| |
| if (zx::result result = outgoing()->AddService<fuchsia_hardware_block_volume::Service>( |
| fuchsia_hardware_block_volume::Service::InstanceHandler({ |
| .volume = |
| [this](fidl::ServerEnd<fuchsia_hardware_block_volume::Volume> server_end) { |
| block_device_->ServeRequests(std::move(server_end)); |
| }, |
| })); |
| result.is_error()) { |
| FDF_LOGL(ERROR, logger(), "Failed to add volume service instance: %s", result.status_string()); |
| return result.take_error(); |
| } |
| |
| return zx::ok(); |
| } |
| |
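| // Connects to the PCI device and builds the BTI/backend pair needed by the virtio core. |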
| zx::result<std::unique_ptr<BlockDevice>> BlockDriver::CreateBlockDevice() { |
| zx::result<fidl::ClientEnd<fuchsia_hardware_pci::Device>> pci_client_result = |
| incoming()->Connect<fuchsia_hardware_pci::Service::Device>(); |
| if (pci_client_result.is_error()) { |
| FDF_LOG(ERROR, "Failed to get pci client: %s", pci_client_result.status_string()); |
| return pci_client_result.take_error(); |
| } |
| |
| zx::result<std::pair<zx::bti, std::unique_ptr<virtio::Backend>>> bti_and_backend_result = |
| virtio::GetBtiAndBackend(std::move(pci_client_result).value()); |
| if (bti_and_backend_result.is_error()) { |
| FDF_LOG(ERROR, "GetBtiAndBackend failed: %s", bti_and_backend_result.status_string()); |
| return bti_and_backend_result.take_error(); |
| } |
| auto [bti, backend] = std::move(bti_and_backend_result).value(); |
| |
| return zx::ok(std::make_unique<BlockDevice>(std::move(bti), std::move(backend), logger())); |
| } |
| |
| void BlockDriver::PrepareStop(fdf::PrepareStopCompleter completer) { |
| if (block_device_) { |
| block_device_->Release(); |
| } |
| completer(zx::ok()); |
| } |
| |
| void BlockDriver::BlockImplQuery(block_info_t* info, size_t* bopsz) { |
| if (block_device_) { |
| block_device_->BlockImplQuery(info, bopsz); |
| } else { |
| FDF_LOG(ERROR, "BlockImplQuery called for driver that has not been started."); |
| memset(info, 0, sizeof(*info)); |
| *bopsz = 0; |
| } |
| } |
| |
| void BlockDriver::BlockImplQueue(block_op_t* bop, block_impl_queue_callback completion_cb, |
| void* cookie) { |
| if (block_device_) { |
| block_device_->BlockImplQueue(bop, completion_cb, cookie); |
| } else { |
| FDF_LOG(ERROR, "BlockImplQueue called for driver that has not been started."); |
| completion_cb(cookie, ZX_ERR_BAD_STATE, bop); |
| } |
| } |
| |
| } // namespace virtio |