| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "msd_vsi_device.h" |
| |
| #include <lib/fit/defer.h> |
| #include <lib/magma/platform/platform_barriers.h> |
| #include <lib/magma/platform/platform_logger.h> |
| #include <lib/magma/platform/platform_mmio.h> |
| #include <lib/magma/platform/platform_thread.h> |
| #include <lib/magma/platform/platform_trace.h> |
| #include <lib/magma/util/short_macros.h> |
| #include <lib/magma_service/msd.h> |
| |
| #include <algorithm> |
| #include <chrono> |
| #include <iterator> |
| #include <thread> |
| |
| #include <fbl/string_printf.h> |
| |
| #include "address_space_layout.h" |
| #include "command_buffer.h" |
| #include "instructions.h" |
| #include "magma_vendor_queries.h" |
| #include "msd_vsi_context.h" |
| #include "registers.h" |
| |
// Index of the platform interrupt passed to RegisterInterrupt() below.
static constexpr uint32_t kInterruptIndex = 0;

// MMIO buffer index of the external AXI SRAM, fetched only when HasAxiSram()
// reports the SKU has one.
static constexpr uint32_t kSramMmioIndex = 4;
| |
| MsdVsiDevice::~MsdVsiDevice() { Shutdown(); } |
| |
// Stops the interrupt and device threads, then resets the hardware so it is
// idle when we return. Must not be called on the device thread itself (it
// joins that thread). Returns false only if the final hardware reset fails.
bool MsdVsiDevice::Shutdown() {
  CHECK_THREAD_NOT_CURRENT(device_thread_id_);

  // Mask interrupt sources first so no new interrupts arrive while the
  // interrupt thread is being torn down.
  DisableInterrupts();

  stop_interrupt_thread_ = true;
  if (interrupt_) {
    // Wake the interrupt thread in case it is blocked in Wait().
    interrupt_->Signal();
  }
  if (interrupt_thread_.joinable()) {
    interrupt_thread_.join();
    DLOG("Joined interrupt thread");
  }

  stop_device_thread_ = true;

  // Wake the device thread so it notices stop_device_thread_.
  if (device_request_semaphore_) {
    device_request_semaphore_->Signal();
  }

  if (device_thread_.joinable()) {
    DLOG("joining device thread");
    device_thread_.join();
    DLOG("joined");
  }

  // Ensure hardware is idle. register_io_ may be null if Init() failed early.
  if (register_io_) {
    return HardwareReset();
  }

  return true;
}
| |
| std::unique_ptr<MsdVsiDevice> MsdVsiDevice::Create(void* device_handle, bool start_device_thread) { |
| auto device = std::make_unique<MsdVsiDevice>(); |
| |
| if (!device->Init(device_handle)) { |
| MAGMA_LOG(ERROR, "Failed to initialize device"); |
| return nullptr; |
| } |
| |
| if (start_device_thread) |
| device->StartDeviceThread(); |
| |
| return device; |
| } |
| |
| bool MsdVsiDevice::Init(void* device_handle) { |
| platform_device_ = MsdVsiPlatformDevice::Create(device_handle); |
| if (!platform_device_) { |
| MAGMA_LOG(ERROR, "Failed to create platform device"); |
| return false; |
| } |
| |
| uint32_t mmio_count = platform_device_->platform_device()->GetMmioCount(); |
| DASSERT(mmio_count > 0); |
| |
| std::unique_ptr<magma::PlatformMmio> mmio = platform_device_->platform_device()->CpuMapMmio( |
| 0, magma::PlatformMmio::CACHE_POLICY_UNCACHED_DEVICE); |
| if (!mmio) { |
| MAGMA_LOG(ERROR, "failed to map registers"); |
| return false; |
| } |
| |
| register_io_ = std::make_unique<VsiRegisterIo>(std::move(mmio), *this); |
| |
| device_id_ = registers::ChipId::Get().ReadFrom(register_io()).chip_id(); |
| customer_id_ = registers::CustomerId::Get().ReadFrom(register_io()).customer_id(); |
| chip_date_ = registers::ChipDate::Get().ReadFrom(register_io()).chip_date(); |
| product_id_ = registers::ProductId::Get().ReadFrom(register_io()).product_id(); |
| eco_id_ = registers::EcoId::Get().ReadFrom(register_io()).eco_id(); |
| DLOG("Detected vsi chip id 0x%x customer id 0x%x", device_id_, customer_id_); |
| |
| if (HasAxiSram()) { |
| external_sram_ = platform_device_->platform_device()->GetMmioBuffer(kSramMmioIndex); |
| if (!external_sram_) { |
| MAGMA_LOG(ERROR, "GetMmioBuffer(%d) failed", kSramMmioIndex); |
| return false; |
| } |
| |
| if (!external_sram_->SetCachePolicy(MAGMA_CACHE_POLICY_WRITE_COMBINING)) { |
| MAGMA_LOG(ERROR, "Failed setting cache policy on external SRAM"); |
| return false; |
| } |
| } |
| |
| if (!IsValidDeviceId()) { |
| MAGMA_LOG(ERROR, "Unsupported NPU model 0x%x\n", device_id_); |
| return false; |
| } |
| |
| revision_ = registers::Revision::Get().ReadFrom(register_io()).chip_revision(); |
| |
| gpu_features_ = std::make_unique<GpuFeatures>(register_io()); |
| DLOG("NPU features: 0x%x minor features 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", |
| gpu_features_->features().reg_value(), gpu_features_->minor_features(0), |
| gpu_features_->minor_features(1), gpu_features_->minor_features(2), |
| gpu_features_->minor_features(3), gpu_features_->minor_features(4), |
| gpu_features_->minor_features(5)); |
| DLOG("halti5: %d mmu: %d", gpu_features_->halti5(), gpu_features_->has_mmu()); |
| |
| DLOG( |
| "stream count %u register_max %u thread_count %u vertex_cache_size %u shader_core_count " |
| "%u pixel_pipes %u vertex_output_buffer_size %u\n", |
| gpu_features_->stream_count(), gpu_features_->register_max(), gpu_features_->thread_count(), |
| gpu_features_->vertex_cache_size(), gpu_features_->shader_core_count(), |
| gpu_features_->pixel_pipes(), gpu_features_->vertex_output_buffer_size()); |
| DLOG("instruction count %u buffer_size %u num_constants %u varyings_count %u\n", |
| gpu_features_->instruction_count(), gpu_features_->buffer_size(), |
| gpu_features_->num_constants(), gpu_features_->varyings_count()); |
| |
| if (Has3dPipe()) { |
| if (!gpu_features_->features().pipe_3d()) { |
| MAGMA_LOG(ERROR, "NPU has no 3d pipe: features 0x%x\n", |
| gpu_features_->features().reg_value()); |
| return false; |
| } |
| } |
| |
| bus_mapper_ = magma::PlatformBusMapper::Create( |
| platform_device_->platform_device()->GetBusTransactionInitiator()); |
| if (!bus_mapper_) { |
| MAGMA_LOG(ERROR, "failed to create bus mapper"); |
| return false; |
| } |
| |
| page_table_arrays_ = PageTableArrays::Create(bus_mapper_.get()); |
| if (!page_table_arrays_) { |
| MAGMA_LOG(ERROR, "failed to create page table arrays"); |
| return false; |
| } |
| |
| // Add a page to account for ringbuffer overfetch |
| uint32_t ringbuffer_size = AddressSpaceLayout::ringbuffer_size() + magma::page_size(); |
| DASSERT(ringbuffer_size <= AddressSpaceLayout::system_gpu_addr_size()); |
| |
| auto buffer = MsdVsiBuffer::Create(ringbuffer_size, "ring-buffer"); |
| buffer->platform_buffer()->SetCachePolicy(MAGMA_CACHE_POLICY_UNCACHED); |
| |
| ringbuffer_ = |
| std::make_unique<Ringbuffer>(std::move(buffer), AddressSpaceLayout::ringbuffer_size()); |
| |
| if (!ringbuffer_->MapCpu()) { |
| MAGMA_LOG(ERROR, "Failed to map cpu for ringbuffer"); |
| return false; |
| } |
| |
| progress_ = std::make_unique<GpuProgress>(); |
| |
| constexpr uint32_t kFirstSequenceNumber = 0x1; |
| sequencer_ = std::make_unique<Sequencer>(kFirstSequenceNumber); |
| |
| device_request_semaphore_ = magma::PlatformSemaphore::Create(); |
| |
| interrupt_ = platform_device_->platform_device()->RegisterInterrupt(kInterruptIndex); |
| if (!interrupt_) { |
| MAGMA_LOG(ERROR, "Failed to register interrupt"); |
| return false; |
| } |
| |
| page_table_slot_allocator_ = std::make_unique<PageTableSlotAllocator>(page_table_arrays_->size()); |
| |
| bool reset = HardwareReset(); |
| if (!reset) { |
| MAGMA_LOG(ERROR, "Failed to reset hardware"); |
| return false; |
| } |
| |
| HardwareInit(); |
| |
| PowerSuspend(); |
| |
| return true; |
| } |
| |
| void MsdVsiDevice::HardwareInit() { |
| { |
| auto reg = registers::PulseEater::Get().ReadFrom(register_io()); |
| reg.set_disable_internal_dfs(1); |
| reg.WriteTo(register_io()); |
| } |
| |
| { |
| auto reg = registers::IrqEnable::Get().FromValue(~0); |
| reg.WriteTo(register_io()); |
| } |
| |
| { |
| auto reg = registers::SecureAhbControl::Get().ReadFrom(register_io()); |
| reg.set_non_secure_access(1); |
| reg.WriteTo(register_io()); |
| } |
| |
| page_table_arrays_->HardwareInit(register_io()); |
| } |
| |
| void MsdVsiDevice::KillCurrentContext() { |
| // Get the context of the batch with the lowest sequence number. |
| uint32_t min_seq = UINT_MAX; |
| std::shared_ptr<MsdVsiContext> context_to_kill; |
| for (unsigned int i = 0; i < kNumEvents; i++) { |
| if (events_[i].allocated) { |
| uint32_t seq_num = events_[i].mapped_batch->GetSequenceNumber(); |
| if (seq_num < min_seq) { |
| min_seq = seq_num; |
| context_to_kill = events_[i].mapped_batch->GetContext().lock(); |
| } |
| } |
| } |
| if (context_to_kill) { |
| context_to_kill->Kill(); |
| } |
| } |
| |
| void MsdVsiDevice::Reset() { |
| HardwareReset(); |
| |
| // Save the pending batches that have been posted to the ringbuffer. |
| std::vector<DeferredRequest> pending_batches; |
| for (unsigned int i = 0; i < kNumEvents; i++) { |
| if (events_[i].allocated) { |
| auto context = events_[i].mapped_batch->GetContext().lock(); |
| if (context && !context->killed()) { |
| // Since we are going to reset the hardware state, the TLB should be invalidated. |
| // |SubmitCommandBuffer| will determine if flushing is required when switching address |
| // spaces. |
| pending_batches.emplace_back( |
| DeferredRequest{std::move(events_[i].mapped_batch), false /* do_flush */}); |
| } |
| CompleteInterruptEvent(i); |
| } |
| } |
| |
| // Ensure the batches will be requeued in the same order. |
| std::sort(pending_batches.begin(), pending_batches.end(), |
| [](const DeferredRequest& a, const DeferredRequest& b) { |
| return a.batch->GetSequenceNumber() < b.batch->GetSequenceNumber(); |
| }); |
| |
| // Prepend these batches to the backlog, which is processed before the device request list. |
| request_backlog_.insert(request_backlog_.begin(), |
| std::make_move_iterator(pending_batches.begin()), |
| std::make_move_iterator(pending_batches.end())); |
| |
| ringbuffer_->Reset(0); |
| configured_address_space_ = nullptr; |
| progress_ = std::make_unique<GpuProgress>(); |
| |
| HardwareInit(); |
| } |
| |
| void MsdVsiDevice::DisableInterrupts() { |
| if (!register_io_) { |
| DLOG("Register io was not initialized, skipping disabling interrupts"); |
| return; |
| } |
| auto reg = registers::IrqEnable::Get().FromValue(0); |
| reg.WriteTo(register_io()); |
| } |
| |
| void MsdVsiDevice::HangCheckTimeout() { |
| std::vector<std::string> dump; |
| DumpToString(&dump, false /* fault_present */); |
| |
| MAGMA_LOG(WARNING, "Suspected NPU hang:"); |
| MAGMA_LOG(WARNING, "last_interrupt_timestamp %lu", last_interrupt_timestamp_.load()); |
| |
| #if defined(MSD_VSI_VIP_ENABLE_SUSPEND) |
| MAGMA_LOG(WARNING, "Power state %u", static_cast<unsigned int>(power_state_)); |
| #endif |
| |
| for (auto& str : dump) { |
| MAGMA_LOG(WARNING, "%s", str.c_str()); |
| } |
| KillCurrentContext(); |
| Reset(); |
| |
| ProcessRequestBacklog(); |
| } |
| |
| void MsdVsiDevice::StartDeviceThread(bool disable_suspend) { |
| DASSERT(!device_thread_.joinable()); |
| device_thread_ = |
| std::thread([this, disable_suspend] { this->DeviceThreadLoop(disable_suspend); }); |
| interrupt_thread_ = std::thread([this] { this->InterruptThreadLoop(); }); |
| } |
| |
// Main loop for the device thread. Waits on the request semaphore with a
// hang-check timeout (or a short auto-suspend timeout when idle), then
// drains the device request list. Runs until |stop_device_thread_| is set
// and Shutdown() signals the semaphore.
int MsdVsiDevice::DeviceThreadLoop(bool disable_suspend) {
  magma::PlatformThreadHelper::SetCurrentThreadName("DeviceThread");

  device_thread_id_ = std::make_unique<magma::PlatformThreadId>();
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  DLOG("DeviceThreadLoop starting thread 0x%lx", device_thread_id_->id());

  const char* kRoleName = "fuchsia.graphics.drivers.msd-vsi-vip.device";
  if (!magma::PlatformThreadHelper::SetRole(platform_device_->platform_device()->GetDeviceHandle(),
                                            kRoleName)) {
    MAGMA_LOG(ERROR, "Failed to set device thread role: %s", kRoleName);
    return 0;
  }

  // Deferred lock: only taken for the short list operations below.
  std::unique_lock<std::mutex> lock(device_request_mutex_, std::defer_lock);

  while (!stop_device_thread_) {
    constexpr uint32_t kTimeoutMs = 6000;

    // Remaining time before the in-flight work is declared hung.
    auto timeout = std::chrono::duration_cast<std::chrono::milliseconds>(
        progress_->GetHangcheckTimeout(kTimeoutMs, std::chrono::steady_clock::now()));

#if defined(MSD_VSI_VIP_ENABLE_SUSPEND)
    constexpr uint32_t kWaitForSuspendMs = 10;
    // If there are no more command buffers to execute wait before suspending
    if (!disable_suspend) {
      if (progress_->IsIdle() && power_state_ != PowerState::kSuspended) {
        timeout = std::chrono::milliseconds(kWaitForSuspendMs);
      }
    }
#endif

    magma::Status status = device_request_semaphore_->Wait(timeout.count());
    switch (status.get()) {
      case MAGMA_STATUS_OK:
        break;
      case MAGMA_STATUS_TIMED_OUT: {
        // Check that there are no pending device requests.
        lock.lock();
        bool empty = device_request_list_.empty();
        lock.unlock();
        if (!empty) {
          break;
        }

#if defined(MSD_VSI_VIP_ENABLE_SUSPEND)
        // A short timeout means this wakeup was scheduled for auto-suspend,
        // not for the hang check.
        if (timeout == std::chrono::milliseconds(kWaitForSuspendMs)) {
          if (progress_->IsIdle() && power_state_ != PowerState::kSuspended) {
            StopRingBufferAndSuspend();
          }
          break;
        }
#endif

        HangCheckTimeout();
      } break;
      default:
        MAGMA_LOG(WARNING, "device_request_semaphore_ Wait failed: %d", status.get());
        DASSERT(false);
        // TODO(https://fxbug.dev/42120893): handle wait errors.
    }

    // Drain every queued request, releasing the lock while each one runs.
    while (true) {
      lock.lock();
      if (!device_request_list_.size()) {
        lock.unlock();
        break;
      }

      auto request = std::move(device_request_list_.front());
      device_request_list_.pop_front();
      lock.unlock();
      request->ProcessAndReply(this);
    }
  }

  DLOG("DeviceThreadLoop exit");
  return 0;
}
| |
| void MsdVsiDevice::EnqueueDeviceRequest(std::unique_ptr<DeviceRequest> request) { |
| std::unique_lock<std::mutex> lock(device_request_mutex_); |
| |
| // Interrupts are higher priority and placed at the front of the queue in FIFO order |
| if (request->RequestType() == InterruptRequest::kRequestType) { |
| if (device_request_list_.empty()) { |
| device_request_list_.emplace_front(std::move(request)); |
| } else { |
| for (auto it = device_request_list_.begin();; ++it) { |
| if (it == device_request_list_.end()) { |
| device_request_list_.emplace_back(std::move(request)); |
| break; |
| } else if (it->get()->RequestType() != InterruptRequest::kRequestType) { |
| device_request_list_.emplace(it, std::move(request)); |
| break; |
| } |
| } |
| } |
| } else { |
| device_request_list_.emplace_back(std::move(request)); |
| } |
| |
| device_request_semaphore_->Signal(); |
| } |
| |
// Loop for the interrupt thread. Blocks on the platform interrupt, records
// the timestamp, reads and acknowledges the IRQ status, and forwards it to
// the device thread as an InterruptRequest. Runs until
// |stop_interrupt_thread_| is set; Shutdown() signals the interrupt object
// to unblock the Wait().
int MsdVsiDevice::InterruptThreadLoop() {
  magma::PlatformThreadHelper::SetCurrentThreadName("VSI InterruptThread");
  DLOG("VSI Interrupt thread started");

  const char* kRoleName = "fuchsia.graphics.drivers.msd-vsi-vip.vsi-interrupt";
  if (!magma::PlatformThreadHelper::SetRole(platform_device_->platform_device()->GetDeviceHandle(),
                                            kRoleName)) {
    MAGMA_LOG(ERROR, "Failed to set interrupt thread role: %s", kRoleName);
    return 0;
  }

  while (!stop_interrupt_thread_) {
    interrupt_->Wait();

    // Re-check after waking: Shutdown() signals the interrupt to exit.
    if (stop_interrupt_thread_) {
      break;
    }

    last_interrupt_timestamp_ = magma::get_monotonic_ns();

    // In the field (b/280363833) we observe a crash while reading from IrqAck,
    // which indicates the hardware is suspended. This should not be possible
    // because we should not be suspending the hardware while there is work in
    // in progress. To prevent the crash we do PowerOn() here as a temporary measure
    // to reduce the impact of having the driver crash in the field.
    if (power_state() != PowerState::kOn) {
      MAGMA_LOG(ERROR, "Processing Interrupt with power state 0x%x",
                static_cast<unsigned int>(power_state()));
      PowerOn();
    }
    // Reading IrqAck acknowledges the interrupt in hardware; the decoded
    // status is handed to the device thread for processing.
    auto irqack = registers::IrqAck::Get().ReadFrom(register_io_.get());
    interrupt_->Complete();

    auto request = std::make_unique<InterruptRequest>(std::move(irqack));
    EnqueueDeviceRequest(std::move(request));
  }
  DLOG("VSI Interrupt thread exiting");
  return 0;
}
| |
// Handles an acknowledged interrupt on the device thread: completes every
// event whose bit is set in |irq_status|, advances the ringbuffer head and
// progress tracker past the highest completed sequence number, dumps state
// on anomalies, resets on MMU exceptions, and finally retries any
// backlogged submissions.
magma::Status MsdVsiDevice::ProcessInterrupt(registers::IrqAck irq_status) {
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  auto mmu_exception = irq_status.mmu_exception();
  auto bus_error = irq_status.bus_error();
  auto value = irq_status.value();
  bool do_dump = false;
  if (mmu_exception) {
    MAGMA_LOG(ERROR, "Interrupt thread received mmu_exception");
    do_dump = true;
  }
  if (bus_error) {
    MAGMA_LOG(ERROR, "Interrupt thread received bus error");
  }

  // Though events complete in order, we may receive a single interrupt for multiple events
  // simultaneously. We should update the ringbuffer head following the event with the
  // highest sequence number.
  uint32_t max_seq_num = 0;
  uint32_t rb_new_head = kInvalidRingbufferOffset;
  // Check which bits are set and complete the corresponding event.
  for (unsigned int i = 0; i < kNumEvents; i++) {
    if (value & (1 << i)) {
      const auto& batch = events_[i].mapped_batch;
      // This should never be null as |WriteInterruptEvent| does not allow it.
      // Ignore it in case it's a spurious interrupt.
      if (!batch) {
        MAGMA_LOG(
            ERROR,
            "Ignoring interrupt, event %u did not have an associated mapped batch, allocated: %d "
            "submitted: %d",
            i, events_[i].allocated, events_[i].submitted);
        do_dump = true;
        continue;
      }

      // Close out the trace span opened when the command buffer was started.
      if (batch->IsCommandBuffer()) {
        auto* buffer = static_cast<CommandBuffer*>(batch.get())->GetBatchBuffer();
        TRACE_VTHREAD_DURATION_END("magma", "Command Buffer", "NPU", buffer->id(),
                                   magma::PlatformTrace::GetCurrentTicks(), "id", buffer->id());
      }

      if (batch->GetSequenceNumber() > max_seq_num) {
        max_seq_num = batch->GetSequenceNumber();
        rb_new_head = events_[i].ringbuffer_offset;
      }
      if (!CompleteInterruptEvent(i)) {
        MAGMA_LOG(ERROR, "Failed to complete event %u", i);
      }
    }
  }
  if (max_seq_num) {
    DASSERT(rb_new_head != kInvalidRingbufferOffset);
    ringbuffer_->update_head(rb_new_head);
    progress_->Completed(max_seq_num, std::chrono::steady_clock::now());
  } else {
    // Interrupt fired but no event bit matched a submitted batch.
    MAGMA_LOG(ERROR, "Interrupt thread did not find any interrupt events");
    do_dump = true;
  }
  if (do_dump) {
    std::vector<std::string> dump;
    DumpToString(&dump, mmu_exception /* fault_present */);
#if defined(MSD_VSI_VIP_ENABLE_SUSPEND)
    MAGMA_LOG(WARNING, "Power state %u", static_cast<unsigned int>(power_state_));
#endif
    for (auto& str : dump) {
      MAGMA_LOG(WARNING, "%s", str.c_str());
    }
  }

  // An MMU exception means the faulting context must go and the hardware
  // state is unrecoverable without a reset.
  if (mmu_exception) {
    KillCurrentContext();
    Reset();
  }

  ProcessRequestBacklog();

  return MAGMA_STATUS_OK;
}
| |
| magma::Status MsdVsiDevice::ProcessDumpStatusToLog() { |
| std::vector<std::string> dump; |
| // Faults are detected on the interrupt thread. |
| DumpToString(&dump, false /* fault_present */); |
| for (auto& str : dump) { |
| MAGMA_LOG(INFO, "%s", str.c_str()); |
| } |
| return MAGMA_STATUS_OK; |
| } |
| |
// Submits deferred batches in FIFO order until the backlog is empty or no
// interrupt event slots remain. A batch whose context is gone, or that fails
// to submit, is dropped so the rest of the backlog can progress.
void MsdVsiDevice::ProcessRequestBacklog() {
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  while (!request_backlog_.empty()) {
    uint32_t event_id;
    if (!AllocInterruptEvent(true /* free_on_complete */, &event_id)) {
      // No more events available, we will continue processing after the next interrupt.
      return;
    }
    // Free the interrupt event if submitting fails.
    auto free_event = fit::defer([this, event_id]() { FreeInterruptEvent(event_id); });

    auto request = std::move(request_backlog_.front());
    request_backlog_.pop_front();

    auto context = request.batch->GetContext().lock();
    if (!context) {
      DMESSAGE("No context for batch %lu, IsCommandBuffer=%d", request.batch->GetBatchBufferId(),
               request.batch->IsCommandBuffer());
      // If a batch fails, we will drop it and try the next one.
      continue;
    }
    auto address_space = context->exec_address_space();

    if (!SubmitCommandBuffer(context, address_space->page_table_array_slot(), request.do_flush,
                             std::move(request.batch), event_id)) {
      DMESSAGE("Failed to submit command buffer");
      continue;
    }
    // Submission succeeded: the event now belongs to the in-flight batch, so
    // disarm the deferred free.
    free_event.cancel();
  }
}
| |
#if defined(MSD_VSI_VIP_ENABLE_SUSPEND)
bool MsdVsiDevice::IsSuspendSupported() const { return true; }

// Re-enables the 3d/2d clocks at the full frequency scale. Must run on the
// device thread. No-op when already on.
void MsdVsiDevice::PowerOn() {
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  if (power_state_ != PowerState::kOn) {
    auto clock_control = registers::ClockControl::Get().FromValue(0);

    clock_control.set_clk3d_dis(0);
    clock_control.set_clk2d_dis(0);
    clock_control.set_fscale_val(registers::ClockControl::kFscaleOn);
    // Pulse fscale_cmd_load (1 then 0) to latch the new frequency scale.
    clock_control.set_fscale_cmd_load(1);
    clock_control.WriteTo(register_io_.get());

    clock_control.set_fscale_cmd_load(0);
    clock_control.WriteTo(register_io_.get());

    power_state_ = PowerState::kOn;

    DLOG("NNA on");
  }
}

// Gates the 3d/2d clocks and drops to the suspend frequency scale. Must run
// on the device thread. No-op when already suspended.
void MsdVsiDevice::PowerSuspend() {
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  if (power_state_ != PowerState::kSuspended) {
    auto clock_control = registers::ClockControl::Get().FromValue(0);

    clock_control.set_clk3d_dis(1);
    clock_control.set_clk2d_dis(1);
    clock_control.set_fscale_val(registers::ClockControl::kFscaleSuspend);
    // Pulse fscale_cmd_load (1 then 0) to latch the new frequency scale.
    clock_control.set_fscale_cmd_load(1);
    clock_control.WriteTo(register_io_.get());

    clock_control.set_fscale_cmd_load(0);
    clock_control.WriteTo(register_io_.get());

    power_state_ = PowerState::kSuspended;

    DLOG("NNA suspended");
  }
}

// Stops ringbuffer execution, waits (up to 100ms) for idle, then suspends.
// Suspend is skipped if the device does not go idle; debug builds assert on
// either failure.
void MsdVsiDevice::StopRingBufferAndSuspend() {
  CHECK_THREAD_IS_CURRENT(device_thread_id_);

  if (!StopRingbuffer()) {
    MAGMA_LOG(ERROR, "Stop ring buffer for suspend failed");
    DASSERT(false);
  }

  constexpr uint32_t kTimeoutMs = 100;
  if (!WaitUntilIdle(kTimeoutMs)) {
    MAGMA_LOG(WARNING, "Timeout stopping ringbuffer for suspend");
    DASSERT(false);
  } else {
    PowerSuspend();
  }
}
#else
// Suspend support compiled out: these stubs keep the interface intact.
bool MsdVsiDevice::IsSuspendSupported() const { return false; }
void MsdVsiDevice::PowerSuspend() {}
void MsdVsiDevice::StopRingBufferAndSuspend() {}
void MsdVsiDevice::PowerOn() {}
#endif
| |
| bool MsdVsiDevice::AllocInterruptEvent(bool free_on_complete, uint32_t* out_event_id) { |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| for (uint32_t i = 0; i < kNumEvents; i++) { |
| if (!events_[i].allocated) { |
| events_[i].allocated = true; |
| events_[i].free_on_complete = free_on_complete; |
| *out_event_id = i; |
| return true; |
| } |
| } |
| |
| MAGMA_LOG(ERROR, "No events are currently available"); |
| return false; |
| } |
| |
| bool MsdVsiDevice::FreeInterruptEvent(uint32_t event_id) { |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| if (event_id >= kNumEvents) { |
| MAGMA_LOG(ERROR, "Invalid event id %u", event_id); |
| return false; |
| } |
| if (!events_[event_id].allocated) { |
| MAGMA_LOG(ERROR, "Event id %u was not allocated", event_id); |
| return false; |
| } |
| events_[event_id] = {}; |
| |
| return true; |
| } |
| |
| // Writes an event into the end of the ringbuffer. |
| bool MsdVsiDevice::WriteInterruptEvent(uint32_t event_id, std::unique_ptr<MappedBatch> mapped_batch, |
| std::shared_ptr<AddressSpace> prev_address_space) { |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| if (event_id >= kNumEvents) { |
| MAGMA_LOG(ERROR, "Invalid event id %u", event_id); |
| return false; |
| } |
| if (!events_[event_id].allocated) { |
| MAGMA_LOG(ERROR, "Event id %u was not allocated", event_id); |
| return false; |
| } |
| if (events_[event_id].submitted) { |
| MAGMA_LOG(ERROR, "Event id %u was already submitted", event_id); |
| return false; |
| } |
| if (!mapped_batch) { |
| MAGMA_LOG(ERROR, "No mapped batch was provided"); |
| return false; |
| } |
| events_[event_id].submitted = true; |
| events_[event_id].mapped_batch = std::move(mapped_batch); |
| events_[event_id].prev_address_space = prev_address_space; |
| MiEvent::write(ringbuffer_.get(), event_id); |
| |
| // Save the ringbuffer offset immediately after this event. |
| events_[event_id].ringbuffer_offset = ringbuffer_->tail(); |
| return true; |
| } |
| |
| bool MsdVsiDevice::CompleteInterruptEvent(uint32_t event_id) { |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| if (event_id >= kNumEvents) { |
| MAGMA_LOG(ERROR, "Invalid event id %u", event_id); |
| return false; |
| } |
| if (!events_[event_id].allocated || !events_[event_id].submitted) { |
| MAGMA_LOG(ERROR, "Cannot complete event %u, allocated %u submitted %u", event_id, |
| events_[event_id].allocated, events_[event_id].submitted); |
| return false; |
| } |
| num_events_completed_++; |
| |
| bool free_on_complete = events_[event_id].free_on_complete; |
| events_[event_id] = {}; |
| events_[event_id].allocated = !free_on_complete; |
| |
| return true; |
| } |
| |
| bool MsdVsiDevice::HardwareReset() { |
| DLOG("HardwareReset start"); |
| |
| constexpr uint32_t kResetTimeoutMs = 100; |
| |
| auto start = std::chrono::steady_clock::now(); |
| |
| bool is_idle, is_idle_3d; |
| |
| while (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - |
| start) |
| .count() < kResetTimeoutMs) { |
| auto clock_control = registers::ClockControl::Get().FromValue(0); |
| clock_control.set_isolate_gpu(1); |
| clock_control.WriteTo(register_io_.get()); |
| |
| #if defined(MSD_VSI_VIP_ENABLE_SUSPEND) |
| power_state_ = PowerState::kUnknown; |
| #endif |
| |
| { |
| auto reg = registers::SecureAhbControl::Get().FromValue(0); |
| reg.set_reset(1); |
| reg.WriteTo(register_io()); |
| } |
| |
| std::this_thread::sleep_for(std::chrono::microseconds(100)); |
| |
| clock_control.set_soft_reset(0); |
| clock_control.WriteTo(register_io_.get()); |
| |
| clock_control.set_isolate_gpu(0); |
| clock_control.WriteTo(register_io_.get()); |
| |
| clock_control = registers::ClockControl::Get().ReadFrom(register_io_.get()); |
| |
| is_idle = IsIdle(); |
| is_idle_3d = clock_control.idle_3d(); |
| |
| if (is_idle && is_idle_3d) { |
| DLOG("HardwareReset complete"); |
| return true; |
| } |
| } |
| |
| MAGMA_LOG(WARNING, "Hardware reset failed: is_idle %d is_idle_3d %d", is_idle, is_idle_3d); |
| return false; |
| } |
| |
| bool MsdVsiDevice::IsIdle() { |
| return registers::IdleState::Get().ReadFrom(register_io_.get()).IsIdle(); |
| } |
| |
| bool MsdVsiDevice::StopRingbuffer() { |
| if (IsIdle()) { |
| return true; |
| } |
| // Overwrite the last WAIT with an END. |
| uint32_t prev_wait_link = ringbuffer_->SubtractOffset(kWaitLinkDwords * sizeof(uint32_t)); |
| if (!ringbuffer_->Overwrite32(prev_wait_link, MiEnd::kCommandType)) { |
| MAGMA_LOG(ERROR, "Failed to overwrite WAIT in ringbuffer"); |
| return false; |
| } |
| |
| DLOG("Ringbuffer stopped (0x%X)", prev_wait_link); |
| |
| return true; |
| } |
| |
| bool MsdVsiDevice::WaitUntilIdle(uint32_t timeout_ms) { |
| TRACE_DURATION("magma", "WaitUntilIdle"); |
| |
| auto start = std::chrono::high_resolution_clock::now(); |
| while (std::chrono::duration_cast<std::chrono::milliseconds>( |
| std::chrono::high_resolution_clock::now() - start) |
| .count() < timeout_ms) { |
| if (IsIdle()) { |
| return true; |
| } |
| std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| } |
| auto idle_state = registers::IdleState::Get().ReadFrom(register_io_.get()).reg_value(); |
| |
| MAGMA_LOG(ERROR, "WaitUntilIdle failed, IdleState register: 0x%x", idle_state); |
| return false; |
| } |
| |
// Performs the one-time MMU bring-up: builds a tiny command buffer that loads
// the page table array index, executes it with the MMU still disabled, waits
// for completion, then enables the page table arrays. May only be done once;
// fails if the MMU is already enabled.
bool MsdVsiDevice::LoadInitialAddressSpace(std::shared_ptr<MsdVsiContext> context,
                                           uint32_t address_space_index) {
  // Ensure NNA is on before register access.
  PowerOn();

  // Check if we have already configured an address space and enabled the MMU.
  if (page_table_arrays_->IsEnabled(register_io())) {
    MAGMA_LOG(ERROR, "MMU already enabled");
    return false;
  }
  static constexpr uint32_t kPageCount = 1;

  std::unique_ptr<magma::PlatformBuffer> buffer =
      magma::PlatformBuffer::Create(PAGE_SIZE * kPageCount, "address space config");
  if (!buffer) {
    MAGMA_LOG(ERROR, "failed to create buffer");
    return false;
  }

  auto bus_mapping = GetBusMapper()->MapPageRangeBus(buffer.get(), 0, kPageCount);
  if (!bus_mapping) {
    MAGMA_LOG(ERROR, "failed to create bus mapping");
    return false;
  }

  uint32_t* cmd_ptr;
  if (!buffer->MapCpu(reinterpret_cast<void**>(&cmd_ptr))) {
    MAGMA_LOG(ERROR, "failed to map command buffer");
    return false;
  }

  // LOAD_STATE of the page table array config register, then END.
  BufferWriter buf_writer(cmd_ptr, magma::to_uint32(buffer->size()), 0);
  auto reg = registers::MmuPageTableArrayConfig::Get().addr();
  MiLoadState::write(&buf_writer, reg, address_space_index);
  MiEnd::write(&buf_writer);

  if (!buffer->UnmapCpu()) {
    MAGMA_LOG(ERROR, "failed to unmap cpu");
    return false;
  }
  // Flush CPU writes so the device reads the commands just written.
  if (!buffer->CleanCache(0, PAGE_SIZE * kPageCount, false)) {
    MAGMA_LOG(ERROR, "failed to clean buffer cache");
    return false;
  }

  auto res =
      SubmitCommandBufferNoMmu(bus_mapping->Get()[0], magma::to_uint32(buf_writer.bytes_written()));
  if (!res) {
    MAGMA_LOG(ERROR, "failed to submit command buffer");
    return false;
  }
  // Wait for the device to consume the buffer before |bus_mapping| goes out
  // of scope and the pages are unpinned.
  constexpr uint32_t kTimeoutMs = 1000;
  if (!WaitUntilIdle(kTimeoutMs)) {
    MAGMA_LOG(ERROR, "failed to wait for device to be idle");
    return false;
  }

  page_table_arrays_->Enable(register_io(), true);

  DLOG("Address space loaded, index %u", address_space_index);

  configured_address_space_ = context->exec_address_space();

  return true;
}
| |
// Kicks off execution of a physically-addressed command buffer with the MMU
// disabled (only used during MMU bring-up). |bus_addr| must fit in 32 bits
// and |length| must yield a prefetch count that fits in 16 bits. The
// computed prefetch (in 64-bit units) is optionally returned via
// |prefetch_out|.
bool MsdVsiDevice::SubmitCommandBufferNoMmu(uint64_t bus_addr, uint32_t length,
                                            uint16_t* prefetch_out) {
  if (bus_addr & 0xFFFFFFFF00000000ul) {
    MAGMA_LOG(ERROR, "Can't submit address > 32 bits without mmu: 0x%08lx", bus_addr);
    return false;
  }

  // Prefetch is the buffer length in 64-bit words, rounded up.
  uint32_t prefetch =
      magma::round_up(length, static_cast<uint32_t>(sizeof(uint64_t))) / sizeof(uint64_t);
  if (prefetch & 0xFFFF0000) {
    MAGMA_LOG(ERROR, "Can't submit length %u (prefetch 0x%x)", length, prefetch);
    return false;
  }

  prefetch &= 0xFFFF;
  if (prefetch_out) {
    *prefetch_out = static_cast<uint16_t>(prefetch);
  }

  DLOG("Submitting buffer at bus addr 0x%lx", bus_addr);

  // Ensure NNA is on before register access.
  PowerOn();

  auto reg_cmd_addr = registers::FetchEngineCommandAddress::Get().FromValue(0);
  reg_cmd_addr.set_addr(bus_addr & 0xFFFFFFFF);

  auto reg_cmd_ctrl = registers::FetchEngineCommandControl::Get().FromValue(0);
  reg_cmd_ctrl.set_enable(1);
  reg_cmd_ctrl.set_prefetch(prefetch);

  auto reg_sec_cmd_ctrl = registers::SecureCommandControl::Get().FromValue(0);
  reg_sec_cmd_ctrl.set_enable(1);
  reg_sec_cmd_ctrl.set_prefetch(prefetch);

  // The command address is written before the enable/prefetch controls so
  // the fetch engine starts from the intended address.
  reg_cmd_addr.WriteTo(register_io());
  reg_cmd_ctrl.WriteTo(register_io());
  reg_sec_cmd_ctrl.WriteTo(register_io());

  return true;
}
| |
// (Re)starts ringbuffer execution for |context|: resets the ringbuffer,
// writes an initial self-referencing WAIT-LINK pair, and points the fetch
// engine at it. The hardware then spins on the WAIT until a submission
// patches it into a LINK. No-op when the hardware is already running.
bool MsdVsiDevice::StartRingbuffer(std::shared_ptr<MsdVsiContext> context) {
  if (!IsIdle()) {
    return true;  // Already running and looping on WAIT-LINK.
  }
  DLOG("Starting ringbuffer");

  // On a restart of the RingBuffer the buffer needs resetting.
  ringbuffer_->Reset(0);

  uint64_t rb_gpu_addr;
  bool res = context->exec_address_space()->GetRingbufferGpuAddress(&rb_gpu_addr);
  if (!res) {
    MAGMA_LOG(ERROR, "Could not get ringbuffer NPU address");
    return false;
  }

  const uint16_t kRbPrefetch = 2;
  // Write the initial WAIT-LINK to the ringbuffer. The LINK points back to the WAIT,
  // and will keep looping until the WAIT is replaced with a LINK on command buffer submission.
  uint32_t wait_gpu_addr = magma::to_uint32(rb_gpu_addr + ringbuffer_->tail());
  MiWait::write(ringbuffer_.get());
  MiLink::write(ringbuffer_.get(), kRbPrefetch, wait_gpu_addr);

  auto reg_cmd_addr = registers::FetchEngineCommandAddress::Get().FromValue(0);
  reg_cmd_addr.set_addr(static_cast<uint32_t>(wait_gpu_addr));

  auto reg_cmd_ctrl = registers::FetchEngineCommandControl::Get().FromValue(0);
  reg_cmd_ctrl.set_enable(1);
  reg_cmd_ctrl.set_prefetch(kRbPrefetch);

  auto reg_sec_cmd_ctrl = registers::SecureCommandControl::Get().FromValue(0);
  reg_sec_cmd_ctrl.set_enable(1);
  reg_sec_cmd_ctrl.set_prefetch(kRbPrefetch);

  // Address first, then the enable/prefetch controls.
  reg_cmd_addr.WriteTo(register_io());
  reg_cmd_ctrl.WriteTo(register_io());
  reg_sec_cmd_ctrl.WriteTo(register_io());

  DLOG("Ringbuffer started (0x%X)", wait_gpu_addr);

  return true;
}
| |
| bool MsdVsiDevice::AddRingbufferWaitLink() { |
| uint64_t rb_gpu_addr; |
| bool res = configured_address_space_->GetRingbufferGpuAddress(&rb_gpu_addr); |
| if (!res) { |
| MAGMA_LOG(ERROR, "Failed to get ringbuffer NPU address"); |
| return false; |
| } |
| uint32_t wait_gpu_addr = magma::to_uint32(rb_gpu_addr) + ringbuffer_->tail(); |
| MiWait::write(ringbuffer_.get()); |
| MiLink::write(ringbuffer_.get(), 2 /* prefetch */, wait_gpu_addr); |
| return true; |
| } |
| |
// Converts the WAIT instruction at |wait_link_offset| in the ringbuffer into a
// LINK to |gpu_addr| with prefetch |dest_prefetch|. The fetch engine may be
// actively spinning on this very WAIT, so the two dwords of the instruction must
// be rewritten in a hardware-safe order: address dword first, then the opcode.
void MsdVsiDevice::LinkRingbuffer(uint32_t wait_link_offset, uint32_t gpu_addr,
                                  uint32_t dest_prefetch) {
  DASSERT(ringbuffer_->IsOffsetPopulated(wait_link_offset));
  // We can assume the instruction was written as 8 contiguous bytes.
  DASSERT(ringbuffer_->IsOffsetPopulated(wait_link_offset + sizeof(uint32_t)));

  // Replace the penultimate WAIT (before the newly added one) with a LINK to the command buffer.
  // We will first modify the second dword which specifies the address,
  // as the hardware may be executing at the address of the current WAIT.
  ringbuffer_->Overwrite32(wait_link_offset + sizeof(uint32_t), gpu_addr);
  // Ensure the address dword is visible before the opcode dword flips WAIT->LINK.
  magma::barriers::Barrier();
  ringbuffer_->Overwrite32(wait_link_offset, MiLink::kCommandType | dest_prefetch);
  magma::barriers::Barrier();
}
| |
| bool MsdVsiDevice::WriteLinkCommand(magma::PlatformBuffer* buf, uint32_t write_offset, |
| uint16_t link_prefetch, uint32_t link_addr) { |
| // Check if we have enough space for the LINK command. |
| uint32_t link_instr_size = kInstructionDwords * sizeof(uint32_t); |
| |
| if (buf->size() < write_offset + link_instr_size) { |
| MAGMA_LOG(ERROR, "Buffer does not have %d free bytes for ringbuffer LINK", link_instr_size); |
| return false; |
| } |
| |
| uint32_t* buf_cpu_addr; |
| bool res = buf->MapCpu(reinterpret_cast<void**>(&buf_cpu_addr)); |
| if (!res) { |
| MAGMA_LOG(ERROR, "Failed to map command buffer"); |
| return false; |
| } |
| |
| BufferWriter buf_writer(buf_cpu_addr, magma::to_uint32(buf->size()), write_offset); |
| MiLink::write(&buf_writer, link_prefetch, link_addr); |
| if (!buf->UnmapCpu()) { |
| MAGMA_LOG(ERROR, "Failed to unmap command buffer"); |
| return false; |
| } |
| return true; |
| } |
| |
| bool MsdVsiDevice::SubmitFlushTlb(std::shared_ptr<MsdVsiContext> context) { |
| // It's possible we may need to switch to the address space of |context|. We will use the |
| // currently configured address space until the switch occurs. The ringbuffer should already be |
| // mapped. |
| DASSERT(configured_address_space_); |
| uint64_t rb_gpu_addr; |
| bool res = configured_address_space_->GetRingbufferGpuAddress(&rb_gpu_addr); |
| if (!res) { |
| MAGMA_LOG(ERROR, "Failed to get ringbuffer NPU address"); |
| return false; |
| } |
| |
| // Save the previous WAIT LINK which will be replaced with a LINK jumping to the new commands. |
| uint32_t prev_wait_link = ringbuffer_->SubtractOffset(kWaitLinkDwords * sizeof(uint32_t)); |
| |
| uint32_t prefetch = kRbInstructionsPerFlush; |
| bool switch_address_space = |
| configured_address_space_.get() != context->exec_address_space().get(); |
| if (switch_address_space) { |
| // Need to add an additional instruction to load the address space. |
| prefetch++; |
| } |
| // We need to write the new block of ringbuffer instructions contiguously. |
| // Since only 30 concurrent events are supported, it should not be possible to run out |
| // of space in the ringbuffer. |
| bool reserved = ringbuffer_->ReserveContiguous(prefetch * sizeof(uint64_t)); |
| DASSERT(reserved); |
| |
| // Save the gpu address pointing to the new instructions so we can link to it. |
| uint32_t new_rb_instructions_start_offset = ringbuffer_->tail(); |
| uint32_t gpu_addr = magma::to_uint32(rb_gpu_addr + new_rb_instructions_start_offset); |
| |
| if (switch_address_space) { |
| auto reg = registers::MmuPageTableArrayConfig::Get().addr(); |
| MiLoadState::write(ringbuffer_.get(), reg, |
| context->exec_address_space()->page_table_array_slot()); |
| configured_address_space_ = context->exec_address_space(); |
| } |
| auto reg = registers::MmuConfig::Get().addr(); |
| // The MmuConfig register can also be used to change modes. |
| // Instruct the hardware to ignore mode change bits. |
| constexpr uint32_t kModeMask = 0x8; |
| constexpr uint32_t kFlushAllTlbs = 0x10; |
| constexpr uint32_t flush_command = kModeMask | kFlushAllTlbs; |
| MiLoadState::write(ringbuffer_.get(), reg, flush_command); |
| // These additional bits appear to be needed to ensure the fetch engine waits for any |
| // address space change to complete. |
| constexpr uint32_t kWaitAddressSpaceChange = 0x3 << 28; |
| MiSemaphore::write(ringbuffer_.get(), MiRecipient::FetchEngine, MiRecipient::PixelEngine, |
| kWaitAddressSpaceChange); |
| MiStall::write(ringbuffer_.get(), MiRecipient::FetchEngine, MiRecipient::PixelEngine, |
| kWaitAddressSpaceChange); |
| |
| if (!AddRingbufferWaitLink()) { |
| MAGMA_LOG(ERROR, "Failed to get ringbuffer NPU address"); |
| return false; |
| } |
| |
| // Verify the number of instructions we just wrote matches the prefetch value |
| // of the user buffer's LINK command. |
| DASSERT(new_rb_instructions_start_offset == |
| ringbuffer_->SubtractOffset(prefetch * sizeof(uint64_t))); |
| |
| DLOG("Submitting flush TLB command"); |
| |
| LinkRingbuffer(prev_wait_link, gpu_addr, prefetch); |
| |
| return true; |
| } |
| |
// When submitting a command buffer, we modify the following:
// 1) add a LINK from the command buffer to the end of the ringbuffer
// 2) add an EVENT and WAIT-LINK pair to the end of the ringbuffer
// 3) modify the penultimate WAIT in the ringbuffer to LINK to the command buffer
//
// On any failure path the context is killed, as the ringbuffer or command buffer
// may have been left partially modified. |event_id| must be a freshly allocated
// interrupt event; the caller frees it if this returns false.
bool MsdVsiDevice::SubmitCommandBuffer(std::shared_ptr<MsdVsiContext> context,
                                       uint32_t address_space_index, bool do_flush,
                                       std::unique_ptr<MappedBatch> mapped_batch,
                                       uint32_t event_id) {
  if (context->killed()) {
    MAGMA_LOG(ERROR, "Context killed");
    return false;
  }

  // Kills the context on every early return below; cancelled on success.
  auto kill_context = fit::defer([context]() { context->Kill(); });

  // Ensure NNA is on before register access.
  PowerOn();

  // Check if we have loaded an address space and enabled the MMU.
  bool initial_address_space_loaded = page_table_arrays_->IsEnabled(register_io());
  if (!initial_address_space_loaded) {
    if (!LoadInitialAddressSpace(context, address_space_index)) {
      MAGMA_LOG(ERROR, "Failed to load initial address space");
      return false;
    }
  }
  // Check if we have started the ringbuffer WAIT-LINK loop.
  if (IsIdle()) {
    if (!StartRingbuffer(context)) {
      MAGMA_LOG(ERROR, "Failed to start ringbuffer");
      return false;
    }
  }
  // Check if we need to switch address spaces. We should also save this copy before
  // any possible address space switch happens in |SubmitFlushTlb|.
  auto prev_address_space = configured_address_space_;
  // We always save the last address space the ringbuffer was mapped to, as we need
  // to keep the previous address space alive until the switch is completed by the hardware.
  DASSERT(prev_address_space);
  bool switch_address_space = prev_address_space.get() != context->exec_address_space().get();
  // An address space switch always requires a TLB flush.
  do_flush |= switch_address_space;
  if (do_flush && !SubmitFlushTlb(context)) {
    MAGMA_LOG(ERROR, "Failed to submit flush tlb command");
    return false;
  }
  uint64_t rb_gpu_addr;
  bool res = context->exec_address_space()->GetRingbufferGpuAddress(&rb_gpu_addr);
  if (!res) {
    MAGMA_LOG(ERROR, "Failed to get ringbuffer NPU address");
    return false;
  }
  // |gpu_addr|/|length| describe what the ringbuffer's new LINK will jump to;
  // they start as the batch itself and may be redirected to a CSB below.
  uint32_t gpu_addr = magma::to_uint32(mapped_batch->GetGpuAddress());
  uint32_t length = magma::to_uint32(magma::round_up(mapped_batch->GetLength(), sizeof(uint64_t)));

  // Number of new commands to be added to the ringbuffer - EVENT WAIT LINK.
  const uint16_t kRbPrefetch = kRbInstructionsPerBatch;
  // Offset of the WAIT-LINK pair currently terminating the ringbuffer; patched
  // into a LINK at the very end of this function.
  uint32_t prev_wait_link = ringbuffer_->SubtractOffset(kWaitLinkDwords * sizeof(uint32_t));

  // We need to write the new block of ringbuffer instructions contiguously.
  // Since only 30 concurrent events are supported, it should not be possible to run out
  // of space in the ringbuffer.
  bool reserved = ringbuffer_->ReserveContiguous(kRbPrefetch * sizeof(uint64_t));
  DASSERT(reserved);

  // Calculate where to jump to after completion of the command buffer.
  // This will point to EVENT WAIT LINK.
  uint32_t rb_complete_addr = magma::to_uint32(rb_gpu_addr + ringbuffer_->tail());

  bool is_cmd_buf = mapped_batch->IsCommandBuffer();
  if (is_cmd_buf) {
    auto* command_buf = static_cast<CommandBuffer*>(mapped_batch.get());
    magma::PlatformBuffer* buf = command_buf->GetBatchBuffer();

    TRACE_VTHREAD_DURATION_BEGIN("magma", "Command Buffer", "NPU", buf->id(),
                                 magma::PlatformTrace::GetCurrentTicks(), "id", buf->id());

    uint32_t write_offset = command_buf->GetBatchBufferWriteOffset();

    // Write a LINK at the end of the command buffer that links back to the ringbuffer.
    if (!WriteLinkCommand(buf, write_offset, kRbPrefetch, rb_complete_addr)) {
      MAGMA_LOG(ERROR, "Failed to write LINK from command buffer to ringbuffer");
      return false;
    }
    // Increment the command buffer length to account for the LINK command size.
    length += (kInstructionDwords * sizeof(uint32_t));

    // If a different context executed last, insert the context state buffer (CSB,
    // when provided) between the ringbuffer and the command buffer so hardware
    // state is restored first.
    auto prev_executed_context = prev_executed_context_.lock();
    if (!prev_executed_context || (prev_executed_context != context)) {
      auto csb = command_buf->GetContextStateBufferResource();
      if (csb) {
        auto csb_mapping = command_buf->GetContextStateBufferMapping();
        DASSERT(csb_mapping);
        // |gpu_addr| and |length| currently point to the command buffer which the ringbuffer
        // will be linking to at the end of this function. We want the ringbuffer to link
        // to the CSB instead, and the CSB to link to the command buffer.
        uint32_t cmd_buf_prefetch =
            magma::round_up(length, static_cast<uint32_t>(sizeof(uint64_t))) / sizeof(uint64_t);
        // The hardware prefetch field is only 16 bits wide.
        if (cmd_buf_prefetch & 0xFFFF0000) {
          MAGMA_LOG(ERROR, "Can't submit length %u (prefetch 0x%x)", length, cmd_buf_prefetch);
          return false;
        }
        // Write a LINK at the end of the context state buffer that links to the command buffer.
        uint32_t csb_length = magma::to_uint32(magma::round_up(csb->length, sizeof(uint64_t)));
        bool res = WriteLinkCommand(csb->buffer->platform_buffer(),
                                    magma::to_uint32(csb_length + csb->offset) /* write_offset */,
                                    static_cast<uint16_t>(cmd_buf_prefetch), gpu_addr);
        if (!res) {
          MAGMA_LOG(ERROR, "Failed to write LINK from context state buffer to command buffer");
          return false;
        }
        // Update the address the ringbuffer will link to.
        gpu_addr = magma::to_uint32(csb_mapping->gpu_addr());
        length = csb_length + (kInstructionDwords * sizeof(uint32_t));  // Additional LINK size.
      }
    }
  } else {
    // If there is no command buffer, we link directly to the new ringbuffer commands.
    gpu_addr = rb_complete_addr;
    length = kRbPrefetch * sizeof(uint64_t);
  }

  // Prefetch for the ringbuffer's LINK, measured in 64-bit instruction units.
  uint32_t prefetch =
      magma::round_up(length, static_cast<uint32_t>(sizeof(uint64_t))) / sizeof(uint64_t);
  if (prefetch & 0xFFFF0000) {
    MAGMA_LOG(ERROR, "Can't submit length %u (prefetch 0x%x)", length, prefetch);
    return false;
  }

  // Write the new commands to the end of the ringbuffer.
  // When adding new instructions, make sure to modify |kRbInstructionsPerBatch| accordingly.
  // Add an EVENT to the end to the ringbuffer.
  uint32_t new_rb_instructions_start = ringbuffer_->tail();
  // NOTE(review): this log message has a trailing '\n' unlike the others in this
  // function — confirm whether that is intended.
  if (!WriteInterruptEvent(event_id, std::move(mapped_batch), prev_address_space)) {
    MAGMA_LOG(ERROR, "Failed to write interrupt event %u\n", event_id);
    return false;
  }
  // Add a new WAIT-LINK to the end of the ringbuffer.
  if (!AddRingbufferWaitLink()) {
    MAGMA_LOG(ERROR, "Failed to add WAIT-LINK to ringbuffer");
    return false;
  }
  // Verify the number of instructions we just wrote matches the prefetch value
  // of the user buffer's LINK command.
  DASSERT(new_rb_instructions_start ==
          ringbuffer_->SubtractOffset(kRbInstructionsPerBatch * sizeof(uint64_t)));

  DLOG("Submitting buffer at NPU addr 0x%x", gpu_addr);

  // Patch the old WAIT into a LINK — the point at which the hardware starts
  // executing the newly submitted commands.
  LinkRingbuffer(prev_wait_link, gpu_addr, prefetch);

  // Save the context of the last executed command buffer. Since any command buffer may modify
  // hardware state, we should update this even if no command state buffer was provided.
  if (is_cmd_buf) {
    prev_executed_context_ = context;
  }

  kill_context.cancel();

  return true;
}
| |
| std::vector<MappedBatch*> MsdVsiDevice::GetInflightBatches() { |
| std::vector<MappedBatch*> inflight_batches; |
| inflight_batches.reserve(kNumEvents); |
| for (unsigned i = 0; i < kNumEvents; i++) { |
| if (events_[i].submitted) { |
| DASSERT(events_[i].mapped_batch != nullptr); |
| inflight_batches.push_back(events_[i].mapped_batch.get()); |
| } |
| } |
| // Sort the batches by sequence number, as the event ids may not correspond to the actual |
| // ordering. |
| std::sort(inflight_batches.begin(), inflight_batches.end(), |
| [](const MappedBatch* a, const MappedBatch* b) { |
| return a->GetSequenceNumber() < b->GetSequenceNumber(); |
| }); |
| |
| return inflight_batches; |
| } |
| |
| void MsdVsiDevice::DumpStatusToLog() { EnqueueDeviceRequest(std::make_unique<DumpRequest>()); } |
| |
| magma::Status MsdVsiDevice::SubmitBatch(std::unique_ptr<MappedBatch> batch, bool do_flush) { |
| DLOG("SubmitBatch"); |
| CHECK_THREAD_NOT_CURRENT(device_thread_id_); |
| |
| EnqueueDeviceRequest(std::make_unique<BatchRequest>(std::move(batch), do_flush)); |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma::Status MsdVsiDevice::ProcessBatch(std::unique_ptr<MappedBatch> batch, bool do_flush) { |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| auto context = batch->GetContext().lock(); |
| if (!context) { |
| MAGMA_LOG(ERROR, "No context for batch %lu, IsCommandBuffer=%d", batch->GetBatchBufferId(), |
| batch->IsCommandBuffer()); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| uint32_t sequence_number = sequencer_->next_sequence_number(); |
| batch->SetSequenceNumber(sequence_number); |
| progress_->Submitted(sequence_number, std::chrono::steady_clock::now()); |
| |
| uint32_t event_id; |
| if (!AllocInterruptEvent(true /* free_on_complete */, &event_id)) { |
| DLOG("No events remaining, deferring execution of command buffer until next interrupt"); |
| // Not an error, just need to wait for a pending command buffer to complete. |
| request_backlog_.emplace_back(DeferredRequest{std::move(batch), do_flush}); |
| return MAGMA_STATUS_OK; |
| } |
| if (!SubmitCommandBuffer(context, context->exec_address_space()->page_table_array_slot(), |
| do_flush, std::move(batch), event_id)) { |
| FreeInterruptEvent(event_id); |
| MAGMA_LOG(ERROR, "Failed to submit command buffer"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| return MAGMA_STATUS_OK; |
| } |
| |
| std::unique_ptr<MsdVsiConnection> MsdVsiDevice::OpenVsiConnection(msd::msd_client_id_t client_id) { |
| uint32_t page_table_array_slot; |
| if (!page_table_slot_allocator_->Alloc(&page_table_array_slot)) { |
| MAGMA_LOG(ERROR, "couldn't allocate page table slot"); |
| return nullptr; |
| } |
| |
| auto address_space = AddressSpace::Create(this, page_table_array_slot); |
| if (!address_space) { |
| MAGMA_LOG(ERROR, "failed to create address space"); |
| return nullptr; |
| } |
| |
| page_table_arrays_->AssignAddressSpace(page_table_array_slot, address_space.get()); |
| |
| return std::make_unique<MsdVsiConnection>(this, std::move(address_space), client_id); |
| } |
| |
| std::unique_ptr<msd::Connection> MsdVsiDevice::Open(msd::msd_client_id_t client_id) { |
| auto connection = OpenVsiConnection(client_id); |
| if (connection) { |
| return std::make_unique<MsdVsiAbiConnection>(std::move(connection)); |
| } else { |
| MAGMA_LOG(ERROR, "failed to open vsi connection"); |
| return nullptr; |
| } |
| } |
| |
| magma_status_t MsdVsiDevice::ChipIdentity(magma_vsi_vip_chip_identity* out_identity) { |
| if (!IsValidDeviceId()) { |
| // TODO(https://fxbug.dev/42113659): Read hardcoded values from features database instead. |
| MAGMA_LOG(ERROR, "unhandled device id 0x%x", device_id()); |
| return MAGMA_STATUS_UNIMPLEMENTED; |
| } |
| |
| memset(out_identity, 0, sizeof(*out_identity)); |
| out_identity->chip_model = device_id(); |
| out_identity->chip_revision = revision(); |
| out_identity->chip_date = chip_date(); |
| |
| out_identity->stream_count = gpu_features_->stream_count(); |
| out_identity->pixel_pipes = gpu_features_->pixel_pipes(); |
| out_identity->resolve_pipes = 0x0; |
| out_identity->instruction_count = gpu_features_->instruction_count(); |
| out_identity->num_constants = gpu_features_->num_constants(); |
| out_identity->varyings_count = gpu_features_->varyings_count(); |
| out_identity->gpu_core_count = 0x1; |
| |
| out_identity->product_id = product_id(); |
| out_identity->chip_flags = 0x4; |
| out_identity->eco_id = eco_id(); |
| out_identity->customer_id = customer_id(); |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma_status_t MsdVsiDevice::ChipOption(magma_vsi_vip_chip_option* out_option) { |
| if (!IsValidDeviceId()) { |
| // TODO(https://fxbug.dev/42113659): Read hardcoded values from features database instead. |
| MAGMA_LOG(ERROR, "unhandled device id 0x%x", device_id()); |
| return MAGMA_STATUS_UNIMPLEMENTED; |
| } |
| |
| memset(out_option, 0, sizeof(*out_option)); |
| out_option->gpu_profiler = false; |
| out_option->allow_fast_clear = false; |
| out_option->power_management = false; |
| out_option->enable_mmu = true; |
| out_option->compression = kVsiVipCompressionOptionNone; |
| out_option->usc_l1_cache_ratio = 0; |
| out_option->secure_mode = kVsiVipSecureModeNormal; |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma_status_t MsdVsiDevice::QuerySram(zx::vmo* out_sram) { |
| if (!external_sram_) { |
| MAGMA_LOG(ERROR, "Device has no external SRAM"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| // TODO(https://fxbug.dev/42149674): this may fail due to delays in handling client VMO release |
| if (external_sram_->HasChildren()) { |
| MAGMA_LOG(ERROR, "External SRAM has children"); |
| return MAGMA_STATUS_ACCESS_DENIED; |
| } |
| |
| void* ptr; |
| if (!external_sram_->MapCpu(&ptr)) { |
| MAGMA_LOG(ERROR, "MapCpu failed"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| // Wipe any previous content |
| memset(ptr, 0, external_sram_->size()); |
| |
| // Client looks for phys addr in the first few bytes |
| std::optional<uint64_t> sram_base = platform_device_->GetExternalSramPhysicalBase(); |
| if (!sram_base.has_value()) { |
| external_sram_->UnmapCpu(); |
| MAGMA_LOG(ERROR, "Could not get external sram physical base"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| *reinterpret_cast<uint64_t*>(ptr) = sram_base.value(); |
| |
| external_sram_->UnmapCpu(); |
| |
| uint32_t handle; |
| if (!external_sram_->CreateChild(&handle)) { |
| MAGMA_LOG(ERROR, "CreateChild failed"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| *out_sram = zx::vmo(handle); |
| |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma_status_t MsdVsiDevice::DataToBuffer(const char* name, void* data, uint64_t size, |
| zx::vmo* result_buffer_out) { |
| auto buffer = magma::PlatformBuffer::Create(size, name); |
| if (!buffer) { |
| MAGMA_LOG(ERROR, "Failed to allocate buffer"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| if (!buffer->Write(data, 0, size)) { |
| MAGMA_LOG(ERROR, "Failed to write result to buffer"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| |
| uint32_t result_buffer; |
| |
| if (!buffer->duplicate_handle(&result_buffer)) { |
| MAGMA_LOG(ERROR, "Failed to duplicate handle"); |
| return MAGMA_STATUS_INTERNAL_ERROR; |
| } |
| *result_buffer_out = zx::vmo(result_buffer); |
| |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma_status_t MsdVsiDevice::Query(uint64_t id, zx::vmo* result_buffer_out, uint64_t* result_out) { |
| switch (id) { |
| case MAGMA_QUERY_VENDOR_ID: |
| *result_out = MAGMA_VENDOR_ID_VSI; |
| break; |
| |
| case MAGMA_QUERY_VENDOR_VERSION: |
| *result_out = MAGMA_VENDOR_VERSION_VSI; |
| break; |
| |
| case MAGMA_QUERY_DEVICE_ID: |
| *result_out = device_id(); |
| break; |
| |
| case MAGMA_QUERY_IS_TOTAL_TIME_SUPPORTED: |
| *result_out = 0; |
| break; |
| |
| case kMsdVsiVendorQueryClientGpuAddrRange: { |
| uint32_t size_in_pages = AddressSpaceLayout::client_gpu_addr_size() / magma::page_size(); |
| DASSERT(size_in_pages * magma::page_size() == AddressSpaceLayout::client_gpu_addr_size()); |
| uint32_t base_in_pages = AddressSpaceLayout::client_gpu_addr_base() / magma::page_size(); |
| DASSERT(base_in_pages * magma::page_size() == AddressSpaceLayout::client_gpu_addr_base()); |
| *result_out = |
| static_cast<uint64_t>(base_in_pages) | (static_cast<uint64_t>(size_in_pages) << 32); |
| break; |
| } |
| |
| case kMsdVsiVendorQueryChipIdentity: { |
| magma_vsi_vip_chip_identity chip_id; |
| magma_status_t status = ChipIdentity(&chip_id); |
| if (status != MAGMA_STATUS_OK) |
| return status; |
| |
| return DataToBuffer("chip_identity", &chip_id, sizeof(chip_id), result_buffer_out); |
| } |
| |
| case kMsdVsiVendorQueryChipOption: { |
| magma_vsi_vip_chip_option chip_opt; |
| magma_status_t status = ChipOption(&chip_opt); |
| if (status != MAGMA_STATUS_OK) |
| return status; |
| |
| return DataToBuffer("chip_option", &chip_opt, sizeof(chip_opt), result_buffer_out); |
| } |
| |
| case kMsdVsiVendorQueryExternalSram: |
| return QuerySram(result_buffer_out); |
| |
| default: |
| MAGMA_LOG(ERROR, "unhandled id %" PRIu64, id); |
| return MAGMA_STATUS_INVALID_ARGS; |
| } |
| |
| return MAGMA_STATUS_OK; |
| } |
| |
// |dump_type| is currently ignored; every dump request goes to the log.
void MsdVsiDevice::DumpStatus(uint32_t dump_type) { DumpStatusToLog(); }
| |
| magma_status_t MsdVsiDevice::GetIcdList(std::vector<msd::MsdIcdInfo>* icd_info_out) { |
| const char* kSuffixes[] = {"_test", ""}; |
| auto& icd_info = *icd_info_out; |
| icd_info.clear(); |
| icd_info.resize(std::size(kSuffixes)); |
| for (uint32_t i = 0; i < std::size(kSuffixes); i++) { |
| icd_info[i].component_url = fbl::StringPrintf( |
| "fuchsia-pkg://fuchsia.com/libopencl_vsi_vip%s#meta/opencl.cm", kSuffixes[i]); |
| icd_info[i].support_flags = msd::ICD_SUPPORT_FLAG_OPENCL; |
| } |
| return MAGMA_STATUS_OK; |
| } |