| // Copyright 2017 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "msd_arm_device.h" |
| |
| #include <fbl/algorithm.h> |
| #include <fbl/string_printf.h> |
| |
| #include <bitset> |
| #include <cinttypes> |
| #include <cstdio> |
| #include <string> |
| |
| #include "job_scheduler.h" |
| #include "magma_util/dlog.h" |
| #include "magma_util/macros.h" |
| #include "magma_vendor_queries.h" |
| #include "platform_barriers.h" |
| #include "platform_port.h" |
| #include "platform_trace.h" |
| |
| #include "registers.h" |
| |
| // This is the index into the MMIO section of the MDI. |
| enum MmioIndex { |
| kMmioIndexRegisters = 0, |
| }; |
| |
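| // Indices into the platform device's interrupt list, used with RegisterInterrupt(). |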
| enum InterruptIndex { |
| kInterruptIndexJob = 0, |
| kInterruptIndexMmu = 1, |
| kInterruptIndexGpu = 2, |
| }; |
| |
| class MsdArmDevice::DumpRequest : public DeviceRequest { |
| public: |
| DumpRequest() {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override |
| { |
| return device->ProcessDumpStatusToLog(); |
| } |
| }; |
| |
| class MsdArmDevice::PerfCounterSampleCompletedRequest : public DeviceRequest { |
| public: |
| PerfCounterSampleCompletedRequest() {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override |
| { |
| return device->ProcessPerfCounterSampleCompleted(); |
| } |
| }; |
| |
| class MsdArmDevice::JobInterruptRequest : public DeviceRequest { |
| public: |
| JobInterruptRequest() {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override { return device->ProcessJobInterrupt(); } |
| }; |
| |
| class MsdArmDevice::MmuInterruptRequest : public DeviceRequest { |
| public: |
| MmuInterruptRequest() {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override { return device->ProcessMmuInterrupt(); } |
| }; |
| |
| class MsdArmDevice::ScheduleAtomRequest : public DeviceRequest { |
| public: |
| ScheduleAtomRequest() {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override { return device->ProcessScheduleAtoms(); } |
| }; |
| |
| class MsdArmDevice::CancelAtomsRequest : public DeviceRequest { |
| public: |
| CancelAtomsRequest(std::shared_ptr<MsdArmConnection> connection) : connection_(connection) {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override |
| { |
| return device->ProcessCancelAtoms(connection_); |
| } |
| |
| std::weak_ptr<MsdArmConnection> connection_; |
| }; |
| |
| class MsdArmDevice::PerfCounterRequest : public DeviceRequest { |
| public: |
| PerfCounterRequest(uint32_t type) : type_(type) {} |
| |
| protected: |
| magma::Status Process(MsdArmDevice* device) override |
| { |
| return device->ProcessPerfCounterRequest(type_); |
| } |
| |
| uint32_t type_; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| |
| std::unique_ptr<MsdArmDevice> MsdArmDevice::Create(void* device_handle, bool start_device_thread) |
| { |
| auto device = std::make_unique<MsdArmDevice>(); |
| |
| if (!device->Init(device_handle)) |
| return DRETP(nullptr, "Failed to initialize MsdArmDevice"); |
| |
| if (start_device_thread) |
| device->StartDeviceThread(); |
| |
| return device; |
| } |
| |
| MsdArmDevice::MsdArmDevice() { magic_ = kMagic; } |
| |
| MsdArmDevice::~MsdArmDevice() { Destroy(); } |
| |
| void MsdArmDevice::Destroy() |
| { |
| DLOG("Destroy"); |
| CHECK_THREAD_NOT_CURRENT(device_thread_id_); |
| |
| DisableInterrupts(); |
| |
| interrupt_thread_quit_flag_ = true; |
| |
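| // Wake each interrupt thread so it can observe the quit flag and exit. |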
| if (gpu_interrupt_) |
| gpu_interrupt_->Signal(); |
| if (job_interrupt_) |
| job_interrupt_->Signal(); |
| if (mmu_interrupt_) |
| mmu_interrupt_->Signal(); |
| |
| if (gpu_interrupt_thread_.joinable()) { |
| DLOG("joining GPU interrupt thread"); |
| gpu_interrupt_thread_.join(); |
| DLOG("joined"); |
| } |
| if (job_interrupt_thread_.joinable()) { |
| DLOG("joining Job interrupt thread"); |
| job_interrupt_thread_.join(); |
| DLOG("joined"); |
| } |
| if (mmu_interrupt_thread_.joinable()) { |
| DLOG("joining MMU interrupt thread"); |
| mmu_interrupt_thread_.join(); |
| DLOG("joined"); |
| } |
| device_thread_quit_flag_ = true; |
| |
| if (device_request_semaphore_) |
| device_request_semaphore_->Signal(); |
| |
| if (device_thread_.joinable()) { |
| DLOG("joining device thread"); |
| device_thread_.join(); |
| DLOG("joined"); |
| } |
| } |
| |
| bool MsdArmDevice::Init(void* device_handle) |
| { |
| DLOG("Init"); |
| platform_device_ = magma::PlatformDevice::Create(device_handle); |
| if (!platform_device_) |
| return DRETF(false, "Failed to initialize device"); |
| |
| std::unique_ptr<magma::PlatformMmio> mmio = platform_device_->CpuMapMmio( |
| kMmioIndexRegisters, magma::PlatformMmio::CACHE_POLICY_UNCACHED_DEVICE); |
| if (!mmio) |
| return DRETF(false, "failed to map registers"); |
| |
| register_io_ = std::make_unique<magma::RegisterIo>(std::move(mmio)); |
| |
| gpu_features_.ReadFrom(register_io_.get()); |
| magma::log(magma::LOG_INFO, "ARM mali ID %x", gpu_features_.gpu_id.reg_value()); |
| |
| #if defined(MSD_ARM_ENABLE_CACHE_COHERENCY) |
| if (gpu_features_.coherency_features.ace().get()) { |
| cache_coherency_status_ = kArmMaliCacheCoherencyAce; |
| } else { |
| magma::log(magma::LOG_INFO, "Cache coherency unsupported"); |
| } |
| #endif |
| |
| reset_semaphore_ = magma::PlatformSemaphore::Create(); |
| |
| device_request_semaphore_ = magma::PlatformSemaphore::Create(); |
| device_port_ = magma::PlatformPort::Create(); |
| |
| power_manager_ = std::make_unique<PowerManager>(register_io_.get()); |
| perf_counters_ = std::make_unique<PerformanceCounters>(this); |
| scheduler_ = std::make_unique<JobScheduler>(this, 3); |
| address_manager_ = std::make_unique<AddressManager>(this, gpu_features_.address_space_count); |
| |
| bus_mapper_ = magma::PlatformBusMapper::Create(platform_device_->GetBusTransactionInitiator()); |
| if (!bus_mapper_) |
| return DRETF(false, "Failed to create bus mapper"); |
| |
| if (!InitializeInterrupts()) |
| return false; |
| |
| return InitializeHardware(); |
| } |
| |
| bool MsdArmDevice::InitializeHardware() |
| { |
| cycle_counter_refcount_ = 0; |
| DASSERT(registers::GpuStatus::Get().ReadFrom(register_io_.get()).cycle_count_active().get() == |
| 0); |
| EnableInterrupts(); |
| InitializeHardwareQuirks(&gpu_features_, register_io_.get()); |
| |
| uint64_t enabled_cores = 1; |
| #if defined(MSD_ARM_ENABLE_ALL_CORES) |
| enabled_cores = gpu_features_.shader_present; |
| #endif |
| power_manager_->EnableCores(register_io_.get(), enabled_cores); |
| |
| return true; |
| } |
| |
| std::shared_ptr<MsdArmConnection> MsdArmDevice::Open(msd_client_id_t client_id) |
| { |
| auto connection = MsdArmConnection::Create(client_id, this); |
| if (connection) { |
| std::lock_guard<std::mutex> lock(connection_list_mutex_); |
| connection_list_.push_back(connection); |
| } |
| return connection; |
| } |
| |
| void MsdArmDevice::DeregisterConnection() |
| { |
| std::lock_guard<std::mutex> lock(connection_list_mutex_); |
| connection_list_.erase(std::remove_if(connection_list_.begin(), connection_list_.end(), |
| [](auto& connection) { return connection.expired(); }), |
| connection_list_.end()); |
| } |
| |
| void MsdArmDevice::DumpStatusToLog() { EnqueueDeviceRequest(std::make_unique<DumpRequest>()); } |
| |
| void MsdArmDevice::OutputHangMessage() |
| { |
| magma::log(magma::LOG_WARNING, "Possible GPU hang\n"); |
| ProcessDumpStatusToLog(); |
| } |
| |
| int MsdArmDevice::DeviceThreadLoop() |
| { |
| magma::PlatformThreadHelper::SetCurrentThreadName("DeviceThread"); |
| |
| device_thread_id_ = std::make_unique<magma::PlatformThreadId>(); |
| CHECK_THREAD_IS_CURRENT(device_thread_id_); |
| |
| DLOG("DeviceThreadLoop starting thread 0x%lx", device_thread_id_->id()); |
| |
| std::unique_lock<std::mutex> lock(device_request_mutex_, std::defer_lock); |
| device_request_semaphore_->WaitAsync(device_port_.get()); |
| |
| while (!device_thread_quit_flag_) { |
| auto timeout_duration = scheduler_->GetCurrentTimeoutDuration(); |
| if (timeout_duration <= JobScheduler::Clock::duration::zero()) { |
| scheduler_->HandleTimedOutAtoms(); |
| continue; |
| } |
| uint64_t key; |
| magma::Status status(MAGMA_STATUS_OK); |
| if (timeout_duration < JobScheduler::Clock::duration::max()) { |
| // Add 1 to avoid rounding time down and spinning with timeouts close to 0. |
| int64_t millisecond_timeout = |
| std::chrono::duration_cast<std::chrono::milliseconds>(timeout_duration).count() + 1; |
| status = device_port_->Wait(&key, millisecond_timeout); |
| } else { |
| status = device_port_->Wait(&key); |
| } |
| if (status.ok()) { |
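| // A signal on the request semaphore means the request list has work to drain; |
| // any other port key is forwarded to the scheduler. |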
| if (key == device_request_semaphore_->id()) { |
| device_request_semaphore_->Reset(); |
| device_request_semaphore_->WaitAsync(device_port_.get()); |
| while (!device_thread_quit_flag_) { |
| lock.lock(); |
| if (device_request_list_.empty()) { |
| lock.unlock(); |
| break; |
| } |
| auto request = std::move(device_request_list_.front()); |
| device_request_list_.pop_front(); |
| lock.unlock(); |
| request->ProcessAndReply(this); |
| } |
| } else { |
| scheduler_->PlatformPortSignaled(key); |
| } |
| } |
| } |
| |
| DLOG("DeviceThreadLoop exit"); |
| return 0; |
| } |
| |
| int MsdArmDevice::GpuInterruptThreadLoop() |
| { |
| magma::PlatformThreadHelper::SetCurrentThreadName("Gpu InterruptThread"); |
| DLOG("GPU Interrupt thread started"); |
| |
| while (!interrupt_thread_quit_flag_) { |
| DLOG("GPU waiting for interrupt"); |
| gpu_interrupt_->Wait(); |
| DLOG("GPU Returned from interrupt wait!"); |
| |
| if (interrupt_thread_quit_flag_) |
| break; |
| |
| auto irq_status = registers::GpuIrqFlags::GetStatus().ReadFrom(register_io_.get()); |
| |
| if (!irq_status.reg_value()) { |
| magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ with no flags set\n"); |
| } |
| |
| auto clear_flags = registers::GpuIrqFlags::GetIrqClear().FromValue(irq_status.reg_value()); |
| // Handle GPU interrupts on this thread, rather than forwarding them to the device thread, |
| // so the device thread can block waiting for them to complete (e.g. on reset). |
| if (irq_status.reset_completed().get()) { |
| DLOG("Received GPU reset completed"); |
| reset_semaphore_->Signal(); |
| irq_status.reset_completed().set(0); |
| } |
| if (irq_status.power_changed_single().get() || irq_status.power_changed_all().get()) { |
| irq_status.power_changed_single().set(0); |
| irq_status.power_changed_all().set(0); |
| power_manager_->ReceivedPowerInterrupt(register_io_.get()); |
| if (power_manager_->l2_ready_status() && |
| (cache_coherency_status_ == kArmMaliCacheCoherencyAce)) { |
| auto enable_reg = registers::CoherencyFeatures::GetEnable().FromValue(0); |
| enable_reg.ace().set(true); |
| enable_reg.WriteTo(register_io_.get()); |
| } |
| } |
| |
| if (irq_status.performance_counter_sample_completed().get()) { |
| irq_status.performance_counter_sample_completed().set(0); |
| // Don't wait for a reply, to ensure there's no deadlock. Clearing the interrupt flag |
| // before the interrupt is actually processed shouldn't matter, because perf_counters_ |
| // ensures only one request happens at a time. |
| EnqueueDeviceRequest(std::make_unique<PerfCounterSampleCompletedRequest>(), true); |
| } |
| |
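| // Any flags still set at this point are unexpected and likely indicate a GPU fault, |
| // so log the fault address and dump device state. |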
| if (irq_status.reg_value()) { |
| magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ %d\n", irq_status.reg_value()); |
| uint64_t fault_addr = |
| registers::GpuFaultAddress::Get().ReadFrom(register_io_.get()).reg_value(); |
| { |
| std::lock_guard<std::mutex> lock(connection_list_mutex_); |
| for (auto& connection : connection_list_) { |
| auto locked = connection.lock(); |
| if (locked) { |
| uint64_t virtual_address; |
| if (locked->GetVirtualAddressFromPhysical(fault_addr, &virtual_address)) |
| magma::log(magma::LOG_WARNING, |
| "Client %lx has VA %lx mapped to PA %lx\n", |
| locked->client_id(), virtual_address, fault_addr); |
| } |
| } |
| } |
| |
| // Perform the GPU dump immediately, because clearing the irq flags might cause another |
| // GPU fault to be generated, which could overwrite the earlier data. |
| std::string dump; |
| DumpToString(dump, false); |
| magma::log(magma::LOG_INFO, "GPU fault status: %s", dump.c_str()); |
| } |
| |
| if (clear_flags.reg_value()) { |
| clear_flags.WriteTo(register_io_.get()); |
| } |
| } |
| |
| DLOG("GPU Interrupt thread exited"); |
| return 0; |
| } |
| |
| magma::Status MsdArmDevice::ProcessPerfCounterSampleCompleted() |
| { |
| DLOG("Perf Counter sample completed"); |
| |
| uint64_t duration_ms = 0; |
| std::vector<uint32_t> perf_result = perf_counters_->ReadCompleted(&duration_ms); |
| |
| magma::log(magma::LOG_INFO, "Performance counter read complete, duration %lu ms:\n", |
| duration_ms); |
| for (uint32_t i = 0; i < perf_result.size(); ++i) { |
| magma::log(magma::LOG_INFO, "Performance counter %d: %u\n", i, perf_result[i]); |
| } |
| return MAGMA_STATUS_OK; |
| } |
| |
| int MsdArmDevice::JobInterruptThreadLoop() |
| { |
| magma::PlatformThreadHelper::SetCurrentThreadName("Job InterruptThread"); |
| DLOG("Job Interrupt thread started"); |
| |
| while (!interrupt_thread_quit_flag_) { |
| DLOG("Job waiting for interrupt"); |
| job_interrupt_->Wait(); |
| DLOG("Job Returned from interrupt wait!"); |
| |
| if (interrupt_thread_quit_flag_) |
| break; |
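| // Forward the interrupt to the device thread and wait for it to be processed |
| // (ProcessJobInterrupt calls Complete()) before waiting on the interrupt again. |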
| auto request = std::make_unique<JobInterruptRequest>(); |
| auto reply = request->GetReply(); |
| EnqueueDeviceRequest(std::move(request), true); |
| reply->Wait(); |
| } |
| |
| DLOG("Job Interrupt thread exited"); |
| return 0; |
| } |
| |
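| // Returns true if the result code is one the hardware is known to report; callers map |
| // anything else to kArmMaliResultUnknownFault. |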
| static bool IsHardwareResultCode(uint32_t result) |
| { |
| switch (result) { |
| case kArmMaliResultSuccess: |
| case kArmMaliResultSoftStopped: |
| case kArmMaliResultAtomTerminated: |
| |
| case kArmMaliResultConfigFault: |
| case kArmMaliResultPowerFault: |
| case kArmMaliResultReadFault: |
| case kArmMaliResultWriteFault: |
| case kArmMaliResultAffinityFault: |
| case kArmMaliResultBusFault: |
| |
| case kArmMaliResultProgramCounterInvalidFault: |
| case kArmMaliResultEncodingInvalidFault: |
| case kArmMaliResultTypeMismatchFault: |
| case kArmMaliResultOperandFault: |
| case kArmMaliResultTlsFault: |
| case kArmMaliResultBarrierFault: |
| case kArmMaliResultAlignmentFault: |
| case kArmMaliResultDataInvalidFault: |
| case kArmMaliResultTileRangeFault: |
| case kArmMaliResultOutOfMemoryFault: |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| magma::Status MsdArmDevice::ProcessJobInterrupt() |
| { |
| TRACE_DURATION("magma", "MsdArmDevice::ProcessJobInterrupt"); |
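| // Keep draining the raw status register until no flags remain, since new job |
| // completions can be raised while earlier ones are being processed. |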
| while (true) { |
| auto irq_status = registers::JobIrqFlags::GetRawStat().ReadFrom(register_io_.get()); |
| if (!irq_status.reg_value()) |
| break; |
| auto clear_flags = registers::JobIrqFlags::GetIrqClear().FromValue(irq_status.reg_value()); |
| clear_flags.WriteTo(register_io_.get()); |
| DLOG("Processing job interrupt status %x", irq_status.reg_value()); |
| |
| bool dumped_on_failure = false; |
| uint32_t failed = irq_status.failed_slots().get(); |
| while (failed) { |
| uint32_t slot = __builtin_ffs(failed) - 1; |
| registers::JobSlotRegisters regs(slot); |
| uint32_t raw_result = regs.Status().ReadFrom(register_io_.get()).reg_value(); |
| uint32_t result = |
| IsHardwareResultCode(raw_result) ? raw_result : kArmMaliResultUnknownFault; |
| |
| // Soft stopping isn't counted as an actual failure. |
| if (result != kArmMaliResultSoftStopped && !dumped_on_failure) { |
| magma::log(magma::LOG_WARNING, "Got failed slot bitmask %x with result code %x\n", |
| irq_status.failed_slots().get(), raw_result); |
| ProcessDumpStatusToLog(); |
| dumped_on_failure = true; |
| } |
| |
| uint64_t job_tail = regs.Tail().ReadFrom(register_io_.get()).reg_value(); |
| |
| scheduler_->JobCompleted(slot, static_cast<ArmMaliResultCode>(result), job_tail); |
| failed &= ~(1 << slot); |
| } |
| |
| uint32_t finished = irq_status.finished_slots().get(); |
| while (finished) { |
| uint32_t slot = __builtin_ffs(finished) - 1; |
| scheduler_->JobCompleted(slot, kArmMaliResultSuccess, 0u); |
| finished &= ~(1 << slot); |
| } |
| } |
| job_interrupt_->Complete(); |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma::Status MsdArmDevice::ProcessMmuInterrupt() |
| { |
| auto irq_status = registers::MmuIrqFlags::GetStatus().ReadFrom(register_io_.get()); |
| DLOG("Received MMU IRQ status 0x%x\n", irq_status.reg_value()); |
| |
| uint32_t faulted_slots = irq_status.pf_flags().get() | irq_status.bf_flags().get(); |
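| // Handle page faults and bus faults one address-space slot at a time. |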
| while (faulted_slots) { |
| uint32_t slot = __builtin_ffs(faulted_slots) - 1; |
| |
| // Clear all flags before attempting to page in memory; otherwise, if the atom |
| // continues executing, the next interrupt may be lost. |
| auto clear_flags = registers::MmuIrqFlags::GetIrqClear().FromValue(0); |
| clear_flags.pf_flags().set(1 << slot); |
| clear_flags.bf_flags().set(1 << slot); |
| clear_flags.WriteTo(register_io_.get()); |
| |
| std::shared_ptr<MsdArmConnection> connection; |
| { |
| auto mapping = address_manager_->GetMappingForSlot(slot); |
| if (!mapping) { |
| magma::log(magma::LOG_WARNING, "Fault on idle slot %d\n", slot); |
| } else { |
| connection = mapping->connection(); |
| } |
| } |
| if (connection) { |
| uint64_t address = registers::AsRegisters(slot) |
| .FaultAddress() |
| .ReadFrom(register_io_.get()) |
| .reg_value(); |
| bool kill_context = true; |
| if (irq_status.bf_flags().get() & (1 << slot)) { |
| magma::log(magma::LOG_WARNING, "Bus fault at address 0x%lx on slot %d\n", address, |
| slot); |
| } else { |
| if (connection->PageInMemory(address)) { |
| DLOG("Paged in address %lx\n", address); |
| kill_context = false; |
| } else { |
| magma::log(magma::LOG_WARNING, "Failed to page in address 0x%lx on slot %d\n", |
| address, slot); |
| } |
| } |
| if (kill_context) { |
| ProcessDumpStatusToLog(); |
| |
| connection->set_address_space_lost(); |
| scheduler_->ReleaseMappingsForConnection(connection); |
| // This will invalidate the address slot, causing the job to die |
| // with a fault. |
| address_manager_->ReleaseSpaceMappings(connection->const_address_space()); |
| } |
| } |
| faulted_slots &= ~(1 << slot); |
| } |
| |
| mmu_interrupt_->Complete(); |
| return MAGMA_STATUS_OK; |
| } |
| |
| int MsdArmDevice::MmuInterruptThreadLoop() |
| { |
| magma::PlatformThreadHelper::SetCurrentThreadName("MMU InterruptThread"); |
| DLOG("MMU Interrupt thread started"); |
| |
| while (!interrupt_thread_quit_flag_) { |
| DLOG("MMU waiting for interrupt"); |
| mmu_interrupt_->Wait(); |
| DLOG("MMU Returned from interrupt wait!"); |
| |
| if (interrupt_thread_quit_flag_) |
| break; |
| auto request = std::make_unique<MmuInterruptRequest>(); |
| auto reply = request->GetReply(); |
| EnqueueDeviceRequest(std::move(request), true); |
| reply->Wait(); |
| } |
| |
| DLOG("MMU Interrupt thread exited"); |
| return 0; |
| } |
| |
| void MsdArmDevice::StartDeviceThread() |
| { |
| DASSERT(!device_thread_.joinable()); |
| device_thread_ = std::thread([this] { this->DeviceThreadLoop(); }); |
| |
| gpu_interrupt_thread_ = std::thread([this] { this->GpuInterruptThreadLoop(); }); |
| job_interrupt_thread_ = std::thread([this] { this->JobInterruptThreadLoop(); }); |
| mmu_interrupt_thread_ = std::thread([this] { this->MmuInterruptThreadLoop(); }); |
| } |
| |
| bool MsdArmDevice::InitializeInterrupts() |
| { |
| // The reset-completed flag may already be set when the device is initialized. |
| // Clear it so we don't get a spurious interrupt. |
| auto clear_flags = registers::GpuIrqFlags::GetIrqClear().FromValue(0xffffffff); |
| clear_flags.WriteTo(register_io_.get()); |
| |
| gpu_interrupt_ = platform_device_->RegisterInterrupt(kInterruptIndexGpu); |
| if (!gpu_interrupt_) |
| return DRETF(false, "failed to register GPU interrupt"); |
| |
| job_interrupt_ = platform_device_->RegisterInterrupt(kInterruptIndexJob); |
| if (!job_interrupt_) |
| return DRETF(false, "failed to register JOB interrupt"); |
| |
| mmu_interrupt_ = platform_device_->RegisterInterrupt(kInterruptIndexMmu); |
| if (!mmu_interrupt_) |
| return DRETF(false, "failed to register MMU interrupt"); |
| |
| return true; |
| } |
| |
| void MsdArmDevice::EnableInterrupts() |
| { |
| auto gpu_flags = registers::GpuIrqFlags::GetIrqMask().FromValue(0xffffffff); |
| gpu_flags.WriteTo(register_io_.get()); |
| |
| auto mmu_flags = registers::MmuIrqFlags::GetIrqMask().FromValue(0xffffffff); |
| mmu_flags.WriteTo(register_io_.get()); |
| |
| auto job_flags = registers::JobIrqFlags::GetIrqMask().FromValue(0xffffffff); |
| job_flags.WriteTo(register_io_.get()); |
| } |
| |
| void MsdArmDevice::DisableInterrupts() |
| { |
| if (!register_io_) |
| return; |
| auto gpu_flags = registers::GpuIrqFlags::GetIrqMask().FromValue(0); |
| gpu_flags.WriteTo(register_io_.get()); |
| |
| auto mmu_flags = registers::MmuIrqFlags::GetIrqMask().FromValue(0); |
| mmu_flags.WriteTo(register_io_.get()); |
| |
| auto job_flags = registers::JobIrqFlags::GetIrqMask().FromValue(0); |
| job_flags.WriteTo(register_io_.get()); |
| } |
| |
| void MsdArmDevice::EnqueueDeviceRequest(std::unique_ptr<DeviceRequest> request, bool enqueue_front) |
| { |
| std::unique_lock<std::mutex> lock(device_request_mutex_); |
| if (enqueue_front) { |
| device_request_list_.emplace_front(std::move(request)); |
| } else { |
| device_request_list_.emplace_back(std::move(request)); |
| } |
| device_request_semaphore_->Signal(); |
| } |
| |
| void MsdArmDevice::ScheduleAtom(std::shared_ptr<MsdArmAtom> atom) |
| { |
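| // Only enqueue a schedule request when the list transitions from empty; |
| // ProcessScheduleAtoms drains the entire list in one pass. |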
| bool need_schedule; |
| { |
| std::lock_guard<std::mutex> lock(schedule_mutex_); |
| need_schedule = atoms_to_schedule_.empty(); |
| atoms_to_schedule_.push_back(std::move(atom)); |
| } |
| if (need_schedule) |
| EnqueueDeviceRequest(std::make_unique<ScheduleAtomRequest>()); |
| } |
| |
| void MsdArmDevice::CancelAtoms(std::shared_ptr<MsdArmConnection> connection) |
| { |
| EnqueueDeviceRequest(std::make_unique<CancelAtomsRequest>(connection)); |
| } |
| |
| magma::PlatformPort* MsdArmDevice::GetPlatformPort() { return device_port_.get(); } |
| |
| void MsdArmDevice::UpdateGpuActive(bool active) { power_manager_->UpdateGpuActive(active); } |
| |
| void MsdArmDevice::DumpRegisters(const GpuFeatures& features, magma::RegisterIo* io, |
| DumpState* dump_state) |
| { |
| static struct { |
| const char* name; |
| registers::CoreReadyState::CoreType type; |
| } core_types[] = {{"L2 Cache", registers::CoreReadyState::CoreType::kL2}, |
| {"Shader", registers::CoreReadyState::CoreType::kShader}, |
| {"Tiler", registers::CoreReadyState::CoreType::kTiler}}; |
| |
| static struct { |
| const char* name; |
| registers::CoreReadyState::StatusType type; |
| } status_types[] = { |
| {"Present", registers::CoreReadyState::StatusType::kPresent}, |
| {"Ready", registers::CoreReadyState::StatusType::kReady}, |
| {"Transitioning", registers::CoreReadyState::StatusType::kPowerTransitioning}, |
| {"Power active", registers::CoreReadyState::StatusType::kPowerActive}}; |
| for (size_t i = 0; i < fbl::count_of(core_types); i++) { |
| for (size_t j = 0; j < fbl::count_of(status_types); j++) { |
| uint64_t bitmask = registers::CoreReadyState::ReadBitmask(io, core_types[i].type, |
| status_types[j].type); |
| dump_state->power_states.push_back({core_types[i].name, status_types[j].name, bitmask}); |
| } |
| } |
| |
| dump_state->gpu_fault_status = registers::GpuFaultStatus::Get().ReadFrom(io).reg_value(); |
| dump_state->gpu_fault_address = registers::GpuFaultAddress::Get().ReadFrom(io).reg_value(); |
| dump_state->gpu_status = registers::GpuStatus::Get().ReadFrom(io).reg_value(); |
| dump_state->cycle_count = registers::CycleCount::Get().ReadFrom(io).reg_value(); |
| dump_state->timestamp = registers::Timestamp::Get().ReadFrom(io).reg_value(); |
| |
| for (size_t i = 0; i < features.job_slot_count; i++) { |
| DumpState::JobSlotStatus status; |
| auto js_regs = registers::JobSlotRegisters(i); |
| status.status = js_regs.Status().ReadFrom(io).reg_value(); |
| status.head = js_regs.Head().ReadFrom(io).reg_value(); |
| status.tail = js_regs.Tail().ReadFrom(io).reg_value(); |
| status.config = js_regs.Config().ReadFrom(io).reg_value(); |
| dump_state->job_slot_status.push_back(status); |
| } |
| |
| for (size_t i = 0; i < features.address_space_count; i++) { |
| DumpState::AddressSpaceStatus status; |
| auto as_regs = registers::AsRegisters(i); |
| status.status = as_regs.Status().ReadFrom(io).reg_value(); |
| status.fault_status = as_regs.FaultStatus().ReadFrom(io).reg_value(); |
| status.fault_address = as_regs.FaultAddress().ReadFrom(io).reg_value(); |
| dump_state->address_space_status.push_back(status); |
| } |
| } |
| |
| void MsdArmDevice::Dump(DumpState* dump_state, bool on_device_thread) |
| { |
| DumpRegisters(gpu_features_, register_io_.get(), dump_state); |
| |
| if (on_device_thread) { |
| std::chrono::steady_clock::duration total_time; |
| std::chrono::steady_clock::duration active_time; |
| power_manager_->GetGpuActiveInfo(&total_time, &active_time); |
| dump_state->total_time_ms = |
| std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count(); |
| dump_state->active_time_ms = |
| std::chrono::duration_cast<std::chrono::milliseconds>(active_time).count(); |
| } |
| } |
| |
| void MsdArmDevice::DumpToString(std::string& dump_string, bool on_device_thread) |
| { |
| DumpState dump_state = {}; |
| Dump(&dump_state, on_device_thread); |
| |
| FormatDump(dump_state, dump_string); |
| } |
| |
| void MsdArmDevice::FormatDump(DumpState& dump_state, std::string& dump_string) |
| { |
| dump_string.append("Core power states\n"); |
| for (auto& state : dump_state.power_states) { |
| dump_string += fbl::StringPrintf("Core type %s state %s bitmap: 0x%lx\n", state.core_type, |
| state.status_type, state.bitmask) |
| .c_str(); |
| } |
| dump_string += fbl::StringPrintf("Total ms %" PRIu64 " Active ms %" PRIu64 "\n", |
| dump_state.total_time_ms, dump_state.active_time_ms) |
| .c_str(); |
| dump_string += fbl::StringPrintf("Gpu fault status 0x%x, address 0x%lx\n", |
| dump_state.gpu_fault_status, dump_state.gpu_fault_address) |
| .c_str(); |
| dump_string += fbl::StringPrintf("Gpu status 0x%x\n", dump_state.gpu_status).c_str(); |
| dump_string += fbl::StringPrintf("Gpu cycle count %ld, timestamp %ld\n", dump_state.cycle_count, |
| dump_state.timestamp) |
| .c_str(); |
| for (size_t i = 0; i < dump_state.job_slot_status.size(); i++) { |
| auto* status = &dump_state.job_slot_status[i]; |
| dump_string += |
| fbl::StringPrintf("Job slot %zu status 0x%x head 0x%lx tail 0x%lx config 0x%x\n", i, |
| status->status, status->head, status->tail, status->config) |
| .c_str(); |
| } |
| for (size_t i = 0; i < dump_state.address_space_status.size(); i++) { |
| auto* status = &dump_state.address_space_status[i]; |
| dump_string += |
| fbl::StringPrintf("AS %zu status 0x%x fault status 0x%x fault address 0x%lx\n", i, |
| status->status, status->fault_status, status->fault_address) |
| .c_str(); |
| } |
| } |
| |
| magma::Status MsdArmDevice::ProcessDumpStatusToLog() |
| { |
| std::string dump; |
| DumpToString(dump, true); |
| magma::log(magma::LOG_INFO, "%s", dump.c_str()); |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma::Status MsdArmDevice::ProcessScheduleAtoms() |
| { |
| TRACE_DURATION("magma", "MsdArmDevice::ProcessScheduleAtoms"); |
| std::vector<std::shared_ptr<MsdArmAtom>> atoms_to_schedule; |
| { |
| std::lock_guard<std::mutex> lock(schedule_mutex_); |
| atoms_to_schedule.swap(atoms_to_schedule_); |
| } |
| for (auto& atom : atoms_to_schedule) |
| scheduler_->EnqueueAtom(std::move(atom)); |
| scheduler_->TryToSchedule(); |
| return MAGMA_STATUS_OK; |
| } |
| |
| magma::Status MsdArmDevice::ProcessCancelAtoms(std::weak_ptr<MsdArmConnection> connection) |
| { |
| // It's fine to cancel with an invalid shared_ptr, as that will clear out |
| // atoms for connections that are dead already. |
| scheduler_->CancelAtomsForConnection(connection.lock()); |
| return MAGMA_STATUS_OK; |
| } |
| |
| void MsdArmDevice::ExecuteAtomOnDevice(MsdArmAtom* atom, magma::RegisterIo* register_io) |
| { |
| TRACE_DURATION("magma", "ExecuteAtomOnDevice", "address", atom->gpu_address(), "slot", |
| atom->slot()); |
| DASSERT(atom->slot() < 2u); |
| bool dependencies_finished; |
| atom->UpdateDependencies(&dependencies_finished); |
| DASSERT(dependencies_finished); |
| DASSERT(atom->gpu_address()); |
| |
| // Skip atom if address space can't be assigned. |
| if (!address_manager_->AssignAddressSpace(atom)) { |
| scheduler_->JobCompleted(atom->slot(), kArmMaliResultAtomTerminated, 0u); |
| return; |
| } |
| if (atom->require_cycle_counter()) { |
| DASSERT(!atom->using_cycle_counter()); |
| atom->set_using_cycle_counter(true); |
| |
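| // Start the cycle counter when the first atom that needs it begins executing; |
| // AtomCompleted stops it again when the refcount drops to zero. |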
| if (++cycle_counter_refcount_ == 1) { |
| register_io_->Write32(registers::GpuCommand::kOffset, |
| registers::GpuCommand::kCmdCycleCountStart); |
| } |
| } |
| |
| if (atom->is_protected()) { |
| DASSERT(IsInProtectedMode()); |
| } else { |
| DASSERT(!IsInProtectedMode()); |
| } |
| |
| // Ensure the client's writes/cache flushes to the job chain are complete |
| // before scheduling. Unlikely to be an issue in practice, since several thread |
| // and process hops have already happened. |
| magma::barriers::WriteBarrier(); |
| |
| registers::JobSlotRegisters slot(atom->slot()); |
| slot.HeadNext().FromValue(atom->gpu_address()).WriteTo(register_io); |
| auto config = slot.ConfigNext().FromValue(0); |
| config.address_space().set(atom->address_slot_mapping()->slot_number()); |
| config.start_flush_clean().set(true); |
| config.start_flush_invalidate().set(true); |
| // TODO(MA-367): Enable flush reduction optimization. |
| config.thread_priority().set(8); |
| config.end_flush_clean().set(true); |
| config.end_flush_invalidate().set(true); |
| // Atoms are in unprotected memory, so don't attempt to write to them when |
| // executing in protected mode. |
| bool disable_descriptor_write_back = atom->is_protected(); |
| #if defined(ENABLE_PROTECTED_DEBUG_SWAP_MODE) |
| // In this case, nonprotected-mode atoms also need to abide by protected mode restrictions. |
| disable_descriptor_write_back = true; |
| #endif |
| config.disable_descriptor_write_back().set(disable_descriptor_write_back); |
| config.WriteTo(register_io); |
| |
| // Execute on every powered-on core. |
| slot.AffinityNext().FromValue(UINT64_MAX).WriteTo(register_io); |
| slot.CommandNext().FromValue(registers::JobSlotCommand::kCommandStart).WriteTo(register_io); |
| } |
| |
| void MsdArmDevice::RunAtom(MsdArmAtom* atom) { ExecuteAtomOnDevice(atom, register_io_.get()); } |
| |
| void MsdArmDevice::AtomCompleted(MsdArmAtom* atom, ArmMaliResultCode result) |
| { |
| TRACE_DURATION("magma", "AtomCompleted", "address", atom->gpu_address()); |
| DLOG("Completed job atom: 0x%lx\n", atom->gpu_address()); |
| address_manager_->AtomFinished(atom); |
| if (atom->using_cycle_counter()) { |
| DASSERT(atom->require_cycle_counter()); |
| |
| if (--cycle_counter_refcount_ == 0) { |
| register_io_->Write32(registers::GpuCommand::kOffset, |
| registers::GpuCommand::kCmdCycleCountStop); |
| } |
| atom->set_using_cycle_counter(false); |
| } |
| // Soft stopped atoms will be retried, so this result shouldn't be reported. |
| if (result != kArmMaliResultSoftStopped) { |
| atom->set_result_code(result); |
| auto connection = atom->connection().lock(); |
| // Ensure the client's subsequent reads/writes of memory are ordered after the MMIO |
| // access that reported the job as complete. In practice this is unlikely to be an issue |
| // due to data dependencies and the thread/process hops. |
| magma::barriers::Barrier(); |
| if (connection) |
| connection->SendNotificationData(atom, result); |
| } |
| } |
| |
| void MsdArmDevice::HardStopAtom(MsdArmAtom* atom) |
| { |
| DASSERT(atom->hard_stopped()); |
| registers::JobSlotRegisters slot(atom->slot()); |
| DLOG("Hard stopping atom slot %d\n", atom->slot()); |
| slot.Command() |
| .FromValue(registers::JobSlotCommand::kCommandHardStop) |
| .WriteTo(register_io_.get()); |
| } |
| |
| void MsdArmDevice::SoftStopAtom(MsdArmAtom* atom) |
| { |
| registers::JobSlotRegisters slot(atom->slot()); |
| DLOG("Soft stopping atom slot %d\n", atom->slot()); |
| slot.Command() |
| .FromValue(registers::JobSlotCommand::kCommandSoftStop) |
| .WriteTo(register_io_.get()); |
| } |
| |
| void MsdArmDevice::ReleaseMappingsForAtom(MsdArmAtom* atom) |
| { |
| // The atom should be hung on a fault, so it won't reference memory |
| // afterwards. |
| address_manager_->AtomFinished(atom); |
| } |
| |
| magma_status_t MsdArmDevice::QueryInfo(uint64_t id, uint64_t* value_out) |
| { |
| switch (id) { |
| case MAGMA_QUERY_DEVICE_ID: |
| *value_out = gpu_features_.gpu_id.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryL2Present: |
| *value_out = gpu_features_.l2_present; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryMaxThreads: |
| *value_out = gpu_features_.thread_max_threads; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryThreadMaxBarrierSize: |
| *value_out = gpu_features_.thread_max_barrier_size; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryThreadMaxWorkgroupSize: |
| *value_out = gpu_features_.thread_max_workgroup_size; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryThreadTlsAlloc: |
| *value_out = gpu_features_.thread_tls_alloc; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryShaderPresent: |
| *value_out = gpu_features_.shader_present; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryTilerFeatures: |
| *value_out = gpu_features_.tiler_features.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryThreadFeatures: |
| *value_out = gpu_features_.thread_features.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryL2Features: |
| *value_out = gpu_features_.l2_features.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryMemoryFeatures: |
| *value_out = gpu_features_.mem_features.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryMmuFeatures: |
| *value_out = gpu_features_.mmu_features.reg_value(); |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQueryCoherencyEnabled: |
| *value_out = cache_coherency_status_; |
| return MAGMA_STATUS_OK; |
| |
| case kMsdArmVendorQuerySupportsProtectedMode: |
| *value_out = IsProtectedModeSupported(); |
| return MAGMA_STATUS_OK; |
| |
| default: |
| return DRET_MSG(MAGMA_STATUS_INVALID_ARGS, "unhandled id %" PRIu64, id); |
| } |
| } |
| |
| // static |
| void MsdArmDevice::InitializeHardwareQuirks(GpuFeatures* features, magma::RegisterIo* reg) |
| { |
| auto shader_config = registers::ShaderConfig::Get().FromValue(0); |
| const uint32_t kGpuIdTGOX = 0x7212; |
| uint32_t gpu_product_id = features->gpu_id.product_id().get(); |
| if (gpu_product_id == kGpuIdTGOX) { |
| DLOG("Enabling TLS hashing\n"); |
| shader_config.tls_hashing_enable().set(1); |
| } |
| |
| if (0x750 <= gpu_product_id && gpu_product_id <= 0x880) { |
| DLOG("Enabling LS attr types\n"); |
| // This seems necessary for geometry shaders to work with non-indexed draws with point and |
| // line lists on T8xx and T7xx. |
| shader_config.ls_allow_attr_types().set(1); |
| } |
| |
| shader_config.WriteTo(reg); |
| } |
| |
| bool MsdArmDevice::IsProtectedModeSupported() |
| { |
| uint32_t gpu_product_id = gpu_features_.gpu_id.product_id().get(); |
| // TODO(MA-522): Support protected mode when using ACE cache coherency. Apparently |
| // the L2 needs to be powered down then switched to ACE Lite in that mode. |
| if (cache_coherency_status_ == kArmMaliCacheCoherencyAce) |
| return false; |
| // All Bifrost GPUs should support it. 0x6956 is a Mali-T60x MP4 r0p0, which doesn't. |
| return gpu_product_id != 0x6956 && (gpu_product_id > 0x1000); |
| } |
| |
| void MsdArmDevice::EnterProtectedMode() |
| { |
| // TODO(MA-522): If cache-coherency is enabled, power down L2 and wait for the |
| // completion of that. |
| register_io_->Write32(registers::GpuCommand::kOffset, |
| registers::GpuCommand::kCmdSetProtectedMode); |
| } |
| |
| bool MsdArmDevice::ExitProtectedMode() |
| { |
| // Remove perf counter address mapping. |
| perf_counters_->ForceDisable(); |
| // |force_expire| is false because nothing should have been using an address |
| // space before. Do this before powering down L2 so connections don't try to |
| // hit the MMU while that's happening. |
| address_manager_->ClearAddressMappings(false); |
| |
| if (!PowerDownL2()) { |
| return DRETF(false, "Powering down L2 timed out\n"); |
| } |
| |
| return ResetDevice(); |
| } |
| |
| bool MsdArmDevice::ResetDevice() |
| { |
| DLOG("Resetting device protected mode\n"); |
| // Reset semaphore shouldn't already be signaled. |
| DASSERT(!reset_semaphore_->Wait(0)); |
| |
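| // Issue a soft reset; the GPU interrupt thread signals reset_semaphore_ when the |
| // reset-completed interrupt arrives. |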
| register_io_->Write32(registers::GpuCommand::kOffset, registers::GpuCommand::kCmdSoftReset); |
| |
| if (!reset_semaphore_->Wait(1000)) { |
| magma::log(magma::LOG_WARNING, "Hardware reset timed out"); |
| return false; |
| } |
| |
| if (!InitializeHardware()) { |
| magma::log(magma::LOG_WARNING, "Initialize hardware failed"); |
| return false; |
| } |
| |
| if (!power_manager_->WaitForShaderReady(register_io_.get())) { |
| magma::log(magma::LOG_WARNING, "Waiting for shader ready failed"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool MsdArmDevice::PowerDownL2() |
| { |
| power_manager_->DisableL2(register_io_.get()); |
| return power_manager_->WaitForL2Disable(register_io_.get()); |
| } |
| |
| bool MsdArmDevice::IsInProtectedMode() |
| { |
| return registers::GpuStatus::Get().ReadFrom(register_io_.get()).protected_mode_active().get(); |
| } |
| |
| void MsdArmDevice::RequestPerfCounterOperation(uint32_t type) |
| { |
| EnqueueDeviceRequest(std::make_unique<PerfCounterRequest>(type)); |
| } |
| |
| magma::Status MsdArmDevice::ProcessPerfCounterRequest(uint32_t type) |
| { |
| if (type == (MAGMA_DUMP_TYPE_PERF_COUNTER_ENABLE | MAGMA_DUMP_TYPE_PERF_COUNTERS)) { |
| if (!perf_counters_->TriggerRead(true)) |
| return MAGMA_STATUS_INVALID_ARGS; |
| } else if (type == MAGMA_DUMP_TYPE_PERF_COUNTERS) { |
| if (!perf_counters_->TriggerRead(false)) |
| return MAGMA_STATUS_INVALID_ARGS; |
| } else if (type == MAGMA_DUMP_TYPE_PERF_COUNTER_ENABLE) { |
| if (!perf_counters_->Enable()) |
| return MAGMA_STATUS_INVALID_ARGS; |
| } else { |
| DASSERT(false); |
| return MAGMA_STATUS_INVALID_ARGS; |
| } |
| return MAGMA_STATUS_OK; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////////////////////////////// |
| |
| msd_connection_t* msd_device_open(msd_device_t* dev, msd_client_id_t client_id) |
| { |
| auto connection = MsdArmDevice::cast(dev)->Open(client_id); |
| if (!connection) |
| return DRETP(nullptr, "MsdArmDevice::Open failed"); |
| return new MsdArmAbiConnection(std::move(connection)); |
| } |
| |
| void msd_device_destroy(msd_device_t* dev) { delete MsdArmDevice::cast(dev); } |
| |
| magma_status_t msd_device_query(msd_device_t* device, uint64_t id, uint64_t* value_out) |
| { |
| return MsdArmDevice::cast(device)->QueryInfo(id, value_out); |
| } |
| |
| void msd_device_dump_status(msd_device_t* device, uint32_t dump_type) |
| { |
| uint32_t perf_dump_type = |
| dump_type & (MAGMA_DUMP_TYPE_PERF_COUNTER_ENABLE | MAGMA_DUMP_TYPE_PERF_COUNTERS); |
| if (perf_dump_type) { |
| MsdArmDevice::cast(device)->RequestPerfCounterOperation(perf_dump_type); |
| } |
| if (!dump_type || (dump_type & MAGMA_DUMP_TYPE_NORMAL)) { |
| MsdArmDevice::cast(device)->DumpStatusToLog(); |
| } |
| } |