[msd-arm-mali] Fix deadlock processing gpu interrupts

The interrupt thread could be waiting for the device thread to process
a request (generally if the GPU faults) while the device thread could
wait for the interrupt thread to process a power changed interrupt.

Prevent the deadlock by avoiding having the interrupt thread wait on the
device thread - instead it can either process events itself or hand them
off asynchronously.

Test: astro:go/magma-tps#L0

Change-Id: I81e7cf4d8b652cdddf648a5f7ae6339a1263e361
diff --git a/drivers/gpu/msd-arm-mali/src/msd_arm_device.cc b/drivers/gpu/msd-arm-mali/src/msd_arm_device.cc
index 4769ff1..df2bea4 100644
--- a/drivers/gpu/msd-arm-mali/src/msd_arm_device.cc
+++ b/drivers/gpu/msd-arm-mali/src/msd_arm_device.cc
@@ -44,12 +44,15 @@
     }
 };
 
-class MsdArmDevice::GpuInterruptRequest : public DeviceRequest {
+class MsdArmDevice::PerfCounterSampleCompletedRequest : public DeviceRequest {
 public:
-    GpuInterruptRequest() {}
+    PerfCounterSampleCompletedRequest() {}
 
 protected:
-    magma::Status Process(MsdArmDevice* device) override { return device->ProcessGpuInterrupt(); }
+    magma::Status Process(MsdArmDevice* device) override
+    {
+        return device->ProcessPerfCounterSampleCompleted();
+    }
 };
 
 class MsdArmDevice::JobInterruptRequest : public DeviceRequest {
@@ -212,7 +215,8 @@
 bool MsdArmDevice::InitializeHardware()
 {
     cycle_counter_refcount_ = 0;
-    DASSERT(registers::GpuStatus::Get().ReadFrom(register_io_.get()).cycle_count_active().get() == 0);
+    DASSERT(registers::GpuStatus::Get().ReadFrom(register_io_.get()).cycle_count_active().get() ==
+            0);
     EnableInterrupts();
     InitializeHardwareQuirks(&gpu_features_, register_io_.get());
 
@@ -306,18 +310,20 @@
             break;
 
         auto irq_status = registers::GpuIrqFlags::GetStatus().ReadFrom(register_io_.get());
-        auto clear_flags = registers::GpuIrqFlags::GetIrqClear().FromValue(0);
-        // Handle some interrupts on the interrupt thread so the device thread
-        // can wait for them to complete.
+
+        if (!irq_status.reg_value()) {
+            magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ with no flags set\n");
+        }
+
+        auto clear_flags = registers::GpuIrqFlags::GetIrqClear().FromValue(irq_status.reg_value());
+        // Handle interrupts on the interrupt thread so the device thread can wait for them to
+        // complete.
         if (irq_status.reset_completed().get()) {
             DLOG("Received GPU reset completed");
-            clear_flags.reset_completed().set(true);
             reset_semaphore_->Signal();
             irq_status.reset_completed().set(0);
         }
         if (irq_status.power_changed_single().get() || irq_status.power_changed_all().get()) {
-            clear_flags.power_changed_single().set(irq_status.power_changed_single().get());
-            clear_flags.power_changed_all().set(irq_status.power_changed_all().get());
             irq_status.power_changed_single().set(0);
             irq_status.power_changed_all().set(0);
             power_manager_->ReceivedPowerInterrupt(register_io_.get());
@@ -328,16 +334,26 @@
                 enable_reg.WriteTo(register_io_.get());
             }
         }
-        if (clear_flags.reg_value()) {
-            clear_flags.WriteTo(register_io_.get());
+
+        if (irq_status.performance_counter_sample_completed().get()) {
+            irq_status.performance_counter_sample_completed().set(0);
+            EnqueueDeviceRequest(std::make_unique<PerfCounterSampleCompletedRequest>(), true);
+            // Don't wait for a reply, to ensure there's no deadlock. Clearing the interrupt flag
+            // before the interrupt is actually processed shouldn't matter, because perf_counters_
+            // ensures only one request happens at a time.
         }
 
         if (irq_status.reg_value()) {
-            auto request = std::make_unique<GpuInterruptRequest>();
-            auto reply = request->GetReply();
+            magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ %d\n", irq_status.reg_value());
+            // Perform the GPU dump immediately, because clearing the irq flags might cause another
+            // GPU fault to be generated, which could overwrite the earlier data.
+            std::string dump;
+            DumpToString(dump, false);
+            magma::log(magma::LOG_INFO, "GPU fault status: %s", dump.c_str());
+        }
 
-            EnqueueDeviceRequest(std::move(request), true);
-            reply->Wait();
+        if (clear_flags.reg_value()) {
+            clear_flags.WriteTo(register_io_.get());
         }
     }
 
@@ -345,38 +361,19 @@
     return 0;
 }
 
-magma::Status MsdArmDevice::ProcessGpuInterrupt()
+magma::Status MsdArmDevice::ProcessPerfCounterSampleCompleted()
 {
-    auto irq_status = registers::GpuIrqFlags::GetStatus().ReadFrom(register_io_.get());
-    // Some interrupts are handled on the interrupt thread, so ignore them in this
-    // function.
-    irq_status.reset_completed().set(0);
-    irq_status.power_changed_single().set(0);
-    irq_status.power_changed_all().set(0);
-    auto clear_flags = registers::GpuIrqFlags::GetIrqClear().FromValue(irq_status.reg_value());
-    clear_flags.WriteTo(register_io_.get());
 
-    DLOG("Got GPU interrupt status 0x%x\n", irq_status.reg_value());
-    if (!irq_status.reg_value())
-        magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ with no flags set\n");
+    DLOG("Perf Counter sample completed");
 
-    if (irq_status.performance_counter_sample_completed().get()) {
-        uint64_t duration_ms = 0;
-        std::vector<uint32_t> perf_result = perf_counters_->ReadCompleted(&duration_ms);
+    uint64_t duration_ms = 0;
+    std::vector<uint32_t> perf_result = perf_counters_->ReadCompleted(&duration_ms);
 
-        magma::log(magma::LOG_INFO, "Performance counter read complete, duration %lu ms:\n",
-                   duration_ms);
-        for (uint32_t i = 0; i < perf_result.size(); ++i) {
-            magma::log(magma::LOG_INFO, "Performance counter %d: %u\n", i, perf_result[i]);
-        }
-        irq_status.performance_counter_sample_completed().set(0);
+    magma::log(magma::LOG_INFO, "Performance counter read complete, duration %lu ms:\n",
+               duration_ms);
+    for (uint32_t i = 0; i < perf_result.size(); ++i) {
+        magma::log(magma::LOG_INFO, "Performance counter %d: %u\n", i, perf_result[i]);
     }
-
-    if (irq_status.reg_value()) {
-        magma::log(magma::LOG_WARNING, "Got unexpected GPU IRQ %d\n", irq_status.reg_value());
-        ProcessDumpStatusToLog();
-    }
-    gpu_interrupt_->Complete();
     return MAGMA_STATUS_OK;
 }
 
@@ -702,23 +699,25 @@
     }
 }
 
-void MsdArmDevice::Dump(DumpState* dump_state)
+void MsdArmDevice::Dump(DumpState* dump_state, bool on_device_thread)
 {
     DumpRegisters(gpu_features_, register_io_.get(), dump_state);
 
-    std::chrono::steady_clock::duration total_time;
-    std::chrono::steady_clock::duration active_time;
-    power_manager_->GetGpuActiveInfo(&total_time, &active_time);
-    dump_state->total_time_ms =
-        std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count();
-    dump_state->active_time_ms =
-        std::chrono::duration_cast<std::chrono::milliseconds>(active_time).count();
+    if (on_device_thread) {
+        std::chrono::steady_clock::duration total_time;
+        std::chrono::steady_clock::duration active_time;
+        power_manager_->GetGpuActiveInfo(&total_time, &active_time);
+        dump_state->total_time_ms =
+            std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count();
+        dump_state->active_time_ms =
+            std::chrono::duration_cast<std::chrono::milliseconds>(active_time).count();
+    }
 }
 
-void MsdArmDevice::DumpToString(std::string& dump_string)
+void MsdArmDevice::DumpToString(std::string& dump_string, bool on_device_thread)
 {
-    DumpState dump_state;
-    Dump(&dump_state);
+    DumpState dump_state = {};
+    Dump(&dump_state, on_device_thread);
 
     FormatDump(dump_state, dump_string);
 }
@@ -760,7 +759,7 @@
 magma::Status MsdArmDevice::ProcessDumpStatusToLog()
 {
     std::string dump;
-    DumpToString(dump);
+    DumpToString(dump, true);
     magma::log(magma::LOG_INFO, "%s", dump.c_str());
     return MAGMA_STATUS_OK;
 }
diff --git a/drivers/gpu/msd-arm-mali/src/msd_arm_device.h b/drivers/gpu/msd-arm-mali/src/msd_arm_device.h
index b0dc79e..85ea615 100644
--- a/drivers/gpu/msd-arm-mali/src/msd_arm_device.h
+++ b/drivers/gpu/msd-arm-mali/src/msd_arm_device.h
@@ -87,8 +87,8 @@
     };
     static void DumpRegisters(const GpuFeatures& features, magma::RegisterIo* io,
                               DumpState* dump_state);
-    void Dump(DumpState* dump_state);
-    void DumpToString(std::string& dump_string);
+    void Dump(DumpState* dump_state, bool from_device_thread);
+    void DumpToString(std::string& dump_string, bool from_device_thread);
     void FormatDump(DumpState& dump_state, std::string& dump_string);
     void DumpStatusToLog();
 
@@ -123,7 +123,7 @@
     friend class TestMsdArmDevice;
 
     class DumpRequest;
-    class GpuInterruptRequest;
+    class PerfCounterSampleCompletedRequest;
     class JobInterruptRequest;
     class MmuInterruptRequest;
     class ScheduleAtomRequest;
@@ -158,7 +158,7 @@
     bool ResetDevice();
 
     magma::Status ProcessDumpStatusToLog();
-    magma::Status ProcessGpuInterrupt();
+    magma::Status ProcessPerfCounterSampleCompleted();
     magma::Status ProcessJobInterrupt();
     magma::Status ProcessMmuInterrupt();
     magma::Status ProcessScheduleAtoms();
diff --git a/drivers/gpu/msd-arm-mali/tests/unit_tests/test_device.cc b/drivers/gpu/msd-arm-mali/tests/unit_tests/test_device.cc
index 63e5f82..1cad61c 100644
--- a/drivers/gpu/msd-arm-mali/tests/unit_tests/test_device.cc
+++ b/drivers/gpu/msd-arm-mali/tests/unit_tests/test_device.cc
@@ -24,7 +24,7 @@
         EXPECT_NE(device, nullptr);
 
         MsdArmDevice::DumpState dump_state;
-        device->Dump(&dump_state);
+        device->Dump(&dump_state, true);
         ASSERT_EQ(12u, dump_state.power_states.size());
         EXPECT_EQ(std::string("L2 Cache"), dump_state.power_states[0].core_type);
         EXPECT_EQ(std::string("Present"), dump_state.power_states[0].status_type);
@@ -190,7 +190,7 @@
         EXPECT_NE(device, nullptr);
 
         MsdArmDevice::DumpState dump_state;
-        device->Dump(&dump_state);
+        device->Dump(&dump_state, false);
 
         // Ensure that the GPU is idle and not doing anything at this point. A
         // failure in this may be caused by a previous test.