[msd-intel-gen] Retry on release when there are active mappings
Needed for upcoming Mesa/Anvil 20.0.
Multiply:msd_intel_gen_nonhardware_tests
Bug:13257
Change-Id: Ib2aaa7cf70bb47126f384facf87a516e5175613e
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/402026
Reviewed-by: John Bauman <jbauman@google.com>
Testability-Review: John Bauman <jbauman@google.com>
Commit-Queue: Craig Stout <cstout@google.com>
diff --git a/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.cc b/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.cc
index ec57ede..d369a17 100644
--- a/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.cc
+++ b/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.cc
@@ -101,15 +101,48 @@
->ReleaseBuffer(MsdIntelAbiBuffer::cast(buffer)->ptr()->platform_buffer());
}
-void MsdIntelConnection::ReleaseBuffer(magma::PlatformBuffer* buffer) {
+void MsdIntelConnection::ReleaseBuffer(magma::PlatformBuffer* buffer,
+ std::function<void(uint32_t delay_ms)> sleep_callback) {
std::vector<std::shared_ptr<GpuMapping>> mappings;
per_process_gtt()->ReleaseBuffer(buffer, &mappings);
DLOG("ReleaseBuffer %lu\n", buffer->id());
- bool killed = false;
for (const auto& mapping : mappings) {
uint32_t use_count = mapping.use_count();
+
+ if (use_count > 1) {
+ // It's an error to release a buffer while it has inflight mappings, as that can fault the
+ // GPU. However Mesa/Anvil no longer exactly tracks the user buffers that are associated
+ // with each command buffer, instead it snapshots all user buffers currently allocated on
+ // the device, which can include buffers from other threads.
+ // We observe this happening in at least one multithreaded CTS case.
+ // Intel says their DRM system driver will stall to handle the unlikely case when it happens,
+ // so we do the same here.
+ DLOG("ReleaseBuffer %lu mapping has use count %u", mapping->BufferId(), use_count);
+
+ if (!sent_context_killed()) {
+ constexpr uint32_t kRetries = 10;
+ constexpr uint32_t kRetrySleepMs = 100;
+
+ for (uint32_t retry = 0; retry < kRetries; retry++) {
+ sleep_callback(kRetrySleepMs);
+
+ use_count = mapping.use_count();
+ if (use_count == 1)
+ break;
+ }
+
+ if (use_count > 1) {
+ MAGMA_LOG(
+ WARNING,
+ "ReleaseBuffer %lu mapping has use count %u after stall, sending context killed",
+ mapping->BufferId(), use_count);
+ SendContextKilled();
+ }
+ }
+ }
+
if (use_count == 1) {
// Bus mappings are held in the connection and passed through the command stream to
// ensure the memory isn't released until the tlbs are invalidated, which happens
@@ -119,14 +152,6 @@
for (uint32_t i = 0; i < bus_mappings.size(); i++) {
mappings_to_release_.emplace_back(std::move(bus_mappings[i]));
}
- } else {
- // It's an error to release a buffer while it has inflight mappings, as that
- // can fault the gpu.
- DLOG("buffer %lu mapping use_count %d", mapping->BufferId(), use_count);
- if (!killed) {
- SendContextKilled();
- killed = true;
- }
}
}
}
diff --git a/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.h b/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.h
index 98810dd..deb9c89 100644
--- a/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.h
+++ b/src/graphics/drivers/msd-intel-gen/src/msd_intel_connection.h
@@ -50,14 +50,22 @@
notifications_.SendBufferIds(buffer_ids);
}
- void SendContextKilled() { notifications_.SendContextKilled(); }
+ void SendContextKilled() {
+ notifications_.SendContextKilled();
+ sent_context_killed_ = true;
+ }
// Maps |page_count| pages of the given |buffer| at |page_offset| to |gpu_addr| into the
// GPU address space belonging to this connection.
magma::Status MapBufferGpu(std::shared_ptr<MsdIntelBuffer> buffer, uint64_t gpu_addr,
uint64_t page_offset, uint64_t page_count);
- void ReleaseBuffer(magma::PlatformBuffer* buffer);
+ void ReleaseBuffer(magma::PlatformBuffer* buffer) {
+ auto callback = [](uint32_t delay_ms) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms));
+ };
+ ReleaseBuffer(buffer, callback);
+ }
// Submit pending release mappings on the given context
bool SubmitPendingReleaseMappings(std::shared_ptr<MsdIntelContext> context);
@@ -71,12 +79,18 @@
return mappings_to_release_;
}
+ bool sent_context_killed() { return sent_context_killed_; }
+
+ void ReleaseBuffer(magma::PlatformBuffer* buffer,
+ std::function<void(uint32_t delay_ms)> sleep_callback);
+
static const uint32_t kMagic = 0x636f6e6e; // "conn" (Connection)
Owner* owner_;
std::shared_ptr<PerProcessGtt> ppgtt_;
msd_client_id_t client_id_;
std::vector<std::unique_ptr<magma::PlatformBusMapper::BusMapping>> mappings_to_release_;
+ bool sent_context_killed_ = false;
class Notifications {
public:
diff --git a/src/graphics/drivers/msd-intel-gen/src/msd_intel_context.cc b/src/graphics/drivers/msd-intel-gen/src/msd_intel_context.cc
index 1642a0d..f4d930314 100644
--- a/src/graphics/drivers/msd-intel-gen/src/msd_intel_context.cc
+++ b/src/graphics/drivers/msd-intel-gen/src/msd_intel_context.cc
@@ -110,6 +110,11 @@
}
semaphore_port_.reset();
+
+ // Clear pending command buffers so buffer release doesn't see stuck mappings
+ while (pending_command_buffer_queue_.size()) {
+ pending_command_buffer_queue_.pop();
+ }
}
magma::Status ClientContext::SubmitCommandBuffer(std::unique_ptr<CommandBuffer> command_buffer) {
diff --git a/src/graphics/drivers/msd-intel-gen/tests/unit_tests/test_connection.cc b/src/graphics/drivers/msd-intel-gen/tests/unit_tests/test_connection.cc
index fbd3e70..2685662 100644
--- a/src/graphics/drivers/msd-intel-gen/tests/unit_tests/test_connection.cc
+++ b/src/graphics/drivers/msd-intel-gen/tests/unit_tests/test_connection.cc
@@ -94,14 +94,49 @@
std::shared_ptr<MsdIntelBuffer> buffer = MsdIntelBuffer::Create(PAGE_SIZE, "test");
std::shared_ptr<GpuMapping> mapping;
+
+ constexpr uint64_t kGpuAddr = 0x10000;
+ EXPECT_TRUE(AddressSpace::MapBufferGpu(connection->per_process_gtt(), buffer, kGpuAddr, 0, 1,
+ &mapping));
+ ASSERT_TRUE(mapping);
+ EXPECT_TRUE(connection->per_process_gtt()->AddMapping(mapping));
+
+ // Release the mapping during the retry.
+ auto sleep_callback = [&](uint32_t ms) { mapping.reset(); };
+
+ connection->ReleaseBuffer(buffer->platform_buffer(), sleep_callback);
+
+ EXPECT_EQ(0u, callback_count_);
+ EXPECT_FALSE(connection->sent_context_killed());
+
+ const std::vector<std::unique_ptr<magma::PlatformBusMapper::BusMapping>>& mappings =
+ connection->mappings_to_release();
+ EXPECT_EQ(1u, mappings.size());
+ }
+
+ void ReleaseBufferWhileMappedContextKilled() {
+ auto connection = MsdIntelConnection::Create(this, 0);
+ ASSERT_TRUE(connection);
+
+ connection->SetNotificationCallback(KillCallbackStatic, this);
+
+ std::shared_ptr<MsdIntelBuffer> buffer = MsdIntelBuffer::Create(PAGE_SIZE, "test");
+ std::shared_ptr<GpuMapping> mapping;
EXPECT_TRUE(
AddressSpace::MapBufferGpu(connection->per_process_gtt(), buffer, 0x10000, 0, 1, &mapping));
ASSERT_TRUE(mapping);
EXPECT_TRUE(connection->per_process_gtt()->AddMapping(mapping));
- // Release the buffer while holding the mapping triggers the killed callback
- connection->ReleaseBuffer(buffer->platform_buffer());
+ // Release the buffer while holding the mapping retries for a while then
+ // triggers the killed callback.
+ uint32_t sleep_callback_count = 0;
+ auto sleep_callback = [&](uint32_t ms) { sleep_callback_count += 1; };
+
+ connection->ReleaseBuffer(buffer->platform_buffer(), sleep_callback);
+
+ EXPECT_GT(sleep_callback_count, 0u);
EXPECT_EQ(1u, callback_count_);
+ EXPECT_TRUE(connection->sent_context_killed());
const std::vector<std::unique_ptr<magma::PlatformBusMapper::BusMapping>>& mappings =
connection->mappings_to_release();
@@ -144,6 +179,10 @@
TEST_F(TestMsdIntelConnection, ReleaseBufferWhileMapped) { ReleaseBufferWhileMapped(); }
+TEST_F(TestMsdIntelConnection, ReleaseBufferWhileMappedContextKilled) {
+ ReleaseBufferWhileMappedContextKilled();
+}
+
TEST_F(TestMsdIntelConnection, ReuseGpuAddrWithoutRelease) { ReuseGpuAddrWithoutRelease(); }
TEST_F(TestMsdIntelConnection, InheritanceCheck) {