[starnix][kernel] Fix ThreadGroup::shut_down() zombie hang

Calling shut_down() on a ThreadGroup that has already exited (i.e., a
zombie) should not set an exit notifier. Waiting on an exit notifier on
an exited thread group will result in permanent suspension.

An integration test is added to exercise this via the lifecycle
controller.

Bug: 297440106
Test: starnix_shutdown_test
Change-Id: I86ce784981ea1adc594f3ed54c9a310c3fe66b35
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/1629480
Fuchsia-Auto-Submit: Brian Bosak <bbosak@google.com>
Commit-Queue: Brian Bosak <bbosak@google.com>
Reviewed-by: Adam Barth <abarth@google.com>
diff --git a/src/starnix/kernel/core/task/thread_group.rs b/src/starnix/kernel/core/task/thread_group.rs
index d86ff67..90bd152 100644
--- a/src/starnix/kernel/core/task/thread_group.rs
+++ b/src/starnix/kernel/core/task/thread_group.rs
@@ -1884,9 +1884,15 @@
                 return;
             };
 
+            let mut state = this.write();
+            if state.is_exited() {
+                // Do not set an exit notifier on an exited thread group. It will never be notified.
+                return;
+            }
+
             // Register a channel to be notified when exit() is complete.
             let (on_exited_send, on_exited) = futures::channel::oneshot::channel();
-            this.write().exit_notifier = Some(on_exited_send);
+            state.exit_notifier = Some(on_exited_send);
 
             // We want to be able to log about this thread group without upgrading the `Weak`.
             let tg_name = format!("{this:?}");
diff --git a/src/starnix/tests/BUILD.gn b/src/starnix/tests/BUILD.gn
index 446db4f..32acbc6 100644
--- a/src/starnix/tests/BUILD.gn
+++ b/src/starnix/tests/BUILD.gn
@@ -29,6 +29,7 @@
     "remotevol:tests",
     "scheduler:tests",
     "selinux/userspace:tests",
+    "shutdown:tests",
     "suspend:tests",
     "syscalls:tests",
     "sysrq:tests",
diff --git a/src/starnix/tests/shutdown/BUILD.gn b/src/starnix/tests/shutdown/BUILD.gn
new file mode 100644
index 0000000..10c0e37
--- /dev/null
+++ b/src/starnix/tests/shutdown/BUILD.gn
@@ -0,0 +1,75 @@
+# Copyright 2026 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/components.gni")
+import("//build/rust/rustc_test.gni")
+import("//src/starnix/build/starnix_linux_executable.gni")
+import("//src/starnix/tests/starnix_test_subpackages.gni")
+
+group("tests") {
+  testonly = true
+  if (target_cpu == "x64") {
+    deps = [ ":starnix_shutdown_test" ]
+  }
+}
+
+starnix_linux_executable("zombie_shutdown_bin") {
+  testonly = true
+  sources = [ "zombie_shutdown.cc" ]
+}
+
+rustc_test("starnix_shutdown_test_runner") {
+  testonly = true
+  edition = "2024"
+
+  deps = [
+    "//sdk/fidl/fuchsia.sys2:fuchsia.sys2_rust",
+    "//sdk/rust/zx",
+    "//src/lib/diagnostics/reader/rust",
+    "//src/lib/fidl/rust/fidl",
+    "//src/lib/fuchsia-async",
+    "//src/lib/fuchsia-component-test",
+    "//third_party/rust_crates:anyhow",
+    "//third_party/rust_crates:futures",
+  ]
+
+  source_root = "src/test_runner.rs"
+  sources = [ source_root ]
+}
+
+fuchsia_test_component("zombie_shutdown") {
+  manifest = "meta/zombie_shutdown.cml"
+  deps = [ ":zombie_shutdown_bin" ]
+}
+
+fuchsia_test_component("realm") {
+  manifest = "meta/realm.cml"
+}
+
+fuchsia_test_component("starnix_shutdown_test_component") {
+  component_name = "starnix_shutdown_test"
+  manifest = "meta/starnix_shutdown_test.cml"
+  deps = [ ":starnix_shutdown_test_runner" ]
+  test_type = "starnix"
+}
+
+fuchsia_test_package("starnix_shutdown_test") {
+  test_components = [ ":starnix_shutdown_test_component" ]
+  deps = [
+    ":realm",
+    ":zombie_shutdown",
+  ]
+
+  subpackages = starnix_test_subpackages +
+                [ "//src/starnix/containers/debian:debian_package" ]
+
+  # TODO(https://fxbug.dev/295073633): Remove when Starnix can shut down
+  # gracefully.
+  test_specs = {
+    log_settings = {
+      max_severity = "ERROR"
+    }
+    package_resolution_timeout_secs = 660
+  }
+}
diff --git a/src/starnix/tests/shutdown/meta/realm.cml b/src/starnix/tests/shutdown/meta/realm.cml
new file mode 100644
index 0000000..d27db78
--- /dev/null
+++ b/src/starnix/tests/shutdown/meta/realm.cml
@@ -0,0 +1,82 @@
+// Copyright 2026 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+{
+    include: [
+        "inspect/offer.shard.cml",
+        "syslog/offer.shard.cml",
+    ],
+    children: [
+        {
+            name: "kernel",
+            url: "starnix_kernel#meta/starnix_kernel.cm",
+        },
+        {
+            name: "container",
+            url: "debian#meta/debian_container.cm",
+            environment: "#starnix_runner_env",
+        },
+        {
+            name: "zombie_shutdown",
+            url: "#meta/zombie_shutdown.cm",
+            environment: "#starnix_container_runner_env",
+        },
+    ],
+    offer: [
+        {
+            protocol: [
+                "fuchsia.buildinfo.Provider",
+                "fuchsia.kernel.Stats",
+                "fuchsia.kernel.VmexResource",
+                "fuchsia.sysinfo.SysInfo",
+            ],
+            from: "parent",
+            to: "#kernel",
+        },
+        {
+            directory: "boot-kernel",
+            from: "parent",
+            to: "#kernel",
+        },
+        {
+            protocol: [
+                "fuchsia.sysmem.Allocator",
+                "fuchsia.sysmem2.Allocator",
+                "fuchsia.ui.composition.Allocator",
+                "fuchsia.ui.composition.Flatland",
+                "fuchsia.ui.display.singleton.Info",
+            ],
+            from: "void",
+            to: "#kernel",
+            availability: "optional",
+        },
+        {
+            protocol: [ "fuchsia.tracing.provider.Registry" ],
+            from: "parent",
+            to: "#kernel",
+            availability: "same_as_target",
+        },
+    ],
+    environments: [
+        {
+            name: "starnix_runner_env",
+            extends: "realm",
+            runners: [
+                {
+                    runner: "starnix",
+                    from: "#kernel",
+                },
+            ],
+        },
+        {
+            name: "starnix_container_runner_env",
+            extends: "realm",
+            runners: [
+                {
+                    runner: "starnix_container",
+                    from: "#container",
+                },
+            ],
+        },
+    ],
+}
diff --git a/src/starnix/tests/shutdown/meta/starnix_shutdown_test.cml b/src/starnix/tests/shutdown/meta/starnix_shutdown_test.cml
new file mode 100644
index 0000000..e33d6f6
--- /dev/null
+++ b/src/starnix/tests/shutdown/meta/starnix_shutdown_test.cml
@@ -0,0 +1,47 @@
+// Copyright 2026 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+{
+    include: [
+        "//src/sys/test_runners/rust/default.shard.cml",
+        "//src/sys/test_runners/tmp_storage.shard.cml",
+        "inspect/client.shard.cml",
+        "sys/component/realm_builder.shard.cml",
+        "syslog/client.shard.cml",
+    ],
+    program: {
+        binary: "bin/starnix_shutdown_test_runner",
+    },
+    use: [
+        {
+            protocol: [ "fuchsia.diagnostics.ArchiveAccessor" ],
+        },
+        {
+            protocol: [ "fuchsia.sys2.LifecycleController" ],
+            from: "framework",
+        },
+    ],
+    offer: [
+        {
+            protocol: [
+                "fuchsia.buildinfo.Provider",
+                "fuchsia.kernel.Stats",
+                "fuchsia.kernel.VmexResource",
+                "fuchsia.sysinfo.SysInfo",
+            ],
+            from: "parent",
+            to: "#realm_builder",
+        },
+        {
+            protocol: [ "fuchsia.tracing.provider.Registry" ],
+            from: "parent",
+            to: "#realm_builder",
+            availability: "optional",
+        },
+        {
+            directory: "boot-kernel",
+            from: "parent",
+            to: "#realm_builder",
+        },
+    ],
+}
diff --git a/src/starnix/tests/shutdown/meta/zombie_shutdown.cml b/src/starnix/tests/shutdown/meta/zombie_shutdown.cml
new file mode 100644
index 0000000..442cc0c
--- /dev/null
+++ b/src/starnix/tests/shutdown/meta/zombie_shutdown.cml
@@ -0,0 +1,10 @@
+// Copyright 2026 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+{
+    program: {
+        runner: "starnix_container",
+        binary: "data/tests/zombie_shutdown_bin",
+        uid: "0",
+    },
+}
diff --git a/src/starnix/tests/shutdown/src/test_runner.rs b/src/starnix/tests/shutdown/src/test_runner.rs
new file mode 100644
index 0000000..a6bcbbb
--- /dev/null
+++ b/src/starnix/tests/shutdown/src/test_runner.rs
@@ -0,0 +1,81 @@
+// Copyright 2026 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use anyhow::anyhow;
+use diagnostics_reader::ArchiveReader;
+use fidl_fuchsia_sys2 as fsys2;
+use fuchsia_async as fasync;
+use fuchsia_component_test::{
+    Capability, RealmBuilder, RealmBuilderParams, RealmInstance, Ref, Route,
+};
+use futures::StreamExt;
+use zx;
+
+// Instantiates the realm described by `#meta/realm.cm` and routes the framework-provided
+// `fuchsia.sys2.LifecycleController` protocol to the realm's parent, which is where the test
+// connects to it.
+async fn build_realm() -> RealmInstance {
+    // Start from the packaged realm manifest instead of an empty realm.
+    let params = RealmBuilderParams::new().from_relative_url("#meta/realm.cm");
+    let builder = RealmBuilder::with_params(params).await.expect("created");
+
+    // Surface the realm's own lifecycle controller so that the realm's children can be
+    // started and stopped from outside the realm.
+    let lifecycle_route = Route::new()
+        .capability(Capability::protocol::<fsys2::LifecycleControllerMarker>())
+        .from(Ref::framework())
+        .to(Ref::parent());
+    builder.add_route(lifecycle_route).await.unwrap();
+
+    builder.build().await.unwrap()
+}
+
+// Regression test: stopping the component must not hang while it has an unreaped zombie child.
+#[fasync::run_singlethreaded(test)]
+async fn test_shutdown_with_zombie() {
+    let realm = build_realm().await;
+    let controller: fsys2::LifecycleControllerProxy =
+        realm.root.connect_to_protocol_at_exposed_dir().unwrap();
+
+    // Launch the helper program and wait until it reports that its child is a zombie.
+    let (_, binder_server) = fidl::endpoints::create_endpoints();
+    controller.start_instance("./zombie_shutdown", binder_server).await.unwrap().unwrap();
+    wait_for_zombie_signal(&realm).await;
+
+    // Fold the transport-level and lifecycle-level failures into a single error type.
+    let stop_future = async {
+        match controller.stop_instance("./zombie_shutdown").await {
+            Err(e) => Err(anyhow!("FIDL error: {:?}", e)),
+            Ok(Err(e)) => Err(anyhow!("Lifecycle error: {:?}", e)),
+            Ok(Ok(())) => Ok(()),
+        }
+    };
+
+    // A stop request that is still pending after five seconds is reported as a hang.
+    let deadline = fasync::MonotonicInstant::after(zx::MonotonicDuration::from_seconds(5));
+    let on_hang = || Err(anyhow!("Container shutdown hung"));
+    let result = fasync::TimeoutExt::on_timeout(stop_future, deadline, on_hang).await;
+
+    assert!(result.is_ok(), "Container failed to stop gracefully: {:?}", result);
+}
+
+// Waits for the "[ZOMBIE_READY]" marker, which the test program prints to stdout, to appear in
+// the kernel component's logs. Panics if the log stream ends before the marker is seen.
+async fn wait_for_zombie_signal(realm_instance: &RealmInstance) {
+    let realm_moniker = format!("realm_builder:{}", realm_instance.root.child_name());
+    let kernel_moniker = format!("{realm_moniker}/kernel");
+
+    let mut logs = ArchiveReader::logs()
+        .select_all_for_component(kernel_moniker.as_str())
+        .snapshot_then_subscribe()
+        .expect("failed to subscribe to kernel logs");
+
+    while let Some(log) = logs.next().await {
+        let log = log.expect("failed to read log from stream");
+        if log.msg().is_some_and(|msg| msg.contains("[ZOMBIE_READY]")) {
+            return;
+        }
+    }
+    panic!("kernel log stream ended before the zombie program signaled readiness");
+}
diff --git a/src/starnix/tests/shutdown/zombie_shutdown.cc b/src/starnix/tests/shutdown/zombie_shutdown.cc
new file mode 100644
index 0000000..485bd59
--- /dev/null
+++ b/src/starnix/tests/shutdown/zombie_shutdown.cc
@@ -0,0 +1,28 @@
+// Copyright 2026 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+int main() {
+  pid_t pid = fork();
+  if (pid == 0) {
+    // Child process exits immediately, leaving a zombie.
+    return 0;
+  }
+
+  // Block until the child has exited but leave it as a zombie; bail out if no zombie was made.
+  siginfo_t info;
+  if (pid < 0 || waitid(P_PID, pid, &info, WEXITED | WNOWAIT) != 0) {
+    perror("zombie_shutdown");
+    return 1;
+  }
+  // The child is now a zombie. Signal ready to the Rust test runner.
+  printf("[ZOMBIE_READY]\n");
+  fflush(stdout);
+
+  // Sleep forever to keep the parent alive.
+  while (true) sleep(1);
+}