[components] Add overall shutdown timeout
Put an upper limit on how long shutdown can take. This is a
backstop against a logical error in component manager that would
cause a lock up.
Fixed: 51300
Change-Id: Ica0e50627994fb1eabfe1b7a168f83f66d4879cb
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/403754
Reviewed-by: Derek Gonyeo <dgonyeo@google.com>
Testability-Review: Derek Gonyeo <dgonyeo@google.com>
Commit-Queue: Justin Mattson <jmatt@google.com>
diff --git a/src/sys/component_manager/src/builtin/system_controller.rs b/src/sys/component_manager/src/builtin/system_controller.rs
index f6b4813..9cac774 100644
--- a/src/sys/component_manager/src/builtin/system_controller.rs
+++ b/src/sys/component_manager/src/builtin/system_controller.rs
@@ -27,6 +27,7 @@
convert::TryInto,
path::PathBuf,
sync::{Arc, Weak},
+ time::Duration,
},
};
@@ -38,11 +39,12 @@
#[derive(Clone)]
pub struct SystemController {
model: Arc<Model>,
+ shutdown_timeout: Duration,
}
impl SystemController {
- pub fn new(model: Arc<Model>) -> Self {
- Self { model }
+ pub fn new(model: Arc<Model>, shutdown_timeout: Duration) -> Self {
+ Self { model, shutdown_timeout }
}
pub fn hooks(self: &Arc<Self>) -> Vec<HooksRegistration> {
@@ -62,8 +64,10 @@
InternalCapability::Protocol(capability_path)
if *capability_path == *SYSTEM_CONTROLLER_CAPABILITY_PATH =>
{
- Ok(Some(Box::new(SystemControllerCapabilityProvider::new(self.model.clone()))
- as Box<dyn CapabilityProvider>))
+ Ok(Some(Box::new(SystemControllerCapabilityProvider::new(
+ self.model.clone(),
+ self.shutdown_timeout.clone(),
+ )) as Box<dyn CapabilityProvider>))
}
_ => Ok(capability_provider),
}
@@ -89,11 +93,13 @@
pub struct SystemControllerCapabilityProvider {
model: Arc<Model>,
+ request_timeout: Duration,
}
impl SystemControllerCapabilityProvider {
- pub fn new(model: Arc<Model>) -> Self {
- Self { model }
+ // TODO (jmatt) allow timeout to be supplied in the constructor
+ pub fn new(model: Arc<Model>, request_timeout: Duration) -> Self {
+ Self { model, request_timeout }
}
async fn open_async(self, mut stream: SystemControllerRequestStream) -> Result<(), Error> {
@@ -112,6 +118,11 @@
// exit. main.rs waits on the model to observe the root realm
// disappear.
SystemControllerRequest::Shutdown { responder } => {
+ let timeout = zx::Duration::from(self.request_timeout);
+ fasync::spawn(async move {
+ fasync::Timer::new(fasync::Time::after(timeout)).await;
+ panic!("Component manager did not complete shutdown in allowed time.");
+ });
ActionSet::register(self.model.root_realm.clone(), Action::Shutdown)
.await
.await
@@ -162,14 +173,17 @@
super::*,
crate::model::{
binding::Binder,
+ hooks::{EventType, Hook, HooksRegistration},
moniker::AbsoluteMoniker,
realm::BindReason,
testing::test_helpers::{
component_decl_with_test_runner, ActionsTest, ComponentDeclBuilder, ComponentInfo,
},
},
+ async_trait::async_trait,
fidl::endpoints,
fidl_fuchsia_sys2 as fsys,
+ std::{boxed::Box, convert::TryFrom, sync::Arc, time::Duration},
};
/// Use SystemController to shut down a system whose root has the child `a`
@@ -200,7 +214,11 @@
.expect("could not bind to a");
// Wire up connections to SystemController
- let sys_controller = Box::new(SystemControllerCapabilityProvider::new(test.model.clone()));
+ let sys_controller = Box::new(SystemControllerCapabilityProvider::new(
+ test.model.clone(),
+ // allow simulated shutdown to take up to 30 days
+ Duration::from_secs(60 * 60 * 24 * 30),
+ ));
let (client_channel, server_channel) =
endpoints::create_endpoints::<fsys::SystemControllerMarker>()
.expect("failed creating channel endpoints");
@@ -238,4 +256,89 @@
realm_c_info.check_is_shut_down(&test.runner).await;
realm_d_info.check_is_shut_down(&test.runner).await;
}
+
+ #[test]
+ #[should_panic(expected = "Component manager did not complete shutdown in allowed time.")]
+ fn test_timeout() {
+ const TIMEOUT_SECONDS: i64 = 6;
+ const EVENT_PAUSE_SECONDS: i64 = TIMEOUT_SECONDS + 1;
+ struct StopHook;
+ #[async_trait]
+ impl Hook for StopHook {
+ async fn on(self: Arc<Self>, _event: &Event) -> Result<(), ModelError> {
+ fasync::Timer::new(fasync::Time::after(zx::Duration::from_seconds(
+ EVENT_PAUSE_SECONDS.into(),
+ )))
+ .await;
+ Ok(())
+ }
+ }
+
+ let mut exec = fasync::Executor::new_with_fake_time().unwrap();
+ let mut test_logic = Box::pin(async {
+ // Configure and start realm
+ let components = vec![
+ ("root", ComponentDeclBuilder::new().add_eager_child("a").build()),
+ ("a", ComponentDeclBuilder::new().build()),
+ ];
+
+ let s = StopHook {};
+ let s_hook: Arc<dyn Hook> = Arc::new(s);
+ let hooks_reg = HooksRegistration::new(
+ "stop hook",
+ vec![EventType::Stopped],
+ Arc::downgrade(&s_hook),
+ );
+
+ let test = ActionsTest::new_with_hooks("root", components, None, vec![hooks_reg]).await;
+ let realm_a = test.look_up(vec!["a:0"].into()).await;
+ test.model
+ .bind(
+ &realm_a.abs_moniker,
+ &BindReason::BindChild { parent: AbsoluteMoniker::root() },
+ )
+ .await
+ .expect("could not bind to a");
+
+ // Wire up connections to SystemController
+ let sys_controller = Box::new(SystemControllerCapabilityProvider::new(
+ test.model.clone(),
+ // require shutdown in a second
+ Duration::from_secs(u64::try_from(TIMEOUT_SECONDS).unwrap()),
+ ));
+ let (client_channel, server_channel) =
+ endpoints::create_endpoints::<fsys::SystemControllerMarker>()
+ .expect("failed creating channel endpoints");
+ let mut server_channel = server_channel.into_channel();
+ sys_controller
+ .open(0, 0, PathBuf::new(), &mut server_channel)
+ .await
+ .expect("failed to open capability");
+ let controller_proxy =
+ client_channel.into_proxy().expect("failed converting endpoint into proxy");
+
+ let root_realm_info = ComponentInfo::new(test.model.root_realm.clone()).await;
+ let realm_a_info = ComponentInfo::new(realm_a.clone()).await;
+
+ // Check that the root realm is still here
+ root_realm_info.check_not_shut_down(&test.runner).await;
+ realm_a_info.check_not_shut_down(&test.runner).await;
+
+ // Ask the SystemController to shut down the system and wait to be
+ // notified that the room realm stopped.
+ let _completion = test.builtin_environment.wait_for_root_realm_stop();
+ controller_proxy.shutdown().await.expect("shutdown request failed");
+ });
+
+ assert_eq!(std::task::Poll::Pending, exec.run_until_stalled(&mut test_logic));
+
+ let new_time = fasync::Time::from_nanos(
+ exec.now().into_nanos() + zx::Duration::from_seconds(TIMEOUT_SECONDS).into_nanos(),
+ );
+
+ exec.set_fake_time(new_time);
+ exec.wake_expired_timers();
+
+ assert_eq!(std::task::Poll::Pending, exec.run_until_stalled(&mut test_logic));
+ }
}
diff --git a/src/sys/component_manager/src/builtin_environment.rs b/src/sys/component_manager/src/builtin_environment.rs
index df22fda..cf34fef 100644
--- a/src/sys/component_manager/src/builtin_environment.rs
+++ b/src/sys/component_manager/src/builtin_environment.rs
@@ -64,6 +64,8 @@
// Re-export so that the component_manager binary can see it.
pub use crate::builtin::time::create_and_start_utc_clock;
+// Allow shutdown to take up to an hour.
+pub static SHUTDOWN_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60 * 60);
// TODO(viktard): Merge Arguments, RuntimeConfig and root_component_url from ModelParams
#[derive(Default)]
@@ -358,7 +360,7 @@
}
// Set up System Controller service.
- let system_controller = Arc::new(SystemController::new(model.clone()));
+ let system_controller = Arc::new(SystemController::new(model.clone(), SHUTDOWN_TIMEOUT));
model.root_realm.hooks.install(system_controller.hooks()).await;
// Set up work scheduler.