[system-update-committer] implement wait_and_reboot
This change adds reboot functionality to the system-update-committer.
Generally, if we fail to put the boot metadata in a happy state, we'll
reboot. Exception: if it's a blobfs verify failure, and the config says
to ignore, we won't reboot.
Note: we can't yet integration test the exception because currently the
blobfs verification will always pass.
Fixed: 64600
Test: fx test -o system-update-committer-tests
Test: fx test -o system-update-committer-integration-tests
Change-Id: Ic48be5220870c31ab0450cd2ea19f46ddaef6021
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/475560
Commit-Queue: Zach Kirschenbaum <zkbaum@google.com>
Reviewed-by: Ben Keller <galbanum@google.com>
diff --git a/src/sys/pkg/bin/system-update-committer/BUILD.gn b/src/sys/pkg/bin/system-update-committer/BUILD.gn
index 7eb1daf..7ea3124 100644
--- a/src/sys/pkg/bin/system-update-committer/BUILD.gn
+++ b/src/sys/pkg/bin/system-update-committer/BUILD.gn
@@ -14,6 +14,7 @@
edition = "2018"
deps = [
+ "//sdk/fidl/fuchsia.hardware.power.statecontrol:fuchsia.hardware.power.statecontrol-rustc",
"//sdk/fidl/fuchsia.paver:fuchsia.paver-rustc",
"//sdk/fidl/fuchsia.update:fuchsia.update-rustc",
"//src/lib/fidl/rust/fidl",
@@ -30,8 +31,10 @@
]
test_deps = [
"//src/sys/pkg/testing/mock-paver",
+ "//src/sys/pkg/testing/mock-reboot",
"//src/sys/pkg/testing/mock-verifier:lib",
"//third_party/rust_crates:matches",
+ "//third_party/rust_crates:parking_lot",
"//third_party/rust_crates:proptest",
"//third_party/rust_crates:proptest-derive",
]
@@ -47,6 +50,7 @@
"src/metadata/errors.rs",
"src/metadata/policy.rs",
"src/metadata/verify.rs",
+ "src/reboot.rs",
]
visibility = [
diff --git a/src/sys/pkg/bin/system-update-committer/meta/system-update-committer.cml b/src/sys/pkg/bin/system-update-committer/meta/system-update-committer.cml
index 3215955..a951c57 100644
--- a/src/sys/pkg/bin/system-update-committer/meta/system-update-committer.cml
+++ b/src/sys/pkg/bin/system-update-committer/meta/system-update-committer.cml
@@ -8,7 +8,12 @@
],
use: [
{ runner: "elf" },
- { protocol: "fuchsia.paver.Paver" },
+ {
+ protocol: [
+ "fuchsia.hardware.power.statecontrol.Admin",
+ "fuchsia.paver.Paver",
+ ],
+ },
],
expose: [
{
diff --git a/src/sys/pkg/bin/system-update-committer/src/main.rs b/src/sys/pkg/bin/system-update-committer/src/main.rs
index 655417a..5575d5f 100644
--- a/src/sys/pkg/bin/system-update-committer/src/main.rs
+++ b/src/sys/pkg/bin/system-update-committer/src/main.rs
@@ -3,21 +3,36 @@
// found in the LICENSE file.
use {
- crate::fidl::FidlServer,
+ crate::{
+ fidl::FidlServer,
+ metadata::put_metadata_in_happy_state,
+ reboot::{should_reboot, wait_and_reboot},
+ },
anyhow::{anyhow, Context, Error},
config::Config,
+ fidl_fuchsia_hardware_power_statecontrol::AdminMarker as PowerStateControlMarker,
fidl_fuchsia_paver::PaverMarker,
fuchsia_async as fasync,
fuchsia_component::server::ServiceFs,
fuchsia_syslog::{fx_log_info, fx_log_warn},
fuchsia_zircon::{self as zx, HandleBased},
futures::{channel::oneshot, prelude::*, stream::FuturesUnordered},
- std::sync::Arc,
+ std::{sync::Arc, time::Duration},
};
mod config;
mod fidl;
mod metadata;
+mod reboot;
+
+// The system persists the reboot reason via a component called last_reboot. If we issue a reboot
+// before last_reboot starts, the reboot reason will not persist. Since last_reboot is v1 and the
+// system-update-committer is v2, and v2 components start earlier than v1 components, let's try to
+// minimize this risk by defining a minimum duration we must wait to issue a reboot. Ideally, reboot
+// clients should not have to worry about this. However, given the transition from v1 to v2, for now
+// we mitigate this with a timer. This value was determined experimentally on Astro, where there
+// seems to be a ~2 second gap between the system-update-committer and last_reboot starting.
+const MINIMUM_REBOOT_WAIT: Duration = Duration::from_secs(5);
pub fn main() -> Result<(), Error> {
fuchsia_syslog::init_with_tags(&["system-update-committer"])
@@ -37,9 +52,8 @@
}
async fn main_inner_async() -> Result<(), Error> {
- // TODO(http://fxbug.dev/64595) Actually use the configuration once we start implementing
- // health verification.
- let _config = Config::load_from_config_data_or_default();
+ let config = Config::load_from_config_data_or_default();
+ let reboot_timer = fasync::Timer::new(MINIMUM_REBOOT_WAIT);
let paver = fuchsia_component::client::connect_to_service::<PaverMarker>()
.context("while connecting to paver")?;
@@ -50,6 +64,9 @@
.find_boot_manager(boot_manager_server_end)
.context("transport error while calling find_boot_manager()")?;
+ let reboot_proxy = fuchsia_component::client::connect_to_service::<PowerStateControlMarker>()
+ .context("while connecting to power state control")?;
+
let futures = FuturesUnordered::new();
let (p_internal, p_external) = zx::EventPair::create().context("while creating EventPairs")?;
@@ -60,16 +77,25 @@
let (unblocker, blocker) = oneshot::channel();
- // Handle putting boot metadata in happy state.
+ // Handle putting boot metadata in happy state and rebooting on failure (if necessary).
futures.push(
async move {
- // TODO(http://fxbug.dev/64595) combine the config and the result of
- // put_metadata_in_happy_state to determine if we should reboot. For now, we just log.
- if let Err(e) =
- crate::metadata::put_metadata_in_happy_state(&boot_manager, &p_internal, unblocker)
- .await
+ if let Err(e) = put_metadata_in_happy_state(&boot_manager, &p_internal, unblocker).await
{
- fx_log_warn!("error putting boot metadata in happy state: {:#}", anyhow!(e));
+ if should_reboot(&e, &config) {
+ fx_log_warn!(
+ "Failed to put metadata in happy state. Rebooting given error {:#} and config {:?}",
+ anyhow!(e),
+ config
+ );
+ wait_and_reboot(&reboot_proxy, reboot_timer).await;
+ } else {
+ fx_log_warn!(
+ "Failed to put metadata in happy state. NOT rebooting given error {:#} and config {:?}",
+ anyhow!(e),
+ config
+ );
+ }
}
}
.boxed_local(),
diff --git a/src/sys/pkg/bin/system-update-committer/src/metadata.rs b/src/sys/pkg/bin/system-update-committer/src/metadata.rs
index 1418d20..bc164c2 100644
--- a/src/sys/pkg/bin/system-update-committer/src/metadata.rs
+++ b/src/sys/pkg/bin/system-update-committer/src/metadata.rs
@@ -6,7 +6,6 @@
use {
commit::do_commit,
- errors::MetadataError,
fidl_fuchsia_paver as paver,
fuchsia_zircon::{self as zx, EventPair, Peered},
futures::channel::oneshot,
@@ -14,6 +13,11 @@
verify::do_health_verification,
};
+pub(super) use errors::{MetadataError, VerifyError};
+
+#[cfg(test)]
+pub(super) use errors::{BootManagerError, PolicyError, VerifyFailureReason};
+
mod commit;
mod configuration;
mod errors;
diff --git a/src/sys/pkg/bin/system-update-committer/src/reboot.rs b/src/sys/pkg/bin/system-update-committer/src/reboot.rs
new file mode 100644
index 0000000..2c21035
--- /dev/null
+++ b/src/sys/pkg/bin/system-update-committer/src/reboot.rs
@@ -0,0 +1,165 @@
+// Copyright 2021 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+use {
+ crate::{
+ config::{Config, Mode},
+ metadata::{MetadataError, VerifyError},
+ },
+ anyhow::{anyhow, Context},
+ fidl_fuchsia_hardware_power_statecontrol::{
+ AdminProxy as PowerStateControlProxy, RebootReason,
+ },
+ fuchsia_async as fasync,
+ fuchsia_syslog::fx_log_err,
+ fuchsia_zircon::Status,
+};
+
+/// Determines if we should reboot.
+pub(super) fn should_reboot(error: &MetadataError, config: &Config) -> bool {
+ if let MetadataError::Verify(VerifyError::BlobFs(_)) = error {
+ return config.blobfs() == &Mode::RebootOnFailure;
+ }
+ return true;
+}
+
+/// Waits for the timer to complete and then reboots the system, logging errors instead of failing.
+pub(super) async fn wait_and_reboot(proxy: &PowerStateControlProxy, timer: fasync::Timer) {
+ let () = timer.await;
+ if let Err(e) = async move {
+ proxy
+ .reboot(RebootReason::RetrySystemUpdate)
+ .await
+ .context("while performing reboot call")?
+ .map_err(Status::from_raw)
+ .context("reboot responded with")
+ }
+ .await
+ {
+ fx_log_err!("error initiating reboot: {:#}", anyhow!(e));
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use {
+ super::*,
+ crate::metadata::{BootManagerError, PolicyError, VerifyFailureReason},
+ futures::{channel::oneshot, pin_mut, task::Poll},
+ mock_reboot::MockRebootService,
+ parking_lot::Mutex,
+ proptest::prelude::*,
+ std::{sync::Arc, time::Duration},
+ };
+
+ #[test]
+ fn test_wait_and_reboot_success() {
+ let mut executor = fasync::Executor::new_with_fake_time().unwrap();
+
+ // Create a mock reboot service.
+ let (sender, recv) = oneshot::channel();
+ let sender = Arc::new(Mutex::new(Some(sender)));
+ let mock = Arc::new(MockRebootService::new(Box::new(move |reason: RebootReason| {
+ sender.lock().take().unwrap().send(reason).unwrap();
+ Ok(())
+ })));
+ let proxy = mock.spawn_reboot_service();
+
+ // Prepare futures to call reboot and receive the reboot request.
+ let timer_duration = 5;
+ let reboot_fut =
+ wait_and_reboot(&proxy, fasync::Timer::new(Duration::from_secs(timer_duration)));
+ pin_mut!(reboot_fut);
+ pin_mut!(recv);
+
+ // Set the time so that the timer is still going, so we should neither call reboot nor
+ // observe the reboot service was called.
+ executor.set_fake_time(fasync::Time::after(Duration::from_secs(timer_duration - 1).into()));
+ assert!(!executor.wake_expired_timers());
+ match executor.run_until_stalled(&mut reboot_fut) {
+ Poll::Ready(res) => panic!("future unexpectedly completed with response: {:?}", res),
+ Poll::Pending => {}
+ };
+ match executor.run_until_stalled(&mut recv) {
+ Poll::Ready(res) => panic!("future unexpectedly completed with response: {:?}", res),
+ Poll::Pending => {}
+ };
+
+ // Once the timer completes, we should complete the reboot call and observe we called the
+ // reboot service with the given reboot reason.
+ executor.set_fake_time(fasync::Time::after(Duration::from_secs(1).into()));
+ assert!(executor.wake_expired_timers());
+ match executor.run_until_stalled(&mut recv) {
+ Poll::Ready(res) => panic!("future unexpectedly completed with response: {:?}", res),
+ Poll::Pending => {}
+ };
+ match executor.run_until_stalled(&mut reboot_fut) {
+ Poll::Ready(_) => {}
+ Poll::Pending => panic!("future unexpectedly pending"),
+ };
+ match executor.run_until_stalled(&mut recv) {
+ Poll::Ready(res) => assert_eq!(res, Ok(RebootReason::RetrySystemUpdate)),
+ Poll::Pending => panic!("future unexpectedly pending"),
+ };
+ }
+
+ fn test_blobfs_verify_errors(config: Config, expect_reboot: bool) {
+ let timeout_err = MetadataError::Verify(VerifyError::BlobFs(VerifyFailureReason::Timeout));
+ let verify_err =
+ MetadataError::Verify(VerifyError::BlobFs(VerifyFailureReason::Verify(anyhow!("foo"))));
+ let fidl_err = MetadataError::Verify(VerifyError::BlobFs(VerifyFailureReason::Fidl(
+ fidl::Error::OutOfRange,
+ )));
+
+ assert_eq!(should_reboot(&timeout_err, &config), expect_reboot);
+ assert_eq!(should_reboot(&verify_err, &config), expect_reboot);
+ assert_eq!(should_reboot(&fidl_err, &config), expect_reboot);
+ }
+
+ /// Blobfs errors SHOULD NOT cause a reboot if they're ignored.
+ #[test]
+ fn test_blobfs_errors_should_not_reboot() {
+ test_blobfs_verify_errors(Config::builder().blobfs(Mode::Ignore).build(), false);
+ }
+
+ /// Blobfs errors SHOULD cause a reboot if they're NOT ignored.
+ #[test]
+ fn test_blobfs_errors_should_reboot() {
+ test_blobfs_verify_errors(Config::builder().blobfs(Mode::RebootOnFailure).build(), true);
+ }
+
+ // Test that all the non-blobfs metadata errors will always cause a reboot, regardless of the
+ // config. Ideally, we'd also generate arbitrary MetadataErrors, but that's not possible
+ // because there are !Clone descendants.
+ proptest! {
+ #[test]
+ fn test_should_reboot_commit_error(config: Config) {
+ let err = MetadataError::Commit(BootManagerError::Status {
+ method_name: "foo",
+ status: Status::UNAVAILABLE,
+ });
+ assert!(should_reboot(&err, &config));
+ }
+
+ #[test]
+ fn test_should_reboot_policy_error(config: Config) {
+ let err = MetadataError::Policy(PolicyError::Build(BootManagerError::Status {
+ method_name: "bar",
+ status: Status::ACCESS_DENIED,
+ }));
+ assert!(should_reboot(&err, &config));
+ }
+
+ #[test]
+ fn test_should_reboot_signal_error(config: Config) {
+ let err = MetadataError::SignalPeer(fuchsia_zircon::Status::NOT_FOUND);
+ assert!(should_reboot(&err, &config));
+ }
+
+ #[test]
+ fn test_should_reboot_unblock_error(config: Config) {
+ assert!(should_reboot(&MetadataError::Unblock, &config));
+ }
+ }
+}
diff --git a/src/sys/pkg/tests/system-update-committer/BUILD.gn b/src/sys/pkg/tests/system-update-committer/BUILD.gn
index 82b59a7..e7d45d2e 100644
--- a/src/sys/pkg/tests/system-update-committer/BUILD.gn
+++ b/src/sys/pkg/tests/system-update-committer/BUILD.gn
@@ -19,9 +19,11 @@
"//src/lib/fuchsia-component",
"//src/lib/zircon/rust:fuchsia-zircon",
"//src/sys/pkg/testing/mock-paver",
+ "//src/sys/pkg/testing/mock-reboot",
"//third_party/rust_crates:anyhow",
"//third_party/rust_crates:futures",
"//third_party/rust_crates:matches",
+ "//third_party/rust_crates:parking_lot",
]
sources = [ "src/lib.rs" ]
diff --git a/src/sys/pkg/tests/system-update-committer/meta/system-update-committer.cmx b/src/sys/pkg/tests/system-update-committer/meta/system-update-committer.cmx
index b8cd5f5..65ccd44 100644
--- a/src/sys/pkg/tests/system-update-committer/meta/system-update-committer.cmx
+++ b/src/sys/pkg/tests/system-update-committer/meta/system-update-committer.cmx
@@ -7,6 +7,7 @@
},
"sandbox": {
"services": [
+ "fuchsia.hardware.power.statecontrol.Admin",
"fuchsia.paver.Paver"
]
}
diff --git a/src/sys/pkg/tests/system-update-committer/src/lib.rs b/src/sys/pkg/tests/system-update-committer/src/lib.rs
index 83b97a5..c7bc886 100644
--- a/src/sys/pkg/tests/system-update-committer/src/lib.rs
+++ b/src/sys/pkg/tests/system-update-committer/src/lib.rs
@@ -12,9 +12,11 @@
server::{NestedEnvironment, ServiceFs},
},
fuchsia_zircon::{self as zx, AsHandleRef},
- futures::prelude::*,
+ futures::{channel::oneshot, prelude::*},
matches::assert_matches,
mock_paver::{hooks as mphooks, MockPaverService, MockPaverServiceBuilder, PaverEvent},
+ mock_reboot::{MockRebootService, RebootReason},
+ parking_lot::Mutex,
std::{sync::Arc, time::Duration},
};
@@ -24,12 +26,17 @@
struct TestEnvBuilder {
paver_service_builder: Option<MockPaverServiceBuilder>,
+ reboot_service: Option<MockRebootService>,
}
impl TestEnvBuilder {
fn paver_service_builder(self, paver_service_builder: MockPaverServiceBuilder) -> Self {
Self { paver_service_builder: Some(paver_service_builder), ..self }
}
+ fn reboot_service(self, reboot_service: MockRebootService) -> Self {
+ Self { reboot_service: Some(reboot_service), ..self }
+ }
+
fn build(self) -> TestEnv {
let mut fs = ServiceFs::new();
fs.add_proxy_service::<fidl_fuchsia_logger::LogSinkMarker, _>();
@@ -48,6 +55,20 @@
.detach()
});
+ // Set up reboot service.
+ let reboot_service = Arc::new(self.reboot_service.unwrap_or_else(|| {
+ MockRebootService::new(Box::new(|_| panic!("unexpected call to reboot")))
+ }));
+ let reboot_service_clone = Arc::clone(&reboot_service);
+ fs.add_fidl_service(move |stream| {
+ fasync::Task::spawn(
+ Arc::clone(&reboot_service_clone)
+ .run_reboot_service(stream)
+ .unwrap_or_else(|e| panic!("error running reboot service: {:#}", anyhow!(e))),
+ )
+ .detach()
+ });
+
let env = fs
.create_salted_nested_environment("system_update_committer_env")
.expect("nested environment to create successfully");
@@ -57,18 +78,24 @@
.spawn(env.launcher())
.expect("system-update-committer to launch");
- TestEnv { _env: env, system_update_committer, _paver_service: paver_service }
+ TestEnv {
+ _env: env,
+ system_update_committer,
+ _paver_service: paver_service,
+ _reboot_service: reboot_service,
+ }
}
}
struct TestEnv {
_env: NestedEnvironment,
system_update_committer: App,
_paver_service: Arc<MockPaverService>,
+ _reboot_service: Arc<MockRebootService>,
}
impl TestEnv {
fn builder() -> TestEnvBuilder {
- TestEnvBuilder { paver_service_builder: None }
+ TestEnvBuilder { paver_service_builder: None, reboot_service: None }
}
/// Opens a connection to the fuchsia.update/CommitStatusProvider FIDL service.
@@ -212,3 +239,29 @@
Ok(zx::Signals::USER_0)
);
}
+
+/// When the paver fails, we expect the system-update-committer to trigger a reboot.
+#[fasync::run_singlethreaded(test)]
+async fn reboot() {
+ let (sender, recv) = oneshot::channel();
+ let sender = Arc::new(Mutex::new(Some(sender)));
+ let reboot_service = MockRebootService::new(Box::new(move |reason: RebootReason| {
+ sender.lock().take().unwrap().send(reason).unwrap();
+ Ok(())
+ }));
+
+ let _env = TestEnv::builder()
+ .paver_service_builder(MockPaverServiceBuilder::new().insert_hook(mphooks::return_error(
+ |e: &PaverEvent| {
+ if e == &PaverEvent::QueryCurrentConfiguration {
+ zx::Status::NOT_FOUND
+ } else {
+ zx::Status::OK
+ }
+ },
+ )))
+ .reboot_service(reboot_service)
+ .build();
+
+ assert_eq!(recv.await, Ok(RebootReason::RetrySystemUpdate));
+}