tools/blackout/blackout-host/src/lib.rs - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 //! Library for the host side of filesystem integrity host-target interaction tests.

 #![deny(missing_docs)]

 use rand::random;
 use std::fmt;
 use std::path::PathBuf;
 use std::process::{ExitStatus, Output};
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 use thiserror::Error;

 pub mod steps;
 use ffx_config::global_env_context;
 pub use steps::RebootType;
 use steps::{LoadStep, RebootStep, SetupStep, TestStep, VerifyStep};

 pub mod integration;

 /// Partition label used for the test device when the caller does not supply one.
 const BLACKOUT_DEVICE_LABEL: &str = "blackout";

 /// Wrap `message` in a three-line box drawn with Unicode box-drawing characters.
 ///
 /// The horizontal rules are sized from the number of characters in `message` (plus one space of
 /// padding on each side) rather than from its UTF-8 byte length, so the corners also line up for
 /// messages that contain multi-byte characters.
 fn box_message(message: String) -> String {
     // Count `char`s, not bytes: `str::len` would make the rules too long for non-ASCII text.
     let line_len = message.chars().count() + 2;
     let line = "━".repeat(line_len);
     format!(
         "┏{line}┓
 ┃ {message} ┃
 ┗{line}┛",
         message = message,
         line = line
     )
 }

 /// An error occurred running a command on the target system. Contains the exit status, stdout, and
 /// stderr of the command.
 ///
 /// The tuple fields are, in order: the exit status, the captured stdout, and the captured stderr.
 /// The `Display` output prints the status on the first line, followed by the stdout and stderr
 /// text under their own headings.
 #[derive(Debug, Error)]
 #[error(
     "failed to run command: {}\n\
                 stdout:\n\
                 {}\n\
                 stderr:\n\
                 {}",
     _0,
     _1,
     _2
 )]
 pub struct CommandError(ExitStatus, String, String);

 impl From<Output> for CommandError {
     /// Convert the [`std::process::Output`] of a command into a [`CommandError`].
     ///
     /// The captured stdout and stderr are decoded lossily: byte sequences that are not valid
     /// UTF-8 are replaced with U+FFFD rather than causing a panic, so a command that emits
     /// binary or mis-encoded output can still be reported as an error.
     fn from(out: Output) -> Self {
         let stdout = String::from_utf8_lossy(&out.stdout).into_owned();
         let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
         CommandError(out.status, stdout, stderr)
     }
 }

 impl From<ffx_isolate::CommandOutput> for CommandError {
     fn from(out: ffx_isolate::CommandOutput) -> Self {
         CommandError(out.status, out.stdout, out.stderr)
     }
 }

 /// An error occurred while attempting to reboot the system.
 ///
 /// The `IoError` and `Command` variants carry `#[from]` conversions, so the `?` operator can turn
 /// a [`std::io::Error`] or a [`CommandError`] into a `RebootError` directly.
 #[derive(Debug, Error)]
 pub enum RebootError {
     /// The path to the relay device required for hard-rebooting the target doesn't exist.
     /// Carries the path that was looked up.
     #[error("device does not exist: {:?}", _0)]
     MissingDevice(PathBuf),

     /// An io error occurred during rebooting. Maybe we failed to write to the device.
     #[error("io error: {:?}", _0)]
     IoError(#[from] std::io::Error),

     /// The command we executed on the target failed.
     #[error("command error: {:?}", _0)]
     Command(#[from] CommandError),
 }

 /// Error used for the host-side of the blackout library.
 ///
 /// Only `AnyhowError`, `Reboot` and `HostCommand` have `#[from]` conversions. The variants that
 /// wrap a [`CommandError`] must be constructed explicitly, because the same payload type is
 /// shared by several of them.
 #[derive(Debug, Error)]
 pub enum BlackoutError {
     /// Something went wrong!
     #[error("error: {}", _0)]
     AnyhowError(#[from] anyhow::Error),

     /// We got an error when trying to reboot.
     #[error("failed to reboot: {:?}", _0)]
     Reboot(#[from] RebootError),

     /// We failed to run the command on the host. Specifically, when the spawn or something fails,
     /// not when the command itself returns a non-zero exit code.
     #[error("host command failed: {:?}", _0)]
     HostCommand(#[from] std::io::Error),

     /// Timed out during target discovery.
     // NOTE(review): the "5s" in the message is hard-coded here; confirm it matches the timeout
     // used by whichever code constructs this variant.
     #[error("no targets found after 5s: {:?}", _0)]
     TargetDiscoveryTimeout(CommandError),

     /// We got an error from the ffx command.
     #[error("failed to run an ffx command: {:?}", _0)]
     FfxError(CommandError),

     /// A failure in the setup step
     #[error("failed to setup test: {:?}", _0)]
     SetupError(CommandError),

     /// Specifically the verification step failed. This indicates an actual test failure as opposed
     /// to a failure of the test framework or environmental failure.
     #[error("verification failed: {:?}", _0)]
     Verification(CommandError),
 }

 /// Blackout is a power-failure testing framework for the filesystems. This host-side harness runs
 /// operations on the configured target device for generating load on the filesystem, then reboots
 /// the device after a certain amount of time using a configured reboot mechanism. By default, it
 /// runs one iteration of this test. Options are provided for running the test until failure or
 /// running the test N times and collecting failure statistics.
 #[derive(Clone)]
 pub struct CommonOpts {
     /// The optional label for the partition to run the test on. If none is provided, a default
     /// will be used.
     pub device_label: Option<String>,
     /// The optional path to the block device on the target device to use for testing. If none is
     /// provided, the test will find an appropriate device. WARNING: the test can (and likely
     /// will!) format this device. Don't use a main system partition!
     pub device_path: Option<String>,
     /// [Optional] A seed to use for all random operations. Tests are NOT deterministic relative to
     /// the provided seed. The operations will be identical, but because of the non-deterministic
     /// timing-dependent nature of the tests, the exact time the reboot is triggered in relation to
     /// the operations is not guaranteed.
     ///
     /// One will be randomly generated if not provided. When performing the same test multiple times
     /// in one run, a new seed will be generated for each run if one was not provided.
     pub seed: Option<u64>,
     /// Reboot type. There are three options
     /// 1. Soft reboot - we reboot the system using ffx target reboot
     /// 2. Hard reboot with a serial power relay - we reboot the system by writing bytes to a
     /// serial device that we assume is a power relay. Includes a path to the power relay. Probably
     /// the highest-numbered /dev/ttyUSB[N]. If in doubt, try removing it and seeing what
     /// disappears from /dev.
     /// 3. Hard reboot with the infra dmc command - we reboot the system by calling the dmc binary
     /// provided by infra. This command cycles the power for us using some kind of http accessible
     /// power strip, but the details are abstracted behind the set-power-state command.
     pub reboot: RebootType,
     /// Run the test N number of times, collecting statistics on the number of failures.
     pub iterations: Option<u64>,
     /// Run the test until a verification failure is detected, then exit. Must be combined with
     /// `iterations`; building a test with this flag set and no iteration count panics.
     pub run_until_failure: bool,
 }

 /// The seed for a run of the test.
 ///
 /// Cloning a `Variable` seed clones the `Arc`, so every clone observes the same underlying value
 /// and sees the result of a re-roll.
 #[derive(Clone, Debug)]
 pub enum Seed {
     /// The seed is constant over multiple runs of the test.
     Constant(u64),
     /// The seed is a random value for every run of the test, generated by `random`.
     Variable(Arc<AtomicU64>),
 }

 impl Seed {
     /// Build a seed: a caller-provided value stays fixed, otherwise a shared random value is
     /// generated that can be re-rolled later.
     fn new(maybe_seed: Option<u64>) -> Seed {
         maybe_seed
             .map_or_else(|| Seed::Variable(Arc::new(AtomicU64::new(random()))), Seed::Constant)
     }

     /// Replace a variable seed with a fresh random value. Constant seeds are left untouched.
     fn reroll(&self) {
         if let Seed::Variable(cell) = self {
             cell.store(random(), Ordering::Relaxed);
         }
     }

     /// Read the current seed value.
     fn get(&self) -> u64 {
         match *self {
             Seed::Constant(fixed) => fixed,
             Seed::Variable(ref cell) => cell.load(Ordering::Relaxed),
         }
     }
 }

 impl fmt::Display for Seed {
     /// Print the current numeric value of the seed, regardless of which variant holds it.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let current: u64 = match self {
             Seed::Variable(cell) => cell.load(Ordering::Relaxed),
             Seed::Constant(fixed) => *fixed,
         };
         write!(f, "{}", current)
     }
 }

 /// How many times the configured steps are executed and when execution stops early.
 #[derive(Clone, Debug)]
 enum RunMode {
     /// Execute the steps a single time and return that result.
     Once,
     /// Execute the steps the given number of times, tallying failures instead of stopping.
     Iterations(u64),
     /// Execute the steps up to the given number of times, stopping at the first verification
     /// failure.
     IterationsUntilFailure(u64),
 }

 /// Test definition. This contains all the information to make a test reproducible in a particular
 /// environment, and allows host binaries to configure the steps taken by the test.
 struct Test {
     // Name of the package on the target that holds the test component.
     package: String,
     // Name of the component within `package`.
     component: String,
     // Seed handed to the steps; re-rolled before each run when it was not fixed by the caller.
     seed: Seed,
     // Partition label to test against (falls back to the library default when not supplied).
     device_label: String,
     // Optional explicit block device path on the target.
     device_path: Option<String>,
     // Mechanism used by reboot steps.
     reboot_type: RebootType,
     // How many times to run and when to stop.
     run_mode: RunMode,
     // Steps executed in insertion order on every run.
     steps: Vec<Box<dyn TestStep>>,
 }

 // Renders the test parameters as a multi-line, struct-like listing. Every field is written with
 // its `Debug` formatting (so the seed shows its variant), and the list of steps is not included.
 impl fmt::Display for Test {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // The line breaks and indentation inside the literal are part of the output.
         write!(
             f,
             "Test {{
     package: {:?},
     component: {:?},
     seed: {:?},
     device_label: {:?},
     device_path: {:?},
     reboot_type: {:?},
     run_mode: {:?},
 }}",
             self.package,
             self.component,
             self.seed,
             self.device_label,
             self.device_path,
             self.reboot_type,
             self.run_mode,
         )
     }
 }

 impl Test {
     /// Create a new test with the provided name. The name needs to match the associated package
     /// that will be present on the target device. The package should be callable with `run` from
     /// the command line.
     ///
     /// The run mode is derived from `opts.iterations` and `opts.run_until_failure`; the device
     /// label falls back to the library default when `opts.device_label` is `None`.
     ///
     /// # Panics
     ///
     /// Panics if `opts.run_until_failure` is set while `opts.iterations` is `None`.
     pub fn new_component(package: &'static str, component: &'static str, opts: CommonOpts) -> Test {
         Test {
             package: package.to_string(),
             component: component.to_string(),
             seed: Seed::new(opts.seed),
             // Only allocate the default label when the caller did not provide one.
             device_label: opts
                 .device_label
                 .unwrap_or_else(|| BLACKOUT_DEVICE_LABEL.to_string()),
             device_path: opts.device_path,
             reboot_type: opts.reboot,
             run_mode: match (opts.iterations, opts.run_until_failure) {
                 (None, false) => RunMode::Once,
                 (None, true) => panic!("run until failure requires multiple iterations"),
                 (Some(iterations), false) => RunMode::Iterations(iterations),
                 (Some(iterations), true) => RunMode::IterationsUntilFailure(iterations),
             },
             steps: Vec::new(),
         }
     }

     /// Add a custom test step implementation. Steps run in the order they were added.
     pub fn add_step(&mut self, step: Box<dyn TestStep>) -> &mut Self {
         self.steps.push(step);
         self
     }

     /// Run the defined test steps. Prints the test definition before execution. Attempts to re-roll
     /// the random seed before the test run. Stops at, and returns, the first step error.
     async fn run_test(&self) -> Result<(), BlackoutError> {
         self.seed.reroll();

         println!("{}", self);

         for step in &self.steps {
             step.execute().await?;
         }

         Ok(())
     }

     /// Run the test `iterations` times, printing running statistics after every run.
     ///
     /// Verification errors are counted as failures and every other error as a non-verification
     /// error; neither stops the loop, so this always returns `Ok(())`.
     async fn run_iterations(&self, iterations: u64) -> Result<(), BlackoutError> {
         let mut failures = 0u64;
         let mut flukes = 0u64;

         // Inclusive range: `1..iterations + 1` would overflow for `u64::MAX`.
         for runs in 1..=iterations {
             println!("{}", box_message(format!("test run #{}", runs)));

             match self.run_test().await {
                 Ok(()) => (),
                 Err(e @ BlackoutError::Verification(_)) => {
                     // Report the error instead of silently dropping it.
                     println!("{}", e);
                     failures += 1;
                 }
                 Err(e) => {
                     println!("{}", e);
                     flukes += 1;
                 }
             }

             println!("runs:                    {}", runs);
             println!("failures:                {}", failures);
             println!("non-verification errors: {}", flukes);
             // Only runs that reached a verdict count toward the percentage. When there are none
             // yet, the ratio would be 0/0 (NaN), so report that no percentage is available.
             let completed = runs - flukes;
             if completed == 0 {
                 println!("failure percentage:      n/a");
             } else {
                 println!(
                     "failure percentage:      {:.2}%",
                     (failures as f64 / completed as f64) * 100.0
                 );
             }
         }

         Ok(())
     }

     /// Run the test up to `iterations` times, returning the first verification error.
     ///
     /// Non-verification errors are reported and the loop continues. Returns `Ok(())` when no run
     /// fails verification.
     async fn run_iterations_until_failure(&self, iterations: u64) -> Result<(), BlackoutError> {
         // Inclusive range: `1..iterations + 1` would overflow for `u64::MAX`.
         for runs in 1..=iterations {
             println!("{}", box_message(format!("test run #{}", runs)));
             match self.run_test().await {
                 Ok(()) => (),
                 Err(e @ BlackoutError::Verification(_)) => return Err(e),
                 // Not a test verdict: report it and keep going.
                 Err(e) => println!("{}", e),
             }
         }
         Ok(())
     }

     /// Run the test according to the run mode chosen when it was constructed.
     ///
     /// There are three supported kinds of test run:
     /// 1. one iteration (iterations == None && run_until_failure == false)
     ///    this is the simplest case. The steps are executed once and the result is returned.
     /// 2. some number of iterations (iterations == Some(N) && run_until_failure == false)
     ///    the steps are executed N times; failures are tallied and printed rather than returned.
     /// 3. run until verification failure, with a max number of iterations
     ///    (iterations == Some(N) && run_until_failure == true)
     ///    only run a certain number of iterations, but quit early if there is a verification
     ///    failure instead of aggregating the results.
     ///
     /// The remaining combination (iterations == None && run_until_failure == true) is rejected
     /// when the test is constructed.
     pub async fn run(self) -> Result<(), BlackoutError> {
         match self.run_mode {
             RunMode::Once => self.run_test().await,
             RunMode::Iterations(iterations) => self.run_iterations(iterations).await,
             RunMode::IterationsUntilFailure(iterations) => {
                 self.run_iterations_until_failure(iterations).await
             }
         }
     }
 }

 /// A test environment. This wraps a test configuration with the environmental context to run it,
 /// such as the isolated ffx instance.
 pub struct TestEnv {
     // The test definition that steps are added to and that is eventually run.
     test: Test,
     // Isolated ffx instance; a clone of this handle is given to every step that is added.
     isolated_ffx: Arc<ffx_isolate::Isolate>,
 }

 impl TestEnv {
     /// Build a test environment for `package`/`component`.
     ///
     /// An isolated ffx instance named `blackout-ffx` is created from the global ffx environment
     /// context, using the `ssh.priv` configuration value as the ssh key, and the test definition
     /// is constructed from `opts`.
     ///
     /// NOTE(review): earlier documentation stated that target discovery (honouring
     /// $FUCHSIA_NODENAME) happens at this point; nothing in this function does so directly, so
     /// confirm whether the isolate takes care of it.
     ///
     /// # Panics
     ///
     /// Panics if the global context is unavailable, the ssh key cannot be read from the
     /// configuration, or the isolated ffx instance cannot be created.
     pub async fn new(package: &'static str, component: &'static str, opts: CommonOpts) -> TestEnv {
         let context = global_env_context().expect("No global context");
         let ssh_key = context.get::<String, _>("ssh.priv").expect("could not get ssh key").into();
         let isolate = ffx_isolate::Isolate::new_with_sdk("blackout-ffx", ssh_key, &context)
             .await
             .expect("failed to make new isolated ffx");
         let isolated_ffx = Arc::new(isolate);

         let test = Test::new_component(package, component, opts);
         TestEnv { test, isolated_ffx }
     }

     /// Queue a setup step, intended to prepare the filesystem on the target before the test.
     /// The step receives the package, component, seed, device label and device path of this
     /// test.
     pub fn setup_step(&mut self) -> &mut Self {
         let step = SetupStep::new(
             self.isolated_ffx.clone(),
             &self.test.package,
             &self.test.component,
             self.test.seed.clone(),
             &self.test.device_label,
             self.test.device_path.clone(),
         );
         self.test.add_step(Box::new(step));
         self
     }

     /// Queue a load-generation step. `duration` is forwarded to the step unchanged; see
     /// `LoadStep` for how it is interpreted.
     pub fn load_step(&mut self, duration: Option<Duration>) -> &mut Self {
         let step = LoadStep::new(
             self.isolated_ffx.clone(),
             &self.test.package,
             &self.test.component,
             self.test.seed.clone(),
             &self.test.device_label,
             self.test.device_path.clone(),
             duration,
         );
         self.test.add_step(Box::new(step));
         self
     }

     /// Queue a reboot step that uses the reboot mechanism configured for this test.
     /// `bootserver` is forwarded to the step unchanged.
     pub fn reboot_step(&mut self, bootserver: bool) -> &mut Self {
         let step =
             RebootStep::new(self.isolated_ffx.clone(), &self.test.reboot_type, bootserver);
         self.test.add_step(Box::new(step));
         self
     }

     /// Queue a verification step. `num_retries` and `retry_timeout` are forwarded to the step;
     /// they are meant to bound how many times verification is attempted and how long to wait
     /// between attempts.
     pub fn verify_step(&mut self, num_retries: u32, retry_timeout: Duration) -> &mut Self {
         let step = VerifyStep::new(
             self.isolated_ffx.clone(),
             &self.test.package,
             &self.test.component,
             self.test.seed.clone(),
             &self.test.device_label,
             self.test.device_path.clone(),
             num_retries,
             retry_timeout,
         );
         self.test.add_step(Box::new(step));
         self
     }

     /// Run the configured test, consuming the environment.
     ///
     /// The behaviour depends on the options the environment was created with:
     /// 1. no iteration count: the steps are executed once and that result is returned.
     /// 2. an iteration count without run-until-failure: the steps are executed that many times
     ///    and failure statistics are printed.
     /// 3. an iteration count with run-until-failure: the steps are executed up to that many
     ///    times, returning early on the first verification failure.
     pub async fn run(self) -> Result<(), BlackoutError> {
         let test = self.test;
         test.run().await
     }
 }

 #[cfg(test)]
 mod tests {
     use super::{BlackoutError, CommandError, CommonOpts, RebootType, Test, TestStep};
     use async_trait::async_trait;
     use fuchsia_async as fasync;
     use std::os::unix::process::ExitStatusExt;
     use std::process::ExitStatus;
     use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
     use std::sync::Arc;

     /// A test step that counts how often it is executed and delegates the result of each
     /// execution to a closure, which receives the 1-based execution count.
     struct FakeStep<F> {
         res: F,
         runs: Arc<AtomicU64>,
     }
     impl<F> FakeStep<F>
     where
         F: Fn(u64) -> Result<(), BlackoutError> + Send + Sync,
     {
         /// Returns the step together with a handle to its execution counter.
         fn new(res: F) -> (FakeStep<F>, Arc<AtomicU64>) {
             let runs = Arc::new(AtomicU64::new(0));
             (FakeStep { res, runs: runs.clone() }, runs)
         }
     }
     #[async_trait]
     impl<F> TestStep for FakeStep<F>
     where
         F: Fn(u64) -> Result<(), BlackoutError> + Send + Sync,
     {
         async fn execute(&self) -> Result<(), BlackoutError> {
             // `fetch_add` returns the previous value, so this is the number of this execution.
             let run = self.runs.fetch_add(1, Ordering::Relaxed) + 1;
             (self.res)(run)
         }
     }

     /// The command error payload shared by all the failing fake steps.
     fn fake_command_error() -> CommandError {
         CommandError(ExitStatus::from_raw(1), "(fake stdout)".into(), "(fake stderr)".into())
     }

     fn fake_test(iterations: Option<u64>, run_until_failure: bool) -> Test {
         let opts = CommonOpts {
             device_label: None,
             device_path: None,
             seed: None,
             reboot: RebootType::Software,
             iterations,
             run_until_failure,
         };
         Test::new_component("fake_package", "fake_component", opts)
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_once_executes_steps_once() {
         // using the run once mode, we execute all our test steps once, in order. run once is how
         // all the other run modes execute their test steps, so if this is solid then we just test
         // iterations and exit modes for the rest.

         let step1_exec1 = Arc::new(AtomicBool::new(false));
         let step1_exec2 = step1_exec1.clone();
         let step1_func = move |_| {
             assert!(!step1_exec1.load(Ordering::Relaxed), "step one already executed");
             step1_exec1.store(true, Ordering::Relaxed);
             Ok(())
         };
         let step2_func = move |_| {
             assert!(step1_exec2.load(Ordering::Relaxed), "step two executed before step one");
             Ok(())
         };
         let (step1, step1_runs) = FakeStep::new(step1_func);
         let (step2, step2_runs) = FakeStep::new(step2_func);

         let mut test = fake_test(None, false);
         test.add_step(Box::new(step1)).add_step(Box::new(step2));

         test.run().await.expect("failed to run test");

         assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
         assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_once_exits_on_failure() {
         let (step1, step1_runs) = FakeStep::new(|_| Ok(()));
         let (step2, step2_runs) =
             FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
         let (step3, step3_runs) = FakeStep::new(|_| panic!("step 3 should never be run"));
         let mut test = fake_test(None, false);
         test.add_step(Box::new(step1)).add_step(Box::new(step2)).add_step(Box::new(step3));
         match test.run().await {
             Err(BlackoutError::Verification(..)) => (),
             Ok(..) => panic!("test returned Ok on an error"),
             Err(..) => panic!("test returned an unexpected error"),
         }
         assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
         assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
         assert_eq!(step3_runs.load(Ordering::Relaxed), 0);
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_n_executes_steps_n_times() {
         // using the iterations mode, we execute all our test steps `iterations` number of times. we
         // expect that no matter the error value returned from the test, we will execute the
         // expected number of times.
         let iterations = 10;
         let (step, runs) = FakeStep::new(|_| Ok(()));
         let mut test = fake_test(Some(iterations), false);
         test.add_step(Box::new(step));
         test.run().await.expect("failed to run test");
         assert_eq!(
             runs.load(Ordering::Relaxed),
             iterations,
             "step wasn't run the expected number of times"
         );
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_n_executes_steps_n_times_verify_failure() {
         let iterations = 10;
         let (step, runs) =
             FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
         let mut test = fake_test(Some(iterations), false);
         test.add_step(Box::new(step));
         test.run().await.expect("failed to run test");
         assert_eq!(
             runs.load(Ordering::Relaxed),
             iterations,
             "step wasn't run the expected number of times"
         );
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_n_executes_steps_n_times_other_error() {
         let iterations = 10;
         let (step, runs) = FakeStep::new(|_| Err(BlackoutError::FfxError(fake_command_error())));
         let mut test = fake_test(Some(iterations), false);
         test.add_step(Box::new(step));
         test.run().await.expect("failed to run test");
         assert_eq!(
             runs.load(Ordering::Relaxed),
             iterations,
             "step wasn't run the expected number of times"
         );
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_until_failure_stops_at_first_verification_failure() {
         // in the run-until-failure mode, the first verification error ends the run and is
         // returned to the caller, even though more iterations were allowed.
         let failing_run = 3;
         let (step, runs) = FakeStep::new(move |run| {
             if run == failing_run {
                 Err(BlackoutError::Verification(fake_command_error()))
             } else {
                 Ok(())
             }
         });
         let mut test = fake_test(Some(10), true);
         test.add_step(Box::new(step));
         match test.run().await {
             Err(BlackoutError::Verification(..)) => (),
             Ok(..) => panic!("test returned Ok on a verification failure"),
             Err(..) => panic!("test returned an unexpected error"),
         }
         assert_eq!(
             runs.load(Ordering::Relaxed),
             failing_run,
             "test kept running after the verification failure"
         );
     }

     #[fasync::run_singlethreaded(test)]
     async fn run_until_failure_ignores_other_errors() {
         // errors that aren't verification failures don't end a run-until-failure test early.
         let iterations = 10;
         let (step, runs) = FakeStep::new(|_| Err(BlackoutError::FfxError(fake_command_error())));
         let mut test = fake_test(Some(iterations), true);
         test.add_step(Box::new(step));
         test.run().await.expect("failed to run test");
         assert_eq!(
             runs.load(Ordering::Relaxed),
             iterations,
             "step wasn't run the expected number of times"
         );
     }
 }
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	//! Library for the host side of filesystem integrity host-target interaction tests.

	#![deny(missing_docs)]

	use rand::random;
	use std::fmt;
	use std::path::PathBuf;
	use std::process::{ExitStatus, Output};
	use std::sync::atomic::{AtomicU64, Ordering};
	use std::sync::Arc;
	use std::time::Duration;
	use thiserror::Error;

	pub mod steps;
	use ffx_config::global_env_context;
	pub use steps::RebootType;
	use steps::{LoadStep, RebootStep, SetupStep, TestStep, VerifyStep};

	pub mod integration;

	/// Partition label used for the test device when the caller does not supply one.
	const BLACKOUT_DEVICE_LABEL: &str = "blackout";

	/// Wrap `message` in a three-line box drawn with Unicode box-drawing characters.
	///
	/// The horizontal rules are sized from the number of characters in `message` (plus one space of
	/// padding on each side) rather than from its UTF-8 byte length, so the corners also line up for
	/// messages that contain multi-byte characters.
	fn box_message(message: String) -> String {
	    // Count `char`s, not bytes: `str::len` would make the rules too long for non-ASCII text.
	    let line_len = message.chars().count() + 2;
	    let line = "━".repeat(line_len);
	    format!(
	"┏{line}┓
	┃ {message} ┃
	┗{line}┛",
	        message = message,
	        line = line
	    )
	}

	/// An error occurred running a command on the target system. Contains the exit status, stdout, and
	/// stderr of the command.
	///
	/// The tuple fields are, in order: the exit status, the captured stdout, and the captured stderr.
	/// The `Display` output prints the status on the first line, followed by the stdout and stderr
	/// text under their own headings.
	#[derive(Debug, Error)]
	#[error(
	"failed to run command: {}\n\
	stdout:\n\
	{}\n\
	stderr:\n\
	{}",
	_0,
	_1,
	_2
	)]
	pub struct CommandError(ExitStatus, String, String);

	impl From<Output> for CommandError {
	    /// Convert the [`std::process::Output`] of a command into a [`CommandError`].
	    ///
	    /// The captured stdout and stderr are decoded lossily: byte sequences that are not valid
	    /// UTF-8 are replaced with U+FFFD rather than causing a panic, so a command that emits
	    /// binary or mis-encoded output can still be reported as an error.
	    fn from(out: Output) -> Self {
	        let stdout = String::from_utf8_lossy(&out.stdout).into_owned();
	        let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
	        CommandError(out.status, stdout, stderr)
	    }
	}

	impl From<ffx_isolate::CommandOutput> for CommandError {
	fn from(out: ffx_isolate::CommandOutput) -> Self {
	CommandError(out.status, out.stdout, out.stderr)
	}
	}

	/// An error occurred while attempting to reboot the system.
	///
	/// The `IoError` and `Command` variants carry `#[from]` conversions, so the `?` operator can turn
	/// a [`std::io::Error`] or a [`CommandError`] into a `RebootError` directly.
	#[derive(Debug, Error)]
	pub enum RebootError {
	/// The path to the relay device required for hard-rebooting the target doesn't exist.
	/// Carries the path that was looked up.
	#[error("device does not exist: {:?}", _0)]
	MissingDevice(PathBuf),

	/// An io error occurred during rebooting. Maybe we failed to write to the device.
	#[error("io error: {:?}", _0)]
	IoError(#[from] std::io::Error),

	/// The command we executed on the target failed.
	#[error("command error: {:?}", _0)]
	Command(#[from] CommandError),
	}

	/// Error used for the host-side of the blackout library.
	///
	/// Only `AnyhowError`, `Reboot` and `HostCommand` have `#[from]` conversions. The variants that
	/// wrap a [`CommandError`] must be constructed explicitly, because the same payload type is
	/// shared by several of them.
	#[derive(Debug, Error)]
	pub enum BlackoutError {
	/// Something went wrong!
	#[error("error: {}", _0)]
	AnyhowError(#[from] anyhow::Error),

	/// We got an error when trying to reboot.
	#[error("failed to reboot: {:?}", _0)]
	Reboot(#[from] RebootError),

	/// We failed to run the command on the host. Specifically, when the spawn or something fails,
	/// not when the command itself returns a non-zero exit code.
	#[error("host command failed: {:?}", _0)]
	HostCommand(#[from] std::io::Error),

	/// Timed out during target discovery.
	// NOTE(review): the "5s" in the message is hard-coded here; confirm it matches the timeout
	// used by whichever code constructs this variant.
	#[error("no targets found after 5s: {:?}", _0)]
	TargetDiscoveryTimeout(CommandError),

	/// We got an error from the ffx command.
	#[error("failed to run an ffx command: {:?}", _0)]
	FfxError(CommandError),

	/// A failure in the setup step
	#[error("failed to setup test: {:?}", _0)]
	SetupError(CommandError),

	/// Specifically the verification step failed. This indicates an actual test failure as opposed
	/// to a failure of the test framework or environmental failure.
	#[error("verification failed: {:?}", _0)]
	Verification(CommandError),
	}

	/// Blackout is a power-failure testing framework for the filesystems. This host-side harness runs
	/// operations on the configured target device for generating load on the filesystem, then reboots
	/// the device after a certain amount of time using a configured reboot mechanism. By default, it
	/// runs one iteration of this test. Options are provided for running the test until failure or
	/// running the test N times and collecting failure statistics.
	#[derive(Clone)]
	pub struct CommonOpts {
	/// The optional label for the partition to run the test on. If none is provided, a default
	/// will be used.
	pub device_label: Option<String>,
	/// The optional path to the block device on the target device to use for testing. If none is
	/// provided, the test will find an appropriate device. WARNING: the test can (and likely
	/// will!) format this device. Don't use a main system partition!
	pub device_path: Option<String>,
	/// [Optional] A seed to use for all random operations. Tests are NOT deterministic relative to
	/// the provided seed. The operations will be identical, but because of the non-deterministic
	/// timing-dependent nature of the tests, the exact time the reboot is triggered in relation to
	/// the operations is not guaranteed.
	///
	/// One will be randomly generated if not provided. When performing the same test multiple times
	/// in one run, a new seed will be generated for each run if one was not provided.
	pub seed: Option<u64>,
	/// Reboot type. There are three options
	/// 1. Soft reboot - we reboot the system using ffx target reboot
	/// 2. Hard reboot with a serial power relay - we reboot the system by writing bytes to a
	/// serial device that we assume is a power relay. Includes a path to the power relay. Probably
	/// the highest-numbered /dev/ttyUSB[N]. If in doubt, try removing it and seeing what
	/// disappears from /dev.
	/// 3. Hard reboot with the infra dmc command - we reboot the system by calling the dmc binary
	/// provided by infra. This command cycles the power for us using some kind of http accessible
	/// power strip, but the details are abstracted behind the set-power-state command.
	pub reboot: RebootType,
	/// Run the test N number of times, collecting statistics on the number of failures.
	pub iterations: Option<u64>,
	/// Run the test until a verification failure is detected, then exit. Must be combined with
	/// `iterations`; building a test with this flag set and no iteration count panics.
	pub run_until_failure: bool,
	}

	/// The seed for a run of the test.
	///
	/// Cloning a `Variable` seed clones the `Arc`, so every clone observes the same underlying value
	/// and sees the result of a re-roll.
	#[derive(Clone, Debug)]
	pub enum Seed {
	/// The seed is constant over multiple runs of the test.
	Constant(u64),
	/// The seed is a random value for every run of the test, generated by `random`.
	Variable(Arc<AtomicU64>),
	}

	impl Seed {
	    /// Build a seed: a caller-provided value stays fixed, otherwise a shared random value is
	    /// generated that can be re-rolled later.
	    fn new(maybe_seed: Option<u64>) -> Seed {
	        maybe_seed
	            .map_or_else(|| Seed::Variable(Arc::new(AtomicU64::new(random()))), Seed::Constant)
	    }

	    /// Replace a variable seed with a fresh random value. Constant seeds are left untouched.
	    fn reroll(&self) {
	        if let Seed::Variable(cell) = self {
	            cell.store(random(), Ordering::Relaxed);
	        }
	    }

	    /// Read the current seed value.
	    fn get(&self) -> u64 {
	        match *self {
	            Seed::Constant(fixed) => fixed,
	            Seed::Variable(ref cell) => cell.load(Ordering::Relaxed),
	        }
	    }
	}

	impl fmt::Display for Seed {
	    /// Print the current numeric value of the seed, regardless of which variant holds it.
	    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	        let current: u64 = match self {
	            Seed::Variable(cell) => cell.load(Ordering::Relaxed),
	            Seed::Constant(fixed) => *fixed,
	        };
	        write!(f, "{}", current)
	    }
	}

	/// How many times the configured steps are executed and when execution stops early.
	#[derive(Clone, Debug)]
	enum RunMode {
	/// Execute the steps a single time.
	Once,
	/// Execute the steps the given number of times.
	Iterations(u64),
	/// Execute the steps up to the given number of times, stopping at a verification failure.
	IterationsUntilFailure(u64),
	}

	/// Test definition. This contains all the information to make a test reproducible in a particular
	/// environment, and allows host binaries to configure the steps taken by the test.
	struct Test {
	// Name of the package on the target that holds the test component.
	package: String,
	// Name of the component within `package`.
	component: String,
	// Seed handed to the steps; re-rolled before each run when it was not fixed by the caller.
	seed: Seed,
	// Partition label to test against (falls back to the library default when not supplied).
	device_label: String,
	// Optional explicit block device path on the target.
	device_path: Option<String>,
	// Mechanism used by reboot steps.
	reboot_type: RebootType,
	// How many times to run and when to stop.
	run_mode: RunMode,
	// Steps executed in insertion order on every run.
	steps: Vec<Box<dyn TestStep>>,
	}

	// Renders the test parameters as a multi-line, struct-like listing. Every field is written with
	// its `Debug` formatting (so the seed shows its variant), and the list of steps is not included.
	impl fmt::Display for Test {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	// The line breaks and leading whitespace inside the literal are part of the output.
	write!(
	f,
	"Test {{
	package: {:?},
	component: {:?},
	seed: {:?},
	device_label: {:?},
	device_path: {:?},
	reboot_type: {:?},
	run_mode: {:?},
	}}",
	self.package,
	self.component,
	self.seed,
	self.device_label,
	self.device_path,
	self.reboot_type,
	self.run_mode,
	)
	}
	}

	impl Test {
	/// Create a new test definition for `component` in `package`, configured from the common
	/// command-line options.
	///
	/// The seed is constant when `opts.seed` is provided and randomly generated otherwise. The
	/// device label falls back to `BLACKOUT_DEVICE_LABEL` when `opts.device_label` is absent. The
	/// returned test has no steps; add them with `add_step`.
	///
	/// The run mode is derived from `opts.iterations` and `opts.run_until_failure`:
	/// - neither set: run once.
	/// - only `iterations`: run that many times, collecting statistics.
	/// - both set: run up to that many times, stopping at the first verification failure.
	///
	/// # Panics
	///
	/// Panics if `opts.run_until_failure` is set without `opts.iterations`.
	pub fn new_component(package: &str, component: &str, opts: CommonOpts) -> Test {
	    let run_mode = match (opts.iterations, opts.run_until_failure) {
	        (None, false) => RunMode::Once,
	        (None, true) => panic!("run until failure requires multiple iterations"),
	        (Some(iterations), false) => RunMode::Iterations(iterations),
	        (Some(iterations), true) => RunMode::IterationsUntilFailure(iterations),
	    };

	    Test {
	        package: package.to_string(),
	        component: component.to_string(),
	        seed: Seed::new(opts.seed),
	        // Only allocate the default label when the caller did not supply one.
	        device_label: opts.device_label.unwrap_or_else(|| BLACKOUT_DEVICE_LABEL.to_string()),
	        device_path: opts.device_path,
	        reboot_type: opts.reboot,
	        run_mode,
	        steps: Vec::new(),
	    }
	}

	/// Append a custom test step implementation to the end of the step list. Returns `self` so
	/// calls can be chained.
	pub fn add_step(&mut self, step: Box<dyn TestStep>) -> &mut Self {
	    let queue = &mut self.steps;
	    queue.push(step);
	    self
	}

	/// Execute every step once, in insertion order, stopping at the first step that fails and
	/// returning its error.
	///
	/// Before the steps run, the seed is re-rolled (a no-op for constant seeds) and the test
	/// definition is printed to stdout.
	async fn run_test(&self) -> Result<(), BlackoutError> {
	    self.seed.reroll();

	    // Printed after the re-roll so the output shows the seed this run actually uses.
	    println!("{}", self);

	    for current in self.steps.iter() {
	        current.execute().await?;
	    }

	    Ok(())
	}

	/// Run the test `iterations` times, printing running statistics to stdout after every run.
	///
	/// A verification error counts as a failure; any other error counts as a non-verification
	/// error. Neither stops the loop, and this function always returns `Ok(())`.
	///
	/// The failure percentage is computed over the runs that did not end in a non-verification
	/// error. While there are no such runs yet it is reported as 0.00%.
	async fn run_iterations(&self, iterations: u64) -> Result<(), BlackoutError> {
	    let mut failures = 0u64;
	    let mut flukes = 0u64;

	    // Inclusive range: `1..iterations + 1` would overflow for `iterations == u64::MAX`.
	    for runs in 1..=iterations {
	        println!("{}", box_message(format!("test run #{}", runs)));

	        match self.run_test().await {
	            Ok(()) => (),
	            Err(BlackoutError::Verification(_)) => failures += 1,
	            Err(_) => flukes += 1,
	        }

	        // Runs that ended in a non-verification error say nothing about filesystem integrity,
	        // so they are excluded from the denominator. Guard against dividing zero by zero, which
	        // would otherwise print `NaN%`.
	        let conclusive_runs = runs - flukes;
	        let failure_percentage = if conclusive_runs == 0 {
	            0.0
	        } else {
	            (failures as f64 / conclusive_runs as f64) * 100.0
	        };

	        println!("runs: {}", runs);
	        println!("failures: {}", failures);
	        println!("non-verification errors: {}", flukes);
	        println!("failure percentage: {:.2}%", failure_percentage);
	    }

	    Ok(())
	}

	/// Run the test up to `iterations` times, returning the first verification error.
	///
	/// Errors other than verification errors do not stop the loop; they are printed to stdout and
	/// the next iteration is started. Returns `Ok(())` if no run produced a verification error.
	async fn run_iterations_until_failure(&self, iterations: u64) -> Result<(), BlackoutError> {
	    // Inclusive range: `1..iterations + 1` would overflow for `iterations == u64::MAX`.
	    for runs in 1..=iterations {
	        println!("{}", box_message(format!("test run #{}", runs)));
	        match self.run_test().await {
	            Ok(()) => (),
	            Err(e @ BlackoutError::Verification(_)) => return Err(e),
	            // Deliberately best-effort: keep iterating, but leave a trace of what went wrong
	            // instead of discarding the error.
	            Err(e) => println!("ignoring non-verification error: {:?}", e),
	        }
	    }
	    Ok(())
	}

	/// Run the test according to its configured run mode, consuming it.
	///
	/// There are three run modes, chosen in `new_component` from the command line options:
	/// 1. once (iterations == None && run_until_failure == false)
	///    run the steps a single time and return the result of that run.
	/// 2. some number of iterations (iterations == Some(N) && run_until_failure == false)
	///    run the steps N times, tallying verification failures and other errors and printing
	///    statistics after every run. Always returns `Ok(())`.
	/// 3. run until verification failure, with a max number of iterations
	///    (iterations == Some(N) && run_until_failure == true)
	///    run the steps at most N times, but quit early with the error as soon as a run fails
	///    verification instead of aggregating the results.
	///
	/// The combination iterations == None && run_until_failure == true never reaches this
	/// function: `new_component` panics on it.
	pub async fn run(self) -> Result<(), BlackoutError> {
	    match &self.run_mode {
	        RunMode::Once => self.run_test().await,
	        RunMode::IterationsUntilFailure(limit) => {
	            self.run_iterations_until_failure(*limit).await
	        }
	        RunMode::Iterations(count) => self.run_iterations(*count).await,
	    }
	}
	}

	/// A test environment. This wraps a test configuration with the environmental context to run it,
	/// such as the isolated ffx instance.
	pub struct TestEnv {
	/// The test definition that the `*_step` methods add steps to and that `run` executes.
	test: Test,
	/// The isolated ffx instance. Every step created by this environment receives a clone of
	/// this `Arc`.
	isolated_ffx: Arc<ffx_isolate::Isolate>,
	}

	impl TestEnv {
	/// Create a new test environment for `component` in `package`. The names need to match the
	/// associated package that will be present on the target device.
	///
	/// This looks up the global ffx environment context, reads the `ssh.priv` configuration value
	/// from it, and uses both to create an isolated ffx instance named `blackout-ffx`. The test
	/// definition itself is built from `opts` by `Test::new_component`.
	///
	/// NOTE(review): earlier documentation stated that target discovery (via $FUCHSIA_NODENAME or
	/// the first enumerated target) happens here. Nothing in this function does that directly;
	/// confirm whether the isolated ffx instance takes care of it.
	///
	/// # Panics
	///
	/// Panics if there is no global context, if the ssh key cannot be read from the
	/// configuration, or if creating the isolated ffx instance fails. `Test::new_component` also
	/// panics when `opts` asks to run until failure without an iteration count.
	pub async fn new(package: &'static str, component: &'static str, opts: CommonOpts) -> TestEnv {
	    let context = global_env_context().expect("No global context");
	    let ssh_key = context.get::<String, _>("ssh.priv").expect("could not get ssh key");

	    let isolated_ffx =
	        ffx_isolate::Isolate::new_with_sdk("blackout-ffx", ssh_key.into(), &context)
	            .await
	            .expect("failed to make new isolated ffx");

	    let test = Test::new_component(package, component, opts);
	    TestEnv { test, isolated_ffx: Arc::new(isolated_ffx) }
	}

	/// Add a test step for setting up the filesystem in the way we want it for the test. This
	/// executes the `setup` subcommand on the target binary and waits for completion, checking the
	/// result.
	pub fn setup_step(&mut self) -> &mut Self {
	self.test.add_step(Box::new(SetupStep::new(
	self.isolated_ffx.clone(),
	&self.test.package,
	&self.test.component,
	self.test.seed.clone(),
	&self.test.device_label,
	self.test.device_path.clone(),
	)));
	self
	}

	/// Add a test step for generating load on the device using the `test` subcommand on the target
	/// binary. This load doesn't terminate. After `duration`, it checks to make sure the command is
	/// still running, then return.
	pub fn load_step(&mut self, duration: Option<Duration>) -> &mut Self {
	self.test.add_step(Box::new(LoadStep::new(
	self.isolated_ffx.clone(),
	&self.test.package,
	&self.test.component,
	self.test.seed.clone(),
	&self.test.device_label,
	self.test.device_path.clone(),
	duration,
	)));
	self
	}

	/// Add a reboot step. This reboots the target machine using the configured reboot mechanism.
	pub fn reboot_step(&mut self, bootserver: bool) -> &mut Self {
	self.test.add_step(Box::new(RebootStep::new(
	self.isolated_ffx.clone(),
	&self.test.reboot_type,
	bootserver,
	)));
	self
	}

	/// Add a verify step. This runs the `verify` subcommand on the target binary, waiting for
	/// completion, and checks the result. The verification is done in a retry loop, attempting to
	/// run the verification command `num_retries` times, sleeping for `retry_timeout` duration
	/// between each attempt.
	pub fn verify_step(&mut self, num_retries: u32, retry_timeout: Duration) -> &mut Self {
	self.test.add_step(Box::new(VerifyStep::new(
	self.isolated_ffx.clone(),
	&self.test.package,
	&self.test.component,
	self.test.seed.clone(),
	&self.test.device_label,
	self.test.device_path.clone(),
	num_retries,
	retry_timeout,
	)));
	self
	}

	/// Run the provided test. The test is provided as a function that takes a clone of the command
	/// line options and produces a test to run. This
	/// There are essentially 4 possible types of test runs that we may be doing here.
	/// 1. one iteration (iterations == None && run_until_failure == false)
	/// this is the simplest case. run one iteration of the test by constructing the test and
	/// calling test.run(), returning the result.
	/// 2. some number of iterations (iterations == Some(N) && run_until_failure == false)
	/// essentially the InfiniteExecutor code, except instead of an infinite loop we use a
	/// bounded one.
	/// 3. run until verification failure (iterations == None && run_until_failure == true)
	/// run tests until an Error::Verification is returned. keep track of the number of runs,
	/// but there is no need to tabulate other errors.
	/// 4. run until verification failure, except with a max number of iterations
	/// (iterations == Some(N) && run_until_failure == true)
	/// if both flags are present, we combine the functionality. only run a certain number of
	/// iterations, but quit early if there is a failure instead of aggregating the results.
	pub async fn run(self) -> Result<(), BlackoutError> {
	self.test.run().await
	}
	}

	#[cfg(test)]
	mod tests {
	    use super::{BlackoutError, CommandError, CommonOpts, RebootType, Test, TestStep};
	    use async_trait::async_trait;
	    use fuchsia_async as fasync;
	    use std::os::unix::process::ExitStatusExt;
	    use std::process::ExitStatus;
	    use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
	    use std::sync::Arc;

	    /// A test step that counts how often it was executed and delegates its result to a closure.
	    /// The closure receives the 1-based number of the current execution.
	    struct FakeStep<F>
	    where
	        F: Fn(u64) -> Result<(), BlackoutError> + Send + Sync,
	    {
	        res: F,
	        runs: Arc<AtomicU64>,
	    }

	    impl<F> FakeStep<F>
	    where
	        F: Fn(u64) -> Result<(), BlackoutError> + Send + Sync,
	    {
	        /// Create a fake step together with a handle to its execution counter.
	        fn new(res: F) -> (FakeStep<F>, Arc<AtomicU64>) {
	            let runs = Arc::new(AtomicU64::new(0));
	            (FakeStep { res, runs: runs.clone() }, runs)
	        }
	    }

	    #[async_trait]
	    impl<F> TestStep for FakeStep<F>
	    where
	        F: Fn(u64) -> Result<(), BlackoutError> + Send + Sync,
	    {
	        async fn execute(&self) -> Result<(), BlackoutError> {
	            // `fetch_add` returns the previous count, so the current run number is that plus
	            // one. Using the returned value avoids a second, separate load of the counter.
	            let run = self.runs.fetch_add(1, Ordering::Relaxed) + 1;
	            (self.res)(run)
	        }
	    }

	    /// Build a step-less test with default options and the given iteration settings.
	    fn fake_test(iterations: Option<u64>, run_until_failure: bool) -> Test {
	        let opts = CommonOpts {
	            device_label: None,
	            device_path: None,
	            seed: None,
	            reboot: RebootType::Software,
	            iterations,
	            run_until_failure,
	        };
	        Test::new_component("fake_package", "fake_component", opts)
	    }

	    /// The command error payload shared by all the failing fake steps.
	    fn fake_command_error() -> CommandError {
	        CommandError(ExitStatus::from_raw(1), "(fake stdout)".into(), "(fake stderr)".into())
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_once_executes_steps_once() {
	        // using the run once mode, we execute all our test steps once, in order. run once is how
	        // all the other run modes execute their test steps, so if this is solid then we just test
	        // iterations and exit modes for the rest.

	        let step1_exec1 = Arc::new(AtomicBool::new(false));
	        let step1_exec2 = step1_exec1.clone();
	        let step1_func = move |_| {
	            assert!(!step1_exec1.load(Ordering::Relaxed), "step one already executed");
	            step1_exec1.store(true, Ordering::Relaxed);
	            Ok(())
	        };
	        let step2_func = move |_| {
	            assert!(step1_exec2.load(Ordering::Relaxed), "step two executed before step one");
	            Ok(())
	        };
	        let (step1, step1_runs) = FakeStep::new(step1_func);
	        let (step2, step2_runs) = FakeStep::new(step2_func);

	        let mut test = fake_test(None, false);
	        test.add_step(Box::new(step1)).add_step(Box::new(step2));

	        test.run().await.expect("failed to run test");

	        assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
	        assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_once_exits_on_failure() {
	        let (step1, step1_runs) = FakeStep::new(|_| Ok(()));
	        let (step2, step2_runs) =
	            FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
	        let (step3, step3_runs) = FakeStep::new(|_| panic!("step 3 should never be run"));
	        let mut test = fake_test(None, false);
	        test.add_step(Box::new(step1)).add_step(Box::new(step2)).add_step(Box::new(step3));
	        match test.run().await {
	            Err(BlackoutError::Verification(..)) => (),
	            Ok(..) => panic!("test returned Ok on an error"),
	            Err(..) => panic!("test returned an unexpected error"),
	        }
	        assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
	        assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
	        assert_eq!(step3_runs.load(Ordering::Relaxed), 0);
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_n_executes_steps_n_times() {
	        // using the iterations mode, we execute all our test steps `iterations` number of times. we
	        // expect that no matter the error value returned from the test, we will execute the
	        // expected number of times.
	        let iterations = 10;
	        let (step, runs) = FakeStep::new(|_| Ok(()));
	        let mut test = fake_test(Some(iterations), false);
	        test.add_step(Box::new(step));
	        test.run().await.expect("failed to run test");
	        assert_eq!(
	            runs.load(Ordering::Relaxed),
	            iterations,
	            "step wasn't run the expected number of times"
	        );
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_n_executes_steps_n_times_verify_failure() {
	        let iterations = 10;
	        let (step, runs) =
	            FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
	        let mut test = fake_test(Some(iterations), false);
	        test.add_step(Box::new(step));
	        test.run().await.expect("failed to run test");
	        assert_eq!(
	            runs.load(Ordering::Relaxed),
	            iterations,
	            "step wasn't run the expected number of times"
	        );
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_n_executes_steps_n_times_other_error() {
	        let iterations = 10;
	        let (step, runs) = FakeStep::new(|_| Err(BlackoutError::FfxError(fake_command_error())));
	        let mut test = fake_test(Some(iterations), false);
	        test.add_step(Box::new(step));
	        test.run().await.expect("failed to run test");
	        assert_eq!(
	            runs.load(Ordering::Relaxed),
	            iterations,
	            "step wasn't run the expected number of times"
	        );
	    }

	    #[fasync::run_singlethreaded(test)]
	    async fn run_until_failure_stops_at_first_verify_failure() {
	        // using the run-until-failure mode, the first verification failure ends the test early
	        // and is returned to the caller, even though more iterations were allowed.
	        let failing_run = 3;
	        let (step, runs) = FakeStep::new(move |run| {
	            if run == failing_run {
	                Err(BlackoutError::Verification(fake_command_error()))
	            } else {
	                Ok(())
	            }
	        });
	        let mut test = fake_test(Some(10), true);
	        test.add_step(Box::new(step));
	        match test.run().await {
	            Err(BlackoutError::Verification(..)) => (),
	            Ok(..) => panic!("test returned Ok on an error"),
	            Err(..) => panic!("test returned an unexpected error"),
	        }
	        assert_eq!(
	            runs.load(Ordering::Relaxed),
	            failing_run,
	            "test didn't stop at the first verification failure"
	        );
	    }
	}