// tools/blackout/blackout-host/src/steps.rs - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 //! test steps

 use {
     crate::{BlackoutError, RebootError, Seed, SshAuth},
     std::{
         fs::OpenOptions,
         io::Write,
         path::{Path, PathBuf},
         process::{Child, Command, Output, Stdio},
         thread::sleep,
         time::Duration,
     },
 };

 #[rustfmt::skip]
 /// ssh options for connecting to a target machine.
 ///
 /// The slice is a flat list of `-o`, `Key=Value` pairs, ready to be passed to
 /// [`Command::args`].
 static SSH_OPTIONS: &[&str] = &[
     // use the fuchsia user
     "-o", "User=fuchsia",
     // don't bother with any of the known hosts stuff
     "-o", "UserKnownHostsFile=/dev/null",
     "-o", "StrictHostKeyChecking=no",
     "-o", "CheckHostIP=no",
     // don't forward anything
     "-o", "ForwardAgent=no",
     "-o", "ForwardX11=no",
     // allow the ssh connection to be reused
     "-o", "ControlPersist=yes",
     "-o", "ControlMaster=auto",
     "-o", "ControlPath=/tmp/fuchsia--%r@%h:%p",
     // the next three control how long ssh will wait in different situations before giving up.
     // wait 100 seconds for the server to be routable. overrides the system default tcp timeout.
     "-o", "ConnectTimeout=100",
     // send a ping to the server every 10 seconds if no data has been received from the server
     "-o", "ServerAliveInterval=10",
     // send 6 of those pings before giving up. this is equivalent to a timeout of about 60 seconds
     // before giving up the connection, when the server is routable but we aren't getting responses
     // from the server yet.
     "-o", "ServerAliveCountMax=6",
 ];

 /// Build the base `ssh` invocation for `target`: the shared `SSH_OPTIONS` first, then the
 /// arguments contributed by `auth`, then the target itself. Callers append the remote command
 /// they want to run.
 fn ssh(target: &str, auth: &SshAuth) -> Command {
     let mut invocation = Command::new("ssh");
     invocation.args(SSH_OPTIONS).args(auth.args()).arg(target);
     invocation
 }

 /// Type of reboot to perform.
 ///
 /// Selects which mechanism a reboot step uses to restart the target device.
 #[derive(Clone, Debug)]
 pub enum RebootType {
     /// Perform a software reboot using dm reboot on the target device over ssh.
     Software,
     /// Perform a hardware reboot by writing a byte to a relay device. THIS OPTION IS LIKELY TO
     /// CHANGE IN THE FUTURE.
     ///
     /// The payload is the path of the relay device that gets opened and written to.
     Hardware(PathBuf),
 }

 // TODO(sdemos): the final implementation will also have to handle a CI environment where hard
 // rebooting is done by calling a script that will be in our environment.
 /// Perform a hardware reboot by driving the relay device at `dev`.
 ///
 /// Returns [`RebootError::MissingDevice`] when the path does not exist. Failures to open or
 /// write the device are propagated through `?`.
 fn hard_reboot(dev: impl AsRef<Path>) -> Result<(), RebootError> {
     let relay_path = dev.as_ref();
     if !relay_path.exists() {
         return Err(RebootError::MissingDevice(relay_path.to_path_buf()));
     }

     // open the existing device write-only; never create it.
     let mut options = OpenOptions::new();
     options.read(false).write(true).create(false);
     let mut relay = options.open(relay_path)?;

     // write 0x01, pause for 100ms, then write 0x02.
     // NOTE(review): presumably these bytes switch the relay off and back on - confirm against
     // the relay's protocol.
     relay.write_all(&[0x01])?;
     sleep(Duration::from_millis(100));
     relay.write_all(&[0x02])?;
     Ok(())
 }

 /// Reboot the target system using `dm reboot`.
 ///
 /// Only a failure to run the local `ssh` process is reported as an error.
 fn soft_reboot(target: &str, auth: &SshAuth) -> Result<(), RebootError> {
     let mut reboot = ssh(target, auth);
     reboot.arg("dm").arg("reboot");
     // the exit status is deliberately discarded; it was noted as not being meaningful here.
     let _exit_status = reboot.status()?;
     Ok(())
 }

 /// Abstraction over running subcommands of the target binary. Test steps hold a boxed `Runner`
 /// so the unit tests can substitute a fake implementation.
 trait Runner {
     /// Start the subcommand `subc` and return a handle to the spawned child process without
     /// waiting for it to finish.
     fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError>;
     /// Run the subcommand `subc` to completion and return the collected output.
     fn run_output(&self, subc: &str) -> Result<Output, BlackoutError>;
     /// Run the subcommand `subc` to completion, reporting only success or failure.
     fn run(&self, subc: &str) -> Result<(), BlackoutError>;
 }

 /// run a target binary on a target device over ssh.
 ///
 /// Every invocation appends `<bin> <seed> <block_device> <subcommand>` to the base ssh command.
 struct CmdRunner {
     /// destination handed to `ssh`.
     target: String,
     /// credentials used to build the ssh command line.
     auth: SshAuth,
     /// binary to invoke on the target.
     bin: String,
     /// seed passed to the binary as its first argument.
     seed: Seed,
     /// block device passed to the binary as its second argument.
     block_device: String,
 }

 impl CmdRunner {
     /// Create a runner and return it boxed as a [`Runner`] trait object, which is the form the
     /// test steps store.
     fn new(
         target: &str,
         auth: &SshAuth,
         bin: &str,
         seed: Seed,
         block_device: &str,
     ) -> Box<dyn Runner> {
         Box::new(CmdRunner {
             target: target.into(),
             auth: auth.clone(),
             bin: bin.into(),
             seed,
             block_device: block_device.into(),
         })
     }

     /// Build the ssh command that invokes the binary with the seed and block device. The
     /// subcommand is appended by the caller.
     fn run_bin(&self) -> Command {
         let mut command = ssh(&self.target, &self.auth);
         command.arg(&self.bin).arg(self.seed.to_string()).arg(&self.block_device);
         command
     }
 }

 impl Runner for CmdRunner {
     /// Run a subcommand of the originally provided binary on the target. The command is spawned as a
     /// separate process, and a reference to the child process is returned. stdout and stderr are
     /// piped (see [`std::process::Stdio::piped()`] for details).
     fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError> {
         let child =
             self.run_bin().arg(subc).stdout(Stdio::piped()).stderr(Stdio::piped()).spawn()?;

         Ok(child)
     }

     /// Run a subcommand to completion and collect the output from the process.
     ///
     /// # Errors
     ///
     /// * [`BlackoutError::Ssh`] when the process exits with status 255, which is what `ssh`
     ///   itself uses to report an error.
     /// * [`BlackoutError::TargetCommand`] for every other unsuccessful exit, including the local
     ///   process being terminated by a signal (in which case there is no exit code at all).
     fn run_output(&self, subc: &str) -> Result<Output, BlackoutError> {
         let out = self.run_bin().arg(subc).output()?;
         if out.status.success() {
             return Ok(out);
         }
         // `code()` is `None` when the process was killed by a signal, so compare against
         // `Some(255)` instead of unwrapping.
         match out.status.code() {
             Some(255) => Err(BlackoutError::Ssh(self.target.clone(), out.into())),
             _ => Err(BlackoutError::TargetCommand(out.into())),
         }
     }

     /// Run a subcommand to completion, discarding the collected output on success.
     fn run(&self, subc: &str) -> Result<(), BlackoutError> {
         self.run_output(subc).map(|_| ())
     }
 }

 /// A step for a test to take. These steps can be added to the test runner in the root of the host
 /// library.
 pub trait TestStep {
     /// Execute this test step.
     ///
     /// Returns `Ok(())` when the step succeeded and a [`BlackoutError`] describing the failure
     /// otherwise.
     fn execute(&self) -> Result<(), BlackoutError>;
 }

 /// Test step that prepares the filesystem for the test. It runs the `setup` subcommand of the
 /// target binary to completion and reports the result.
 pub struct SetupStep {
     /// executes subcommands of the target binary.
     runner: Box<dyn Runner>,
 }

 impl SetupStep {
     /// Create a new setup step.
     pub(crate) fn new(
         target: &str,
         auth: &SshAuth,
         bin: &str,
         seed: Seed,
         block_device: &str,
     ) -> Self {
         let runner = CmdRunner::new(target, auth, bin, seed, block_device);
         Self { runner }
     }
 }

 impl TestStep for SetupStep {
     /// Announce the step, then run `setup` and return whatever the runner reports.
     fn execute(&self) -> Result<(), BlackoutError> {
         println!("setting up test...");
         self.runner.run("setup")
     }
 }

 /// Test step that generates load on a filesystem. It starts the `test` subcommand of the target
 /// binary, waits for `duration`, and fails if the command has already exited by then.
 pub struct LoadStep {
     /// executes subcommands of the target binary.
     runner: Box<dyn Runner>,
     /// how long to let the load run before checking on it.
     duration: Duration,
 }

 impl LoadStep {
     /// Create a new load step.
     pub(crate) fn new(
         target: &str,
         auth: &SshAuth,
         bin: &str,
         seed: Seed,
         block_device: &str,
         duration: Duration,
     ) -> Self {
         let runner = CmdRunner::new(target, auth, bin, seed, block_device);
         Self { runner, duration }
     }
 }

 impl TestStep for LoadStep {
     /// Spawn the load generator, sleep for the configured duration, and check that it is still
     /// running. An early exit is reported as [`BlackoutError::TargetCommand`] with the output
     /// the process produced.
     fn execute(&self) -> Result<(), BlackoutError> {
         println!("generating filesystem load...");
         let mut load = self.runner.run_spawn("test")?;

         sleep(self.duration);

         match load.try_wait()? {
             // still running, which is what we want.
             None => Ok(()),
             // the process already exited; collect its output for the error report.
             Some(_) => {
                 let out = load.wait_with_output()?;
                 Err(BlackoutError::TargetCommand(out.into()))
             }
         }
     }
 }

 /// Test step that runs an operation to completion. It runs the `test` subcommand of the target
 /// binary, waits for it to finish, and reports the result.
 pub struct OperationStep {
     /// executes subcommands of the target binary.
     runner: Box<dyn Runner>,
 }

 impl OperationStep {
     /// Create a new operation step.
     pub(crate) fn new(
         target: &str,
         auth: &SshAuth,
         bin: &str,
         seed: Seed,
         block_device: &str,
     ) -> Self {
         let runner = CmdRunner::new(target, auth, bin, seed, block_device);
         Self { runner }
     }
 }

 impl TestStep for OperationStep {
     /// Announce the step, then run `test` and return whatever the runner reports.
     fn execute(&self) -> Result<(), BlackoutError> {
         println!("running filesystem operation...");
         self.runner.run("test")
     }
 }

 /// Test step that reboots the target machine with the configured reboot mechanism.
 pub struct RebootStep {
     /// destination handed to `ssh` for a software reboot.
     target: String,
     /// credentials used for a software reboot.
     auth: SshAuth,
     /// which reboot mechanism to use.
     reboot_type: RebootType,
 }

 impl RebootStep {
     /// Create a new reboot step.
     pub(crate) fn new(target: &str, auth: &SshAuth, reboot_type: &RebootType) -> Self {
         Self {
             target: target.to_string(),
             auth: auth.clone(),
             reboot_type: reboot_type.clone(),
         }
     }
 }

 impl TestStep for RebootStep {
     /// Announce the step, then dispatch to the software or hardware reboot routine. Any reboot
     /// error is converted into a [`BlackoutError`].
     fn execute(&self) -> Result<(), BlackoutError> {
         println!("rebooting device...");
         let outcome = match &self.reboot_type {
             RebootType::Software => soft_reboot(&self.target, &self.auth),
             RebootType::Hardware(relay) => hard_reboot(relay),
         };
         Ok(outcome?)
     }
 }

 /// A test step for verifying the machine. This executes the `verify` subcommand on the target binary
 /// and waits for completion, checking the result.
 pub struct VerifyStep {
     /// executes subcommands of the target binary.
     runner: Box<dyn Runner>,
     /// maximum number of verification attempts.
     num_retries: u32,
     /// pause between two consecutive attempts.
     retry_timeout: Duration,
 }

 impl VerifyStep {
     /// Create a new verify step. Verification is done in a retry loop, attempting to run the
     /// verification command up to `num_retries` times (but always at least once) and sleeping for
     /// `retry_timeout` duration in between each attempt.
     pub(crate) fn new(
         target: &str,
         auth: &SshAuth,
         bin: &str,
         seed: Seed,
         block_device: &str,
         num_retries: u32,
         retry_timeout: Duration,
     ) -> Self {
         Self {
             runner: CmdRunner::new(target, auth, bin, seed, block_device),
             num_retries,
             retry_timeout,
         }
     }
 }

 impl TestStep for VerifyStep {
     /// Run `verify` until it succeeds, fails for a non-ssh reason, or the attempts run out.
     ///
     /// # Errors
     ///
     /// * [`BlackoutError::Verification`] as soon as the target command itself fails.
     /// * The most recent [`BlackoutError::Ssh`] when every attempt failed to reach the device.
     /// * Any other runner error, returned immediately and unchanged.
     fn execute(&self) -> Result<(), BlackoutError> {
         // always make at least one attempt: with zero attempts the loop would be skipped and
         // the step would report success without having verified anything.
         let attempts = self.num_retries.max(1);
         let mut last_ssh_error = Ok(());
         let start_time = std::time::Instant::now();
         // an inclusive range avoids the overflow `num_retries + 1` hits at `u32::MAX`.
         for i in 1..=attempts {
             println!("verifying device...(attempt #{})", i);
             match self.runner.run("verify") {
                 Ok(()) => {
                     println!("verification successful.");
                     return Ok(());
                 }
                 Err(ssh_error @ BlackoutError::Ssh(..)) => {
                     // always print out the ssh error so it doesn't get buried to help with debugging.
                     println!("{}", ssh_error);
                     last_ssh_error = Err(ssh_error);
                     // only wait in between attempts; there is nothing to wait for after the
                     // last one.
                     if i < attempts {
                         sleep(self.retry_timeout);
                     }
                 }
                 // during the verification stage, we expect that any time the target command fails,
                 // it's a verification failure.
                 Err(BlackoutError::TargetCommand(e)) => return Err(BlackoutError::Verification(e)),
                 Err(e) => return Err(e),
             }
         }
         println!("stopping verification attempt after {}s", start_time.elapsed().as_secs());
         // we failed to ssh into the device too many times in a row. something's wrong.
         last_ssh_error
     }
 }

 #[cfg(test)]
 mod tests {
     use super::{OperationStep, Runner, SetupStep, TestStep, VerifyStep};
     use crate::{BlackoutError, CommandError};
     use std::{
         cell::Cell,
         os::unix::process::ExitStatusExt,
         process::{Child, ExitStatus, Output},
         rc::Rc,
         time::Duration,
     };

     /// Runner double: `run` asserts that it was asked for the expected subcommand and then
     /// returns whatever the supplied closure produces. The other entry points are unimplemented.
     struct FakeRunner<F> {
         command: &'static str,
         res: F,
     }

     impl<F> FakeRunner<F>
     where
         F: Fn() -> Result<(), BlackoutError>,
     {
         fn new(command: &'static str, res: F) -> Self {
             Self { command, res }
         }
     }

     impl<F> Runner for FakeRunner<F>
     where
         F: Fn() -> Result<(), BlackoutError>,
     {
         fn run_spawn(&self, _subc: &str) -> Result<Child, BlackoutError> {
             unimplemented!()
         }

         fn run_output(&self, _subc: &str) -> Result<Output, BlackoutError> {
             unimplemented!()
         }

         fn run(&self, subc: &str) -> Result<(), BlackoutError> {
             assert_eq!(subc, self.command);
             (self.res)()
         }
     }

     /// Canned command error carrying the given raw status and placeholder output.
     fn fake_command_error(raw_status: i32) -> CommandError {
         CommandError(
             ExitStatus::from_raw(raw_status),
             "(fake stdout)".into(),
             "(fake stderr)".into(),
         )
     }

     /// Runner result simulating a failure of the target command.
     fn target_command_failure() -> Result<(), BlackoutError> {
         Err(BlackoutError::TargetCommand(fake_command_error(1)))
     }

     /// Runner result simulating a failure of ssh itself.
     fn ssh_failure() -> Result<(), BlackoutError> {
         Err(BlackoutError::Ssh("fake target".into(), fake_command_error(255)))
     }

     /// Verify step with ten attempts and no delay, driven by the given fake result.
     fn verify_step<F>(res: F) -> VerifyStep
     where
         F: Fn() -> Result<(), BlackoutError> + 'static,
     {
         VerifyStep {
             runner: Box::new(FakeRunner::new("verify", res)),
             num_retries: 10,
             retry_timeout: Duration::from_secs(0),
         }
     }

     #[test]
     fn setup_success() {
         let step = SetupStep { runner: Box::new(FakeRunner::new("setup", || Ok(()))) };
         if step.execute().is_err() {
             panic!("setup step returned an error on a successful run");
         }
     }

     #[test]
     fn setup_error() {
         let step = SetupStep { runner: Box::new(FakeRunner::new("setup", target_command_failure)) };
         match step.execute() {
             Err(BlackoutError::TargetCommand(_)) => (),
             Ok(()) => panic!("setup step returned success when runner failed"),
             _ => panic!("setup step returned an unexpected error"),
         }
     }

     #[test]
     fn operation_success() {
         let step = OperationStep { runner: Box::new(FakeRunner::new("test", || Ok(()))) };
         if step.execute().is_err() {
             panic!("operation step returned an error on a successful run");
         }
     }

     #[test]
     fn operation_error() {
         let step =
             OperationStep { runner: Box::new(FakeRunner::new("test", target_command_failure)) };
         match step.execute() {
             Err(BlackoutError::TargetCommand(_)) => (),
             Ok(()) => panic!("operation step returned success when runner failed"),
             _ => panic!("operation step returned an unexpected error"),
         }
     }

     #[test]
     fn verify_success() {
         let step = verify_step(|| Ok(()));
         if step.execute().is_err() {
             panic!("verify step returned an error on a successful run");
         }
     }

     #[test]
     fn verify_target_command_error() {
         let step = verify_step(target_command_failure);
         match step.execute() {
             // verify step is expected to transform target command errors into verification errors.
             Err(BlackoutError::Verification(_)) => (),
             Err(BlackoutError::TargetCommand(_)) => {
                 panic!("verify step returned target command error instead of verification error")
             }
             Ok(()) => panic!("verify step returned success when runner failed"),
             _ => panic!("verify step returned an unexpected error"),
         }
     }

     #[test]
     fn verify_ssh_error_retry_loop_timeout() {
         let calls = Rc::new(Cell::new(0));
         let counter = Rc::clone(&calls);
         // every attempt fails to reach the device.
         let step = verify_step(move || {
             counter.set(counter.get() + 1);
             ssh_failure()
         });
         match step.execute() {
             Err(BlackoutError::Ssh(..)) => (),
             Ok(()) => panic!("verify step returned success when runner failed"),
             _ => panic!("verify step returned an unexpected error"),
         }
         assert_eq!(calls.get(), 10);
     }

     #[test]
     fn verify_ssh_error_retry_loop_success() {
         let calls = Rc::new(Cell::new(0));
         let counter = Rc::clone(&calls);
         // the first five attempts fail to reach the device, the sixth succeeds.
         let step = verify_step(move || {
             let attempt = counter.get() + 1;
             counter.set(attempt);
             if attempt > 5 {
                 Ok(())
             } else {
                 ssh_failure()
             }
         });
         match step.execute() {
             Ok(()) => (),
             Err(BlackoutError::Ssh(..)) => {
                 panic!("verify step returned error when runner succeeded")
             }
             _ => panic!("verify step returned an unexpected error"),
         }
         assert_eq!(calls.get(), 6);
     }
 }
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	//! test steps

	use {
	crate::{BlackoutError, RebootError, Seed, SshAuth},
	std::{
	fs::OpenOptions,
	io::Write,
	path::{Path, PathBuf},
	process::{Child, Command, Output, Stdio},
	thread::sleep,
	time::Duration,
	},
	};

	#[rustfmt::skip]
	/// ssh options for connecting to a target machine.
	///
	/// The slice is a flat list of `-o`, `Key=Value` pairs, ready to be passed to
	/// [`Command::args`].
	static SSH_OPTIONS: &[&str] = &[
	    // use the fuchsia user
	    "-o", "User=fuchsia",
	    // don't bother with any of the known hosts stuff
	    "-o", "UserKnownHostsFile=/dev/null",
	    "-o", "StrictHostKeyChecking=no",
	    "-o", "CheckHostIP=no",
	    // don't forward anything
	    "-o", "ForwardAgent=no",
	    "-o", "ForwardX11=no",
	    // allow the ssh connection to be reused
	    "-o", "ControlPersist=yes",
	    "-o", "ControlMaster=auto",
	    "-o", "ControlPath=/tmp/fuchsia--%r@%h:%p",
	    // the next three control how long ssh will wait in different situations before giving up.
	    // wait 100 seconds for the server to be routable. overrides the system default tcp timeout.
	    "-o", "ConnectTimeout=100",
	    // send a ping to the server every 10 seconds if no data has been received from the server
	    "-o", "ServerAliveInterval=10",
	    // send 6 of those pings before giving up. this is equivalent to a timeout of about 60 seconds
	    // before giving up the connection, when the server is routable but we aren't getting responses
	    // from the server yet.
	    "-o", "ServerAliveCountMax=6",
	];

	/// Build the base `ssh` invocation for `target`: the shared `SSH_OPTIONS` first, then the
	/// arguments contributed by `auth`, then the target itself. Callers append the remote command
	/// they want to run.
	fn ssh(target: &str, auth: &SshAuth) -> Command {
	    let mut invocation = Command::new("ssh");
	    invocation.args(SSH_OPTIONS).args(auth.args()).arg(target);
	    invocation
	}

	/// Type of reboot to perform.
	///
	/// Selects which mechanism a reboot step uses to restart the target device.
	#[derive(Clone, Debug)]
	pub enum RebootType {
	/// Perform a software reboot using dm reboot on the target device over ssh.
	Software,
	/// Perform a hardware reboot by writing a byte to a relay device. THIS OPTION IS LIKELY TO
	/// CHANGE IN THE FUTURE.
	///
	/// The payload is the path of the relay device that gets opened and written to.
	Hardware(PathBuf),
	}

	// TODO(sdemos): the final implementation will also have to handle a CI environment where hard
	// rebooting is done by calling a script that will be in our environment.
	/// Perform a hardware reboot by driving the relay device at `dev`.
	///
	/// Returns [`RebootError::MissingDevice`] when the path does not exist. Failures to open or
	/// write the device are propagated through `?`.
	fn hard_reboot(dev: impl AsRef<Path>) -> Result<(), RebootError> {
	    let relay_path = dev.as_ref();
	    if !relay_path.exists() {
	        return Err(RebootError::MissingDevice(relay_path.to_path_buf()));
	    }

	    // open the existing device write-only; never create it.
	    let mut options = OpenOptions::new();
	    options.read(false).write(true).create(false);
	    let mut relay = options.open(relay_path)?;

	    // write 0x01, pause for 100ms, then write 0x02.
	    // NOTE(review): presumably these bytes switch the relay off and back on - confirm against
	    // the relay's protocol.
	    relay.write_all(&[0x01])?;
	    sleep(Duration::from_millis(100));
	    relay.write_all(&[0x02])?;
	    Ok(())
	}

	/// Reboot the target system using `dm reboot`.
	///
	/// Only a failure to run the local `ssh` process is reported as an error.
	fn soft_reboot(target: &str, auth: &SshAuth) -> Result<(), RebootError> {
	    let mut reboot = ssh(target, auth);
	    reboot.arg("dm").arg("reboot");
	    // the exit status is deliberately discarded; it was noted as not being meaningful here.
	    let _exit_status = reboot.status()?;
	    Ok(())
	}

	/// Abstraction over running subcommands of the target binary. Test steps hold a boxed `Runner`
	/// so the unit tests can substitute a fake implementation.
	trait Runner {
	/// Start the subcommand `subc` and return a handle to the spawned child process without
	/// waiting for it to finish.
	fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError>;
	/// Run the subcommand `subc` to completion and return the collected output.
	fn run_output(&self, subc: &str) -> Result<Output, BlackoutError>;
	/// Run the subcommand `subc` to completion, reporting only success or failure.
	fn run(&self, subc: &str) -> Result<(), BlackoutError>;
	}

	/// run a target binary on a target device over ssh.
	///
	/// Every invocation appends `<bin> <seed> <block_device> <subcommand>` to the base ssh command.
	struct CmdRunner {
	    /// destination handed to `ssh`.
	    target: String,
	    /// credentials used to build the ssh command line.
	    auth: SshAuth,
	    /// binary to invoke on the target.
	    bin: String,
	    /// seed passed to the binary as its first argument.
	    seed: Seed,
	    /// block device passed to the binary as its second argument.
	    block_device: String,
	}

	impl CmdRunner {
	    /// Create a runner and return it boxed as a [`Runner`] trait object, which is the form the
	    /// test steps store.
	    fn new(
	        target: &str,
	        auth: &SshAuth,
	        bin: &str,
	        seed: Seed,
	        block_device: &str,
	    ) -> Box<dyn Runner> {
	        Box::new(CmdRunner {
	            target: target.into(),
	            auth: auth.clone(),
	            bin: bin.into(),
	            seed,
	            block_device: block_device.into(),
	        })
	    }

	    /// Build the ssh command that invokes the binary with the seed and block device. The
	    /// subcommand is appended by the caller.
	    fn run_bin(&self) -> Command {
	        let mut command = ssh(&self.target, &self.auth);
	        command.arg(&self.bin).arg(self.seed.to_string()).arg(&self.block_device);
	        command
	    }
	}

	impl Runner for CmdRunner {
	    /// Run a subcommand of the originally provided binary on the target. The command is spawned as a
	    /// separate process, and a reference to the child process is returned. stdout and stderr are
	    /// piped (see [`std::process::Stdio::piped()`] for details).
	    fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError> {
	        let child =
	            self.run_bin().arg(subc).stdout(Stdio::piped()).stderr(Stdio::piped()).spawn()?;

	        Ok(child)
	    }

	    /// Run a subcommand to completion and collect the output from the process.
	    ///
	    /// # Errors
	    ///
	    /// * [`BlackoutError::Ssh`] when the process exits with status 255, which is what `ssh`
	    ///   itself uses to report an error.
	    /// * [`BlackoutError::TargetCommand`] for every other unsuccessful exit, including the local
	    ///   process being terminated by a signal (in which case there is no exit code at all).
	    fn run_output(&self, subc: &str) -> Result<Output, BlackoutError> {
	        let out = self.run_bin().arg(subc).output()?;
	        if out.status.success() {
	            return Ok(out);
	        }
	        // `code()` is `None` when the process was killed by a signal, so compare against
	        // `Some(255)` instead of unwrapping.
	        match out.status.code() {
	            Some(255) => Err(BlackoutError::Ssh(self.target.clone(), out.into())),
	            _ => Err(BlackoutError::TargetCommand(out.into())),
	        }
	    }

	    /// Run a subcommand to completion, discarding the collected output on success.
	    fn run(&self, subc: &str) -> Result<(), BlackoutError> {
	        self.run_output(subc).map(|_| ())
	    }
	}

	/// A step for a test to take. These steps can be added to the test runner in the root of the host
	/// library.
	pub trait TestStep {
	/// Execute this test step.
	///
	/// Returns `Ok(())` when the step succeeded and a [`BlackoutError`] describing the failure
	/// otherwise.
	fn execute(&self) -> Result<(), BlackoutError>;
	}

	/// Test step that prepares the filesystem for the test. It runs the `setup` subcommand of the
	/// target binary to completion and reports the result.
	pub struct SetupStep {
	    /// executes subcommands of the target binary.
	    runner: Box<dyn Runner>,
	}

	impl SetupStep {
	    /// Create a new setup step.
	    pub(crate) fn new(
	        target: &str,
	        auth: &SshAuth,
	        bin: &str,
	        seed: Seed,
	        block_device: &str,
	    ) -> Self {
	        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
	        Self { runner }
	    }
	}

	impl TestStep for SetupStep {
	    /// Announce the step, then run `setup` and return whatever the runner reports.
	    fn execute(&self) -> Result<(), BlackoutError> {
	        println!("setting up test...");
	        self.runner.run("setup")
	    }
	}

	/// Test step that generates load on a filesystem. It starts the `test` subcommand of the target
	/// binary, waits for `duration`, and fails if the command has already exited by then.
	pub struct LoadStep {
	    /// executes subcommands of the target binary.
	    runner: Box<dyn Runner>,
	    /// how long to let the load run before checking on it.
	    duration: Duration,
	}

	impl LoadStep {
	    /// Create a new load step.
	    pub(crate) fn new(
	        target: &str,
	        auth: &SshAuth,
	        bin: &str,
	        seed: Seed,
	        block_device: &str,
	        duration: Duration,
	    ) -> Self {
	        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
	        Self { runner, duration }
	    }
	}

	impl TestStep for LoadStep {
	    /// Spawn the load generator, sleep for the configured duration, and check that it is still
	    /// running. An early exit is reported as [`BlackoutError::TargetCommand`] with the output
	    /// the process produced.
	    fn execute(&self) -> Result<(), BlackoutError> {
	        println!("generating filesystem load...");
	        let mut load = self.runner.run_spawn("test")?;

	        sleep(self.duration);

	        match load.try_wait()? {
	            // still running, which is what we want.
	            None => Ok(()),
	            // the process already exited; collect its output for the error report.
	            Some(_) => {
	                let out = load.wait_with_output()?;
	                Err(BlackoutError::TargetCommand(out.into()))
	            }
	        }
	    }
	}

	/// Test step that runs an operation to completion. It runs the `test` subcommand of the target
	/// binary, waits for it to finish, and reports the result.
	pub struct OperationStep {
	    /// executes subcommands of the target binary.
	    runner: Box<dyn Runner>,
	}

	impl OperationStep {
	    /// Create a new operation step.
	    pub(crate) fn new(
	        target: &str,
	        auth: &SshAuth,
	        bin: &str,
	        seed: Seed,
	        block_device: &str,
	    ) -> Self {
	        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
	        Self { runner }
	    }
	}

	impl TestStep for OperationStep {
	    /// Announce the step, then run `test` and return whatever the runner reports.
	    fn execute(&self) -> Result<(), BlackoutError> {
	        println!("running filesystem operation...");
	        self.runner.run("test")
	    }
	}

	/// Test step that reboots the target machine with the configured reboot mechanism.
	pub struct RebootStep {
	    /// destination handed to `ssh` for a software reboot.
	    target: String,
	    /// credentials used for a software reboot.
	    auth: SshAuth,
	    /// which reboot mechanism to use.
	    reboot_type: RebootType,
	}

	impl RebootStep {
	    /// Create a new reboot step.
	    pub(crate) fn new(target: &str, auth: &SshAuth, reboot_type: &RebootType) -> Self {
	        Self {
	            target: target.to_string(),
	            auth: auth.clone(),
	            reboot_type: reboot_type.clone(),
	        }
	    }
	}

	impl TestStep for RebootStep {
	    /// Announce the step, then dispatch to the software or hardware reboot routine. Any reboot
	    /// error is converted into a [`BlackoutError`].
	    fn execute(&self) -> Result<(), BlackoutError> {
	        println!("rebooting device...");
	        let outcome = match &self.reboot_type {
	            RebootType::Software => soft_reboot(&self.target, &self.auth),
	            RebootType::Hardware(relay) => hard_reboot(relay),
	        };
	        Ok(outcome?)
	    }
	}

	/// A test step for verifying the machine. This executes the `verify` subcommand on the target binary
	/// and waits for completion, checking the result.
	pub struct VerifyStep {
	    /// executes subcommands of the target binary.
	    runner: Box<dyn Runner>,
	    /// maximum number of verification attempts.
	    num_retries: u32,
	    /// pause between two consecutive attempts.
	    retry_timeout: Duration,
	}

	impl VerifyStep {
	    /// Create a new verify step. Verification is done in a retry loop, attempting to run the
	    /// verification command up to `num_retries` times (but always at least once) and sleeping for
	    /// `retry_timeout` duration in between each attempt.
	    pub(crate) fn new(
	        target: &str,
	        auth: &SshAuth,
	        bin: &str,
	        seed: Seed,
	        block_device: &str,
	        num_retries: u32,
	        retry_timeout: Duration,
	    ) -> Self {
	        Self {
	            runner: CmdRunner::new(target, auth, bin, seed, block_device),
	            num_retries,
	            retry_timeout,
	        }
	    }
	}

	impl TestStep for VerifyStep {
	    /// Run `verify` until it succeeds, fails for a non-ssh reason, or the attempts run out.
	    ///
	    /// # Errors
	    ///
	    /// * [`BlackoutError::Verification`] as soon as the target command itself fails.
	    /// * The most recent [`BlackoutError::Ssh`] when every attempt failed to reach the device.
	    /// * Any other runner error, returned immediately and unchanged.
	    fn execute(&self) -> Result<(), BlackoutError> {
	        // always make at least one attempt: with zero attempts the loop would be skipped and
	        // the step would report success without having verified anything.
	        let attempts = self.num_retries.max(1);
	        let mut last_ssh_error = Ok(());
	        let start_time = std::time::Instant::now();
	        // an inclusive range avoids the overflow `num_retries + 1` hits at `u32::MAX`.
	        for i in 1..=attempts {
	            println!("verifying device...(attempt #{})", i);
	            match self.runner.run("verify") {
	                Ok(()) => {
	                    println!("verification successful.");
	                    return Ok(());
	                }
	                Err(ssh_error @ BlackoutError::Ssh(..)) => {
	                    // always print out the ssh error so it doesn't get buried to help with debugging.
	                    println!("{}", ssh_error);
	                    last_ssh_error = Err(ssh_error);
	                    // only wait in between attempts; there is nothing to wait for after the
	                    // last one.
	                    if i < attempts {
	                        sleep(self.retry_timeout);
	                    }
	                }
	                // during the verification stage, we expect that any time the target command fails,
	                // it's a verification failure.
	                Err(BlackoutError::TargetCommand(e)) => return Err(BlackoutError::Verification(e)),
	                Err(e) => return Err(e),
	            }
	        }
	        println!("stopping verification attempt after {}s", start_time.elapsed().as_secs());
	        // we failed to ssh into the device too many times in a row. something's wrong.
	        last_ssh_error
	    }
	}

	#[cfg(test)]
	mod tests {
	    use super::{OperationStep, Runner, SetupStep, TestStep, VerifyStep};
	    use crate::{BlackoutError, CommandError};
	    use std::{
	        cell::Cell,
	        os::unix::process::ExitStatusExt,
	        process::{Child, ExitStatus, Output},
	        rc::Rc,
	        time::Duration,
	    };

	    /// Runner double: `run` asserts that it was asked for the expected subcommand and then
	    /// returns whatever the supplied closure produces. The other entry points are unimplemented.
	    struct FakeRunner<F> {
	        command: &'static str,
	        res: F,
	    }

	    impl<F> FakeRunner<F>
	    where
	        F: Fn() -> Result<(), BlackoutError>,
	    {
	        fn new(command: &'static str, res: F) -> Self {
	            Self { command, res }
	        }
	    }

	    impl<F> Runner for FakeRunner<F>
	    where
	        F: Fn() -> Result<(), BlackoutError>,
	    {
	        fn run_spawn(&self, _subc: &str) -> Result<Child, BlackoutError> {
	            unimplemented!()
	        }

	        fn run_output(&self, _subc: &str) -> Result<Output, BlackoutError> {
	            unimplemented!()
	        }

	        fn run(&self, subc: &str) -> Result<(), BlackoutError> {
	            assert_eq!(subc, self.command);
	            (self.res)()
	        }
	    }

	    /// Canned command error carrying the given raw status and placeholder output.
	    fn fake_command_error(raw_status: i32) -> CommandError {
	        CommandError(
	            ExitStatus::from_raw(raw_status),
	            "(fake stdout)".into(),
	            "(fake stderr)".into(),
	        )
	    }

	    /// Runner result simulating a failure of the target command.
	    fn target_command_failure() -> Result<(), BlackoutError> {
	        Err(BlackoutError::TargetCommand(fake_command_error(1)))
	    }

	    /// Runner result simulating a failure of ssh itself.
	    fn ssh_failure() -> Result<(), BlackoutError> {
	        Err(BlackoutError::Ssh("fake target".into(), fake_command_error(255)))
	    }

	    /// Verify step with ten attempts and no delay, driven by the given fake result.
	    fn verify_step<F>(res: F) -> VerifyStep
	    where
	        F: Fn() -> Result<(), BlackoutError> + 'static,
	    {
	        VerifyStep {
	            runner: Box::new(FakeRunner::new("verify", res)),
	            num_retries: 10,
	            retry_timeout: Duration::from_secs(0),
	        }
	    }

	    #[test]
	    fn setup_success() {
	        let step = SetupStep { runner: Box::new(FakeRunner::new("setup", || Ok(()))) };
	        if step.execute().is_err() {
	            panic!("setup step returned an error on a successful run");
	        }
	    }

	    #[test]
	    fn setup_error() {
	        let step = SetupStep { runner: Box::new(FakeRunner::new("setup", target_command_failure)) };
	        match step.execute() {
	            Err(BlackoutError::TargetCommand(_)) => (),
	            Ok(()) => panic!("setup step returned success when runner failed"),
	            _ => panic!("setup step returned an unexpected error"),
	        }
	    }

	    #[test]
	    fn operation_success() {
	        let step = OperationStep { runner: Box::new(FakeRunner::new("test", || Ok(()))) };
	        if step.execute().is_err() {
	            panic!("operation step returned an error on a successful run");
	        }
	    }

	    #[test]
	    fn operation_error() {
	        let step =
	            OperationStep { runner: Box::new(FakeRunner::new("test", target_command_failure)) };
	        match step.execute() {
	            Err(BlackoutError::TargetCommand(_)) => (),
	            Ok(()) => panic!("operation step returned success when runner failed"),
	            _ => panic!("operation step returned an unexpected error"),
	        }
	    }

	    #[test]
	    fn verify_success() {
	        let step = verify_step(|| Ok(()));
	        if step.execute().is_err() {
	            panic!("verify step returned an error on a successful run");
	        }
	    }

	    #[test]
	    fn verify_target_command_error() {
	        let step = verify_step(target_command_failure);
	        match step.execute() {
	            // verify step is expected to transform target command errors into verification errors.
	            Err(BlackoutError::Verification(_)) => (),
	            Err(BlackoutError::TargetCommand(_)) => {
	                panic!("verify step returned target command error instead of verification error")
	            }
	            Ok(()) => panic!("verify step returned success when runner failed"),
	            _ => panic!("verify step returned an unexpected error"),
	        }
	    }

	    #[test]
	    fn verify_ssh_error_retry_loop_timeout() {
	        let calls = Rc::new(Cell::new(0));
	        let counter = Rc::clone(&calls);
	        // every attempt fails to reach the device.
	        let step = verify_step(move || {
	            counter.set(counter.get() + 1);
	            ssh_failure()
	        });
	        match step.execute() {
	            Err(BlackoutError::Ssh(..)) => (),
	            Ok(()) => panic!("verify step returned success when runner failed"),
	            _ => panic!("verify step returned an unexpected error"),
	        }
	        assert_eq!(calls.get(), 10);
	    }

	    #[test]
	    fn verify_ssh_error_retry_loop_success() {
	        let calls = Rc::new(Cell::new(0));
	        let counter = Rc::clone(&calls);
	        // the first five attempts fail to reach the device, the sixth succeeds.
	        let step = verify_step(move || {
	            let attempt = counter.get() + 1;
	            counter.set(attempt);
	            if attempt > 5 {
	                Ok(())
	            } else {
	                ssh_failure()
	            }
	        });
	        match step.execute() {
	            Ok(()) => (),
	            Err(BlackoutError::Ssh(..)) => {
	                panic!("verify step returned error when runner succeeded")
	            }
	            _ => panic!("verify step returned an unexpected error"),
	        }
	        assert_eq!(calls.get(), 6);
	    }
	}