blob: 332e7ad9c32049762ef4da53bcaa6a2a739646ab [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! test steps
use {
crate::{BlackoutError, RebootError, Seed, SshAuth},
std::{
fs::OpenOptions,
io::Write,
path::{Path, PathBuf},
process::{Child, Command, Output, Stdio},
thread::sleep,
time::Duration,
},
};
/// Options handed to every `ssh` invocation that talks to a target machine.
///
/// The list is laid out as `-o`, `Key=value` pairs, one pair per line, which is why rustfmt is
/// told to leave it alone.
#[rustfmt::skip]
static SSH_OPTIONS: &[&str] = &[
    // log in as the fuchsia user
    "-o", "User=fuchsia",
    // skip all of the known-hosts bookkeeping
    "-o", "UserKnownHostsFile=/dev/null",
    "-o", "StrictHostKeyChecking=no",
    "-o", "CheckHostIP=no",
    // no agent or X11 forwarding
    "-o", "ForwardAgent=no",
    "-o", "ForwardX11=no",
    // let later invocations reuse an existing ssh connection
    "-o", "ControlPersist=yes",
    "-o", "ControlMaster=auto",
    "-o", "ControlPath=/tmp/fuchsia--%r@%h:%p",
    // the remaining three options bound how long ssh waits before giving up.
    // wait up to 100 seconds for the server to become routable. this overrides the system
    // default tcp timeout.
    "-o", "ConnectTimeout=100",
    // ping the server every 10 seconds when no data has been received from it
    "-o", "ServerAliveInterval=10",
    // give up after 6 unanswered pings, i.e. after roughly 60 seconds of a routable server that
    // isn't responding yet.
    "-o", "ServerAliveCountMax=6",
];
/// Build (but do not run) an `ssh` command aimed at `target`.
///
/// The arguments are, in order: everything in `SSH_OPTIONS`, the arguments supplied by
/// `auth.args()`, and finally `target` itself. Callers append the remote command afterwards.
fn ssh(target: &str, auth: &SshAuth) -> Command {
    let mut cmd = Command::new("ssh");
    cmd.args(SSH_OPTIONS).args(auth.args()).arg(target);
    cmd
}
/// Type of reboot to perform.
///
/// Values can be cloned, debug-printed and compared for equality, so configuration code and
/// tests can check which mechanism was selected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum RebootType {
    /// Perform a software reboot using dm reboot on the target device over ssh.
    Software,
    /// Perform a hardware reboot by writing a byte to a relay device. The payload is the path to
    /// that relay device. THIS OPTION IS LIKELY TO CHANGE IN THE FUTURE.
    Hardware(PathBuf),
}
// TODO(sdemos): the final implementation will also have to handle a CI environment where hard
// rebooting is done by calling a script that will be in our environment.
/// Trigger a hardware reboot through the relay device at `dev`.
///
/// The device is opened write-only and must already exist; it is never created. The byte `0x01`
/// is written, then after a 100 millisecond pause the byte `0x02` is written.
/// NOTE(review): presumably these two bytes switch the relay on and back off - confirm against
/// the relay hardware.
///
/// # Errors
///
/// Returns `RebootError::MissingDevice` carrying the path when `dev` does not exist, and
/// propagates any error from opening or writing to the device.
fn hard_reboot(dev: impl AsRef<Path>) -> Result<(), RebootError> {
    let relay_path = dev.as_ref();
    if !relay_path.exists() {
        return Err(RebootError::MissingDevice(relay_path.to_path_buf()));
    }

    // `read` and `create` are already off by default, so only write access has to be requested.
    let mut relay = OpenOptions::new().write(true).open(relay_path)?;

    relay.write_all(&[0x01])?;
    sleep(Duration::from_millis(100));
    relay.write_all(&[0x02])?;

    Ok(())
}
/// Reboot the target system by running `dm reboot` on it over ssh.
///
/// # Errors
///
/// Only a failure to run the local `ssh` process is reported. The exit status of the command is
/// deliberately discarded.
fn soft_reboot(target: &str, auth: &SshAuth) -> Result<(), RebootError> {
    let mut reboot_cmd = ssh(target, auth);
    reboot_cmd.args(&["dm", "reboot"]);
    // the exit status is garbage for this command, so it is dropped on purpose
    let _exit_status = reboot_cmd.status()?;
    Ok(())
}
/// Something that can run a named subcommand and report how it went. The test steps in this file
/// hold a `Box<dyn Runner>` so that the tests can substitute a fake implementation.
trait Runner {
/// Start the subcommand `subc` and return the handle to the spawned child process.
fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError>;
/// Run the subcommand `subc` and return the output collected from the process.
fn run_output(&self, subc: &str) -> Result<Output, BlackoutError>;
/// Run the subcommand `subc`, reporting only whether it succeeded.
fn run(&self, subc: &str) -> Result<(), BlackoutError>;
}
/// Runs a target binary on a target device over ssh.
struct CmdRunner {
    /// Destination handed to `ssh`.
    target: String,
    /// Authentication settings handed to `ssh`.
    auth: SshAuth,
    /// Binary to invoke on the target; first word of the remote command line.
    bin: String,
    /// Seed passed to the binary as its first argument.
    seed: Seed,
    /// Block device passed to the binary as its second argument.
    block_device: String,
}

impl CmdRunner {
    /// Create a runner that owns copies of all of its settings, boxed as a `Runner` trait
    /// object.
    fn new(
        target: &str,
        auth: &SshAuth,
        bin: &str,
        seed: Seed,
        block_device: &str,
    ) -> Box<dyn Runner> {
        let runner = CmdRunner {
            target: target.to_owned(),
            auth: auth.clone(),
            bin: bin.to_owned(),
            seed,
            block_device: block_device.to_owned(),
        };
        Box::new(runner)
    }

    /// Build the ssh command that invokes `<bin> <seed> <block_device>` on the target. The
    /// subcommand to run still has to be appended by the caller.
    fn run_bin(&self) -> Command {
        let mut cmd = ssh(&self.target, &self.auth);
        cmd.arg(&self.bin);
        cmd.arg(self.seed.to_string());
        cmd.arg(&self.block_device);
        cmd
    }
}
impl Runner for CmdRunner {
    /// Run a subcommand of the originally provided binary on the target. The command is spawned
    /// as a separate process, and the handle to the child process is returned. stdout and stderr
    /// are piped (see [`std::process::Stdio::piped()`] for details).
    ///
    /// # Errors
    ///
    /// Propagates the error from spawning the local `ssh` process.
    fn run_spawn(&self, subc: &str) -> Result<Child, BlackoutError> {
        let child =
            self.run_bin().arg(subc).stdout(Stdio::piped()).stderr(Stdio::piped()).spawn()?;
        Ok(child)
    }

    /// Run a subcommand to completion and collect the output from the process.
    ///
    /// # Errors
    ///
    /// * Propagates the error from running the local `ssh` process.
    /// * `BlackoutError::Ssh` when `ssh` exits with status 255, which is the status ssh uses for
    ///   its own failures, or when it ends without any exit code (for example because it was
    ///   killed by a signal). In both cases no result from the remote command is available.
    /// * `BlackoutError::TargetCommand` for every other unsuccessful exit status.
    fn run_output(&self, subc: &str) -> Result<Output, BlackoutError> {
        let out = self.run_bin().arg(subc).output()?;
        if out.status.success() {
            return Ok(out);
        }
        // `code()` is `None` when the process was terminated by a signal, so it must not be
        // unwrapped.
        match out.status.code() {
            Some(255) | None => Err(BlackoutError::Ssh(self.target.clone(), out.into())),
            Some(_) => Err(BlackoutError::TargetCommand(out.into())),
        }
    }

    /// Run a subcommand to completion, discarding its output on success. Fails in the same ways
    /// as `run_output`.
    fn run(&self, subc: &str) -> Result<(), BlackoutError> {
        self.run_output(subc).map(|_| ())
    }
}
/// A step for a test to take. These steps can be added to the test runner in the root of the host
/// library.
pub trait TestStep {
/// Execute this test step.
///
/// # Errors
///
/// Returns a `BlackoutError` describing why the step failed.
fn execute(&self) -> Result<(), BlackoutError>;
}
/// A test step for setting up the filesystem in the way we want it for the test. This executes
/// the `setup` subcommand on the target binary and waits for completion, checking the result.
pub struct SetupStep {
    /// Runs the subcommand for this step.
    runner: Box<dyn Runner>,
}

impl SetupStep {
    /// Create a new setup step that runs `bin` on `target` over ssh, passing it `seed` and
    /// `block_device`.
    pub(crate) fn new(
        target: &str,
        auth: &SshAuth,
        bin: &str,
        seed: Seed,
        block_device: &str,
    ) -> Self {
        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
        Self { runner }
    }
}
impl TestStep for SetupStep {
    /// Announce the step, then run the `setup` subcommand and pass any failure on unchanged.
    fn execute(&self) -> Result<(), BlackoutError> {
        println!("setting up test...");
        self.runner.run("setup")?;
        Ok(())
    }
}
/// A test step for generating load on a filesystem. This executes the `test` subcommand on the
/// target binary and then checks to make sure it didn't exit after `duration`.
pub struct LoadStep {
    /// Spawns the subcommand for this step.
    runner: Box<dyn Runner>,
    /// How long the load has to keep running for the step to pass.
    duration: Duration,
}

impl LoadStep {
    /// Create a new load step that runs `bin` on `target` over ssh, passing it `seed` and
    /// `block_device`, and expects it to still be running after `duration`.
    pub(crate) fn new(
        target: &str,
        auth: &SshAuth,
        bin: &str,
        seed: Seed,
        block_device: &str,
        duration: Duration,
    ) -> Self {
        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
        Self { runner, duration }
    }
}
impl TestStep for LoadStep {
    /// Spawn the `test` subcommand, wait for the configured duration, and then check on it.
    ///
    /// The step passes when the child is still running at that point; the child handle is then
    /// dropped without waiting for the process. If the child has already exited, its output is
    /// collected and reported as a `BlackoutError::TargetCommand`.
    fn execute(&self) -> Result<(), BlackoutError> {
        println!("generating filesystem load...");
        let mut load = self.runner.run_spawn("test")?;
        sleep(self.duration);
        match load.try_wait()? {
            // still running, which is what we want
            None => Ok(()),
            // the load generator is not supposed to finish on its own
            Some(_) => {
                let out = load.wait_with_output()?;
                Err(BlackoutError::TargetCommand(out.into()))
            }
        }
    }
}
/// A test step for running an operation to completion. This executes the `test` subcommand and
/// waits for completion, checking the result.
pub struct OperationStep {
    /// Runs the subcommand for this step.
    runner: Box<dyn Runner>,
}

impl OperationStep {
    /// Create a new operation step that runs `bin` on `target` over ssh, passing it `seed` and
    /// `block_device`.
    pub(crate) fn new(
        target: &str,
        auth: &SshAuth,
        bin: &str,
        seed: Seed,
        block_device: &str,
    ) -> Self {
        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
        Self { runner }
    }
}
impl TestStep for OperationStep {
    /// Announce the step, then run the `test` subcommand and pass any failure on unchanged.
    fn execute(&self) -> Result<(), BlackoutError> {
        println!("running filesystem operation...");
        self.runner.run("test")?;
        Ok(())
    }
}
/// A test step for rebooting the target machine. This uses the configured reboot mechanism.
pub struct RebootStep {
    /// Destination handed to `ssh` for a software reboot.
    target: String,
    /// Authentication settings handed to `ssh` for a software reboot.
    auth: SshAuth,
    /// Which reboot mechanism to use.
    reboot_type: RebootType,
}

impl RebootStep {
    /// Create a new reboot step. The step keeps its own copies of all three arguments.
    pub(crate) fn new(target: &str, auth: &SshAuth, reboot_type: &RebootType) -> Self {
        Self {
            target: target.to_owned(),
            auth: auth.clone(),
            reboot_type: reboot_type.clone(),
        }
    }
}
impl TestStep for RebootStep {
    /// Announce the step and reboot the device with the configured mechanism: `soft_reboot` over
    /// ssh for `RebootType::Software`, `hard_reboot` through the relay device for
    /// `RebootType::Hardware`. A reboot error is converted into a `BlackoutError`.
    fn execute(&self) -> Result<(), BlackoutError> {
        println!("rebooting device...");
        let outcome = match &self.reboot_type {
            RebootType::Hardware(relay) => hard_reboot(relay),
            RebootType::Software => soft_reboot(&self.target, &self.auth),
        };
        outcome?;
        Ok(())
    }
}
/// A test step for verifying the machine. This executes the `verify` subcommand on the target
/// binary and waits for completion, checking the result.
pub struct VerifyStep {
    /// Runs the subcommand for this step.
    runner: Box<dyn Runner>,
    /// How many times the verification command is attempted.
    num_retries: u32,
    /// How long to sleep between attempts.
    retry_timeout: Duration,
}

impl VerifyStep {
    /// Create a new verify step that runs `bin` on `target` over ssh, passing it `seed` and
    /// `block_device`. Verification is done in a retry loop: the verification command is
    /// attempted `num_retries` times, sleeping for `retry_timeout` in between attempts.
    pub(crate) fn new(
        target: &str,
        auth: &SshAuth,
        bin: &str,
        seed: Seed,
        block_device: &str,
        num_retries: u32,
        retry_timeout: Duration,
    ) -> Self {
        let runner = CmdRunner::new(target, auth, bin, seed, block_device);
        Self { runner, num_retries, retry_timeout }
    }
}
impl TestStep for VerifyStep {
    /// Run the `verify` subcommand, retrying while the failure is an ssh error.
    ///
    /// The command is attempted up to `num_retries` times, and always at least once so that a
    /// retry count of zero can never report success without verifying anything. The step sleeps
    /// for `retry_timeout` between attempts, but not after the final one.
    ///
    /// # Errors
    ///
    /// * `BlackoutError::Verification` as soon as the target command itself fails.
    /// * The last `BlackoutError::Ssh` seen when every attempt failed to reach the target.
    /// * Any other runner error, returned immediately and unchanged.
    fn execute(&self) -> Result<(), BlackoutError> {
        let attempts = self.num_retries.max(1);
        let mut last_ssh_error = Ok(());
        let start_time = std::time::Instant::now();
        // an inclusive range avoids overflowing `num_retries + 1` for very large retry counts.
        for i in 1..=attempts {
            println!("verifying device...(attempt #{})", i);
            match self.runner.run("verify") {
                Ok(()) => {
                    println!("verification successful.");
                    return Ok(());
                }
                Err(ssh_error @ BlackoutError::Ssh(..)) => {
                    // always print out the ssh error so it doesn't get buried to help with
                    // debugging.
                    println!("{}", ssh_error);
                    last_ssh_error = Err(ssh_error);
                    // there is nothing left to wait for once the last attempt has failed.
                    if i < attempts {
                        sleep(self.retry_timeout);
                    }
                }
                // during the verification stage, we expect that any time the target command
                // fails, it's a verification failure.
                Err(BlackoutError::TargetCommand(e)) => return Err(BlackoutError::Verification(e)),
                Err(e) => return Err(e),
            }
        }
        println!("stopping verification attempt after {}s", start_time.elapsed().as_secs());
        // we failed to ssh into the device too many times in a row. something's wrong.
        last_ssh_error
    }
}
#[cfg(test)]
mod tests {
    use super::{OperationStep, Runner, SetupStep, TestStep, VerifyStep};
    use crate::{BlackoutError, CommandError};
    use std::{
        cell::Cell,
        os::unix::process::ExitStatusExt,
        process::{Child, ExitStatus, Output},
        rc::Rc,
        time::Duration,
    };

    /// Test double for [`Runner`]. `run` asserts that it was asked for the expected subcommand
    /// and then answers with whatever the stored closure returns. The other two methods are not
    /// needed by the steps under test and are left unimplemented.
    struct FakeRunner<F>
    where
        F: Fn() -> Result<(), BlackoutError>,
    {
        command: &'static str,
        res: F,
    }

    impl<F> FakeRunner<F>
    where
        F: Fn() -> Result<(), BlackoutError>,
    {
        pub fn new(command: &'static str, res: F) -> Self {
            Self { command, res }
        }
    }

    impl<F> Runner for FakeRunner<F>
    where
        F: Fn() -> Result<(), BlackoutError>,
    {
        fn run_spawn(&self, _subc: &str) -> Result<Child, BlackoutError> {
            unimplemented!()
        }

        fn run_output(&self, _subc: &str) -> Result<Output, BlackoutError> {
            unimplemented!()
        }

        fn run(&self, subc: &str) -> Result<(), BlackoutError> {
            assert_eq!(subc, self.command);
            let respond = &self.res;
            respond()
        }
    }

    /// Fake command failure carrying the given raw exit status and placeholder output.
    fn fake_command_error(raw_status: i32) -> CommandError {
        CommandError(
            ExitStatus::from_raw(raw_status),
            "(fake stdout)".into(),
            "(fake stderr)".into(),
        )
    }

    /// Runner response for a target command that failed with raw status 1.
    fn target_command_failure() -> Result<(), BlackoutError> {
        Err(BlackoutError::TargetCommand(fake_command_error(1)))
    }

    /// Runner response for an ssh connection that failed with raw status 255.
    fn ssh_failure() -> Result<(), BlackoutError> {
        Err(BlackoutError::Ssh("fake target".into(), fake_command_error(255)))
    }

    /// Verify step around `runner` with ten attempts and no delay between them.
    fn verify_step<R: Runner + 'static>(runner: R) -> VerifyStep {
        VerifyStep {
            runner: Box::new(runner),
            num_retries: 10,
            retry_timeout: Duration::from_secs(0),
        }
    }

    #[test]
    fn setup_success() {
        let runner = FakeRunner::new("setup", || Ok(()));
        let step = SetupStep { runner: Box::new(runner) };
        if step.execute().is_err() {
            panic!("setup step returned an error on a successful run");
        }
    }

    #[test]
    fn setup_error() {
        let runner = FakeRunner::new("setup", target_command_failure);
        let step = SetupStep { runner: Box::new(runner) };
        match step.execute() {
            Ok(()) => panic!("setup step returned success when runner failed"),
            Err(BlackoutError::TargetCommand(_)) => (),
            Err(_) => panic!("setup step returned an unexpected error"),
        }
    }

    #[test]
    fn operation_success() {
        let runner = FakeRunner::new("test", || Ok(()));
        let step = OperationStep { runner: Box::new(runner) };
        if step.execute().is_err() {
            panic!("operation step returned an error on a successful run");
        }
    }

    #[test]
    fn operation_error() {
        let runner = FakeRunner::new("test", target_command_failure);
        let step = OperationStep { runner: Box::new(runner) };
        match step.execute() {
            Ok(()) => panic!("operation step returned success when runner failed"),
            Err(BlackoutError::TargetCommand(_)) => (),
            Err(_) => panic!("operation step returned an unexpected error"),
        }
    }

    #[test]
    fn verify_success() {
        let step = verify_step(FakeRunner::new("verify", || Ok(())));
        if step.execute().is_err() {
            panic!("verify step returned an error on a successful run");
        }
    }

    #[test]
    fn verify_target_command_error() {
        let step = verify_step(FakeRunner::new("verify", target_command_failure));
        match step.execute() {
            Ok(()) => panic!("verify step returned success when runner failed"),
            // verify step is expected to transform target command errors into verification
            // errors.
            Err(BlackoutError::Verification(_)) => (),
            Err(BlackoutError::TargetCommand(_)) => {
                panic!("verify step returned target command error instead of verification error")
            }
            Err(_) => panic!("verify step returned an unexpected error"),
        }
    }

    #[test]
    fn verify_ssh_error_retry_loop_timeout() {
        let calls = Rc::new(Cell::new(0));
        let counter = Rc::clone(&calls);
        // every attempt fails to connect, so the step has to use up all of its attempts.
        let step = verify_step(FakeRunner::new("verify", move || {
            counter.set(counter.get() + 1);
            ssh_failure()
        }));
        match step.execute() {
            Ok(()) => panic!("verify step returned success when runner failed"),
            Err(BlackoutError::Ssh(..)) => (),
            Err(_) => panic!("verify step returned an unexpected error"),
        }
        assert_eq!(calls.get(), 10);
    }

    #[test]
    fn verify_ssh_error_retry_loop_success() {
        let calls = Rc::new(Cell::new(0));
        let counter = Rc::clone(&calls);
        // the first five attempts fail to connect and the sixth one succeeds.
        let step = verify_step(FakeRunner::new("verify", move || {
            let attempt = counter.get() + 1;
            counter.set(attempt);
            if attempt > 5 {
                Ok(())
            } else {
                ssh_failure()
            }
        }));
        match step.execute() {
            Ok(()) => (),
            Err(BlackoutError::Ssh(..)) => {
                panic!("verify step returned error when runner succeeded")
            }
            Err(_) => panic!("verify step returned an unexpected error"),
        }
        assert_eq!(calls.get(), 6);
    }
}