blob: 01f3f7e9b61483890c914b80cdb2c0dba5e475a1 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! Library for the host side of filesystem integrity host-target interaction tests.
#![deny(missing_docs)]
use {
rand::random,
std::{
cell::Cell,
fmt,
path::PathBuf,
process::{ExitStatus, Output},
rc::Rc,
time::Duration,
},
structopt::StructOpt,
thiserror::Error,
};
pub mod steps;
use steps::{LoadStep, OperationStep, RebootStep, RebootType, SetupStep, TestStep, VerifyStep};
pub mod integration;
/// Wraps `message` in a three-line banner: a top rule, the message itself, and a bottom rule.
///
/// The rules are two characters wider than the message. The width is derived from the number
/// of characters in `message`, not its byte length, so multi-byte UTF-8 text does not produce
/// an over-long rule.
fn box_message(message: String) -> String {
    let line = "━".repeat(message.chars().count() + 2);
    format!("┏{line}┓\n{message}\n┗{line}┛", message = message, line = line)
}
/// An error occurred running a command on the target system. Contains the exit status, stdout, and
/// stderr of the command, in that order.
#[derive(Debug, Error)]
#[error(
"failed to run command: {}\n\
stdout:\n\
{}\n\
stderr:\n\
{}",
_0,
_1,
_2
)]
pub struct CommandError(ExitStatus, String, String);
impl From<Output> for CommandError {
    /// Convert the [`std::process::Output`] of a command into an error.
    ///
    /// The captured stdout and stderr are converted from raw bytes to strings lossily: any
    /// byte sequence that is not valid UTF-8 is replaced with U+FFFD rather than causing a
    /// panic, so building the report for a failed command cannot itself abort the harness.
    fn from(out: Output) -> Self {
        let stdout = String::from_utf8_lossy(&out.stdout).into_owned();
        let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
        CommandError(out.status, stdout, stderr)
    }
}
/// An error occurred while attempting to reboot the system.
#[derive(Debug, Error)]
pub enum RebootError {
/// The path to the relay device required for hard-rebooting the target doesn't exist.
#[error("device does not exist: {:?}", _0)]
MissingDevice(PathBuf),
/// An io error occurred during rebooting. Maybe we failed to write to the device.
#[error("io error: {:?}", _0)]
IoError(#[from] std::io::Error),
/// The command we executed on the target failed.
#[error("command error: {:?}", _0)]
Command(#[from] CommandError),
}
/// Error used for the host-side of the blackout library.
#[derive(Debug, Error)]
pub enum BlackoutError {
/// We got an error when trying to reboot.
#[error("failed to reboot: {:?}", _0)]
Reboot(#[from] RebootError),
/// We failed to run the command on the host. Specifically, when the spawn or something fails,
/// not when the command itself returns a non-zero exit code.
#[error("host command failed: {:?}", _0)]
HostCommand(#[from] std::io::Error),
/// We got an ssh failure. We know it's an ssh failure because it returned 255.
#[error("failed to ssh into '{}': {:?}", _0, _1)]
Ssh(String, CommandError),
/// The command run on the target device failed to run. Either it returned a non-zero exit code
/// or it exited when it shouldn't have.
#[error("target command failed: {}", _0)]
TargetCommand(CommandError),
/// Specifically the verification step failed. This indicates an actual test failure as opposed
/// to a failure of the test framework or environmental failure.
#[error("verification failed: {}", _0)]
Verification(CommandError),
}
// NOTE: structopt renders the doc comments on this struct and its fields as the `--help` text,
// so they are user-facing.
/// Blackout is a power-failure testing framework for the filesystems. This host-side harness runs
/// operations on the configured target device for generating load on the filesystem, then reboots
/// the device after a certain amount of time using a configured reboot mechanism. By default, it
/// runs one iteration of this test. Options are provided for running the test until failure or
/// running the test N times and collecting failure statistics.
#[derive(Clone, StructOpt)]
#[structopt(rename_all = "kebab-case")]
pub struct CommonOpts {
/// The block device on the target device to use for testing. WARNING: the test can (and likely
/// will!) format this device. Don't use a main system partition!
pub block_device: String,
/// The target device to ssh into and execute the test on. A good way to configure this locally
/// is by using `$(fx get-device-addr)`.
pub target: String,
/// [Optional] A seed to use for all random operations. Tests are NOT deterministic relative to
/// the provided seed. The operations will be identical, but because of the non-deterministic
/// timing-dependent nature of the tests, the exact time the reboot is triggered in relation to
/// the operations is not guaranteed.
///
/// One will be randomly generated if not provided. When performing the same test multiple times
/// in one run, a new seed will be generated for each run if one was not provided.
#[structopt(short, long)]
pub seed: Option<u64>,
/// Path to a power relay for cutting the power to a device. Probably the highest-numbered
/// /dev/ttyUSB[N]. If in doubt, try removing it and seeing what disappears from /dev. When a
/// relay is provided, the harness automatically switches to use hardware reboots.
#[structopt(short, long)]
pub relay: Option<PathBuf>,
/// Run the test N number of times, collecting statistics on the number of failures.
#[structopt(short, long)]
pub iterations: Option<u64>,
/// Run the test until a verification failure is detected, then exit.
#[structopt(short = "f", long, requires = "iterations")]
pub run_until_failure: bool,
/// Path to the ssh private key to use when authenticating with the target device. If neither
/// this flag nor the --ssh-agent flag is set, the test will open `$FUCHSIA_DIR/.ssh/pkey` if
/// `$FUCHSIA_DIR` exists, otherwise it will attempt to open `$CWD/.ssh/pkey`. If none of these
/// attempts succeed the test will fail to run.
#[structopt(short = "k", long)]
pub ssh_key: Option<String>,
/// Use the ssh agent to authenticate with the target device. If both this option and --ssh-key
/// are provided, --ssh-key will take precedence.
#[structopt(short = "a", long)]
pub ssh_agent: bool,
}
/// The seed for a run of the test.
#[derive(Clone, Debug)]
pub enum Seed {
/// The seed is constant over multiple runs of the test.
Constant(u64),
/// The seed is a random value for every run of the test, generated by `random`. The value
/// lives in a reference-counted cell, so every clone of this seed observes the same current
/// value.
Variable(Rc<Cell<u64>>),
}
impl Seed {
    /// Builds a seed from the optional user-supplied value. A supplied value becomes a
    /// constant seed; otherwise a variable seed is created with a random initial value.
    fn new(maybe_seed: Option<u64>) -> Seed {
        if let Some(fixed) = maybe_seed {
            return Seed::Constant(fixed);
        }
        Seed::Variable(Rc::new(Cell::new(random())))
    }

    /// Replaces the value held by a variable seed with a fresh random number. Constant seeds
    /// are left untouched.
    fn reroll(&self) {
        if let Seed::Variable(cell) = self {
            cell.set(random());
        }
    }
}
impl fmt::Display for Seed {
    /// Writes the current numeric value of the seed, whichever variant holds it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let current: u64 = match self {
            Seed::Constant(value) => *value,
            Seed::Variable(cell) => cell.get(),
        };
        write!(f, "{}", current)
    }
}
/// How many times the configured steps are executed and when the harness stops.
#[derive(Clone, Debug)]
enum RunMode {
/// Execute the steps a single time.
Once,
/// Execute the steps the given number of times, tallying the failures.
Iterations(u64),
/// Execute the steps at most the given number of times, stopping at the first verification
/// failure.
IterationsUntilFailure(u64),
}
/// How the harness authenticates with the target over ssh.
#[derive(Clone, Debug)]
enum SshAuth {
    /// Rely on the ssh agent; no extra arguments are passed to ssh.
    Agent,
    /// Authenticate with the private key stored at the given path.
    PKey(String),
}

impl SshAuth {
    /// Returns the extra ssh command-line arguments required by this auth method.
    fn args(&self) -> Vec<String> {
        let mut extra = Vec::new();
        match self {
            SshAuth::Agent => {}
            SshAuth::PKey(key_path) => {
                extra.push(String::from("-o"));
                extra.push(format!("IdentityFile={}", key_path));
            }
        }
        extra
    }
}
/// Unconfigured test. Knows how to configure itself based on the set of common options.
pub struct UnconfiguredTest {
/// Name of the package on the target device; used to build the `run <package>` command line.
package: &'static str,
}
/// Test definition. This contains all the information to make a test reproducible in a particular
/// environment, and allows host binaries to configure the steps taken by the test.
pub struct Test {
/// net address of the target device.
target: String,
/// command line used to invoke the test package on the target (`run <package>`).
bin: String,
/// seed handed to the steps; re-rolled before every run unless it is constant.
seed: Seed,
/// block device on the target that the test operates on.
block_device: String,
/// reboot mechanism: hardware when a relay path was configured, software otherwise.
reboot_type: RebootType,
/// how many times the steps are run and when to stop.
run_mode: RunMode,
/// ssh authentication method used to reach the target.
auth: SshAuth,
/// the steps of the test, executed in insertion order on every run.
steps: Vec<Box<dyn TestStep>>,
}
impl fmt::Display for Test {
/// Writes the test configuration, one field per line, using each field's `Debug`
/// representation. The list of steps is not included.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The literal below spans several lines on purpose: its line breaks are part of the output.
write!(
f,
"Test {{
target: {:?},
bin: {:?},
seed: {:?},
block_device: {:?},
reboot_type: {:?},
run_mode: {:?},
auth: {:?},
}}",
self.target,
self.bin,
self.seed,
self.block_device,
self.reboot_type,
self.run_mode,
self.auth,
)
}
}
impl UnconfiguredTest {
    /// Collect command line options for configuring the test steps. This uses the [`CommonOpts`]
    /// struct with StructOpt.
    pub fn collect_options(self) -> Test {
        self.add_options(CommonOpts::from_args())
    }

    /// Add the set of common options manually. This can be used if additional options need to be
    /// collected at run time that aren't one of the common options. The CommonOpts struct can be
    /// flattened into the defined CLI using structopt (see their docs for how).
    ///
    /// The ssh key is resolved in this order: an explicit `ssh_key`, then the ssh agent if
    /// `ssh_agent` is set, then `$FUCHSIA_DIR/.ssh/pkey`, then `.ssh/pkey` under the current
    /// working directory.
    ///
    /// # Panics
    ///
    /// Panics if `run_until_failure` is set without `iterations`, or if no ssh key location can
    /// be determined (neither option given, `FUCHSIA_DIR` unset, and the current directory is
    /// unavailable or not valid UTF-8).
    pub fn add_options(self, opts: CommonOpts) -> Test {
        Test {
            target: opts.target,
            bin: format!("run {}", self.package),
            seed: Seed::new(opts.seed),
            block_device: opts.block_device,
            reboot_type: match opts.relay {
                Some(relay) => RebootType::Hardware(relay),
                None => RebootType::Software,
            },
            run_mode: match (opts.iterations, opts.run_until_failure) {
                (None, false) => RunMode::Once,
                (None, true) => panic!("run until failure requires multiple iterations"),
                (Some(iterations), false) => RunMode::Iterations(iterations),
                (Some(iterations), true) => RunMode::IterationsUntilFailure(iterations),
            },
            auth: match (opts.ssh_key, opts.ssh_agent) {
                (Some(key), _) => SshAuth::PKey(key),
                (None, true) => SshAuth::Agent,
                (None, false) => {
                    if let Ok(fuchsia_dir) = std::env::var("FUCHSIA_DIR") {
                        SshAuth::PKey(format!("{}/.ssh/pkey", fuchsia_dir))
                    } else if let Ok(cwd) = std::env::current_dir() {
                        // The key path is stored as a String, so a non-UTF-8 working directory
                        // cannot be represented; fail with an actionable message instead of a
                        // bare unwrap.
                        let cwd_str = cwd
                            .to_str()
                            .expect("current directory is not valid UTF-8 - use --ssh-key");
                        SshAuth::PKey(format!("{}/.ssh/pkey", cwd_str))
                    } else {
                        panic!("can't figure out where to get private key - use --ssh-key");
                    }
                }
            },
            steps: Vec::new(),
        }
    }
}
impl Test {
/// Create a new test with the provided name. The name needs to match the associated package that
/// will be present on the target device. The package should be callable with `run` from the
/// command line.
///
/// The returned [`UnconfiguredTest`] only records the package name; it becomes a [`Test`] once
/// options are supplied through `collect_options` or `add_options`.
pub fn new(package: &'static str) -> UnconfiguredTest {
UnconfiguredTest { package }
}
/// Append a step that sets up the filesystem the way the test wants it. The step runs the
/// `setup` subcommand of the target binary, waits for it to finish, and checks the result.
pub fn setup_step(mut self) -> Self {
    let step = SetupStep::new(
        &self.target,
        &self.auth,
        &self.bin,
        self.seed.clone(),
        &self.block_device,
    );
    self.steps.push(Box::new(step));
    self
}
/// Append a step that generates load on the device using the `test` subcommand of the target
/// binary. The load does not terminate on its own; after `duration` the step checks that the
/// command is still running and then returns.
pub fn load_step(mut self, duration: Duration) -> Self {
    let step = LoadStep::new(
        &self.target,
        &self.auth,
        &self.bin,
        self.seed.clone(),
        &self.block_device,
        duration,
    );
    self.steps.push(Box::new(step));
    self
}
/// Append an operation step. The step runs the `test` subcommand of the target binary to
/// completion and checks the result.
pub fn operation_step(mut self) -> Self {
    let step = OperationStep::new(
        &self.target,
        &self.auth,
        &self.bin,
        self.seed.clone(),
        &self.block_device,
    );
    self.steps.push(Box::new(step));
    self
}
/// Append a reboot step. The step reboots the target machine with the configured reboot
/// mechanism and then waits for it to come back up. Right now, it waits by sleeping for 30
/// seconds.
/// TODO(fxbug.dev/34504): instead of waiting for 30 seconds, we should have a retry loop around
/// the ssh in the verification step.
pub fn reboot_step(mut self) -> Self {
    let step = RebootStep::new(&self.target, &self.auth, &self.reboot_type);
    self.steps.push(Box::new(step));
    self
}
/// Append a verify step. The step runs the `verify` subcommand of the target binary, waits for
/// it to finish, and checks the result. Verification is retried: the command is attempted
/// `num_retries` times, sleeping for `retry_timeout` between attempts.
pub fn verify_step(mut self, num_retries: u32, retry_timeout: Duration) -> Self {
    let step = VerifyStep::new(
        &self.target,
        &self.auth,
        &self.bin,
        self.seed.clone(),
        &self.block_device,
        num_retries,
        retry_timeout,
    );
    self.steps.push(Box::new(step));
    self
}
/// Add a custom test step implementation. The step is appended after any steps that were
/// already added and the test is returned so calls can be chained.
pub fn add_step(mut self, step: Box<dyn TestStep>) -> Self {
self.steps.push(step);
self
}
/// Execute every configured step in order, stopping at the first one that fails. Before the
/// steps run, the seed is re-rolled (a no-op for constant seeds) and the test definition is
/// printed.
fn run_test(&self) -> Result<(), BlackoutError> {
    self.seed.reroll();
    println!("{}", self);
    self.steps.iter().try_for_each(|step| step.execute())
}
/// Run the test a single time. Used directly for the one-shot mode and as the driver for the
/// multi-run modes.
///
/// This exists separately from `run_test` purely for presentation. When `main` returns an
/// error, Rust prints its `Debug` form, which is squashed onto one line and hard to read. So
/// that individual test binaries don't each have to deal with that, any error is printed here
/// using its `Display` form. The error is still returned so the exit code is set properly.
fn run_once(&self) -> Result<(), BlackoutError> {
    self.run_test().map_err(|err| {
        println!("{}", err);
        err
    })
}
/// Run the test `iterations` times, printing running statistics after every run.
///
/// Verification errors are counted as failures and every other error as a non-verification
/// error; neither stops the loop, and the function always returns `Ok(())`. The failure
/// percentage is computed over the runs that did not end in a non-verification error and is
/// reported as `n/a` while there are no such runs.
fn run_iterations(&self, iterations: u64) -> Result<(), BlackoutError> {
    let mut failures = 0u64;
    let mut flukes = 0u64;
    // Inclusive range: `1..iterations + 1` would overflow when `iterations` is `u64::MAX`.
    for runs in 1..=iterations {
        println!("{}", box_message(format!("test run #{}", runs)));
        match self.run_once() {
            Ok(()) => (),
            Err(BlackoutError::Verification(_)) => failures += 1,
            Err(_) => flukes += 1,
        }
        println!("runs: {}", runs);
        println!("failures: {}", failures);
        println!("non-verification errors: {}", flukes);
        // `flukes` never exceeds `runs`, so this subtraction cannot underflow.
        let completed = runs - flukes;
        if completed == 0 {
            // Avoid printing `NaN%` from a 0/0 division.
            println!("failure percentage: n/a");
        } else {
            println!(
                "failure percentage: {:.2}%",
                (failures as f64 / completed as f64) * 100.0
            );
        }
    }
    Ok(())
}
/// Run the test up to `iterations` times, returning the first verification error encountered.
///
/// Errors other than verification failures are ignored (each run already prints its own
/// error) and the loop continues. Returns `Ok(())` if no run fails verification.
fn run_iterations_until_failure(&self, iterations: u64) -> Result<(), BlackoutError> {
    // Inclusive range: `1..iterations + 1` would overflow when `iterations` is `u64::MAX`.
    for runs in 1..=iterations {
        println!("{}", box_message(format!("test run #{}", runs)));
        if let Err(e @ BlackoutError::Verification(_)) = self.run_once() {
            return Err(e);
        }
    }
    Ok(())
}
/// Run the test according to its configured run mode. There are three kinds of run:
/// 1. one iteration (no iteration count was configured)
///    run the steps once and return the result.
/// 2. some number of iterations (an iteration count without run-until-failure)
///    run the steps that many times, tallying verification failures and other errors; the
///    individual errors are reported but not returned.
/// 3. run until verification failure, with a maximum number of iterations
///    run the steps at most that many times, returning early with the first verification
///    error instead of aggregating results.
pub fn run(self) -> Result<(), BlackoutError> {
    match &self.run_mode {
        RunMode::Once => self.run_once(),
        RunMode::Iterations(count) => self.run_iterations(*count),
        RunMode::IterationsUntilFailure(limit) => self.run_iterations_until_failure(*limit),
    }
}
}
#[cfg(test)]
mod tests {
use super::BlackoutError as Error;
use super::{BlackoutError, CommandError, CommonOpts, Test, TestStep};
use std::{cell::Cell, os::unix::process::ExitStatusExt, process::ExitStatus, rc::Rc};
struct FakeStep<F>
where
F: Fn(u64) -> Result<(), Error>,
{
res: F,
runs: Rc<Cell<u64>>,
}
impl<F> FakeStep<F>
where
F: Fn(u64) -> Result<(), Error>,
{
fn new(res: F) -> (FakeStep<F>, Rc<Cell<u64>>) {
let runs = Rc::new(Cell::new(0));
(FakeStep { res, runs: runs.clone() }, runs)
}
}
impl<F> TestStep for FakeStep<F>
where
F: Fn(u64) -> Result<(), Error>,
{
fn execute(&self) -> Result<(), BlackoutError> {
self.runs.set(self.runs.get() + 1);
(self.res)(self.runs.get())
}
}
/// Builds a `Test` with placeholder connection details and an explicit ssh key, so option
/// handling never has to consult the environment. Only the run-mode inputs vary.
fn fake_test(iterations: Option<u64>, run_until_failure: bool) -> Test {
    let opts = CommonOpts {
        block_device: String::from("/fake/block/device"),
        target: String::from("fake target"),
        seed: None,
        relay: None,
        iterations,
        run_until_failure,
        ssh_key: Some(String::from("/fake/ssh/key")),
        ssh_agent: false,
    };
    Test::new("fake package").add_options(opts)
}
#[test]
fn run_once_executes_steps_once() {
    // In run-once mode every step is executed exactly once, in order. The other run modes are
    // built on top of run-once, so with this covered the remaining tests only need to check
    // iteration counts and exit conditions.
    let first_done = Rc::new(Cell::new(false));
    let first_done_for_second = Rc::clone(&first_done);
    let (step1, step1_runs) = FakeStep::new(move |_| {
        assert!(!first_done.get(), "step one already executed");
        first_done.set(true);
        Ok(())
    });
    let (step2, step2_runs) = FakeStep::new(move |_| {
        assert!(first_done_for_second.get(), "step two executed before step one");
        Ok(())
    });
    fake_test(None, false)
        .add_step(Box::new(step1))
        .add_step(Box::new(step2))
        .run()
        .expect("failed to run test");
    assert_eq!(step1_runs.get(), 1);
    assert_eq!(step2_runs.get(), 1);
}
#[test]
fn run_once_exits_on_failure() {
    // A failing step must abort the run: later steps are never executed and the step's error is
    // what the run returns.
    let (step1, step1_runs) = FakeStep::new(|_| Ok(()));
    let (step2, step2_runs) = FakeStep::new(|_| {
        let fake_output = CommandError(
            ExitStatus::from_raw(1),
            "(fake stdout)".into(),
            "(fake stderr)".into(),
        );
        Err(BlackoutError::Verification(fake_output))
    });
    let (step3, step3_runs) = FakeStep::new(|_| panic!("step 3 should never be run"));
    let outcome = fake_test(None, false)
        .add_step(Box::new(step1))
        .add_step(Box::new(step2))
        .add_step(Box::new(step3))
        .run();
    match outcome {
        Ok(..) => panic!("test returned Ok on an error"),
        Err(BlackoutError::Verification(..)) => (),
        Err(..) => panic!("test returned an unexpected error"),
    }
    assert_eq!(step1_runs.get(), 1);
    assert_eq!(step2_runs.get(), 1);
    assert_eq!(step3_runs.get(), 0);
}
#[test]
fn run_n_executes_steps_n_times() {
    // In iterations mode the steps are executed `iterations` times, whatever result the steps
    // return. This case covers steps that always succeed.
    let expected_runs = 10;
    let (always_ok, run_counter) = FakeStep::new(|_| Ok(()));
    fake_test(Some(expected_runs), false)
        .add_step(Box::new(always_ok))
        .run()
        .expect("failed to run test");
    assert_eq!(run_counter.get(), expected_runs, "step wasn't run the expected number of times");
}
#[test]
fn run_n_executes_steps_n_times_verify_failure() {
    // Verification failures are tallied, not propagated: every iteration still runs and the
    // overall run reports success.
    let expected_runs = 10;
    let (always_fails_verification, run_counter) = FakeStep::new(|_| {
        let fake_output = CommandError(
            ExitStatus::from_raw(1),
            "(fake stdout)".into(),
            "(fake stderr)".into(),
        );
        Err(BlackoutError::Verification(fake_output))
    });
    fake_test(Some(expected_runs), false)
        .add_step(Box::new(always_fails_verification))
        .run()
        .expect("failed to run test");
    assert_eq!(run_counter.get(), expected_runs, "step wasn't run the expected number of times");
}
#[test]
fn run_n_executes_steps_n_times_other_error() {
    // Non-verification errors are tallied as well: every iteration still runs and the overall
    // run reports success.
    let expected_runs = 10;
    let (always_fails_ssh, run_counter) = FakeStep::new(|_| {
        let fake_output = CommandError(
            ExitStatus::from_raw(1),
            "(fake stdout)".into(),
            "(fake stderr)".into(),
        );
        Err(BlackoutError::Ssh("(fake target)".into(), fake_output))
    });
    fake_test(Some(expected_runs), false)
        .add_step(Box::new(always_fails_ssh))
        .run()
        .expect("failed to run test");
    assert_eq!(run_counter.get(), expected_runs, "step wasn't run the expected number of times");
}
}