blob: c547a82f58eaed46ff96bd65eef8065e3a29228f [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! library for host-side of filesystem integrity host-target interaction tests.
#![deny(missing_docs)]
use rand::random;
use std::fmt;
use std::path::PathBuf;
use std::process::{ExitStatus, Output};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
pub mod steps;
use ffx_config::global_env_context;
pub use steps::RebootType;
use steps::{LoadStep, RebootStep, SetupStep, TestStep, VerifyStep};
pub mod integration;
/// Partition label used for the test device when `CommonOpts::device_label` is not provided.
const BLACKOUT_DEVICE_LABEL: &str = "blackout";
/// Frames `message` between a top and a bottom rule drawn with box-drawing characters.
///
/// The result is three lines: the top rule, the message itself, and the bottom rule. Each rule is
/// two characters wider than the message. The width is measured in `char`s rather than bytes so
/// that a message containing non-ASCII text does not get over-long rules.
fn box_message(message: String) -> String {
    // `String::len` would count UTF-8 bytes, which over-counts every non-ASCII character.
    let line_len = message.chars().count() + 2;
    let line = "━".repeat(line_len);
    format!("┏{line}┓\n{message}\n┗{line}┛", message = message, line = line)
}
/// An error occurred running a command on the target system. Contains the exit status, stdout, and
/// stderr of the command.
///
/// The tuple fields are, in order: the exit status, the captured stdout, and the captured stderr.
/// The `Display` implementation generated from the `#[error(...)]` attribute prints all three,
/// with stdout and stderr each introduced on their own labelled line.
#[derive(Debug, Error)]
#[error(
"failed to run command: {}\n\
stdout:\n\
{}\n\
stderr:\n\
{}",
_0,
_1,
_2
)]
pub struct CommandError(ExitStatus, String, String);
impl From<Output> for CommandError {
    /// Convert the `std::process::Output` of a command into an error, turning the captured stdout
    /// and stderr bytes into strings.
    ///
    /// Output that is not valid UTF-8 is converted lossily (invalid sequences are replaced with
    /// U+FFFD) instead of panicking: this conversion runs while reporting a failure, and aborting
    /// there would hide the exit status and whatever output was readable.
    fn from(out: Output) -> Self {
        let stdout = String::from_utf8_lossy(&out.stdout).into_owned();
        let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
        CommandError(out.status, stdout, stderr)
    }
}
impl From<ffx_isolate::CommandOutput> for CommandError {
fn from(out: ffx_isolate::CommandOutput) -> Self {
CommandError(out.status, out.stdout, out.stderr)
}
}
/// An error occurred while attempting to reboot the system.
///
/// The `IoError` and `Command` variants carry `#[from]`, so `std::io::Error` and [`CommandError`]
/// values convert into this type automatically (for example through `?`).
#[derive(Debug, Error)]
pub enum RebootError {
/// The path to the relay device required for hard-rebooting the target doesn't exist. Carries
/// the path that was looked for.
#[error("device does not exist: {:?}", _0)]
MissingDevice(PathBuf),
/// An io error occurred during rebooting. Maybe we failed to write to the device.
#[error("io error: {:?}", _0)]
IoError(#[from] std::io::Error),
/// The command we executed on the target failed.
#[error("command error: {:?}", _0)]
Command(#[from] CommandError),
}
/// Error used for the host-side of the blackout library.
///
/// Only `AnyhowError`, `Reboot`, and `HostCommand` have `#[from]` conversions. The variants that
/// wrap a [`CommandError`] must be constructed explicitly, because the same payload type is used
/// to distinguish several different failure kinds.
#[derive(Debug, Error)]
pub enum BlackoutError {
/// Something went wrong!
#[error("error: {}", _0)]
AnyhowError(#[from] anyhow::Error),
/// We got an error when trying to reboot.
#[error("failed to reboot: {:?}", _0)]
Reboot(#[from] RebootError),
/// We failed to run the command on the host. Specifically, when the spawn or something fails,
/// not when the command itself returns a non-zero exit code.
#[error("host command failed: {:?}", _0)]
HostCommand(#[from] std::io::Error),
/// Timed out during target discovery.
#[error("no targets found after 5s: {:?}", _0)]
TargetDiscoveryTimeout(CommandError),
/// We got an error from the ffx command.
#[error("failed to run an ffx command: {:?}", _0)]
FfxError(CommandError),
/// A failure in the setup step.
#[error("failed to setup test: {:?}", _0)]
SetupError(CommandError),
/// Specifically the verification step failed. This indicates an actual test failure as opposed
/// to a failure of the test framework or environmental failure. The iteration run modes treat
/// this variant differently from every other error.
#[error("verification failed: {:?}", _0)]
Verification(CommandError),
}
/// Blackout is a power-failure testing framework for the filesystems. This host-side harness runs
/// operations on the configured target device for generating load on the filesystem, then reboots
/// the device after a certain amount of time using a configured reboot mechanism. By default, it
/// runs one iteration of this test. Options are provided for running the test until failure or
/// running the test N times and collecting failure statistics.
///
/// Note that `run_until_failure` is only accepted together with `iterations`; constructing a test
/// with `run_until_failure` set and `iterations` unset panics.
#[derive(Clone)]
pub struct CommonOpts {
/// The optional label for the partition to run the test on. If none is provided, a default will
/// be used.
pub device_label: Option<String>,
/// The optional path to the block device on the target device to use for testing. If none is
/// provided, the test will find an appropriate device. WARNING: the test can (and likely
/// will!) format this device. Don't use a main system partition!
pub device_path: Option<String>,
/// [Optional] A seed to use for all random operations. Tests are NOT deterministic relative to
/// the provided seed. The operations will be identical, but because of the non-deterministic
/// timing-dependent nature of the tests, the exact time the reboot is triggered in relation to
/// the operations is not guaranteed.
///
/// One will be randomly generated if not provided. When performing the same test multiple times
/// in one run, a new seed will be generated for each run if one was not provided.
pub seed: Option<u64>,
/// Reboot type. There are three options:
///
/// 1. Soft reboot - we reboot the system using ffx target reboot.
/// 2. Hard reboot with a serial power relay - we reboot the system by writing bytes to a
///    serial device that we assume is a power relay. Includes a path to the power relay.
///    Probably the highest-numbered /dev/ttyUSB[N]. If in doubt, try removing it and seeing
///    what disappears from /dev.
/// 3. Hard reboot with the infra dmc command - we reboot the system by calling the dmc binary
///    provided by infra. This command cycles the power for us using some kind of http
///    accessible power strip, but the details are abstracted behind the set-power-state
///    command.
pub reboot: RebootType,
/// Run the test N number of times, collecting statistics on the number of failures.
pub iterations: Option<u64>,
/// Run the test until a verification failure is detected, then exit. Requires `iterations` to
/// be set as an upper bound on the number of runs.
pub run_until_failure: bool,
}
/// The seed for a run of the test.
///
/// Cloning a `Variable` seed clones the `Arc`, not the number inside it, so every clone observes
/// the same value, including after the seed has been re-rolled.
#[derive(Clone, Debug)]
pub enum Seed {
/// The seed is constant over multiple runs of the test.
Constant(u64),
/// The seed is a random value for every run of the test, generated by `random`.
Variable(Arc<AtomicU64>),
}
impl Seed {
    /// Wrap a user-provided seed as a constant seed, or start a variable seed from a fresh random
    /// value when none was provided.
    fn new(maybe_seed: Option<u64>) -> Seed {
        maybe_seed
            .map_or_else(|| Seed::Variable(Arc::new(AtomicU64::new(random()))), Seed::Constant)
    }

    /// Replace a variable seed with a new random value. Constant seeds are left untouched.
    fn reroll(&self) {
        if let Seed::Variable(cell) = self {
            cell.store(random(), Ordering::Relaxed);
        }
    }

    /// The current numeric value of the seed.
    fn get(&self) -> u64 {
        match *self {
            Seed::Variable(ref cell) => cell.load(Ordering::Relaxed),
            Seed::Constant(value) => value,
        }
    }
}
impl fmt::Display for Seed {
    /// Writes the seed's current numeric value, regardless of which variant holds it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let current: u64 = match self {
            Seed::Variable(cell) => cell.load(Ordering::Relaxed),
            Seed::Constant(value) => *value,
        };
        write!(f, "{}", current)
    }
}
/// How many times a test's steps are run, and what ends the run early. Derived from the
/// `iterations` and `run_until_failure` options when the test is constructed.
#[derive(Clone, Debug)]
enum RunMode {
/// Run the steps a single time and return that run's result.
Once,
/// Run the steps the given number of times, tallying verification failures and other errors.
Iterations(u64),
/// Run the steps up to the given number of times, stopping at the first verification failure.
IterationsUntilFailure(u64),
}
/// Test definition. This contains all the information to make a test reproducible in a particular
/// environment, and allows host binaries to configure the steps taken by the test.
struct Test {
/// Name of the package; handed to the setup, load, and verify steps.
package: String,
/// Name of the component; handed to the setup, load, and verify steps.
component: String,
/// Seed for the test; re-rolled at the start of every run.
seed: Seed,
/// Label of the partition to test on. Falls back to `BLACKOUT_DEVICE_LABEL` when the options
/// don't provide one.
device_label: String,
/// Optional path to the block device on the target, taken from the options as-is.
device_path: Option<String>,
/// The reboot mechanism handed to the reboot step.
reboot_type: RebootType,
/// How many times to run the steps and when to stop.
run_mode: RunMode,
/// The steps to execute, in the order they were added.
steps: Vec<Box<dyn TestStep>>,
}
impl fmt::Display for Test {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Test {{
package: {:?},
component: {:?},
seed: {:?},
device_label: {:?},
device_path: {:?},
reboot_type: {:?},
run_mode: {:?},
}}",
self.package,
self.component,
self.seed,
self.device_label,
self.device_path,
self.reboot_type,
self.run_mode,
)
}
}
impl Test {
    /// Create a new test with the provided name. The name needs to match the associated package
    /// that will be present on the target device. The package should be callable with `run` from
    /// the command line.
    ///
    /// The device label falls back to `BLACKOUT_DEVICE_LABEL` when `opts.device_label` is `None`,
    /// and the run mode is derived from `opts.iterations` and `opts.run_until_failure`. The new
    /// test has no steps; add them with [`Test::add_step`].
    ///
    /// # Panics
    ///
    /// Panics if `opts.run_until_failure` is set while `opts.iterations` is `None`.
    pub fn new_component(package: &'static str, component: &'static str, opts: CommonOpts) -> Test {
        Test {
            package: package.to_string(),
            component: component.to_string(),
            seed: Seed::new(opts.seed),
            // Only allocate the default label when the caller didn't provide one.
            device_label: opts
                .device_label
                .unwrap_or_else(|| BLACKOUT_DEVICE_LABEL.to_string()),
            device_path: opts.device_path,
            reboot_type: opts.reboot,
            run_mode: match (opts.iterations, opts.run_until_failure) {
                (None, false) => RunMode::Once,
                (None, true) => panic!("run until failure requires multiple iterations"),
                (Some(iterations), false) => RunMode::Iterations(iterations),
                (Some(iterations), true) => RunMode::IterationsUntilFailure(iterations),
            },
            steps: Vec::new(),
        }
    }

    /// Add a custom test step implementation. Steps are executed in the order they are added.
    /// Returns `self` so calls can be chained.
    pub fn add_step(&mut self, step: Box<dyn TestStep>) -> &mut Self {
        self.steps.push(step);
        self
    }

    /// Run the defined test steps once, in order, stopping at the first step that returns an
    /// error. Re-rolls the seed before the run (a no-op for a constant seed) and prints the test
    /// definition before execution.
    async fn run_test(&self) -> Result<(), BlackoutError> {
        self.seed.reroll();
        println!("{}", self);
        for step in &self.steps {
            step.execute().await?;
        }
        Ok(())
    }

    /// Run the test `iterations` times, printing a running tally after every run: the number of
    /// runs, verification failures, other errors, and the failure percentage among the runs that
    /// did not hit a non-verification error.
    ///
    /// Errors from individual runs are only counted, never returned; this always returns `Ok`.
    async fn run_iterations(&self, iterations: u64) -> Result<(), BlackoutError> {
        let mut failures = 0u64;
        let mut flukes = 0u64;
        // Inclusive range: `1..iterations + 1` would overflow for `u64::MAX`.
        for runs in 1..=iterations {
            println!("{}", box_message(format!("test run #{}", runs)));
            match self.run_test().await {
                Ok(()) => (),
                Err(BlackoutError::Verification(_)) => failures += 1,
                Err(_) => flukes += 1,
            }
            println!("runs: {}", runs);
            println!("failures: {}", failures);
            println!("non-verification errors: {}", flukes);
            // Runs that hit a non-verification error say nothing about the filesystem, so they
            // are excluded from the denominator. If that leaves nothing, there is no meaningful
            // percentage to report; dividing anyway would print "NaN%".
            let conclusive_runs = runs - flukes;
            if conclusive_runs == 0 {
                println!("failure percentage: n/a (every run hit a non-verification error)");
            } else {
                println!(
                    "failure percentage: {:.2}%",
                    (failures as f64 / conclusive_runs as f64) * 100.0
                );
            }
        }
        Ok(())
    }

    /// Run the test up to `iterations` times, returning the first verification error
    /// encountered. Non-verification errors are ignored and the loop moves on to the next run.
    /// Returns `Ok` if no run produced a verification error.
    async fn run_iterations_until_failure(&self, iterations: u64) -> Result<(), BlackoutError> {
        // Inclusive range: `1..iterations + 1` would overflow for `u64::MAX`.
        for runs in 1..=iterations {
            println!("{}", box_message(format!("test run #{}", runs)));
            match self.run_test().await {
                Ok(()) => (),
                Err(e @ BlackoutError::Verification(_)) => return Err(e),
                Err(_) => (),
            }
        }
        Ok(())
    }

    /// Run the test according to the run mode chosen when it was constructed.
    ///
    /// There are three possible types of test runs:
    ///
    /// 1. One iteration (iterations == None && run_until_failure == false).
    ///    This is the simplest case: run the steps once and return the result.
    /// 2. Some number of iterations (iterations == Some(N) && run_until_failure == false).
    ///    Run the steps N times, printing a tally of verification failures and other errors
    ///    after every run. Errors from individual runs are not returned.
    /// 3. Run until verification failure, with a max number of iterations
    ///    (iterations == Some(N) && run_until_failure == true).
    ///    Run the steps up to N times, but quit early and return the error as soon as a
    ///    verification failure occurs. Other errors are not tabulated.
    ///
    /// Running until failure without a bound (iterations == None && run_until_failure == true) is
    /// not a run mode: `new_component` panics on that combination.
    pub async fn run(self) -> Result<(), BlackoutError> {
        match self.run_mode {
            RunMode::Once => self.run_test().await,
            RunMode::Iterations(iterations) => self.run_iterations(iterations).await,
            RunMode::IterationsUntilFailure(iterations) => {
                self.run_iterations_until_failure(iterations).await
            }
        }
    }
}
/// A test environment. This wraps a test configuration with the environmental context to run it,
/// such as the isolated ffx instance.
pub struct TestEnv {
/// The test definition that the `*_step` methods append steps to and that `run` executes.
test: Test,
/// The isolated ffx instance. A clone of this `Arc` is handed to every step that gets added.
isolated_ffx: Arc<ffx_isolate::Isolate>,
}
impl TestEnv {
    /// Create a test environment for the named package and component. The name needs to match the
    /// associated package that will be present on the target device, and the package should be
    /// callable with `ffx component run` from the command line.
    ///
    /// This reads the `ssh.priv` key from the global ffx environment context and uses it to build
    /// an isolated ffx instance named `blackout-ffx`, then constructs the test definition from
    /// `opts`.
    ///
    /// NOTE(review): earlier documentation said target discovery (via $FUCHSIA_NODENAME or the
    /// first enumerated target) also happens here. That is not visible in this function;
    /// presumably it happens inside the isolate or the individual steps - confirm.
    ///
    /// # Panics
    ///
    /// Panics if there is no global environment context, if the ssh key can't be read from it, or
    /// if creating the isolated ffx instance fails. Also panics if `opts` asks to run until
    /// failure without an iteration count.
    pub async fn new(package: &'static str, component: &'static str, opts: CommonOpts) -> TestEnv {
        let context = global_env_context().expect("No global context");
        let ssh_key = context.get::<String, _>("ssh.priv").expect("could not get ssh key").into();
        let isolate = ffx_isolate::Isolate::new_with_sdk("blackout-ffx", ssh_key, &context)
            .await
            .expect("failed to make new isolated ffx");
        let isolated_ffx = Arc::new(isolate);
        let test = Test::new_component(package, component, opts);
        TestEnv { test, isolated_ffx }
    }

    /// Append a setup step, which prepares the filesystem the way the test wants it. The step is
    /// built from the shared ffx instance and the test's package, component, seed, device label,
    /// and device path.
    pub fn setup_step(&mut self) -> &mut Self {
        let step = SetupStep::new(
            Arc::clone(&self.isolated_ffx),
            &self.test.package,
            &self.test.component,
            self.test.seed.clone(),
            &self.test.device_label,
            self.test.device_path.clone(),
        );
        self.test.add_step(Box::new(step));
        self
    }

    /// Append a load step, which generates load on the device. It takes the same inputs as the
    /// setup step plus an optional `duration`, which is passed to the step unchanged.
    pub fn load_step(&mut self, duration: Option<Duration>) -> &mut Self {
        let step = LoadStep::new(
            Arc::clone(&self.isolated_ffx),
            &self.test.package,
            &self.test.component,
            self.test.seed.clone(),
            &self.test.device_label,
            self.test.device_path.clone(),
            duration,
        );
        self.test.add_step(Box::new(step));
        self
    }

    /// Append a reboot step, which reboots the target machine using the test's configured reboot
    /// type. `bootserver` is passed to the step unchanged.
    pub fn reboot_step(&mut self, bootserver: bool) -> &mut Self {
        let step =
            RebootStep::new(Arc::clone(&self.isolated_ffx), &self.test.reboot_type, bootserver);
        self.test.add_step(Box::new(step));
        self
    }

    /// Append a verify step, which checks the state of the filesystem after the reboot. It takes
    /// the same inputs as the setup step plus the retry policy: the verification is attempted
    /// `num_retries` times, with `retry_timeout` between attempts.
    pub fn verify_step(&mut self, num_retries: u32, retry_timeout: Duration) -> &mut Self {
        let step = VerifyStep::new(
            Arc::clone(&self.isolated_ffx),
            &self.test.package,
            &self.test.component,
            self.test.seed.clone(),
            &self.test.device_label,
            self.test.device_path.clone(),
            num_retries,
            retry_timeout,
        );
        self.test.add_step(Box::new(step));
        self
    }

    /// Run the configured test, consuming the environment. How often the steps run depends on
    /// the options the environment was created with:
    ///
    /// 1. One iteration (iterations == None && run_until_failure == false): the steps run once
    ///    and that result is returned.
    /// 2. Some number of iterations (iterations == Some(N) && run_until_failure == false): the
    ///    steps run N times and failure statistics are printed along the way.
    /// 3. Run until verification failure, bounded (iterations == Some(N) &&
    ///    run_until_failure == true): the steps run up to N times, quitting early with the error
    ///    on the first verification failure.
    pub async fn run(self) -> Result<(), BlackoutError> {
        self.test.run().await
    }
}
#[cfg(test)]
mod tests {
    use super::{
        BlackoutError as Error, BlackoutError, CommandError, CommonOpts, RebootType, Test, TestStep,
    };
    use async_trait::async_trait;
    use fuchsia_async as fasync;
    use std::os::unix::process::ExitStatusExt;
    use std::process::ExitStatus;
    use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
    use std::sync::Arc;

    /// A test step whose result is produced by a closure. The closure receives the number of
    /// times the step has been executed so far, counting the current execution.
    struct FakeStep<F>
    where
        F: Fn(u64) -> Result<(), Error> + Send + Sync,
    {
        res: F,
        runs: Arc<AtomicU64>,
    }

    impl<F> FakeStep<F>
    where
        F: Fn(u64) -> Result<(), Error> + Send + Sync,
    {
        /// Returns the step along with a handle to its execution counter, so a test can still
        /// inspect the counter after the step has been moved into a `Test`.
        fn new(res: F) -> (FakeStep<F>, Arc<AtomicU64>) {
            let runs = Arc::new(AtomicU64::new(0));
            (FakeStep { res, runs: runs.clone() }, runs)
        }
    }

    #[async_trait]
    impl<F> TestStep for FakeStep<F>
    where
        F: Fn(u64) -> Result<(), Error> + Send + Sync,
    {
        async fn execute(&self) -> Result<(), BlackoutError> {
            // `fetch_add` returns the previous value; deriving the run number from it avoids
            // reading the counter a second time.
            let run = self.runs.fetch_add(1, Ordering::Relaxed) + 1;
            (self.res)(run)
        }
    }

    /// A test with no steps, default device settings, and a software reboot type.
    fn fake_test(iterations: Option<u64>, run_until_failure: bool) -> Test {
        let opts = CommonOpts {
            device_label: None,
            device_path: None,
            seed: None,
            reboot: RebootType::Software,
            iterations,
            run_until_failure,
        };
        Test::new_component("fake_package", "fake_component", opts)
    }

    /// The command failure payload shared by all the fake step errors.
    fn fake_command_error() -> CommandError {
        CommandError(ExitStatus::from_raw(1), "(fake stdout)".into(), "(fake stderr)".into())
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_once_executes_steps_once() {
        // using the run once mode, we execute all our test steps once, in order. run once is how
        // all the other run modes execute their test steps, so if this is solid then we just test
        // iterations and exit modes for the rest.
        let step1_exec1 = Arc::new(AtomicBool::new(false));
        let step1_exec2 = step1_exec1.clone();
        let step1_func = move |_| {
            assert!(!step1_exec1.load(Ordering::Relaxed), "step one already executed");
            step1_exec1.store(true, Ordering::Relaxed);
            Ok(())
        };
        let step2_func = move |_| {
            assert!(step1_exec2.load(Ordering::Relaxed), "step two executed before step one");
            Ok(())
        };
        let (step1, step1_runs) = FakeStep::new(step1_func);
        let (step2, step2_runs) = FakeStep::new(step2_func);
        let mut test = fake_test(None, false);
        test.add_step(Box::new(step1)).add_step(Box::new(step2));
        test.run().await.expect("failed to run test");
        assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
        assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_once_exits_on_failure() {
        let (step1, step1_runs) = FakeStep::new(|_| Ok(()));
        let (step2, step2_runs) =
            FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
        let (step3, step3_runs) = FakeStep::new(|_| panic!("step 3 should never be run"));
        let mut test = fake_test(None, false);
        test.add_step(Box::new(step1)).add_step(Box::new(step2)).add_step(Box::new(step3));
        match test.run().await {
            Err(BlackoutError::Verification(..)) => (),
            Ok(..) => panic!("test returned Ok on an error"),
            Err(..) => panic!("test returned an unexpected error"),
        }
        assert_eq!(step1_runs.load(Ordering::Relaxed), 1);
        assert_eq!(step2_runs.load(Ordering::Relaxed), 1);
        assert_eq!(step3_runs.load(Ordering::Relaxed), 0);
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_n_executes_steps_n_times() {
        // using the iterations mode, we execute all our test steps `iterations` number of times. we
        // expect that no matter the error value returned from the test, we will execute the
        // expected number of times.
        let iterations = 10;
        let (step, runs) = FakeStep::new(|_| Ok(()));
        let mut test = fake_test(Some(iterations), false);
        test.add_step(Box::new(step));
        test.run().await.expect("failed to run test");
        assert_eq!(
            runs.load(Ordering::Relaxed),
            iterations,
            "step wasn't run the expected number of times"
        );
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_n_executes_steps_n_times_verify_failure() {
        let iterations = 10;
        let (step, runs) =
            FakeStep::new(|_| Err(BlackoutError::Verification(fake_command_error())));
        let mut test = fake_test(Some(iterations), false);
        test.add_step(Box::new(step));
        test.run().await.expect("failed to run test");
        assert_eq!(
            runs.load(Ordering::Relaxed),
            iterations,
            "step wasn't run the expected number of times"
        );
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_n_executes_steps_n_times_other_error() {
        let iterations = 10;
        let (step, runs) = FakeStep::new(|_| Err(BlackoutError::FfxError(fake_command_error())));
        let mut test = fake_test(Some(iterations), false);
        test.add_step(Box::new(step));
        test.run().await.expect("failed to run test");
        assert_eq!(
            runs.load(Ordering::Relaxed),
            iterations,
            "step wasn't run the expected number of times"
        );
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_until_failure_stops_at_first_verification_failure() {
        // in the run-until-failure mode, the first verification failure ends the test early and
        // is returned to the caller.
        let iterations = 10;
        let failing_run = 3;
        let (step, runs) = FakeStep::new(move |run| {
            if run == failing_run {
                Err(BlackoutError::Verification(fake_command_error()))
            } else {
                Ok(())
            }
        });
        let mut test = fake_test(Some(iterations), true);
        test.add_step(Box::new(step));
        match test.run().await {
            Err(BlackoutError::Verification(..)) => (),
            Ok(..) => panic!("test returned Ok on a verification failure"),
            Err(..) => panic!("test returned an unexpected error"),
        }
        assert_eq!(
            runs.load(Ordering::Relaxed),
            failing_run,
            "test kept running after the verification failure"
        );
    }

    #[fasync::run_singlethreaded(test)]
    async fn run_until_failure_ignores_other_errors() {
        // errors other than verification failures don't end the run-until-failure mode early.
        let iterations = 10;
        let (step, runs) = FakeStep::new(|_| Err(BlackoutError::FfxError(fake_command_error())));
        let mut test = fake_test(Some(iterations), true);
        test.add_step(Box::new(step));
        test.run().await.expect("failed to run test");
        assert_eq!(
            runs.load(Ordering::Relaxed),
            iterations,
            "step wasn't run the expected number of times"
        );
    }

    #[test]
    #[should_panic(expected = "run until failure requires multiple iterations")]
    fn run_until_failure_requires_iterations() {
        let _ = fake_test(None, true);
    }
}