src/power/power-manager/src/cpu_manager.rs - fuchsia - Git at Google

 // Copyright 2021 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 use crate::error::PowerManagerError;
 use crate::message::{Message, MessageResult, MessageReturn};
 use crate::node::Node;
 use crate::types::{NormPerfs, PState, Watts};
 use anyhow::{bail, format_err, Error};
 use async_trait::async_trait;
 use fuchsia_inspect::{self as inspect, ArrayProperty as _, Property as _};
 use fuchsia_zircon::sys;
 use serde_derive::Deserialize;
 use serde_json as json;
 use std::cell::Cell;
 use std::collections::HashMap;
 use std::convert::TryInto as _;
 use std::fmt::Debug;
 use std::rc::Rc;

 /// Node: CpuManager
 ///
 /// Summary: Provides high-level management of all CPU domains in the system, coordinating both
 ///          driver- and kernel-level activity. Currently only administers CPU throttling, but in
 ///          the longer term is meant to become a standalone component that provides a FIDL
 ///          interface for managing DVFS.
 ///
 /// Handles Messages:
 ///     - SetMaxPowerConsumption
 ///
 /// Sends Messages:
 ///     - GetCpuLoads
 ///     - GetCpuPerformanceStates
 ///     - GetPerformanceState
 ///     - SetPerformanceState
 ///     - SetCpuPerformanceInfo
 ///
 /// FIDL dependencies: No direct dependencies

 // NOTE(fxbug.dev/85815): The thermal state configuration for Sherlock is generated using a process
 // described in this bug.
 // TODO(fxbug.dev/85813): Move this comment to the sherlock node config.

 /// A pair of bounds on a value.
 ///
 /// Nothing in the type enforces `lower <= upper`; code that constructs a `Range` is responsible
 /// for ordering the bounds.
 #[derive(Clone, Copy, Debug)]
 struct Range<T>
 where
     T: Clone + Copy + Debug,
 {
     /// Upper bound.
     upper: T,
     /// Lower bound.
     lower: T,
 }

 /// A value that is either known exactly or only known to lie within a [`Range`].
 #[derive(Clone, Copy, Debug)]
 enum RangedValue<T>
 where
     T: Clone + Copy + Debug,
 {
     /// The exact value.
     Known(T),
     /// Bounds on a value that could not be determined exactly.
     InRange(Range<T>),
 }

 // The `lower()` and `upper()` methods allow a Known value to be treated as a singleton range
 // without adding extra complexity at the call site.
 impl<T: Clone + Copy + Debug> RangedValue<T> {
     fn lower(&self) -> T {
         match self {
             &Self::Known(value) => value,
             &Self::InRange(range) => range.lower,
         }
     }

     fn upper(&self) -> T {
         match self {
             &Self::Known(value) => value,
             &Self::InRange(range) => range.upper,
         }
     }
 }

 /// Runtime representation of a CPU cluster.
 struct CpuCluster {
     /// Name of the cluster, for logging purposes only.
     name: String,

     /// This cluster's index in CpuManager's ordering of all clusters. Also used to look up this
     /// cluster's entry in each thermal state's `cluster_pstates`.
     cluster_index: usize,

     /// Handler that manages the corresponding CPU driver. Must respond to:
     ///  - GetPerformanceState
     ///  - SetPerformanceState
     ///  - GetCpuPerformanceStates
     handler: Rc<dyn Node>,

     /// Logical CPU numbers of the CPUs in this cluster.
     logical_cpu_numbers: Vec<u32>,

     /// Normalized performance of this cluster per GHz of CPU speed.
     performance_per_ghz: NormPerfs,

     /// All P-states supported by this cluster. The handler guarantees that they are sorted
     /// primarily by frequency and secondarily by voltage.
     pstates: Vec<PState>,

     /// Index of this cluster's current P-state. If an update to the P-state fails, we will assume
     /// that it is between the previous and desired states (inclusive), so that pessimistic guesses
     /// of the P-state may be used accordingly.
     // TODO(fxbug.dev/84685): Look into richer specification of failure modes in the CPU device
     // protocols.
     current_pstate: Cell<RangedValue<usize>>,
 }

 impl CpuCluster {
     /// Given fractional loads for all system CPUs, gives this cluster's corresponding load and its
     /// estimated normalized performance. If the current P-state is unknown, the highest-possible
     /// frequency will be used to ensure that performance (and thus contribution to thermals) is
     /// not underestimated.
     ///
     /// `all_cpu_loads` is indexed by logical CPU number. The returned load is the sum of the
     /// fractional loads of this cluster's CPUs, so it is measured in cores rather than as a
     /// fraction of the cluster.
     ///
     /// # Panics
     ///
     /// Panics if `all_cpu_loads` has no entry for one of this cluster's logical CPU numbers.
     fn process_fractional_loads(&self, all_cpu_loads: &[f32]) -> (f32, NormPerfs) {
         let cluster_load: f32 =
             self.logical_cpu_numbers.iter().map(|&cpu| all_cpu_loads[cpu as usize]).sum();

         // P-states are sorted with frequency as primary key, so the lowest-possible index has the
         // highest-possible frequency.
         let pstate_index = self.current_pstate.get().lower();
         let frequency = self.pstates[pstate_index].frequency;

         let performance =
             self.performance_per_ghz.mul_scalar(frequency.0 / 1e9 * cluster_load as f64);
         (cluster_load, performance)
     }

     /// Returns the performance this cluster delivers at the indicated P-state when every one of
     /// its CPUs is fully loaded.
     ///
     /// Panics if `pstate_index` is not a valid index into this cluster's P-states.
     fn get_performance_capacity(&self, pstate_index: usize) -> NormPerfs {
         let hertz = self.pstates[pstate_index].frequency.0;
         let core_count = self.logical_cpu_numbers.len() as f64;
         self.performance_per_ghz.mul_scalar(core_count * hertz / 1e9)
     }

     // Sends the kernel the performance scale that each CPU of this cluster has at
     // `target_pstate`. The scale is the cluster's per-GHz performance multiplied by the P-state's
     // frequency in GHz; conversion or messaging failures are returned to the caller.
     async fn update_kernel_performance_info(
         &self,
         syscall_handler: &Rc<dyn Node>,
         target_pstate: &PState,
     ) -> Result<(), PowerManagerError> {
         let ghz = target_pstate.frequency.0 / 1e9;
         let performance_scale: sys::zx_cpu_performance_scale_t =
             self.performance_per_ghz.mul_scalar(ghz).try_into()?;

         // Every CPU in the cluster shares the same scale.
         let mut performance_info = Vec::with_capacity(self.logical_cpu_numbers.len());
         for &logical_cpu_number in &self.logical_cpu_numbers {
             performance_info
                 .push(sys::zx_cpu_performance_info_t { logical_cpu_number, performance_scale });
         }

         let response = syscall_handler
             .handle_message(&Message::SetCpuPerformanceInfo(performance_info))
             .await?;
         match response {
             MessageReturn::SetCpuPerformanceInfo => Ok(()),
             other => panic!("Unexpected SetCpuPerformanceInfo result: {:?}", other),
         }
     }

     // Carries out a P-state change for this cluster.
     //
     // The kernel's performance info and the driver's P-state are updated in an order that depends
     // on the direction of the frequency change: the kernel is told first when frequency drops, and
     // only after a successful driver update otherwise. On a driver failure, `current_pstate` is
     // refreshed (exactly if the driver can be queried, as a range otherwise) and the error is
     // returned.
     async fn update_pstate(
         &self,
         syscall_handler: &Rc<dyn Node>,
         index: usize,
     ) -> Result<(), PowerManagerError> {
         // Trace the requested index, even if the request turns out to be a no-op below.
         fuchsia_trace::counter!(
             "power_manager",
             "CpuManager P-state",
             self.cluster_index as u64,
             &self.name => index as u32
         );

         // If the current P-state is known and equal to the new one, no update is needed.
         if let RangedValue::Known(current) = self.current_pstate.get() {
             if current == index {
                 return Ok(());
             }
         }

         // If the P-state is unknown, the lowest-possible frequency (highest-possible P-state index)
         // is what was used to inform the last `update_kernel_performance_info` call.
         let current_frequency = self.pstates[self.current_pstate.get().upper()].frequency;

         let target_pstate = &self.pstates[index];

         // If lowering frequency, we update the kernel before changing P-states. Otherwise, the
         // kernel will be updated after the change.
         let kernel_updated = if target_pstate.frequency < current_frequency {
             self.update_kernel_performance_info(&syscall_handler, target_pstate).await?;
             true
         } else {
             false
         };

         // If the current P-state is unknown or not equal to the new one, attempt an update.
         match self.handler.handle_message(&Message::SetPerformanceState(index as u32)).await {
             Ok(MessageReturn::SetPerformanceState) => {
                 self.current_pstate.set(RangedValue::Known(index));

                 // Frequency did not drop, so the kernel has not yet been told about the new
                 // P-state.
                 if !kernel_updated {
                     self.update_kernel_performance_info(&syscall_handler, target_pstate).await?;
                 }
                 Ok(())
             }
             Ok(r) => {
                 // Programming error
                 panic!("Wrong response type for SetPerformanceState: {:?}", r);
             }
             Err(e) => {
                 log::error!("SetPerformanceState failed: {:?}", e);

                 // If the update failed, query the value, so at least the current state is known. If
                 // that fails, too, record a range of possible values based on the previous and
                 // desired values. Regardless, propagate the error from SetPerformanceState.
                 match self.handler.handle_message(&Message::GetPerformanceState).await {
                     Ok(MessageReturn::GetPerformanceState(i)) => {
                         self.current_pstate.set(RangedValue::Known(i as usize));
                     }
                     result => {
                         log::error!("Unexpected result from GetPerformanceState: {:?}", result);
                         // Widen the previous bounds just enough to include the requested index.
                         let range = Range {
                             lower: std::cmp::min(self.current_pstate.get().lower(), index),
                             upper: std::cmp::max(self.current_pstate.get().upper(), index),
                         };
                         self.current_pstate.set(RangedValue::InRange(range));
                     }
                 }

                 // If we already updated the kernel, make a new update using the lowest-possible
                 // CPU frequency (highest-possible P-state index) to provide a pessimistic estimate
                 // of CPU performance.
                 // NOTE(review): if this kernel update itself fails, `?` returns that error in
                 // place of the SetPerformanceState error `e` -- confirm that this is intended.
                 if kernel_updated {
                     let pstate = &self.pstates[self.current_pstate.get().upper()];
                     self.update_kernel_performance_info(&syscall_handler, pstate).await?;
                 }

                 Err(e)
             }
         }
     }
 }

 /// Cross-cluster CPU thermal state
 ///
 /// The power of a thermal state for a given performance in NormPerfs is modeled as
 ///
 /// ```text
 /// power = static_power + dynamic_power_per_normperf * performance.
 /// ```
 #[derive(Clone, Debug)]
 struct ThermalState {
     /// Index of the P-state to be used for each CPU cluster.
     cluster_pstates: Vec<usize>,

     /// Minimum performance at which this thermal state will be used. At low performance values,
     /// it is common for different thermal states to have very similar power requirements. The
     /// minimum performance is used to force a particular choice between such states.
     min_performance: NormPerfs,

     /// Static (fixed) power draw of this thermal state.
     static_power: Watts,

     /// Power draw per unit of normalized performance, assuming that load is perfectly balanced
     /// across CPUs.
     ///
     /// If there is only one cluster, and it is modeled using the standard switching power model
     ///     switching_power = capacitance * voltage**2 * operation_rate,
     /// then dynamic_power_per_normperf would be
     ///     operation_rate * performance_per_ghz * capacitance * voltage**2.
     /// The multi-cluster case is somewhat more complicated, and we furthermore don't require use
     /// of the switching power model. But the voltage term captures a typical way that this value
     /// depends on the underlying P-states.
     dynamic_power_per_normperf: Watts,

     /// Maximum performance that this thermal state can provide, i.e. the performance that will be
     /// achieved when all CPUs are saturated. This value is derived directly from the P-states
     /// specified by this thermal state.
     ///
     /// The term "capacity" is used in agreement with the kernel scheduler.
     performance_capacity: NormPerfs,
 }

 /// A paired performance and power estimate, as produced by
 /// `ThermalState::estimate_perf_and_power`.
 struct PerfAndPower {
     /// Estimated performance.
     performance: NormPerfs,
     /// Estimated power draw at `performance`.
     power: Watts,
 }

 impl ThermalState {
     /// Estimates what this thermal state would deliver and draw under `performance_model`.
     ///
     /// A `Saturated` model runs at this state's full capacity. A `FixedValue` model requests a
     /// specific performance, which is capped at the capacity. Power is the static power plus the
     /// dynamic power for the resulting performance.
     fn estimate_perf_and_power(&self, performance_model: PerformanceModel) -> PerfAndPower {
         let performance = if let FixedValue(requested) = performance_model {
             NormPerfs::min(self.performance_capacity, requested)
         } else {
             self.performance_capacity
         };

         let power =
             self.static_power + self.dynamic_power_per_normperf.mul_scalar(performance.0);
         PerfAndPower { performance, power }
     }

     /// Convenience wrapper around `estimate_perf_and_power` that keeps only the power.
     fn estimate_power(&self, performance_model: PerformanceModel) -> Watts {
         let PerfAndPower { power, .. } = self.estimate_perf_and_power(performance_model);
         power
     }
 }

 /// Validates that a list of thermal states are valid, meeting the criteria:
 ///  - The list is non-empty;
 ///  - State 0 has min_performance of 0.0;
 ///  - For any input `desired_performance`, the admissible states (states for which
 ///    `state.min_performance <= desired_performance`) are in order of strictly decreasing modeled
 ///    power.
 ///
 /// To address the power criterion, two further definitions are necessary:
 ///  - Power is modeled as
 ///
 ///    ```text
 ///      power = static_power
 ///              + dynamic_power_per_normperf * min(desired_performance, performance_capacity).
 ///    ```
 ///
 ///  - Two states are *adjacent* at `desired_performance` if and only if they are both admissible at
 ///    `desired_performance`, and no state between them in the full list of states is admissible.
 ///
 /// Power is strictly decreasing if the following conditions are met:
 ///  - dynamic_power_per_normperf is non-increasing;
 ///  - performance_capacity is non-increasing;
 ///  - When any two states are adjacent, the lower-index state draws more power at the first
 ///    performance value at which they are adjacent.
 ///
 /// The first two conditions guarantee that the power delta between a lower-index state and a
 /// higher-index state is non-decreasing with respect to performance. The third condition implies
 /// that the power delta between any two adjacent states is initially positive; since the delta is
 /// non-decreasing by the first two conditions, it will continue to be positive for all subsequent
 /// performance values.
 ///
 /// Returns an error describing the first violated criterion, or `Ok(())` if all of them hold.
 fn validate_thermal_states(states: &Vec<ThermalState>) -> Result<(), Error> {
     // An empty list has no state 0. Report that as a configuration error instead of panicking on
     // the index below.
     anyhow::ensure!(!states.is_empty(), "At least one thermal state must be specified.");

     anyhow::ensure!(
         states[0].min_performance == NormPerfs(0.0),
         "State 0 ({:?}) must have min_performance == 0.0.",
         states[0]
     );

     // Each window is a state paired with its immediate successor.
     for (i, pair) in states.windows(2).enumerate() {
         let (state, next_state) = (&pair[0], &pair[1]);

         anyhow::ensure!(
             next_state.dynamic_power_per_normperf <= state.dynamic_power_per_normperf,
             "Thermal states' dynamic_power_per_normperf must be non-increasing; violated by \
             {:?} and {:?}",
             state,
             next_state
         );
         anyhow::ensure!(
             next_state.performance_capacity <= state.performance_capacity,
             "Thermal states' performance_capacity must be non-increasing; violated by {:?} and \
             {:?}",
             state,
             next_state
         );

         // Compare `state` to each state further down the list that will be adjacent to it at some
         // performance value. Verify that the adjacent state has lower power at the first shared
         // performance.
         for adjacent_state in &states[i + 1..] {
             let first_shared_performance =
                 FixedValue(NormPerfs::max(state.min_performance, adjacent_state.min_performance));
             let power = state.estimate_power(first_shared_performance);
             let adjacent_power = adjacent_state.estimate_power(first_shared_performance);

             anyhow::ensure!(
                 adjacent_power < power,
                 "Power is not strictly decreasing at performance {:?}; state {:?} consumes {:?}, \
                 while state {:?} consumes {:?}.",
                 first_shared_performance,
                 state,
                 power,
                 adjacent_state,
                 adjacent_power
             );

             if adjacent_state.min_performance <= state.min_performance {
                 // `adjacent_state` is always between `state` and any state later in the list.
                 break;
             }
         }
     }

     Ok(())
 }

 // Configuration structs for CpuManagerBuilder.

 /// Deserialized configuration of one CPU cluster.
 #[derive(Clone, Deserialize)]
 struct ClusterConfig {
     /// Copied into `CpuCluster::name`.
     name: String,
     /// Copied into `CpuCluster::cluster_index`.
     cluster_index: usize,
     /// Name of the node that handles this cluster's CPU driver; `new_from_json` uses it as the
     /// key into the node map.
     handler: String,
     /// Logical CPU numbers of the CPUs in this cluster.
     logical_cpu_numbers: Vec<u32>,
     /// Wrapped in `NormPerfs` to become `CpuCluster::performance_per_ghz`.
     normperfs_per_ghz: f64,
 }

 /// Deserialized configuration of one thermal state. `CpuManagerBuilder::build` converts it into
 /// a `ThermalState`.
 #[derive(Clone, Deserialize)]
 struct ThermalStateConfig {
     /// P-state index for each cluster, looked up by the cluster's `cluster_index`.
     cluster_pstates: Vec<usize>,
     /// Wrapped in `NormPerfs` to become `ThermalState::min_performance`.
     min_performance_normperfs: f64,
     /// Wrapped in `Watts` to become `ThermalState::static_power`.
     static_power_w: f64,
     /// Wrapped in `Watts` to become `ThermalState::dynamic_power_per_normperf`.
     dynamic_power_per_normperf_w: f64,
 }

 /// The `config` section of the node's JSON description.
 #[derive(Deserialize)]
 struct Config {
     /// One entry per CPU cluster.
     clusters: Vec<ClusterConfig>,
     /// One entry per thermal state; the list is checked by `validate_thermal_states` once
     /// converted.
     thermal_states: Vec<ThermalStateConfig>,
 }

 /// The `dependencies` section of the node's JSON description. Each string is a node name.
 #[derive(Deserialize)]
 struct Dependencies {
     /// `new_from_json` asserts that this list equals the clusters' `handler` names, in the same
     /// order.
     cpu_device_handlers: Vec<String>,
     /// Name of the node that becomes `CpuManager::cpu_stats_handler`.
     cpu_stats_handler: String,
     /// Name of the node that becomes `CpuManager::syscall_handler`.
     syscall_handler: String,
 }

 /// Top-level shape of the JSON value accepted by `CpuManagerBuilder::new_from_json`.
 #[derive(Deserialize)]
 struct JsonData {
     /// Cluster and thermal state configuration.
     config: Config,
     /// Names of the nodes this node depends on.
     dependencies: Dependencies,
 }

 /// Builder for `CpuManager`
 pub struct CpuManagerBuilder<'a> {
     /// Configuration of each CPU cluster.
     cluster_configs: Vec<ClusterConfig>,

     /// Parallel to `cluster_configs`; contains one `CpuDeviceHandler` node (or equivalent) for each
     /// CPU cluster.
     cluster_handlers: Vec<Rc<dyn Node>>,

     /// Configuration of each thermal state, in thermal state index order.
     thermal_state_configs: Vec<ThermalStateConfig>,

     /// Node passed on to `CpuManager::syscall_handler`; `build` also sends it GetNumCpus.
     syscall_handler: Rc<dyn Node>,

     /// Node passed on to `CpuManager::cpu_stats_handler`.
     cpu_stats_handler: Rc<dyn Node>,

     /// Inspect node under which the "CpuManager" node is created. When `None`, `build` uses the
     /// component's default inspector root.
     inspect_root: Option<&'a inspect::Node>,
 }

 impl<'a> CpuManagerBuilder<'a> {
     pub fn new_from_json(json_data: json::Value, nodes: &HashMap<String, Rc<dyn Node>>) -> Self {
         let data: JsonData = json::from_value(json_data).unwrap();
         assert_eq!(
             data.config.clusters.iter().map(|c| &c.handler).collect::<Vec<_>>(),
             data.dependencies.cpu_device_handlers.iter().collect::<Vec<_>>()
         );

         let cluster_handlers =
             data.config.clusters.iter().map(|c| nodes[&c.handler].clone()).collect();
         let cpu_stats_handler = nodes[&data.dependencies.cpu_stats_handler].clone();
         let syscall_handler = nodes[&data.dependencies.syscall_handler].clone();

         Self::new(
             data.config.clusters,
             cluster_handlers,
             data.config.thermal_states,
             syscall_handler,
             cpu_stats_handler,
         )
     }

     /// Creates a builder from already-resolved parts. `cluster_handlers` is parallel to
     /// `cluster_configs`. No inspect root is set, so `build` falls back to the default.
     fn new(
         cluster_configs: Vec<ClusterConfig>,
         cluster_handlers: Vec<Rc<dyn Node>>,
         thermal_state_configs: Vec<ThermalStateConfig>,
         syscall_handler: Rc<dyn Node>,
         cpu_stats_handler: Rc<dyn Node>,
     ) -> Self {
         let inspect_root = None;
         Self {
             inspect_root,
             syscall_handler,
             cpu_stats_handler,
             thermal_state_configs,
             cluster_handlers,
             cluster_configs,
         }
     }

     /// Test-only: makes `build` create the CpuManager inspect node under `root`.
     #[cfg(test)]
     pub fn with_inspect_root(self, root: &'a inspect::Node) -> Self {
         Self { inspect_root: Some(root), ..self }
     }

     pub async fn build(self) -> Result<Rc<CpuManager>, Error> {
         let mut clusters = Vec::new();
         for (cluster_config, handler) in
             self.cluster_configs.into_iter().zip(self.cluster_handlers.into_iter())
         {
             let pstates = match handler.handle_message(&Message::GetCpuPerformanceStates).await {
                 Ok(MessageReturn::GetCpuPerformanceStates(pstates)) => pstates,
                 Ok(r) => {
                     bail!("GetCpuPerformanceStates returned unexpected value: {:?}", r)
                 }
                 Err(e) => bail!("Error fetching performance states: {}", e),
             };

             // The current P-state will be set when CpuManager's thermal state is initialized below,
             // so initialize it to a range of all possible values for now.
             let pstate_range = Range { lower: 0, upper: pstates.len() - 1 };
             let current_pstate = Cell::new(RangedValue::InRange(pstate_range));

             clusters.push(CpuCluster {
                 name: cluster_config.name,
                 cluster_index: cluster_config.cluster_index,
                 handler,
                 logical_cpu_numbers: cluster_config.logical_cpu_numbers,
                 performance_per_ghz: NormPerfs(cluster_config.normperfs_per_ghz),
                 pstates,
                 current_pstate,
             });
         }

         let get_performance_capacity = |thermal_state_config: &ThermalStateConfig| {
             clusters
                 .iter()
                 .map(|cluster| {
                     let pstate_index = thermal_state_config.cluster_pstates[cluster.cluster_index];
                     cluster.get_performance_capacity(pstate_index)
                 })
                 .sum()
         };

         let thermal_states = self
             .thermal_state_configs
             .into_iter()
             .map(|t| {
                 let performance_capacity = get_performance_capacity(&t);
                 ThermalState {
                     cluster_pstates: t.cluster_pstates,
                     min_performance: NormPerfs(t.min_performance_normperfs),
                     static_power: Watts(t.static_power_w),
                     dynamic_power_per_normperf: Watts(t.dynamic_power_per_normperf_w),
                     performance_capacity,
                 }
             })
             .collect();

         validate_thermal_states(&thermal_states)?;

         // Optionally use the default inspect root node
         let inspect_root = self.inspect_root.unwrap_or(inspect::component::inspector().root());

         let cluster_names = clusters.iter().map(|c| c.name.as_str()).collect();
         let inspect_data = InspectData::new(inspect_root, "CpuManager", cluster_names);
         inspect_data.set_thermal_states(&thermal_states);

         // Retrieve the total number of CPUs, and ensure that clusters' logical CPU numbers exactly
         // span 0..num_cpus.
         let num_cpus = match self.syscall_handler.handle_message(&Message::GetNumCpus).await {
             Ok(MessageReturn::GetNumCpus(n)) => n,
             other => bail!("Unexpected GetNumCpus response: {:?}", other),
         };
         let mut covered_cpu_numbers = clusters
             .iter()
             .map(|c| c.logical_cpu_numbers.iter())
             .flatten()
             .map(|v| *v)
             .collect::<Vec<_>>();
         covered_cpu_numbers.sort();
         anyhow::ensure!(
             covered_cpu_numbers == (0..num_cpus).collect::<Vec<_>>(),
             "Clusters' logical CPU numbers must exactly span 0..{}",
             num_cpus
         );

         let cpu_manager = Rc::new(CpuManager {
             clusters,
             num_cpus,
             thermal_states,
             cpu_stats_handler: self.cpu_stats_handler,
             syscall_handler: self.syscall_handler,
             current_thermal_state: Cell::new(None),
             inspect: inspect_data,
         });

         // Update cluster P-states to match the highest power operating condition.
         cpu_manager.update_thermal_state(0).await?;

         Ok(cpu_manager)
     }
 }

 /// The CpuManager node. Instances are created by `CpuManagerBuilder::build`, which also puts the
 /// CPUs into thermal state 0.
 pub struct CpuManager {
     /// All CPU clusters governed by the `CpuManager`.
     clusters: Vec<CpuCluster>,

     /// Number of CPUs in the system; confirmed to be greater than the max logical CPU number of any
     /// cluster.
     num_cpus: u32,

     /// All supported thermal states for the CPU subsystem.
     thermal_states: Vec<ThermalState>,

     /// Must service GetNumCpus and SetCpuPerformanceInfo messages.
     syscall_handler: Rc<dyn Node>,

     /// The node that will provide CPU load information. It is expected that this node responds to
     /// the GetCpuLoads message.
     cpu_stats_handler: Rc<dyn Node>,

     /// The current thermal state of the CPU subsystem. The CPU will be put into its highest-power
     /// state on startup. `None` until the first update succeeds and after any update in which a
     /// cluster's P-state change failed.
     current_thermal_state: Cell<Option<usize>>,

     /// Inspect properties that mirror this node's most recent inputs, decisions and errors.
     inspect: InspectData,
 }

 /// A performance model for a future time interval.
 #[derive(Copy, Clone, Debug)]
 enum PerformanceModel {
     /// Models performance to be a fixed value.
     FixedValue(NormPerfs),

     /// Models the CPUs to be saturated regardless of frequency.
     Saturated,
 }
 // Bring the variants into scope so the rest of the file can write `FixedValue(..)` and
 // `Saturated` unqualified.
 use PerformanceModel::{FixedValue, Saturated};

 impl PerformanceModel {
     /// Returns true when a single CPU's fractional load exceeds the saturation threshold.
     ///
     /// `load` is expected to be at most 1.0; this is checked in debug builds only.
     fn cpu_load_is_saturated(load: f32) -> bool {
         const CPU_SATURATION_FRACTION: f32 = 0.99;
         debug_assert!(load <= 1.0);
         CPU_SATURATION_FRACTION < load
     }
 }

 impl CpuManager {
     // Asks the CPU stats handler for the loads of all CPUs in the system and returns them as
     // fractional utilizations. Both a failed request and a reply of the wrong kind become
     // errors.
     async fn get_cpu_loads(&self) -> Result<Vec<f32>, Error> {
         fuchsia_trace::duration!("power_manager", "CpuManager::get_cpu_loads");

         let response = self
             .send_message(&self.cpu_stats_handler, &Message::GetCpuLoads)
             .await
             .map_err(|e| format_err!("GetCpuLoads failed: {:?}", e))?;

         match response {
             MessageReturn::GetCpuLoads(loads) => Ok(loads),
             r => Err(format_err!("GetCpuLoads had unexpected return value: {:?}", r)),
         }
     }

     // Picks the thermal state to use for the given available power and performance model, and
     // returns its index together with its estimated performance and power.
     //
     // States are examined in index order, skipping those whose `min_performance` exceeds a
     // `FixedValue` model's performance. The first examined state whose estimated power is below
     // `available_power` wins. If none qualifies, the last examined state is returned.
     fn select_thermal_state(
         &self,
         available_power: Watts,
         performance_model: PerformanceModel,
     ) -> (usize, PerfAndPower) {
         // State 0 is admissible for every performance value, so it seeds the result.
         debug_assert_eq!(self.thermal_states[0].min_performance, NormPerfs(0.0));
         let mut selected =
             (0, self.thermal_states[0].estimate_perf_and_power(performance_model));
         if selected.1.power < available_power {
             return selected;
         }

         for (i, thermal_state) in self.thermal_states.iter().enumerate().skip(1) {
             let inadmissible = matches!(
                 performance_model,
                 FixedValue(performance) if thermal_state.min_performance > performance
             );
             if inadmissible {
                 continue;
             }

             // Record this state whether or not it fits; stop at the first one that does.
             let estimate = thermal_state.estimate_perf_and_power(performance_model);
             let fits = estimate.power < available_power;
             selected = (i, estimate);
             if fits {
                 break;
             }
         }

         selected
     }

     // Moves every cluster to the P-state that thermal state `index` prescribes for it.
     //
     // On success the new index is recorded. If any cluster fails, the current thermal state
     // becomes unknown (`None`) and the collected errors are returned as a single error.
     async fn update_thermal_state(&self, index: usize) -> Result<(), PowerManagerError> {
         fuchsia_trace::duration!(
             "power_manager",
             "CpuManager::update_thermal_state",
             "index" => index as u32
         );

         // Nothing to do if we are already in this state. This assumes that P-states have not
         // changed behind our back.
         if self.current_thermal_state.get() == Some(index) {
             return Ok(());
         }

         let pstate_indices = &self.thermal_states[index].cluster_pstates;
         let results = futures::future::join_all(self.clusters.iter().map(|cluster| {
             cluster.update_pstate(&self.syscall_handler, pstate_indices[cluster.cluster_index])
         }))
         .await;

         // Keep only the failures.
         let errors: Vec<_> = results.into_iter().filter_map(Result::err).collect();

         if !errors.is_empty() {
             self.current_thermal_state.set(None);

             let msg = format!("P-state update(s) failed: {:?}", errors);
             self.inspect.thermal_state_index.set(&format!("Unknown; {}", msg));
             return Err(format_err!(msg).into());
         }

         self.current_thermal_state.set(Some(index));
         self.inspect.thermal_state_index.set(&index.to_string());
         Ok(())
     }

     /// Handles a SetMaxPowerConsumption message. If an error is encountered in the execution of
     /// this method, CpuManager will keep itself in a usable state by using pessimistic estimates of
     /// any value that it cannot determine and then propagate the error to the caller.
     ///
     /// On success, the reply carries the power estimated for the newly selected thermal state.
     async fn handle_set_max_power_consumption(&self, available_power: Watts) -> MessageResult {
         fuchsia_trace::duration!(
             "power_manager",
             "CpuManager::handle_set_max_power_consumption",
             "available_power" => available_power.0
         );
         self.inspect.available_power.set(available_power.0);

         // Gather CPU loads over the last time interval. In the unlikely event of an error, use the
         // worst-case CPU load of 1.0 for all CPUs and throttle accordingly before propagating the
         // error.
         let (cpu_loads, load_query_error) = match self.get_cpu_loads().await {
             Ok(loads) => (loads, None),
             Err(e) => {
                 log::error!(
                     "Error querying CPU loads: {}\nWill throttle assuming maximal load.",
                     e
                 );
                 self.inspect.last_error.set(&e.to_string());
                 (vec![1.0; self.num_cpus as usize], Some(e))
             }
         };

         // Determine the normalized performance over the last interval.
         let mut last_performance = NormPerfs(0.0);
         for cluster in self.clusters.iter() {
             let (load, performance) = cluster.process_fractional_loads(&cpu_loads);
             last_performance += performance;

             fuchsia_trace::counter!(
                 "power_manager",
                 "CpuManager cluster_load",
                 cluster.cluster_index as u64,
                 &cluster.name => load as f64
             );
             self.inspect.last_cluster_loads[cluster.cluster_index].set(load as f64);
         }
         self.inspect.last_performance.set(last_performance.0);
         fuchsia_trace::counter!(
             "power_manager",
             "CpuManager last_performance",
             0,
             "value (NormPerfs)" => last_performance.0
         );

         // Only when every CPU is saturated is the next interval modeled as saturated; otherwise
         // the performance measured above is used as the prediction.
         let cpus_saturated = cpu_loads.iter().all(|l| PerformanceModel::cpu_load_is_saturated(*l));
         let performance_model =
             if cpus_saturated { Saturated } else { FixedValue(last_performance) };

         // Determine the next thermal state, updating if needed. We use the performance over the
         // last interval as an estimate of performance over the next interval; in principle a more
         // sophisticated estimate could be used.
         let (new_thermal_state_index, estimate) =
             self.select_thermal_state(available_power, performance_model);
         fuchsia_trace::counter!(
             "power_manager",
             "CpuManager new_thermal_state_index",
             0,
             "value" => new_thermal_state_index as u32
         );

         // A failed update returns here, so the projections below are only published for a thermal
         // state that was actually applied.
         if let Err(e) = self.update_thermal_state(new_thermal_state_index).await {
             self.inspect.last_error.set(&e.to_string());
             return Err(e);
         }

         fuchsia_trace::counter!(
             "power_manager",
             "CpuManager projected_performance",
             0,
             "value (NormPerfs)" => estimate.performance.0
         );
         self.inspect.projected_performance.set(estimate.performance.0);

         fuchsia_trace::counter!(
             "power_manager",
             "CpuManager projected_power",
             0,
             "value (W)" => estimate.power.0
         );
         self.inspect.projected_power.set(estimate.power.0);

         // Bubble up any error that may have occurred while querying CPU load.
         match load_query_error {
             None => Ok(MessageReturn::SetMaxPowerConsumption(estimate.power)),
             Some(e) => Err(e.into()),
         }
     }
 }

 #[async_trait(?Send)]
 impl Node for CpuManager {
     /// Returns the fixed name identifying this node.
     fn name(&self) -> String {
         String::from("CpuManager")
     }

     /// Routes an incoming message to its handler.
     ///
     /// `SetMaxPowerConsumption` is forwarded to `handle_set_max_power_consumption`; any other
     /// message yields `PowerManagerError::Unsupported`.
     async fn handle_message(&self, msg: &Message) -> MessageResult {
         if let Message::SetMaxPowerConsumption(max_power) = msg {
             self.handle_set_max_power_consumption(*max_power).await
         } else {
             Err(PowerManagerError::Unsupported)
         }
     }
 }

 // TODO(fxbug.dev/84727): Determine whether it would be useful to track histories of any of these
 // signals.
 /// Inspect handles kept alive by the CpuManager node: its local root node plus the individual
 /// properties created beneath the "state" child in `InspectData::new`.
 struct InspectData {
     // Local root node, created as a child of the parent passed to `new`. It is also the parent
     // of the "thermal_states" subtree recorded by `set_thermal_states`.
     root_node: inspect::Node,

     // Properties
     // "thermal_state_index": string property, created with the placeholder "initializing".
     thermal_state_index: inspect::StringProperty,
     // "last_performance (NormPerfs)": created as 0.0.
     last_performance: inspect::DoubleProperty,
     // One "last_load (#cores)" property per cluster, pushed in the order of the cluster names
     // passed to `new`.
     last_cluster_loads: Vec<inspect::DoubleProperty>,
     // "available_power (W)": created as 0.0.
     available_power: inspect::DoubleProperty,
     // "projected_performance (NormPerfs)": created as 0.0.
     projected_performance: inspect::DoubleProperty,
     // "projected_power (W)": created as 0.0.
     projected_power: inspect::DoubleProperty,
     // "last_error": string property, created with the placeholder "<None>".
     last_error: inspect::StringProperty,
 }

 impl InspectData {
     /// Creates the Inspect hierarchy for this node.
     ///
     /// A child named `node_name` is created under `parent`. Beneath it, a "state" node holds the
     /// dynamic properties, including one child node per entry of `cluster_names`, each carrying a
     /// "last_load (#cores)" property. The returned `last_cluster_loads` vector follows the order
     /// of `cluster_names`.
     fn new(parent: &inspect::Node, node_name: &str, cluster_names: Vec<&str>) -> Self {
         // Create a local root node and the "state" node that owns the dynamic properties.
         let root_node = parent.create_child(node_name);
         let state_node = root_node.create_child("state");

         let thermal_state_index = state_node.create_string("thermal_state_index", "initializing");
         let last_performance = state_node.create_double("last_performance (NormPerfs)", 0.0);

         // Each cluster gets its own child node; the node itself is owned by `state_node`, while
         // the load property is retained here so that it can be updated later.
         let mut last_cluster_loads = Vec::with_capacity(cluster_names.len());
         for name in cluster_names {
             state_node.record_child(name, |cluster_node| {
                 last_cluster_loads.push(cluster_node.create_double("last_load (#cores)", 0.0));
             });
         }

         let available_power = state_node.create_double("available_power (W)", 0.0);
         let projected_performance =
             state_node.create_double("projected_performance (NormPerfs)", 0.0);
         let projected_power = state_node.create_double("projected_power (W)", 0.0);

         let last_error = state_node.create_string("last_error", "<None>");

         // Pass ownership of `state_node` to the root node.
         root_node.record(state_node);

         Self {
             root_node,
             thermal_state_index,
             last_performance,
             last_cluster_loads,
             available_power,
             projected_performance,
             projected_power,
             last_error,
         }
     }

     /// Records a static description of every thermal state under a "thermal_states" child of
     /// the root node.
     ///
     /// `states` is accepted as a slice; callers holding a `Vec<ThermalState>` can keep passing
     /// `&vec` thanks to deref coercion.
     fn set_thermal_states(&self, states: &[ThermalState]) {
         let states_node = self.root_node.create_child("thermal_states");

         // Iterate over `states` in reverse order so that the Inspect nodes appear in the same
         // order as the slice (`create_child` inserts nodes at the head).
         for (index, state) in states.iter().enumerate().rev() {
             let node = states_node.create_child(format!("thermal_state_{:02}", index));

             let pstates = node.create_uint_array("cluster_pstates", state.cluster_pstates.len());
             for (cluster_index, pstate) in state.cluster_pstates.iter().enumerate() {
                 pstates.set(cluster_index, *pstate as u64);
             }
             node.record(pstates);

             node.record_double("min_performance (NormPerfs)", state.min_performance.0);
             node.record_double("static_power (W)", state.static_power.0);
             node.record_double(
                 "dynamic_power_per_normperf (W)",
                 state.dynamic_power_per_normperf.0,
             );
             node.record_double("performance_capacity (NormPerfs)", state.performance_capacity.0);

             // Pass ownership of the new node to the parent.
             states_node.record(node);
         }

         // Pass ownership of the new `states_node` to the root node
         self.root_node.record(states_node);
     }
 }

 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::test::mock_node::{MessageMatcher, MockNode, MockNodeMaker};
     use crate::types::{Hertz, Volts};
     use crate::{msg_eq, msg_ok_return};
     use assert_matches::assert_matches;
     use fuchsia_async as fasync;
     use inspect::assert_data_tree;
     use test_util::assert_lt;

     // Common test configurations for big and little clusters.

     // Logical CPU numbers that make up the big cluster.
     static BIG_CPU_NUMBERS: [u32; 2] = [0, 1];
     // P-states returned by the mock big cluster handler for GetCpuPerformanceStates. Index 0 has
     // the highest frequency and voltage.
     static BIG_PSTATES: [PState; 3] = [
         PState { frequency: Hertz(2.0e9), voltage: Volts(1.0) },
         PState { frequency: Hertz(1.9e9), voltage: Volts(0.9) },
         PState { frequency: Hertz(1.8e9), voltage: Volts(0.8) },
     ];
     // Used as the big cluster's `normperfs_per_ghz`, and to derive expected performance scales.
     static BIG_PERFORMANCE_PER_GHZ: NormPerfs = NormPerfs(1.0);

     // Logical CPU numbers that make up the little cluster.
     static LITTLE_CPU_NUMBERS: [u32; 2] = [2, 3];
     // P-states returned by the mock little cluster handler for GetCpuPerformanceStates. Index 0
     // has the highest frequency and voltage.
     static LITTLE_PSTATES: [PState; 3] = [
         PState { frequency: Hertz(1.0e9), voltage: Volts(0.5) },
         PState { frequency: Hertz(0.9e9), voltage: Volts(0.4) },
         PState { frequency: Hertz(0.8e9), voltage: Volts(0.3) },
     ];
     // Used as the little cluster's `normperfs_per_ghz`, and to derive expected performance scales.
     static LITTLE_PERFORMANCE_PER_GHZ: NormPerfs = NormPerfs(0.5);

     // Builds the big + little cluster configuration shared by the tests. The `handler` names are
     // placeholders because the tests hand the handler nodes to `CpuManagerBuilder::new` directly.
     fn make_default_cluster_configs() -> Vec<ClusterConfig> {
         let big_cluster = ClusterConfig {
             name: String::from("big_cluster"),
             cluster_index: 0,
             handler: String::from("<unused>"),
             logical_cpu_numbers: BIG_CPU_NUMBERS.to_vec(),
             normperfs_per_ghz: BIG_PERFORMANCE_PER_GHZ.0,
         };
         let little_cluster = ClusterConfig {
             name: String::from("little_cluster"),
             cluster_index: 1,
             handler: String::from("<unused>"),
             logical_cpu_numbers: LITTLE_CPU_NUMBERS.to_vec(),
             normperfs_per_ghz: LITTLE_PERFORMANCE_PER_GHZ.0,
         };

         vec![big_cluster, little_cluster]
     }

     // Convenience struct to manage mocks of the handlers on which CpuManager depends.
     struct Handlers {
         // Mock of the big cluster's handler; receives GetCpuPerformanceStates and
         // SetPerformanceState expectations.
         big_cluster: Rc<MockNode>,
         // Mock of the little cluster's handler; receives GetCpuPerformanceStates and
         // SetPerformanceState expectations.
         little_cluster: Rc<MockNode>,
         // Mock of the syscall handler; receives GetNumCpus and SetCpuPerformanceInfo expectations.
         syscall: Rc<MockNode>,
         // Mock of the CPU stats handler; receives GetCpuLoads expectations.
         cpu_stats: Rc<MockNode>,

         // The MockMaker comes last, so it is dropped after the MockNodes.
         _mock_maker: MockNodeMaker,
     }

     impl Handlers {
         fn new() -> Self {
             let mut mock_maker = MockNodeMaker::new();

             // The big and little cluster handlers are initially queried for all performance states.
             let big_cluster = mock_maker.make(
                 "big_cluster_handler",
                 vec![(
                     msg_eq!(GetCpuPerformanceStates),
                     msg_ok_return!(GetCpuPerformanceStates(Vec::from(&BIG_PSTATES[..]))),
                 )],
             );
             let little_cluster = mock_maker.make(
                 "little_cluster_handler",
                 vec![(
                     msg_eq!(GetCpuPerformanceStates),
                     msg_ok_return!(GetCpuPerformanceStates(Vec::from(&LITTLE_PSTATES[..]))),
                 )],
             );

             // The syscall handler provides the number of CPUs during initialization.
             let num_cpus = BIG_CPU_NUMBERS.len() + LITTLE_CPU_NUMBERS.len();
             let syscall = mock_maker.make(
                 "syscall_handler",
                 vec![(msg_eq!(GetNumCpus), msg_ok_return!(GetNumCpus(num_cpus as u32)))],
             );

             let cpu_stats = mock_maker.make("cpu_stats_handler", Vec::new());

             let handlers =
                 Self { big_cluster, little_cluster, syscall, cpu_stats, _mock_maker: mock_maker };

             // During initialization, CpuManager configures the highest-power thermal state, with
             // both clusters at their respective 0th P-states.
             handlers.expect_big_pstate(0);
             handlers.expect_little_pstate(0);

             handlers
         }

         // Tells the syscall handler to expect a SetCpuPerformanceInfo call for the provided
         // collection of CPUs and performance scale.
         fn expect_performance_scale(&self, logical_cpu_numbers: &[u32], float_scale: f64) {
             let scale = NormPerfs(float_scale).try_into().unwrap();
             let info = logical_cpu_numbers
                 .iter()
                 .map(|n| sys::zx_cpu_performance_info_t {
                     logical_cpu_number: *n,
                     performance_scale: scale,
                 })
                 .collect::<Vec<_>>();
             self.syscall.add_msg_response_pair((
                 msg_eq!(SetCpuPerformanceInfo(info)),
                 msg_ok_return!(SetCpuPerformanceInfo),
             ));
         }

         // Updates the handlers with expectations for a big cluster P-state change.
         fn expect_big_pstate(&self, pstate_index: u32) {
             self.big_cluster.add_msg_response_pair((
                 msg_eq!(SetPerformanceState(pstate_index)),
                 msg_ok_return!(SetPerformanceState),
             ));
             let frequency = &BIG_PSTATES[pstate_index as usize].frequency;
             let float_scale = BIG_PERFORMANCE_PER_GHZ.0 * frequency.0 / 1e9;
             self.expect_performance_scale(&BIG_CPU_NUMBERS, float_scale);
         }

         // Updates the handlers with expectations for a little cluster P-state change.
         fn expect_little_pstate(&self, pstate_index: u32) {
             self.little_cluster.add_msg_response_pair((
                 msg_eq!(SetPerformanceState(pstate_index)),
                 msg_ok_return!(SetPerformanceState),
             ));
             let frequency = &LITTLE_PSTATES[pstate_index as usize].frequency;
             let float_scale = LITTLE_PERFORMANCE_PER_GHZ.0 * frequency.0 / 1e9;
             self.expect_performance_scale(&LITTLE_CPU_NUMBERS, float_scale);
         }

         // Prepares the stats handler for a CPU load query.
         fn enqueue_cpu_loads(&self, loads: Vec<f32>) {
             self.cpu_stats
                 .add_msg_response_pair((msg_eq!(GetCpuLoads), msg_ok_return!(GetCpuLoads(loads))));
         }
     }

     // Verify that a node is successfully constructed from JSON configuration.
     #[fasync::run_singlethreaded(test)]
     async fn test_new_from_json() {
         let handlers = Handlers::new();

         // Register the mocks under the node names referenced by the JSON below.
         let mut nodes: HashMap<String, Rc<dyn Node>> = HashMap::new();
         nodes.insert(String::from("big_cluster_node"), handlers.big_cluster.clone());
         nodes.insert(String::from("little_cluster_node"), handlers.little_cluster.clone());
         nodes.insert(String::from("syscall_handler_node"), handlers.syscall.clone());
         nodes.insert(String::from("stats_handler_node"), handlers.cpu_stats.clone());

         let clusters = json::json!([
             {
                 "name": "big_cluster",
                 "cluster_index": 0,
                 "handler": "big_cluster_node",
                 "logical_cpu_numbers": [0, 1],
                 "normperfs_per_ghz": BIG_PERFORMANCE_PER_GHZ.0
             },
             {
                 "name": "little_cluster",
                 "cluster_index": 1,
                 "handler": "little_cluster_node",
                 "logical_cpu_numbers": [2, 3],
                 "normperfs_per_ghz": LITTLE_PERFORMANCE_PER_GHZ.0
             }
         ]);

         let thermal_states = json::json!([
             {
                 "cluster_pstates": [0, 0],
                 "min_performance_normperfs": 0.0,
                 "static_power_w": 0.9,
                 "dynamic_power_per_normperf_w": 0.6
             }
         ]);

         let dependencies = json::json!({
             "cpu_device_handlers": [
                 "big_cluster_node",
                 "little_cluster_node"
             ],
             "cpu_stats_handler": "stats_handler_node",
             "syscall_handler": "syscall_handler_node"
         });

         let json_data = json::json!({
             "type": "CpuManager",
             "name": "cpu_manager",
             "config": {
                 "clusters": clusters,
                 "thermal_states": thermal_states
             },
             "dependencies": dependencies
         });

         CpuManagerBuilder::new_from_json(json_data, &nodes).build().await.unwrap();
     }

     // Verifies that thermal states are properly validated.
     #[fasync::run_singlethreaded(test)]
     async fn test_thermal_state_validation() {
         // CpuManagerBuilder::build() exits early when validation fails, so the failing cases need
         // mocks without expectations for the messages that are never sent.
         impl Handlers {
             fn new_for_failed_validation() -> Self {
                 let mut mock_maker = MockNodeMaker::new();

                 // Only the initial P-state queries reach the cluster handlers.
                 let big_cluster = mock_maker.make(
                     "big_cluster_handler",
                     vec![(
                         msg_eq!(GetCpuPerformanceStates),
                         msg_ok_return!(GetCpuPerformanceStates(Vec::from(&BIG_PSTATES[..]))),
                     )],
                 );
                 let little_cluster = mock_maker.make(
                     "little_cluster_handler",
                     vec![(
                         msg_eq!(GetCpuPerformanceStates),
                         msg_ok_return!(GetCpuPerformanceStates(Vec::from(&LITTLE_PSTATES[..]))),
                     )],
                 );

                 // The syscall and stats handlers expect no messages at all.
                 let syscall = mock_maker.make("syscall_handler", Vec::new());
                 let cpu_stats = mock_maker.make("cpu_stats_handler", Vec::new());

                 Self { big_cluster, little_cluster, syscall, cpu_stats, _mock_maker: mock_maker }
             }
         }

         let cluster_configs = make_default_cluster_configs();

         // Leading thermal state shared by every case except the performance capacity check.
         let unthrottled_state = ThermalStateConfig {
             cluster_pstates: vec![0, 0],
             min_performance_normperfs: 0.0,
             static_power_w: 2.0,
             dynamic_power_per_normperf_w: 1.0,
         };

         // Not allowed: Increasing dynamic power.
         let handlers = Handlers::new_for_failed_validation();
         let increasing_dynamic_power = vec![
             unthrottled_state.clone(),
             ThermalStateConfig {
                 cluster_pstates: vec![2, 2],
                 min_performance_normperfs: 0.0,
                 static_power_w: 1.5,
                 dynamic_power_per_normperf_w: 1.1,
             },
         ];
         let build_failed = CpuManagerBuilder::new(
             cluster_configs.clone(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             increasing_dynamic_power,
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .is_err();
         assert!(build_failed);

         // Not allowed: Increasing performance capacity.
         let handlers = Handlers::new_for_failed_validation();
         let increasing_capacity = vec![
             ThermalStateConfig {
                 cluster_pstates: vec![2, 2],
                 min_performance_normperfs: 0.0,
                 static_power_w: 2.0,
                 dynamic_power_per_normperf_w: 1.0,
             },
             ThermalStateConfig {
                 cluster_pstates: vec![2, 1],
                 min_performance_normperfs: 0.0,
                 static_power_w: 1.5,
                 dynamic_power_per_normperf_w: 0.8,
             },
         ];
         let build_failed = CpuManagerBuilder::new(
             cluster_configs.clone(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             increasing_capacity,
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .is_err();
         assert!(build_failed);

         // Allowed: The second state has higher static power, but its power draw is less than the
         // first state at the first performance at which both states are admissible.
         let handlers = Handlers::new();
         let lower_power_when_admissible = vec![
             unthrottled_state.clone(),
             ThermalStateConfig {
                 cluster_pstates: vec![2, 2],
                 min_performance_normperfs: 1.0,
                 static_power_w: 2.1,
                 dynamic_power_per_normperf_w: 0.8,
             },
         ];
         CpuManagerBuilder::new(
             cluster_configs.clone(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             lower_power_when_admissible,
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .unwrap();

         // Not allowed: The second state has higher power draw at the first performance at which
         // both states are admissible.
         let handlers = Handlers::new_for_failed_validation();
         let higher_power_when_admissible = vec![
             unthrottled_state,
             ThermalStateConfig {
                 cluster_pstates: vec![2, 2],
                 min_performance_normperfs: 1.0,
                 static_power_w: 2.5,
                 dynamic_power_per_normperf_w: 0.8,
             },
         ];
         let build_failed = CpuManagerBuilder::new(
             cluster_configs,
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             higher_power_when_admissible,
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .is_err();
         assert!(build_failed);
     }

     // Tests that CpuManagerBuilder requires that clusters are configured to exactly span the space
     // of all logical CPU numbers.
     #[fasync::run_singlethreaded(test)]
     async fn test_validate_all_cpus_spanned() {
         let mut mock_maker = MockNodeMaker::new();

         // Both cluster handlers are asked for their P-states before the CPU count is checked.
         let big_cluster_handler = mock_maker.make(
             "big_cluster_handler",
             vec![(
                 msg_eq!(GetCpuPerformanceStates),
                 msg_ok_return!(GetCpuPerformanceStates(Vec::from(&BIG_PSTATES[..]))),
             )],
         );
         let little_cluster_handler = mock_maker.make(
             "little_cluster_handler",
             vec![(
                 msg_eq!(GetCpuPerformanceStates),
                 msg_ok_return!(GetCpuPerformanceStates(Vec::from(&LITTLE_PSTATES[..]))),
             )],
         );

         // Have the syscall handler report one CPU beyond those covered by the two clusters.
         let num_cpus = BIG_CPU_NUMBERS.len() + LITTLE_CPU_NUMBERS.len() + 1;
         let syscall_handler = mock_maker.make(
             "syscall_handler",
             vec![(msg_eq!(GetNumCpus), msg_ok_return!(GetNumCpus(num_cpus as u32)))],
         );

         let cpu_stats_handler = mock_maker.make("cpu_stats_handler", vec![]);

         // A single valid thermal state, so the CPU count mismatch is the only problem.
         let single_state = ThermalStateConfig {
             cluster_pstates: vec![0, 0],
             min_performance_normperfs: 0.0,
             static_power_w: 2.0,
             dynamic_power_per_normperf_w: 1.0,
         };

         let build_failed = CpuManagerBuilder::new(
             make_default_cluster_configs(),
             vec![big_cluster_handler, little_cluster_handler],
             vec![single_state],
             syscall_handler,
             cpu_stats_handler,
         )
         .build()
         .await
         .is_err();
         assert!(build_failed);
     }

     // Verify that CpuManager responds as expected to SetMaxPowerConsumption messages.
     #[fasync::run_singlethreaded(test)]
     async fn test_set_max_power_consumption() {
         let handlers = Handlers::new();

         let state_0 = ThermalStateConfig {
             cluster_pstates: vec![0, 0],
             min_performance_normperfs: 0.0,
             static_power_w: 2.0,
             dynamic_power_per_normperf_w: 1.0,
         };
         let state_1 = ThermalStateConfig {
             cluster_pstates: vec![0, 1],
             min_performance_normperfs: 0.2,
             static_power_w: 1.5,
             dynamic_power_per_normperf_w: 0.8,
         };
         let state_2 = ThermalStateConfig {
             cluster_pstates: vec![1, 2],
             min_performance_normperfs: 0.4,
             static_power_w: 1.0,
             dynamic_power_per_normperf_w: 0.6,
         };

         let node = CpuManagerBuilder::new(
             make_default_cluster_configs(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             vec![state_0, state_1, state_2],
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .unwrap();

         // Both clusters are at P-state 0, so 0.1 fractional utilization per core gives:
         //   Big cluster: 0.2 cores load -> 0.4GHz utilized -> 0.4 NormPerfs
         //   Little cluster: 0.2 cores load -> 0.2GHz utilized -> 0.1 NormPerfs
         // At thermal state 0, the projected power use at 0.5 NormPerfs is
         //   2W static + 0.5W dynamic = 2.5W
         // which fits the 3W budget, so no P-state changes are expected.
         handlers.enqueue_cpu_loads(vec![0.1; 4]);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(3.0))).await.unwrap();

         // Both clusters are still at P-state 0, so 0.25 fractional utilization per core gives:
         //   Big cluster: 0.5 cores load -> 1GHz utilized -> 1 NormPerfs
         //   Little cluster: 0.5 cores load -> 0.5GHz utilized -> 0.25 NormPerfs
         // Projected power usage at 1.25 NormPerfs is:
         //   Thermal state 0: 2W static + 1.25W dynamic = 3.25W => over 3W budget
         //   Thermal state 1: 1.5W static + 1W dynamic = 2.5W => within 3W budget
         // Thermal state 1 is therefore selected, moving the little cluster to P-state 1.
         handlers.enqueue_cpu_loads(vec![0.25; 4]);
         handlers.expect_little_pstate(1);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(3.0))).await.unwrap();

         // With unchanged CPU load, a 2.4W budget falls below the allocation for thermal state 1.
         // That forces thermal state 2: big P-state 1 and little P-state 2.
         handlers.enqueue_cpu_loads(vec![0.25; 4]);
         handlers.expect_big_pstate(1);
         handlers.expect_little_pstate(2);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(2.4))).await.unwrap();

         // A 1.4W budget is below the static power for thermal state 1. However, at 0.05
         // fractional utilization per core the projected performance is roughly 0.25 NormPerfs,
         // which makes thermal state 2 inadmissible. Thus, we fall back to thermal state 1.
         handlers.enqueue_cpu_loads(vec![0.05; 4]);
         handlers.expect_big_pstate(0);
         handlers.expect_little_pstate(1);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(1.4))).await.unwrap();

         // At 0.01 fractional utilization per core the projected performance is roughly 0.05
         // NormPerfs, so thermal state 1 is now inadmissible as well. This drives us to thermal
         // state 0, where only the little cluster's P-state changes.
         handlers.enqueue_cpu_loads(vec![0.01; 4]);
         handlers.expect_little_pstate(0);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(1.4))).await.unwrap();
     }

     // Verify that CPU saturation is handled as expected.
     #[fasync::run_singlethreaded(test)]
     async fn test_cpu_saturation() {
         let handlers = Handlers::new();

         let state_0 = ThermalStateConfig {
             cluster_pstates: vec![0, 0],
             min_performance_normperfs: 0.0,
             static_power_w: 2.0,
             dynamic_power_per_normperf_w: 1.0,
         };
         // The minimum performance of this state exceeds the 4.4 NormPerfs reached below.
         let state_1 = ThermalStateConfig {
             cluster_pstates: vec![1, 1],
             min_performance_normperfs: 4.5,
             static_power_w: 1.5,
             dynamic_power_per_normperf_w: 0.8,
         };
         let state_2 = ThermalStateConfig {
             cluster_pstates: vec![2, 2],
             min_performance_normperfs: 0.0,
             static_power_w: 1.0,
             dynamic_power_per_normperf_w: 0.6,
         };

         let node = CpuManagerBuilder::new(
             make_default_cluster_configs(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             vec![state_0, state_1, state_2],
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .build()
         .await
         .unwrap();

         // A low power budget forces thermal state 2 to begin with.
         handlers.enqueue_cpu_loads(vec![0.1; 4]);
         handlers.expect_big_pstate(2);
         handlers.expect_little_pstate(2);
         node.handle_message(&Message::SetMaxPowerConsumption(Watts(1.0))).await.unwrap();

         // Now saturate the CPUs. This corresponds to 4.4 NormPerfs.
         handlers.enqueue_cpu_loads(vec![1.0; 4]);

         // Pick a power budget just above state 1's saturated power and below state 0's.
         let state_1_max_power = node.thermal_states[1].estimate_power(Saturated);
         let max_power = state_1_max_power + Watts(0.1);
         let state_0_max_power = node.thermal_states[0].estimate_power(Saturated);
         assert_lt!(max_power, state_0_max_power);

         // Now we confirm that:
         //  - State 1 is selected, verifying that its min performance was disregarded due to CPU
         //    saturation.
         //  - The expected power consumption corresponds to the max power of state 1.
         handlers.expect_big_pstate(1);
         handlers.expect_little_pstate(1);
         let response = node.handle_message(&Message::SetMaxPowerConsumption(max_power)).await;
         match response {
             Ok(MessageReturn::SetMaxPowerConsumption(projected_power)) => {
                 assert_eq!(projected_power, state_1_max_power);
             }
             other => panic!("Unexpected result: {:?}", other),
         }
     }

     // Verify that Inspect data is populated as expected.
     #[fasync::run_singlethreaded(test)]
     async fn test_inspect_data() {
         let handlers = Handlers::new();

         let state_0 = ThermalStateConfig {
             cluster_pstates: vec![0, 0],
             min_performance_normperfs: 0.0,
             static_power_w: 2.0,
             dynamic_power_per_normperf_w: 1.0,
         };
         let state_1 = ThermalStateConfig {
             cluster_pstates: vec![1, 2],
             min_performance_normperfs: 0.2,
             static_power_w: 1.5,
             dynamic_power_per_normperf_w: 0.8,
         };

         let inspector = inspect::Inspector::new();
         let node = CpuManagerBuilder::new(
             make_default_cluster_configs(),
             vec![handlers.big_cluster.clone(), handlers.little_cluster.clone()],
             vec![state_0, state_1],
             handlers.syscall.clone(),
             handlers.cpu_stats.clone(),
         )
         .with_inspect_root(inspector.root())
         .build()
         .await
         .unwrap();

         // The power budget of 1W is below the static power of thermal state 0, so we are pushed
         // to thermal state 1.
         handlers.enqueue_cpu_loads(vec![1.0; 4]);
         handlers.expect_big_pstate(1);
         handlers.expect_little_pstate(2);
         let response = node.handle_message(&Message::SetMaxPowerConsumption(Watts(1.0))).await;
         assert_matches!(response, Ok(_));

         // All CPUs were fully loaded, so the projection is the saturated estimate for state 1.
         let expected = node.thermal_states[1].estimate_perf_and_power(Saturated);

         assert_data_tree!(
             inspector,
             root: {
                 "CpuManager": {
                     "state": {
                         "thermal_state_index": "1",
                         "last_performance (NormPerfs)": 5.0,
                         "big_cluster": {
                             "last_load (#cores)": 2.0,
                         },
                         "little_cluster": {
                             "last_load (#cores)": 2.0,
                         },
                         "available_power (W)": 1.0,
                         "projected_performance (NormPerfs)": expected.performance.0,
                         "projected_power (W)": expected.power.0,
                         "last_error": "<None>",
                     },
                     "thermal_states": {
                         "thermal_state_00": {
                             "cluster_pstates": vec![0u64, 0],
                             "min_performance (NormPerfs)": 0.0,
                             "static_power (W)": 2.0,
                             "dynamic_power_per_normperf (W)": 1.0,
                             "performance_capacity (NormPerfs)": 5.0,
                         },
                         "thermal_state_01": {
                             "cluster_pstates": vec![1u64, 2],
                             "min_performance (NormPerfs)": 0.2,
                             "static_power (W)": 1.5,
                             "dynamic_power_per_normperf (W)": 0.8,
                             "performance_capacity (NormPerfs)": 4.6,
                         },
                     }
                 }
             }
         );
     }
 }