// blob: 7816b8053706383f0fdb367013a7b57f4e738c9b [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use crate::log_if_err;
use crate::message::{Message, MessageReturn};
use crate::node::Node;
use crate::platform_metrics::PlatformMetric;
use crate::shutdown_request::{RebootReason, ShutdownRequest};
use crate::temperature_handler::TemperatureFilter;
use crate::types::{Celsius, Nanoseconds, Seconds, ThermalLoad, Watts};
use crate::utils::get_current_timestamp;
use anyhow::{format_err, Error};
use async_trait::async_trait;
use fuchsia_async as fasync;
use fuchsia_inspect::{self as inspect, Property};
use futures::{
future::{FutureExt, LocalBoxFuture},
stream::FuturesUnordered,
StreamExt,
};
use log::*;
use serde_derive::Deserialize;
use serde_json as json;
use std::cell::Cell;
use std::collections::HashMap;
use std::rc::Rc;
/// Node: ThermalPolicy
///
/// Summary: Implements the closed loop thermal control policy for the system
///
/// Handles Messages: N/A
///
/// Sends Messages:
/// - ReadTemperature
/// - SetMaxPowerConsumption
/// - SystemShutdown
/// - UpdateThermalLoad
/// - LogPlatformMetric
/// - GetDriverPath
///
/// FIDL dependencies: N/A
///
/// A builder for constructing the ThermalPolicy node, either from an already-assembled
/// `ThermalConfig` or from a JSON configuration.
pub struct ThermalPolicyBuilder<'a> {
/// Node dependencies and policy parameters that the built ThermalPolicy takes ownership of.
config: ThermalConfig,
/// Optional Inspect parent node for the ThermalPolicy's Inspect data. When `None`, `build`
/// falls back to the component-wide inspector root.
inspect_root: Option<&'a inspect::Node>,
}
impl<'a> ThermalPolicyBuilder<'a> {
pub fn new(config: ThermalConfig) -> Self {
Self { config, inspect_root: None }
}
/// Creates a builder from the node's JSON configuration.
///
/// `json_data` must contain a `config` object (the thermal shutdown temperature plus the
/// controller parameters) and a `dependencies` object naming the nodes this policy sends
/// messages to. Each dependency name is resolved against `nodes`.
///
/// # Panics
///
/// Panics if `json_data` does not match the expected schema, or if a named dependency is not
/// present in `nodes`. The panic message identifies the parse error or the missing node name.
pub fn new_from_json(json_data: json::Value, nodes: &HashMap<String, Rc<dyn Node>>) -> Self {
    #[derive(Deserialize)]
    struct ControllerConfig {
        sample_interval: f64,
        filter_time_constant: f64,
        target_temperature: f64,
        e_integral_min: f64,
        e_integral_max: f64,
        sustainable_power: f64,
        proportional_gain: f64,
        integral_gain: f64,
    }
    #[derive(Deserialize)]
    struct NodeConfig {
        thermal_shutdown_temperature: f64,
        controller_params: ControllerConfig,
    }
    #[derive(Deserialize)]
    struct Dependencies {
        cpu_control_nodes: Vec<String>,
        system_power_handler_node: String,
        temperature_handler_node: String,
        thermal_load_notify_nodes: Vec<String>,
        platform_metrics_node: String,
    }
    #[derive(Deserialize)]
    struct JsonData {
        config: NodeConfig,
        dependencies: Dependencies,
    }

    /// Resolves a dependency name to its node, panicking with the offending name if absent.
    fn lookup(nodes: &HashMap<String, Rc<dyn Node>>, name: &str) -> Rc<dyn Node> {
        match nodes.get(name) {
            Some(node) => Rc::clone(node),
            None => panic!("ThermalPolicy: dependency node '{}' not found", name),
        }
    }

    let data: JsonData = json::from_value(json_data)
        .expect("ThermalPolicy: failed to parse JSON configuration");
    let JsonData { config, dependencies } = data;
    let params = config.controller_params;

    let thermal_config = ThermalConfig {
        temperature_node: lookup(nodes, &dependencies.temperature_handler_node),
        cpu_control_nodes: dependencies
            .cpu_control_nodes
            .iter()
            .map(|name| lookup(nodes, name))
            .collect(),
        sys_pwr_handler: lookup(nodes, &dependencies.system_power_handler_node),
        thermal_load_notify_nodes: dependencies
            .thermal_load_notify_nodes
            .iter()
            .map(|name| lookup(nodes, name))
            .collect(),
        platform_metrics_node: lookup(nodes, &dependencies.platform_metrics_node),
        policy_params: ThermalPolicyParams {
            controller_params: ThermalControllerParams {
                sample_interval: Seconds(params.sample_interval),
                filter_time_constant: Seconds(params.filter_time_constant),
                target_temperature: Celsius(params.target_temperature),
                e_integral_min: params.e_integral_min,
                e_integral_max: params.e_integral_max,
                sustainable_power: Watts(params.sustainable_power),
                proportional_gain: params.proportional_gain,
                integral_gain: params.integral_gain,
            },
            thermal_shutdown_temperature: Celsius(config.thermal_shutdown_temperature),
        },
    };
    Self::new(thermal_config)
}
/// Test-only: sets the Inspect node under which the ThermalPolicy's Inspect data is created.
#[cfg(test)]
fn with_inspect_root(self, root: &'a inspect::Node) -> Self {
    Self { inspect_root: Some(root), ..self }
}
/// Builds the ThermalPolicy node and registers its periodic control loop.
///
/// Queries the temperature node for its driver path, creates the node together with its
/// Inspect data, and pushes the future that runs the periodic thermal loop onto
/// `futures_out`.
///
/// The component-wide inspector is only consulted when no Inspect root was supplied to the
/// builder.
pub async fn build<'b>(
    self,
    futures_out: &FuturesUnordered<LocalBoxFuture<'b, ()>>,
) -> Result<Rc<ThermalPolicy>, Error> {
    // Resolve the default lazily so that an explicitly supplied root never touches the
    // component-wide inspector.
    let inspect_root =
        self.inspect_root.unwrap_or_else(|| inspect::component::inspector().root());

    // Query the TemperatureHandler node for its associated driver path
    let driver_path = query_driver_path(&self.config.temperature_node).await;

    let node = Rc::new(ThermalPolicy {
        driver_path,
        state: ThermalState {
            temperature_filter: TemperatureFilter::new(
                self.config.temperature_node.clone(),
                self.config.policy_params.controller_params.filter_time_constant,
            ),
            prev_timestamp: Cell::new(Nanoseconds(0)),
            max_time_delta: Cell::new(Seconds(0.0)),
            error_integral: Cell::new(0.0),
        },
        // Inspect data borrows the config, so it must be created before the config is moved
        // into the node on the next line.
        inspect: InspectData::new(inspect_root, "ThermalPolicy".to_string(), &self.config),
        config: self.config,
    });

    futures_out.push(node.clone().periodic_thermal_loop());
    Ok(node)
}
}
/// Asks `temperature_handler` for its driver path via a GetDriverPath message. An error or a
/// reply of any other kind yields an empty string.
async fn query_driver_path(temperature_handler: &Rc<dyn Node>) -> String {
    let reply = temperature_handler.handle_message(&Message::GetDriverPath).await;
    if let Ok(MessageReturn::GetDriverPath(path)) = reply {
        path
    } else {
        String::new()
    }
}
/// The ThermalPolicy node. Instances are created by `ThermalPolicyBuilder::build`.
pub struct ThermalPolicy {
/// Topological path to the temperature driver that we're bound to. This is the value returned
/// by the temperature node in response to GetDriverPath, or an empty string if that query
/// failed.
driver_path: String,
/// Node dependencies and policy parameters supplied at construction.
config: ThermalConfig,
/// Controller state that is carried between iterations of the thermal control loop.
state: ThermalState,
/// A struct for managing Component Inspection data.
inspect: InspectData,
}
/// A struct to store all configurable aspects of the ThermalPolicy node
pub struct ThermalConfig {
/// The node to provide temperature readings for the thermal control loop. It is expected that
/// this node responds to the ReadTemperature message. It is also queried once, at build time,
/// with the GetDriverPath message.
pub temperature_node: Rc<dyn Node>,
/// The nodes used to impose limits on CPU power state. There will be one node for each CPU
/// power domain (e.g., big.LITTLE). It is expected that these nodes respond to the
/// SetMaxPowerConsumption message. Available power is offered to the nodes in vector order.
pub cpu_control_nodes: Vec<Rc<dyn Node>>,
/// The node to handle system power state changes (e.g., shutdown). It is expected that this
/// node responds to the SystemShutdown message.
pub sys_pwr_handler: Rc<dyn Node>,
/// The nodes that are interested in receiving updated thermal load values. It is expected that
/// these nodes respond to the UpdateThermalLoad message.
pub thermal_load_notify_nodes: Vec<Rc<dyn Node>>,
/// All parameter values relating to the thermal policy itself
pub policy_params: ThermalPolicyParams,
/// Node that we'll notify with relevant platform metrics. It is sent the LogPlatformMetric
/// message.
pub platform_metrics_node: Rc<dyn Node>,
}
/// A struct to store all configurable aspects of the thermal policy itself
pub struct ThermalPolicyParams {
/// The thermal control loop parameters
pub controller_params: ThermalControllerParams,
/// If temperature reaches or exceeds this value, the policy will command a system shutdown.
/// The comparison is made against the raw (unfiltered) temperature reading, and the request
/// sent to the system power handler is a reboot with reason `HighTemperature`.
pub thermal_shutdown_temperature: Celsius,
}
/// A struct to store the tunable thermal control loop parameters
#[derive(Clone, Debug)]
pub struct ThermalControllerParams {
/// The interval at which to run the thermal control loop
pub sample_interval: Seconds,
/// Time constant for the low-pass filter used for smoothing the temperature input signal
pub filter_time_constant: Seconds,
/// Target temperature for the PI control calculation
pub target_temperature: Celsius,
/// Minimum integral error [degC * s] for the PI control calculation. The accumulated integral
/// error is clamped to this lower bound; at this value the thermal load is 100.
pub e_integral_min: f64,
/// Maximum integral error [degC * s] for the PI control calculation. The accumulated integral
/// error is clamped to this upper bound; at this value the thermal load is 0.
pub e_integral_max: f64,
/// The available power when there is no temperature error
pub sustainable_power: Watts,
/// The proportional gain [W / degC] for the PI control calculation
pub proportional_gain: f64,
/// The integral gain [W / (degC * s)] for the PI control calculation
pub integral_gain: f64,
}
/// State information that is used for calculations across controller iterations. The mutable
/// fields are wrapped in `Cell` so they can be updated through a shared `&self` reference.
struct ThermalState {
/// Provides filtered temperature values according to the configured filter constant.
temperature_filter: TemperatureFilter,
/// The time of the previous controller iteration. Initialized to 0 when the node is built.
prev_timestamp: Cell<Nanoseconds>,
/// The largest observed time between controller iterations (may be used to detect hangs)
max_time_delta: Cell<Seconds>,
/// The integral error [degC * s] that is accumulated across controller iterations
error_integral: Cell<f64>,
}
impl ThermalPolicy {
/// Creates a Future driven by a timer firing at the interval specified by
/// ThermalControllerParams.sample_interval. At each fire, `iterate_thermal_control` is called
/// and any resulting errors are logged.
///
/// Takes `self` as an `Rc` which is moved into the returned future, so the future holds a
/// reference to the node for as long as it exists.
fn periodic_thermal_loop<'a>(self: Rc<Self>) -> LocalBoxFuture<'a, ()> {
// The interval timer is constructed here, when this function is called, rather than when the
// returned future is first polled.
let mut periodic_timer = fasync::Interval::new(
self.config.policy_params.controller_params.sample_interval.into(),
);
async move {
while let Some(()) = periodic_timer.next().await {
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::periodic_timer_fired",
fuchsia_trace::Scope::Thread
);
// An error from one iteration is logged and traced but does not end the loop.
let result = self.iterate_thermal_control().await;
log_if_err!(result, "Error while running thermal control iteration");
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::iterate_thermal_control_result",
fuchsia_trace::Scope::Thread,
"result" => format!("{:?}", result).as_str()
);
}
}
.boxed_local()
}
/// This is the main body of the closed loop thermal control logic. The function is called
/// periodically by the timer started in `periodic_thermal_loop`. For each iteration, the
/// following steps will be taken:
/// 1. Read the current temperature from the temperature driver specified in ThermalConfig
/// 2. Filter the raw temperature value using a low-pass filter
/// 3. Use the new filtered temperature to calculate the proportional error and integral
/// error of temperature relative to the configured target temperature
/// 4. Use the proportional error and integral error to derive thermal load and available
/// power values
/// 5. Update the relevant nodes with the new thermal load and available power information
///
/// Only a failure to obtain the temperature is returned as an error. Failures in the critical
/// temperature check, the thermal load update, and the power allocation update are logged and
/// traced, and the remaining steps still run.
async fn iterate_thermal_control(&self) -> Result<(), Error> {
fuchsia_trace::duration!("power_manager", "ThermalPolicy::iterate_thermal_control");
let timestamp = get_current_timestamp();
let time_delta = self.get_time_delta(timestamp);
// The only early return: without a temperature reading there is nothing to act on.
let temperature = self.state.temperature_filter.get_temperature(timestamp).await?;
let (error_proportional, error_integral) =
self.get_temperature_error(temperature.filtered, time_delta);
let thermal_load = Self::calculate_thermal_load(
error_integral,
self.config.policy_params.controller_params.e_integral_min,
self.config.policy_params.controller_params.e_integral_max,
);
self.log_thermal_iteration_metrics(
timestamp,
time_delta,
temperature.raw,
temperature.filtered,
error_integral,
thermal_load,
);
// If the new temperature has reached the critical threshold then shut down the system. The
// raw (unfiltered) reading is used for this check.
let result = self.check_critical_temperature(temperature.raw).await;
log_if_err!(result, "Error checking critical temperature");
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::check_critical_temperature_result",
fuchsia_trace::Scope::Thread,
"result" => format!("{:?}", result).as_str()
);
// Send the new thermal load to `thermal_load_notify_nodes`
let result = self.process_thermal_load(thermal_load).await;
log_if_err!(result, "Error updating thermal load");
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::process_thermal_load_result",
fuchsia_trace::Scope::Thread,
"result" => format!("{:?}", result).as_str()
);
// Update the power allocation according to the new temperature error readings
let result = self.update_power_allocation(error_proportional, error_integral).await;
log_if_err!(result, "Error running thermal feedback controller");
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::update_power_allocation_result",
fuchsia_trace::Scope::Thread,
"result" => format!("{:?}", result).as_str()
);
Ok(())
}
/// Computes the elapsed time between `timestamp` and the timestamp supplied on the previous
/// call, then remembers `timestamp` for the next call. Whenever the elapsed time exceeds the
/// largest value seen so far, the new maximum is stored and published to Inspect.
fn get_time_delta(&self, timestamp: Nanoseconds) -> Seconds {
    let previous = self.state.prev_timestamp.replace(timestamp);
    let elapsed: Seconds = (timestamp - previous).into();

    let largest_so_far: Seconds = self.state.max_time_delta.get();
    if elapsed > largest_so_far {
        self.state.max_time_delta.set(elapsed);
        self.inspect.max_time_delta.set(elapsed.0);
    }

    elapsed
}
/// Derives the proportional and integral temperature errors for this iteration.
///
/// The proportional error is the target temperature minus `temperature`. The integral error
/// is the previously stored integral plus the proportional error multiplied by `time_delta`,
/// clamped to `[e_integral_min, e_integral_max]`. The new integral is stored for the next
/// iteration and written to Inspect.
///
/// Returns `(error_proportional, error_integral)`.
fn get_temperature_error(&self, temperature: Celsius, time_delta: Seconds) -> (f64, f64) {
    let params = &self.config.policy_params.controller_params;

    let error_proportional = params.target_temperature.0 - temperature.0;
    let accumulated = self.state.error_integral.get() + error_proportional * time_delta.0;
    let error_integral =
        num_traits::clamp(accumulated, params.e_integral_min, params.e_integral_max);

    self.state.error_integral.set(error_integral);
    self.inspect.error_integral.set(error_integral);

    (error_proportional, error_integral)
}
/// Logs various state data that is updated on each iteration of the thermal policy.
///
/// The timestamp, time delta, raw and filtered temperatures, and thermal load are written to
/// Inspect. The timestamp is emitted as a trace instant event, and the temperatures, integral
/// error, and thermal load are emitted as trace counters. The integral error is not written to
/// Inspect here; `get_temperature_error` does that.
fn log_thermal_iteration_metrics(
&self,
timestamp: Nanoseconds,
time_delta: Seconds,
raw_temperature: Celsius,
filtered_temperature: Celsius,
temperature_error_integral: f64,
thermal_load: ThermalLoad,
) {
// Inspect properties
self.inspect.timestamp.set(timestamp.0);
self.inspect.time_delta.set(time_delta.0);
self.inspect.temperature_raw.set(raw_temperature.0);
self.inspect.temperature_filtered.set(filtered_temperature.0);
self.inspect.thermal_load.set(thermal_load.0.into());
// Trace events
fuchsia_trace::instant!(
"power_manager",
"ThermalPolicy::thermal_control_iteration_data",
fuchsia_trace::Scope::Thread,
"timestamp" => timestamp.0
);
fuchsia_trace::counter!(
"power_manager",
"ThermalPolicy raw_temperature",
0,
"raw_temperature" => raw_temperature.0
);
fuchsia_trace::counter!(
"power_manager",
"ThermalPolicy filtered_temperature",
0,
"filtered_temperature" => filtered_temperature.0
);
fuchsia_trace::counter!(
"power_manager",
"ThermalPolicy error_integral", 0,
"error_integral" => temperature_error_integral
);
fuchsia_trace::counter!(
"power_manager",
"ThermalPolicy thermal_load",
0,
"thermal_load" => thermal_load.0
);
}
/// Checks `temperature` against the configured thermal shutdown temperature. When the
/// temperature is at or above that threshold, the ThrottlingResultShutdown platform metric is
/// logged and the system power handler node is asked to reboot with reason HighTemperature.
///
/// Returns an error only if the shutdown message could not be delivered.
async fn check_critical_temperature(&self, temperature: Celsius) -> Result<(), Error> {
    fuchsia_trace::duration!(
        "power_manager",
        "ThermalPolicy::check_critical_temperature",
        "temperature" => temperature.0
    );

    let shutdown_temperature = self.config.policy_params.thermal_shutdown_temperature.0;

    // Temperature has reached or exceeded the thermal shutdown temperature
    if temperature.0 >= shutdown_temperature {
        fuchsia_trace::instant!(
            "power_manager",
            "ThermalPolicy::thermal_shutdown_reached",
            fuchsia_trace::Scope::Thread,
            "temperature" => temperature.0,
            "shutdown_temperature" => shutdown_temperature
        );

        self.log_platform_metric(PlatformMetric::ThrottlingResultShutdown).await;

        let request =
            Message::SystemShutdown(ShutdownRequest::Reboot(RebootReason::HighTemperature));
        let reply = self.send_message(&self.config.sys_pwr_handler, &request).await;
        if let Err(e) = reply {
            return Err(format_err!("Failed to shut down the system: {}", e));
        }
    }

    Ok(())
}
/// Sends `new_load`, together with this policy's driver path, to every node in
/// `thermal_load_notify_nodes` as an UpdateThermalLoad message. The value is sent on every
/// call. If any node's reply is an error, the first such error is returned.
async fn process_thermal_load(&self, new_load: ThermalLoad) -> Result<(), Error> {
    fuchsia_trace::duration!(
        "power_manager",
        "ThermalPolicy::process_thermal_load",
        "new_load" => new_load.0
    );

    let update = Message::UpdateThermalLoad(new_load, self.driver_path.clone());
    let replies =
        self.send_message_to_many(&self.config.thermal_load_notify_nodes, &update).await;

    for reply in replies {
        reply?;
    }

    Ok(())
}
/// Sends a LogPlatformMetric message carrying `metric` to the platform metrics node. A
/// failure to send is logged rather than returned to the caller.
async fn log_platform_metric(&self, metric: PlatformMetric) {
    let msg = Message::LogPlatformMetric(metric);
    let result = self.send_message(&self.config.platform_metrics_node, &msg).await;
    log_if_err!(result, format!("Failed to log platform metric {:?}", msg));
}
/// Maps an integral error onto a thermal load in the range [0, 100].
///
/// The load is `(error_integral - error_integral_max) / (error_integral_min -
/// error_integral_max) * 100`, truncated to an integer: 0 when the integral error equals
/// `error_integral_max` and 100 when it equals `error_integral_min`.
///
/// An `error_integral` outside `[error_integral_min, error_integral_max]` trips a debug
/// assertion. In builds without debug assertions it is logged as an error and the load
/// saturates at 100 (below the minimum) or 0 (above the maximum).
fn calculate_thermal_load(
    error_integral: f64,
    error_integral_min: f64,
    error_integral_max: f64,
) -> ThermalLoad {
    debug_assert!(
        error_integral >= error_integral_min,
        "error_integral ({}) less than error_integral_min ({})",
        error_integral,
        error_integral_min
    );
    debug_assert!(
        error_integral <= error_integral_max,
        "error_integral ({}) greater than error_integral_max ({})",
        error_integral,
        error_integral_max
    );

    if error_integral < error_integral_min {
        error!(
            "error_integral {} less than error_integral_min {}",
            error_integral, error_integral_min
        );
        return ThermalLoad(100);
    }

    if error_integral > error_integral_max {
        error!(
            "error_integral {} greater than error_integral_max {}",
            error_integral, error_integral_max
        );
        return ThermalLoad(0);
    }

    let range = error_integral_min - error_integral_max;
    let fraction = (error_integral - error_integral_max) / range;
    ThermalLoad((fraction * 100.0) as u32)
}
/// Updates the power allocation according to the provided proportional error and integral error
/// of temperature.
///
/// The available power is computed by `calculate_available_power`, reported as the
/// AvailablePower platform metric and as a trace counter, and then handed to
/// `distribute_power`, whose result is returned.
async fn update_power_allocation(
&self,
error_proportional: f64,
error_integral: f64,
) -> Result<(), Error> {
fuchsia_trace::duration!(
"power_manager",
"ThermalPolicy::update_power_allocation",
"error_proportional" => error_proportional,
"error_integral" => error_integral
);
let available_power = self.calculate_available_power(error_proportional, error_integral);
self.log_platform_metric(PlatformMetric::AvailablePower(available_power)).await;
fuchsia_trace::counter!(
"power_manager",
"ThermalPolicy available_power",
0,
"available_power" => available_power.0
);
self.distribute_power(available_power).await
}
/// PI controller step: temperature is the measured process variable and available power is
/// the control variable.
///
/// Returns the sustainable power plus the proportional term (`error_proportional` times the
/// proportional gain) plus the integral term (`error_integral` times the integral gain), with
/// a floor of 0 W.
fn calculate_available_power(&self, error_proportional: f64, error_integral: f64) -> Watts {
    fuchsia_trace::duration!(
        "power_manager",
        "ThermalPolicy::calculate_available_power",
        "error_proportional" => error_proportional,
        "error_integral" => error_integral
    );

    let params = &self.config.policy_params.controller_params;
    let proportional_term = error_proportional * params.proportional_gain;
    let integral_term = error_integral * params.integral_gain;
    let unclamped_power = params.sustainable_power.0 + proportional_term + integral_term;

    Watts(f64::max(0.0, unclamped_power))
}
/// Hands out the available power (as determined by the prior PI calculation) to the power
/// actors in this closed loop system. CPU is currently the only kind of power actor; later
/// versions of the thermal policy may add more actors with associated "weights" for dividing
/// power amongst them.
///
/// Returns an error as soon as a SetMaxPowerConsumption message fails; nodes later in the
/// list are not contacted in that case.
async fn distribute_power(&self, mut total_available_power: Watts) -> Result<(), Error> {
    fuchsia_trace::duration!(
        "power_manager",
        "ThermalPolicy::distribute_power",
        "total_available_power" => total_available_power.0
    );

    // The whole remaining budget is offered to each CPU control node in turn, starting with
    // the first entry of `cpu_control_nodes`. A node answers SetMaxPowerConsumption with the
    // amount of power it was able to utilize; that amount is deducted from the budget before
    // the next node is offered what is left.
    // TODO(fxbug.dev/48205): We may want to revisit this distribution algorithm to avoid
    // potentially starving some CPU control nodes. We'll want to have some discussions and
    // learn more about intended big.LITTLE scheduling and operation to better inform our
    // decisions here. We may find that we'll need to first query the nodes to learn how much
    // power they intend to use before making allocation decisions.
    for node in &self.config.cpu_control_nodes {
        let request = Message::SetMaxPowerConsumption(total_available_power);
        let reply = self.send_message(node, &request).await?;

        // A reply of any other kind leaves the remaining budget untouched.
        if let MessageReturn::SetMaxPowerConsumption(power_used) = reply {
            self.log_platform_metric(PlatformMetric::CpuPowerUsage(node.name(), power_used))
                .await;
            total_available_power = total_available_power - power_used;
        }
    }

    Ok(())
}
}
/// `Node` implementation for ThermalPolicy. Only `name` is provided here; every other trait
/// item is left at whatever the trait itself supplies.
#[async_trait(?Send)]
impl Node for ThermalPolicy {
    /// Returns the fixed node name, "ThermalPolicy".
    fn name(&self) -> String {
        String::from("ThermalPolicy")
    }
}
/// Holds the Inspect node and properties that the ThermalPolicy publishes.
struct InspectData {
// Nodes
/// The ThermalPolicy's own Inspect node, under which all other nodes and properties live.
root_node: inspect::Node,
// Properties
/// Timestamp of the most recent iteration, published as "timestamp (ns)".
timestamp: inspect::IntProperty,
/// Time since the previous iteration, published as "time_delta (s)".
time_delta: inspect::DoubleProperty,
/// Most recent raw temperature, published as "temperature_raw (C)".
temperature_raw: inspect::DoubleProperty,
/// Most recent filtered temperature, published as "temperature_filtered (C)".
temperature_filtered: inspect::DoubleProperty,
/// Most recent integral error, published as "error_integral".
error_integral: inspect::DoubleProperty,
/// Most recent thermal load, published as "thermal_load".
thermal_load: inspect::UintProperty,
/// Largest time delta observed so far, published as "max_time_delta (s)".
max_time_delta: inspect::DoubleProperty,
}
impl InspectData {
/// Creates the Inspect hierarchy for the ThermalPolicy as a child of `parent` named `name`.
///
/// A "state" child holds the per-iteration properties and a "stats" child holds
/// "max_time_delta (s)". All properties start at zero. The controller parameters from `config`
/// are recorded under "policy_params".
fn new(parent: &inspect::Node, name: String, config: &ThermalConfig) -> Self {
// Create a local root node and properties
let root_node = parent.create_child(name);
let state_node = root_node.create_child("state");
let stats_node = root_node.create_child("stats");
let timestamp = state_node.create_int("timestamp (ns)", 0);
let time_delta = state_node.create_double("time_delta (s)", 0.0);
let temperature_raw = state_node.create_double("temperature_raw (C)", 0.0);
let temperature_filtered = state_node.create_double("temperature_filtered (C)", 0.0);
let error_integral = state_node.create_double("error_integral", 0.0);
let thermal_load = state_node.create_uint("thermal_load", 0);
let max_time_delta = stats_node.create_double("max_time_delta (s)", 0.0);
// Pass ownership of the new nodes to the root node, otherwise they'll be dropped
root_node.record(state_node);
root_node.record(stats_node);
let inspect_data = InspectData {
root_node,
timestamp,
time_delta,
max_time_delta,
temperature_raw,
temperature_filtered,
error_integral,
thermal_load,
};
// The configuration is recorded once here.
inspect_data.set_thermal_config(config);
inspect_data
}
/// Records the controller parameters from `config` into Inspect, under
/// "policy_params/controller_params" beneath this node's root.
fn set_thermal_config(&self, config: &ThermalConfig) {
    let params = &config.policy_params.controller_params;

    let policy_params_node = self.root_node.create_child("policy_params");
    let ctrl_params_node = policy_params_node.create_child("controller_params");

    let entries: [(&str, f64); 8] = [
        ("sample_interval (s)", params.sample_interval.0),
        ("filter_time_constant (s)", params.filter_time_constant.0),
        ("target_temperature (C)", params.target_temperature.0),
        ("e_integral_min", params.e_integral_min),
        ("e_integral_max", params.e_integral_max),
        ("sustainable_power (W)", params.sustainable_power.0),
        ("proportional_gain", params.proportional_gain),
        ("integral_gain", params.integral_gain),
    ];
    for &(key, value) in entries.iter() {
        ctrl_params_node.record_double(key, value);
    }

    // Hand the child nodes to their parents so they stay alive after this function returns.
    policy_params_node.record(ctrl_params_node);
    self.root_node.record(policy_params_node);
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::test::mock_node::{create_dummy_node, MessageMatcher, MockNodeMaker};
use crate::{msg_eq, msg_ok_return};
use inspect::testing::assert_data_tree;
/// Returns the control loop sample interval that `thermal_policy` was configured with.
pub fn get_sample_interval(thermal_policy: &ThermalPolicy) -> Seconds {
    let controller_params = &thermal_policy.config.policy_params.controller_params;
    controller_params.sample_interval
}
/// Returns the baseline policy parameters shared by the tests in this module.
fn default_policy_params() -> ThermalPolicyParams {
    let controller_params = ThermalControllerParams {
        sample_interval: Seconds(1.0),
        filter_time_constant: Seconds(10.0),
        target_temperature: Celsius(85.0),
        e_integral_min: -20.0,
        e_integral_max: 0.0,
        sustainable_power: Watts(1.1),
        proportional_gain: 0.0,
        integral_gain: 0.2,
    };
    ThermalPolicyParams { controller_params, thermal_shutdown_temperature: Celsius(95.0) }
}
/// Tests the calculate_thermal_load function for correctness.
#[test]
fn test_calculate_thermal_load() {
// These tests using invalid error integral values will panic on debug builds and the
// `test_calculate_thermal_load_error_integral_*` tests will verify that. For now (non-debug
// build), just test that the invalid values are clamped to a valid ThermalLoad value.
if cfg!(not(debug_assertions)) {
// An invalid error_integral greater than e_integral_max should clamp at ThermalLoad(0)
assert_eq!(ThermalPolicy::calculate_thermal_load(5.0, -20.0, 0.0), ThermalLoad(0));
// An invalid error_integral less than e_integral_min should clamp at ThermalLoad(100)
assert_eq!(ThermalPolicy::calculate_thermal_load(-25.0, -20.0, 0.0), ThermalLoad(100));
}
// Test some error_integral values ranging from e_integral_max to e_integral_min. The load is
// 0 at e_integral_max, 100 at e_integral_min, and 50 at the midpoint.
assert_eq!(ThermalPolicy::calculate_thermal_load(0.0, -20.0, 0.0), ThermalLoad(0));
assert_eq!(ThermalPolicy::calculate_thermal_load(-10.0, -20.0, 0.0), ThermalLoad(50));
assert_eq!(ThermalPolicy::calculate_thermal_load(-20.0, -20.0, 0.0), ThermalLoad(100));
}
/// Tests that an invalid low error integral value will cause a panic on debug builds. The
/// expected panic text is the message of the corresponding `debug_assert!` in
/// `calculate_thermal_load`.
#[test]
#[should_panic = "error_integral (-25) less than error_integral_min (-20)"]
#[cfg(debug_assertions)]
fn test_calculate_thermal_load_error_integral_low_panic() {
assert_eq!(ThermalPolicy::calculate_thermal_load(-25.0, -20.0, 0.0), ThermalLoad(100));
}
/// Tests that an invalid high error integral value will cause a panic on debug builds. The
/// expected panic text is the message of the corresponding `debug_assert!` in
/// `calculate_thermal_load`.
#[test]
#[should_panic = "error_integral (5) greater than error_integral_max (0)"]
#[cfg(debug_assertions)]
fn test_calculate_thermal_load_error_integral_high_panic() {
assert_eq!(ThermalPolicy::calculate_thermal_load(5.0, -20.0, 0.0), ThermalLoad(0));
}
/// Tests that the `get_time_delta` function correctly calculates time delta while updating the
/// `max_time_delta` state variable.
#[fasync::run_singlethreaded(test)]
async fn test_get_time_delta() {
let thermal_config = ThermalConfig {
temperature_node: create_dummy_node(),
cpu_control_nodes: vec![create_dummy_node()],
sys_pwr_handler: create_dummy_node(),
thermal_load_notify_nodes: vec![create_dummy_node()],
policy_params: default_policy_params(),
platform_metrics_node: create_dummy_node(),
};
let node_futures = FuturesUnordered::new();
let node = ThermalPolicyBuilder::new(thermal_config).build(&node_futures).await.unwrap();
// The previous timestamp starts at 0, so the first delta equals the timestamp itself.
assert_eq!(node.get_time_delta(Seconds(1.5).into()), Seconds(1.5));
// The second delta is measured from the first timestamp: 2.0s - 1.5s.
assert_eq!(node.get_time_delta(Seconds(2.0).into()), Seconds(0.5));
// The smaller second delta must not replace the recorded maximum.
assert_eq!(node.state.max_time_delta.get(), Seconds(1.5));
}
/// Tests that the `get_temperature_error` function correctly calculates proportional error and
/// integral error.
#[fasync::run_singlethreaded(test)]
async fn test_get_temperature_error() {
let mut policy_params = default_policy_params();
policy_params.controller_params.target_temperature = Celsius(80.0);
policy_params.controller_params.e_integral_min = -20.0;
policy_params.controller_params.e_integral_max = 0.0;
let thermal_config = ThermalConfig {
temperature_node: create_dummy_node(),
cpu_control_nodes: vec![create_dummy_node()],
sys_pwr_handler: create_dummy_node(),
thermal_load_notify_nodes: vec![create_dummy_node()],
policy_params,
platform_metrics_node: create_dummy_node(),
};
let node_futures = FuturesUnordered::new();
let node = ThermalPolicyBuilder::new(thermal_config).build(&node_futures).await.unwrap();
// 40C is 40 degrees below target: the integral (0 + 40 * 1s) is clamped to e_integral_max
assert_eq!(node.get_temperature_error(Celsius(40.0), Seconds(1.0)), (40.0, 0.0));
// 90C is 10 degrees above target: the integral accumulates to 0 - 10 * 1s = -10
assert_eq!(node.get_temperature_error(Celsius(90.0), Seconds(1.0)), (-10.0, -10.0));
// Another second at 90C brings the integral to -20, which is exactly e_integral_min
assert_eq!(node.get_temperature_error(Celsius(90.0), Seconds(1.0)), (-10.0, -20.0));
}
/// Tests that the ThermalPolicy will correctly divide total available power amongst multiple
/// CPU control nodes.
#[fasync::run_singlethreaded(test)]
async fn test_multiple_cpu_actors() {
let mut mock_maker = MockNodeMaker::new();
// Set up the two CpuControlHandler mock nodes. The message reply to SetMaxPowerConsumption
// indicates how much power the mock node was able to utilize, and ultimately drives the
// test logic.
let cpu_node_1 = mock_maker.make(
"CpuCtrlNode1",
vec![
// On the first iteration, this node will consume all available power (1W)
(
msg_eq!(SetMaxPowerConsumption(Watts(1.0))),
msg_ok_return!(SetMaxPowerConsumption(Watts(1.0))),
),
// On the second iteration, this node will consume half of the available power
// (0.5W)
(
msg_eq!(SetMaxPowerConsumption(Watts(1.0))),
msg_ok_return!(SetMaxPowerConsumption(Watts(0.5))),
),
// On the third iteration, this node will consume none of the available power
// (0.0W)
(
msg_eq!(SetMaxPowerConsumption(Watts(1.0))),
msg_ok_return!(SetMaxPowerConsumption(Watts(0.0))),
),
],
);
let cpu_node_2 = mock_maker.make(
"CpuCtrlNode2",
vec![
// On the first iteration, the first node consumes all available power (1W), so
// expect to receive a power allocation of 0W
(
msg_eq!(SetMaxPowerConsumption(Watts(0.0))),
msg_ok_return!(SetMaxPowerConsumption(Watts(0.0))),
),
// On the second iteration, the first node consumes half (0.5W) of the available
// power (1W), so expect to receive a power allocation of 0.5W
(
msg_eq!(SetMaxPowerConsumption(Watts(0.5))),
msg_ok_return!(SetMaxPowerConsumption(Watts(0.5))),
),
// On the third iteration, the first node consumes none (0W) of the available
// power (1W), so expect to receive a power allocation of 1W
(
msg_eq!(SetMaxPowerConsumption(Watts(1.0))),
msg_ok_return!(SetMaxPowerConsumption(Watts(1.0))),
),
],
);
let thermal_config = ThermalConfig {
// `build` queries the temperature node for its driver path, so the mock must expect that
// message.
temperature_node: mock_maker.make(
"TemperatureNode",
vec![(msg_eq!(GetDriverPath), msg_ok_return!(GetDriverPath(String::new())))],
),
cpu_control_nodes: vec![cpu_node_1, cpu_node_2],
sys_pwr_handler: mock_maker.make("SysPwrNode", vec![]),
thermal_load_notify_nodes: vec![mock_maker.make("ThermalLoadNotify", vec![])],
policy_params: default_policy_params(),
platform_metrics_node: create_dummy_node(),
};
let node_futures = FuturesUnordered::new();
let node = ThermalPolicyBuilder::new(thermal_config).build(&node_futures).await.unwrap();
// Distribute 1W of total power across the two CPU nodes. The real test logic happens inside
// the mock node, where we verify that the expected power amounts are granted to both CPU
// nodes via the SetMaxPowerConsumption message. Repeat for the number of messages that the
// mock nodes expect to receive (three).
node.distribute_power(Watts(1.0)).await.unwrap();
node.distribute_power(Watts(1.0)).await.unwrap();
node.distribute_power(Watts(1.0)).await.unwrap();
}
/// Tests for the presence and correctness of dynamically-added inspect data
#[fasync::run_singlethreaded(test)]
async fn test_inspect_data() {
let mut mock_maker = MockNodeMaker::new();
// Kept separately from the config below so the expected values are still available for the
// assertion after the config has been moved into the builder.
let policy_params = default_policy_params();
let thermal_config = ThermalConfig {
temperature_node: mock_maker.make(
"TemperatureNode",
vec![(msg_eq!(GetDriverPath), msg_ok_return!(GetDriverPath(String::new())))],
),
cpu_control_nodes: vec![mock_maker.make("CpuCtrlNode", vec![])],
sys_pwr_handler: mock_maker.make("SysPwrNode", vec![]),
thermal_load_notify_nodes: vec![mock_maker.make("ThermalLoadNotify", vec![])],
policy_params: default_policy_params(),
platform_metrics_node: create_dummy_node(),
};
// Use a test-local inspector as the root so the resulting tree can be examined in isolation.
let inspector = inspect::Inspector::new();
let node_futures = FuturesUnordered::new();
let _node = ThermalPolicyBuilder::new(thermal_config)
.with_inspect_root(inspector.root())
.build(&node_futures)
.await
.unwrap();
// Only the recorded controller parameters are checked for exact values; the "state" and
// "stats" nodes just need to exist.
assert_data_tree!(
inspector,
root: {
ThermalPolicy: {
state: contains {},
stats: contains {},
policy_params: {
controller_params: {
"sample_interval (s)":
policy_params.controller_params.sample_interval.0,
"filter_time_constant (s)":
policy_params.controller_params.filter_time_constant.0,
"target_temperature (C)":
policy_params.controller_params.target_temperature.0,
"e_integral_min": policy_params.controller_params.e_integral_min,
"e_integral_max": policy_params.controller_params.e_integral_max,
"sustainable_power (W)":
policy_params.controller_params.sustainable_power.0,
"proportional_gain": policy_params.controller_params.proportional_gain,
"integral_gain": policy_params.controller_params.integral_gain,
}
}
}
}
);
}
/// Checks that `ThermalPolicyBuilder::new_from_json` does not panic when handed well-formed
/// configuration JSON together with a node map covering every dependency named in it.
#[fasync::run_singlethreaded(test)]
async fn test_new_from_json() {
    let config_json = json::json!({
        "type": "ThermalPolicy",
        "name": "thermal_policy",
        "config": {
            "thermal_shutdown_temperature": 95.0,
            "controller_params": {
                "sample_interval": 1.0,
                "filter_time_constant": 5.0,
                "target_temperature": 80.0,
                "e_integral_min": -20.0,
                "e_integral_max": 0.0,
                "sustainable_power": 0.876,
                "proportional_gain": 0.0,
                "integral_gain": 0.08
            }
        },
        "dependencies": {
            "cpu_control_nodes": [
                "cpu_control"
            ],
            "system_power_handler_node": "sys_power",
            "temperature_handler_node": "temperature",
            "thermal_load_notify_nodes": ["thermal_load_notify"],
            "platform_metrics_node": "metrics"
        },
    });

    // Register one dummy node under each name that appears in the "dependencies" section above.
    let mut available_nodes: HashMap<String, Rc<dyn Node>> = HashMap::new();
    for &dependency in
        &["temperature", "cpu_control", "sys_power", "thermal_load_notify", "metrics"]
    {
        available_nodes.insert(dependency.to_string(), create_dummy_node());
    }

    // Only the absence of a panic is being checked, so the resulting builder is discarded.
    let _ = ThermalPolicyBuilder::new_from_json(config_json, &available_nodes);
}
/// Tests that the ThermalPolicy correctly updates the PlatformMetrics node as its thermal state
/// cycles between the various states.
///
/// The policy is driven one timer wake-up at a time under a fake-time executor. Before each step
/// the test queues the temperature the mock sensor will report and the `LogPlatformMetric`
/// message(s) the mock metrics node expects to receive during that step.
#[test]
fn test_platform_metrics() {
let mut mock_maker = MockNodeMaker::new();
// Set custom thermal policy parameters to have easier control over throttling state changes
// NOTE(review): the AvailablePower values expected below (1.0 -> 0.8 -> 0.6 -> 1.0 -> 0.0) look
// consistent with an integral-only controller using these gains and limits; confirm against the
// controller implementation and the default `sample_interval`.
let mut policy_params = default_policy_params();
policy_params.controller_params.target_temperature = Celsius(50.0);
policy_params.controller_params.e_integral_max = 0.0;
policy_params.controller_params.e_integral_min = -20.0;
policy_params.thermal_shutdown_temperature = Celsius(80.0);
policy_params.controller_params.proportional_gain = 0.0;
policy_params.controller_params.integral_gain = 0.1;
policy_params.controller_params.sustainable_power = Watts(1.0);
// The executor's fake time must be set before node creation to ensure the periodic timer's
// deadline is properly initialized.
let mut executor = fasync::TestExecutor::new_with_fake_time().unwrap();
executor.set_fake_time(Seconds(0.0).into());
// The metrics mock starts with no expected messages; expectations are added before each step.
let mock_metrics = mock_maker.make("MockPlatformMetrics", vec![]);
// The temperature mock initially expects only the GetDriverPath query.
let mock_temperature = mock_maker.make(
"MockTemperature",
vec![(msg_eq!(GetDriverPath), msg_ok_return!(GetDriverPath("sensor1".to_string())))],
);
let node_futures = FuturesUnordered::new();
let node_builder = ThermalPolicyBuilder::new(ThermalConfig {
temperature_node: mock_temperature.clone(),
cpu_control_nodes: vec![create_dummy_node()],
sys_pwr_handler: create_dummy_node(),
thermal_load_notify_nodes: vec![],
platform_metrics_node: mock_metrics.clone(),
policy_params,
})
.build(&node_futures);
// This is a plain `#[test]`, so the build future is polled manually on the fake-time executor.
// It is expected to complete without stalling.
let node = match executor.run_until_stalled(&mut node_builder.boxed_local()) {
futures::task::Poll::Ready(Ok(node)) => node,
_ => panic!("Failed to create node"),
};
// Wrap the executor, node, and futures in a simple struct that drives time steps. This
// enables the control flow:
// 1. Set metric expectations (add expected message to mock node(s)).
// 2. Call TimeStepper::iterate_policy to wake the periodic timer and iterate the
// ThermalPolicy.
// 3. Verify metric expectations.
struct TimeStepper<'a> {
executor: fasync::TestExecutor,
node: Rc<ThermalPolicy>,
node_futures: FuturesUnordered<LocalBoxFuture<'a, ()>>,
}
impl<'a> TimeStepper<'a> {
/// Advances fake time to the next pending timer and runs the node's futures until they
/// stall again.
fn iterate_policy(&mut self) {
let wakeup_time = self.executor.wake_next_timer().unwrap();
self.executor.set_fake_time(wakeup_time);
// NOTE(review): presumably resets the filter so each step's reading is not blended with
// earlier samples; confirm against `TemperatureFilter::reset`.
self.node.state.temperature_filter.reset();
// The node futures are expected to still be pending after the step.
assert_eq!(
futures::task::Poll::Pending,
self.executor.run_until_stalled(&mut self.node_futures.next())
);
}
}
let mut stepper = TimeStepper { executor, node, node_futures };
// Not throttled yet
mock_temperature.add_msg_response_pair((
msg_eq!(ReadTemperature),
msg_ok_return!(ReadTemperature(Celsius(50.0))),
));
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::AvailablePower(Watts(1.0),))),
msg_ok_return!(LogPlatformMetric),
));
stepper.iterate_policy();
// Throttling begins because 52 degC is greater than `target_temperature` (50 degC)
mock_temperature.add_msg_response_pair((
msg_eq!(ReadTemperature),
msg_ok_return!(ReadTemperature(Celsius(52.0))),
));
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::AvailablePower(Watts(0.8),))),
msg_ok_return!(LogPlatformMetric),
));
stepper.iterate_policy();
// Throttling still active, available power is reduced
mock_temperature.add_msg_response_pair((
msg_eq!(ReadTemperature),
msg_ok_return!(ReadTemperature(Celsius(52.0))),
));
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::AvailablePower(Watts(0.6),))),
msg_ok_return!(LogPlatformMetric),
));
stepper.iterate_policy();
// End thermal throttling
mock_temperature.add_msg_response_pair((
msg_eq!(ReadTemperature),
msg_ok_return!(ReadTemperature(Celsius(35.0))),
));
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::AvailablePower(Watts(1.0),))),
msg_ok_return!(LogPlatformMetric),
));
stepper.iterate_policy();
// Cause the thermal policy to enter thermal shutdown: 90 degC exceeds the
// `thermal_shutdown_temperature` configured above (80 degC)
mock_temperature.add_msg_response_pair((
msg_eq!(ReadTemperature),
msg_ok_return!(ReadTemperature(Celsius(90.0))),
));
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::ThrottlingResultShutdown)),
msg_ok_return!(LogPlatformMetric),
));
// On a real system, the shutdown would occur before these messages are sent. But since the
// test continues executing even after the shutdown request is sent, we need to add them to
// the expected messages.
mock_metrics.add_msg_response_pair((
msg_eq!(LogPlatformMetric(PlatformMetric::AvailablePower(Watts(0.0),))),
msg_ok_return!(LogPlatformMetric),
));
stepper.iterate_policy();
}
/// Tests that the ThermalPolicy node populates the correct temperature sensor driver path in
/// its UpdateThermalLoad messages.
#[fasync::run_singlethreaded(test)]
async fn test_thermal_load_sensor_path() {
    let mut mock_maker = MockNodeMaker::new();

    // Set up the ThermalPolicy node. The temperature mock answers GetDriverPath with "Sensor1",
    // and the notify mock expects that same path in the UpdateThermalLoad message it receives.
    let thermal_config = ThermalConfig {
        temperature_node: mock_maker.make(
            "TemperatureNode",
            vec![(
                msg_eq!(GetDriverPath),
                msg_ok_return!(GetDriverPath("Sensor1".to_string())),
            )],
        ),
        cpu_control_nodes: vec![create_dummy_node()],
        sys_pwr_handler: create_dummy_node(),
        thermal_load_notify_nodes: vec![mock_maker.make(
            "ThermalLoadNotify",
            vec![(
                msg_eq!(UpdateThermalLoad(ThermalLoad(20), "Sensor1".to_string())),
                msg_ok_return!(UpdateThermalLoad),
            )],
        )],
        policy_params: default_policy_params(),
        platform_metrics_node: create_dummy_node(),
    };
    let node = ThermalPolicyBuilder::new(thermal_config)
        .build(&FuturesUnordered::new())
        .await
        .unwrap();

    // When `process_thermal_load` runs, the new ThermalLoad value will be sent to the
    // ThermalLoadNotify node. The mock will verify the correct sensor path is found in the
    // UpdateThermalLoad message. The call yields `()` on success, so unwrapping the result is
    // the whole check; comparing it against `()` would add nothing.
    node.process_thermal_load(ThermalLoad(20)).await.unwrap();
}
/// Tests that each of the configured `thermal_load_notify_nodes` nodes receive an
/// UpdateThermalLoad message as expected.
#[fasync::run_singlethreaded(test)]
async fn test_multiple_thermal_load_notify_nodes() {
    let mut mock_maker = MockNodeMaker::new();

    // Both notify mocks start with no expected messages; expectations are queued right before
    // each `process_thermal_load` call below.
    let mock_notify1 = mock_maker.make("ThermalLoadNotify1", vec![]);
    let mock_notify2 = mock_maker.make("ThermalLoadNotify2", vec![]);

    // Set up the ThermalPolicy node
    let thermal_config = ThermalConfig {
        temperature_node: mock_maker.make(
            "TemperatureNode",
            vec![(
                msg_eq!(GetDriverPath),
                msg_ok_return!(GetDriverPath("Sensor1".to_string())),
            )],
        ),
        cpu_control_nodes: vec![create_dummy_node()],
        sys_pwr_handler: create_dummy_node(),
        thermal_load_notify_nodes: vec![mock_notify1.clone(), mock_notify2.clone()],
        policy_params: default_policy_params(),
        platform_metrics_node: create_dummy_node(),
    };
    let node = ThermalPolicyBuilder::new(thermal_config)
        .build(&FuturesUnordered::new())
        .await
        .unwrap();

    // When `process_thermal_load` runs, the mocks will verify they each receive the
    // UpdateThermalLoad message
    mock_notify1.add_msg_response_pair((
        msg_eq!(UpdateThermalLoad(ThermalLoad(20), "Sensor1".to_string())),
        msg_ok_return!(UpdateThermalLoad),
    ));
    mock_notify2.add_msg_response_pair((
        msg_eq!(UpdateThermalLoad(ThermalLoad(20), "Sensor1".to_string())),
        msg_ok_return!(UpdateThermalLoad),
    ));
    // The call yields `()` on success, so unwrapping the result is the whole check.
    node.process_thermal_load(ThermalLoad(20)).await.unwrap();

    // Even if thermal load is unchanged, the nodes should still be updated
    mock_notify1.add_msg_response_pair((
        msg_eq!(UpdateThermalLoad(ThermalLoad(20), "Sensor1".to_string())),
        msg_ok_return!(UpdateThermalLoad),
    ));
    mock_notify2.add_msg_response_pair((
        msg_eq!(UpdateThermalLoad(ThermalLoad(20), "Sensor1".to_string())),
        msg_ok_return!(UpdateThermalLoad),
    ));
    node.process_thermal_load(ThermalLoad(20)).await.unwrap();
}
}