| // Copyright 2019 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| use crate::error::PowerManagerError; |
| use crate::log_if_err; |
| use crate::message::{Message, MessageReturn}; |
| use crate::node::Node; |
| use crate::thermal_limiter; |
| use crate::types::{Celsius, Nanoseconds, Seconds, ThermalLoad, Watts}; |
| use crate::utils::{CobaltIntHistogram, CobaltIntHistogramConfig}; |
| use anyhow::{format_err, Error}; |
| use async_trait::async_trait; |
| use fidl_fuchsia_cobalt::HistogramBucket; |
| use fuchsia_async as fasync; |
| use fuchsia_cobalt::{CobaltConnector, CobaltSender, ConnectionType}; |
| use fuchsia_inspect::{ |
| self as inspect, ArrayProperty, HistogramProperty, LinearHistogramParams, Property, |
| }; |
| use fuchsia_syslog::fx_log_info; |
| use fuchsia_zircon as zx; |
| use futures::prelude::*; |
| use power_manager_metrics::power_manager_metrics as power_metrics_registry; |
| use power_metrics_registry::ThermalLimitResultMetricDimensionResult as thermal_limit_result; |
| use serde_derive::Deserialize; |
| use serde_json as json; |
| use std::cell::{Cell, RefCell, RefMut}; |
| use std::collections::{HashMap, VecDeque}; |
| use std::rc::Rc; |
| |
| /// Node: ThermalPolicy |
| /// |
| /// Summary: Implements the closed loop thermal control policy for the system |
| /// |
| /// Handles Messages: N/A |
| /// |
/// Sends Messages:
///     - ReadTemperature
///     - SetMaxPowerConsumption
///     - SystemShutdown
///     - UpdateThermalLoad
///     - FileCrashReport
| /// |
| /// FIDL dependencies: N/A |
| |
| pub struct ThermalPolicyBuilder<'a> { |
| config: ThermalConfig, |
| inspect_root: Option<&'a inspect::Node>, |
| thermal_metrics: Option<CobaltMetrics>, |
| } |
| |
| impl<'a> ThermalPolicyBuilder<'a> { |
| pub fn new(config: ThermalConfig) -> Self { |
| Self { config, inspect_root: None, thermal_metrics: None } |
| } |
| |
| pub fn new_from_json(json_data: json::Value, nodes: &HashMap<String, Rc<dyn Node>>) -> Self { |
| #[derive(Deserialize)] |
| struct ControllerConfig { |
| sample_interval: f64, |
| filter_time_constant: f64, |
| target_temperature: f64, |
| e_integral_min: f64, |
| e_integral_max: f64, |
| sustainable_power: f64, |
| proportional_gain: f64, |
| integral_gain: f64, |
| }; |
| |
| #[derive(Deserialize)] |
| struct NodeConfig { |
| thermal_limiting_range: Vec<f64>, |
| thermal_shutdown_temperature: f64, |
| controller_params: ControllerConfig, |
| throttle_end_delay: f64, |
| }; |
| |
| #[derive(Deserialize)] |
| struct Dependencies { |
| crash_report_handler_node: String, |
| cpu_control_nodes: Vec<String>, |
| system_power_handler_node: String, |
| temperature_handler_node: String, |
| thermal_limiter_node: String, |
| } |
| |
| #[derive(Deserialize)] |
| struct JsonData { |
| config: NodeConfig, |
| dependencies: Dependencies, |
| }; |
| |
| let data: JsonData = json::from_value(json_data).unwrap(); |
| let thermal_config = ThermalConfig { |
| temperature_node: nodes[&data.dependencies.temperature_handler_node].clone(), |
| cpu_control_nodes: data |
| .dependencies |
| .cpu_control_nodes |
| .iter() |
| .map(|node| nodes[node].clone()) |
| .collect(), |
| sys_pwr_handler: nodes[&data.dependencies.system_power_handler_node].clone(), |
| thermal_limiter_node: nodes[&data.dependencies.thermal_limiter_node].clone(), |
| crash_report_handler: nodes[&data.dependencies.crash_report_handler_node].clone(), |
| policy_params: ThermalPolicyParams { |
| controller_params: ThermalControllerParams { |
| sample_interval: Seconds(data.config.controller_params.sample_interval), |
| filter_time_constant: Seconds( |
| data.config.controller_params.filter_time_constant, |
| ), |
| target_temperature: Celsius(data.config.controller_params.target_temperature), |
| e_integral_min: data.config.controller_params.e_integral_min, |
| e_integral_max: data.config.controller_params.e_integral_max, |
| sustainable_power: Watts(data.config.controller_params.sustainable_power), |
| proportional_gain: data.config.controller_params.proportional_gain, |
| integral_gain: data.config.controller_params.integral_gain, |
| }, |
| thermal_limiting_range: [ |
| Celsius(data.config.thermal_limiting_range[0]), |
| Celsius(data.config.thermal_limiting_range[1]), |
| ], |
| thermal_shutdown_temperature: Celsius(data.config.thermal_shutdown_temperature), |
| throttle_end_delay: Seconds(data.config.throttle_end_delay), |
| }, |
| }; |
| Self::new(thermal_config) |
| } |
| |
/// Test-only: publishes Inspect data under `root` instead of the component's default root.
#[cfg(test)]
fn with_inspect_root(self, root: &'a inspect::Node) -> Self {
    Self { inspect_root: Some(root), ..self }
}
| |
/// Test-only: uses `thermal_metrics` instead of a newly created `CobaltMetrics`.
#[cfg(test)]
fn with_thermal_metrics(self, thermal_metrics: CobaltMetrics) -> Self {
    Self { thermal_metrics: Some(thermal_metrics), ..self }
}
| |
| pub fn build(self) -> Result<Rc<ThermalPolicy>, Error> { |
| // Create default values |
| let inspect_root = self.inspect_root.unwrap_or(inspect::component::inspector().root()); |
| let thermal_metrics = self.thermal_metrics.unwrap_or(CobaltMetrics::new()); |
| |
| let node = Rc::new(ThermalPolicy { |
| config: self.config, |
| state: ThermalState { |
| prev_timestamp: Cell::new(Nanoseconds(0)), |
| max_time_delta: Cell::new(Seconds(0.0)), |
| prev_temperature: Cell::new(Celsius(0.0)), |
| error_integral: Cell::new(0.0), |
| state_initialized: Cell::new(false), |
| thermal_load: Cell::new(ThermalLoad(0)), |
| throttle_end_deadline: Cell::new(None), |
| }, |
| inspect: InspectData::new(inspect_root, "ThermalPolicy".to_string()), |
| thermal_metrics: RefCell::new(thermal_metrics), |
| }); |
| |
| node.inspect.set_thermal_config(&node.config); |
| node.clone().start_periodic_thermal_loop(); |
| Ok(node) |
| } |
| } |
| |
| pub struct ThermalPolicy { |
| config: ThermalConfig, |
| state: ThermalState, |
| |
| /// A struct for managing Component Inspection data. |
| inspect: InspectData, |
| |
| /// Metrics collection for thermals. |
| thermal_metrics: RefCell<CobaltMetrics>, |
| } |
| |
| /// A struct to store all configurable aspects of the ThermalPolicy node |
| pub struct ThermalConfig { |
| /// The node to provide temperature readings for the thermal control loop. It is expected that |
| /// this node responds to the ReadTemperature message. |
| pub temperature_node: Rc<dyn Node>, |
| |
| /// The nodes used to impose limits on CPU power state. There will be one node for each CPU |
| /// power domain (e.g., big.LITTLE). It is expected that these nodes respond to the |
| /// SetMaxPowerConsumption message. |
| pub cpu_control_nodes: Vec<Rc<dyn Node>>, |
| |
| /// The node to handle system power state changes (e.g., shutdown). It is expected that this |
| /// node responds to the SystemShutdown message. |
| pub sys_pwr_handler: Rc<dyn Node>, |
| |
| /// The node which will impose thermal limits on external clients according to the thermal |
| /// load of the system. It is expected that this node responds to the UpdateThermalLoad |
| /// message. |
| pub thermal_limiter_node: Rc<dyn Node>, |
| |
| /// The node used for filing a crash report. It is expected that this node responds to the |
| /// FileCrashReport message. |
| pub crash_report_handler: Rc<dyn Node>, |
| |
| /// All parameter values relating to the thermal policy itself |
| pub policy_params: ThermalPolicyParams, |
| } |
| |
| /// A struct to store all configurable aspects of the thermal policy itself |
| pub struct ThermalPolicyParams { |
| /// The thermal control loop parameters |
| pub controller_params: ThermalControllerParams, |
| |
| /// The temperature at which to begin limiting external subsystems which are not managed by the |
| /// thermal feedback controller |
| pub thermal_limiting_range: [Celsius; 2], |
| |
| /// If temperature reaches or exceeds this value, the policy will command a system shutdown |
| pub thermal_shutdown_temperature: Celsius, |
| |
| /// Time to wait after throttling ends to officially declare a throttle event complete. |
| pub throttle_end_delay: Seconds, |
| } |
| |
| /// A struct to store the tunable thermal control loop parameters |
| #[derive(Clone, Debug)] |
| pub struct ThermalControllerParams { |
| /// The interval at which to run the thermal control loop |
| pub sample_interval: Seconds, |
| |
| /// Time constant for the low-pass filter used for smoothing the temperature input signal |
| pub filter_time_constant: Seconds, |
| |
| /// Target temperature for the PID control calculation |
| pub target_temperature: Celsius, |
| |
| /// Minimum integral error [degC * s] for the PID control calculation |
| pub e_integral_min: f64, |
| |
| /// Maximum integral error [degC * s] for the PID control calculation |
| pub e_integral_max: f64, |
| |
| /// The available power when there is no temperature error |
| pub sustainable_power: Watts, |
| |
| /// The proportional gain [W / degC] for the PID control calculation |
| pub proportional_gain: f64, |
| |
| /// The integral gain [W / (degC * s)] for the PID control calculation |
| pub integral_gain: f64, |
| } |
| |
| /// State information that is used for calculations across controller iterations |
| struct ThermalState { |
| /// The time of the previous controller iteration |
| prev_timestamp: Cell<Nanoseconds>, |
| |
| /// The largest observed time between controller iterations (may be used to detect hangs) |
| max_time_delta: Cell<Seconds>, |
| |
| /// The temperature reading from the previous controller iteration |
| prev_temperature: Cell<Celsius>, |
| |
| /// The integral error [degC * s] that is accumulated across controller iterations |
| error_integral: Cell<f64>, |
| |
| /// A flag to know if the rest of ThermalState has not been initialized yet |
| state_initialized: Cell<bool>, |
| |
| /// A cached value in the range [0 - MAX_THERMAL_LOAD] which is defined as |
| /// ((temperature - range_start) / (range_end - range_start) * MAX_THERMAL_LOAD). |
| thermal_load: Cell<ThermalLoad>, |
| |
| /// After we exit throttling, if `throttle_end_delay` is nonzero then this value will |
| /// indicate the time that we may officially consider a throttle event complete. |
| throttle_end_deadline: Cell<Option<Nanoseconds>>, |
| } |
| |
| impl ThermalPolicy { |
| /// Starts a periodic timer that fires at the interval specified by |
| /// ThermalControllerParams.sample_interval. At each timer, `iterate_thermal_control` is called |
| /// and any resulting errors are logged. |
| fn start_periodic_thermal_loop(self: Rc<Self>) { |
| let mut periodic_timer = fasync::Interval::new(zx::Duration::from_nanos( |
| self.config.policy_params.controller_params.sample_interval.into_nanos(), |
| )); |
| |
| fasync::spawn_local(async move { |
| while let Some(()) = periodic_timer.next().await { |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::periodic_timer_fired", |
| fuchsia_trace::Scope::Thread |
| ); |
| let result = self.iterate_thermal_control().await; |
| log_if_err!(result, "Error while running thermal control iteration"); |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::iterate_thermal_control_result", |
| fuchsia_trace::Scope::Thread, |
| "result" => format!("{:?}", result).as_str() |
| ); |
| } |
| }); |
| } |
| |
| /// This is the main body of the closed loop thermal control logic. The function is called |
| /// periodically by the timer started in `start_periodic_thermal_loop`. For each iteration, the |
| /// following steps will be taken: |
| /// 1. Read the current temperature from the temperature driver specified in ThermalConfig |
| /// 2. Filter the raw temperature value using a low-pass filter |
| /// 3. Use the new filtered temperature value as input to the PID control algorithm |
| /// 4. The PID algorithm outputs the available power limit to impose in the system |
| /// 5. Distribute the available power to the power actors (initially this is only the CPU) |
| pub async fn iterate_thermal_control(&self) -> Result<(), Error> { |
| fuchsia_trace::duration!("power_manager", "ThermalPolicy::iterate_thermal_control"); |
| |
| let raw_temperature = self.get_temperature().await?; |
| self.thermal_metrics.borrow_mut().log_raw_temperature(raw_temperature); |
| |
| // Record the timestamp for this iteration now that we have all the data we need to proceed |
| let timestamp = Nanoseconds(fasync::Time::now().into_nanos()); |
| |
| // We should have run the iteration at least once before proceeding |
| if !self.state.state_initialized.get() { |
| self.state.prev_temperature.set(raw_temperature); |
| self.state.prev_timestamp.set(timestamp); |
| self.state.state_initialized.set(true); |
| self.inspect.state_initialized.set(1); |
| return Ok(()); |
| } |
| |
| let time_delta = Seconds::from_nanos(timestamp.0 - self.state.prev_timestamp.get().0); |
| if time_delta.0 > self.state.max_time_delta.get().0 { |
| self.state.max_time_delta.set(time_delta); |
| self.inspect.max_time_delta.set(time_delta.0); |
| } |
| self.state.prev_timestamp.set(timestamp); |
| |
| let filtered_temperature = Celsius(low_pass_filter( |
| raw_temperature.0, |
| self.state.prev_temperature.get().0, |
| time_delta.0, |
| self.config.policy_params.controller_params.filter_time_constant.0, |
| )); |
| self.state.prev_temperature.set(filtered_temperature); |
| |
| self.inspect.timestamp.set(timestamp.0); |
| self.inspect.time_delta.set(time_delta.0); |
| self.inspect.temperature_raw.set(raw_temperature.0); |
| self.inspect.temperature_filtered.set(filtered_temperature.0); |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::thermal_control_iteration_data", |
| fuchsia_trace::Scope::Thread, |
| "timestamp" => timestamp.0 |
| ); |
| fuchsia_trace::counter!( |
| "power_manager", |
| "ThermalPolicy raw_temperature", |
| 0, |
| "raw_temperature" => raw_temperature.0 |
| ); |
| fuchsia_trace::counter!( |
| "power_manager", |
| "ThermalPolicy filtered_temperature", |
| 0, |
| "filtered_temperature" => filtered_temperature.0 |
| ); |
| |
| // If the new temperature is above the critical threshold then shutdown the system |
| let result = self.check_critical_temperature(timestamp, filtered_temperature).await; |
| log_if_err!(result, "Error checking critical temperature"); |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::check_critical_temperature_result", |
| fuchsia_trace::Scope::Thread, |
| "result" => format!("{:?}", result).as_str() |
| ); |
| |
| // Update the ThermalLimiter node with the latest thermal load |
| let result = self.update_thermal_load(timestamp, filtered_temperature).await; |
| log_if_err!(result, "Error updating thermal load"); |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::update_thermal_load_result", |
| fuchsia_trace::Scope::Thread, |
| "result" => format!("{:?}", result).as_str() |
| ); |
| |
| // Run the thermal feedback controller |
| let result = self.iterate_controller(filtered_temperature, time_delta).await; |
| log_if_err!(result, "Error running thermal feedback controller"); |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::iterate_controller_result", |
| fuchsia_trace::Scope::Thread, |
| "result" => format!("{:?}", result).as_str() |
| ); |
| |
| Ok(()) |
| } |
| |
| /// Query the current temperature from the temperature handler node |
| async fn get_temperature(&self) -> Result<Celsius, Error> { |
| fuchsia_trace::duration!("power_manager", "ThermalPolicy::get_temperature"); |
| match self.send_message(&self.config.temperature_node, &Message::ReadTemperature).await { |
| Ok(MessageReturn::ReadTemperature(t)) => Ok(t), |
| Ok(r) => Err(format_err!("ReadTemperature had unexpected return value: {:?}", r)), |
| Err(e) => Err(format_err!("ReadTemperature failed: {:?}", e)), |
| } |
| } |
| |
| /// Compares the supplied temperature with the thermal config thermal shutdown temperature. If |
| /// we've reached or exceeded the shutdown temperature, message the system power handler node |
| /// to initiate a system shutdown. |
| async fn check_critical_temperature( |
| &self, |
| timestamp: Nanoseconds, |
| temperature: Celsius, |
| ) -> Result<(), Error> { |
| fuchsia_trace::duration!( |
| "power_manager", |
| "ThermalPolicy::check_critical_temperature", |
| "temperature" => temperature.0 |
| ); |
| |
| // Temperature has exceeded the thermal shutdown temperature |
| if temperature.0 >= self.config.policy_params.thermal_shutdown_temperature.0 { |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::thermal_shutdown_reached", |
| fuchsia_trace::Scope::Thread, |
| "temperature" => temperature.0, |
| "shutdown_temperature" => self.config.policy_params.thermal_shutdown_temperature.0 |
| ); |
| |
| self.thermal_metrics.borrow_mut().log_throttle_end_shutdown(timestamp); |
| self.inspect.throttle_history().mark_throttling_inactive(timestamp); |
| |
| // TODO(pshickel): We shouldn't ever get an error here. But we should probably have |
| // some type of fallback or secondary mechanism of halting the system if it somehow |
| // does happen. This could have physical safety implications. |
| self.send_message( |
| &self.config.sys_pwr_handler, |
| &Message::SystemShutdown( |
| format!( |
| "Exceeded thermal limit ({}C > {}C)", |
| temperature.0, self.config.policy_params.thermal_shutdown_temperature.0 |
| ) |
| .to_string(), |
| ), |
| ) |
| .await |
| .map_err(|e| format_err!("Failed to shutdown the system: {}", e))?; |
| } |
| |
| Ok(()) |
| } |
| |
| /// Determines the current thermal load. If there is a change from the cached thermal_load, |
| /// then the new value is sent out to the ThermalLimiter node. |
| async fn update_thermal_load( |
| &self, |
| timestamp: Nanoseconds, |
| temperature: Celsius, |
| ) -> Result<(), Error> { |
| fuchsia_trace::duration!( |
| "power_manager", |
| "ThermalPolicy::update_thermal_load", |
| "temperature" => temperature.0 |
| ); |
| |
| let mut return_val = Ok(()); |
| let thermal_load = Self::calculate_thermal_load( |
| temperature, |
| &self.config.policy_params.thermal_limiting_range, |
| ); |
| |
| fuchsia_trace::counter!( |
| "power_manager", |
| "ThermalPolicy thermal_load", |
| 0, |
| "thermal_load" => thermal_load.0 |
| ); |
| |
| if thermal_load != self.state.thermal_load.get() { |
| fuchsia_trace::instant!( |
| "power_manager", |
| "ThermalPolicy::thermal_load_changed", |
| fuchsia_trace::Scope::Thread, |
| "old_load" => self.state.thermal_load.get().0, |
| "new_load" => thermal_load.0 |
| ); |
| |
| if self.state.thermal_load.get().0 == 0 { |
| // We've just entered thermal limiting |
| fx_log_info!("Begin thermal mitigation"); |
| self.thermal_metrics.borrow_mut().log_throttle_start(timestamp); |
| self.inspect.throttle_history().mark_throttling_active(timestamp); |
| self.state.throttle_end_deadline.set(None); |
| } else if thermal_load.0 == 0 { |
| // We've just exited thermal limiting. Set the deadline time that we may consider |
| // the throttle event officially complete. |
| fx_log_info!("End thermal mitigation"); |
| let throttle_end_deadline = timestamp |
| + Nanoseconds(self.config.policy_params.throttle_end_delay.into_nanos()); |
| self.state.throttle_end_deadline.set(Some(throttle_end_deadline)); |
| } |
| |
| self.state.thermal_load.set(thermal_load); |
| |
| // Record any errors here, but don't return early from the function in case we have a |
| // pending throttle_end_deadline to deal with |
| return_val = match self |
| .send_message( |
| &self.config.thermal_limiter_node, |
| &Message::UpdateThermalLoad(thermal_load), |
| ) |
| .await |
| { |
| Ok(_) => Ok(()), |
| Err(e) => Err(Error::from(e)), |
| }; |
| } |
| |
| // If a throttle end deadline time was set and the deadline has been passed, then declare |
| // the throttle event complete and clear the deadline |
| if self.state.throttle_end_deadline.get().is_some() |
| && timestamp >= self.state.throttle_end_deadline.get().unwrap() |
| { |
| self.state.throttle_end_deadline.set(None); |
| self.thermal_metrics.borrow_mut().log_throttle_end_mitigated(timestamp); |
| self.inspect.throttle_history().mark_throttling_inactive(timestamp); |
| self.file_thermal_crash_report().await; |
| } |
| |
| self.inspect.throttle_history().record_thermal_load(thermal_load); |
| |
| return_val |
| } |
| |
| /// Calculates the thermal load which is a value in the range [0 - MAX_THERMAL_LOAD] defined as |
| /// ((temperature - range_start) / (range_end - range_start) * MAX_THERMAL_LOAD) |
| fn calculate_thermal_load(temperature: Celsius, range: &[Celsius; 2]) -> ThermalLoad { |
| let range_start = range[0]; |
| let range_end = range[1]; |
| if temperature.0 < range_start.0 { |
| ThermalLoad(0) |
| } else if temperature.0 > range_end.0 { |
| thermal_limiter::MAX_THERMAL_LOAD |
| } else { |
| ThermalLoad( |
| ((temperature.0 - range_start.0) / (range_end.0 - range_start.0) |
| * thermal_limiter::MAX_THERMAL_LOAD.0 as f64) as u32, |
| ) |
| } |
| } |
| |
| /// Execute the thermal feedback control loop |
| async fn iterate_controller( |
| &self, |
| filtered_temperature: Celsius, |
| time_delta: Seconds, |
| ) -> Result<(), Error> { |
| fuchsia_trace::duration!( |
| "power_manager", |
| "ThermalPolicy::iterate_controller", |
| "filtered_temperature" => filtered_temperature.0, |
| "time_delta" => time_delta.0 |
| ); |
| let available_power = self.calculate_available_power(filtered_temperature, time_delta); |
| self.inspect.throttle_history().record_available_power(available_power); |
| fuchsia_trace::counter!( |
| "power_manager", |
| "ThermalPolicy available_power", |
| 0, |
| "available_power" => available_power.0 |
| ); |
| |
| self.distribute_power(available_power).await |
| } |
| |
| /// A PID control algorithm that uses temperature as the measured process variable, and |
| /// available power as the control variable. Each call to the function will also |
| /// update the state variable `error_integral` to be used on subsequent iterations. |
| fn calculate_available_power(&self, temperature: Celsius, time_delta: Seconds) -> Watts { |
| fuchsia_trace::duration!( |
| "power_manager", |
| "ThermalPolicy::calculate_available_power", |
| "temperature" => temperature.0, |
| "time_delta" => time_delta.0 |
| ); |
| let controller_params = &self.config.policy_params.controller_params; |
| let temperature_error = controller_params.target_temperature.0 - temperature.0; |
| let error_integral = num_traits::clamp( |
| self.state.error_integral.get() + temperature_error * time_delta.0, |
| controller_params.e_integral_min, |
| controller_params.e_integral_max, |
| ); |
| self.state.error_integral.set(error_integral); |
| self.inspect.error_integral.set(error_integral); |
| fuchsia_trace::counter!( |
| "power_manager", |
| "ThermalPolicy error_integral", 0, |
| "error_integral" => error_integral |
| ); |
| |
| let p_term = temperature_error * controller_params.proportional_gain; |
| let i_term = error_integral * controller_params.integral_gain; |
| let power_available = |
| f64::max(0.0, controller_params.sustainable_power.0 + p_term + i_term); |
| |
| Watts(power_available) |
| } |
| |
| /// This function is responsible for distributing the available power (as determined by the |
| /// prior PID calculation) to the various power actors that are included in this closed loop |
| /// system. Initially, CPU is the only power actor. In later versions of the thermal policy, |
| /// there may be more power actors with associated "weights" for distributing power amongst |
| /// them. |
| async fn distribute_power(&self, mut total_available_power: Watts) -> Result<(), Error> { |
| fuchsia_trace::duration!( |
| "power_manager", |
| "ThermalPolicy::distribute_power", |
| "total_available_power" => total_available_power.0 |
| ); |
| |
| // The power distribution currently works by allocating the total available power to the |
| // first CPU control node in `cpu_control_nodes`. The node replies to the |
| // SetMaxPowerConsumption message with the amount of power it was able to utilize. This |
| // utilized amount is subtracted from the total available power, then the remaining power is |
| // allocated to the remaining CPU control nodes in the same way. |
| |
| // TODO(fxb/48205): We may want to revisit this distribution algorithm to avoid potentially |
| // starving some CPU control nodes. We'll want to have some discussions and learn more about |
| // intended big.LITTLE scheduling and operation to better inform our decisions here. We may |
| // find that we'll need to first query the nodes to learn how much power they intend to use |
| // before making allocation decisions. |
| for (i, node) in self.config.cpu_control_nodes.iter().enumerate() { |
| if let MessageReturn::SetMaxPowerConsumption(power_used) = self |
| .send_message(&node, &Message::SetMaxPowerConsumption(total_available_power)) |
| .await? |
| { |
| self.inspect |
| .throttle_history |
| .borrow_mut() |
| .record_cpu_power_consumption(i, power_used); |
| total_available_power = total_available_power - power_used; |
| } |
| } |
| |
| Ok(()) |
| } |
| |
| /// File a crash report with the signature "fuchsia-thermal-throttle". |
| async fn file_thermal_crash_report(&self) { |
| log_if_err!( |
| self.send_message( |
| &self.config.crash_report_handler, |
| &Message::FileCrashReport("fuchsia-thermal-throttle".to_string()), |
| ) |
| .await, |
| "Failed to file crash report" |
| ); |
| } |
| } |
| |
/// First-order low-pass filter: moves the previous output `y_prev` towards the new sample `y`
/// by the fraction `time_delta / time_constant`.
///
/// The fraction is limited to the range [0, 1] so that the output always lies between `y_prev`
/// and `y`. Without the limit, a `time_delta` larger than `time_constant` (e.g., after a
/// delayed iteration) would push the output past the new sample, amplifying the input instead
/// of smoothing it. A `time_constant` that is zero or negative disables filtering and returns
/// `y` unchanged, which also avoids a division by zero.
fn low_pass_filter(y: f64, y_prev: f64, time_delta: f64, time_constant: f64) -> f64 {
    if time_constant <= 0.0 {
        return y;
    }

    let alpha = f64::max(0.0, f64::min(1.0, time_delta / time_constant));
    y_prev + alpha * (y - y_prev)
}
| |
| #[async_trait(?Send)] |
| impl Node for ThermalPolicy { |
| fn name(&self) -> &'static str { |
| "ThermalPolicy" |
| } |
| |
| async fn handle_message(&self, msg: &Message) -> Result<MessageReturn, PowerManagerError> { |
| match msg { |
| _ => Err(PowerManagerError::Unsupported), |
| } |
| } |
| } |
| |
| struct InspectData { |
| // Nodes |
| root_node: inspect::Node, |
| |
| // Properties |
| timestamp: inspect::IntProperty, |
| time_delta: inspect::DoubleProperty, |
| temperature_raw: inspect::DoubleProperty, |
| temperature_filtered: inspect::DoubleProperty, |
| error_integral: inspect::DoubleProperty, |
| state_initialized: inspect::UintProperty, |
| max_time_delta: inspect::DoubleProperty, |
| throttle_history: RefCell<InspectThrottleHistory>, |
| } |
| |
| impl InspectData { |
| /// Rolling number of throttle events to store in `throttle_history`. |
| const NUM_THROTTLE_EVENTS: usize = 10; |
| |
| fn new(parent: &inspect::Node, name: String) -> Self { |
| // Create a local root node and properties |
| let root_node = parent.create_child(name); |
| let state_node = root_node.create_child("state"); |
| let stats_node = root_node.create_child("stats"); |
| let timestamp = state_node.create_int("timestamp (ns)", 0); |
| let time_delta = state_node.create_double("time_delta (s)", 0.0); |
| let temperature_raw = state_node.create_double("temperature_raw (C)", 0.0); |
| let temperature_filtered = state_node.create_double("temperature_filtered (C)", 0.0); |
| let error_integral = state_node.create_double("error_integral", 0.0); |
| let state_initialized = state_node.create_uint("state_initialized", 0); |
| let max_time_delta = stats_node.create_double("max_time_delta (s)", 0.0); |
| let throttle_history = RefCell::new(InspectThrottleHistory::new( |
| root_node.create_child("throttle_history"), |
| Self::NUM_THROTTLE_EVENTS, |
| )); |
| |
| // Pass ownership of the new nodes to the root node, otherwise they'll be dropped |
| root_node.record(state_node); |
| root_node.record(stats_node); |
| |
| InspectData { |
| root_node, |
| timestamp, |
| time_delta, |
| max_time_delta, |
| temperature_raw, |
| temperature_filtered, |
| error_integral, |
| state_initialized, |
| throttle_history, |
| } |
| } |
| |
| fn set_thermal_config(&self, config: &ThermalConfig) { |
| let policy_params_node = self.root_node.create_child("policy_params"); |
| let ctrl_params_node = policy_params_node.create_child("controller_params"); |
| let params = &config.policy_params.controller_params; |
| ctrl_params_node.record_double("sample_interval (s)", params.sample_interval.0); |
| ctrl_params_node.record_double("filter_time_constant (s)", params.filter_time_constant.0); |
| ctrl_params_node.record_double("target_temperature (C)", params.target_temperature.0); |
| ctrl_params_node.record_double("e_integral_min", params.e_integral_min); |
| ctrl_params_node.record_double("e_integral_max", params.e_integral_max); |
| ctrl_params_node.record_double("sustainable_power (W)", params.sustainable_power.0); |
| ctrl_params_node.record_double("proportional_gain", params.proportional_gain); |
| ctrl_params_node.record_double("integral_gain", params.integral_gain); |
| policy_params_node.record(ctrl_params_node); |
| |
| let thermal_range = policy_params_node.create_double_array("thermal_limiting_range (C)", 2); |
| thermal_range.set(0, config.policy_params.thermal_limiting_range[0].0); |
| thermal_range.set(1, config.policy_params.thermal_limiting_range[1].0); |
| policy_params_node.record(thermal_range); |
| |
| self.root_node.record(policy_params_node); |
| } |
| |
| /// A convenient wrapper to mutably borrow `throttle_history`. |
| fn throttle_history(&self) -> RefMut<'_, InspectThrottleHistory> { |
| self.throttle_history.borrow_mut() |
| } |
| } |
| |
| /// Captures and retains data from previous throttling events in a rolling buffer. |
| struct InspectThrottleHistory { |
| /// The Inspect node that will be used as the parent for throttle event child nodes. |
| root_node: inspect::Node, |
| |
| /// A running count of the number of throttle events ever captured in `throttle_history_list`. |
| /// The count is always increasing, even when older throttle events are removed from the list. |
| entry_count: usize, |
| |
| /// The maximum number of throttling events to keep in `throttle_history_list`. |
| capacity: usize, |
| |
| /// State to track if throttling is currently active (used to ignore readings when throttling |
| /// isn't active). |
| throttling_active: bool, |
| |
| /// List to store the throttle entries. |
| throttle_history_list: VecDeque<InspectThrottleHistoryEntry>, |
| } |
| |
| impl InspectThrottleHistory { |
|     /// Creates an empty throttle history whose entries are published as children of `root_node`. |
|     /// Once the list holds `capacity` entries, the oldest one is removed before a new one is added. |
|     fn new(root_node: inspect::Node, capacity: usize) -> Self { |
|         Self { |
|             entry_count: 0, |
|             capacity, |
|             throttling_active: false, |
|             throttle_history_list: VecDeque::with_capacity(capacity), |
|             root_node, |
|         } |
|     } |
| |
|     /// Mark the start of throttling. Begins a new history entry and records `timestamp` as its |
|     /// start time. |
|     /// |
|     /// Panics if throttling is already active, i.e. the previous throttling period was not ended |
|     /// with `mark_throttling_inactive`. |
|     fn mark_throttling_active(&mut self, timestamp: Nanoseconds) { |
|         // Must have ended previous throttling |
|         assert!( |
|             !self.throttling_active, |
|             "mark_throttling_active called before ending previous throttling" |
|         ); |
| |
|         // Begin a new throttling entry |
|         self.new_entry(); |
| |
|         self.throttling_active = true; |
|         self.current_entry().throttle_start_time.set(timestamp.0); |
|     } |
| |
|     /// Mark the end of throttling by recording `timestamp` as the end time of the current entry. |
|     /// No-op unless throttling has been set active. |
|     fn mark_throttling_inactive(&mut self, timestamp: Nanoseconds) { |
|         if self.throttling_active { |
|             self.current_entry().throttle_end_time.set(timestamp.0); |
|             self.throttling_active = false; |
|         } |
|     } |
| |
|     /// Begin a new throttling entry. Removes the oldest entry first if the list already holds |
|     /// `capacity` entries. The new entry's Inspect node is named after the running `entry_count`, |
|     /// which is then incremented. |
|     fn new_entry(&mut self) { |
|         if self.throttle_history_list.len() >= self.capacity { |
|             self.throttle_history_list.pop_front(); |
|         } |
| |
|         let node = self.root_node.create_child(&self.entry_count.to_string()); |
|         self.throttle_history_list.push_back(InspectThrottleHistoryEntry::new(node)); |
|         self.entry_count += 1; |
|     } |
| |
|     /// Returns the most recently added entry. |
|     /// |
|     /// Only called after `new_entry` has run at least once (directly, or implied by |
|     /// `throttling_active` being set), so an empty list here is an invariant violation. |
|     fn current_entry(&self) -> &InspectThrottleHistoryEntry { |
|         self.throttle_history_list |
|             .back() |
|             .expect("throttle history must contain an entry once throttling has started") |
|     } |
| |
|     /// Record the current thermal load. No-op unless throttling has been set active. |
|     fn record_thermal_load(&self, thermal_load: ThermalLoad) { |
|         if self.throttling_active { |
|             self.current_entry().thermal_load_hist.insert(thermal_load.0.into()); |
|         } |
|     } |
| |
|     /// Record the current available power. No-op unless throttling has been set active. |
|     fn record_available_power(&self, available_power: Watts) { |
|         if self.throttling_active { |
|             self.current_entry().available_power_hist.insert(available_power.0); |
|         } |
|     } |
| |
|     /// Record the current CPU power consumption for a given CPU index. No-op unless throttling has |
|     /// been set active. |
|     fn record_cpu_power_consumption(&mut self, cpu_index: usize, power_used: Watts) { |
|         if self.throttling_active { |
|             // Needs mutable access because the per-CPU histogram may be created on first use |
|             self.throttle_history_list |
|                 .back_mut() |
|                 .expect("throttle history must contain an entry once throttling has started") |
|                 .get_cpu_power_usage_property(cpu_index) |
|                 .insert(power_used.0); |
|         } |
|     } |
| } |
| |
| /// Stores data for a single throttle event. |
| struct InspectThrottleHistoryEntry { |
|     /// Node under which this entry's properties are created. Stored but never read (presumably to |
|     /// keep the node alive for as long as the entry exists). |
|     _node: inspect::Node, |
| |
|     /// "throttle_start_time" property, created with an initial value of 0. |
|     throttle_start_time: inspect::IntProperty, |
| |
|     /// "throttle_end_time" property, created with an initial value of 0. |
|     throttle_end_time: inspect::IntProperty, |
| |
|     /// "thermal_load_hist" histogram (floor 0, step size 1, 100 buckets). |
|     thermal_load_hist: inspect::UintLinearHistogramProperty, |
| |
|     /// "available_power_hist" histogram (floor 0.0, step size 0.1, 100 buckets). |
|     available_power_hist: inspect::DoubleLinearHistogramProperty, |
| |
|     /// "cpu_power_usage" child node that parents the per-CPU histograms. |
|     cpu_power_usage_node: inspect::Node, |
| |
|     /// Per-CPU power usage histograms, where position `i` holds the histogram named `i`. |
|     cpu_power_usage: Vec<inspect::DoubleLinearHistogramProperty>, |
| } |
| |
| impl InspectThrottleHistoryEntry { |
|     /// Creates a new InspectThrottleHistoryEntry which creates new properties under `node`. |
|     fn new(node: inspect::Node) -> Self { |
|         Self { |
|             throttle_start_time: node.create_int("throttle_start_time", 0), |
|             throttle_end_time: node.create_int("throttle_end_time", 0), |
|             thermal_load_hist: node.create_uint_linear_histogram( |
|                 "thermal_load_hist", |
|                 LinearHistogramParams { floor: 0, step_size: 1, buckets: 100 }, |
|             ), |
|             available_power_hist: node.create_double_linear_histogram( |
|                 "available_power_hist", |
|                 LinearHistogramParams { floor: 0.0, step_size: 0.1, buckets: 100 }, |
|             ), |
|             cpu_power_usage_node: node.create_child("cpu_power_usage"), |
|             cpu_power_usage: Vec::new(), |
|             _node: node, |
|         } |
|     } |
| |
|     /// Gets the property to record CPU power usage for the given CPU index. These properties are |
|     /// created dynamically because CPU domain count is not a fixed number. |
|     /// |
|     /// If `index` has not been seen before, histograms are created for every missing position up |
|     /// to and including `index`, each named after its own position. This keeps position `i` |
|     /// paired with the histogram named `i` and guarantees the final lookup is in bounds even when |
|     /// indices are first requested out of order. |
|     fn get_cpu_power_usage_property( |
|         &mut self, |
|         index: usize, |
|     ) -> &inspect::DoubleLinearHistogramProperty { |
|         while self.cpu_power_usage.len() <= index { |
|             let next_index = self.cpu_power_usage.len(); |
|             self.cpu_power_usage.push(self.cpu_power_usage_node.create_double_linear_histogram( |
|                 next_index.to_string(), |
|                 LinearHistogramParams { floor: 0.0, step_size: 0.1, buckets: 100 }, |
|             )); |
|         } |
|         &self.cpu_power_usage[index] |
|     } |
| } |
| |
| /// Stores and dispatches Cobalt metric data. |
| struct CobaltMetrics { |
|     /// Sends Cobalt events to the Cobalt FIDL service. |
|     cobalt_sender: CobaltSender, |
| |
|     /// Timestamp of the start of a throttling duration. `None` while no throttling duration is |
|     /// being tracked. |
|     throttle_start_time: Option<Nanoseconds>, |
| |
|     /// Histogram of raw temperature readings. |
|     temperature_histogram: CobaltIntHistogram, |
| } |
| |
| impl CobaltMetrics { |
|     /// Number of temperature readings before dispatching a Cobalt event. |
|     const NUM_TEMPERATURE_READINGS: u32 = 100; |
| |
|     /// Creates a new CobaltMetrics backed by a `CobaltSender` obtained from `CobaltConnector` for |
|     /// the power manager metrics project. |
|     fn new() -> Self { |
|         let (cobalt_sender, sender_future) = CobaltConnector::default() |
|             .serve(ConnectionType::project_id(power_metrics_registry::PROJECT_ID)); |
| |
|         // Spawn the future that handles sending data to Cobalt |
|         fasync::spawn_local(sender_future); |
| |
|         Self::new_with_cobalt_sender(cobalt_sender) |
|     } |
| |
|     /// Creates a new CobaltMetrics that dispatches its events through the supplied |
|     /// `cobalt_sender`. Tests use this to pass in a sender whose receiving end they hold, so they |
|     /// can observe the events without connecting to the Cobalt FIDL service. |
|     fn new_with_cobalt_sender(cobalt_sender: CobaltSender) -> Self { |
|         Self { |
|             cobalt_sender, |
|             throttle_start_time: None, |
|             temperature_histogram: CobaltIntHistogram::new(CobaltIntHistogramConfig { |
|                 floor: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_FLOOR, |
|                 num_buckets: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_NUM_BUCKETS, |
|                 step_size: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_STEP_SIZE, |
|             }), |
|         } |
|     } |
| |
|     /// Log the start of a thermal throttling duration. One of the `log_throttle_end_*` functions |
|     /// must be called before calling this function a second time. |
|     /// |
|     /// Panics if a throttling duration is already being tracked. |
|     fn log_throttle_start(&mut self, timestamp: Nanoseconds) { |
|         assert!( |
|             self.throttle_start_time.is_none(), |
|             "throttle_start called before ending previous throttle" |
|         ); |
| |
|         self.throttle_start_time = Some(timestamp); |
|     } |
| |
|     /// Log the end of a thermal throttling duration due to successful mitigation. |
|     fn log_throttle_end_mitigated(&mut self, timestamp: Nanoseconds) { |
|         self.log_throttle_end_with_result(thermal_limit_result::Mitigated, timestamp) |
|     } |
| |
|     /// Log the end of a thermal throttling duration due to a shutdown. |
|     fn log_throttle_end_shutdown(&mut self, timestamp: Nanoseconds) { |
|         self.log_throttle_end_with_result(thermal_limit_result::Shutdown, timestamp) |
|     } |
| |
|     /// Log the end of a thermal throttling duration with a specified reason. |
|     /// |
|     /// The elapsed time metric is only dispatched if a throttle start was previously logged. The |
|     /// result metric is always dispatched. |
|     fn log_throttle_end_with_result( |
|         &mut self, |
|         result: thermal_limit_result, |
|         timestamp: Nanoseconds, |
|     ) { |
|         // `take` reads the start time and clears it in one step, so a later |
|         // `log_throttle_start` is allowed again. |
|         if let Some(start_time) = self.throttle_start_time.take() { |
|             self.dispatch_limiting_elapsed_time_metric(timestamp - start_time); |
|         } |
| |
|         self.dispatch_limit_result_metric(result); |
|     } |
| |
|     /// Log a raw temperature reading. Once `NUM_TEMPERATURE_READINGS` readings have accumulated, |
|     /// the histogram is dispatched to Cobalt and then cleared. |
|     fn log_raw_temperature(&mut self, temperature: Celsius) { |
|         // The histogram stores integers, so the fractional part of the reading is discarded |
|         self.temperature_histogram.add_data(temperature.0 as i64); |
|         if self.temperature_histogram.count() == Self::NUM_TEMPERATURE_READINGS { |
|             let histogram = self.temperature_histogram.get_data(); |
|             self.dispatch_temperature_metric(histogram); |
|             self.temperature_histogram.clear(); |
|         } |
|     } |
| |
|     /// Dispatch a Cobalt event for the thermal_limiting_elapsed_time metric. |
|     fn dispatch_limiting_elapsed_time_metric(&mut self, elapsed_time: Nanoseconds) { |
|         // NOTE(review): the tests in this file expect this call to produce an |
|         // `EventPayload::ElapsedMicros` that carries the raw nanosecond count. Confirm which time |
|         // unit the metric definition expects before relying on the reported values. |
|         self.cobalt_sender.log_elapsed_time( |
|             power_metrics_registry::THERMAL_LIMITING_ELAPSED_TIME_METRIC_ID, |
|             (), |
|             elapsed_time.0, |
|         ); |
|     } |
| |
|     /// Dispatch a Cobalt event for the thermal_limit_result metric, using `result` as the event |
|     /// code. |
|     fn dispatch_limit_result_metric(&mut self, result: thermal_limit_result) { |
|         self.cobalt_sender |
|             .log_event(power_metrics_registry::THERMAL_LIMIT_RESULT_METRIC_ID, vec![result as u32]); |
|     } |
| |
|     /// Dispatch a Cobalt event for the raw_temperature metric. |
|     fn dispatch_temperature_metric(&mut self, histogram: Vec<HistogramBucket>) { |
|         self.cobalt_sender.log_int_histogram( |
|             power_metrics_registry::RAW_TEMPERATURE_METRIC_ID, |
|             (), |
|             histogram, |
|         ); |
|     } |
| } |
| |
| #[cfg(test)] |
| pub mod tests { |
| use super::*; |
| use crate::test::mock_node::{create_dummy_node, create_mock_node, MessageMatcher}; |
| use crate::{msg_eq, msg_ok_return}; |
| use fidl_fuchsia_cobalt::{CobaltEvent, Event, EventPayload}; |
| use inspect::assert_inspect_tree; |
| |
|     /// Returns the controller sample interval configured for `thermal_policy`. |
|     pub fn get_sample_interval(thermal_policy: &ThermalPolicy) -> Seconds { |
|         let controller_params = &thermal_policy.config.policy_params.controller_params; |
|         controller_params.sample_interval |
|     } |
| |
|     /// Builds the baseline `ThermalPolicyParams` used by the tests in this module. Tests that need |
|     /// different values override individual fields on the returned struct. |
|     fn default_policy_params() -> ThermalPolicyParams { |
|         let controller_params = ThermalControllerParams { |
|             sample_interval: Seconds(1.0), |
|             filter_time_constant: Seconds(10.0), |
|             target_temperature: Celsius(85.0), |
|             e_integral_min: -20.0, |
|             e_integral_max: 0.0, |
|             sustainable_power: Watts(1.1), |
|             proportional_gain: 0.0, |
|             integral_gain: 0.2, |
|         }; |
| |
|         ThermalPolicyParams { |
|             controller_params, |
|             thermal_limiting_range: [Celsius(75.0), Celsius(85.0)], |
|             thermal_shutdown_temperature: Celsius(95.0), |
|             throttle_end_delay: Seconds(0.0), |
|         } |
|     } |
| |
|     /// Checks the output of `low_pass_filter` for one fixed set of inputs. |
|     #[test] |
|     fn test_low_pass_filter() { |
|         let (y_first, y_second) = (0.0, 10.0); |
|         let (time_delta, time_constant) = (1.0, 10.0); |
| |
|         let filtered = low_pass_filter(y_second, y_first, time_delta, time_constant); |
|         assert_eq!(filtered, 1.0); |
|     } |
| |
|     /// Checks `ThermalPolicy::calculate_thermal_load` for temperatures below, at the edges of, |
|     /// inside, and above a fixed thermal limiting range. |
|     #[test] |
|     fn test_calculate_thermal_load() { |
|         let thermal_limiting_range = [Celsius(85.0), Celsius(95.0)]; |
| |
|         // Each pair is (observed temperature, expected thermal load) |
|         let expectations = vec![ |
|             // before thermal limit range |
|             (Celsius(50.0), ThermalLoad(0)), |
|             // start of thermal limit range |
|             (Celsius(85.0), ThermalLoad(0)), |
|             // arbitrary point within thermal limit range |
|             (Celsius(88.0), ThermalLoad(30)), |
|             // arbitrary point within thermal limit range |
|             (Celsius(93.0), ThermalLoad(80)), |
|             // end of thermal limit range |
|             (Celsius(95.0), ThermalLoad(100)), |
|             // beyond thermal limit range |
|             (Celsius(100.0), ThermalLoad(100)), |
|         ]; |
| |
|         for (temperature, expected_thermal_load) in expectations { |
|             let thermal_load = |
|                 ThermalPolicy::calculate_thermal_load(temperature, &thermal_limiting_range); |
|             assert_eq!(thermal_load, expected_thermal_load); |
|         } |
|     } |
| |
|     /// Tests that the ThermalPolicy will correctly divide total available power amongst multiple |
|     /// CPU control nodes. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_multiple_cpu_actors() { |
|         // Number of times 1W of power is distributed. Each mock node below expects exactly this |
|         // many SetMaxPowerConsumption messages. |
|         const NUM_ITERATIONS: usize = 3; |
| |
|         // Expectations for the first CpuControlHandler mock node, one pair per iteration: |
|         // (power offered to the node, power the node reports it was able to utilize). The node is |
|         // always offered the full 1W and uses all, half, and then none of it. The reply to |
|         // SetMaxPowerConsumption is what drives the test logic. |
|         let cpu_node_1 = create_mock_node( |
|             "CpuCtrlNode1", |
|             [(1.0, 1.0), (1.0, 0.5), (1.0, 0.0)] |
|                 .iter() |
|                 .map(|&(offered, used)| { |
|                     ( |
|                         msg_eq!(SetMaxPowerConsumption(Watts(offered))), |
|                         msg_ok_return!(SetMaxPowerConsumption(Watts(used))), |
|                     ) |
|                 }) |
|                 .collect(), |
|         ); |
| |
|         // Expectations for the second mock node. It is offered whatever the first node left unused |
|         // out of the 1W total (0W, then 0.5W, then 1W) and reports using all of it. |
|         let cpu_node_2 = create_mock_node( |
|             "CpuCtrlNode2", |
|             [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)] |
|                 .iter() |
|                 .map(|&(offered, used)| { |
|                     ( |
|                         msg_eq!(SetMaxPowerConsumption(Watts(offered))), |
|                         msg_ok_return!(SetMaxPowerConsumption(Watts(used))), |
|                     ) |
|                 }) |
|                 .collect(), |
|         ); |
| |
|         let thermal_config = ThermalConfig { |
|             temperature_node: create_mock_node("TemperatureNode", vec![]), |
|             cpu_control_nodes: vec![cpu_node_1, cpu_node_2], |
|             sys_pwr_handler: create_mock_node("SysPwrNode", vec![]), |
|             thermal_limiter_node: create_mock_node("ThermalLimiterNode", vec![]), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params: default_policy_params(), |
|         }; |
|         let node = ThermalPolicyBuilder::new(thermal_config).build().unwrap(); |
| |
|         // Distribute 1W of total power across the two CPU nodes once per iteration. The actual |
|         // verification happens inside the mock nodes, which check that the expected power amounts |
|         // arrive via the SetMaxPowerConsumption message. |
|         for _ in 0..NUM_ITERATIONS { |
|             node.distribute_power(Watts(1.0)).await.unwrap(); |
|         } |
|     } |
| |
|     /// Tests that building a ThermalPolicy with an Inspect root publishes the expected nodes, and |
|     /// that the published policy parameters match the configured values. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_inspect_data() { |
|         // Reference copy of the parameters to compare the Inspect tree against |
|         let expected_params = default_policy_params(); |
|         let limiting_range = &expected_params.thermal_limiting_range; |
|         let controller = &expected_params.controller_params; |
| |
|         let inspector = inspect::Inspector::new(); |
|         let thermal_config = ThermalConfig { |
|             temperature_node: create_mock_node("TemperatureNode", vec![]), |
|             cpu_control_nodes: vec![create_mock_node("CpuCtrlNode", vec![])], |
|             sys_pwr_handler: create_mock_node("SysPwrNode", vec![]), |
|             thermal_limiter_node: create_mock_node("ThermalLimiterNode", vec![]), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params: default_policy_params(), |
|         }; |
| |
|         // Keep the node alive until the tree has been checked |
|         let _node = ThermalPolicyBuilder::new(thermal_config) |
|             .with_inspect_root(inspector.root()) |
|             .build() |
|             .unwrap(); |
| |
|         assert_inspect_tree!( |
|             inspector, |
|             root: { |
|                 ThermalPolicy: { |
|                     state: contains {}, |
|                     stats: contains {}, |
|                     throttle_history: contains {}, |
|                     policy_params: { |
|                         "thermal_limiting_range (C)": vec![ |
|                             limiting_range[0].0, |
|                             limiting_range[1].0 |
|                         ], |
|                         controller_params: { |
|                             "sample_interval (s)": controller.sample_interval.0, |
|                             "filter_time_constant (s)": controller.filter_time_constant.0, |
|                             "target_temperature (C)": controller.target_temperature.0, |
|                             "e_integral_min": controller.e_integral_min, |
|                             "e_integral_max": controller.e_integral_max, |
|                             "sustainable_power (W)": controller.sustainable_power.0, |
|                             "proportional_gain": controller.proportional_gain, |
|                             "integral_gain": controller.integral_gain, |
|                         } |
|                     } |
|                 } |
|             } |
|         ); |
|     } |
| |
|     /// Tests that throttle data is collected and stored properly in Inspect. |
|     /// |
|     /// Drives one complete throttle event (start, one controller iteration, end) and then checks |
|     /// that the resulting history entry holds the expected timestamps and histograms. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_inspect_throttle_history() { |
|         // Set relevant policy parameters so we have deterministic power and thermal load |
|         // calculations |
|         let mut policy_params = default_policy_params(); |
|         policy_params.controller_params.sustainable_power = Watts(1.0); |
|         policy_params.controller_params.proportional_gain = 0.0; |
|         policy_params.controller_params.integral_gain = 0.0; |
|         policy_params.thermal_limiting_range[0] = Celsius(80.0); |
|         policy_params.thermal_limiting_range[1] = Celsius(100.0); |
|         let idle_temperature = Celsius(50.0); |
|         let throttle_temperature = Celsius(90.0); |
| |
|         // Calculate the values that we expect to be reported by the thermal policy based on the |
|         // parameters chosen above |
|         let thermal_load = ThermalPolicy::calculate_thermal_load( |
|             throttle_temperature, |
|             &policy_params.thermal_limiting_range, |
|         ); |
|         let throttle_available_power = policy_params.controller_params.sustainable_power; |
|         let throttle_available_power_cpu1 = throttle_available_power; |
|         let throttle_cpu1_power_used = throttle_available_power_cpu1 - Watts(0.3); // arbitrary |
|         let throttle_available_power_cpu2 = |
|             throttle_available_power_cpu1 - throttle_cpu1_power_used; |
|         let throttle_cpu2_power_used = throttle_available_power_cpu2; |
|         let throttle_start_time = Nanoseconds(0); |
|         let throttle_end_time = Nanoseconds(1000); |
| |
|         // Set up the ThermalPolicy node |
|         let thermal_config = ThermalConfig { |
|             temperature_node: create_dummy_node(), |
|             cpu_control_nodes: vec![ |
|                 create_mock_node( |
|                     "Cpu1Node", |
|                     vec![( |
|                         msg_eq!(SetMaxPowerConsumption(throttle_available_power_cpu1)), |
|                         msg_ok_return!(SetMaxPowerConsumption(throttle_cpu1_power_used)), |
|                     )], |
|                 ), |
|                 create_mock_node( |
|                     "Cpu2Node", |
|                     vec![( |
|                         msg_eq!(SetMaxPowerConsumption(throttle_available_power_cpu2)), |
|                         msg_ok_return!(SetMaxPowerConsumption(throttle_cpu2_power_used)), |
|                     )], |
|                 ), |
|             ], |
|             sys_pwr_handler: create_dummy_node(), |
|             thermal_limiter_node: create_dummy_node(), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params, |
|         }; |
|         let inspector = inspect::Inspector::new(); |
|         let node = ThermalPolicyBuilder::new(thermal_config) |
|             .with_inspect_root(inspector.root()) |
|             .build() |
|             .unwrap(); |
| |
|         // Causes Inspect to receive throttle_start_time and one reading into thermal_load_hist |
|         let _ = node.update_thermal_load(throttle_start_time, throttle_temperature).await; |
| |
|         // Causes Inspect to receive one reading into available_power_hist and one reading into both |
|         // entries of cpu_power_usage |
|         let _ = node.iterate_controller(throttle_temperature, Seconds(0.0)).await; |
| |
|         // Causes Inspect to receive throttle_end_time |
|         let _ = node.update_thermal_load(throttle_end_time, idle_temperature).await; |
| |
|         // TODO(fxb/49483): The `assert_inspect_tree!` macro really needs better support for testing |
|         // histogram correctness. Since that doesn't exist today, the only option is to recreate the |
|         // histogram property in the format that assert_inspect_tree expects. This requires |
|         // knowledge of the underlying Inspect histogram implementation, which is bad but it's our |
|         // only option at this time. |
|         fn create_hist_vec<T>(params: LinearHistogramParams<T>, data: Vec<T>) -> Vec<T> |
|         where |
|             T: std::convert::From<u32> |
|                 + std::ops::AddAssign |
|                 + std::ops::Sub<Output = T> |
|                 + std::ops::Div<Output = T> |
|                 + std::cmp::PartialOrd |
|                 + std::clone::Clone |
|                 + core::marker::Copy, |
|         { |
|             // The inspect histogram adds four extra buckets: |
|             //  - [0]: floor |
|             //  - [1]: step_size |
|             //  - [2]: underflow bucket |
|             //  - [-1]: overflow bucket |
|             let mut hist_vec: Vec<T> = vec![T::from(0); params.buckets + 4]; |
|             hist_vec[0] = params.floor; |
|             hist_vec[1] = params.step_size; |
| |
|             // The overflow bucket is the last slot. The bucket search below must be able to walk |
|             // all the way up to it, otherwise values at or above the top of the range would be |
|             // counted in a regular bucket instead. |
|             let overflow_index = hist_vec.len() - 1; |
| |
|             // Populate the histogram using the supplied data |
|             for v in data.into_iter() { |
|                 // Index selection modeled on the Inspect implementation: |
|                 // https://fuchsia.googlesource.com/fuchsia/+/ca376a675e18428aa31f2c2b78a4cfe2cf1c69a2/src/lib/inspect/rust/fuchsia-inspect/src/lib.rs#999 |
|                 let index = { |
|                     let mut current_floor = params.floor; |
|                     // Start at the underflow bucket |
|                     let mut idx = 2; |
|                     while v >= current_floor && idx < overflow_index { |
|                         current_floor += params.step_size; |
|                         idx += 1; |
|                     } |
|                     idx |
|                 }; |
|                 hist_vec[index] += T::from(1); |
|             } |
|             hist_vec |
|         } |
| |
|         let expected_thermal_load_hist = create_hist_vec::<u64>( |
|             LinearHistogramParams { floor: 0, step_size: 1, buckets: 100 }, |
|             vec![thermal_load.0.into()], |
|         ); |
| |
|         let expected_available_power_hist = create_hist_vec::<f64>( |
|             LinearHistogramParams { floor: 0.0, step_size: 0.1, buckets: 100 }, |
|             vec![throttle_available_power.0], |
|         ); |
| |
|         let expected_cpu_1_power_usage_hist = create_hist_vec::<f64>( |
|             LinearHistogramParams { floor: 0.0, step_size: 0.1, buckets: 100 }, |
|             vec![throttle_cpu1_power_used.0], |
|         ); |
| |
|         let expected_cpu_2_power_usage_hist = create_hist_vec::<f64>( |
|             LinearHistogramParams { floor: 0.0, step_size: 0.1, buckets: 100 }, |
|             vec![throttle_cpu2_power_used.0], |
|         ); |
| |
|         assert_inspect_tree!( |
|             inspector, |
|             root: { |
|                 ThermalPolicy: contains { |
|                     throttle_history: { |
|                         "0": { |
|                             throttle_start_time: throttle_start_time.0, |
|                             throttle_end_time: throttle_end_time.0, |
|                             thermal_load_hist: expected_thermal_load_hist, |
|                             available_power_hist: expected_available_power_hist, |
|                             cpu_power_usage: { |
|                                 "0": expected_cpu_1_power_usage_hist, |
|                                 "1": expected_cpu_2_power_usage_hist |
|                             } |
|                         }, |
|                     } |
|                 } |
|             } |
|         ) |
|     } |
| |
|     /// Verifies that InspectThrottleHistory discards old entries once it is full, without growing |
|     /// the underlying list, while still counting every entry that was ever added. |
|     #[test] |
|     fn test_inspect_throttle_history_length() { |
|         // A history with room for only a single throttling entry |
|         let mut throttle_history = InspectThrottleHistory::new( |
|             inspect::Inspector::new().root().create_child("test_node"), |
|             1, |
|         ); |
| |
|         // Record two complete throttling events. After each one the list must still hold exactly |
|         // one entry at unchanged capacity, while the running entry count keeps increasing. |
|         for expected_entry_count in 1..=2 { |
|             throttle_history.mark_throttling_active(Nanoseconds(0)); |
|             throttle_history.mark_throttling_inactive(Nanoseconds(0)); |
| |
|             assert_eq!(throttle_history.throttle_history_list.len(), 1); |
|             assert_eq!(throttle_history.throttle_history_list.capacity(), 1); |
|             assert_eq!(throttle_history.entry_count, expected_entry_count); |
|         } |
|     } |
| |
|     /// Tests that `new_from_json` accepts well-formed configuration JSON without panicking when |
|     /// every node named under "dependencies" is present in the supplied node map. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_new_from_json() { |
|         // Register a dummy node under each name referenced by the JSON "dependencies" section |
|         let dependency_names = ["temperature", "cpu_control", "sys_power", "limiter", "crash_report"]; |
|         let mut nodes: HashMap<String, Rc<dyn Node>> = HashMap::new(); |
|         for name in dependency_names.iter() { |
|             nodes.insert((*name).to_string(), create_dummy_node()); |
|         } |
| |
|         let json_data = json::json!({ |
|             "type": "ThermalPolicy", |
|             "name": "thermal_policy", |
|             "config": { |
|                 "thermal_limiting_range": [77.0, 84.0], |
|                 "thermal_shutdown_temperature": 95.0, |
|                 "throttle_end_delay": 0.0, |
|                 "controller_params": { |
|                     "sample_interval": 1.0, |
|                     "filter_time_constant": 5.0, |
|                     "target_temperature": 80.0, |
|                     "e_integral_min": -20.0, |
|                     "e_integral_max": 0.0, |
|                     "sustainable_power": 0.876, |
|                     "proportional_gain": 0.0, |
|                     "integral_gain": 0.08 |
|                 } |
|             }, |
|             "dependencies": { |
|                 "cpu_control_nodes": [ |
|                     "cpu_control" |
|                 ], |
|                 "system_power_handler_node": "sys_power", |
|                 "temperature_handler_node": "temperature", |
|                 "thermal_limiter_node": "limiter", |
|                 "crash_report_handler_node": "crash_report" |
|               }, |
|         }); |
| |
|         // Only the absence of a panic is being checked, so the builder itself is discarded |
|         let _ = ThermalPolicyBuilder::new_from_json(json_data, &nodes); |
|     } |
| |
|     /// Tests that after thermal limiting starts and is then successfully mitigated, the |
|     /// ThermalPolicy dispatches the thermal_limiting_elapsed_time metric followed by the |
|     /// thermal_limit_result metric, and nothing else. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_cobalt_metrics_throttle_elapsed_time() { |
|         // Pick temperatures just below and just above the start of the thermal limiting range |
|         let policy_params = default_policy_params(); |
|         let limiting_start = policy_params.thermal_limiting_range[0]; |
|         let idle_temperature = limiting_start - Celsius(1.0); |
|         let throttle_temperature = limiting_start + Celsius(1.0); |
|         let throttle_duration = Nanoseconds(1e9 as i64); // 1s |
| |
|         // Build the ThermalPolicy node with a Cobalt sender whose events this test can observe |
|         let (sender, mut receiver) = futures::channel::mpsc::channel(10); |
|         let node = ThermalPolicyBuilder::new(ThermalConfig { |
|             temperature_node: create_dummy_node(), |
|             cpu_control_nodes: vec![create_dummy_node()], |
|             sys_pwr_handler: create_dummy_node(), |
|             thermal_limiter_node: create_dummy_node(), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params, |
|         }) |
|         .with_thermal_metrics(CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender))) |
|         .build() |
|         .unwrap(); |
| |
|         // Begin thermal limiting at time zero, then end it `throttle_duration` later |
|         let _ = node.update_thermal_load(Nanoseconds(0), throttle_temperature).await; |
|         let _ = node.update_thermal_load(throttle_duration, idle_temperature).await; |
| |
|         // The events that must arrive, in order |
|         let expected_events = vec![ |
|             // thermal_limiting_elapsed_time metric |
|             CobaltEvent { |
|                 metric_id: power_metrics_registry::THERMAL_LIMITING_ELAPSED_TIME_METRIC_ID, |
|                 event_codes: vec![], |
|                 component: None, |
|                 payload: EventPayload::ElapsedMicros(throttle_duration.0 as i64), |
|             }, |
|             // thermal_limit_result metric |
|             CobaltEvent { |
|                 metric_id: power_metrics_registry::THERMAL_LIMIT_RESULT_METRIC_ID, |
|                 event_codes: vec![thermal_limit_result::Mitigated as u32], |
|                 component: None, |
|                 payload: EventPayload::Event(Event), |
|             }, |
|         ]; |
|         for expected_event in expected_events { |
|             assert_eq!(receiver.try_next().unwrap().unwrap(), expected_event); |
|         } |
| |
|         // Verify there were no more dispatched Cobalt events |
|         assert!(receiver.try_next().is_err()); |
|     } |
| |
|     /// Tests that a thermal shutdown makes the ThermalPolicy dispatch the thermal_limit_result |
|     /// metric with the Shutdown result. No throttling was started beforehand, so that is the only |
|     /// Cobalt event expected. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_cobalt_metrics_thermal_shutdown() { |
|         // A temperature just above the configured thermal shutdown temperature |
|         let policy_params = default_policy_params(); |
|         let shutdown_temperature = policy_params.thermal_shutdown_temperature + Celsius(1.0); |
| |
|         // Build the ThermalPolicy node with a Cobalt sender whose events this test can observe |
|         let (sender, mut receiver) = futures::channel::mpsc::channel(10); |
|         let node = ThermalPolicyBuilder::new(ThermalConfig { |
|             temperature_node: create_dummy_node(), |
|             cpu_control_nodes: vec![create_dummy_node()], |
|             sys_pwr_handler: create_dummy_node(), |
|             thermal_limiter_node: create_dummy_node(), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params, |
|         }) |
|         .with_thermal_metrics(CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender))) |
|         .build() |
|         .unwrap(); |
| |
|         // Cause the thermal policy to enter thermal shutdown |
|         let _ = node.check_critical_temperature(Nanoseconds(0), shutdown_temperature).await; |
| |
|         // Verify the expected Cobalt event for the thermal_limit_result metric |
|         let expected_event = CobaltEvent { |
|             metric_id: power_metrics_registry::THERMAL_LIMIT_RESULT_METRIC_ID, |
|             event_codes: vec![thermal_limit_result::Shutdown as u32], |
|             component: None, |
|             payload: EventPayload::Event(Event), |
|         }; |
|         assert_eq!(receiver.try_next().unwrap().unwrap(), expected_event); |
| |
|         // Verify there were no more dispatched Cobalt events |
|         assert!(receiver.try_next().is_err()); |
|     } |
| |
|     /// Tests that the ThermalPolicy dispatches exactly one raw_temperature histogram event after |
|     /// `CobaltMetrics::NUM_TEMPERATURE_READINGS` temperature readings, and that the histogram |
|     /// contains those readings. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_cobalt_metrics_raw_temperature() { |
|         // Every mocked temperature reading returns this value, and it is the value expected in the |
|         // resulting Cobalt histogram event |
|         let test_temperature = Celsius(50.0); |
| |
|         // Number of readings performed, which is also the number expected in the histogram |
|         let num_temperature_readings = CobaltMetrics::NUM_TEMPERATURE_READINGS; |
| |
|         // The mock temperature node answers one ReadTemperature message per reading with |
|         // `test_temperature` |
|         let mut temperature_responses = Vec::new(); |
|         for _ in 0..num_temperature_readings { |
|             temperature_responses.push(( |
|                 msg_eq!(ReadTemperature), |
|                 msg_ok_return!(ReadTemperature(test_temperature)), |
|             )); |
|         } |
|         let temperature_node = create_mock_node("TemperatureNode", temperature_responses); |
| |
|         // Build the ThermalPolicy node with a Cobalt sender whose events this test can observe |
|         let (sender, mut receiver) = futures::channel::mpsc::channel(10); |
|         let node = ThermalPolicyBuilder::new(ThermalConfig { |
|             temperature_node, |
|             cpu_control_nodes: vec![create_dummy_node()], |
|             sys_pwr_handler: create_dummy_node(), |
|             thermal_limiter_node: create_dummy_node(), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params: default_policy_params(), |
|         }) |
|         .with_thermal_metrics(CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender))) |
|         .build() |
|         .unwrap(); |
| |
|         // Run enough thermal control iterations to trigger the Cobalt metric event |
|         for _ in 0..num_temperature_readings { |
|             node.iterate_thermal_control().await.unwrap(); |
|         } |
| |
|         // Rebuild the histogram that the policy is expected to have reported |
|         let mut expected_histogram = CobaltIntHistogram::new(CobaltIntHistogramConfig { |
|             floor: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_FLOOR, |
|             num_buckets: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_NUM_BUCKETS, |
|             step_size: power_metrics_registry::RAW_TEMPERATURE_INT_BUCKETS_STEP_SIZE, |
|         }); |
|         (0..num_temperature_readings) |
|             .for_each(|_| expected_histogram.add_data(test_temperature.0 as i64)); |
| |
|         // Verify that the expected Cobalt event was received, and there were no extra events |
|         let received_event = receiver.try_next().unwrap().unwrap(); |
|         assert_eq!( |
|             received_event, |
|             CobaltEvent { |
|                 metric_id: power_metrics_registry::RAW_TEMPERATURE_METRIC_ID, |
|                 event_codes: vec![], |
|                 component: None, |
|                 payload: EventPayload::IntHistogram(expected_histogram.get_data()), |
|             } |
|         ); |
|         assert!(receiver.try_next().is_err()); |
|     } |
| |
|     /// Tests that `log_throttle_start` can be called a second time once the previous throttle has |
|     /// been ended with `log_throttle_end_mitigated`. |
|     #[test] |
|     fn test_cobalt_metrics_throttle_restart() { |
|         let (sender, _) = futures::channel::mpsc::channel(10); |
|         let mut metrics = CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender)); |
| |
|         // First throttle: start and end |
|         metrics.log_throttle_start(Nanoseconds(0)); |
|         metrics.log_throttle_end_mitigated(Nanoseconds(1000)); |
| |
|         // Starting again must not panic |
|         metrics.log_throttle_start(Nanoseconds(2000)); |
|     } |
| |
|     /// Tests that calling CobaltMetrics `log_throttle_start` twice, without ending the first |
|     /// throttle in between, causes a panic. |
|     #[test] |
|     #[should_panic(expected = "throttle_start called before ending previous throttle")] |
|     fn test_cobalt_metrics_double_start_panic() { |
|         let (sender, _) = futures::channel::mpsc::channel(10); |
|         let mut metrics = CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender)); |
| |
|         metrics.log_throttle_start(Nanoseconds(0)); |
| |
|         // The previous throttle was never ended, so this call is expected to panic |
|         metrics.log_throttle_start(Nanoseconds(0)); |
|     } |
| |
|     /// Tests that the throttle-end Cobalt events are held back while the throttle end delay is |
|     /// still running, and are dispatched once the delay has expired. |
|     #[fasync::run_singlethreaded(test)] |
|     async fn test_cobalt_metrics_throttle_deadline() { |
|         // Use a 60s throttle end delay, and temperatures just below and just above the start of |
|         // the thermal limiting range |
|         let throttle_end_delay = Seconds(60.0); |
|         let mut policy_params = default_policy_params(); |
|         policy_params.throttle_end_delay = throttle_end_delay; |
|         let limiting_start = policy_params.thermal_limiting_range[0]; |
|         let idle_temperature = limiting_start - Celsius(1.0); |
|         let throttle_temperature = limiting_start + Celsius(1.0); |
| |
|         // The temperature drops back to idle after 1s. The throttle is only considered over once |
|         // the end delay has also elapsed. |
|         let initial_throttle_duration = Nanoseconds(1e9 as i64); // 1s |
|         let total_throttle_duration = |
|             initial_throttle_duration + Nanoseconds(throttle_end_delay.into_nanos()); |
| |
|         // Build the ThermalPolicy node with a Cobalt sender whose events this test can observe |
|         let (sender, mut receiver) = futures::channel::mpsc::channel(10); |
|         let node = ThermalPolicyBuilder::new(ThermalConfig { |
|             temperature_node: create_dummy_node(), |
|             cpu_control_nodes: vec![create_dummy_node()], |
|             sys_pwr_handler: create_dummy_node(), |
|             thermal_limiter_node: create_dummy_node(), |
|             crash_report_handler: create_dummy_node(), |
|             policy_params, |
|         }) |
|         .with_thermal_metrics(CobaltMetrics::new_with_cobalt_sender(CobaltSender::new(sender))) |
|         .build() |
|         .unwrap(); |
| |
|         // Cause the thermal policy to begin thermal limiting |
|         let _ = node.update_thermal_load(Nanoseconds(0), throttle_temperature).await; |
| |
|         // Return to the idle temperature. The deadline timer should still be active, so no Cobalt |
|         // events may have been dispatched yet. |
|         let _ = node.update_thermal_load(initial_throttle_duration, idle_temperature).await; |
|         assert!(receiver.try_next().is_err()); |
| |
|         // Cause the deadline timer to expire |
|         let _ = node.update_thermal_load(total_throttle_duration, idle_temperature).await; |
| |
|         // The events that must now arrive, in order |
|         let expected_events = vec![ |
|             // thermal_limiting_elapsed_time metric |
|             CobaltEvent { |
|                 metric_id: power_metrics_registry::THERMAL_LIMITING_ELAPSED_TIME_METRIC_ID, |
|                 event_codes: vec![], |
|                 component: None, |
|                 payload: EventPayload::ElapsedMicros(total_throttle_duration.0 as i64), |
|             }, |
|             // thermal_limit_result metric |
|             CobaltEvent { |
|                 metric_id: power_metrics_registry::THERMAL_LIMIT_RESULT_METRIC_ID, |
|                 event_codes: vec![thermal_limit_result::Mitigated as u32], |
|                 component: None, |
|                 payload: EventPayload::Event(Event), |
|             }, |
|         ]; |
|         for expected_event in expected_events { |
|             assert_eq!(receiver.try_next().unwrap().unwrap(), expected_event); |
|         } |
| |
|         // Verify there were no more dispatched Cobalt events |
|         assert!(receiver.try_next().is_err()); |
|     } |
| |
| /// Tests that when thermal throttling exits, the ThermalPolicy triggers a crash report on the |
| /// CrashReportHandler node. |
| #[fasync::run_singlethreaded(test)] |
| async fn test_throttle_crash_report() { |
| // Define the test parameters |
| let mut policy_params = default_policy_params(); |
| policy_params.throttle_end_delay = Seconds(0.0); |
| policy_params.thermal_limiting_range[0] = Celsius(80.0); |
| policy_params.thermal_limiting_range[1] = Celsius(100.0); |
| let idle_temperature = Celsius(50.0); |
| let throttle_temperature = Celsius(90.0); |
| |
| // Set up the ThermalPolicy node |
| let thermal_config = ThermalConfig { |
| temperature_node: create_dummy_node(), |
| cpu_control_nodes: vec![create_dummy_node()], |
| sys_pwr_handler: create_dummy_node(), |
| thermal_limiter_node: create_dummy_node(), |
| crash_report_handler: create_mock_node( |
| "CrashReportMock", |
| vec![( |
| msg_eq!(FileCrashReport("fuchsia-thermal-throttle".to_string())), |
| msg_ok_return!(FileCrashReport), |
| )], |
| ), |
| policy_params, |
| }; |
| let node = ThermalPolicyBuilder::new(thermal_config).build().unwrap(); |
| |
| // Enter and then exit thermal throttling. The mock crash_report_handler node will assert if |
| // it does not receive a FileCrashReport message. |
| let _ = node.update_thermal_load(Nanoseconds(0), throttle_temperature).await; |
| let _ = node.update_thermal_load(Nanoseconds(0), idle_temperature).await; |
| } |
| } |