| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_SYS_APPMGR_CPU_WATCHER_H_ |
| #define SRC_SYS_APPMGR_CPU_WATCHER_H_ |
| |
| #include <lib/inspect/cpp/inspect.h> |
| #include <lib/inspect/cpp/vmo/types.h> |
| #include <lib/stdcompat/optional.h> |
| #include <lib/zx/job.h> |
| #include <lib/zx/vmo.h> |
| #include <zircon/syscalls.h> |
| #include <zircon/threads.h> |
| |
| #include <deque> |
| #include <map> |
| #include <string> |
| #include <thread> |
| |
| #include <src/lib/fxl/macros.h> |
| #include <src/sys/appmgr/component_controller_impl.h> |
| |
| namespace component { |
| |
| // Virtual class to support CPU stats injection for testing |
| class StatsReader { |
| public: |
| virtual ~StatsReader() = default; |
| // Returns ZX_OK when task runtime is written to *info. Otherwise the value of *info is unchanged. |
| virtual zx_status_t GetCpuStats(zx_info_task_runtime_t* info) = 0; |
| }; |
| |
| // Gets stats from a real job |
| class JobStatsReader final : public StatsReader { |
| public: |
| ~JobStatsReader() override = default; |
| explicit JobStatsReader(zx::job job) : job_(std::move(job)) {} |
| zx_status_t GetCpuStats(zx_info_task_runtime_t* info) override { |
| return job_.get_info(ZX_INFO_TASK_RUNTIME, info, sizeof(*info), nullptr, nullptr); |
| } |
| |
| private: |
| zx::job job_; |
| }; |
| |
| // Configures the CpuWatcher. num_cpus and get_time can be substituted for testing. |
| struct CpuWatcherParameters { |
| // How many CPU cores the system has |
| size_t num_cpus = zx_system_get_num_cpus(); |
| // How often samples are taken |
| zx::duration sample_period; |
| // A function that will be called to fetch monotonic time |
| std::function<zx::time()> get_time = [] { return zx::clock::get_monotonic(); }; |
| }; |
| |
| // Watch CPU usage for tasks on the system. |
| // |
| // The CpuWatcher periodically samples all CPU usage registered tasks and exposes it in an Inspect |
| // hierarchy. |
| class CpuWatcher { |
| public: |
| // Create a new CpuWatcher that exposes CPU data under the given inspect node. The given job |
| // appears as the root of the hierarchy. |
| CpuWatcher(inspect::Node node, CpuWatcherParameters parameters, |
| std::unique_ptr<StatsReader> stats_reader, size_t max_samples = kDefaultMaxSamples) |
| : parameters_(std::move(parameters)), |
| top_node_(std::move(node)), |
| measurements_( |
| top_node_.CreateLazyNode("measurements", [this] { return PopulateInspector(); })), |
| task_count_value_(top_node_.CreateInt("task_count", 0)), |
| process_times_(top_node_.CreateExponentialIntHistogram( |
| "process_time_ns", kProcessTimeFloor, kProcessTimeStep, kProcessTimeMultiplier, |
| kProcessTimeBuckets)), |
| root_(std::move(stats_reader), max_samples, cpp17::nullopt /*histogram*/, |
| parameters.get_time()), |
| total_node_(top_node_.CreateChild("@total")), |
| recent_cpu_usage_( |
| top_node_.CreateLazyNode("recent_usage", [this] { return PopulateRecentUsage(); })), |
| histograms_node_(top_node_.CreateChild("histograms")), |
| max_samples_(max_samples) {} |
| |
| // Add a task to this watcher by instance path. |
| // job: The job to sample CPU runtime from. |
| void AddTask(const InstancePath& instance_path, std::unique_ptr<StatsReader> stats_reader); |
| |
| // Remove a task by instance path. |
| void RemoveTask(const InstancePath& instance_path); |
| |
| // Execute a measurement at the current time. |
| void Measure(); |
| |
| private: |
| static constexpr int64_t kProcessTimeFloor = 1000; |
| static constexpr int64_t kProcessTimeStep = 1000; |
| static constexpr int64_t kProcessTimeMultiplier = 2; |
| static constexpr int64_t kProcessTimeBuckets = 16; |
| static constexpr size_t kDefaultMaxSamples = 60; |
| |
| fpromise::promise<inspect::Inspector> PopulateInspector() const; |
| fpromise::promise<inspect::Inspector> PopulateRecentUsage() const; |
| |
| // An individual measurement. |
| struct Measurement { |
| zx_time_t timestamp; |
| zx_duration_t cpu_time; |
| zx_duration_t queue_time; |
| }; |
| |
| // A task that can be measured. |
| class Task { |
| public: |
| Task(std::unique_ptr<StatsReader> stats_reader, size_t max_samples, |
| cpp17::optional<inspect::LinearUintHistogram> histogram, zx::time time) |
| : stats_reader_(std::move(stats_reader)), |
| max_samples_(max_samples), |
| // Histogram for values [0..100]. 0 in the lower overflow bucket, 100 in the upper. |
| histogram_(std::move(histogram)), |
| previous_histogram_timestamp_(time.get()), |
| previous_cpu_(0) {} |
| |
| // Add a measurement to this task's histogram. |
| void AddMeasurementToHistogram(zx::time time, zx_duration_t cpu_time, |
| const CpuWatcherParameters& parameters, |
| inspect::LinearUintHistogram* histogram_to_use) { |
| if (histogram_to_use == nullptr) { |
| return; |
| } |
| auto time_value = time.get(); |
| auto elapsed_time = zx_time_sub_time(time_value, previous_histogram_timestamp_); |
| previous_histogram_timestamp_ = time_value; |
| // Don't publish confusing or misleading values from too-short measurement period. |
| if (elapsed_time < parameters.sample_period.get() * 9 / 10) { |
| return; |
| } |
| // Elapsed time * the number of cores |
| auto available_core_time = elapsed_time * parameters.num_cpus; |
| if (available_core_time != 0) { |
| // Multiply by 100 to get percent. Add available_core_time-1 to compute ceil(). |
| auto cpu_numerator = cpu_time * 100 + available_core_time - 1; |
| histogram_to_use->Insert(cpu_numerator / available_core_time); |
| } |
| } |
| |
| // Add a measurement to this task's list of measurements. |
| void AddMeasurementToList(zx::time time, zx_duration_t cpu_time, zx_duration_t queue_time) { |
| measurements_.emplace_back( |
| Measurement{.timestamp = time.get(), .cpu_time = cpu_time, .queue_time = queue_time}); |
| while (measurements_.size() > max_samples_) { |
| measurements_.pop_front(); |
| } |
| } |
| |
| // Rotate measurements if not empty. This is used when a task is already destroyed to ensure |
| // that we still rotate measurements outside the window. |
| void rotate() { |
| if (!measurements_.empty()) { |
| measurements_.pop_front(); |
| } |
| } |
| |
| bool is_alive() const { |
| // Keep a task around if we will either take measurements from it, or we have existing |
| // measurements. |
| return stats_reader_ != nullptr || !measurements_.empty() || !children_.empty(); |
| } |
| |
| std::map<std::string, std::unique_ptr<Task>>& children() { return children_; } |
| const std::map<std::string, std::unique_ptr<Task>>& children() const { return children_; } |
| const std::deque<Measurement>& measurements() const { return measurements_; } |
| std::unique_ptr<StatsReader>& stats_reader() { return stats_reader_; } |
| inspect::LinearUintHistogram* histogram() { |
| return histogram_ ? &(histogram_.value()) : nullptr; |
| } |
| // Takes and records a new measurement for this task. A copy of the measurement is returned if |
| // one was taken. Parent must not be null. |
| cpp17::optional<Measurement> Measure(const zx::time& timestamp, |
| const CpuWatcherParameters& parameters, Task* parent); |
| |
| private: |
| std::unique_ptr<StatsReader> stats_reader_; |
| |
| // The maximum number of samples to store for this task. |
| const size_t max_samples_; |
| |
| // Deque of measurements. |
| std::deque<Measurement> measurements_; |
| |
| // Inspect histogram of CPU stat percentages. |
| // Multiple tasks may occur with different koid's but a shared moniker, for example |
| // due to restart. Use a histogram-for-all-koid's stored in the parent Task. |
| cpp17::optional<inspect::LinearUintHistogram> histogram_; |
| |
| // Map of children for this task. |
| std::map<std::string, std::unique_ptr<Task>> children_; |
| |
| // Time of previous CPU sample or creation of Task instance, or 0 if invalid. |
| zx_time_t previous_histogram_timestamp_; |
| |
| zx_duration_t previous_cpu_; |
| }; |
| |
| mutable std::mutex mutex_; |
| CpuWatcherParameters parameters_; |
| inspect::Node top_node_; |
| inspect::LazyNode measurements_; |
| inspect::IntProperty task_count_value_; |
| inspect::ExponentialIntHistogram process_times_; |
| |
| size_t task_count_ __TA_GUARDED(mutex_) = 1; // 1 for root_ |
| Task root_ __TA_GUARDED(mutex_); |
| |
| // Total CPU and queue time of exited tasks. Used to ensure those values are not lost when |
| // calculating overall CPU usage on the system. |
| zx_duration_t exited_cpu_ __TA_GUARDED(mutex_) = 0; |
| zx_duration_t exited_queue_ __TA_GUARDED(mutex_) = 0; |
| inspect::Node total_node_; |
| size_t next_total_measurement_id_ __TA_GUARDED(mutex_) = 0; |
| std::deque<inspect::Node> total_measurements_ __TA_GUARDED(mutex_); |
| |
| inspect::LazyNode recent_cpu_usage_; |
| inspect::Node histograms_node_; |
| Measurement most_recent_total_ = {}, second_most_recent_total_ = {}; |
| |
| const size_t max_samples_; |
| |
| FXL_DISALLOW_COPY_AND_ASSIGN(CpuWatcher); |
| }; |
| |
| } // namespace component |
| |
| #endif // SRC_SYS_APPMGR_CPU_WATCHER_H_ |