src/cobalt/bin/system-metrics/system_metrics_daemon.cc - fuchsia - Git at Google

 // Copyright 2018 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // The cobalt system metrics collection daemon uses cobalt to log system metrics
 // on a regular basis.
 #include "src/cobalt/bin/system-metrics/system_metrics_daemon.h"

 #include <assert.h>
 #include <fcntl.h>
 #include <fuchsia/cobalt/cpp/fidl.h>
 #include <lib/async/cpp/task.h>
 #include <lib/fdio/directory.h>
 #include <lib/fdio/fd.h>
 #include <lib/fdio/fdio.h>
 #include <lib/sys/inspect/cpp/component.h>
 #include <lib/syslog/cpp/macros.h>
 #include <lib/trace/event.h>
 #include <lib/zx/resource.h>
 #include <sys/statvfs.h>
 #include <zircon/status.h>

 #include <chrono>
 #include <cstdio>
 #include <fstream>
 #include <memory>
 #include <numeric>
 #include <thread>
 #include <utility>
 #include <vector>

 #include "src/cobalt/bin/system-metrics/cpu_stats_fetcher_impl.h"
 #include "src/cobalt/bin/system-metrics/log_stats_fetcher_impl.h"
 #include "src/cobalt/bin/system-metrics/metrics_registry.cb.h"
 #include "src/cobalt/bin/utils/clock.h"
 #include "src/cobalt/bin/utils/status_utils.h"
 #include "src/lib/cobalt/cpp/cobalt_event_builder.h"

 using cobalt::CobaltEventBuilder;
 using cobalt::IntegerBuckets;
 using cobalt::LinearIntegerBuckets;
 using cobalt::StatusToString;
 using cobalt::config::IntegerBucketConfig;
 using fuchsia::cobalt::CobaltEvent;
 using fuchsia::cobalt::HistogramBucket;
 using fuchsia::cobalt::Logger_Sync;
 using fuchsia::ui::activity::State;
 using fuchsia_system_metrics::FuchsiaLifetimeEventsMetricDimensionEvents;
 using fuchsia_system_metrics::FuchsiaUpPingMetricDimensionUptime;
 using fuchsia_system_metrics::FuchsiaUptimeMetricDimensionUptimeRange;
 using std::chrono::steady_clock;

 // File name appended to |activation_file_prefix_| to form the path of the
 // marker file whose presence means the Activation lifetime event has already
 // been logged.
 constexpr char kActivationFileSuffix[] = "activation";

 namespace {

 // Where the granular error stats specs are read from when the daemon is
 // constructed with its production defaults.
 constexpr char kDefaultGranularErrorStatsSpecFilePath[] =
     "/config/data/granular_error_stats_specs.txt";

 // Returns how long to wait, starting from |uptime|, until the uptime reaches
 // the next whole multiple of one hour. An |uptime| that is already an exact
 // multiple of an hour yields a full hour, never zero.
 std::chrono::seconds SecondsBeforeNextHour(std::chrono::seconds uptime) {
   constexpr std::chrono::seconds kOneHour = std::chrono::hours(1);
   const std::chrono::seconds into_current_hour = uptime % kOneHour;
   return kOneHour - into_current_hour;
 }

 }  // namespace

 // Zero-initialized sentinel that LoadGranularErrorStatsSpecs() returns when the
 // spec file cannot be read.
 const SystemMetricsDaemon::MetricSpecs SystemMetricsDaemon::MetricSpecs::INVALID = {0, 0, 0};

 // static
 //
 // Reads the customer ID, project ID and metric ID, in that order, as
 // whitespace-separated values from |spec_file_path|. If any of them cannot be
 // read (including when the file cannot be opened) an error is logged for the
 // first missing value and MetricSpecs::INVALID is returned.
 SystemMetricsDaemon::MetricSpecs SystemMetricsDaemon::LoadGranularErrorStatsSpecs(
     const char* spec_file_path) {
   std::ifstream specs_in(spec_file_path);
   MetricSpecs specs;

   // Extracts the next value into |field|; logs |failure_message| when the
   // extraction fails.
   const auto read_field = [&specs_in](auto& field, const char* failure_message) {
     if (specs_in >> field) {
       return true;
     }
     FX_LOGS(ERROR) << failure_message;
     return false;
   };

   // && short-circuits, so reading stops at the first value that is missing.
   const bool all_read =
       read_field(specs.customer_id,
                  "Unable to read customer ID from granular error stats spec file") &&
       read_field(specs.project_id,
                  "Unable to read project ID from granular error stats spec file") &&
       read_field(specs.metric_id,
                  "Unable to read metric ID from granular error stats spec file");
   if (!all_read) {
     return MetricSpecs::INVALID;
   }
   return specs;
 }

 // Production constructor: delegates to the fully-injectable constructor with
 // the real clock and fetcher implementations, then connects the Cobalt loggers
 // and starts watching the activity state.
 SystemMetricsDaemon::SystemMetricsDaemon(async_dispatcher_t* dispatcher,
                                          sys::ComponentContext* context)
     : SystemMetricsDaemon(
           dispatcher, context, LoadGranularErrorStatsSpecs(kDefaultGranularErrorStatsSpecFilePath),
           /*logger=*/nullptr, /*granular_error_stats_logger=*/nullptr,
           std::make_unique<cobalt::RealSteadyClock>(),
           std::make_unique<cobalt::CpuStatsFetcherImpl>(),
           std::make_unique<cobalt::LogStatsFetcherImpl>(dispatcher, context),
           std::make_unique<cobalt::ActivityListener>(
               fit::bind_member(this, &SystemMetricsDaemon::UpdateState)),
           /*activation_file_prefix=*/"data/") {
   // Both loggers were passed as nullptr above, so connect them now.
   InitializeLogger();
   InitializeGranularErrorStatsLogger();

   // Hook the activity listener up to the activity service so that it receives
   // state changes.
   activity_provider_ = context->svc()->Connect<fuchsia::ui::activity::Provider>();
   activity_provider_->WatchState(activity_listener_->NewHandle(dispatcher));
 }

 // Fully-injectable constructor: every collaborator (loggers, clock, fetchers,
 // activity listener) and the activation file prefix is supplied by the caller.
 // The two logger pointers are stored as given and may be null. This constructor
 // does not connect to any service itself; it only stores its arguments and
 // creates the inspect nodes.
 SystemMetricsDaemon::SystemMetricsDaemon(
     async_dispatcher_t* dispatcher, sys::ComponentContext* context,
     const MetricSpecs& granular_error_stats_specs, fuchsia::cobalt::Logger_Sync* logger,
     fuchsia::cobalt::Logger_Sync* granular_error_stats_logger,
     std::unique_ptr<cobalt::SteadyClock> clock,
     std::unique_ptr<cobalt::CpuStatsFetcher> cpu_stats_fetcher,
     std::unique_ptr<cobalt::LogStatsFetcher> log_stats_fetcher,
     std::unique_ptr<cobalt::ActivityListener> activity_listener, std::string activation_file_prefix)
     : dispatcher_(dispatcher),
       context_(context),
       granular_error_stats_specs_(granular_error_stats_specs),
       logger_(logger),
       granular_error_stats_logger_(granular_error_stats_logger),
       // NOTE(review): |clock| is dereferenced here and moved from on the next
       // line. Members are initialized in declaration order, so this is only
       // safe if start_time_ is declared before clock_ in the class - confirm
       // against the header.
       start_time_(clock->Now()),
       clock_(std::move(clock)),
       cpu_stats_fetcher_(std::move(cpu_stats_fetcher)),
       log_stats_fetcher_(std::move(log_stats_fetcher)),
       activity_listener_(std::move(activity_listener)),
       activation_file_prefix_(std::move(activation_file_prefix)),
       inspector_(context),
       platform_metric_node_(inspector_.root().CreateChild(kInspecPlatformtNodeName)),
       // Diagram of hierarchy can be seen below:
       // root
       // - platform_metrics
       //   - cpu
       //     - max
       //     - mean
       metric_cpu_node_(platform_metric_node_.CreateChild(kCPUNodeName)),
       inspect_cpu_max_(metric_cpu_node_.CreateDoubleArray(kReadingCPUMax, kCPUArraySize)),
       inspect_cpu_mean_(metric_cpu_node_.CreateDoubleArray(kReadingCPUMean, kCPUArraySize)) {}

 // Kicks off every metric-collection loop. Each Repeatedly*() call reschedules
 // itself on |dispatcher_|; the lifetime events are logged once and only retried
 // on failure.
 void SystemMetricsDaemon::StartLogging() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::StartLogging");
   // We keep gathering metrics until this process is terminated.
   RepeatedlyLogUpPing();
   RepeatedlyLogUptime();
   RepeatedlyLogCpuUsage();
   // Note: this one only posts a delayed task, so the first log stats report
   // happens after the first delay rather than immediately.
   RepeatedlyLogLogStats();
   LogLifetimeEvents();
 }

 // Logs the up-ping events appropriate for the current uptime, then re-posts
 // itself on |dispatcher_| after the delay LogFuchsiaUpPing() asked for plus
 // five extra seconds.
 void SystemMetricsDaemon::RepeatedlyLogUpPing() {
   const std::chrono::seconds requested_delay = LogFuchsiaUpPing(GetUpTime());
   // NOTE(review): the extra 5 seconds is presumably slack so that the next run
   // lands safely past the uptime threshold it is waiting for - confirm.
   const auto delay_with_slack = zx::sec(requested_delay.count() + 5);
   async::PostDelayedTask(
       dispatcher_, [this]() { RepeatedlyLogUpPing(); }, delay_with_slack);
 }

 // Logs the lifetime events (Boot and Activation). Each of the two helpers
 // schedules its own retry if its event could not be logged.
 void SystemMetricsDaemon::LogLifetimeEvents() {
   LogLifetimeEventBoot();
   LogLifetimeEventActivation();
 }

 // Tries to log the Boot lifetime event. If LogFuchsiaLifetimeEventBoot()
 // reports failure, the attempt is repeated after five minutes.
 void SystemMetricsDaemon::LogLifetimeEventBoot() {
   if (LogFuchsiaLifetimeEventBoot()) {
     return;
   }
   // Pause for 5 minutes and try again.
   constexpr int64_t kRetryDelaySeconds = 5 * 60;
   async::PostDelayedTask(
       dispatcher_, [this]() { LogLifetimeEventBoot(); }, zx::sec(kRetryDelaySeconds));
 }

 // Tries to log the Activation lifetime event. If
 // LogFuchsiaLifetimeEventActivation() reports failure, the attempt is repeated
 // after five minutes.
 void SystemMetricsDaemon::LogLifetimeEventActivation() {
   if (LogFuchsiaLifetimeEventActivation()) {
     return;
   }
   // Pause for 5 minutes and try again.
   constexpr int64_t kRetryDelaySeconds = 5 * 60;
   async::PostDelayedTask(
       dispatcher_, [this]() { LogLifetimeEventActivation(); }, zx::sec(kRetryDelaySeconds));
 }

 // Logs the current uptime, then re-posts itself on |dispatcher_| after the
 // delay returned by LogFuchsiaUptime().
 void SystemMetricsDaemon::RepeatedlyLogUptime() {
   const std::chrono::seconds next_run_in = LogFuchsiaUptime();
   async::PostDelayedTask(
       dispatcher_, [this]() { RepeatedlyLogUptime(); }, zx::sec(next_run_in.count()));
 }

 // Takes one CPU usage sample via LogCpuUsage(), then re-posts itself on
 // |dispatcher_| after the delay that call returns.
 void SystemMetricsDaemon::RepeatedlyLogCpuUsage() {
   // This function runs again for every sample, but the bucket layout is built
   // from fixed registry constants. Build it once instead of allocating a new,
   // identical config on every iteration.
   if (!cpu_bucket_config_) {
     cpu_bucket_config_ =
         InitializeLinearBucketConfig(fuchsia_system_metrics::kCpuPercentageIntBucketsFloor,
                                      fuchsia_system_metrics::kCpuPercentageIntBucketsNumBuckets,
                                      fuchsia_system_metrics::kCpuPercentageIntBucketsStepSize);
   }
   const std::chrono::seconds seconds_to_sleep = LogCpuUsage();
   async::PostDelayedTask(
       dispatcher_, [this]() { RepeatedlyLogCpuUsage(); }, zx::sec(seconds_to_sleep.count()));
 }

 // Schedules LogLogStats() to run 15 minutes from now; once it has run, the
 // task schedules the next round the same way.
 void SystemMetricsDaemon::RepeatedlyLogLogStats() {
   constexpr int64_t kLogStatsPeriodSeconds = 15 * 60;
   const auto log_then_reschedule = [this]() {
     LogLogStats();
     RepeatedlyLogLogStats();
   };
   async::PostDelayedTask(dispatcher_, log_then_reschedule, zx::sec(kLogStatsPeriodSeconds));
 }

 std::unique_ptr<IntegerBucketConfig> SystemMetricsDaemon::InitializeLinearBucketConfig(
     int64_t bucket_floor, int32_t num_buckets, int32_t step_size) {
   IntegerBuckets bucket_proto;
   LinearIntegerBuckets* linear = bucket_proto.mutable_linear();
   linear->set_floor(bucket_floor);
   linear->set_num_buckets(num_buckets);
   linear->set_step_size(step_size);
   return IntegerBucketConfig::CreateFromProto(bucket_proto);
 }

 // Returns the time elapsed since this daemon was constructed, truncated to
 // whole seconds.
 std::chrono::seconds SystemMetricsDaemon::GetUpTime() {
   // Note(rudominer) The start time of the SystemMetricsDaemon stands in for
   // the system start time. That is fine as long as we don't start seeing
   // systematic restarts of the SystemMetricsDaemon. If that starts happening
   // we should look into how to capture the actual boot time.
   const auto elapsed = clock_->Now() - start_time_;
   return std::chrono::duration_cast<std::chrono::seconds>(elapsed);
 }

 std::chrono::seconds SystemMetricsDaemon::LogFuchsiaUpPing(std::chrono::seconds uptime) {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogFuchsiaUpPing");

   typedef FuchsiaUpPingMetricDimensionUptime Uptime;

   // We always log that we are |Up|.
   // If |uptime| is at least one minute we log that we are |UpOneMinute|.
   // If |uptime| is at least ten minutes we log that we are |UpTenMinutes|.
   // If |uptime| is at least one hour we log that we are |UpOneHour|.
   // If |uptime| is at least 12 hours we log that we are |UpTwelveHours|.
   // If |uptime| is at least 24 hours we log that we are |UpOneDay|.
   //
   // To understand the logic of this function it is important to note that
   // the events we are logging are intended to take advantage of Cobalt's
   // local aggregation feature. Thus, for example, although we log the
   // |Up| event many times throughout a calendar day, only a single
   // Observation per day will be sent from the device to the Cobalt backend
   // indicating that this device was "Up" during the day.

   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     // Something went wrong. Pause for 5 minutes.
     return std::chrono::minutes(5);
   }

   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   // Always log that we are "Up".
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::Up, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::minutes(1)) {
     // If we have been up for less than a minute, come back here after it
     // has been a minute.
     return std::chrono::minutes(1) - uptime;
   }
   // Log UpOneMinute
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpOneMinute, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::minutes(10)) {
     // If we have been up for less than 10 minutes, come back here after it
     // has been 10 minutes.
     return std::chrono::minutes(10) - uptime;
   }
   // Log UpTenMinutes
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpTenMinutes, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::hours(1)) {
     // If we have been up for less than an hour, come back here after it has
     // has been an hour.
     return std::chrono::hours(1) - uptime;
   }
   // Log UpOneHour
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpOneHour, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::hours(12)) {
     // If we have been up for less than 12 hours, come back here after *one*
     // hour. Notice this time we don't wait 12 hours to come back. The reason
     // is that it may be close to the end of the day. When the new day starts
     // we want to come back in a reasonable amount of time (we consider
     // one hour to be reasonable) so that we can log the earlier events
     // in the new day.
     return std::chrono::hours(1);
   }
   // Log UpTwelveHours.
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpTwelveHours, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::hours(24)) {
     // As above, come back in one hour.
     return std::chrono::hours(1);
   }
   // Log UpOneDay.
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpOneDay, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::hours(72)) {
     return std::chrono::hours(1);
   }
   // Log UpThreeDays.
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpThreeDays, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   if (uptime < std::chrono::hours(144)) {
     return std::chrono::hours(1);
   }
   // Log UpSixDays.
   status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   if (ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
                                                  Uptime::UpSixDays, &status)) != ZX_OK) {
     return std::chrono::minutes(5);
   }
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   }
   // As above, come back in one hour.
   return std::chrono::hours(1);
 }

 std::chrono::seconds SystemMetricsDaemon::LogFuchsiaUptime() {
   std::chrono::seconds uptime = GetUpTime();
   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     // Something went wrong. Pause for 5 minutes.
     return std::chrono::minutes(5);
   }
   auto up_hours = std::chrono::duration_cast<std::chrono::hours>(uptime).count();
   uint32_t event_code = (up_hours < 336)
                             ? FuchsiaUptimeMetricDimensionUptimeRange::LessThanTwoWeeks
                             : event_code = FuchsiaUptimeMetricDimensionUptimeRange::TwoWeeksOrMore;

   fuchsia::cobalt::Status status;
   ReinitializeIfPeerClosed(logger_->LogElapsedTime(fuchsia_system_metrics::kFuchsiaUptimeMetricId,
                                                    event_code, "", up_hours, &status));
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogCobaltEvent() returned status=" << StatusToString(status);
   }
   // Schedule a call of this function for the next multiple of an hour.
   return SecondsBeforeNextHour(uptime);
 }

 // Logs the Boot lifetime event.
 //
 // Returns false when the caller should retry later: either there was no logger
 // or Cobalt answered BUFFER_FULL. Every other outcome, including other error
 // statuses, returns true.
 bool SystemMetricsDaemon::LogFuchsiaLifetimeEventBoot() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogFuchsiaLifetimeEventBoot");
   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     // Something went wrong. Pause and try again.
     return false;
   }

   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaLifetimeEventsMetricId,
                                              FuchsiaLifetimeEventsMetricDimensionEvents::Boot,
                                              &status));
   if (status == fuchsia::cobalt::Status::OK) {
     return true;
   }

   FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
   // Only a full buffer is treated as a temporary error worth retrying.
   const bool is_temporary_error = (status == fuchsia::cobalt::Status::BUFFER_FULL);
   return !is_temporary_error;
 }

 // Logs the Activation lifetime event unless the activation marker file already
 // exists, and creates that marker file after a successful log.
 //
 // Returns true if no further attempt is needed, false if the caller should
 // retry later (no logger, or Cobalt did not answer OK).
 bool SystemMetricsDaemon::LogFuchsiaLifetimeEventActivation() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogFuchsiaLifetimeEventActivation");
   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     // Something went wrong. Pause and try again.
     return false;
   }

   const std::string marker_path = activation_file_prefix_ + kActivationFileSuffix;

   std::ifstream existing_marker(marker_path);
   if (existing_marker) {
     // The marker can be opened, so activation was logged before. No work
     // required.
     return true;
   }

   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   ReinitializeIfPeerClosed(logger_->LogEvent(fuchsia_system_metrics::kFuchsiaLifetimeEventsMetricId,
                                              FuchsiaLifetimeEventsMetricDimensionEvents::Activation,
                                              &status));
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogEvent() returned status=" << StatusToString(status);
     return false;
   }

   // Leave an empty marker file behind so that activation is not logged again.
   std::ofstream new_marker(marker_path);
   new_marker.close();
   return true;
 }

 // Fetches one CPU usage sample and, if the fetch produced a value, stores it
 // via StoreCpuData().
 //
 // Returns the delay before the next sample should be taken: one second after
 // a successful fetch or the first data point, one minute if there is no logger
 // or the fetch failed.
 std::chrono::seconds SystemMetricsDaemon::LogCpuUsage() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogCpuUsage");
   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     return std::chrono::minutes(1);
   }
   double cpu_percentage = 0.0;
   // Deliberately no `default:` label, so the compiler can still warn when a
   // new FetchCpuResult enumerator is not handled here.
   switch (cpu_stats_fetcher_->FetchCpuPercentage(&cpu_percentage)) {
     case cobalt::FetchCpuResult::Ok:
       StoreCpuData(cpu_percentage);
       return std::chrono::seconds(1);
     case cobalt::FetchCpuResult::FirstDataPoint:
       return std::chrono::seconds(1);
     case cobalt::FetchCpuResult::Error:
       return std::chrono::minutes(1);
   }
   // Reached only if the fetcher returned a value outside the enumerators above.
   // Flowing off the end of a non-void function is undefined behavior, so treat
   // it like an error and back off.
   return std::chrono::minutes(1);
 }

 // Asks |log_stats_fetcher_| for the latest log statistics and, in the fetcher's
 // callback, reports them to Cobalt in two batches:
 //  1. error log count, kernel log count and per-component error counts through
 //     |logger_|;
 //  2. granular (per file path and line) error counts through
 //     |granular_error_stats_logger_|.
 // Does nothing except try to reconnect when |logger_| is missing.
 void SystemMetricsDaemon::LogLogStats() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogLogStats");
   if (!logger_) {
     FX_LOGS(ERROR) << "No logger present. Reconnecting...";
     InitializeLogger();
     return;
   }
   log_stats_fetcher_->FetchMetrics([this](const cobalt::LogStatsFetcher::Metrics& metrics) {
     std::vector<CobaltEvent> events;

     events.push_back(CobaltEventBuilder(fuchsia_system_metrics::kErrorLogCountMetricId)
                          .as_count_event(0, metrics.error_count));

     events.push_back(CobaltEventBuilder(fuchsia_system_metrics::kKernelLogCountMetricId)
                          .as_count_event(0, metrics.klog_count));

     for (const auto& it : metrics.per_component_error_count) {
       events.push_back(
           CobaltEventBuilder(fuchsia_system_metrics::kPerComponentErrorLogCountMetricId)
               .with_event_code(it.first)
               .as_count_event(0, it.second));
     }

     fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
     ReinitializeIfPeerClosed(logger_->LogCobaltEvents(std::move(events), &status));
     if (status != fuchsia::cobalt::Status::OK) {
       FX_LOGS(ERROR) << "LogCobaltEvents() for error log stats returned status="
                      << StatusToString(status);
     }

     std::vector<CobaltEvent> granular_events;
     for (const auto& record : metrics.granular_stats) {
       // Component can only be up to 64 bytes, so truncate the beginning of the file path if it
       // doesn't fit.
       std::string component = record.file_path.size() > 64
                                   ? record.file_path.substr(record.file_path.size() - 64)
                                   : record.file_path;
       granular_events.push_back(CobaltEventBuilder(granular_error_stats_specs_.metric_id)
                                     .with_event_code((record.line_no - 1) % 1023)
                                     .with_component(component)
                                     .as_count_event(0, record.count));
     }
     if (granular_events.empty()) {
       return;
     }
     // The granular logger starts out null and stays null if connecting it
     // failed, so it must be checked before use. Mirror the handling of a
     // missing |logger_|: try to reconnect and skip this round.
     if (!granular_error_stats_logger_) {
       FX_LOGS(ERROR) << "No granular error stats logger present. Reconnecting...";
       InitializeGranularErrorStatsLogger();
       return;
     }
     status = fuchsia::cobalt::Status::INTERNAL_ERROR;
     ReinitializeGranularErrorStatsIfPeerClosed(
         granular_error_stats_logger_->LogCobaltEvents(std::move(granular_events), &status));
     if (status != fuchsia::cobalt::Status::OK) {
       FX_LOGS(ERROR) << "LogCobaltEvents() for granular error log stats returned status="
                      << StatusToString(status);
     }
   });
 }

 // Records one CPU usage sample: adds it to the per-activity-state histogram
 // that is periodically flushed to Cobalt, and folds it into the running
 // max/mean that is periodically written to inspect.
 void SystemMetricsDaemon::StoreCpuData(double cpu_percentage) {
   // NOTE(review): the sample is scaled by 100 before bucketing; presumably the
   // buckets are defined in hundredths of a percent - confirm against the
   // metric registry.
   uint32_t bucket_index = cpu_bucket_config_->BucketIndex(cpu_percentage * 100);
   activity_state_to_cpu_map_[current_state_][bucket_index]++;
   cpu_data_stored_++;
   // 600 samples; LogCpuUsage() asks for one sample per second after a
   // successful fetch, which makes this roughly ten minutes.
   if (cpu_data_stored_ >= 10 * 60) {  // Flush every 10 minutes.
     if (LogCpuToCobalt()) {
       // The flush succeeded, so start a new histogram. If it had failed, the
       // data and the counter would be kept, and the flush would be attempted
       // again on the next sample.
       activity_state_to_cpu_map_.clear();
       cpu_data_stored_ = 0;
     }
   }

   cpu_usage_accumulator_ += cpu_percentage;
   if (cpu_percentage > cpu_usage_max_) {
     cpu_usage_max_ = cpu_percentage;
   }
   // Every 10 (kInspectSamplePeriod) seconds, write to inspect
   // Note that this test reads cpu_data_stored_ after the possible reset above,
   // so it also fires right after a successful flush (0 % 10 == 0).
   const size_t kInspectSamplePeriod = 10;
   if (cpu_data_stored_ % kInspectSamplePeriod == 0) {
     inspect_cpu_max_.Set(cpu_array_index_, cpu_usage_max_);
     inspect_cpu_mean_.Set(cpu_array_index_++, cpu_usage_accumulator_ / kInspectSamplePeriod);
     cpu_usage_max_ = cpu_usage_accumulator_ = 0;
     // The inspect arrays are used as ring buffers: wrap around at the end.
     if (cpu_array_index_ == kCPUArraySize) {
       cpu_array_index_ = 0;
     }
   }
 }

 bool SystemMetricsDaemon::LogCpuToCobalt() {
   TRACE_DURATION("system_metrics", "SystemMetricsDaemon::LogCpuToCobalt");
   using EventCode = fuchsia_system_metrics::CpuPercentageMetricDimensionDeviceState;
   std::vector<CobaltEvent> events;
   auto builder = CobaltEventBuilder(fuchsia_system_metrics::kCpuPercentageMetricId);
   for (const auto& pair : activity_state_to_cpu_map_) {
     std::vector<HistogramBucket> cpu_buckets_;
     for (const auto& bucket_pair : pair.second) {
       HistogramBucket bucket;
       bucket.index = bucket_pair.first;
       bucket.count = bucket_pair.second;
       cpu_buckets_.push_back(std::move(bucket));
     }
     events.push_back(builder.Clone()
                          .with_event_code(GetCobaltEventCodeForDeviceState<EventCode>(pair.first))
                          .as_int_histogram(cpu_buckets_));
   }
   // call cobalt FIDL
   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   ReinitializeIfPeerClosed(logger_->LogCobaltEvents(std::move(events), &status));
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "LogCpuToCobalt returned status=" << StatusToString(status);
     return false;
   }
   return true;
 }

 // Inspects the transport status of a call on |logger_|: reconnects the logger
 // if the peer closed the channel and logs any other failure. |zx_status| is
 // returned unchanged so that callers can wrap a FIDL call with this function.
 zx_status_t SystemMetricsDaemon::ReinitializeIfPeerClosed(zx_status_t zx_status) {
   switch (zx_status) {
     case ZX_OK:
       break;
     case ZX_ERR_PEER_CLOSED:
       FX_LOGS(ERROR) << "Logger connection closed. Reconnecting...";
       InitializeLogger();
       break;
     default:
       FX_PLOGS(ERROR, zx_status) << "Logger failed";
       break;
   }
   return zx_status;
 }

 // (Re)connects |logger_|: connects to the LoggerFactory service and asks it for
 // a Logger for this project. Every failure is logged; |logger_| is only
 // assigned when the factory answered with status OK.
 void SystemMetricsDaemon::InitializeLogger() {
   // Connect to the cobalt fidl service provided by the environment.
   context_->svc()->Connect(factory_.NewRequest());
   if (!factory_) {
     FX_LOGS(ERROR) << "Unable to get LoggerFactory.";
     return;
   }

   // Create a Cobalt Logger. The project ID is the one we specified in the
   // Cobalt metrics registry.
   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   factory_->CreateLoggerFromProjectId(fuchsia_system_metrics::kProjectId,
                                       logger_fidl_proxy_.NewRequest(), &status);
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "Unable to get Logger from factory. Status=" << StatusToString(status);
     return;
   }

   logger_ = logger_fidl_proxy_.get();
   if (!logger_) {
     FX_LOGS(ERROR) << "Unable to get Logger from factory.";
   }
 }

 // Inspects the transport status of a call on |granular_error_stats_logger_|:
 // reconnects that logger if the peer closed the channel and logs any other
 // failure, in the same way ReinitializeIfPeerClosed() does for the main logger.
 // |zx_status| is returned unchanged.
 zx_status_t SystemMetricsDaemon::ReinitializeGranularErrorStatsIfPeerClosed(zx_status_t zx_status) {
   if (zx_status == ZX_ERR_PEER_CLOSED) {
     FX_LOGS(ERROR) << "Granular error stats logger connection closed. Reconnecting...";
     InitializeGranularErrorStatsLogger();
   } else if (zx_status != ZX_OK) {
     // Do not swallow other transport errors silently.
     FX_PLOGS(ERROR, zx_status) << "Granular error stats logger failed";
   }
   return zx_status;
 }

 // (Re)connects |granular_error_stats_logger_|: connects to the LoggerFactory
 // service and asks it for a Logger for the customer and project given in
 // |granular_error_stats_specs_|. Every failure is logged; the logger pointer is
 // only assigned when the factory answered with status OK.
 void SystemMetricsDaemon::InitializeGranularErrorStatsLogger() {
   context_->svc()->Connect(factory_.NewRequest());
   if (!factory_) {
     FX_LOGS(ERROR) << "Unable to get LoggerFactory.";
     return;
   }

   const auto& specs = granular_error_stats_specs_;
   fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
   factory_->CreateLoggerFromProjectSpec(specs.customer_id, specs.project_id,
                                         granular_error_stats_logger_fidl_proxy_.NewRequest(),
                                         &status);
   if (status != fuchsia::cobalt::Status::OK) {
     FX_LOGS(ERROR) << "Unable to get granular error stats Logger from factory. Status="
                    << StatusToString(status);
     return;
   }

   granular_error_stats_logger_ = granular_error_stats_logger_fidl_proxy_.get();
   if (!granular_error_stats_logger_) {
     FX_LOGS(ERROR) << "Unable to get granular error stats Logger from factory.";
   }
 }