blob: 64ecc56b35ddde6b978d58f709b957860fbc6042 [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// The cobalt system metrics collection daemon uses cobalt to log system metrics
// on a regular basis.
#include "garnet/bin/cobalt/system-metrics/system_metrics_daemon.h"
#include <fcntl.h>
#include <chrono>
#include <memory>
#include <thread>
#include <fuchsia/cobalt/cpp/fidl.h>
#include <fuchsia/sysinfo/c/fidl.h>
#include <lib/fdio/util.h>
#include <lib/fxl/logging.h>
#include <lib/zx/resource.h>
#include <zircon/device/device.h>
#include "garnet/bin/cobalt/system-metrics/metrics_registry.cb.h"
#include "garnet/bin/cobalt/utils/clock.h"
#include "garnet/bin/cobalt/utils/status_utils.h"
#include "lib/fxl/logging.h"
using cobalt::StatusToString;
using fuchsia::cobalt::Logger_Sync;
using fuchsia_system_metrics::FuchsiaLifetimeEventsEventCode;
using fuchsia_system_metrics::FuchsiaUpPingEventCode;
using std::chrono::steady_clock;
// Production constructor.
//
// Forwards |dispatcher| and |context| to the four-argument constructor
// together with a null logger and a freshly created
// cobalt::RealSteadyClock, then calls InitializeLogger() to obtain a logger.
//
// |dispatcher| and |context| are stored as raw pointers by the delegated-to
// constructor. NOTE(review): they presumably must outlive this object --
// confirm against the owner of the daemon.
SystemMetricsDaemon::SystemMetricsDaemon(async_dispatcher_t* dispatcher,
                                         component::StartupContext* context)
    : SystemMetricsDaemon(dispatcher, context, nullptr,
                          std::make_unique<cobalt::RealSteadyClock>()) {
  // The delegated-to constructor was handed a null logger, so try to set one
  // up now.
  InitializeLogger();
}
// Constructs a daemon from explicitly supplied collaborators.
//
// |dispatcher| and |context| are stored as given.
// |logger| is stored as given and may be null: the two-argument constructor
// passes nullptr and then calls InitializeLogger().
// |clock| is taken over by the daemon and is read once here to record the
// daemon's start time; it must therefore be non-null.
SystemMetricsDaemon::SystemMetricsDaemon(
async_dispatcher_t* dispatcher, component::StartupContext* context,
fuchsia::cobalt::Logger_Sync* logger,
std::unique_ptr<cobalt::SteadyClock> clock)
: dispatcher_(dispatcher),
context_(context),
logger_(logger),
// Read the clock through the parameter *before* it is moved from below.
// NOTE(review): members are initialized in declaration order, so this is
// only safe if |start_time_| is declared before |clock_| in the header --
// confirm.
start_time_(clock->Now()),
clock_(std::move(clock)) {}
// Logs all metrics once and then re-posts itself on |dispatcher_|, so that
// metrics keep being gathered until this process is terminated.
void SystemMetricsDaemon::Work() {
  const std::chrono::seconds requested_wait = LogMetrics();

  // Wait five seconds longer than LogMetrics() asked for.
  // NOTE(review): presumably this padding makes sure the next run lands
  // after the uptime threshold it is waiting for -- confirm.
  const auto delay = zx::sec(requested_wait.count() + 5);

  async::PostDelayedTask(dispatcher_, [this]() { Work(); }, delay);
}
// Logs every metric this daemon is responsible for.
//
// Returns the smaller of the wait times requested by the individual logging
// functions, i.e. how long the caller should wait before calling again.
std::chrono::seconds SystemMetricsDaemon::LogMetrics() {
  // Note(rudominer) The start time of the SystemMetricsDaemon is used as a
  // proxy for the system start time. This is fine as long as we don't start
  // seeing systematic restarts of the SystemMetricsDaemon. If that starts
  // happening we should look into how to capture the actual boot time.
  const auto uptime = std::chrono::duration_cast<std::chrono::seconds>(
      clock_->Now() - start_time_);

  // The up-ping is logged first, then the lifetime events.
  const std::chrono::seconds up_ping_wait = LogFuchsiaUpPing(uptime);
  const std::chrono::seconds lifetime_wait = LogFuchsiaLifetimeEvents();

  return std::min(up_ping_wait, lifetime_wait);
}
std::chrono::seconds SystemMetricsDaemon::LogFuchsiaUpPing(
std::chrono::seconds uptime) {
// We always log that we are |Up|.
// If |uptime| is at least one minute we log that we are |UpOneMinute|.
// If |uptime| is at least ten minutes we log that we are |UpTenMinutes|.
// If |uptime| is at least one hour we log that we are |UpOneHour|.
// If |uptime| is at least 12 hours we log that we are |UpTwelveHours|.
// If |uptime| is at least 24 hours we log that we are |UpOneDay|.
//
// To understand the logic of this function it is important to note that
// the events we are logging are intended to take advantage of Cobalt's
// local aggregation feature. Thus, for example, although we log the
// |Up| event many times throughout a calendar day, only a single
// Observation per day will be sent from the device to the Cobalt backend
// indicating that this device was "Up" during the day.
if (!logger_) {
FXL_LOG(ERROR)
<< "Cobalt SystemMetricsDaemon: No logger present. Reconnecting...";
InitializeLogger();
// Something went wrong. Pause for 5 minutes.
return std::chrono::minutes(5);
}
fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
// Always log that we are "Up".
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::Up, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
if (uptime < std::chrono::minutes(1)) {
// If we have been up for less than a minute, come back here after it
// has been a minute.
return std::chrono::minutes(1) - uptime;
}
// Log UpOneMinute
status = fuchsia::cobalt::Status::INTERNAL_ERROR;
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::UpOneMinute, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
if (uptime < std::chrono::minutes(10)) {
// If we have been up for less than 10 minutes, come back here after it
// has been 10 minutes.
return std::chrono::minutes(10) - uptime;
}
// Log UpTenMinutes
status = fuchsia::cobalt::Status::INTERNAL_ERROR;
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::UpTenMinutes, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
if (uptime < std::chrono::hours(1)) {
// If we have been up for less than an hour, come back here after it has
// has been an hour.
return std::chrono::hours(1) - uptime;
}
// Log UpOneHour
status = fuchsia::cobalt::Status::INTERNAL_ERROR;
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::UpOneHour, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
if (uptime < std::chrono::hours(12)) {
// If we have been up for less than 12 hours, come back here after *one*
// hour. Notice this time we don't wait 12 hours to come back. The reason
// is that it may be close to the end of the day. When the new day starts
// we want to come back in a reasonable amount of time (we consider
// one hour to be reasonable) so that we can log the earlier events
// in the new day.
return std::chrono::hours(1);
}
// Log UpTwelveHours.
status = fuchsia::cobalt::Status::INTERNAL_ERROR;
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::UpTwelveHours, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
if (uptime < std::chrono::hours(24)) {
// As above, come back in one hour.
return std::chrono::hours(1);
}
// Log UpOneDay.
status = fuchsia::cobalt::Status::INTERNAL_ERROR;
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaUpPingMetricId,
FuchsiaUpPingEventCode::UpOneDay, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
}
// As above, come back in one hour.
return std::chrono::hours(1);
}
std::chrono::seconds SystemMetricsDaemon::LogFuchsiaLifetimeEvents() {
if (!logger_) {
FXL_LOG(ERROR)
<< "Cobalt SystemMetricsDaemon: No logger present. Reconnecting...";
InitializeLogger();
// Something went wrong. Pause for 5 minutes.
return std::chrono::minutes(5);
}
fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
if (!boot_reported_) {
logger_->LogEvent(fuchsia_system_metrics::kFuchsiaLifetimeEventsMetricId,
FuchsiaLifetimeEventsEventCode::Boot, &status);
if (status != fuchsia::cobalt::Status::OK) {
FXL_LOG(ERROR)
<< "Cobalt SystemMetricsDaemon: LogEvent() returned status="
<< StatusToString(status);
} else {
boot_reported_ = true;
}
}
return std::chrono::seconds::max();
}
// Tries to obtain a Cobalt logger and store it in |logger_|.
//
// Connects |factory_| to the Cobalt FIDL service provided by the
// environment, then asks it for a logger bound to |logger_fidl_proxy_|.
// On any failure an error is logged and |logger_| is left unchanged.
void SystemMetricsDaemon::InitializeLogger() {
  // The project name is the one we specified in the Cobalt metrics registry.
  static const char kProjectName[] = "fuchsia_system_metrics";

  // Connect to the cobalt fidl service provided by the environment.
  context_->ConnectToEnvironmentService(factory_.NewRequest());
  if (!factory_) {
    FXL_LOG(ERROR)
        << "Cobalt SystemMetricsDaemon: Unable to get LoggerFactory.";
    return;
  }

  // We specify that our release stage is DOGFOOD. This means we are not
  // allowed to use any metrics declared as DEBUG or FISHFOOD.
  // |status| starts out as INTERNAL_ERROR, so a call that never fills it in
  // is treated as a failure.
  fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
  factory_->CreateLoggerFromProjectName(
      kProjectName, fuchsia::cobalt::ReleaseStage::DOGFOOD,
      logger_fidl_proxy_.NewRequest(), &status);
  if (status != fuchsia::cobalt::Status::OK) {
    FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: Unable to get Logger from "
                      "factory. Status="
                   << StatusToString(status);
    return;
  }

  logger_ = logger_fidl_proxy_.get();
  if (logger_) {
    return;
  }
  FXL_LOG(ERROR) << "Cobalt SystemMetricsDaemon: Unable to get Logger from "
                    "factory.";
}