blob: 214880e76f3da22d5c02279874c53108d602c547 [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// The cobalt system metrics collection daemon uses cobalt to log system metrics
// on a regular basis.
#include <fcntl.h>
#include <chrono>
#include <memory>
#include <thread>
#include <fuchsia/cobalt/cpp/fidl.h>
#include <fuchsia/sysinfo/c/fidl.h>
#include <lib/async-loop/cpp/loop.h>
#include <lib/fdio/util.h>
#include <lib/zx/resource.h>
#include <zircon/device/device.h>
#include "lib/component/cpp/startup_context.h"
#include "lib/fsl/vmo/file.h"
#include "lib/fxl/logging.h"
// Location, inside this package, of the serialized Cobalt metrics registry.
constexpr char kConfigBinProtoPath[] =
    "/pkg/data/cobalt_system_metrics_registry.pb";

// Cobalt metric ID used when logging uptime.
constexpr uint32_t kUptimeMetricId = 1;

// Cobalt metric ID used when logging memory usage.
constexpr uint32_t kMemoryUsageMetricId = 3;

// Number of minutes between two consecutive rounds of metric gathering.
constexpr unsigned int kIntervalMinutes = 1;
// Gets the root resource which is needed in order to access a variety of system
// metrics, including memory usage data.
//
// On success returns ZX_OK and stores the resource handle in |resource_out|.
// On failure the reason is logged, |resource_out| is left untouched, and one
// of the following is returned:
//   - ZX_ERR_IO if the sysinfo device could not be opened;
//   - the status of fdio_get_service_handle() if the channel could not be
//     extracted from the file descriptor;
//   - the transport status of the GetRootResource call, or the status the
//     sysinfo service reported in its reply.
zx_status_t get_root_resource(zx::resource* resource_out) {
  static constexpr char kResourcePath[] = "/dev/misc/sysinfo";

  int fd = open(kResourcePath, O_RDWR);
  if (fd < 0) {
    FXL_LOG(ERROR) << "Failed to open " << kResourcePath << " with "
                   << strerror(errno);
    return ZX_ERR_IO;
  }

  // NOTE(review): fdio_get_service_handle() is understood to consume |fd| on
  // both success and failure, which is why there is no close() on any path
  // below -- confirm against the fdio documentation.
  zx::channel channel;
  zx_status_t status =
      fdio_get_service_handle(fd, channel.reset_and_get_address());
  if (status != ZX_OK) {
    // Log here as well so that every failure path of this function leaves a
    // trace explaining why the root resource is unavailable.
    FXL_LOG(ERROR) << "Failed to get service handle for " << kResourcePath
                   << ": " << status;
    return status;
  }

  // |fidl_status| reports whether the call itself went through; |status| is
  // overwritten with the result reported by the sysinfo service.
  zx_handle_t raw_resource;
  zx_status_t fidl_status = fuchsia_sysinfo_DeviceGetRootResource(
      channel.get(), &status, &raw_resource);
  if (fidl_status != ZX_OK) {
    FXL_LOG(ERROR) << "Failed to get root resource: " << fidl_status;
    return fidl_status;
  }
  if (status != ZX_OK) {
    FXL_LOG(ERROR) << "Failed to get root resource: " << status;
    return status;
  }

  // Hand ownership of the raw handle to the caller's zx::resource wrapper.
  resource_out->reset(raw_resource);
  return ZX_OK;
}
// Returns the name of |status| as a string, for use in log messages. A value
// that matches none of the enumerators handled below yields "UNKNOWN_STATUS".
std::string StatusToString(fuchsia::cobalt::Status status) {
  switch (status) {
    case fuchsia::cobalt::Status::OK:
      return "OK";
    case fuchsia::cobalt::Status::INVALID_ARGUMENTS:
      return "INVALID_ARGUMENTS";
    case fuchsia::cobalt::Status::EVENT_TOO_BIG:
      return "EVENT_TOO_BIG";
    case fuchsia::cobalt::Status::BUFFER_FULL:
      return "BUFFER_FULL";
    case fuchsia::cobalt::Status::INTERNAL_ERROR:
      return "INTERNAL_ERROR";
  }
  // Reached only when |status| holds a value outside the cases above. Without
  // this return, control would flow off the end of a non-void function, which
  // is undefined behavior. It is placed after the switch instead of in a
  // `default:` label so the compiler can still warn about unhandled
  // enumerators.
  return "UNKNOWN_STATUS";
}
// Reads the serialized Cobalt config found at kConfigBinProtoPath into a VMO
// and returns a ProjectProfile whose |config| field carries that VMO. The read
// is guarded by FXL_CHECK, so a missing or unreadable file is treated as a
// fatal condition rather than reported to the caller.
fuchsia::cobalt::ProjectProfile LoadCobaltConfig() {
  fsl::SizedVmo vmo;
  bool success = fsl::VmoFromFilename(kConfigBinProtoPath, &vmo);
  FXL_CHECK(success) << "Could not read Cobalt config file into VMO";

  fuchsia::cobalt::ProjectProfile result;
  // ToTransport() is invoked on an rvalue: the VMO is handed over, not copied.
  result.config = std::move(vmo).ToTransport();
  return result;
}
// Gathers system metrics (uptime and memory usage) at a fixed interval and
// logs them through a Cobalt logger.
class SystemMetricsApp {
 public:
  // tick_interval_minutes is the number of minutes to sleep in between calls to
  // the GatherMetrics method.
  //
  // The constructor is explicit so that a bare integer is never silently
  // converted into a SystemMetricsApp.
  explicit SystemMetricsApp(unsigned int tick_interval_minutes)
      : context_(component::StartupContext::CreateFromStartupInfo()),
        start_time_(std::chrono::steady_clock::now()),
        tick_interval_(tick_interval_minutes) {}

  // Main is invoked to initialize the app and start the metric gathering loop.
  void Main(async::Loop* loop);

 private:
  // Connects |logger_| to the Cobalt service provided by the environment.
  void ConnectToEnvironmentService();

  // Performs one round of metric gathering: uptime, then memory usage.
  void GatherMetrics();

  // LogUptime returns the status returned by its call to Add*Observation.
  fuchsia::cobalt::Status LogUptime(std::chrono::minutes uptime_minutes);

  // LogMemoryUsage returns the status OK if everything went fine, or the
  // logging was skipped due to scheduling, INTERNAL_ERROR if it was somehow
  // unable to get the memory usage information and whatever was returned by
  // Add*Observation otherwise.
  fuchsia::cobalt::Status LogMemoryUsage(std::chrono::minutes uptime_minutes);

  // NOTE: members are initialized in declaration order; keep this order in
  // sync with the constructor's initializer list.
  std::unique_ptr<component::StartupContext> context_;
  fuchsia::cobalt::LoggerSyncPtr logger_;

  // Time at which this object was constructed; uptime is measured from here.
  std::chrono::steady_clock::time_point start_time_;

  // Pause between two consecutive rounds of metric gathering.
  std::chrono::minutes tick_interval_;

  // We don't log every minute of uptime. We log in exponentially-growing
  // increments. This keeps track of which minute should be logged.
  int next_uptime_bucket_ = 0;

  // We log memory usage no more than once every 5 minutes.
  int next_log_memory_usage_ = 0;
};
// Runs one round of metric gathering. The time elapsed since |start_time_| is
// truncated to whole minutes and handed to the uptime logger and then to the
// memory-usage logger. The statuses they return are not inspected here.
void SystemMetricsApp::GatherMetrics() {
  const auto elapsed = std::chrono::steady_clock::now() - start_time_;
  const std::chrono::minutes uptime_minutes =
      std::chrono::duration_cast<std::chrono::minutes>(elapsed);

  LogUptime(uptime_minutes);
  LogMemoryUsage(uptime_minutes);
}
// Logs every pending uptime bucket up to and including |uptime_minutes|.
// Buckets are 0, 1, 2, 4, 8, ... minutes: after a bucket has been logged
// successfully the next one is double the previous (1 after the initial 0).
//
// Returns OK when all pending buckets were logged. On the first failure the
// status is logged and returned; the failed bucket is not advanced, so it is
// attempted again on the next call.
fuchsia::cobalt::Status SystemMetricsApp::LogUptime(
    std::chrono::minutes uptime_minutes) {
  const auto current_minute = uptime_minutes.count();

  while (next_uptime_bucket_ <= current_minute) {
    // Starts out as INTERNAL_ERROR so that a call which never fills in the
    // out-parameter is treated as a failure.
    fuchsia::cobalt::Status result = fuchsia::cobalt::Status::INTERNAL_ERROR;
    logger_->LogElapsedTime(kUptimeMetricId, 0, "", next_uptime_bucket_,
                            &result);

    // If we failed to send an observation, we stop gathering metrics for up to
    // one minute.
    if (result != fuchsia::cobalt::Status::OK) {
      FXL_LOG(ERROR) << "LogElapsedTime() => " << StatusToString(result);
      return result;
    }

    // Doubling 0 would stay at 0 forever, hence the special case.
    next_uptime_bucket_ =
        (next_uptime_bucket_ == 0) ? 1 : next_uptime_bucket_ * 2;
  }

  return fuchsia::cobalt::Status::OK;
}
// Logs the number of bytes of memory in use (total minus free) to Cobalt, at
// most once every 5 minutes of uptime.
//
// Returns OK if the value was logged or if logging was skipped because the
// next scheduled time has not been reached yet; INTERNAL_ERROR if the root
// resource or the memory stats could not be obtained; otherwise the status
// reported by the Cobalt logger. The next scheduled time is only advanced
// after a successful log, so a failed attempt is retried on the next call.
fuchsia::cobalt::Status SystemMetricsApp::LogMemoryUsage(
    std::chrono::minutes uptime_minutes) {
  if (uptime_minutes.count() < next_log_memory_usage_) {
    return fuchsia::cobalt::Status::OK;
  }

  zx::resource root_resource;
  zx_status_t status = get_root_resource(&root_resource);
  if (status != ZX_OK) {
    FXL_LOG(ERROR) << "get_root_resource failed!!!";
    return fuchsia::cobalt::Status::INTERNAL_ERROR;
  }

  // The "actual" and "available" record counts are not needed for this
  // single-struct query, so both out-pointers are null.
  zx_info_kmem_stats_t stats;
  status = zx_object_get_info(root_resource.get(), ZX_INFO_KMEM_STATS, &stats,
                              sizeof(stats), nullptr, nullptr);
  if (status != ZX_OK) {
    FXL_LOG(ERROR) << "zx_object_get_info failed with " << status << ".";
    return fuchsia::cobalt::Status::INTERNAL_ERROR;
  }

  // Starts out as INTERNAL_ERROR so that a call which never fills in the
  // out-parameter is treated as a failure.
  auto cobalt_status = fuchsia::cobalt::Status::INTERNAL_ERROR;
  logger_->LogMemoryUsage(kMemoryUsageMetricId, 0, "",
                          stats.total_bytes - stats.free_bytes, &cobalt_status);
  if (cobalt_status != fuchsia::cobalt::Status::OK) {
    FXL_LOG(ERROR) << "LogMemoryUsage() => " << StatusToString(cobalt_status);
    return cobalt_status;
  }

  // The next time to log is in 5 minutes.
  next_log_memory_usage_ = static_cast<int>(uptime_minutes.count() + 5);
  return fuchsia::cobalt::Status::OK;
}
// Connects to the Cobalt service and then alternates forever between gathering
// metrics and running |loop| until |tick_interval_| minutes from now. This
// function contains no exit path of its own.
void SystemMetricsApp::Main(async::Loop* loop) {
  ConnectToEnvironmentService();

  // We keep gathering metrics until this process is terminated.
  while (true) {
    GatherMetrics();

    const auto wake_time =
        zx::clock::get_monotonic() + zx::min(tick_interval_.count());
    loop->Run(wake_time);
  }
}
void SystemMetricsApp::ConnectToEnvironmentService() {
// connect to the cobalt fidl service provided by the environment.
fuchsia::cobalt::LoggerFactorySyncPtr factory;
context_->ConnectToEnvironmentService(factory.NewRequest());
fuchsia::cobalt::Status status = fuchsia::cobalt::Status::INTERNAL_ERROR;
factory->CreateLogger(LoadCobaltConfig(), logger_.NewRequest(), &status);
FXL_CHECK(status == fuchsia::cobalt::Status::OK)
<< "CreateLogger() => " << StatusToString(status);
}
int main(int argc, const char** argv) {
async::Loop loop(&kAsyncLoopConfigAttachToThread);
SystemMetricsApp app(kIntervalMinutes);
app.Main(&loop);
return 0;
}