blob: 68d46cd35004a3ac1572298e3c9ab91020712a6e [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/cobalt/bin/system-metrics/log_stats_fetcher_impl.h"
#include <lib/syslog/cpp/macros.h>
#include <fstream>
#include "src/lib/fxl/strings/string_number_conversions.h"
namespace cobalt {
namespace {

// Path of the component allowlist file that is loaded when no explicit component-code map is
// supplied to the fetcher.
constexpr char kDefaultAllowlistFilePath[] = "/config/data/log_stats_component_allowlist.txt";

}  // namespace
// static
std::unordered_map<std::string, ComponentEventCode> LogStatsFetcherImpl::LoadAllowlist(
const std::string& allowlist_path) {
std::unordered_map<std::string, ComponentEventCode> result;
std::ifstream allowlist_file_stream(allowlist_path);
uint32_t component_code;
while (allowlist_file_stream >> component_code) {
std::string component_url;
allowlist_file_stream >> component_url;
result[component_url] = static_cast<ComponentEventCode>(component_code);
}
return result;
}
// Convenience constructor that delegates to the fully-parameterized constructor below with:
//  - a connector that obtains a fuchsia::diagnostics::ArchiveAccessor via |context|->svc(),
//  - the component allowlist loaded from kDefaultAllowlistFilePath, and
//  - the clock returned by abs_clock::RealClock::Get().
//
// The connector lambda captures |context| as a raw pointer, so |context| must still be valid
// whenever the connector is invoked.
LogStatsFetcherImpl::LogStatsFetcherImpl(async_dispatcher_t* dispatcher,
sys::ComponentContext* context)
: LogStatsFetcherImpl(
dispatcher,
[context]() { return context->svc()->Connect<fuchsia::diagnostics::ArchiveAccessor>(); },
LoadAllowlist(kDefaultAllowlistFilePath), abs_clock::RealClock::Get()) {}
// Fully-parameterized constructor.
//
// |dispatcher| backs the executor on which fetch tasks are scheduled, |connector| is invoked once
// to produce the ArchiveAccessor connection handed to the archive reader, |component_code_map|
// maps component URLs to their event codes, and |clock| supplies the current time used when
// bucketing granular stats.
LogStatsFetcherImpl::LogStatsFetcherImpl(
    async_dispatcher_t* dispatcher,
    fit::function<fuchsia::diagnostics::ArchiveAccessorPtr()> connector,
    std::unordered_map<std::string, ComponentEventCode> component_code_map, abs_clock::Clock* clock)
    : executor_(dispatcher),
      // The string list is passed straight to the archive reader (presumably as Inspect
      // selectors for the log-stats component).
      archive_reader_(connector(),
                      {
                          "core/log-stats:root:*",
                          "core/log-stats:root/by_component/*:error_logs",
                          "core/log-stats:root/granular_stats",
                      }),
      component_code_map_(std::move(component_code_map)),
      clock_(clock) {
  // Prime the "last reported" counters with a throw-away fetch. Without this baseline the first
  // real FetchMetrics() call would report everything logged since boot rather than since this
  // daemon started, which matters most when the daemon is relaunched after a crash.
  FetchMetrics([](const Metrics&) {});
}
// Asynchronously fetches an Inspect snapshot and, once it has been processed successfully,
// reports the metrics accumulated since the previous fetch through |metrics_callback|.
//
// If the snapshot cannot be obtained, a warning is logged and |metrics_callback| is never
// invoked.
void LogStatsFetcherImpl::FetchMetrics(MetricsCallback metrics_callback) {
  auto promise =
      archive_reader_.GetInspectSnapshot()
          .and_then([this, callback = std::move(metrics_callback)](
                        std::vector<inspect::contrib::DiagnosticsData>& data_vector) mutable {
            // The callback travels with its own request and is installed only when that
            // request's snapshot arrives. Storing it in |metrics_callback_| up front would let a
            // second FetchMetrics() call issued before this snapshot completes (for example
            // right after the constructor's baseline fetch) replace it, so this snapshot would
            // be delivered to the wrong callback.
            metrics_callback_ = std::move(callback);
            OnInspectSnapshotReady(data_vector);
          })
          .or_else([](std::string& error) {
            FX_LOGS(WARNING) << "Error while fetching log stats: " << error;
          });
  executor_.schedule_task(std::move(promise));
}
// Converts a freshly fetched Inspect snapshot into a Metrics record and hands it to
// |metrics_callback_|. Nothing is reported if the snapshot does not contain exactly one entry or
// if any of the individual metric computations fails.
void LogStatsFetcherImpl::OnInspectSnapshotReady(
    const std::vector<inspect::contrib::DiagnosticsData>& data_vector) {
  // Only a single component's inspect data was requested, so exactly one entry is expected.
  if (data_vector.size() != 1u) {
    FX_LOGS(ERROR) << "Expected 1 archive, received " << data_vector.size();
    return;
  }

  const auto& snapshot = data_vector[0];
  LogStatsFetcher::Metrics metrics;

  // The steps run strictly left to right and stop at the first failure. The per-component step
  // consumes the total error count produced by the step before it.
  const bool all_succeeded =
      CalculateTotalErrorCount(snapshot, &metrics.error_count) &&
      CalculatePerComponentErrorCount(snapshot, metrics.error_count,
                                      &metrics.per_component_error_count) &&
      CalculateKlogCount(snapshot, &metrics.klog_count) &&
      PopulateGranularStats(snapshot, &metrics.granular_stats);
  if (!all_succeeded) {
    return;
  }

  metrics_callback_(metrics);
}
// Reads the cumulative error count at root/error_logs from |inspect| and writes the number of
// errors observed since the previous successful call into |result|.
//
// Returns false (leaving |result| and the stored baseline untouched) if the value is missing or
// is not a uint64; otherwise stores the new cumulative count as the baseline and returns true.
bool LogStatsFetcherImpl::CalculateTotalErrorCount(const inspect::contrib::DiagnosticsData& inspect,
                                                   uint64_t* result) {
  const rapidjson::Value& error_logs_value = inspect.GetByPath({"root", "error_logs"});
  if (!error_logs_value.IsUint64()) {
    FX_LOGS(ERROR) << "error_logs doesn't exist or is not a uint64";
    return false;
  }

  const uint64_t cumulative_count = error_logs_value.GetUint64();
  // A cumulative count that went backwards means Archivist must have restarted, in which case
  // every error it reports is treated as new.
  *result = cumulative_count < last_reported_total_error_count_
                ? cumulative_count
                : cumulative_count - last_reported_total_error_count_;

  FX_LOGS(DEBUG) << "Current total error count: " << cumulative_count
                 << ", since last report: " << *result;
  last_reported_total_error_count_ = cumulative_count;
  return true;
}
// Computes, for every allowlisted component found under root/by_component in |inspect|, the
// number of error logs emitted since the previous call and stores it in |result| keyed by the
// component's event code. Whatever part of |total_error_count| is not attributed to an
// allowlisted component is stored under ComponentEventCode::Other.
//
// |total_error_count| is the number of new errors across all components for the same interval.
//
// Returns false if root/by_component is not an object or if an allowlisted component entry is
// malformed. Components processed before the malformed one have already been written to
// |result| and to the stored per-component baselines at that point.
bool LogStatsFetcherImpl::CalculatePerComponentErrorCount(
    const inspect::contrib::DiagnosticsData& inspect, uint64_t total_error_count,
    std::unordered_map<ComponentEventCode, uint64_t>* result) {
  const rapidjson::Value& component_list = inspect.GetByPath({"root", "by_component"});
  if (!component_list.IsObject()) {
    FX_LOGS(ERROR) << "root/by_component doesn't exist or is not an object";
    return false;
  }

  uint64_t tracked_error_count_diff = 0;
  for (const auto& component_member : component_list.GetObject()) {
    const std::string component_url = component_member.name.GetString();
    const auto code_it = component_code_map_.find(component_url);
    // Ignore components not in the allowlist.
    if (code_it == component_code_map_.end()) {
      continue;
    }
    const ComponentEventCode component_code = code_it->second;

    if (!component_member.value.IsObject()) {
      FX_LOGS(ERROR) << "Component member must be an object: " << component_member.name.GetString();
      return false;
    }
    const auto& component_object = component_member.value.GetObject();
    const auto error_logs_it = component_object.FindMember("error_logs");
    if (error_logs_it == component_object.MemberEnd()) {
      FX_LOGS(ERROR) << component_url << " does not have error_logs";
      return false;
    }
    if (!error_logs_it->value.IsUint64()) {
      // The leading space keeps the URL from running into the rest of the message.
      FX_LOGS(ERROR) << "error_logs for component " << component_url << " is not uint64";
      return false;
    }

    const uint64_t new_count = error_logs_it->value.GetUint64();
    // Single lookup: operator[] value-initializes the baseline to 0 for a component seen for the
    // first time, and the reference is reused below to store the new baseline.
    auto& last_count = per_component_error_count_[component_code];
    // If the new error count is lower, the component must have restarted, so
    // assume all errors are new.
    const uint64_t diff = new_count < last_count ? new_count : new_count - last_count;
    if (diff > 0) {
      FX_LOGS(DEBUG) << "Found " << diff << " new error logs for component " << component_url
                     << ", total is " << new_count;
    }
    (*result)[component_code] = diff;
    last_count = new_count;
    tracked_error_count_diff += diff;
  }

  // Errors that cannot be attributed to an allowlisted component are reported as "Other". The
  // tracked total is not expected to exceed the overall total; if it does, clamp to zero rather
  // than letting the unsigned subtraction wrap around.
  uint64_t untracked_error_count;
  if (tracked_error_count_diff <= total_error_count) {
    untracked_error_count = total_error_count - tracked_error_count_diff;
  } else {
    FX_LOGS(ERROR) << "Error count in tracked components exceeds total error count";
    untracked_error_count = 0;
  }
  FX_LOGS(DEBUG) << "Untracked error logs since last report: " << untracked_error_count;
  (*result)[ComponentEventCode::Other] = untracked_error_count;
  return true;
}
// Reads the cumulative kernel log count at root/kernel_logs from |inspect| and writes the number
// of kernel logs observed since the previous successful call into |result|.
//
// Returns false (leaving |result| and the stored baseline untouched) if the value is missing or
// is not a uint64; otherwise stores the new cumulative count as the baseline and returns true.
bool LogStatsFetcherImpl::CalculateKlogCount(const inspect::contrib::DiagnosticsData& inspect,
                                             uint64_t* result) {
  const rapidjson::Value& kernel_logs_value = inspect.GetByPath({"root", "kernel_logs"});
  if (!kernel_logs_value.IsUint64()) {
    FX_LOGS(ERROR) << "kernel_logs doesn't exist or is not a uint64";
    return false;
  }

  const uint64_t cumulative_klogs = kernel_logs_value.GetUint64();
  // When the cumulative count is below the stored baseline, the whole count is reported as new
  // instead of subtracting the baseline.
  *result = cumulative_klogs < last_reported_klog_count_
                ? cumulative_klogs
                : cumulative_klogs - last_reported_klog_count_;

  FX_LOGS(DEBUG) << "Current klog count: " << cumulative_klogs
                 << ", since last report: " << *result;
  last_reported_klog_count_ = cumulative_klogs;
  return true;
}
// Appends to |granular_stats| one record per entry found in the completed, not yet reported
// buckets under root/granular_stats in |inspect|.
//
// Each bucket is keyed by an integer bucket id and holds records keyed by consecutive decimal
// indices ("0", "1", ...). Every record must provide "file_path" (string), "line_no" (non-zero
// uint64) and "count" (uint64).
//
// Returns true on success, after advancing |last_reported_bucket_id_| to the highest bucket id
// that was reported. Returns false as soon as any malformed bucket or record is encountered; in
// that case |last_reported_bucket_id_| is left unchanged, but records appended to
// |granular_stats| before the failure remain there.
bool LogStatsFetcherImpl::PopulateGranularStats(const inspect::contrib::DiagnosticsData& inspect,
std::vector<GranularStatsRecord>* granular_stats) {
const rapidjson::Value& granular_stats_value = inspect.GetByPath({"root", "granular_stats"});
if (!granular_stats_value.IsObject()) {
FX_LOGS(ERROR) << "granular_stats doesn't exist or is not an object";
return false;
}
// Calculate the current bucket number, which is the number of 15 minute intervals that have
// passed since boot time. Older buckets need to be reported.
// NOTE(review): the divisors assume clock_->Now().get() is in nanoseconds (ns -> s -> min ->
// 15 min) - confirm against abs_clock::Clock.
int64_t current_bucket_id = clock_->Now().get() / (1000 * 1000 * 1000) / 60 / 15;
// Highest bucket id reported during this call; -1 means nothing has been reported yet.
int64_t last_reported_bucket_id = -1;
for (auto& bucket_entry : granular_stats_value.GetObject()) {
int64_t bucket_id;
if (!fxl::StringToNumberWithError(bucket_entry.name.GetString(), &bucket_id)) {
FX_LOGS(ERROR) << "Bucket id is not an integer: " << bucket_entry.name.GetString();
return false;
}
// Don't report if this bucket has already been reported or it's still being populated (i.e.
// the current bucket).
if (bucket_id <= last_reported_bucket_id_ || bucket_id >= current_bucket_id)
continue;
if (!bucket_entry.value.IsObject()) {
FX_LOGS(ERROR) << "Granular stats bucket is not an object";
return false;
}
const auto& bucket_object = bucket_entry.value.GetObject();
last_reported_bucket_id = std::max(last_reported_bucket_id, bucket_id);
// Go through the list of all records in this bucket. Records are looked up by their decimal
// index and the scan ends at the first index that has no entry.
for (uint32_t record_index = 0;; ++record_index) {
std::string record_index_str = fxl::NumberToString(record_index);
auto record_member = bucket_object.FindMember(record_index_str);
if (record_member == bucket_object.MemberEnd()) {
break;
}
if (!record_member->value.IsObject()) {
FX_LOGS(ERROR) << "Record is not an object";
return false;
}
auto record_object = record_member->value.GetObject();
// "file_path" must be present and must be a string.
auto file_path_member = record_object.FindMember("file_path");
if (file_path_member == record_object.MemberEnd()) {
FX_LOGS(ERROR) << "File path entry doesn't exist";
return false;
}
if (!file_path_member->value.IsString()) {
FX_LOGS(ERROR) << "File path entry is not a string";
return false;
}
// "line_no" must be present and must be a non-zero uint64.
auto line_no_member = record_object.FindMember("line_no");
if (line_no_member == record_object.MemberEnd()) {
FX_LOGS(ERROR) << "Line number entry doesn't exist";
return false;
}
if (!line_no_member->value.IsUint64()) {
FX_LOGS(ERROR) << "Line number is not an uint64";
return false;
}
if (line_no_member->value.GetUint64() == 0) {
FX_LOGS(ERROR) << "Line number must be positive";
return false;
}
// "count" must be present and must be a uint64.
auto count_member = record_object.FindMember("count");
if (count_member == record_object.MemberEnd()) {
FX_LOGS(ERROR) << "Count entry doesn't exist";
return false;
}
if (!count_member->value.IsUint64()) {
FX_LOGS(ERROR) << "Count is not uint64";
return false;
}
granular_stats->emplace_back(file_path_member->value.GetString(),
line_no_member->value.GetUint64(),
count_member->value.GetUint64());
}
}
// Commit the new high-water mark only after every bucket has been parsed successfully, so that
// a failed pass does not mark any bucket as reported.
last_reported_bucket_id_ = std::max(last_reported_bucket_id_, last_reported_bucket_id);
return true;
}
} // namespace cobalt