blob: 15fc77dc99e74afeb25637b6bdcb662b705e5a8a [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "converter.h"
#include <getopt.h>
#include <lib/syslog/cpp/macros.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <numeric>
#include <vector>
#include "rapidjson/document.h"
#include "rapidjson/error/en.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/prettywriter.h"
#include "src/lib/fxl/strings/string_printf.h"
#if defined(OS_FUCHSIA)
#include <zircon/syscalls.h>
#else
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <fbl/unique_fd.h>
#include <src/lib/files/file_descriptor.h>
#endif
namespace {
// Returns the sample variance of |values| around |mean|, i.e. the sum of
// squared deviations divided by (N - 1).  Dividing by N - 1 rather than N is
// Bessel's correction, which gives a better estimate of the population's
// variance when |values| is only a sample of that population.
double Variance(const std::vector<double>& values, double mean) {
  const size_t sample_count = values.size();
  // With fewer than two samples the corrected variance is undefined.  NaN and
  // Inf are not permitted in JSON, so report 0 instead.
  if (sample_count < 2) {
    return 0;
  }
  const double squared_deviation_total =
      std::accumulate(values.begin(), values.end(), 0.0, [mean](double running, double sample) {
        const double deviation = sample - mean;
        return running + deviation * deviation;
      });
  return squared_deviation_total / static_cast<double>(sample_count - 1);
}
void WriteJson(FILE* fp, rapidjson::Document* doc) {
char buffer[100];
rapidjson::FileWriteStream output_stream(fp, buffer, sizeof(buffer));
rapidjson::PrettyWriter<rapidjson::FileWriteStream> writer(output_stream);
doc->Accept(writer);
// Check that all the output was serialized successfully as JSON. This
// can fail if the output contained NaN or infinite floating point
// values.
FX_CHECK(writer.IsComplete());
}
// Thin convenience layer over rapidjson's rather verbose API.  Every value
// produced by the helper is allocated from the allocator passed to the
// constructor, which is held by reference.
class JsonHelper {
 public:
  explicit JsonHelper(rapidjson::Document::AllocatorType& alloc) : alloc_(alloc) {}

  // Returns a JSON string value holding its own copy of |string|.
  rapidjson::Value MakeString(const char* string) { return rapidjson::Value(string, alloc_); }

  // Returns a deep copy of |value|.
  rapidjson::Value Copy(const rapidjson::Value& value) {
    rapidjson::Value duplicate(value, alloc_);
    return duplicate;
  }

 private:
  rapidjson::Document::AllocatorType& alloc_;
};
// Replaces every ' ' character in |*string| with '_', in place.
void ConvertSpacesToUnderscores(std::string* string) {
  std::replace(string->begin(), string->end(), ' ', '_');
}
void ComputeStatistics(const std::vector<double>& vals, rapidjson::Value* output,
rapidjson::Document::AllocatorType* alloc) {
double sum = 0;
double sum_of_logs = 0;
for (auto val : vals) {
sum += val;
sum_of_logs += log(val);
}
double min = *std::min_element(vals.begin(), vals.end());
double max = *std::max_element(vals.begin(), vals.end());
double mean = sum / vals.size();
double variance = Variance(vals, mean);
// meanlogs is the mean of the logs of the values, which is useful for
// calculating the geometric mean of the values.
//
// If any of the values are zero or negative, meanlogs will be -Infinity
// or a NaN, which can't be serialized in JSON format. In those cases,
// we write 'null' in the JSON instead.
double meanlogs = sum_of_logs / vals.size();
rapidjson::Value meanlogs_json;
if (isfinite(meanlogs))
meanlogs_json.SetDouble(meanlogs);
output->SetArray();
output->PushBack(static_cast<uint64_t>(vals.size()),
*alloc); // "count" entry.
output->PushBack(max, *alloc);
output->PushBack(meanlogs_json, *alloc);
output->PushBack(mean, *alloc);
output->PushBack(min, *alloc);
output->PushBack(sum, *alloc);
output->PushBack(variance, *alloc);
}
// Takes the unit string as it appears in the input JSON file.  Returns the
// unit string that should be used in the Catapult Histogram JSON file.
// Where the two units differ in scale, the entries of |*vals| are rescaled
// in place to match the returned unit.
//
// An unrecognized |input_unit| is reported on stderr and terminates the
// process with exit status 1.
//
// The list of valid unit strings for the Catapult Histogram JSON format is
// available at:
// https://github.com/catapult-project/catapult/blob/8dc09eb0703647db9ca37b26f2d01a0a4dc0285c/tracing/tracing/value/histogram.py#L478
std::string ConvertUnits(const char* input_unit, std::vector<double>* vals) {
  const auto unit_is = [input_unit](const char* candidate) {
    return strcmp(input_unit, candidate) == 0;
  };

  if (unit_is("nanoseconds") || unit_is("ns")) {
    // Convert from nanoseconds to milliseconds.
    for (double& val : *vals) {
      val /= 1e6;
    }
    return "ms_smallerIsBetter";
  }
  if (unit_is("milliseconds") || unit_is("ms")) {
    return "ms_smallerIsBetter";
  }
  if (unit_is("bytes/second")) {
    // Convert from bytes/second to mebibytes/second.
    for (double& val : *vals) {
      val /= 1024 * 1024;
    }
    // The Catapult dashboard does not yet support a "bytes per unit time"
    // unit (of any multiple), and it rejects unknown units, so we report
    // this as "unitless" here for now.  TODO(mseaborn): Add support for
    // data rate units to Catapult.
    return "unitless_biggerIsBetter";
  }
  if (unit_is("bytes")) {
    return "sizeInBytes_smallerIsBetter";
  }
  if (unit_is("frames/second")) {
    return "Hz_biggerIsBetter";
  }
  if (unit_is("percent")) {
    return "n%_smallerIsBetter";
  }
  if (unit_is("count")) {
    return "count";
  }
  if (unit_is("Watts")) {
    return "W_smallerIsBetter";
  }

  fprintf(stderr, "Units not recognized: %s\n", input_unit);
  exit(1);
}
// Appends one Histogram entry to the |output| array.
//
// |input_unit| is translated with ConvertUnits(), which may also rescale
// |vals| in place; the statistics stored under "running" are computed from
// the rescaled values.  |diagnostic_map| and |guid| are taken by value and
// handed over to the new entry.
void AddHistogram(rapidjson::Document* output, rapidjson::Document::AllocatorType* alloc,
                  const std::string& test_name, const char* input_unit, std::vector<double>&& vals,
                  rapidjson::Value diagnostic_map, rapidjson::Value guid) {
  rapidjson::Document::AllocatorType& allocator = *alloc;

  // The unit conversion has to run first because it can modify |vals|.
  std::string unit_for_catapult = ConvertUnits(input_unit, &vals);

  rapidjson::Value running_stats;
  ComputeStatistics(vals, &running_stats, alloc);

  rapidjson::Value entry;
  entry.SetObject();
  entry.AddMember("name", test_name, allocator);
  entry.AddMember("unit", unit_for_catapult, allocator);
  entry.AddMember("description", "", allocator);
  entry.AddMember("diagnostics", diagnostic_map, allocator);
  entry.AddMember("running", running_stats, allocator);
  entry.AddMember("guid", guid, allocator);
  // Duplicates the "count" entry already present in the "running" stats.
  entry.AddMember("maxNumSampleValues", static_cast<uint64_t>(vals.size()), allocator);
  // For now, assume the input contained no NaN values.
  entry.AddMember("numNans", 0, allocator);

  output->PushBack(entry, allocator);
}
// Returns a human-readable name for |type|, for use in error messages.
// A value outside the enumerators handled below trips FX_NOTREACHED and
// yields an empty string.
const char* TypeToString(rapidjson::Type type) {
  const char* label = nullptr;
  switch (type) {
    case rapidjson::kNullType:
      label = "null";
      break;
    case rapidjson::kFalseType:
      label = "false";
      break;
    case rapidjson::kTrueType:
      label = "true";
      break;
    case rapidjson::kObjectType:
      label = "object";
      break;
    case rapidjson::kArrayType:
      label = "array";
      break;
    case rapidjson::kStringType:
      label = "string";
      break;
    case rapidjson::kNumberType:
      label = "number";
      break;
  }
  if (label != nullptr) {
    return label;
  }
  FX_NOTREACHED() << "Unexpected rapidjson type " << static_cast<int>(type);
  return "";
}
// Fills |output_length| bytes of |output| with random data.
//
// |output| must be non-null (checked with FX_DCHECK).  The source of the
// random bytes is selected at compile time: zx_cprng_draw() when OS_FUCHSIA
// is defined, and the "/dev/urandom" device otherwise.
void RandBytes(void* output, size_t output_length) {
FX_DCHECK(output);
#if defined(OS_FUCHSIA)
zx_cprng_draw(output, output_length);
#else
// |fd| is held in a fbl::unique_fd rather than as a raw descriptor.
fbl::unique_fd fd(open("/dev/urandom", O_RDONLY | O_CLOEXEC));
FX_CHECK(fd.is_valid());
// Anything other than a complete read of |output_length| bytes (an error or
// a short read) fails the FX_CHECK below.
const ssize_t len = fxl::ReadFileDescriptor(fd.get(), static_cast<char*>(output), output_length);
FX_CHECK(len >= 0 && static_cast<size_t>(len) == output_length);
#endif
}
} // namespace
// Returns a random (version 4) UUID formatted as
// xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx, where y is one of [8, 9, a, b].
//
// Code copied from "//src/lib/uuid/uuid.cc", however it uses our |RandBytes|
// (which uses "/dev/urandom" when running on non-Fuchsia platforms), as
// opposed to an unconditional |zx_cprng_draw|, so that we can support
// Linux/Mac platforms as well.
std::string GenerateUuid() {
  // The four bits that become the leading digit of the third group.
  constexpr uint64_t kVersionMask = 0x000000000000f000ULL;
  constexpr uint64_t kVersion4 = 0x0000000000004000ULL;
  // The two most significant bits of the fourth group.
  constexpr uint64_t kVariantMask = 0xc000000000000000ULL;
  constexpr uint64_t kVariantRfc4122 = 0x8000000000000000ULL;

  uint64_t random_words[2];
  RandBytes(random_words, sizeof(random_words));

  // Set the UUID to version 4 as described in RFC 4122, section 4.4: clear
  // the version bits, then set the version to 4.
  const uint64_t high = (random_words[0] & ~kVersionMask) | kVersion4;
  // Set the two most significant bits (bits 6 and 7) of
  // clock_seq_hi_and_reserved to zero and one, respectively.
  const uint64_t low = (random_words[1] & ~kVariantMask) | kVariantRfc4122;

  const unsigned int time_low = static_cast<unsigned int>(high >> 32);
  const unsigned int time_mid = static_cast<unsigned int>((high >> 16) & 0x0000ffff);
  const unsigned int time_hi_and_version = static_cast<unsigned int>(high & 0x0000ffff);
  const unsigned int clock_seq = static_cast<unsigned int>(low >> 48);
  return fxl::StringPrintf("%08x-%04x-%04x-%04x-%012llx", time_low, time_mid, time_hi_and_version,
                           clock_seq, low & 0x0000ffffffffffffULL);
}
// Converts |input|, a parsed perf test results document, into Catapult
// HistogramSet JSON, which is built up in |output|.
//
// |output| is reset to an array.  The diagnostics shared by all test cases
// (taken from |args|) are appended first.  Then, for each entry of |input|, a
// diagnostic is appended for its test suite name if that name has not been
// seen before, followed by the entry's histogram.
//
// |input| is only read.  Malformed input (wrong top-level type, a non-object
// entry, a missing or mistyped field, an empty "values" array, unrecognized
// units) is reported on stderr and terminates the process with exit status 1.
void Convert(rapidjson::Document* input, rapidjson::Document* output, const ConverterArgs* args) {
  rapidjson::Document::AllocatorType& alloc = output->GetAllocator();
  JsonHelper helper(alloc);
  output->SetArray();

  // Returns a fresh GUID as a JSON string.  When |args->use_test_guids| is
  // set, predictable "dummy_guid_N" values are produced instead of random
  // UUIDs.
  uint32_t next_dummy_guid = 0;
  auto MakeUuid = [&]() {
    std::string uuid;
    if (args->use_test_guids) {
      // %u matches the unsigned counter.
      uuid = fxl::StringPrintf("dummy_guid_%u", next_dummy_guid++);
    } else {
      uuid = GenerateUuid();
    }
    return helper.MakeString(uuid.c_str());
  };

  // Add a "diagnostic" entry representing the given value.  Returns a GUID
  // value identifying the diagnostic.
  auto AddDiagnostic = [&](rapidjson::Value value) -> rapidjson::Value {
    rapidjson::Value guid = MakeUuid();
    // Add top-level description.
    rapidjson::Value diagnostic;
    diagnostic.SetObject();
    diagnostic.AddMember("guid", helper.Copy(guid), alloc);
    diagnostic.AddMember("type", "GenericSet", alloc);
    rapidjson::Value values;
    values.SetArray();
    values.PushBack(value, alloc);
    diagnostic.AddMember("values", values, alloc);
    output->PushBack(diagnostic, alloc);
    return guid;
  };

  // Build a JSON object containing the "diagnostic" values that are common
  // to all the test cases.
  rapidjson::Value shared_diagnostic_map;
  shared_diagnostic_map.SetObject();
  auto AddSharedDiagnostic = [&](const char* key, rapidjson::Value value) {
    auto guid = AddDiagnostic(std::move(value));
    shared_diagnostic_map.AddMember(helper.MakeString(key), guid, alloc);
  };
  rapidjson::Value timestamp;
  timestamp.SetInt64(args->timestamp);
  AddSharedDiagnostic("pointId", std::move(timestamp));
  AddSharedDiagnostic("bots", helper.MakeString(args->bots));
  AddSharedDiagnostic("masters", helper.MakeString(args->masters));
  if (args->product_versions) {
    AddSharedDiagnostic("a_productVersions", helper.MakeString(args->product_versions));
  }

  // The "logUrls" diagnostic contains a list of [name, url] tuples.
  rapidjson::Value log_url_array;
  log_url_array.SetArray();
  log_url_array.PushBack(helper.MakeString("Build Log"), alloc);
  log_url_array.PushBack(helper.MakeString(args->log_url), alloc);
  AddSharedDiagnostic("logUrls", std::move(log_url_array));

  // Allocate a GUID for the given test suite name (by creating a
  // "diagnostic" entry).  Memoize this allocation so that we don't
  // allocate >1 GUID for the same test suite name.
  std::map<std::string, rapidjson::Value> test_suite_to_guid;
  auto MakeGuidForTestSuiteName = [&](const char* test_suite) {
    auto it = test_suite_to_guid.find(test_suite);
    if (it != test_suite_to_guid.end()) {
      return helper.Copy(it->second);
    }
    rapidjson::Value guid = AddDiagnostic(helper.MakeString(test_suite));
    test_suite_to_guid[test_suite] = helper.Copy(guid);
    return guid;
  };

  // Returns the string stored under |key| in the JSON object |element|.
  // Exits with an error if the field is missing or is not a string, rather
  // than relying on rapidjson's internal assertions to catch bad input.
  auto GetRequiredString = [](const rapidjson::Value& element, const char* key) -> const char* {
    auto member = element.FindMember(key);
    if (member == element.MemberEnd() || !member->value.IsString()) {
      fprintf(stderr, "Expected json element to have a string field \"%s\"\n", key);
      exit(1);
    }
    return member->value.GetString();
  };

  if (!input->IsArray()) {
    fprintf(stderr, "Expected input document to be of type array, and got %s instead\n",
            TypeToString(input->GetType()));
    exit(1);
  }

  for (auto& element : input->GetArray()) {
    if (!element.IsObject()) {
      fprintf(stderr, "Expected each input entry to be of type object, and got %s instead\n",
              TypeToString(element.GetType()));
      exit(1);
    }

    std::string name;
    if (element.HasMember("test_name")) {
      name = GetRequiredString(element, "test_name");
    } else if (element.HasMember("label")) {
      // TODO(fxb/59861): remove this when producers have been converted.
      name = GetRequiredString(element, "label");
    } else {
      fprintf(stderr,
              "Expect json element to have either label field (old version) or test_name field "
              "(new version). (http://fxb/59861)\n");
      exit(1);
    }
    // TODO (fxb/59861): Make "metric" field required once all the producers provide it.
    if (element.HasMember("metric")) {
      std::string metric = GetRequiredString(element, "metric");
      // "real_time" is treated as the default metric and adds no suffix.
      if (metric != "real_time") {
        name += "/";
        name += metric;
      }
    }
    ConvertSpacesToUnderscores(&name);

    // The "test_suite" field in the input becomes the "benchmarks"
    // diagnostic in the output.
    rapidjson::Value test_suite_guid =
        MakeGuidForTestSuiteName(GetRequiredString(element, "test_suite"));
    rapidjson::Value diagnostic_map = helper.Copy(shared_diagnostic_map);
    diagnostic_map.AddMember("benchmarks", test_suite_guid, alloc);

    // Refer to the member itself instead of converting the result of
    // GetArray() into a Value: that conversion moves the array out of
    // |input|, leaving the source document modified.
    auto values_member = element.FindMember("values");
    if (values_member == element.MemberEnd() || !values_member->value.IsArray()) {
      fprintf(stderr, "Expected json element to have an array field \"values\"\n");
      exit(1);
    }
    const rapidjson::Value& values = values_member->value;
    if (values.Size() == 0) {
      fprintf(stderr, "Input 'values' is empty\n");
      exit(1);
    }
    std::vector<double> vals;
    vals.reserve(values.Size());
    for (const auto& val : values.GetArray()) {
      if (!val.IsNumber()) {
        fprintf(stderr, "Expected all entries of 'values' to be numbers, and got %s instead\n",
                TypeToString(val.GetType()));
        exit(1);
      }
      vals.push_back(val.GetDouble());
    }

    const char* unit = GetRequiredString(element, "unit");

    // Create a histogram for all |vals|.
    AddHistogram(output, &alloc, name, unit, std::move(vals), std::move(diagnostic_map),
                 MakeUuid());
  }
}
// Command-line entry point of the converter.
//
// Parses |argv|, reads the perf test results JSON named by --input, converts
// it with Convert(), and writes the result to the file named by --output, or
// to stdout when --output is not given.
//
// Returns 0 on success (including --help), and 1 on a usage error or when a
// file cannot be opened, parsed or written.  Note that Convert() terminates
// the process itself when the input document is malformed.
int ConverterMain(int argc, char** argv) {
  const char* usage =
      "Usage: %s [options]\n"
      "\n"
      "This tool takes results from Fuchsia performance tests (in Fuchsia's "
      "JSON perf test results format) and converts them to the Catapult "
      "Dashboard's JSON HistogramSet format.\n"
      "\n"
      "Options:\n"
      "  --input FILENAME\n"
      "      Input file: perf test results JSON file (required)\n"
      "  --output FILENAME\n"
      "      Output file: Catapult HistogramSet JSON file (default is stdout)\n"
      "  --product-versions STRING\n"
      "      Release version in the format 0.yyyymmdd.a.b if applicable. e.g. 0.20200101.1.2\n"
      "\n"
      "The following are required and specify parameters to copy into the "
      "output file:\n"
      "  --execution-timestamp-ms NUMBER\n"
      "  --masters STRING\n"
      "  --bots STRING\n"
      "  --log-url URL\n"
      "See README.md for the meanings of these parameters.\n";

  // Parse command line arguments.
  static const struct option opts[] = {
      {"help", no_argument, nullptr, 'h'},
      {"input", required_argument, nullptr, 'i'},
      {"output", required_argument, nullptr, 'o'},
      {"execution-timestamp-ms", required_argument, nullptr, 'e'},
      {"masters", required_argument, nullptr, 'm'},
      {"bots", required_argument, nullptr, 'b'},
      {"log-url", required_argument, nullptr, 'l'},
      {"product-versions", required_argument, nullptr, 'v'},
      // getopt_long() finds the end of the table by looking for an all-zero
      // entry; without it an unknown option makes it read past the array.
      {nullptr, 0, nullptr, 0},
  };
  ConverterArgs args;
  const char* input_filename = nullptr;
  const char* output_filename = nullptr;
  // Restart option scanning from the first argument, so that the function
  // does not depend on getopt state left by an earlier call in this process.
  optind = 1;
  for (;;) {
    int opt = getopt_long(argc, argv, "h", opts, nullptr);
    if (opt < 0)
      break;
    switch (opt) {
      case 'h':
        printf(usage, argv[0]);
        return 0;
      case 'i':
        input_filename = optarg;
        break;
      case 'o':
        output_filename = optarg;
        break;
      case 'e':
        args.timestamp = strtoll(optarg, nullptr, 0);
        break;
      case 'm':
        args.masters = optarg;
        break;
      case 'b':
        args.bots = optarg;
        break;
      case 'l':
        args.log_url = optarg;
        break;
      case 'v':
        args.product_versions = optarg;
        break;
      default:
        // '?': an unknown option, or an option missing its argument.
        // getopt_long() reports the specifics on stderr (unless opterr has
        // been cleared); fail instead of silently carrying on.
        fprintf(stderr, "\n");
        fprintf(stderr, usage, argv[0]);
        return 1;
    }
  }
  if (optind < argc) {
    fprintf(stderr, "Unrecognized argument: \"%s\"\n", argv[optind]);
    return 1;
  }

  // Check arguments.  All problems are reported before bailing out, so the
  // user sees every missing argument at once.
  bool failed = false;
  if (!input_filename) {
    fprintf(stderr, "--input argument is required\n");
    failed = true;
  }
  // A timestamp of 0 is indistinguishable from "not provided".
  if (!args.timestamp) {
    fprintf(stderr, "--execution-timestamp-ms argument is required\n");
    failed = true;
  }
  if (!args.masters) {
    fprintf(stderr, "--masters argument is required\n");
    failed = true;
  }
  if (!args.bots) {
    fprintf(stderr, "--bots argument is required\n");
    failed = true;
  }
  if (!args.log_url) {
    fprintf(stderr, "--log-url argument is required\n");
    failed = true;
  }
  if (failed) {
    fprintf(stderr, "\n");
    fprintf(stderr, usage, argv[0]);
    return 1;
  }

  // Read input file.
  FILE* fp = fopen(input_filename, "r");
  if (!fp) {
    fprintf(stderr, "Failed to open input file, \"%s\"\n", input_filename);
    return 1;
  }
  char buffer[100];
  rapidjson::FileReadStream input_stream(fp, buffer, sizeof(buffer));
  rapidjson::Document input;
  rapidjson::ParseResult parse_result = input.ParseStream(input_stream);
  // The file is not needed again whether or not parsing succeeded; closing it
  // before the error check keeps the failure path from leaking the handle.
  fclose(fp);
  if (!parse_result) {
    fprintf(stderr, "Failed to parse input file, \"%s\": %s (offset %zu)\n", input_filename,
            rapidjson::GetParseError_En(parse_result.Code()), parse_result.Offset());
    return 1;
  }

  rapidjson::Document output;
  Convert(&input, &output, &args);

  // Write output.
  if (output_filename) {
    fp = fopen(output_filename, "w");
    if (!fp) {
      fprintf(stderr, "Failed to open output file, \"%s\"\n", output_filename);
      return 1;
    }
    WriteJson(fp, &output);
    // fclose() flushes any buffered data, so a failure here means the output
    // file may be incomplete.
    if (fclose(fp) != 0) {
      fprintf(stderr, "Failed to write output file, \"%s\"\n", output_filename);
      return 1;
    }
  } else {
    WriteJson(stdout, &output);
  }
  return 0;
}