| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
#include "converter.h"

#include <getopt.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <numeric>
#include <string>
#include <vector>

#include "lib/fxl/logging.h"
#include "lib/fxl/random/uuid.h"
#include "lib/fxl/strings/string_printf.h"
#include "rapidjson/document.h"
#include "rapidjson/error/en.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/prettywriter.h"
| |
| namespace { |
| |
| // Calculate the variance, with Bessel's correction applied. Bessel's |
| // correction gives us a better estimation of the population's variance |
| // given a sample of the population. |
| double Variance(const std::vector<double>& values, double mean) { |
| // For 0 or 1 sample values, the variance value (with Bessel's |
| // correction) is not defined. Rather than returning a NaN or Inf value, |
| // which are not permitted in JSON, just return 0. |
| if (values.size() <= 1) |
| return 0; |
| |
| double sum_of_squared_diffs = 0.0; |
| for (double value : values) { |
| double diff = value - mean; |
| sum_of_squared_diffs += diff * diff; |
| } |
| return sum_of_squared_diffs / static_cast<double>(values.size() - 1); |
| } |
| |
| void WriteJson(FILE* fp, rapidjson::Document* doc) { |
| char buffer[100]; |
| rapidjson::FileWriteStream output_stream(fp, buffer, sizeof(buffer)); |
| rapidjson::PrettyWriter<rapidjson::FileWriteStream> writer(output_stream); |
| doc->Accept(writer); |
| // Check that all the output was serialized successfully as JSON. This |
| // can fail if the output contained NaN or infinite floating point |
| // values. |
| FXL_CHECK(writer.IsComplete()); |
| } |
| |
| // rapidjson's API is rather verbose to use. This class provides some |
| // convenience wrappers. |
| class JsonHelper { |
| public: |
| explicit JsonHelper(rapidjson::Document::AllocatorType& alloc) |
| : alloc_(alloc) {} |
| |
| rapidjson::Value MakeString(const char* string) { |
| rapidjson::Value value; |
| value.SetString(string, alloc_); |
| return value; |
| }; |
| |
| rapidjson::Value Copy(const rapidjson::Value& value) { |
| return rapidjson::Value(value, alloc_); |
| } |
| |
| private: |
| rapidjson::Document::AllocatorType& alloc_; |
| }; |
| |
| void ConvertSpacesToUnderscores(std::string* string) { |
| for (size_t index = 0; index < string->size(); ++index) { |
| if ((*string)[index] == ' ') |
| (*string)[index] = '_'; |
| } |
| } |
| |
| void ComputeStatistics(const std::vector<double>& vals, |
| rapidjson::Value* output, |
| rapidjson::Document::AllocatorType* alloc) { |
| double sum = 0; |
| double sum_of_logs = 0; |
| |
| for (auto val : vals) { |
| sum += val; |
| sum_of_logs += log(val); |
| } |
| |
| double min = *std::min_element(vals.begin(), vals.end()); |
| double max = *std::max_element(vals.begin(), vals.end()); |
| double mean = sum / vals.size(); |
| double variance = Variance(vals, mean); |
| |
| // meanlogs is the mean of the logs of the values, which is useful for |
| // calculating the geometric mean of the values. |
| // |
| // If any of the values are zero or negative, meanlogs will be -Infinity |
| // or a NaN, which can't be serialized in JSON format. In those cases, |
| // we write 'null' in the JSON instead. |
| double meanlogs = sum_of_logs / vals.size(); |
| rapidjson::Value meanlogs_json; |
| if (isfinite(meanlogs)) |
| meanlogs_json.SetDouble(meanlogs); |
| |
| output->SetArray(); |
| output->PushBack(vals.size(), *alloc); // "count" entry. |
| output->PushBack(max, *alloc); |
| output->PushBack(meanlogs_json, *alloc); |
| output->PushBack(mean, *alloc); |
| output->PushBack(min, *alloc); |
| output->PushBack(sum, *alloc); |
| output->PushBack(variance, *alloc); |
| } |
| |
| // Takes the unit string as it appears in the input JSON file. Returns the |
| // unit string that should be used in the Catapult Histogram JSON file. |
| // Converts the data as necessary. |
| std::string ConvertUnits(const char* input_unit, std::vector<double>* vals) { |
| std::string catapult_unit; |
| if (strcmp(input_unit, "nanoseconds") == 0 || strcmp(input_unit, "ns") == 0) { |
| // Convert from nanoseconds to milliseconds. |
| for (auto& val : *vals) { |
| val /= 1e6; |
| } |
| return "ms_smallerIsBetter"; |
| } else if (strcmp(input_unit, "milliseconds") == 0 || |
| strcmp(input_unit, "ms") == 0) { |
| return "ms_smallerIsBetter"; |
| } else if (strcmp(input_unit, "bytes/second") == 0) { |
| // Convert from bytes/second to mebibytes/second. |
| for (auto& val : *vals) { |
| val /= 1024 * 1024; |
| } |
| |
| // The Catapult dashboard does not yet support a "bytes per unit time" |
| // unit (of any multiple), and it rejects unknown units, so we report |
| // this as "unitless" here for now. TODO(mseaborn): Add support for |
| // data rate units to Catapult. |
| return "unitless_biggerIsBetter"; |
| } else if (strcmp(input_unit, "bytes") == 0) { |
| return "sizeInBytes"; |
| } else if (strcmp(input_unit, "frames/second") == 0) { |
| return "Hz_biggerIsBetter"; |
| } else { |
| fprintf(stderr, "Units not recognized: %s\n", input_unit); |
| exit(1); |
| } |
| } |
| |
| // Adds a Histogram to the given |output| Document. |
| void AddHistogram(rapidjson::Document* output, |
| rapidjson::Document::AllocatorType* alloc, |
| const std::string& test_name, const char* input_unit, |
| std::vector<double>&& vals, rapidjson::Value diagnostic_map, |
| rapidjson::Value guid) { |
| std::string catapult_unit = ConvertUnits(input_unit, &vals); |
| rapidjson::Value stats; |
| ComputeStatistics(vals, &stats, alloc); |
| |
| rapidjson::Value histogram; |
| histogram.SetObject(); |
| histogram.AddMember("name", test_name, *alloc); |
| histogram.AddMember("unit", catapult_unit, *alloc); |
| histogram.AddMember("description", "", *alloc); |
| histogram.AddMember("diagnostics", diagnostic_map, *alloc); |
| histogram.AddMember("running", stats, *alloc); |
| histogram.AddMember("guid", guid, *alloc); |
| |
| // This field is redundant with the "count" entry in "stats". |
| histogram.AddMember("maxNumSampleValues", vals.size(), *alloc); |
| |
| // Assume for now that we didn't get any NaN values. |
| histogram.AddMember("numNans", 0, *alloc); |
| |
| output->PushBack(histogram, *alloc); |
| } |
| |
| // Convert |type| into a string representation. |
| const char* TypeToString(rapidjson::Type type) { |
| switch (type) { |
| case rapidjson::kNullType: |
| return "null"; |
| case rapidjson::kFalseType: |
| return "false"; |
| case rapidjson::kTrueType: |
| return "true"; |
| case rapidjson::kObjectType: |
| return "object"; |
| case rapidjson::kArrayType: |
| return "array"; |
| case rapidjson::kStringType: |
| return "string"; |
| case rapidjson::kNumberType: |
| return "number"; |
| } |
| FXL_NOTREACHED() << "Unexpected rapidjson type " << static_cast<int>(type); |
| return ""; |
| } |
| |
| } // namespace |
| |
| void Convert(rapidjson::Document* input, rapidjson::Document* output, |
| const ConverterArgs* args) { |
| rapidjson::Document::AllocatorType& alloc = output->GetAllocator(); |
| JsonHelper helper(alloc); |
| output->SetArray(); |
| |
| uint32_t next_dummy_guid = 0; |
| auto MakeUuid = [&]() { |
| std::string uuid; |
| if (args->use_test_guids) { |
| uuid = fxl::StringPrintf("dummy_guid_%d", next_dummy_guid++); |
| } else { |
| uuid = fxl::GenerateUUID(); |
| } |
| return helper.MakeString(uuid.c_str()); |
| }; |
| |
| // Add a "diagnostic" entry representing the given value. Returns a GUID |
| // value identifying the diagnostic. |
| auto AddDiagnostic = [&](rapidjson::Value value) -> rapidjson::Value { |
| rapidjson::Value guid = MakeUuid(); |
| |
| // Add top-level description. |
| rapidjson::Value diagnostic; |
| diagnostic.SetObject(); |
| diagnostic.AddMember("guid", helper.Copy(guid), alloc); |
| diagnostic.AddMember("type", "GenericSet", alloc); |
| rapidjson::Value values; |
| values.SetArray(); |
| values.PushBack(value, alloc); |
| diagnostic.AddMember("values", values, alloc); |
| output->PushBack(diagnostic, alloc); |
| |
| return guid; |
| }; |
| |
| // Build a JSON object containing the "diagnostic" values that are common |
| // to all the test cases. |
| rapidjson::Value shared_diagnostic_map; |
| shared_diagnostic_map.SetObject(); |
| auto AddSharedDiagnostic = [&](const char* key, rapidjson::Value value) { |
| auto guid = AddDiagnostic(std::move(value)); |
| shared_diagnostic_map.AddMember(helper.MakeString(key), guid, alloc); |
| }; |
| rapidjson::Value timestamp; |
| timestamp.SetInt64(args->timestamp); |
| AddSharedDiagnostic("pointId", std::move(timestamp)); |
| AddSharedDiagnostic("bots", helper.MakeString(args->bots)); |
| AddSharedDiagnostic("masters", helper.MakeString(args->masters)); |
| |
| // The "logUrls" diagnostic contains a list of [name, url] tuples. |
| rapidjson::Value log_url_array; |
| log_url_array.SetArray(); |
| log_url_array.PushBack(helper.MakeString("Build Log"), alloc); |
| log_url_array.PushBack(helper.MakeString(args->log_url), alloc); |
| AddSharedDiagnostic("logUrls", std::move(log_url_array)); |
| |
| // Allocate a GUID for the given test suite name (by creating a |
| // "diagnostic" entry). Memoize this allocation so that we don't |
| // allocate >1 GUID for the same test suite name. |
| std::map<std::string, rapidjson::Value> test_suite_to_guid; |
| auto MakeGuidForTestSuiteName = [&](const char* test_suite) { |
| auto it = test_suite_to_guid.find(test_suite); |
| if (it != test_suite_to_guid.end()) { |
| return helper.Copy(it->second); |
| } |
| rapidjson::Value guid = AddDiagnostic(helper.MakeString(test_suite)); |
| test_suite_to_guid[test_suite] = helper.Copy(guid); |
| return guid; |
| }; |
| |
| if (!input->IsArray()) { |
| fprintf(stderr, |
| "Expected input document to be of type array, and got %s instead\n", |
| TypeToString(input->GetType())); |
| exit(1); |
| } |
| |
| for (auto& element : input->GetArray()) { |
| std::string name = element["label"].GetString(); |
| ConvertSpacesToUnderscores(&name); |
| |
| // The "test_suite" field in the input becomes the "benchmarks" |
| // diagnostic in the output. |
| rapidjson::Value test_suite_guid = |
| MakeGuidForTestSuiteName(element["test_suite"].GetString()); |
| rapidjson::Value diagnostic_map = helper.Copy(shared_diagnostic_map); |
| diagnostic_map.AddMember("benchmarks", test_suite_guid, alloc); |
| |
| const rapidjson::Value& values = element["values"].GetArray(); |
| if (values.Size() == 0) { |
| fprintf(stderr, "Input 'values' is empty"); |
| exit(1); |
| } |
| |
| std::vector<double> vals; |
| vals.reserve(values.Size()); |
| for (auto& val : values.GetArray()) { |
| vals.push_back(val.GetDouble()); |
| } |
| |
| // Create the histogram. |
| if (element.HasMember("split_first") && element["split_first"].IsBool() && |
| element["split_first"].GetBool()) { |
| // Create a histogram for the first sample value. |
| std::string h1_name = name + "_samples_0_to_0"; |
| std::vector<double> h1_vals(vals.begin(), vals.begin() + 1); |
| AddHistogram(output, &alloc, h1_name, element["unit"].GetString(), |
| std::move(h1_vals), helper.Copy(diagnostic_map), MakeUuid()); |
| |
| // Create a histogram for the remaining sample values, if any. |
| if (vals.size() > 1) { |
| std::stringstream h2_name; |
| h2_name << name << "_samples_1_to_" << vals.size() - 1; |
| |
| std::vector<double> h2_vals(vals.begin() + 1, vals.end()); |
| AddHistogram(output, &alloc, h2_name.str(), element["unit"].GetString(), |
| std::move(h2_vals), helper.Copy(diagnostic_map), |
| MakeUuid()); |
| } |
| } else { |
| // Create a histogram for all |vals|. |
| AddHistogram(output, &alloc, name, element["unit"].GetString(), |
| std::move(vals), std::move(diagnostic_map), MakeUuid()); |
| } |
| } |
| } |
| |
| int ConverterMain(int argc, char** argv) { |
| const char* usage = |
| "Usage: %s [options]\n" |
| "\n" |
| "This tool takes results from Fuchsia performance tests (in Fuchsia's " |
| "JSON perf test results format) and converts them to the Catapult " |
| "Dashboard's JSON HistogramSet format.\n" |
| "\n" |
| "Options:\n" |
| " --input FILENAME\n" |
| " Input file: perf test results JSON file (required)\n" |
| " --output FILENAME\n" |
| " Output file: Catapult HistogramSet JSON file (default is stdout)\n" |
| "\n" |
| "The following are required and specify parameters to copy into the " |
| "output file:\n" |
| " --execution-timestamp-ms NUMBER\n" |
| " --masters STRING\n" |
| " --bots STRING\n" |
| " --log-url URL\n" |
| "See README.md for the meanings of these parameters.\n"; |
| |
| // Parse command line arguments. |
| static const struct option opts[] = { |
| {"help", no_argument, nullptr, 'h'}, |
| {"input", required_argument, nullptr, 'i'}, |
| {"output", required_argument, nullptr, 'o'}, |
| {"execution-timestamp-ms", required_argument, nullptr, 'e'}, |
| {"masters", required_argument, nullptr, 'm'}, |
| {"bots", required_argument, nullptr, 'b'}, |
| {"log-url", required_argument, nullptr, 'l'}, |
| }; |
| ConverterArgs args; |
| const char* input_filename = nullptr; |
| const char* output_filename = nullptr; |
| optind = 1; |
| for (;;) { |
| int opt = getopt_long(argc, argv, "h", opts, nullptr); |
| if (opt < 0) |
| break; |
| switch (opt) { |
| case 'h': |
| printf(usage, argv[0]); |
| return 0; |
| case 'i': |
| input_filename = optarg; |
| break; |
| case 'o': |
| output_filename = optarg; |
| break; |
| case 'e': |
| args.timestamp = strtoll(optarg, nullptr, 0); |
| break; |
| case 'm': |
| args.masters = optarg; |
| break; |
| case 'b': |
| args.bots = optarg; |
| break; |
| case 'l': |
| args.log_url = optarg; |
| break; |
| } |
| } |
| if (optind < argc) { |
| fprintf(stderr, "Unrecognized argument: \"%s\"\n", argv[optind]); |
| return 1; |
| } |
| |
| // Check arguments. |
| bool failed = false; |
| if (!input_filename) { |
| fprintf(stderr, "--input argument is required\n"); |
| failed = true; |
| } |
| if (!args.timestamp) { |
| fprintf(stderr, "--execution-timestamp-ms argument is required\n"); |
| failed = true; |
| } |
| if (!args.masters) { |
| fprintf(stderr, "--masters argument is required\n"); |
| failed = true; |
| } |
| if (!args.bots) { |
| fprintf(stderr, "--bots argument is required\n"); |
| failed = true; |
| } |
| if (!args.log_url) { |
| fprintf(stderr, "--log-url argument is required\n"); |
| failed = true; |
| } |
| if (failed) { |
| fprintf(stderr, "\n"); |
| fprintf(stderr, usage, argv[0]); |
| return 1; |
| } |
| |
| // Read input file. |
| FILE* fp = fopen(input_filename, "r"); |
| if (!fp) { |
| fprintf(stderr, "Failed to open input file, \"%s\"\n", input_filename); |
| return 1; |
| } |
| char buffer[100]; |
| rapidjson::FileReadStream input_stream(fp, buffer, sizeof(buffer)); |
| rapidjson::Document input; |
| rapidjson::ParseResult parse_result = input.ParseStream(input_stream); |
| if (!parse_result) { |
| fprintf(stderr, "Failed to parse input file, \"%s\": %s (offset %zd)\n", |
| input_filename, rapidjson::GetParseError_En(parse_result.Code()), |
| parse_result.Offset()); |
| return 1; |
| } |
| fclose(fp); |
| |
| rapidjson::Document output; |
| Convert(&input, &output, &args); |
| |
| // Write output. |
| if (output_filename) { |
| fp = fopen(output_filename, "w"); |
| if (!fp) { |
| fprintf(stderr, "Failed to open output file, \"%s\"\n", output_filename); |
| return 1; |
| } |
| WriteJson(fp, &output); |
| fclose(fp); |
| } else { |
| WriteJson(stdout, &output); |
| } |
| |
| return 0; |
| } |