// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "converter.h"

#include <getopt.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <numeric>
#include <string>
#include <vector>

#include "lib/fxl/logging.h"
#include "lib/fxl/random/uuid.h"
#include "lib/fxl/strings/string_printf.h"
#include "rapidjson/document.h"
#include "rapidjson/error/en.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/prettywriter.h"

namespace {

// Returns the sample variance of |values| around |mean|, using Bessel's
// correction (dividing by N - 1 rather than N).  The correction gives a
// better estimate of the population's variance when |values| is only a
// sample of that population.
//
// With fewer than two samples the corrected variance is undefined.  NaN and
// Inf cannot be represented in JSON, so 0 is returned in that case.
double Variance(const std::vector<double>& values, double mean) {
  const auto sample_count = values.size();
  if (sample_count < 2)
    return 0;

  // Fold the squared deviations from the mean, in element order.
  const double squared_deviation_total = std::accumulate(
      values.begin(), values.end(), 0.0,
      [mean](double running_total, double sample) {
        const double deviation = sample - mean;
        return running_total + deviation * deviation;
      });
  return squared_deviation_total / static_cast<double>(sample_count - 1);
}

void WriteJson(FILE* fp, rapidjson::Document* doc) {
  char buffer[100];
  rapidjson::FileWriteStream output_stream(fp, buffer, sizeof(buffer));
  rapidjson::PrettyWriter<rapidjson::FileWriteStream> writer(output_stream);
  doc->Accept(writer);
  // Check that all the output was serialized successfully as JSON.  This
  // can fail if the output contained NaN or infinite floating point
  // values.
  FXL_CHECK(writer.IsComplete());
}

// Convenience wrappers around rapidjson's rather verbose API.
//
// The helper only stores a reference to the allocator it is given, so that
// allocator must outlive the helper.
class JsonHelper {
 public:
  explicit JsonHelper(rapidjson::Document::AllocatorType& alloc)
      : alloc_(alloc) {}

  // Returns a JSON string value holding a copy of the NUL-terminated
  // |string|, allocated from the helper's allocator.
  rapidjson::Value MakeString(const char* string) {
    return rapidjson::Value(string, alloc_);
  }

  // Returns a deep copy of |value|, allocated from the helper's allocator.
  rapidjson::Value Copy(const rapidjson::Value& value) {
    rapidjson::Value duplicate(value, alloc_);
    return duplicate;
  }

 private:
  rapidjson::Document::AllocatorType& alloc_;
};

// Replaces every space character in |*string| with an underscore, in place.
// All other characters (including other whitespace) are left untouched.
void ConvertSpacesToUnderscores(std::string* string) {
  std::replace(string->begin(), string->end(), ' ', '_');
}

void ComputeStatistics(const std::vector<double>& vals,
                       rapidjson::Value* output,
                       rapidjson::Document::AllocatorType* alloc) {
  double sum = 0;
  double sum_of_logs = 0;

  for (auto val : vals) {
    sum += val;
    sum_of_logs += log(val);
  }

  double min = *std::min_element(vals.begin(), vals.end());
  double max = *std::max_element(vals.begin(), vals.end());
  double mean = sum / vals.size();
  double variance = Variance(vals, mean);

  // meanlogs is the mean of the logs of the values, which is useful for
  // calculating the geometric mean of the values.
  //
  // If any of the values are zero or negative, meanlogs will be -Infinity
  // or a NaN, which can't be serialized in JSON format.  In those cases,
  // we write 'null' in the JSON instead.
  double meanlogs = sum_of_logs / vals.size();
  rapidjson::Value meanlogs_json;
  if (isfinite(meanlogs))
    meanlogs_json.SetDouble(meanlogs);

  output->SetArray();
  output->PushBack(vals.size(), *alloc);  // "count" entry.
  output->PushBack(max, *alloc);
  output->PushBack(meanlogs_json, *alloc);
  output->PushBack(mean, *alloc);
  output->PushBack(min, *alloc);
  output->PushBack(sum, *alloc);
  output->PushBack(variance, *alloc);
}

// Takes the unit string as it appears in the input JSON file.  Returns the
// unit string that should be used in the Catapult Histogram JSON file.
//
// Where the Catapult unit differs in scale from the input unit, the sample
// values in |*vals| are rescaled in place:
//   - "nanoseconds" / "ns" are converted to milliseconds.
//   - "bytes/second" is converted to mebibytes/second.
// For all other recognized units |*vals| is left unchanged.
//
// If |input_unit| is not recognized, an error is printed to stderr and the
// process exits with status 1.
std::string ConvertUnits(const char* input_unit, std::vector<double>* vals) {
  auto unit_is = [input_unit](const char* candidate) {
    return strcmp(input_unit, candidate) == 0;
  };

  if (unit_is("nanoseconds") || unit_is("ns")) {
    // Convert from nanoseconds to milliseconds.
    for (auto& val : *vals) {
      val /= 1e6;
    }
    return "ms_smallerIsBetter";
  }
  if (unit_is("milliseconds") || unit_is("ms")) {
    return "ms_smallerIsBetter";
  }
  if (unit_is("bytes/second")) {
    // Convert from bytes/second to mebibytes/second.
    for (auto& val : *vals) {
      val /= 1024 * 1024;
    }

    // The Catapult dashboard does not yet support a "bytes per unit time"
    // unit (of any multiple), and it rejects unknown units, so we report
    // this as "unitless" here for now.  TODO(mseaborn): Add support for
    // data rate units to Catapult.
    return "unitless_biggerIsBetter";
  }
  if (unit_is("bytes")) {
    return "sizeInBytes";
  }
  if (unit_is("frames/second")) {
    return "Hz_biggerIsBetter";
  }

  fprintf(stderr, "Units not recognized: %s\n", input_unit);
  exit(1);
}

// Appends one Histogram entry to the |output| array, allocating from
// |alloc|.
//
// |vals| holds the samples in |input_unit|; they are rescaled to the
// Catapult unit before the running statistics are computed.
// |diagnostic_map| and |guid| are consumed: they become the entry's
// "diagnostics" and "guid" members.
void AddHistogram(rapidjson::Document* output,
                  rapidjson::Document::AllocatorType* alloc,
                  const std::string& test_name, const char* input_unit,
                  std::vector<double>&& vals, rapidjson::Value diagnostic_map,
                  rapidjson::Value guid) {
  // Unit conversion may rescale |vals|, so it has to run before the
  // statistics are computed.
  std::string output_unit = ConvertUnits(input_unit, &vals);
  rapidjson::Value running_stats;
  ComputeStatistics(vals, &running_stats, alloc);

  rapidjson::Value entry(rapidjson::kObjectType);
  entry.AddMember("name", test_name, *alloc)
      .AddMember("unit", output_unit, *alloc)
      .AddMember("description", "", *alloc)
      .AddMember("diagnostics", diagnostic_map, *alloc)
      .AddMember("running", running_stats, *alloc)
      .AddMember("guid", guid, *alloc)
      // Redundant with the "count" entry in the running statistics.
      .AddMember("maxNumSampleValues", vals.size(), *alloc)
      // Assume for now that we didn't get any NaN values.
      .AddMember("numNans", 0, *alloc);

  output->PushBack(entry, *alloc);
}

// Returns a human-readable name for the rapidjson value type |type|, for
// use in error messages.
const char* TypeToString(rapidjson::Type type) {
  const char* type_name = nullptr;
  switch (type) {
    case rapidjson::kNullType:
      type_name = "null";
      break;
    case rapidjson::kFalseType:
      type_name = "false";
      break;
    case rapidjson::kTrueType:
      type_name = "true";
      break;
    case rapidjson::kObjectType:
      type_name = "object";
      break;
    case rapidjson::kArrayType:
      type_name = "array";
      break;
    case rapidjson::kStringType:
      type_name = "string";
      break;
    case rapidjson::kNumberType:
      type_name = "number";
      break;
  }
  if (type_name != nullptr)
    return type_name;

  // Only reached if |type| holds a value outside the enumerators above.
  FXL_NOTREACHED() << "Unexpected rapidjson type " << static_cast<int>(type);
  return "";
}

}  // namespace

// Converts |input| (Fuchsia perf test results) into Catapult HistogramSet
// JSON, which is written into |output| as a top-level array.
//
// |input| is expected to be an array of objects.  The fields read from each
// element are "label", "test_suite", "unit", "values" and, optionally, the
// boolean "split_first".  When "split_first" is true, the first sample is
// reported in its own histogram, separately from the remaining samples.
//
// |args| supplies the values for the shared diagnostics (timestamp, bots,
// masters, log_url) and selects deterministic GUIDs via use_test_guids.
//
// |output| receives the diagnostic entries followed by one or two histogram
// entries per input element.  All output values are allocated from
// |output|'s allocator.
//
// If |input| is not an array, or an element has an empty "values" array, an
// error is printed to stderr and the process exits with status 1.
void Convert(rapidjson::Document* input, rapidjson::Document* output,
             const ConverterArgs* args) {
  rapidjson::Document::AllocatorType& alloc = output->GetAllocator();
  JsonHelper helper(alloc);
  output->SetArray();

  uint32_t next_dummy_guid = 0;
  auto MakeUuid = [&]() {
    std::string uuid;
    if (args->use_test_guids) {
      // |next_dummy_guid| is unsigned, so it is formatted with %u.
      uuid = fxl::StringPrintf("dummy_guid_%u", next_dummy_guid++);
    } else {
      uuid = fxl::GenerateUUID();
    }
    return helper.MakeString(uuid.c_str());
  };

  // Add a "diagnostic" entry representing the given value.  Returns a GUID
  // value identifying the diagnostic.
  auto AddDiagnostic = [&](rapidjson::Value value) -> rapidjson::Value {
    rapidjson::Value guid = MakeUuid();

    // Add top-level description.
    rapidjson::Value diagnostic;
    diagnostic.SetObject();
    diagnostic.AddMember("guid", helper.Copy(guid), alloc);
    diagnostic.AddMember("type", "GenericSet", alloc);
    rapidjson::Value values;
    values.SetArray();
    values.PushBack(value, alloc);
    diagnostic.AddMember("values", values, alloc);
    output->PushBack(diagnostic, alloc);

    return guid;
  };

  // Build a JSON object containing the "diagnostic" values that are common
  // to all the test cases.
  rapidjson::Value shared_diagnostic_map;
  shared_diagnostic_map.SetObject();
  auto AddSharedDiagnostic = [&](const char* key, rapidjson::Value value) {
    auto guid = AddDiagnostic(std::move(value));
    shared_diagnostic_map.AddMember(helper.MakeString(key), guid, alloc);
  };
  rapidjson::Value timestamp;
  timestamp.SetInt64(args->timestamp);
  AddSharedDiagnostic("pointId", std::move(timestamp));
  AddSharedDiagnostic("bots", helper.MakeString(args->bots));
  AddSharedDiagnostic("masters", helper.MakeString(args->masters));

  // The "logUrls" diagnostic contains a list of [name, url] tuples.
  rapidjson::Value log_url_array;
  log_url_array.SetArray();
  log_url_array.PushBack(helper.MakeString("Build Log"), alloc);
  log_url_array.PushBack(helper.MakeString(args->log_url), alloc);
  AddSharedDiagnostic("logUrls", std::move(log_url_array));

  // Allocate a GUID for the given test suite name (by creating a
  // "diagnostic" entry).  Memoize this allocation so that we don't
  // allocate >1 GUID for the same test suite name.
  std::map<std::string, rapidjson::Value> test_suite_to_guid;
  auto MakeGuidForTestSuiteName = [&](const char* test_suite) {
    auto it = test_suite_to_guid.find(test_suite);
    if (it != test_suite_to_guid.end()) {
      return helper.Copy(it->second);
    }
    rapidjson::Value guid = AddDiagnostic(helper.MakeString(test_suite));
    test_suite_to_guid[test_suite] = helper.Copy(guid);
    return guid;
  };

  if (!input->IsArray()) {
    fprintf(stderr,
            "Expected input document to be of type array, and got %s instead\n",
            TypeToString(input->GetType()));
    exit(1);
  }

  for (auto& element : input->GetArray()) {
    std::string name = element["label"].GetString();
    ConvertSpacesToUnderscores(&name);

    // The "test_suite" field in the input becomes the "benchmarks"
    // diagnostic in the output.
    rapidjson::Value test_suite_guid =
        MakeGuidForTestSuiteName(element["test_suite"].GetString());
    rapidjson::Value diagnostic_map = helper.Copy(shared_diagnostic_map);
    diagnostic_map.AddMember("benchmarks", test_suite_guid, alloc);

    // Bind directly to the member.  Constructing a Value from the handle
    // returned by GetArray() would, as far as rapidjson's documented
    // Array-to-Value constructor goes, move the array contents out of the
    // caller's |input| document.
    const rapidjson::Value& values = element["values"];
    if (values.Size() == 0) {
      fprintf(stderr, "Input 'values' is empty\n");
      exit(1);
    }

    std::vector<double> vals;
    vals.reserve(values.Size());
    for (auto& val : values.GetArray()) {
      vals.push_back(val.GetDouble());
    }

    const char* unit = element["unit"].GetString();

    // Create the histogram.
    if (element.HasMember("split_first") && element["split_first"].IsBool() &&
        element["split_first"].GetBool()) {
      // Create a histogram for the first sample value.  |vals| is known to
      // be non-empty here because of the check above.
      std::string h1_name = name + "_samples_0_to_0";
      std::vector<double> h1_vals(vals.begin(), vals.begin() + 1);
      AddHistogram(output, &alloc, h1_name, unit, std::move(h1_vals),
                   helper.Copy(diagnostic_map), MakeUuid());

      // Create a histogram for the remaining sample values, if any.
      if (vals.size() > 1) {
        const std::string h2_name =
            name + "_samples_1_to_" + std::to_string(vals.size() - 1);

        std::vector<double> h2_vals(vals.begin() + 1, vals.end());
        AddHistogram(output, &alloc, h2_name, unit, std::move(h2_vals),
                     helper.Copy(diagnostic_map), MakeUuid());
      }
    } else {
      // Create a histogram for all |vals|.
      AddHistogram(output, &alloc, name, unit, std::move(vals),
                   std::move(diagnostic_map), MakeUuid());
    }
  }
}

// Command-line entry point for the converter.
//
// Parses the options in |argv|, reads and parses the --input JSON file,
// converts it with Convert(), and writes the result to the --output file
// (or to stdout when --output is not given).
//
// Returns 0 on success (including --help), or 1 if the arguments are
// invalid, a file cannot be opened or written, or the input fails to parse.
int ConverterMain(int argc, char** argv) {
  const char* usage =
      "Usage: %s [options]\n"
      "\n"
      "This tool takes results from Fuchsia performance tests (in Fuchsia's "
      "JSON perf test results format) and converts them to the Catapult "
      "Dashboard's JSON HistogramSet format.\n"
      "\n"
      "Options:\n"
      "  --input FILENAME\n"
      "      Input file: perf test results JSON file (required)\n"
      "  --output FILENAME\n"
      "      Output file: Catapult HistogramSet JSON file (default is stdout)\n"
      "\n"
      "The following are required and specify parameters to copy into the "
      "output file:\n"
      "  --execution-timestamp-ms NUMBER\n"
      "  --masters STRING\n"
      "  --bots STRING\n"
      "  --log-url URL\n"
      "See README.md for the meanings of these parameters.\n";

  // Parse command line arguments.
  static const struct option opts[] = {
      {"help", no_argument, nullptr, 'h'},
      {"input", required_argument, nullptr, 'i'},
      {"output", required_argument, nullptr, 'o'},
      {"execution-timestamp-ms", required_argument, nullptr, 'e'},
      {"masters", required_argument, nullptr, 'm'},
      {"bots", required_argument, nullptr, 'b'},
      {"log-url", required_argument, nullptr, 'l'},
      // getopt_long() requires the table to end with an all-zero element;
      // without it, an unrecognized option makes it read past the array.
      {nullptr, 0, nullptr, 0},
  };
  ConverterArgs args;
  const char* input_filename = nullptr;
  const char* output_filename = nullptr;
  // Restart option scanning from the first argument.
  optind = 1;
  for (;;) {
    int opt = getopt_long(argc, argv, "h", opts, nullptr);
    if (opt < 0)
      break;
    switch (opt) {
      case 'h':
        printf(usage, argv[0]);
        return 0;
      case 'i':
        input_filename = optarg;
        break;
      case 'o':
        output_filename = optarg;
        break;
      case 'e':
        args.timestamp = strtoll(optarg, nullptr, 0);
        break;
      case 'm':
        args.masters = optarg;
        break;
      case 'b':
        args.bots = optarg;
        break;
      case 'l':
        args.log_url = optarg;
        break;
      default:
        // Unknown option or missing option argument ('?').  By default
        // getopt_long() itself reports the specific problem on stderr, so
        // only the usage text is added here.
        fprintf(stderr, "\n");
        fprintf(stderr, usage, argv[0]);
        return 1;
    }
  }
  if (optind < argc) {
    fprintf(stderr, "Unrecognized argument: \"%s\"\n", argv[optind]);
    return 1;
  }

  // Check arguments.  All problems are reported before bailing out so the
  // user can fix them in one go.
  bool failed = false;
  if (!input_filename) {
    fprintf(stderr, "--input argument is required\n");
    failed = true;
  }
  if (!args.timestamp) {
    fprintf(stderr, "--execution-timestamp-ms argument is required\n");
    failed = true;
  }
  if (!args.masters) {
    fprintf(stderr, "--masters argument is required\n");
    failed = true;
  }
  if (!args.bots) {
    fprintf(stderr, "--bots argument is required\n");
    failed = true;
  }
  if (!args.log_url) {
    fprintf(stderr, "--log-url argument is required\n");
    failed = true;
  }
  if (failed) {
    fprintf(stderr, "\n");
    fprintf(stderr, usage, argv[0]);
    return 1;
  }

  // Read input file.
  FILE* fp = fopen(input_filename, "r");
  if (!fp) {
    fprintf(stderr, "Failed to open input file, \"%s\"\n", input_filename);
    return 1;
  }
  char buffer[100];
  rapidjson::FileReadStream input_stream(fp, buffer, sizeof(buffer));
  rapidjson::Document input;
  rapidjson::ParseResult parse_result = input.ParseStream(input_stream);
  // The stream is not read again after parsing, so close the file before
  // inspecting the result; this way the error path does not leak it.
  fclose(fp);
  if (!parse_result) {
    fprintf(stderr, "Failed to parse input file, \"%s\": %s (offset %zu)\n",
            input_filename, rapidjson::GetParseError_En(parse_result.Code()),
            parse_result.Offset());
    return 1;
  }

  rapidjson::Document output;
  Convert(&input, &output, &args);

  // Write output.
  if (output_filename) {
    fp = fopen(output_filename, "w");
    if (!fp) {
      fprintf(stderr, "Failed to open output file, \"%s\"\n", output_filename);
      return 1;
    }
    WriteJson(fp, &output);
    // fclose() flushes any buffered data, so a failure here means the
    // output file may be incomplete.
    if (fclose(fp) != 0) {
      fprintf(stderr, "Failed to write output file, \"%s\"\n",
              output_filename);
      return 1;
    }
  } else {
    WriteJson(stdout, &output);
  }

  return 0;
}
