// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/developer/debug/zxdb/expr/number_parser.h"

#include <ctype.h>
#include <math.h>
#include <stdlib.h>

#include "src/developer/debug/zxdb/expr/builtin_types.h"
#include "src/developer/debug/zxdb/expr/expr_token.h"
#include "src/developer/debug/zxdb/expr/expr_value.h"
#include "src/developer/debug/zxdb/symbols/base_type.h"
#include "third_party/double-conversion/double-conversion/double-conversion.h"

namespace zxdb {

namespace {

// Largest positive value representable by each integer width, widened to uint64_t so that every
// limit can be compared against a parsed 64-bit magnitude.
constexpr uint64_t kSigned32Max = std::numeric_limits<int32_t>::max();
constexpr uint64_t kSigned64Max = std::numeric_limits<int64_t>::max();
constexpr uint64_t kUnsigned32Max = std::numeric_limits<uint32_t>::max();
constexpr uint64_t kUnsigned64Max = std::numeric_limits<uint64_t>::max();

// Absolute value of the most negative number that fits in a signed 32-bit / 64-bit integer. In
// two's complement the negative range extends one further than the positive range, so these are
// written out as literals (kSigned32Max + 1 and kSigned64Max + 1 respectively) rather than being
// derived by negating the signed minimum.
constexpr uint64_t kSigned32MaxAbsNeg = 0x80000000;
constexpr uint64_t kSigned64MaxAbsNeg = 0x8000000000000000ul;

// This hardcodes our current 64-bit type scheme where "long" and "long long" are both 64 bits, and
// "int" is 32. Note that we still support "long long" because it's surprising if you type "0x100ll"
// and don't get something called "long long" back.
//
// C++ has more rules about whether the input has a specific base (hex numbers prefer to be unsigned
// if possible), and the "l" suffix is particularly weird because it allows matching "unsigned long"
// while no other decimal numbers will match unsigned types without "u". Our requirements don't need
// all of these rules so keep things a bit simpler.
//
// See: https://en.cppreference.com/w/cpp/language/integer_literal
struct TypeLookup {
  // Name given to the resulting base type.
  const char* name;

  // Storage size of the type in bytes.
  size_t byte_size;

  // True for signed types, false for unsigned ones.
  bool type_signed;

  // The largest positive value held by this type.
  uint64_t max_positive;

  // Absolute value of the most negative value held by this type. In the case of unsigned types,
  // this should hold the same value as the corresponding signed type. This allows "-23u" to specify
  // an unsigned version of the type that would normally hold "-23".
  uint64_t max_abs_negative;

  // The longest length suffix this type can satisfy. A number whose length suffix is longer than
  // this skips the type: a number written with "l" may become "long" or "long long" but never
  // "int". Suffixes that are shorter than (or equal to) this value still match.
  IntegerSuffix::Length max_suffix;
};

// Candidate result types in the order they are tried; StringToNumber() takes the first entry that
// satisfies the value range and the suffix. The final entry doubles as the fallback for values
// that match nothing, so it must remain the widest unsigned type.
//
// The table is read-only, so it is declared constexpr to keep it from being modified at runtime.
constexpr TypeLookup kTypeLookup[] = {
    // clang-format off
    // Name            bytes, signed max_positive    max_abs_negative,   max_suffix
    {"int",                4, true,  kSigned32Max,   kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
    {"unsigned",           4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
    {"long",               8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
    {"unsigned long",      8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
    {"long long",          8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
    {"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
    // clang-format on
};

// Reports whether |c| is the character that |lang| permits between the digits of a numeric
// literal: an apostrophe for C and an underscore for Rust. Any other language has no separator.
bool IsDigitSeparator(ExprLanguage lang, char c) {
  if (lang == ExprLanguage::kC)
    return c == '\'';
  if (lang == ExprLanguage::kRust)
    return c == '_';
  return false;
}

// Reports whether |c| may appear in the digit portion of a number written in |base|. The digit
// separator of |lang| is accepted for every base. Only bases 2, 8, 10, and 16 are understood; for
// any other base value every non-separator character is rejected.
bool ValidForBase(ExprLanguage lang, IntegerPrefix::Base base, char c) {
  if (IsDigitSeparator(lang, c))
    return true;

  // Inclusive range test on the character being checked.
  const auto between = [c](char low, char high) { return low <= c && c <= high; };

  if (base == IntegerPrefix::kBin)
    return between('0', '1');
  if (base == IntegerPrefix::kOct)
    return between('0', '7');
  if (base == IntegerPrefix::kDec)
    return between('0', '9');
  if (base == IntegerPrefix::kHex)
    return between('0', '9') || between('A', 'F') || between('a', 'f');
  return false;
}

// Returns the length of a <digits> sequence (also allowing separators) starting at the beginning of
// the input. Returns 0 when the input is empty or does not begin with a decimal digit.
size_t GetDigitsLength(ExprLanguage lang, std::string_view input) {
  // Explicit ASCII range check rather than isdigit(): passing a plain char holding a negative
  // value (e.g. a UTF-8 byte where char is signed) to the <ctype.h> functions is undefined
  // behavior.
  const auto is_ascii_digit = [](char c) { return c >= '0' && c <= '9'; };

  // First digit doesn't allow separators.
  if (input.empty() || !is_ascii_digit(input[0]))
    return 0;

  // Continuing digits allow separators.
  size_t length = 1;
  while (length < input.size() &&
         (is_ascii_digit(input[length]) || IsDigitSeparator(lang, input[length]))) {
    length++;
  }
  return length;
}

// Reports whether |c| introduces the exponent part of a floating-point literal ("e" or "E").
bool IsExponentCharacter(char c) {
  switch (c) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}

// Reports whether |c| is an explicit sign character ("+" or "-").
bool IsSign(char c) {
  switch (c) {
    case '+':
    case '-':
      return true;
    default:
      return false;
  }
}

}  // namespace

// Converts the text of an integer literal into a typed value.
//
// The accepted form is an optional "-", an optional base prefix ("0x", "0b", "0o"), one or more
// digits valid for that base (optionally interleaved with the digit separator of |lang|), and an
// optional "u" / "l" / "ll" suffix. The result uses the first type in kTypeLookup that can hold the
// value and satisfies the suffix.
//
// Returns an error for a malformed suffix, a C-style leading-zero octal number, a missing digit
// string, or a character that is not valid for the base.
ErrOrValue StringToNumber(ExprLanguage lang, std::string_view str) {
  // Strip the suffix before examining the prefix. The prefix code treats any "0" followed by
  // another character as C-style octal, so doing this in the other order would reject a plain zero
  // carrying a suffix ("0u", "0ll") with the octal error below. Suffix characters ("u", "l") never
  // overlap with prefix characters or hex digits so the order is otherwise irrelevant.
  auto suffix = ExtractIntegerSuffix(&str);
  if (suffix.has_error())
    return suffix.err();

  IntegerPrefix prefix = ExtractIntegerPrefix(&str);
  if (prefix.base == IntegerPrefix::kOct && prefix.octal_type == IntegerPrefix::OctalType::kC) {
    // Require "0o" prefixes for octal numbers instead of allowing C-style "0" prefixes. Octal
    // numbers are very unusual to be typed interactively in a debugger, and it's easier to
    // accidentally copy-and-paste a decimal number with a "0" at the beginning and get surprising
    // results. The "0o" format is used by Rust so we require it for clarity.
    return Err("Octal numbers must be prefixed with '0o'.");
  }

  if (str.empty())
    return Err("Expected a number.");

  // Validate the characters in the number. This prevents strtoull from being too smart and trying
  // to handle prefixes itself. We also remove the separators.
  std::string digits;
  digits.reserve(str.size());
  for (char c : str) {
    if (!ValidForBase(lang, prefix.base, c))
      return Err("Invalid character in number.");
    if (!IsDigitSeparator(lang, c))
      digits.push_back(c);
  }

  // Input consisting only of separators (for example "'" or "0x'") passes the validation above but
  // contains no digits. strtoull would report that as a successful parse of 0, so reject it here.
  if (digits.empty())
    return Err("Expected a number.");

  // strtoull doesn't take a const ending, but it doesn't modify the input.
  char* digits_end = &digits[digits.size()];
  char* parsed_end = digits_end;

  // This will be the absolute value of the returned number.
  //
  // Note that strtoull consumes every digit even when the value does not fit in 64 bits. In that
  // case it returns ULLONG_MAX (and sets errno to ERANGE), so an over-long number saturates instead
  // of being reported by the check below.
  uint64_t abs_value = strtoull(digits.data(), &parsed_end, static_cast<int>(prefix.base));

  // If strtoull stopped early it means it hit an invalid character. This shouldn't happen since the
  // characters were validated above.
  if (parsed_end != digits_end)
    return Err("Invalid number.");

  // Pick the smallest type that fits the data size as well as satisfies any suffixes.
  const TypeLookup* matched_type = nullptr;
  for (const auto& cur : kTypeLookup) {
    // Type must hold enough data.
    if (prefix.sign == IntegerPrefix::kNegative) {
      if (abs_value > cur.max_abs_negative)
        continue;
    } else {
      if (abs_value > cur.max_positive)
        continue;
    }

    if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.value().length))
      continue;  // Requested length is larger.

    if (suffix.value().type_signed == IntegerSuffix::kUnsigned) {
      if (cur.type_signed)
        continue;  // Unsigned suffix requires unsigned type.
    } else if (prefix.sign == IntegerPrefix::kNegative && !cur.type_signed) {
      // Signed input requires a signed type unless a suffix overrode it which was checked above
      // ("-1u" should be unsigned).
      continue;
    }

    matched_type = &cur;
    break;
  }

  if (!matched_type) {
    // Anything not matched above will be an overflow. Put it into a unsigned 64-bit value (the last
    // table entry) and tolerate the overflow.
    matched_type = &*(std::end(kTypeLookup) - 1);
  }

  int symbol_tag =
      matched_type->type_signed ? BaseType::kBaseTypeSigned : BaseType::kBaseTypeUnsigned;
  auto type =
      fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size, matched_type->name);

  // Unsigned negation is well-defined and yields the two's complement bit pattern.
  uint64_t value = prefix.sign == IntegerPrefix::kNegative ? -abs_value : abs_value;

  // Construct the data. This assumes little-endian since it truncates or zero-fills off the right.
  std::vector<uint8_t> data(matched_type->byte_size);
  memcpy(data.data(), &value, matched_type->byte_size);

  return ExprValue(std::move(type), std::move(data));
}

// Examines the beginning of |*s| for a sign and a base prefix, removes whatever was recognized from
// the front of the view, and returns a description of it.
//
// A leading "-" (plus any whitespace following it) marks the number negative. After that, "0x" is
// hex, "0b" is binary, "0o" is Rust-style octal (all case-insensitive), and a "0" followed by any
// other character is C-style octal. Everything else, including a lone "0", leaves the
// default-constructed base in place and trims nothing further.
IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
  IntegerPrefix prefix;
  if (s->empty())
    return prefix;  // Defaults OK for empty string.

  if ((*s)[0] == '-') {
    prefix.sign = IntegerPrefix::kNegative;

    // Allow whitespace between negative sign and the rest. The cast is required because passing a
    // negative char value (possible for non-ASCII bytes when char is signed) to isspace() is
    // undefined behavior.
    size_t sign_len = 1;
    while (sign_len < s->size() && isspace(static_cast<unsigned char>((*s)[sign_len])))
      sign_len++;
    *s = s->substr(sign_len);
  }

  if (s->size() >= 2u && (*s)[0] == '0') {
    char second = (*s)[1];
    if (second == 'x' || second == 'X') {
      // Hex.
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kHex;
    } else if (second == 'b' || second == 'B') {
      // Binary.
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kBin;
    } else if (second == 'o' || second == 'O') {
      // Rust-style octal "0o".
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kOct;
      prefix.octal_type = IntegerPrefix::OctalType::kRust;
    } else {
      // Everything else beginning with a '0' is C-style octal. Note this requires >= 2 characters
      // so that "0" by itself is decimal.
      *s = s->substr(1u);
      prefix.base = IntegerPrefix::kOct;
      prefix.octal_type = IntegerPrefix::OctalType::kC;
    }
  }
  // Else case is decimal, doesn't need trimming, default is already correct.
  return prefix;
}

// Removes a trailing integer suffix from |*s| and returns what it described. A suffix is any
// combination of one "u" with one "l" or "ll" (case-insensitive) in either order. Specifying the
// signedness or the length more than once is an error. A string with no suffix is left untouched
// and yields a default-constructed IntegerSuffix.
ErrOr<IntegerSuffix> ExtractIntegerSuffix(std::string_view* s) {
  IntegerSuffix result;

  const auto is_ell = [](char c) { return c == 'l' || c == 'L'; };

  bool seen_unsigned = false;
  bool seen_length = false;

  // Walk backwards from the end of the string; |keep| is the number of leading characters that are
  // not part of the suffix. Going backwards handles both "ul" and "lu" orderings in one pass.
  size_t keep = s->size();
  for (;;) {
    if (keep == 0)
      break;

    const char last = (*s)[keep - 1];
    if (last == 'u' || last == 'U') {
      if (seen_unsigned)
        return Err("Duplicate 'u' in number suffix.");
      seen_unsigned = true;
      result.type_signed = IntegerSuffix::kUnsigned;
      --keep;
      continue;
    }

    if (!is_ell(last))
      break;  // Reached something that isn't a suffix character.

    if (seen_length)
      return Err("Duplicate 'l' or 'll' in number suffix.");
    seen_length = true;

    // Two adjacent "l" characters mean "long long". Technically C++ disallows mixed case ("Ll",
    // "lL") but that isn't enforced here.
    const bool doubled = keep > 1 && is_ell((*s)[keep - 2]);
    if (doubled) {
      result.length = IntegerSuffix::Length::kLongLong;
      keep -= 2;
    } else {
      result.length = IntegerSuffix::Length::kLong;
      keep -= 1;
    }
  }

  s->remove_suffix(s->size() - keep);
  return result;
}

// The floating-point format we expect is:
//
//   <digit-first> := ("0" - "9")
//
//   <digit-continuing> := <digit-first> | "_" | "'"
//
//   <digits> := <digit-first> <digit-continuing>*
//
//   <float> := ( <significand> [<exponent>] [<suffix>] ) |
//              ( <digits> <exponent> [<suffix>] )
//
//   <significand> := ( <digits> "." <digits> ) |
//                    ( "." <digits> ) |
//                    ( <digits> "." )
//
//   <exponent> := ("e" | "E") [("+" | "-")] <digits>
//
//   <suffix> := "f" | "F" | "l" | "L"
//
// In other words, a floating point number must have either a "." or an "e", and a "." must have
// digits on at least one side of it.
//
// Rust requires that there be digits before a ".". This is important to disambiguate cases like
// "tuple.0" as being "tuple dot zero" from "tuple float-zero".
//
// Returns the number of characters at the beginning of |input| that make up a floating-point
// token, or 0 if the input does not begin with one. |*following| is always written: it is set to
// kCanNotFollow only for Rust input beginning with a ".", and to kCanFollow otherwise.
//
// TODO(bug 43220) Handle Rust-specific suffixes.
// TODO(bug 43222) Support C++17 hex floating point literals "0x342.1a"
size_t GetFloatTokenLength(ExprLanguage lang, std::string_view input, FloatFollowing* following) {
  // Default to allowing floats to follow the current context. This will be reset in the specific
  // Rust case where a float can not follow the current context.
  *following = FloatFollowing::kCanFollow;

  std::string_view cur = input;

  if (lang == ExprLanguage::kRust && !cur.empty() && cur[0] == '.') {
    // Something starting with a dot can in Rust never be followed by a float (see function comment
    // above).
    *following = FloatFollowing::kCanNotFollow;
    return 0;
  }

  // Digits before the dot.
  size_t before_dot = GetDigitsLength(lang, cur);
  cur = cur.substr(before_dot);

  // "."
  bool has_dot = false;
  if (!cur.empty() && cur[0] == '.') {
    has_dot = true;
    cur = cur.substr(1);
  }
  if (!before_dot && !has_dot)
    return 0;  // Must begin with digits or a dot to be a float.

  // Digits after the dot.
  size_t after_dot = GetDigitsLength(lang, cur);
  cur = cur.substr(after_dot);
  if (has_dot && !before_dot && !after_dot)
    return 0;  // A dot must have digits on at least one side.

  // Optional exponent.
  bool has_exponent = false;
  if (!cur.empty() && IsExponentCharacter(cur[0])) {
    has_exponent = true;
    cur = cur.substr(1);

    if (!cur.empty() && IsSign(cur[0]))
      cur = cur.substr(1);  // Skip optional sign.

    size_t exponent = GetDigitsLength(lang, cur);
    if (!exponent)
      return 0;  // Must have exponent digits to be a float.
    cur = cur.substr(exponent);
  }
  if (!has_dot && !has_exponent)
    return 0;  // Must have a dot or an exponent to be a float.

  // Consider all alphanumeric characters immediately following to be part of the token. This will
  // get any suffix characters but may get garbage also. The tokenizer isn't in charge of validating
  // floating point formatting, and something like "2.3hello" should be considered one invalid
  // floating-point token rather than a valid float followed by a valid identifier.
  //
  // The cast is required because passing a negative char value (possible for non-ASCII bytes when
  // char is signed) to isalnum() is undefined behavior.
  while (!cur.empty() && isalnum(static_cast<unsigned char>(cur[0])))
    cur = cur.substr(1);

  // |cur| is always a suffix of |input| so the difference in size is the consumed length.
  return input.size() - cur.size();
}

// Looks at the last character of |*view| for a floating-point type suffix. "f"/"F" gives kFloat
// and "l"/"L" gives kLong; in both cases that character is removed from the view. Anything else
// (including an empty view) gives kNone and leaves the view unchanged.
FloatSuffix StripFloatSuffix(std::string_view* view) {
  if (view->empty())
    return FloatSuffix::kNone;

  FloatSuffix found = FloatSuffix::kNone;
  switch (view->back()) {
    case 'f':
    case 'F':
      found = FloatSuffix::kFloat;
      break;
    case 'l':
    case 'L':
      found = FloatSuffix::kLong;
      break;
    default:
      return FloatSuffix::kNone;
  }

  view->remove_suffix(1);
  return found;
}

// Converts a floating-point token into a typed value. The token's suffix selects the type: none
// gives an 8-byte float type, "f" gives a 4-byte one, and "l" (honored only for C) gives a type
// the size of the host's long double. Digit separators for |lang| are removed before parsing.
// Returns an error if the converter did not consume every remaining character.
ErrOrValue ValueForFloatToken(ExprLanguage lang, const ExprToken& token) {
  FX_DCHECK(token.type() == ExprTokenType::kFloat);

  std::string_view text = token.value();
  FloatSuffix suffix = StripFloatSuffix(&text);
  if (suffix == FloatSuffix::kLong && lang != ExprLanguage::kC)
    suffix = FloatSuffix::kNone;  // Only C has a "long double" type.

  // Copy the text without any digit separators so the converter only sees plain digits.
  std::string cleaned;
  cleaned.reserve(text.size());
  for (char c : text) {
    if (IsDigitSeparator(lang, c))
      continue;
    cleaned.push_back(c);
  }
  const int cleaned_len = static_cast<int>(cleaned.size());

  // NOTE(review): the arguments appear to be (flags, empty-string value, junk value, infinity
  // symbol, NaN symbol) -- confirm against the double-conversion header.
  double_conversion::StringToDoubleConverter converter(0, 0.0, nan(""), nullptr, nullptr);

  // Replaces the contents of |bytes| with a raw copy of |size| bytes from |src|.
  std::vector<uint8_t> bytes;
  const auto store = [&bytes](const void* src, size_t size) {
    bytes.resize(size);
    memcpy(bytes.data(), src, size);
  };

  fxl::RefPtr<BaseType> type;
  int chars_used = 0;
  switch (suffix) {
    case FloatSuffix::kNone: {
      const double parsed = converter.StringToDouble(cleaned.data(), cleaned_len, &chars_used);
      store(&parsed, sizeof(double));
      type = GetBuiltinFloatType(lang, 8);
      break;
    }
    case FloatSuffix::kFloat: {
      const float parsed = converter.StringToFloat(cleaned.data(), cleaned_len, &chars_used);
      store(&parsed, sizeof(float));
      type = GetBuiltinFloatType(lang, 4);
      break;
    }
    case FloatSuffix::kLong: {
      // The parser doesn't support long doubles, but we can at least upcast if the local system
      // supports it.
      const double parsed = converter.StringToDouble(cleaned.data(), cleaned_len, &chars_used);
      const long double widened = parsed;
      store(&widened, sizeof(long double));
      type = GetBuiltinFloatType(lang, bytes.size());
      break;
    }
  }

  // Anything left unparsed means the token was not a well-formed number.
  if (chars_used != cleaned_len)
    return Err("Trailing characters on floating-point constant.");

  return ExprValue(std::move(type), std::move(bytes));
}

}  // namespace zxdb
