blob: 93a12b581dde5fd3dc6f9bb697fe26e8981fabe2 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/developer/debug/zxdb/expr/number_parser.h"

#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdlib.h>

#include "src/developer/debug/zxdb/expr/builtin_types.h"
#include "src/developer/debug/zxdb/expr/expr_token.h"
#include "src/developer/debug/zxdb/expr/expr_value.h"
#include "src/developer/debug/zxdb/symbols/base_type.h"
#include "third_party/double-conversion/double-conversion/double-conversion.h"
namespace zxdb {
namespace {
// Largest value representable by each integer type, widened to uint64_t so it can be compared
// directly against a parsed magnitude.
constexpr uint64_t kSigned32Max = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
constexpr uint64_t kSigned64Max = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
constexpr uint64_t kUnsigned32Max = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
constexpr uint64_t kUnsigned64Max = std::numeric_limits<uint64_t>::max();

// Magnitude of the most negative value of each signed type. Two's complement holds one more
// negative value than positive, so this is the positive maximum plus one (computed in unsigned
// arithmetic, where it cannot overflow).
constexpr uint64_t kSigned32MaxAbsNeg = kSigned32Max + 1;
constexpr uint64_t kSigned64MaxAbsNeg = kSigned64Max + 1;
// This hardcodes our current 64-bit type scheme where "long" and "long long" are both 64 bits, and
// "int" is 32. Note that we still support "long long" because it's surprising if you type "0x100ll"
// and don't get something called "long long" back.
//
// C++ has more rules about whether the input has a specific base (hex numbers prefer to be unsigned
// if possible), and the "l" suffix is particularly weird because it allows matching "unsigned long"
// while no other decimal numbers will match unsigned types without "u". Our requirements don't need
// all of these rules so keep things a bit simpler.
//
// See: https://en.cppreference.com/w/cpp/language/integer_literal
struct TypeLookup {
  // Name given to the type of the resulting value.
  const char* name;

  // Size of the type in bytes.
  size_t byte_size;

  // True for signed types, false for unsigned ones.
  bool type_signed;

  // The largest positive value held by this type.
  uint64_t max_positive;

  // Absolute value of the most negative value held by this type. In the case of unsigned types,
  // this should hold the same value as the corresponding signed type. This allows "-23u" to specify
  // an unsigned version of the type that would normally hold "-23".
  uint64_t max_abs_negative;

  // Maximum suffix this type matches. If the number specifies "l" it will allow "long" or "long
  // long" but not int. Any lengths less than this will not match.
  IntegerSuffix::Length max_suffix;
};

// Candidate types in the order they are tried; the first entry satisfying all constraints wins.
// The table is read-only so it is declared constexpr.
constexpr TypeLookup kTypeLookup[] = {
    // clang-format off
    // Name                 bytes, signed max_positive    max_abs_negative,   max_suffix
    {"int",                4, true,  kSigned32Max,   kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
    {"unsigned",           4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
    {"long",               8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
    {"unsigned long",      8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
    {"long long",          8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
    {"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
    // clang-format on
};
// Reports whether |c| is the character the given language allows between the digits of a numeric
// literal: an apostrophe for C and an underscore for Rust. Any other language value has no
// separator.
bool IsDigitSeparator(ExprLanguage lang, char c) {
  if (lang == ExprLanguage::kC)
    return c == '\'';
  if (lang == ExprLanguage::kRust)
    return c == '_';
  return false;
}
// Reports whether |c| may appear in the digit portion of a number written in the given base. The
// language's digit separator is accepted for every base. Only base 2, 8, 10, and 16 are
// supported; any other base value yields false.
bool ValidForBase(ExprLanguage lang, IntegerPrefix::Base base, char c) {
  if (IsDigitSeparator(lang, c))
    return true;

  const bool decimal_digit = '0' <= c && c <= '9';
  switch (base) {
    case IntegerPrefix::kBin:
      return c == '0' || c == '1';
    case IntegerPrefix::kOct:
      return '0' <= c && c <= '7';
    case IntegerPrefix::kDec:
      return decimal_digit;
    case IntegerPrefix::kHex: {
      const bool upper_hex = 'A' <= c && c <= 'F';
      const bool lower_hex = 'a' <= c && c <= 'f';
      return decimal_digit || upper_hex || lower_hex;
    }
  }
  return false;
}
// Returns the length of a <digits> sequence (also allowing separators) starting at the beginning of
// the input. Returns 0 when the input does not begin with a digit or separator.
size_t GetDigitsLength(ExprLanguage lang, std::string_view input) {
  size_t result = 0;
  // The <ctype.h> classification functions have undefined behavior when given a negative value
  // other than EOF, which a plain char can hold for non-ASCII input, so convert through unsigned
  // char before calling isdigit().
  while (result < input.size() && (isdigit(static_cast<unsigned char>(input[result])) ||
                                   IsDigitSeparator(lang, input[result])))
    result++;
  return result;
}
// Reports whether |c| introduces the exponent part of a floating-point literal.
bool IsExponentCharacter(char c) {
  switch (c) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
// Reports whether |c| is an explicit plus or minus sign.
bool IsSign(char c) {
  if (c == '+')
    return true;
  return c == '-';
}
} // namespace
// Converts the text of an integer literal to an ExprValue.
//
// The input consists of an optional "-" sign, an optional base prefix, digits valid for that base
// (the language's digit separators are allowed and ignored), and an optional "u" / "l" / "ll"
// suffix.
//
// The type of the result is the first kTypeLookup entry that can hold the value and is compatible
// with the sign and suffix. When no entry matches, the last table entry is used and the value
// wraps.
//
// Returns an Err for C-style "0"-prefixed octal, a malformed suffix, input without any digits, a
// character that is invalid for the base, or a magnitude that does not fit in 64 bits.
ErrOrValue StringToNumber(ExprLanguage lang, std::string_view str) {
  IntegerPrefix prefix = ExtractIntegerPrefix(&str);
  if (prefix.base == IntegerPrefix::kOct && prefix.octal_type == IntegerPrefix::OctalType::kC) {
    // Require "0o" prefixes for octal numbers instead of allowing C-style "0" prefixes. Octal
    // numbers are very unusual to be typed interactively in a debugger, and it's easier to
    // accidentally copy-and-paste a decimal number with a "0" at the beginning and get surprising
    // results. The "0o" format is used by Rust so we require it for clarity.
    return Err("Octal numbers must be prefixed with '0o'.");
  }

  auto suffix = ExtractIntegerSuffix(&str);
  if (suffix.has_error())
    return suffix.err();

  if (str.empty())
    return Err("Expected a number.");

  // Validate the characters in the number. This prevents strtoull from being too smart and trying
  // to handle prefixes itself. We also remove the separators.
  std::string digits;
  digits.reserve(str.size());
  for (char c : str) {
    if (!ValidForBase(lang, prefix.base, c))
      return Err("Invalid character in number.");
    if (!IsDigitSeparator(lang, c))
      digits.push_back(c);
  }

  // Input consisting only of separators leaves no digits. strtoull would report the empty string
  // as a fully-consumed 0, so reject it explicitly.
  if (digits.empty())
    return Err("Expected a number.");

  // strtoull doesn't take a const ending, but it doesn't modify the input.
  char* digits_end = &digits[digits.size()];
  char* parsed_end = digits_end;

  // This will be the absolute value of the returned number. errno is cleared first because it is
  // the only way strtoull reports an out-of-range result.
  errno = 0;
  uint64_t abs_value = strtoull(digits.c_str(), &parsed_end, static_cast<int>(prefix.base));

  // If strtoull stopped early it means it hit an invalid character (shouldn't happen since we
  // validated above).
  if (parsed_end != digits_end)
    return Err("Invalid number.");

  // On overflow strtoull still consumes every digit but returns ULLONG_MAX and sets ERANGE.
  // Report that instead of silently returning a saturated value.
  if (errno == ERANGE)
    return Err("Number is too large.");

  const bool is_negative = prefix.sign == IntegerPrefix::kNegative;

  // Pick the smallest type that fits the data size as well as satisfies any suffixes.
  const TypeLookup* matched_type = nullptr;
  for (const auto& cur : kTypeLookup) {
    // Type must hold enough data.
    if (is_negative) {
      if (abs_value > cur.max_abs_negative)
        continue;
    } else {
      if (abs_value > cur.max_positive)
        continue;
    }

    if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.value().length))
      continue;  // Requested length is larger.

    if (suffix.value().type_signed == IntegerSuffix::kUnsigned) {
      if (cur.type_signed)
        continue;  // Unsigned suffix requires unsigned type.
    } else if (is_negative && !cur.type_signed) {
      // Signed input requires a signed type unless a suffix overrode it which was checked above
      // ("-1u" should be unsigned).
      continue;
    }

    matched_type = &cur;
    break;
  }

  if (!matched_type) {
    // Anything not matched above will be an overflow. Put it into a unsigned 64-bit value and
    // tolerate the overflow.
    matched_type = &*(std::end(kTypeLookup) - 1);
  }

  int symbol_tag =
      matched_type->type_signed ? BaseType::kBaseTypeSigned : BaseType::kBaseTypeUnsigned;
  auto type =
      fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size, matched_type->name);

  // Unsigned negation is well-defined and produces the two's complement bit pattern.
  uint64_t value = is_negative ? -abs_value : abs_value;

  // Construct the data. This assumes little-endian since it truncates or zero-fills off the right.
  std::vector<uint8_t> data(matched_type->byte_size);
  memcpy(data.data(), &value, matched_type->byte_size);

  return ExprValue(std::move(type), std::move(data));
}
// Strips the sign and base prefix from the beginning of |*s| and returns what was found.
//
// A leading "-" (plus any whitespace following it) marks the number negative. "0x"/"0X" selects
// hex, "0b"/"0B" binary, and "0o"/"0O" Rust-style octal. A "0" followed by anything else is
// reported as C-style octal, except when it is followed by an integer suffix character so that
// "0u" or "0ll" remain a plain zero. Input without a prefix is left unchanged and the returned
// struct keeps its default values.
IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
  IntegerPrefix prefix;
  if (s->empty())
    return prefix;  // Defaults OK for empty string.

  if ((*s)[0] == '-') {
    prefix.sign = IntegerPrefix::kNegative;

    // Allow whitespace between negative sign and the rest. isspace() has undefined behavior for
    // negative values, which a plain char can hold, so convert through unsigned char.
    size_t sign_len = 1;
    while (sign_len < s->size() && isspace(static_cast<unsigned char>((*s)[sign_len])))
      sign_len++;
    *s = s->substr(sign_len);
  }

  if (s->size() >= 2u && (*s)[0] == '0') {
    const char second = (*s)[1];
    // A lone "0" directly followed by a suffix ("0u", "0L", "0ll") is a decimal zero, not the
    // start of an octal number.
    const bool second_is_suffix = second == 'u' || second == 'U' || second == 'l' || second == 'L';

    if (second == 'x' || second == 'X') {
      // Hex.
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kHex;
    } else if (second == 'b' || second == 'B') {
      // Binary.
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kBin;
    } else if (second == 'o' || second == 'O') {
      // Rust-style octal "0o".
      *s = s->substr(2u);
      prefix.base = IntegerPrefix::kOct;
      prefix.octal_type = IntegerPrefix::OctalType::kRust;
    } else if (!second_is_suffix) {
      // Everything else beginning with a '0' is C-style octal. Note this requires >= 2 characters
      // so that "0" by itself is decimal.
      *s = s->substr(1u);
      prefix.base = IntegerPrefix::kOct;
      prefix.octal_type = IntegerPrefix::OctalType::kC;
    }
  }
  // Else case is decimal, doesn't need trimming, default is already correct.

  return prefix;
}
// Strips a trailing integer suffix from |*s| and returns what it specified.
//
// The suffix may combine one "u" with one "l" or "ll", in either order and any letter case.
// Scanning goes from the end of the string toward the front so one pass handles both orders.
// Returns an Err when "u" or the length part occurs more than once; |*s| is not modified in that
// case. Fields for which no suffix was given keep their default values.
ErrOr<IntegerSuffix> ExtractIntegerSuffix(std::string_view* s) {
  const auto is_unsigned_char = [](char c) { return c == 'u' || c == 'U'; };
  const auto is_long_char = [](char c) { return c == 'l' || c == 'L'; };

  IntegerSuffix result;
  bool seen_unsigned = false;
  bool seen_length = false;

  // Number of leading characters that are not part of the suffix.
  size_t keep = s->size();
  while (keep > 0) {
    const char last = (*s)[keep - 1];

    if (is_unsigned_char(last)) {
      if (seen_unsigned)
        return Err("Duplicate 'u' in number suffix.");
      seen_unsigned = true;
      result.type_signed = IntegerSuffix::kUnsigned;
      keep -= 1;
      continue;
    }

    if (!is_long_char(last))
      break;  // First character that can't belong to a suffix.

    if (seen_length)
      return Err("Duplicate 'l' or 'll' in number suffix.");
    seen_length = true;

    // Technically C++ says "Ll" and "lL" aren't allowed, but we don't bother enforcing this.
    const bool doubled = keep >= 2 && is_long_char((*s)[keep - 2]);
    if (doubled) {
      result.length = IntegerSuffix::Length::kLongLong;
      keep -= 2;
    } else {
      result.length = IntegerSuffix::Length::kLong;
      keep -= 1;
    }
  }

  s->remove_suffix(s->size() - keep);
  return result;
}
// Returns the number of characters at the beginning of |input| that make up a floating-point
// token, or 0 if the input does not start with one.
//
// The floating-point format we expect is:
//
//   <digits> := ("0" - "9") | "_" | "'"
//
//   <float> := ( <significand> [<exponent>] [<suffix>] ) |
//              ( <digits> <exponent> [<suffix>] )
//
//   <significand> := ( <digits> "." <digits> ) |
//                    ( "." <digits> ) |
//                    ( <digits> "." )
//
//   <exponent> := ("e" | "E") [("+" | "-")] <digits>
//
//   <suffix> := "f" | "F" | "l" | "L"
//
// In other words, a floating point number must have either a "." or an "e", and a "." must have
// digits on at least one side of it.
//
// Rust requires that there be digits before a ".". This is important to disambiguate cases like
// "tuple.0" as being "tuple dot zero" from "tuple float-zero".
//
// TODO(bug 43220) Handle Rust-specific suffixes.
// TODO(bug 43222) Support C++17 hex floating point literals "0x342.1a"
size_t GetFloatTokenLength(ExprLanguage lang, std::string_view input) {
  std::string_view cur = input;

  // Digits before the dot.
  size_t before_dot = GetDigitsLength(lang, cur);
  cur = cur.substr(before_dot);
  if (lang == ExprLanguage::kRust && before_dot == 0)
    return 0;  // Rust floats must start with digits.

  // "."
  bool has_dot = false;
  if (!cur.empty() && cur[0] == '.') {
    has_dot = true;
    cur = cur.substr(1);
  }
  if (!before_dot && !has_dot)
    return 0;  // Must begin with digits or a dot to be a float.

  // Digits after the dot.
  size_t after_dot = GetDigitsLength(lang, cur);
  cur = cur.substr(after_dot);
  if (has_dot && !before_dot && !after_dot)
    return 0;  // A dot must have digits on at least one side.

  // Optional exponent.
  bool has_exponent = false;
  if (!cur.empty() && IsExponentCharacter(cur[0])) {
    has_exponent = true;
    cur = cur.substr(1);
    if (!cur.empty() && IsSign(cur[0]))
      cur = cur.substr(1);  // Skip optional sign.
    size_t exponent = GetDigitsLength(lang, cur);
    if (!exponent)
      return 0;  // Must have exponent digits to be a float.
    cur = cur.substr(exponent);
  }
  if (!has_dot && !has_exponent)
    return 0;  // Must have a dot or an exponent to be a float.

  // Consider all alphanumeric characters immediately following to be part of the token. This will
  // get any suffix characters but may get garbage also. The tokenizer isn't in charge of validating
  // floating point formatting, and something like "2.3hello" should be considered one invalid
  // floating-point token rather than a valid float followed by a valid identifier.
  //
  // isalnum() has undefined behavior for negative values, which a plain char can hold for
  // non-ASCII input, so convert through unsigned char.
  while (!cur.empty() && isalnum(static_cast<unsigned char>(cur[0])))
    cur = cur.substr(1);

  // |cur| is always a suffix of |input|, so the size difference is the consumed length.
  return input.size() - cur.size();
}
// Removes a trailing floating-point suffix character from |*view| and reports which kind it was.
// "f"/"F" gives kFloat and "l"/"L" gives kLong. When the view is empty or ends in anything else,
// it is left unchanged and kNone is returned.
FloatSuffix StripFloatSuffix(std::string_view* view) {
  FloatSuffix found = FloatSuffix::kNone;
  if (!view->empty()) {
    switch (view->back()) {
      case 'f':
      case 'F':
        found = FloatSuffix::kFloat;
        break;
      case 'l':
      case 'L':
        found = FloatSuffix::kLong;
        break;
      default:
        break;
    }
  }

  if (found != FloatSuffix::kNone)
    view->remove_suffix(1);
  return found;
}
// Converts a floating-point token to an ExprValue.
//
// The suffix selects the result: none gives an 8-byte float type, "f" a 4-byte one, and "l" (C
// only; ignored for other languages) a value of the host's long double size. Digit separators
// for the language are removed before conversion. Returns an Err when the converter does not
// consume the entire remaining text.
ErrOrValue ValueForFloatToken(ExprLanguage lang, const ExprToken& token) {
  FX_DCHECK(token.type() == ExprTokenType::kFloat);

  std::string_view text = token.value();
  FloatSuffix suffix = StripFloatSuffix(&text);
  if (suffix == FloatSuffix::kLong && lang != ExprLanguage::kC)
    suffix = FloatSuffix::kNone;  // Only C has a "long double" type.

  // Copy the text without digit separators.
  std::string cleaned;
  cleaned.reserve(text.size());
  for (char c : text) {
    if (IsDigitSeparator(lang, c))
      continue;
    cleaned.push_back(c);
  }
  const int cleaned_len = static_cast<int>(cleaned.size());

  double_conversion::StringToDoubleConverter converter(0, 0.0, nan(""), nullptr, nullptr);

  fxl::RefPtr<BaseType> result_type;
  std::vector<uint8_t> bytes;
  int used = 0;

  // Replaces |bytes| with the object representation of |scalar|.
  const auto pack = [&bytes](const auto& scalar) {
    bytes.resize(sizeof(scalar));
    memcpy(bytes.data(), &scalar, sizeof(scalar));
  };

  switch (suffix) {
    case FloatSuffix::kNone: {
      pack(converter.StringToDouble(cleaned.data(), cleaned_len, &used));
      result_type = GetBuiltinFloatType(lang, 8);
      break;
    }
    case FloatSuffix::kFloat: {
      pack(converter.StringToFloat(cleaned.data(), cleaned_len, &used));
      result_type = GetBuiltinFloatType(lang, 4);
      break;
    }
    case FloatSuffix::kLong: {
      // The parser doesn't support long doubles, but we can at least upcast if the local system
      // supports it.
      const double parsed = converter.StringToDouble(cleaned.data(), cleaned_len, &used);
      const long double widened = parsed;
      pack(widened);
      result_type = GetBuiltinFloatType(lang, bytes.size());
      break;
    }
  }

  if (used != cleaned_len)
    return Err("Trailing characters on floating-point constant.");
  return ExprValue(std::move(result_type), std::move(bytes));
}
} // namespace zxdb