src/developer/debug/zxdb/expr/number_parser.cc - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "src/developer/debug/zxdb/expr/number_parser.h"

 #include <ctype.h>
 #include <errno.h>
 #include <math.h>
 #include <stdlib.h>

 #include "src/developer/debug/zxdb/expr/builtin_types.h"
 #include "src/developer/debug/zxdb/expr/expr_token.h"
 #include "src/developer/debug/zxdb/expr/expr_value.h"
 #include "src/developer/debug/zxdb/symbols/base_type.h"
 #include "third_party/double-conversion/double-conversion/double-conversion.h"

 namespace zxdb {

 namespace {

 // Largest values representable by each fixed-width integer type, widened to uint64_t so they can be
 // compared directly against a parsed magnitude.
 constexpr uint64_t kSigned32Max{std::numeric_limits<int32_t>::max()};
 constexpr uint64_t kSigned64Max{std::numeric_limits<int64_t>::max()};
 constexpr uint64_t kUnsigned32Max{std::numeric_limits<uint32_t>::max()};
 constexpr uint64_t kUnsigned64Max{std::numeric_limits<uint64_t>::max()};

 // Magnitude of the most negative value of each signed type. Two's complement gives the negative
 // range one more value than the positive range, so each limit is the matching maximum plus one
 // (0x80000000 and 0x8000000000000000 respectively), computed in unsigned arithmetic.
 constexpr uint64_t kSigned32MaxAbsNeg = kSigned32Max + 1u;
 constexpr uint64_t kSigned64MaxAbsNeg = kSigned64Max + 1u;

 // This hardcodes our current 64-bit type scheme where "long" and "long long" are both 64 bits, and
 // "int" is 32. Note that we still support "long long" because it's surprising if you type "0x100ll"
 // and don't get something called "long long" back.
 //
 // C++ has more rules about whether the input has a specific base (hex numbers prefer to be unsigned
 // if possible), and the "l" suffix is particularly weird because it allows matching "unsigned long"
 // while no other decimal numbers will match unsigned types without "u". Our requirements don't need
 // all of these rules so keep things a bit simpler.
 //
 // See: https://en.cppreference.com/w/cpp/language/integer_literal
 struct TypeLookup {
   // Name given to the resulting type.
   const char* name;

   // Size of the type in bytes.
   size_t byte_size;

   // True for signed types, false for unsigned ones.
   bool type_signed;

   // The largest positive value held by this type.
   uint64_t max_positive;

   // Absolute value of the most negative value held by this type. In the case of unsigned types,
   // this should hold the same value as the corresponding signed type. This allows "-23u" to specify
   // an unsigned version of the type that would normally hold "-23".
   uint64_t max_abs_negative;

   // Longest length suffix this type can satisfy. A literal that requests a longer length than this
   // is not matched by the type: "l" allows "long" or "long long" but not "int", while a literal
   // with no length suffix can still be promoted to any of the larger types.
   IntegerSuffix::Length max_suffix;
 };

 // Candidate result types, searched in order so the first (smallest) acceptable entry wins. The
 // table is never modified, so it is a compile-time constant.
 constexpr TypeLookup kTypeLookup[] = {
     // clang-format off
     // Name            bytes, signed max_positive    max_abs_negative,   max_suffix
     {"int",                4, true,  kSigned32Max,   kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
     {"unsigned",           4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
     {"long",               8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
     {"unsigned long",      8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
     {"long long",          8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
     {"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
     // clang-format on
 };

 // Reports whether |c| is the character the given language allows between the digits of a numeric
 // literal: an apostrophe for C and an underscore for Rust. Any other language value has no
 // separator.
 bool IsDigitSeparator(ExprLanguage lang, char c) {
   if (lang == ExprLanguage::kC)
     return c == '\'';
   if (lang == ExprLanguage::kRust)
     return c == '_';
   return false;
 }

 // Reports whether |c| may appear in the digit portion of an integer written in |base|. Digit
 // separators for |lang| are accepted in every base. Only bases 2, 8, 10, and 16 are handled; any
 // other base value accepts nothing but separators.
 bool ValidForBase(ExprLanguage lang, IntegerPrefix::Base base, char c) {
   if (IsDigitSeparator(lang, c))
     return true;

   const bool is_decimal_digit = c >= '0' && c <= '9';

   if (base == IntegerPrefix::kBin)
     return c == '0' || c == '1';
   if (base == IntegerPrefix::kOct)
     return c >= '0' && c <= '7';
   if (base == IntegerPrefix::kDec)
     return is_decimal_digit;
   if (base == IntegerPrefix::kHex) {
     const bool is_upper_hex = c >= 'A' && c <= 'F';
     const bool is_lower_hex = c >= 'a' && c <= 'f';
     return is_decimal_digit || is_upper_hex || is_lower_hex;
   }
   return false;
 }

 // Returns the length of a <digits> sequence (decimal digits, also allowing the digit separators of
 // |lang|) starting at the beginning of the input. Returns 0 if the input does not start with one.
 //
 // Digits are matched with an explicit range check rather than isdigit(): passing a plain char with
 // a negative value (any byte >= 0x80 where char is signed) to the <ctype.h> functions is undefined
 // behavior.
 size_t GetDigitsLength(ExprLanguage lang, std::string_view input) {
   size_t length = 0;
   for (char c : input) {
     const bool is_decimal_digit = c >= '0' && c <= '9';
     if (!is_decimal_digit && !IsDigitSeparator(lang, c))
       break;
     ++length;
   }
   return length;
 }

 // True for the characters that introduce a floating-point exponent ("e" or "E").
 bool IsExponentCharacter(char c) {
   switch (c) {
     case 'e':
     case 'E':
       return true;
     default:
       return false;
   }
 }

 // True for an explicit sign character ("+" or "-").
 bool IsSign(char c) {
   switch (c) {
     case '+':
     case '-':
       return true;
     default:
       return false;
   }
 }

 }  // namespace

 // Converts the text of an integer literal to a value.
 //
 // The input may have a leading "-", a base prefix ("0x", "0b", "0o"), digit separators for |lang|,
 // and a "u" / "l" / "ll" suffix. The result uses the first entry of kTypeLookup that can hold the
 // value and satisfies the suffix.
 //
 // Returns an error when:
 //  - The number uses a C-style octal prefix (a leading "0").
 //  - The suffix is malformed (reported by ExtractIntegerSuffix).
 //  - There are no digits, or a character is not valid for the base.
 //  - The magnitude does not fit in 64 bits.
 ErrOrValue StringToNumber(ExprLanguage lang, std::string_view str) {
   IntegerPrefix prefix = ExtractIntegerPrefix(&str);
   if (prefix.base == IntegerPrefix::kOct && prefix.octal_type == IntegerPrefix::OctalType::kC) {
     // Require "0o" prefixes for octal numbers instead of allowing C-style "0" prefixes. Octal
     // numbers are very unusual to be typed interactively in a debugger, and it's easier to
     // accidentally copy-and-paste a decimal number with a "0" at the beginning and get surprising
     // results. The "0o" format is used by Rust so we require it for clarity.
     return Err("Octal numbers must be prefixed with '0o'.");
   }

   auto suffix = ExtractIntegerSuffix(&str);
   if (suffix.has_error())
     return suffix.err();

   if (str.empty())
     return Err("Expected a number.");

   // Validate the characters in the number. This prevents strtoull from being too smart and trying
   // to handle prefixes itself. We also remove the separators.
   std::string digits;
   digits.reserve(str.size());
   for (char c : str) {
     if (!ValidForBase(lang, prefix.base, c))
       return Err("Invalid character in number.");
     if (!IsDigitSeparator(lang, c))
       digits.push_back(c);
   }

   // Input made up only of separators leaves nothing to convert. strtoull would report 0 for the
   // empty string without any indication of failure, so reject it here.
   if (digits.empty())
     return Err("Expected a number.");

   // strtoull doesn't take a const ending, but it doesn't modify the input.
   char* digits_end = &digits[digits.size()];
   char* parsed_end = digits_end;

   // This will be the absolute value of the returned number. The base enum is passed to strtoull as
   // the numeric radix. On overflow strtoull still consumes every digit and returns the maximum
   // value, so the only way to detect it is through errno.
   errno = 0;
   uint64_t abs_value = strtoull(digits.data(), &parsed_end, static_cast<int>(prefix.base));

   // If strtoull stopped early it means it hit an invalid character (shouldn't happen since we
   // validated above).
   if (parsed_end != digits_end)
     return Err("Invalid number.");
   if (errno == ERANGE)
     return Err("Number is too large to fit in 64 bits.");

   // Pick the smallest type that fits the data size as well as satisfies any suffixes.
   const bool is_negative = prefix.sign == IntegerPrefix::kNegative;
   const TypeLookup* matched_type = nullptr;
   for (const auto& cur : kTypeLookup) {
     // Type must hold enough data.
     const uint64_t limit = is_negative ? cur.max_abs_negative : cur.max_positive;
     if (abs_value > limit)
       continue;

     if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.value().length))
       continue;  // Requested length is larger.

     if (suffix.value().type_signed == IntegerSuffix::kUnsigned) {
       if (cur.type_signed)
         continue;  // Unsigned suffix requires unsigned type.
     } else if (is_negative && !cur.type_signed) {
       // Signed input requires a signed type unless a suffix overrode it which was checked above
       // ("-1u" should be unsigned).
       continue;
     }

     matched_type = &cur;
     break;
   }

   if (!matched_type) {
     // Anything not matched above will be an overflow. Put it into an unsigned 64-bit value (the
     // last table entry) and tolerate the overflow.
     matched_type = std::end(kTypeLookup) - 1;
   }

   int symbol_tag =
       matched_type->type_signed ? BaseType::kBaseTypeSigned : BaseType::kBaseTypeUnsigned;
   auto type =
       fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size, matched_type->name);

   // Negating the unsigned magnitude wraps modulo 2^64, which yields the two's complement bits.
   uint64_t value = is_negative ? -abs_value : abs_value;

   // Construct the data. This assumes little-endian since it truncates or zero-fills off the right.
   std::vector<uint8_t> data(matched_type->byte_size);
   memcpy(data.data(), &value, matched_type->byte_size);

   return ExprValue(std::move(type), std::move(data));
 }

 // Strips the sign and base prefix from the beginning of |*s| and returns what was found. On return
 // |*s| begins at the first character after the prefix.
 //
 // Recognized prefixes are an optional "-" (optionally followed by whitespace), then "0x"/"0X"
 // (hex), "0b"/"0B" (binary), "0o"/"0O" (Rust-style octal), or a "0" followed by another digit or a
 // digit separator (C-style octal). Everything else, including "0" by itself or "0" followed
 // directly by a suffix such as "0u", is left as decimal with the digits untouched.
 IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
   IntegerPrefix prefix;
   if (s->empty())
     return prefix;  // Defaults OK for empty string.

   if (s->front() == '-') {
     prefix.sign = IntegerPrefix::kNegative;

     // Allow whitespace between negative sign and the rest. The cast is required because passing a
     // negative char value to isspace() is undefined behavior.
     size_t sign_len = 1;
     while (sign_len < s->size() && isspace(static_cast<unsigned char>((*s)[sign_len])))
       sign_len++;
     s->remove_prefix(sign_len);
   }

   if (s->size() < 2u || s->front() != '0')
     return prefix;  // Decimal, doesn't need trimming, default is already correct.

   const char second = (*s)[1];
   if (second == 'x' || second == 'X') {
     // Hex.
     s->remove_prefix(2u);
     prefix.base = IntegerPrefix::kHex;
   } else if (second == 'b' || second == 'B') {
     // Binary.
     s->remove_prefix(2u);
     prefix.base = IntegerPrefix::kBin;
   } else if (second == 'o' || second == 'O') {
     // Rust-style octal "0o".
     s->remove_prefix(2u);
     prefix.base = IntegerPrefix::kOct;
     prefix.octal_type = IntegerPrefix::OctalType::kRust;
   } else if ((second >= '0' && second <= '9') || second == '\'' || second == '_') {
     // A leading '0' followed by more digits (or a digit separator) is C-style octal. Note this
     // requires >= 2 digits so that "0" by itself is decimal. Requiring a digit here also keeps a
     // plain zero with a suffix ("0u", "0l", "0ull") from being misclassified as octal.
     s->remove_prefix(1u);
     prefix.base = IntegerPrefix::kOct;
     prefix.octal_type = IntegerPrefix::OctalType::kC;
   }
   // Else case is decimal, doesn't need trimming, default is already correct.
   return prefix;
 }

 // Strips a trailing integer suffix from |*s| and returns its meaning. The suffix is any combination
 // of one "u" and one "l" or "ll" (either case, either order). A repeated "u" or a repeated length
 // marker is reported as an error; a string with no suffix yields a default IntegerSuffix and is
 // left unchanged.
 ErrOr<IntegerSuffix> ExtractIntegerSuffix(std::string_view* s) {
   const auto is_unsigned_char = [](char c) { return c == 'U' || c == 'u'; };
   const auto is_length_char = [](char c) { return c == 'L' || c == 'l'; };

   IntegerSuffix result;
   bool seen_unsigned = false;
   bool seen_length = false;

   // Scan from the end toward the front so both orderings ("ul" and "lu") are handled in one pass.
   // |end| is the index one past the last character that is not part of the suffix.
   size_t end = s->size();
   for (;;) {
     if (end == 0)
       break;

     const char last = (*s)[end - 1];
     if (is_unsigned_char(last)) {
       if (seen_unsigned)
         return Err("Duplicate 'u' in number suffix.");
       seen_unsigned = true;

       result.type_signed = IntegerSuffix::kUnsigned;
       end -= 1;
       continue;
     }

     if (!is_length_char(last))
       break;  // Not a suffix character, the suffix is complete.

     if (seen_length)
       return Err("Duplicate 'l' or 'll' in number suffix.");
     seen_length = true;

     // Two adjacent length characters mean "long long". Technically C++ says "Ll" and "lL" aren't
     // allowed, but we don't bother enforcing this.
     const bool is_long_long = end > 1 && is_length_char((*s)[end - 2]);
     result.length = is_long_long ? IntegerSuffix::Length::kLongLong : IntegerSuffix::Length::kLong;
     end -= is_long_long ? 2 : 1;
   }

   s->remove_suffix(s->size() - end);
   return result;
 }

 // Returns the number of characters at the beginning of |input| that form a floating-point token, or
 // 0 if the input does not begin with one.
 //
 // The floating-point format we expect is:
 //
 //   <digits> := ("0" - "9") | "_" | "'"
 //
 //   <float> := ( <significand> [<exponent>] [<suffix>] ) |
 //              ( <digits> <exponent> [<suffix>] )
 //
 //   <significand> := ( <digits> "." <digits> ) |
 //                    ( "." <digits> ) |
 //                    ( <digits> "." )
 //
 //   <exponent> := ("e" | "E") [("+" | "-")] <digits>
 //
 //   <suffix> := "f" | "F" | "l" | "L"
 //
 // In other words, a floating point number must have either a "." or an "e", and a "." must have
 // digits on at least one side of it.
 //
 // Rust requires that there be digits before a ".". This is important to disambiguate cases like
 // "tuple.0" as being "tuple dot zero" from "tuple float-zero".
 //
 // TODO(bug 43220) Handle Rust-specific suffixes.
 // TODO(bug 43222) Support C++17 hex floating point literals "0x342.1a"
 size_t GetFloatTokenLength(ExprLanguage lang, std::string_view input) {
   std::string_view cur = input;

   // Digits before the dot.
   const size_t before_dot = GetDigitsLength(lang, cur);
   cur.remove_prefix(before_dot);
   if (lang == ExprLanguage::kRust && before_dot == 0)
     return 0;  // Rust floats must start with digits.

   // "."
   bool has_dot = false;
   if (!cur.empty() && cur[0] == '.') {
     has_dot = true;
     cur.remove_prefix(1);
   }
   if (!before_dot && !has_dot)
     return 0;  // Must begin with digits or a dot to be a float.

   // Digits after the dot.
   const size_t after_dot = GetDigitsLength(lang, cur);
   cur.remove_prefix(after_dot);
   if (has_dot && !before_dot && !after_dot)
     return 0;  // A dot must have digits on at least one side.

   // Optional exponent.
   bool has_exponent = false;
   if (!cur.empty() && IsExponentCharacter(cur[0])) {
     has_exponent = true;
     cur.remove_prefix(1);

     if (!cur.empty() && IsSign(cur[0]))
       cur.remove_prefix(1);  // Skip optional sign.

     const size_t exponent = GetDigitsLength(lang, cur);
     if (!exponent)
       return 0;  // Must have exponent digits to be a float.
     cur.remove_prefix(exponent);
   }
   if (!has_dot && !has_exponent)
     return 0;  // Must have a dot or an exponent to be a float.

   // Consider all alphanumeric characters immediately following to be part of the token. This will
   // get any suffix characters but may get garbage also. The tokenizer isn't in charge of validating
   // floating point formatting, and something like "2.3hello" should be considered one invalid
   // floating-point token rather than a valid float followed by a valid identifier.
   //
   // The cast is required because passing a negative char value to isalnum() is undefined behavior.
   while (!cur.empty() && isalnum(static_cast<unsigned char>(cur[0])))
     cur.remove_prefix(1);

   // |cur| is always a suffix of |input|, so the difference in sizes is the consumed length.
   return input.size() - cur.size();
 }

 // Removes a single trailing floating-point suffix character from |*view| and reports which one it
 // was: "f"/"F" gives kFloat and "l"/"L" gives kLong. When the view is empty or ends in any other
 // character it is left unchanged and kNone is returned.
 FloatSuffix StripFloatSuffix(std::string_view* view) {
   if (view->empty())
     return FloatSuffix::kNone;

   FloatSuffix found = FloatSuffix::kNone;
   switch (view->back()) {
     case 'f':
     case 'F':
       found = FloatSuffix::kFloat;
       break;
     case 'l':
     case 'L':
       found = FloatSuffix::kLong;
       break;
     default:
       return FloatSuffix::kNone;  // No suffix, nothing to trim.
   }

   view->remove_suffix(1);
   return found;
 }

 // Converts a floating-point token to a value. The token's suffix picks the result type: none gives
 // an 8-byte float type, "f" a 4-byte one, and "l" (C only) one the size of the host's long double.
 // Returns an error if the converter does not consume every character of the number.
 ErrOrValue ValueForFloatToken(ExprLanguage lang, const ExprToken& token) {
   FX_DCHECK(token.type() == ExprTokenType::kFloat);

   std::string_view text = token.value();
   FloatSuffix suffix = StripFloatSuffix(&text);
   if (suffix == FloatSuffix::kLong && lang != ExprLanguage::kC)
     suffix = FloatSuffix::kNone;  // Only C has a "long double" type.

   // Copy the number without its digit separators.
   std::string cleaned;
   cleaned.reserve(text.size());
   for (char c : text) {
     if (IsDigitSeparator(lang, c))
       continue;
     cleaned.push_back(c);
   }
   const int cleaned_length = static_cast<int>(cleaned.size());

   double_conversion::StringToDoubleConverter converter(0, 0.0, nan(""), nullptr, nullptr);

   // Replaces the contents of |data| with the raw bytes of a parsed value.
   std::vector<uint8_t> data;
   const auto store_bytes = [&data](const void* source, size_t size) {
     data.resize(size);
     memcpy(data.data(), source, size);
   };

   fxl::RefPtr<BaseType> type;
   int consumed = 0;
   switch (suffix) {
     case FloatSuffix::kNone: {
       const double parsed = converter.StringToDouble(cleaned.data(), cleaned_length, &consumed);
       store_bytes(&parsed, sizeof(double));
       type = GetBuiltinFloatType(lang, 8);
       break;
     }
     case FloatSuffix::kFloat: {
       const float parsed = converter.StringToFloat(cleaned.data(), cleaned_length, &consumed);
       store_bytes(&parsed, sizeof(float));
       type = GetBuiltinFloatType(lang, 4);
       break;
     }
     case FloatSuffix::kLong: {
       // The parser doesn't support long doubles, but we can at least upcast if the local system
       // supports it.
       const double parsed = converter.StringToDouble(cleaned.data(), cleaned_length, &consumed);
       const long double widened = parsed;
       store_bytes(&widened, sizeof(long double));
       type = GetBuiltinFloatType(lang, data.size());
       break;
     }
   }

   if (consumed != cleaned_length)
     return Err("Trailing characters on floating-point constant.");

   return ExprValue(std::move(type), std::move(data));
 }

 }  // namespace zxdb
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "src/developer/debug/zxdb/expr/number_parser.h"

	#include <ctype.h>
	#include <math.h>
	#include <stdlib.h>

	#include "src/developer/debug/zxdb/expr/builtin_types.h"
	#include "src/developer/debug/zxdb/expr/expr_token.h"
	#include "src/developer/debug/zxdb/expr/expr_value.h"
	#include "src/developer/debug/zxdb/symbols/base_type.h"
	#include "third_party/double-conversion/double-conversion/double-conversion.h"

	namespace zxdb {

	namespace {

	// Largest values representable by each fixed-width integer type, widened to uint64_t so they can be
	// compared directly against a parsed magnitude.
	constexpr uint64_t kSigned32Max{std::numeric_limits<int32_t>::max()};
	constexpr uint64_t kSigned64Max{std::numeric_limits<int64_t>::max()};
	constexpr uint64_t kUnsigned32Max{std::numeric_limits<uint32_t>::max()};
	constexpr uint64_t kUnsigned64Max{std::numeric_limits<uint64_t>::max()};

	// Magnitude of the most negative value of each signed type. Two's complement gives the negative
	// range one more value than the positive range, so each limit is the matching maximum plus one
	// (0x80000000 and 0x8000000000000000 respectively), computed in unsigned arithmetic.
	constexpr uint64_t kSigned32MaxAbsNeg = kSigned32Max + 1u;
	constexpr uint64_t kSigned64MaxAbsNeg = kSigned64Max + 1u;

	// This hardcodes our current 64-bit type scheme where "long" and "long long" are both 64 bits, and
	// "int" is 32. Note that we still support "long long" because it's surprising if you type "0x100ll"
	// and don't get something called "long long" back.
	//
	// C++ has more rules about whether the input has a specific base (hex numbers prefer to be unsigned
	// if possible), and the "l" suffix is particularly weird because it allows matching "unsigned long"
	// while no other decimal numbers will match unsigned types without "u". Our requirements don't need
	// all of these rules so keep things a bit simpler.
	//
	// See: https://en.cppreference.com/w/cpp/language/integer_literal
	struct TypeLookup {
	  // Name given to the resulting type.
	  const char* name;

	  // Size of the type in bytes.
	  size_t byte_size;

	  // True for signed types, false for unsigned ones.
	  bool type_signed;

	  // The largest positive value held by this type.
	  uint64_t max_positive;

	  // Absolute value of the most negative value held by this type. In the case of unsigned types,
	  // this should hold the same value as the corresponding signed type. This allows "-23u" to specify
	  // an unsigned version of the type that would normally hold "-23".
	  uint64_t max_abs_negative;

	  // Longest length suffix this type can satisfy. A literal that requests a longer length than this
	  // is not matched by the type: "l" allows "long" or "long long" but not "int", while a literal
	  // with no length suffix can still be promoted to any of the larger types.
	  IntegerSuffix::Length max_suffix;
	};

	// Candidate result types, searched in order so the first (smallest) acceptable entry wins. The
	// table is never modified, so it is a compile-time constant.
	constexpr TypeLookup kTypeLookup[] = {
	    // clang-format off
	    // Name            bytes, signed max_positive    max_abs_negative,   max_suffix
	    {"int",                4, true,  kSigned32Max,   kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
	    {"unsigned",           4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
	    {"long",               8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
	    {"unsigned long",      8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
	    {"long long",          8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
	    {"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
	    // clang-format on
	};

	// Reports whether |c| is the character the given language allows between the digits of a numeric
	// literal: an apostrophe for C and an underscore for Rust. Any other language value has no
	// separator.
	bool IsDigitSeparator(ExprLanguage lang, char c) {
	  if (lang == ExprLanguage::kC)
	    return c == '\'';
	  if (lang == ExprLanguage::kRust)
	    return c == '_';
	  return false;
	}

	// Reports whether |c| may appear in the digit portion of an integer written in |base|. Digit
	// separators for |lang| are accepted in every base. Only bases 2, 8, 10, and 16 are handled; any
	// other base value accepts nothing but separators.
	//
	// The logical-or operators are written as plain "||"; the backslash-escaped form is not valid
	// C++.
	bool ValidForBase(ExprLanguage lang, IntegerPrefix::Base base, char c) {
	  if (IsDigitSeparator(lang, c))
	    return true;

	  switch (base) {
	    case IntegerPrefix::kBin:
	      return c == '0' || c == '1';
	    case IntegerPrefix::kOct:
	      return c >= '0' && c <= '7';
	    case IntegerPrefix::kDec:
	      return c >= '0' && c <= '9';
	    case IntegerPrefix::kHex:
	      return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
	  }
	  return false;
	}

	// Returns the length of a <digits> sequence (decimal digits, also allowing the digit separators of
	// |lang|) starting at the beginning of the input. Returns 0 if the input does not start with one.
	//
	// Digits are matched with an explicit range check rather than isdigit(): passing a plain char with
	// a negative value (any byte >= 0x80 where char is signed) to the <ctype.h> functions is undefined
	// behavior.
	size_t GetDigitsLength(ExprLanguage lang, std::string_view input) {
	  size_t length = 0;
	  for (char c : input) {
	    const bool is_decimal_digit = c >= '0' && c <= '9';
	    if (!is_decimal_digit && !IsDigitSeparator(lang, c))
	      break;
	    ++length;
	  }
	  return length;
	}

	// True for the characters that introduce a floating-point exponent ("e" or "E").
	bool IsExponentCharacter(char c) { return c == 'e' || c == 'E'; }

	// True for an explicit sign character ("+" or "-").
	bool IsSign(char c) { return c == '+' || c == '-'; }

	} // namespace

	// Converts the text of an integer literal to a value.
	//
	// The input may have a leading "-", a base prefix ("0x", "0b", "0o"), digit separators for |lang|,
	// and a "u" / "l" / "ll" suffix. The result uses the first entry of kTypeLookup that can hold the
	// value and satisfies the suffix.
	//
	// Returns an error when:
	//  - The number uses a C-style octal prefix (a leading "0").
	//  - The suffix is malformed (reported by ExtractIntegerSuffix).
	//  - There are no digits, or a character is not valid for the base.
	//  - The magnitude does not fit in 64 bits.
	ErrOrValue StringToNumber(ExprLanguage lang, std::string_view str) {
	  IntegerPrefix prefix = ExtractIntegerPrefix(&str);
	  if (prefix.base == IntegerPrefix::kOct && prefix.octal_type == IntegerPrefix::OctalType::kC) {
	    // Require "0o" prefixes for octal numbers instead of allowing C-style "0" prefixes. Octal
	    // numbers are very unusual to be typed interactively in a debugger, and it's easier to
	    // accidentally copy-and-paste a decimal number with a "0" at the beginning and get surprising
	    // results. The "0o" format is used by Rust so we require it for clarity.
	    return Err("Octal numbers must be prefixed with '0o'.");
	  }

	  auto suffix = ExtractIntegerSuffix(&str);
	  if (suffix.has_error())
	    return suffix.err();

	  if (str.empty())
	    return Err("Expected a number.");

	  // Validate the characters in the number. This prevents strtoull from being too smart and trying
	  // to handle prefixes itself. We also remove the separators.
	  std::string digits;
	  digits.reserve(str.size());
	  for (char c : str) {
	    if (!ValidForBase(lang, prefix.base, c))
	      return Err("Invalid character in number.");
	    if (!IsDigitSeparator(lang, c))
	      digits.push_back(c);
	  }

	  // Input made up only of separators leaves nothing to convert. strtoull would report 0 for the
	  // empty string without any indication of failure, so reject it here.
	  if (digits.empty())
	    return Err("Expected a number.");

	  // strtoull doesn't take a const ending, but it doesn't modify the input.
	  char* digits_end = &digits[digits.size()];
	  char* parsed_end = digits_end;

	  // This will be the absolute value of the returned number. The base enum is passed to strtoull as
	  // the numeric radix. On overflow strtoull still consumes every digit and returns the maximum
	  // value, so the only way to detect it is through errno.
	  errno = 0;
	  uint64_t abs_value = strtoull(digits.data(), &parsed_end, static_cast<int>(prefix.base));

	  // If strtoull stopped early it means it hit an invalid character (shouldn't happen since we
	  // validated above).
	  if (parsed_end != digits_end)
	    return Err("Invalid number.");
	  if (errno == ERANGE)
	    return Err("Number is too large to fit in 64 bits.");

	  // Pick the smallest type that fits the data size as well as satisfies any suffixes.
	  const bool is_negative = prefix.sign == IntegerPrefix::kNegative;
	  const TypeLookup* matched_type = nullptr;
	  for (const auto& cur : kTypeLookup) {
	    // Type must hold enough data.
	    const uint64_t limit = is_negative ? cur.max_abs_negative : cur.max_positive;
	    if (abs_value > limit)
	      continue;

	    if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.value().length))
	      continue;  // Requested length is larger.

	    if (suffix.value().type_signed == IntegerSuffix::kUnsigned) {
	      if (cur.type_signed)
	        continue;  // Unsigned suffix requires unsigned type.
	    } else if (is_negative && !cur.type_signed) {
	      // Signed input requires a signed type unless a suffix overrode it which was checked above
	      // ("-1u" should be unsigned).
	      continue;
	    }

	    matched_type = &cur;
	    break;
	  }

	  if (!matched_type) {
	    // Anything not matched above will be an overflow. Put it into an unsigned 64-bit value (the
	    // last table entry) and tolerate the overflow.
	    matched_type = std::end(kTypeLookup) - 1;
	  }

	  int symbol_tag =
	      matched_type->type_signed ? BaseType::kBaseTypeSigned : BaseType::kBaseTypeUnsigned;
	  auto type =
	      fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size, matched_type->name);

	  // Negating the unsigned magnitude wraps modulo 2^64, which yields the two's complement bits.
	  uint64_t value = is_negative ? -abs_value : abs_value;

	  // Construct the data. This assumes little-endian since it truncates or zero-fills off the right.
	  std::vector<uint8_t> data(matched_type->byte_size);
	  memcpy(data.data(), &value, matched_type->byte_size);

	  return ExprValue(std::move(type), std::move(data));
	}

	// Strips the sign and base prefix from the beginning of |*s| and returns what was found. On return
	// |*s| begins at the first character after the prefix.
	//
	// Recognized prefixes are an optional "-" (optionally followed by whitespace), then "0x"/"0X"
	// (hex), "0b"/"0B" (binary), "0o"/"0O" (Rust-style octal), or a "0" followed by another digit or a
	// digit separator (C-style octal). Everything else, including "0" by itself or "0" followed
	// directly by a suffix such as "0u", is left as decimal with the digits untouched.
	IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
	  IntegerPrefix prefix;
	  if (s->empty())
	    return prefix;  // Defaults OK for empty string.

	  if (s->front() == '-') {
	    prefix.sign = IntegerPrefix::kNegative;

	    // Allow whitespace between negative sign and the rest. The cast is required because passing a
	    // negative char value to isspace() is undefined behavior.
	    size_t sign_len = 1;
	    while (sign_len < s->size() && isspace(static_cast<unsigned char>((*s)[sign_len])))
	      sign_len++;
	    s->remove_prefix(sign_len);
	  }

	  if (s->size() < 2u || s->front() != '0')
	    return prefix;  // Decimal, doesn't need trimming, default is already correct.

	  const char second = (*s)[1];
	  if (second == 'x' || second == 'X') {
	    // Hex.
	    s->remove_prefix(2u);
	    prefix.base = IntegerPrefix::kHex;
	  } else if (second == 'b' || second == 'B') {
	    // Binary.
	    s->remove_prefix(2u);
	    prefix.base = IntegerPrefix::kBin;
	  } else if (second == 'o' || second == 'O') {
	    // Rust-style octal "0o".
	    s->remove_prefix(2u);
	    prefix.base = IntegerPrefix::kOct;
	    prefix.octal_type = IntegerPrefix::OctalType::kRust;
	  } else if ((second >= '0' && second <= '9') || second == '\'' || second == '_') {
	    // A leading '0' followed by more digits (or a digit separator) is C-style octal. Note this
	    // requires >= 2 digits so that "0" by itself is decimal. Requiring a digit here also keeps a
	    // plain zero with a suffix ("0u", "0l", "0ull") from being misclassified as octal.
	    s->remove_prefix(1u);
	    prefix.base = IntegerPrefix::kOct;
	    prefix.octal_type = IntegerPrefix::OctalType::kC;
	  }
	  // Else case is decimal, doesn't need trimming, default is already correct.
	  return prefix;
	}

	// Strips a trailing integer suffix from |*s| and returns its meaning. The suffix is any combination
	// of one "u" and one "l" or "ll" (either case, either order). A repeated "u" or a repeated length
	// marker is reported as an error; a string with no suffix yields a default IntegerSuffix and is
	// left unchanged.
	ErrOr<IntegerSuffix> ExtractIntegerSuffix(std::string_view* s) {
	  IntegerSuffix suffix;

	  // Check for any combination of "u" and either "l" or "ll". This works backwards to avoid two
	  // passes since the suffix means the same in either order.
	  bool have_unsigned = false;
	  bool have_length = false;
	  size_t suffix_begin = s->size();
	  while (suffix_begin > 0) {
	    char prev_char = (*s)[suffix_begin - 1];
	    if (prev_char == 'U' || prev_char == 'u') {
	      // Unsigned suffix.
	      if (have_unsigned)
	        return Err("Duplicate 'u' in number suffix.");
	      have_unsigned = true;

	      suffix.type_signed = IntegerSuffix::kUnsigned;
	      suffix_begin--;
	    } else if (prev_char == 'L' || prev_char == 'l') {
	      // Suffix has an "l", disambiguate based on previous char.
	      if (have_length)
	        return Err("Duplicate 'l' or 'll' in number suffix.");
	      have_length = true;

	      // Technically C++ says "Ll" and "lL" aren't allowed, but we don't bother enforcing this.
	      // |s| is a pointer, so it must be dereferenced before indexing into the characters.
	      if (suffix_begin > 1 && ((*s)[suffix_begin - 2] == 'l' || (*s)[suffix_begin - 2] == 'L')) {
	        // "ll" = Long long.
	        suffix.length = IntegerSuffix::Length::kLongLong;
	        suffix_begin -= 2;
	      } else {
	        // "l" by itself = Long.
	        suffix.length = IntegerSuffix::Length::kLong;
	        suffix_begin--;
	      }
	    } else {
	      // Not a valid suffix character, stop.
	      break;
	    }
	  }

	  *s = s->substr(0, suffix_begin);
	  return suffix;
	}

	// Returns the number of characters at the beginning of |input| that form a floating-point token, or
	// 0 if the input does not begin with one.
	//
	// The floating-point format we expect is:
	//
	//   <digits> := ("0" - "9") | "_" | "'"
	//
	//   <float> := ( <significand> [<exponent>] [<suffix>] ) |
	//              ( <digits> <exponent> [<suffix>] )
	//
	//   <significand> := ( <digits> "." <digits> ) |
	//                    ( "." <digits> ) |
	//                    ( <digits> "." )
	//
	//   <exponent> := ("e" | "E") [("+" | "-")] <digits>
	//
	//   <suffix> := "f" | "F" | "l" | "L"
	//
	// In other words, a floating point number must have either a "." or an "e", and a "." must have
	// digits on at least one side of it.
	//
	// Rust requires that there be digits before a ".". This is important to disambiguate cases like
	// "tuple.0" as being "tuple dot zero" from "tuple float-zero".
	//
	// TODO(bug 43220) Handle Rust-specific suffixes.
	// TODO(bug 43222) Support C++17 hex floating point literals "0x342.1a"
	size_t GetFloatTokenLength(ExprLanguage lang, std::string_view input) {
	  std::string_view cur = input;

	  // Digits before the dot.
	  const size_t before_dot = GetDigitsLength(lang, cur);
	  cur.remove_prefix(before_dot);
	  if (lang == ExprLanguage::kRust && before_dot == 0)
	    return 0;  // Rust floats must start with digits.

	  // "."
	  bool has_dot = false;
	  if (!cur.empty() && cur[0] == '.') {
	    has_dot = true;
	    cur.remove_prefix(1);
	  }
	  if (!before_dot && !has_dot)
	    return 0;  // Must begin with digits or a dot to be a float.

	  // Digits after the dot.
	  const size_t after_dot = GetDigitsLength(lang, cur);
	  cur.remove_prefix(after_dot);
	  if (has_dot && !before_dot && !after_dot)
	    return 0;  // A dot must have digits on at least one side.

	  // Optional exponent.
	  bool has_exponent = false;
	  if (!cur.empty() && IsExponentCharacter(cur[0])) {
	    has_exponent = true;
	    cur.remove_prefix(1);

	    if (!cur.empty() && IsSign(cur[0]))
	      cur.remove_prefix(1);  // Skip optional sign.

	    const size_t exponent = GetDigitsLength(lang, cur);
	    if (!exponent)
	      return 0;  // Must have exponent digits to be a float.
	    cur.remove_prefix(exponent);
	  }
	  if (!has_dot && !has_exponent)
	    return 0;  // Must have a dot or an exponent to be a float.

	  // Consider all alphanumeric characters immediately following to be part of the token. This will
	  // get any suffix characters but may get garbage also. The tokenizer isn't in charge of validating
	  // floating point formatting, and something like "2.3hello" should be considered one invalid
	  // floating-point token rather than a valid float followed by a valid identifier.
	  //
	  // The cast is required because passing a negative char value to isalnum() is undefined behavior.
	  while (!cur.empty() && isalnum(static_cast<unsigned char>(cur[0])))
	    cur.remove_prefix(1);

	  // |cur| is always a suffix of |input|, so the difference in sizes is the consumed length.
	  return input.size() - cur.size();
	}

	// Removes a single trailing floating-point suffix character from |*view| and reports which one it
	// was: "f"/"F" gives kFloat and "l"/"L" gives kLong. When the view is empty or ends in any other
	// character it is left unchanged and kNone is returned.
	FloatSuffix StripFloatSuffix(std::string_view* view) {
	  if (view->empty())
	    return FloatSuffix::kNone;

	  const char back = view->back();

	  if (back == 'f' || back == 'F') {
	    view->remove_suffix(1);
	    return FloatSuffix::kFloat;
	  }

	  if (back == 'l' || back == 'L') {
	    view->remove_suffix(1);
	    return FloatSuffix::kLong;
	  }

	  return FloatSuffix::kNone;
	}

	// Converts a floating-point token to a value. The token's suffix picks the result type: none gives
	// an 8-byte float type, "f" a 4-byte one, and "l" (C only) one the size of the host's long double.
	// Returns an error if the converter does not consume every character of the number.
	ErrOrValue ValueForFloatToken(ExprLanguage lang, const ExprToken& token) {
	  FX_DCHECK(token.type() == ExprTokenType::kFloat);

	  std::string_view text = token.value();
	  FloatSuffix suffix = StripFloatSuffix(&text);
	  if (suffix == FloatSuffix::kLong && lang != ExprLanguage::kC)
	    suffix = FloatSuffix::kNone;  // Only C has a "long double" type.

	  // Copy the number without its digit separators.
	  std::string cleaned;
	  cleaned.reserve(text.size());
	  for (char c : text) {
	    if (IsDigitSeparator(lang, c))
	      continue;
	    cleaned.push_back(c);
	  }
	  const int cleaned_length = static_cast<int>(cleaned.size());

	  double_conversion::StringToDoubleConverter converter(0, 0.0, nan(""), nullptr, nullptr);

	  // Replaces the contents of |data| with the raw bytes of a parsed value.
	  std::vector<uint8_t> data;
	  const auto store_bytes = [&data](const void* source, size_t size) {
	    data.resize(size);
	    memcpy(data.data(), source, size);
	  };

	  fxl::RefPtr<BaseType> type;
	  int consumed = 0;
	  switch (suffix) {
	    case FloatSuffix::kNone: {
	      const double parsed = converter.StringToDouble(cleaned.data(), cleaned_length, &consumed);
	      store_bytes(&parsed, sizeof(double));
	      type = GetBuiltinFloatType(lang, 8);
	      break;
	    }
	    case FloatSuffix::kFloat: {
	      const float parsed = converter.StringToFloat(cleaned.data(), cleaned_length, &consumed);
	      store_bytes(&parsed, sizeof(float));
	      type = GetBuiltinFloatType(lang, 4);
	      break;
	    }
	    case FloatSuffix::kLong: {
	      // The parser doesn't support long doubles, but we can at least upcast if the local system
	      // supports it.
	      const double parsed = converter.StringToDouble(cleaned.data(), cleaned_length, &consumed);
	      const long double widened = parsed;
	      store_bytes(&widened, sizeof(long double));
	      type = GetBuiltinFloatType(lang, data.size());
	      break;
	    }
	  }

	  if (consumed != cleaned_length)
	    return Err("Trailing characters on floating-point constant.");

	  return ExprValue(std::move(type), std::move(data));
	}

	} // namespace zxdb