src/developer/debug/zxdb/expr/number_parser.cc - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "src/developer/debug/zxdb/expr/number_parser.h"

 #include <ctype.h>
 #include <stdlib.h>

 #include <charconv>
 #include <system_error>

 #include "src/developer/debug/zxdb/expr/expr_value.h"
 #include "src/developer/debug/zxdb/symbols/base_type.h"

 namespace zxdb {

 namespace {

 // Max values converted to a uint64_t.
 constexpr uint64_t kSigned32Max = std::numeric_limits<int32_t>::max();
 constexpr uint64_t kSigned64Max = std::numeric_limits<int64_t>::max();
 constexpr uint64_t kUnsigned32Max = std::numeric_limits<uint32_t>::max();
 constexpr uint64_t kUnsigned64Max = std::numeric_limits<uint64_t>::max();

 // Absolute value of the smallest number that can be put in a signed 32-bit
 // number. Be careful, the negative numbers hold one larger than the
 // corresponding positive number which makes it hard to compute.
 constexpr uint64_t kSigned32MaxAbsNeg = 0x80000000;
 constexpr uint64_t kSigned64MaxAbsNeg = 0x8000000000000000ul;

 // This hardcodes our current 64-bit type scheme where "long" and "long long"
 // are both 64 bits, and "int" is 32. Note that we still support "long long"
 // because it's surprising if you type "0x100ll" and don't get something called
 // "long long" back.
 //
 // C++ has more rules about whether the input has a specific base (hex numbers
 // prefer to be unsigned if possible), and the "l" suffix is particularly weird
 // because it allows matching "unsigned long" while no other decimal numbers
 // will match unsigned types without "u". Our requirements don't need all of
 // these rules so keep things a bit simpler.
 //
 // See: https://en.cppreference.com/w/cpp/language/integer_literal
 struct TypeLookup {
   const char* name;
   size_t byte_size;
   bool type_signed;

   // The largest positive value held by this type.
   uint64_t max_positive;

   // Absolute value of the most negative value held by this type. In the case
   // of unsigned types, this should hold the same value as the corresponding
   // signed type. This allows "-23u" to specify an unsigned version of the type
   // that would normally hold "-23".
   uint64_t max_abs_negative;

   // Maximum suffix this type matches. If the number specifies "l" it will
   // allow "long" or "long long" but not int. Any lengths less than this will
   // not match.
   IntegerSuffix::Length max_suffix;
 } kTypeLookup[] = {
     // clang-format off
     // Name            bytes, signed max_positive    max_abs_negative,   max_suffix
     {"int",                4, true,  kSigned32Max,   kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
     {"unsigned",           4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
     {"long",               8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
     {"unsigned long",      8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
     {"long long",          8, true,  kSigned64Max,   kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
     {"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
     // clang-format on
 };

 // Supports only base 2, 8, 10, and 16.
 bool ValidForBase(IntegerPrefix::Base base, char c) {
   switch (base) {
     case IntegerPrefix::kBin:
       return c == '0' || c == '1';
     case IntegerPrefix::kOct:
       return c >= '0' && c <= '7';
     case IntegerPrefix::kDec:
       return c >= '0' && c <= '9';
     case IntegerPrefix::kHex:
       return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
              (c >= 'a' && c <= 'f');
   }
   return false;
 }

 }  // namespace

 Err StringToNumber(std::string_view str, ExprValue* output) {
   IntegerPrefix prefix = ExtractIntegerPrefix(&str);
   if (prefix.base == IntegerPrefix::kOct &&
       prefix.octal_type == IntegerPrefix::OctalType::kC) {
     // Require "0o" prefixes for octal numbers instead of allowing C-style
     // "0" prefixes. Octal numbers are very unusual to be typed interactively
     // in a debugger, and it's easier to accidentally copy-and-paste a decimal
     // number with a "0" at the beginning and get surprising results. The
     // "0o" format is used by Rust so we require it for clarity.
     return Err("Octal numbers must be prefixed with '0o'.");
   }

   IntegerSuffix suffix;
   Err err = ExtractIntegerSuffix(&str, &suffix);
   if (err.has_error())
     return err;

   if (str.empty())
     return Err("Expected a number.");

   // Validate the characters in the number. This prevents strtoull from
   // being too smart and trying to handle prefixes itself.
   for (char c : str) {
     if (!ValidForBase(prefix.base, c))
       return Err("Invalid character in number.");
   }

   // strtoull doesn't take a const ending, but it doesn't modify the input.
   char* str_end = const_cast<char*>(str.end());
   char* parsed_end = str_end;

   // This will be the absolute value of the returned number.
   uint64_t abs_value =
       strtoull(str.data(), &parsed_end, static_cast<int>(prefix.base));

   // If strtoull stopped early it means it it hit an invalid character
   // (shouldn't happen since we validated above) or maybe the input was too
   // long.
   if (parsed_end != str_end)
     return Err("Invalid number.");

   // Pick the smallest type that fits the data size as well as satisfies any
   // suffixes.
   const TypeLookup* matched_type = nullptr;
   for (const auto& cur : kTypeLookup) {
     // Type must hold enough data.
     if (prefix.sign == IntegerPrefix::kNegative) {
       if (abs_value > cur.max_abs_negative)
         continue;
     } else {
       if (abs_value > cur.max_positive)
         continue;
     }

     if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.length))
       continue;  // Requested length is larger.

     if (suffix.type_signed == IntegerSuffix::kUnsigned) {
       if (cur.type_signed)
         continue;  // Unsigned suffix requires unsigned type.
     } else if (prefix.sign == IntegerPrefix::kNegative && !cur.type_signed) {
       // Signed input requires a signed type unless the a suffix overrode it
       // which was checked above ("-1u" should be unsigned).
       continue;
     }

     matched_type = &cur;
     break;
   }

   if (!matched_type) {
     // Anything not matched above will be an overflow. Put it into a unsigned
     // 64-bit value and tolerate the overflow.
     matched_type = &*(std::end(kTypeLookup) - 1);
   }

   int symbol_tag = matched_type->type_signed ? BaseType::kBaseTypeSigned
                                              : BaseType::kBaseTypeUnsigned;
   auto type = fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size,
                                             matched_type->name);

   uint64_t value =
       prefix.sign == IntegerPrefix::kNegative ? -abs_value : abs_value;

   // Construct the data. This assumes little-endian since it truncates or
   // zero-fills off the right.
   std::vector<uint8_t> data(matched_type->byte_size);
   memcpy(&data[0], &value, matched_type->byte_size);

   *output = ExprValue(std::move(type), std::move(data));
   return Err();
 }

 IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
   IntegerPrefix prefix;
   if (s->empty())
     return prefix;  // Defaults OK for empty string.

   if ((*s)[0] == '-') {
     prefix.sign = IntegerPrefix::kNegative;

     // Allow whitespace between negative sign and the rest.
     size_t sign_len = 1;
     while (sign_len < s->size() && isspace((*s)[sign_len]))
       sign_len++;
     *s = s->substr(sign_len);
   }

   if (s->size() >= 2u && (*s)[0] == '0') {
     char second = (*s)[1];
     if (second == 'x' || second == 'X') {
       // Hex.
       *s = s->substr(2u);
       prefix.base = IntegerPrefix::kHex;
     } else if (second == 'b' || second == 'B') {
       // Binary.
       *s = s->substr(2u);
       prefix.base = IntegerPrefix::kBin;
     } else if (second == 'o' || second == 'O') {
       // Rust-style octal "0o".
       *s = s->substr(2u);
       prefix.base = IntegerPrefix::kOct;
       prefix.octal_type = IntegerPrefix::OctalType::kRust;
     } else {
       // Everything else beginning with a '0' is C-style octal. Note this
       // requires >= 2 digits so that "0" by itself is decimal.
       *s = s->substr(1u);
       prefix.base = IntegerPrefix::kOct;
       prefix.octal_type = IntegerPrefix::OctalType::kC;
     }
   }
   // Else case is decimal, doesn't need trimming, default is already correct.
   return prefix;
 }

 Err ExtractIntegerSuffix(std::string_view* s, IntegerSuffix* suffix) {
   *suffix = IntegerSuffix();

   // Check for any combination of "u" and either "l" or "ll". This works
   // backwards to avoid two passes since the suffix means the same in either
   // order.
   bool have_unsigned = false;
   bool have_length = false;
   size_t suffix_begin = s->size();
   while (suffix_begin > 0) {
     char prev_char = (*s)[suffix_begin - 1];
     if (prev_char == 'U' || prev_char == 'u') {
       // Unsigned suffix.
       if (have_unsigned)
         return Err("Duplicate 'u' in number suffix.");
       have_unsigned = true;

       suffix->type_signed = IntegerSuffix::kUnsigned;
       suffix_begin--;
     } else if (prev_char == 'L' || prev_char == 'l') {
       // Suffix has an "l", disambiguate based on previous char.
       if (have_length)
         return Err("Duplicate 'l' or 'll' in number suffix.");
       have_length = true;

       // Technically C++ says "Ll" and "lL" aren't allowed, but we don't
       // bother enforcing this.
       if (suffix_begin > 1 &&
           ((*s)[suffix_begin - 2] == 'l' || (*s)[suffix_begin - 2] == 'L')) {
         // "ll" = Long long.
         suffix->length = IntegerSuffix::Length::kLongLong;
         suffix_begin -= 2;
       } else {
         // "l" by itself = Long.
         suffix->length = IntegerSuffix::Length::kLong;
         suffix_begin--;
       }
     } else {
       // Not a valid suffix number, stop.
       break;
     }
   }

   *s = s->substr(0, suffix_begin);
   return Err();
 }

 }  // namespace zxdb
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "src/developer/debug/zxdb/expr/number_parser.h"

	#include <ctype.h>
	#include <stdlib.h>

	#include "src/developer/debug/zxdb/expr/expr_value.h"
	#include "src/developer/debug/zxdb/symbols/base_type.h"

	namespace zxdb {

	namespace {

	// Max values converted to a uint64_t.
	constexpr uint64_t kSigned32Max = std::numeric_limits<int32_t>::max();
	constexpr uint64_t kSigned64Max = std::numeric_limits<int64_t>::max();
	constexpr uint64_t kUnsigned32Max = std::numeric_limits<uint32_t>::max();
	constexpr uint64_t kUnsigned64Max = std::numeric_limits<uint64_t>::max();

	// Absolute value of the smallest number that can be put in a signed 32-bit
	// number. Be careful, the negative numbers hold one larger than the
	// corresponding positive number which makes it hard to compute.
	constexpr uint64_t kSigned32MaxAbsNeg = 0x80000000;
	constexpr uint64_t kSigned64MaxAbsNeg = 0x8000000000000000ul;

	// This hardcodes our current 64-bit type scheme where "long" and "long long"
	// are both 64 bits, and "int" is 32. Note that we still support "long long"
	// because it's surprising if you type "0x100ll" and don't get something called
	// "long long" back.
	//
	// C++ has more rules about whether the input has a specific base (hex numbers
	// prefer to be unsigned if possible), and the "l" suffix is particularly weird
	// because it allows matching "unsigned long" while no other decimal numbers
	// will match unsigned types without "u". Our requirements don't need all of
	// these rules so keep things a bit simpler.
	//
	// See: https://en.cppreference.com/w/cpp/language/integer_literal
	struct TypeLookup {
	const char* name;
	size_t byte_size;
	bool type_signed;

	// The largest positive value held by this type.
	uint64_t max_positive;

	// Absolute value of the most negative value held by this type. In the case
	// of unsigned types, this should hold the same value as the corresponding
	// signed type. This allows "-23u" to specify an unsigned version of the type
	// that would normally hold "-23".
	uint64_t max_abs_negative;

	// Maximum suffix this type matches. If the number specifies "l" it will
	// allow "long" or "long long" but not int. Any lengths less than this will
	// not match.
	IntegerSuffix::Length max_suffix;
	} kTypeLookup[] = {
	// clang-format off
	// Name bytes, signed max_positive max_abs_negative, max_suffix
	{"int", 4, true, kSigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
	{"unsigned", 4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
	{"long", 8, true, kSigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
	{"unsigned long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
	{"long long", 8, true, kSigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
	{"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
	// clang-format on
	};

	// Supports only base 2, 8, 10, and 16.
	bool ValidForBase(IntegerPrefix::Base base, char c) {
	switch (base) {
	case IntegerPrefix::kBin:
	return c == '0' \|\| c == '1';
	case IntegerPrefix::kOct:
	return c >= '0' && c <= '7';
	case IntegerPrefix::kDec:
	return c >= '0' && c <= '9';
	case IntegerPrefix::kHex:
	return (c >= '0' && c <= '9') \|\| (c >= 'A' && c <= 'F') \|\|
	(c >= 'a' && c <= 'f');
	}
	return false;
	}

	} // namespace

	Err StringToNumber(std::string_view str, ExprValue* output) {
	IntegerPrefix prefix = ExtractIntegerPrefix(&str);
	if (prefix.base == IntegerPrefix::kOct &&
	prefix.octal_type == IntegerPrefix::OctalType::kC) {
	// Require "0o" prefixes for octal numbers instead of allowing C-style
	// "0" prefixes. Octal numbers are very unusual to be typed interactively
	// in a debugger, and it's easier to accidentally copy-and-paste a decimal
	// number with a "0" at the beginning and get surprising results. The
	// "0o" format is used by Rust so we require it for clarity.
	return Err("Octal numbers must be prefixed with '0o'.");
	}

	IntegerSuffix suffix;
	Err err = ExtractIntegerSuffix(&str, &suffix);
	if (err.has_error())
	return err;

	if (str.empty())
	return Err("Expected a number.");

	// Validate the characters in the number. This prevents strtoull from
	// being too smart and trying to handle prefixes itself.
	for (char c : str) {
	if (!ValidForBase(prefix.base, c))
	return Err("Invalid character in number.");
	}

	// strtoull doesn't take a const ending, but it doesn't modify the input.
	char* str_end = const_cast<char*>(str.end());
	char* parsed_end = str_end;

	// This will be the absolute value of the returned number.
	uint64_t abs_value =
	strtoull(str.data(), &parsed_end, static_cast<int>(prefix.base));

	// If strtoull stopped early it means it it hit an invalid character
	// (shouldn't happen since we validated above) or maybe the input was too
	// long.
	if (parsed_end != str_end)
	return Err("Invalid number.");

	// Pick the smallest type that fits the data size as well as satisfies any
	// suffixes.
	const TypeLookup* matched_type = nullptr;
	for (const auto& cur : kTypeLookup) {
	// Type must hold enough data.
	if (prefix.sign == IntegerPrefix::kNegative) {
	if (abs_value > cur.max_abs_negative)
	continue;
	} else {
	if (abs_value > cur.max_positive)
	continue;
	}

	if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.length))
	continue; // Requested length is larger.

	if (suffix.type_signed == IntegerSuffix::kUnsigned) {
	if (cur.type_signed)
	continue; // Unsigned suffix requires unsigned type.
	} else if (prefix.sign == IntegerPrefix::kNegative && !cur.type_signed) {
	// Signed input requires a signed type unless the a suffix overrode it
	// which was checked above ("-1u" should be unsigned).
	continue;
	}

	matched_type = &cur;
	break;
	}

	if (!matched_type) {
	// Anything not matched above will be an overflow. Put it into a unsigned
	// 64-bit value and tolerate the overflow.
	matched_type = &*(std::end(kTypeLookup) - 1);
	}

	int symbol_tag = matched_type->type_signed ? BaseType::kBaseTypeSigned
	: BaseType::kBaseTypeUnsigned;
	auto type = fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size,
	matched_type->name);

	uint64_t value =
	prefix.sign == IntegerPrefix::kNegative ? -abs_value : abs_value;

	// Construct the data. This assumes little-endian since it truncates or
	// zero-fills off the right.
	std::vector<uint8_t> data(matched_type->byte_size);
	memcpy(&data[0], &value, matched_type->byte_size);

	*output = ExprValue(std::move(type), std::move(data));
	return Err();
	}

	IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
	IntegerPrefix prefix;
	if (s->empty())
	return prefix; // Defaults OK for empty string.

	if ((*s)[0] == '-') {
	prefix.sign = IntegerPrefix::kNegative;

	// Allow whitespace between negative sign and the rest.
	size_t sign_len = 1;
	while (sign_len < s->size() && isspace((*s)[sign_len]))
	sign_len++;
	*s = s->substr(sign_len);
	}

	if (s->size() >= 2u && (*s)[0] == '0') {
	char second = (*s)[1];
	if (second == 'x' \|\| second == 'X') {
	// Hex.
	*s = s->substr(2u);
	prefix.base = IntegerPrefix::kHex;
	} else if (second == 'b' \|\| second == 'B') {
	// Binary.
	*s = s->substr(2u);
	prefix.base = IntegerPrefix::kBin;
	} else if (second == 'o' \|\| second == 'O') {
	// Rust-style octal "0o".
	*s = s->substr(2u);
	prefix.base = IntegerPrefix::kOct;
	prefix.octal_type = IntegerPrefix::OctalType::kRust;
	} else {
	// Everything else beginning with a '0' is C-style octal. Note this
	// requires >= 2 digits so that "0" by itself is decimal.
	*s = s->substr(1u);
	prefix.base = IntegerPrefix::kOct;
	prefix.octal_type = IntegerPrefix::OctalType::kC;
	}
	}
	// Else case is decimal, doesn't need trimming, default is already correct.
	return prefix;
	}

	Err ExtractIntegerSuffix(std::string_view* s, IntegerSuffix* suffix) {
	*suffix = IntegerSuffix();

	// Check for any combination of "u" and either "l" or "ll". This works
	// backwards to avoid two passes since the suffix means the same in either
	// order.
	bool have_unsigned = false;
	bool have_length = false;
	size_t suffix_begin = s->size();
	while (suffix_begin > 0) {
	char prev_char = (*s)[suffix_begin - 1];
	if (prev_char == 'U' \|\| prev_char == 'u') {
	// Unsigned suffix.
	if (have_unsigned)
	return Err("Duplicate 'u' in number suffix.");
	have_unsigned = true;

	suffix->type_signed = IntegerSuffix::kUnsigned;
	suffix_begin--;
	} else if (prev_char == 'L' \|\| prev_char == 'l') {
	// Suffix has an "l", disambiguate based on previous char.
	if (have_length)
	return Err("Duplicate 'l' or 'll' in number suffix.");
	have_length = true;

	// Technically C++ says "Ll" and "lL" aren't allowed, but we don't
	// bother enforcing this.
	if (suffix_begin > 1 &&
	((s)[suffix_begin - 2] == 'l' \|\| (s)[suffix_begin - 2] == 'L')) {
	// "ll" = Long long.
	suffix->length = IntegerSuffix::Length::kLongLong;
	suffix_begin -= 2;
	} else {
	// "l" by itself = Long.
	suffix->length = IntegerSuffix::Length::kLong;
	suffix_begin--;
	}
	} else {
	// Not a valid suffix number, stop.
	break;
	}
	}

	*s = s->substr(0, suffix_begin);
	return Err();
	}

	} // namespace zxdb