blob: 95ac1866c95c89313eb3cff10a910fb571f34ea1 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/developer/debug/zxdb/expr/number_parser.h"
#include <ctype.h>
#include <stdlib.h>
#include "src/developer/debug/zxdb/expr/expr_value.h"
#include "src/developer/debug/zxdb/symbols/base_type.h"
namespace zxdb {
namespace {
// Max values converted to a uint64_t.
constexpr uint64_t kSigned32Max = std::numeric_limits<int32_t>::max();
constexpr uint64_t kSigned64Max = std::numeric_limits<int64_t>::max();
constexpr uint64_t kUnsigned32Max = std::numeric_limits<uint32_t>::max();
constexpr uint64_t kUnsigned64Max = std::numeric_limits<uint64_t>::max();
// Absolute value of the smallest number that can be put in a signed 32-bit
// number. Be careful, the negative numbers hold one larger than the
// corresponding positive number which makes it hard to compute.
constexpr uint64_t kSigned32MaxAbsNeg = 0x80000000;
constexpr uint64_t kSigned64MaxAbsNeg = 0x8000000000000000ul;
// This hardcodes our current 64-bit type scheme where "long" and "long long"
// are both 64 bits, and "int" is 32. Note that we still support "long long"
// because it's surprising if you type "0x100ll" and don't get something called
// "long long" back.
//
// C++ has more rules about whether the input has a specific base (hex numbers
// prefer to be unsigned if possible), and the "l" suffix is particularly weird
// because it allows matching "unsigned long" while no other decimal numbers
// will match unsigned types without "u". Our requirements don't need all of
// these rules so keep things a bit simpler.
//
// See: https://en.cppreference.com/w/cpp/language/integer_literal
struct TypeLookup {
const char* name;
size_t byte_size;
bool type_signed;
// The largest positive value held by this type.
uint64_t max_positive;
// Absolute value of the most negative value held by this type. In the case
// of unsigned types, this should hold the same value as the corresponding
// signed type. This allows "-23u" to specify an unsigned version of the type
// that would normally hold "-23".
uint64_t max_abs_negative;
// Maximum suffix this type matches. If the number specifies "l" it will
// allow "long" or "long long" but not int. Any lengths less than this will
// not match.
IntegerSuffix::Length max_suffix;
} kTypeLookup[] = {
// clang-format off
// Name bytes, signed max_positive max_abs_negative, max_suffix
{"int", 4, true, kSigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
{"unsigned", 4, false, kUnsigned32Max, kSigned32MaxAbsNeg, IntegerSuffix::Length::kInteger},
{"long", 8, true, kSigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
{"unsigned long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLong},
{"long long", 8, true, kSigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
{"unsigned long long", 8, false, kUnsigned64Max, kSigned64MaxAbsNeg, IntegerSuffix::Length::kLongLong},
// clang-format on
};
// Supports only base 2, 8, 10, and 16.
bool ValidForBase(IntegerPrefix::Base base, char c) {
switch (base) {
case IntegerPrefix::kBin:
return c == '0' || c == '1';
case IntegerPrefix::kOct:
return c >= '0' && c <= '7';
case IntegerPrefix::kDec:
return c >= '0' && c <= '9';
case IntegerPrefix::kHex:
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
(c >= 'a' && c <= 'f');
}
return false;
}
} // namespace
Err StringToNumber(std::string_view str, ExprValue* output) {
IntegerPrefix prefix = ExtractIntegerPrefix(&str);
if (prefix.base == IntegerPrefix::kOct &&
prefix.octal_type == IntegerPrefix::OctalType::kC) {
// Require "0o" prefixes for octal numbers instead of allowing C-style
// "0" prefixes. Octal numbers are very unusual to be typed interactively
// in a debugger, and it's easier to accidentally copy-and-paste a decimal
// number with a "0" at the beginning and get surprising results. The
// "0o" format is used by Rust so we require it for clarity.
return Err("Octal numbers must be prefixed with '0o'.");
}
IntegerSuffix suffix;
Err err = ExtractIntegerSuffix(&str, &suffix);
if (err.has_error())
return err;
if (str.empty())
return Err("Expected a number.");
// Validate the characters in the number. This prevents strtoull from
// being too smart and trying to handle prefixes itself.
for (char c : str) {
if (!ValidForBase(prefix.base, c))
return Err("Invalid character in number.");
}
// strtoull doesn't take a const ending, but it doesn't modify the input.
char* str_end = const_cast<char*>(str.end());
char* parsed_end = str_end;
// This will be the absolute value of the returned number.
uint64_t abs_value =
strtoull(str.data(), &parsed_end, static_cast<int>(prefix.base));
// If strtoull stopped early it means it it hit an invalid character
// (shouldn't happen since we validated above) or maybe the input was too
// long.
if (parsed_end != str_end)
return Err("Invalid number.");
// Pick the smallest type that fits the data size as well as satisfies any
// suffixes.
const TypeLookup* matched_type = nullptr;
for (const auto& cur : kTypeLookup) {
// Type must hold enough data.
if (prefix.sign == IntegerPrefix::kNegative) {
if (abs_value > cur.max_abs_negative)
continue;
} else {
if (abs_value > cur.max_positive)
continue;
}
if (static_cast<int>(cur.max_suffix) < static_cast<int>(suffix.length))
continue; // Requested length is larger.
if (suffix.type_signed == IntegerSuffix::kUnsigned) {
if (cur.type_signed)
continue; // Unsigned suffix requires unsigned type.
} else if (prefix.sign == IntegerPrefix::kNegative && !cur.type_signed) {
// Signed input requires a signed type unless the a suffix overrode it
// which was checked above ("-1u" should be unsigned).
continue;
}
matched_type = &cur;
break;
}
if (!matched_type) {
// Anything not matched above will be an overflow. Put it into a unsigned
// 64-bit value and tolerate the overflow.
matched_type = &*(std::end(kTypeLookup) - 1);
}
int symbol_tag = matched_type->type_signed ? BaseType::kBaseTypeSigned
: BaseType::kBaseTypeUnsigned;
auto type = fxl::MakeRefCounted<BaseType>(symbol_tag, matched_type->byte_size,
matched_type->name);
uint64_t value =
prefix.sign == IntegerPrefix::kNegative ? -abs_value : abs_value;
// Construct the data. This assumes little-endian since it truncates or
// zero-fills off the right.
std::vector<uint8_t> data(matched_type->byte_size);
memcpy(&data[0], &value, matched_type->byte_size);
*output = ExprValue(std::move(type), std::move(data));
return Err();
}
IntegerPrefix ExtractIntegerPrefix(std::string_view* s) {
IntegerPrefix prefix;
if (s->empty())
return prefix; // Defaults OK for empty string.
if ((*s)[0] == '-') {
prefix.sign = IntegerPrefix::kNegative;
// Allow whitespace between negative sign and the rest.
size_t sign_len = 1;
while (sign_len < s->size() && isspace((*s)[sign_len]))
sign_len++;
*s = s->substr(sign_len);
}
if (s->size() >= 2u && (*s)[0] == '0') {
char second = (*s)[1];
if (second == 'x' || second == 'X') {
// Hex.
*s = s->substr(2u);
prefix.base = IntegerPrefix::kHex;
} else if (second == 'b' || second == 'B') {
// Binary.
*s = s->substr(2u);
prefix.base = IntegerPrefix::kBin;
} else if (second == 'o' || second == 'O') {
// Rust-style octal "0o".
*s = s->substr(2u);
prefix.base = IntegerPrefix::kOct;
prefix.octal_type = IntegerPrefix::OctalType::kRust;
} else {
// Everything else beginning with a '0' is C-style octal. Note this
// requires >= 2 digits so that "0" by itself is decimal.
*s = s->substr(1u);
prefix.base = IntegerPrefix::kOct;
prefix.octal_type = IntegerPrefix::OctalType::kC;
}
}
// Else case is decimal, doesn't need trimming, default is already correct.
return prefix;
}
Err ExtractIntegerSuffix(std::string_view* s, IntegerSuffix* suffix) {
*suffix = IntegerSuffix();
// Check for any combination of "u" and either "l" or "ll". This works
// backwards to avoid two passes since the suffix means the same in either
// order.
bool have_unsigned = false;
bool have_length = false;
size_t suffix_begin = s->size();
while (suffix_begin > 0) {
char prev_char = (*s)[suffix_begin - 1];
if (prev_char == 'U' || prev_char == 'u') {
// Unsigned suffix.
if (have_unsigned)
return Err("Duplicate 'u' in number suffix.");
have_unsigned = true;
suffix->type_signed = IntegerSuffix::kUnsigned;
suffix_begin--;
} else if (prev_char == 'L' || prev_char == 'l') {
// Suffix has an "l", disambiguate based on previous char.
if (have_length)
return Err("Duplicate 'l' or 'll' in number suffix.");
have_length = true;
// Technically C++ says "Ll" and "lL" aren't allowed, but we don't
// bother enforcing this.
if (suffix_begin > 1 &&
((*s)[suffix_begin - 2] == 'l' || (*s)[suffix_begin - 2] == 'L')) {
// "ll" = Long long.
suffix->length = IntegerSuffix::Length::kLongLong;
suffix_begin -= 2;
} else {
// "l" by itself = Long.
suffix->length = IntegerSuffix::Length::kLong;
suffix_begin--;
}
} else {
// Not a valid suffix number, stop.
break;
}
}
*s = s->substr(0, suffix_begin);
return Err();
}
} // namespace zxdb