| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "garnet/bin/zxdb/expr/expr_tokenizer.h" |
| |
| #include <ctype.h> |
| |
| #include "lib/fxl/logging.h" |
| #include "lib/fxl/strings/string_printf.h" |
| |
| namespace zxdb { |
| |
| namespace { |
| |
// Returns true when |c| may start a name: an underscore or a letter in the
// ranges 'A'-'Z' / 'a'-'z'.
bool IsNameFirstChar(char c) {
  if (c == '_')
    return true;

  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}
| |
| bool IsNameContinuingChar(char c) { |
| return IsNameFirstChar(c) || (c >= '0' && c <= '9'); |
| } |
| |
// Returns true when |c| can begin an integer literal, i.e. it is a decimal
// digit.
//
// The argument is converted to unsigned char before being handed to
// isdigit(): the <ctype.h> classification functions have undefined behavior
// for negative values other than EOF, which is what bytes >= 0x80 become on
// platforms where plain char is signed.
bool IsIntegerFirstChar(char c) {
  return isdigit(static_cast<unsigned char>(c)) != 0;
}
| |
// Returns true when |c| can continue an integer literal.
//
// This allows all alphanumeric characters for simplicity. Integer literals
// aren't validated at the tokenizer level and will be checked later. Our job
// is only to find the extent of the literal.
//
// The argument is converted to unsigned char before being handed to
// isalnum(): the <ctype.h> classification functions have undefined behavior
// for negative values other than EOF, which is what bytes >= 0x80 become on
// platforms where plain char is signed.
bool IsIntegerContinuingChar(char c) {
  return isalnum(static_cast<unsigned char>(c)) != 0;
}
| |
| } // namespace |
| |
| ExprTokenizer::ExprTokenizer(const std::string& input) : input_(input) {} |
| |
| bool ExprTokenizer::Tokenize() { |
| while (!done()) { |
| AdvanceToNextToken(); |
| if (done()) |
| break; |
| |
| ExprTokenType type = ClassifyCurrent(); |
| if (has_error()) |
| break; |
| |
| size_t token_begin = cur_; |
| AdvanceToEndOfToken(type); |
| if (has_error()) |
| break; |
| |
| size_t token_end = cur_; |
| std::string token_value(&input_[token_begin], token_end - token_begin); |
| tokens_.emplace_back(type, token_value, token_begin); |
| } |
| return !has_error(); |
| } |
| |
| // static |
| std::string ExprTokenizer::GetErrorContext(const std::string& input, |
| size_t byte_offset) { |
| // Index should be in range of the input string. Also allow indicating one |
| // character past the end. |
| FXL_DCHECK(byte_offset <= input.size()); |
| |
| // Future enhancements: |
| // - If we allow multiline expressions in the, the returned context should |
| // not cross newlines or it will be messed up. |
| // - Input longer than 80 chars should be clipped to guarantee it doesn't |
| // wrap. |
| |
| std::string output; |
| output = " " + input + "\n "; |
| output.append(byte_offset, ' '); |
| output.push_back('^'); |
| return output; |
| } |
| |
| void ExprTokenizer::AdvanceChars(int n) { cur_ += n; } |
| |
| void ExprTokenizer::AdvanceOneChar() { cur_++; } |
| |
| void ExprTokenizer::AdvanceToNextToken() { |
| while (!at_end() && IsCurrentWhitespace()) |
| AdvanceOneChar(); |
| } |
| |
| void ExprTokenizer::AdvanceToEndOfToken(ExprTokenType type) { |
| switch (type) { |
| case ExprTokenType::kInteger: |
| do { |
| AdvanceOneChar(); |
| } while (!at_end() && IsIntegerContinuingChar(cur_char())); |
| break; |
| |
| case ExprTokenType::kName: |
| do { |
| AdvanceOneChar(); |
| } while (!at_end() && IsNameContinuingChar(cur_char())); |
| break; |
| |
| case ExprTokenType::kArrow: |
| case ExprTokenType::kColonColon: |
| case ExprTokenType::kEquality: |
| case ExprTokenType::kDoubleAnd: |
| case ExprTokenType::kLogicalOr: |
| // The classification code should already have validated there were two |
| // characters available. |
| AdvanceOneChar(); |
| AdvanceOneChar(); |
| break; |
| |
| case ExprTokenType::kEquals: |
| case ExprTokenType::kDot: |
| case ExprTokenType::kComma: |
| case ExprTokenType::kStar: |
| case ExprTokenType::kAmpersand: |
| case ExprTokenType::kBitwiseOr: |
| case ExprTokenType::kLeftSquare: |
| case ExprTokenType::kRightSquare: |
| case ExprTokenType::kLeftParen: |
| case ExprTokenType::kRightParen: |
| case ExprTokenType::kLess: |
| case ExprTokenType::kGreater: |
| case ExprTokenType::kMinus: |
| case ExprTokenType::kPlus: |
| AdvanceOneChar(); // All are one char. |
| break; |
| |
| // If we add too many more keywords we should have a more flexible system |
| // rather than hardcoding all lengths here. |
| case ExprTokenType::kTrue: |
| AdvanceChars(4); |
| break; |
| case ExprTokenType::kFalse: |
| AdvanceChars(5); |
| break; |
| case ExprTokenType::kConst: |
| AdvanceChars(5); |
| break; |
| case ExprTokenType::kVolatile: |
| AdvanceChars(8); |
| break; |
| case ExprTokenType::kRestrict: |
| AdvanceChars(8); |
| break; |
| |
| case ExprTokenType::kInvalid: |
| case ExprTokenType::kNumTypes: |
| FXL_NOTREACHED(); |
| err_ = Err("Internal parser error."); |
| error_location_ = cur_; |
| break; |
| } |
| } |
| |
| bool ExprTokenizer::IsCurrentString(std::string_view s) const { |
| if (!can_advance(s.size() - 1)) |
| return false; |
| for (size_t i = 0; i < s.size(); i++) { |
| if (input_[cur_ + i] != s[i]) |
| return false; |
| } |
| return true; |
| } |
| |
| bool ExprTokenizer::IsCurrentName(std::string_view s) const { |
| if (!IsCurrentString(s)) |
| return false; |
| return input_.size() == cur_ + s.size() || // End of buffer. |
| !IsNameContinuingChar(input_[cur_ + s.size()]); // Non-name char. |
| } |
| |
| bool ExprTokenizer::IsCurrentWhitespace() const { |
| FXL_DCHECK(!at_end()); |
| char c = input_[cur_]; |
| return c == 0x0A || c == 0x0D || c == 0x20; |
| } |
| |
| ExprTokenType ExprTokenizer::ClassifyCurrent() { |
| FXL_DCHECK(!at_end()); |
| char cur = cur_char(); |
| |
| // Numbers. |
| if (IsIntegerFirstChar(cur)) |
| return ExprTokenType::kInteger; |
| |
| // Words. |
| if (IsNameFirstChar(cur)) { |
| // Check for special keywords. |
| if (IsCurrentName("true")) |
| return ExprTokenType::kTrue; |
| else if (IsCurrentName("false")) |
| return ExprTokenType::kFalse; |
| else if (IsCurrentName("const")) |
| return ExprTokenType::kConst; |
| else if (IsCurrentName("volatile")) |
| return ExprTokenType::kVolatile; |
| else if (IsCurrentName("restrict")) |
| return ExprTokenType::kRestrict; |
| |
| // Everything else is a general name. |
| return ExprTokenType::kName; |
| } |
| |
| // Punctuation. |
| switch (cur) { |
| case '-': |
| // Hyphen could be itself or an arrow, look ahead. |
| if (can_advance()) { |
| if (input_[cur_ + 1] == '>') |
| return ExprTokenType::kArrow; |
| } |
| // Anything else is a standalone hyphen. |
| return ExprTokenType::kMinus; |
| case '=': |
| // Check for "==". |
| if (can_advance()) { |
| if (input_[cur_ + 1] == '=') |
| return ExprTokenType::kEquality; |
| } |
| return ExprTokenType::kEquals; |
| case '.': |
| return ExprTokenType::kDot; |
| case ',': |
| return ExprTokenType::kComma; |
| case '*': |
| return ExprTokenType::kStar; |
| case '&': |
| // Check for "&&". |
| if (can_advance()) { |
| if (input_[cur_ + 1] == '&') |
| return ExprTokenType::kDoubleAnd; |
| } |
| return ExprTokenType::kAmpersand; |
| case '|': |
| // Check for "||". |
| if (can_advance()) { |
| if (input_[cur_ + 1] == '|') |
| return ExprTokenType::kLogicalOr; |
| } |
| return ExprTokenType::kBitwiseOr; |
| case '[': |
| return ExprTokenType::kLeftSquare; |
| case ']': |
| return ExprTokenType::kRightSquare; |
| case '(': |
| return ExprTokenType::kLeftParen; |
| case ')': |
| return ExprTokenType::kRightParen; |
| case '<': |
| return ExprTokenType::kLess; |
| case '>': |
| return ExprTokenType::kGreater; |
| case ':': |
| // Currently only support colons as part of "::", look ahead. |
| if (can_advance()) { |
| if (input_[cur_ + 1] == ':') |
| return ExprTokenType::kColonColon; |
| } |
| // Any other use of colon is an error. |
| error_location_ = cur_; |
| err_ = Err("Invalid standalone ':' in expression.\n" + |
| GetErrorContext(input_, cur_)); |
| return ExprTokenType::kInvalid; |
| default: |
| error_location_ = cur_; |
| err_ = Err( |
| fxl::StringPrintf("Invalid character '%c' in expression.\n", cur) + |
| GetErrorContext(input_, cur_)); |
| return ExprTokenType::kInvalid; |
| } |
| } |
| |
| } // namespace zxdb |