| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_TOKENIZER_H_ |
| #define SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_TOKENIZER_H_ |
| |
| #include <string> |
| #include <string_view> |
| #include <vector> |
| |
| #include "src/developer/debug/zxdb/common/err.h" |
| #include "src/developer/debug/zxdb/expr/expr_language.h" |
| #include "src/developer/debug/zxdb/expr/expr_token.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_lang.h" |
| |
| namespace zxdb { |
| |
| class ExprTokenizer { |
| public: |
| explicit ExprTokenizer(const std::string& input, ExprLanguage lang = ExprLanguage::kC); |
| |
| // Returns true on successful tokenizing. In this case, the tokens can be read from tokens(). On |
| // failure, err() will contain the error message, and error_location() will contain the error |
| // location. |
| bool Tokenize(); |
| |
| const std::string& input() const { return input_; } |
| |
| ExprLanguage language() const { return language_; } |
| |
| // The result of parsing. This will be multiline and will indicate the location of the problem. |
| const Err& err() const { return err_; } |
| |
| // When err is set, this will be the index into the input() string where the |
| // error occurred. |
| size_t error_location() const { return error_location_; } |
| |
| // When parsing is successful, this contains the extracted tokens. |
| const std::vector<ExprToken>& tokens() const { return tokens_; } |
| |
| std::vector<ExprToken> TakeTokens() { return std::move(tokens_); } |
| |
| // Returns the number of bytes that start at the given input that are valid name tokens. |
| // If the input does not begin with a name token, this will return 0. |
| static size_t GetNameTokenLength(ExprLanguage lang, std::string_view input); |
| |
| // Returns whether the input is a valid unescaped name token. This does no trimming of whitespace |
| // and does not accept "$" escaping. An empty string is not a valid name token. |
| static bool IsNameToken(ExprLanguage lang, std::string_view input); |
| |
| // Returns two context lines for an error message. It will quote a relevant portion of the input |
| // showing the byte offset, and add a "^" on the next line to indicate where the error is. |
| static std::string GetErrorContext(const std::string& input, size_t byte_offset); |
| |
| private: |
| void AdvanceChars(int n); |
| void AdvanceOneChar(); |
| void AdvanceToNextToken(); |
| void AdvanceToEndOfToken(const ExprTokenRecord& record); |
| |
| bool IsCurrentWhitespace() const; |
| |
| // Returns true if the next characters in the buffer match the static value of the given token |
| // record. If the token is alphanumeric, requires that the end of the token be nonalphanumeric. |
| bool CurrentMatchesTokenRecord(const ExprTokenRecord& record) const; |
| |
| const ExprTokenRecord& ClassifyCurrent(); |
| |
| // Checks for a comment beginning at the cur_char(). If it is one, appends a token for the entire |
| // comment contents and returns true. Returns false if a comment does not begin here. |
| bool HandleComment(); |
| |
| bool done() const { return at_end() || has_error(); } |
| bool has_error() const { return err_.has_error(); } |
| bool at_end() const { return cur_ == input_.size(); } |
| char cur_char() const { return input_[cur_]; } |
| bool can_advance(int n) const { return cur_ + n <= input_.size(); } |
| |
| std::string input_; |
| ExprLanguage language_; |
| |
| size_t cur_ = 0; // Character offset into input_. |
| |
| Err err_; |
| size_t error_location_ = 0; |
| |
| std::vector<ExprToken> tokens_; |
| }; |
| |
| } // namespace zxdb |
| |
| #endif // SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_TOKENIZER_H_ |