| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_PARSER_H_ |
| #define SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_PARSER_H_ |
| |
| #include <memory> |
| #include <vector> |
| |
| #include "src/developer/debug/zxdb/common/err.h" |
| #include "src/developer/debug/zxdb/expr/expr_language.h" |
| #include "src/developer/debug/zxdb/expr/expr_node.h" |
| #include "src/developer/debug/zxdb/expr/expr_token.h" |
| #include "src/developer/debug/zxdb/expr/name_lookup.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_tag.h" |
| |
| namespace zxdb { |
| |
| class ExprParser { |
| public: |
| enum class BlockDelimiter { |
| kExplicit, // Require the block to be surrounded by { }. |
| kImplicit, // No { } needed. If the first thing is a "{" it will start a nested block. |
| }; |
| enum class BlockEnd { |
| // Stops block parsing at "}", which is not consumed. If end-of-input is reached first, an error |
| // will be reported. |
| kExplicit, |
| |
| // Stops block parsing when end-of-input is reached. If a "}" is encountered, it will be an |
| // error. |
| kEndOfInput, |
| }; |
| |
| // The name lookup callback can be empty if the caller doesn't have any symbol context. This means |
| // that we can't disambiguate some cases like how to parse "Foo < 1 > bar". In this mode, we'll |
| // assume that "<" after a name always means a template rather than a comparison operation. |
| ExprParser(std::vector<ExprToken> tokens, ExprLanguage lang, |
| NameLookupCallback name_lookup = NameLookupCallback()); |
| |
| // Returns the root expression node on successful parsing. On error, returns an empty pointer, in |
| // which case the message and offending token can be read from err() and error_token(). |
| fxl::RefPtr<ExprNode> ParseExpression(); |
| |
| // Parses a block. { and } will be consumed. The input is counted as being complete. |
| fxl::RefPtr<BlockExprNode> ParseBlock(BlockDelimiter delimiter); |
| |
| // Attempts to parse the given string as an identifier. The returned err indicates whether the |
| // output identifier is valid. |
| static Err ParseIdentifier(const std::string& input, Identifier* output); |
| static Err ParseIdentifier(const std::string& input, ParsedIdentifier* output); |
| |
| // The result of parsing. Since this does not have access to the initial string, it will not |
| // indicate context for the error. That can be generated from the error_token() if desired. |
| const Err& err() const { return err_; } |
| |
| ExprToken error_token() const { return error_token_; } |
| |
| private: |
| struct DispatchInfo; |
| |
| enum StatementEnd { |
| // Requires an explicit statement end according to language rules. For C++ this requires a |
| // semicolon at the end, or that the statement be a block (blocks don't require semicolons at |
| // the end). |
| kExplicit, |
| |
| // Does not consume an end-of-statement marker (";" in C) even if present. |
| kNone, |
| |
| // Allow an explicit ending as above, or an end-of-input to indicate the end of the statement, |
| // as long as the result is syntactically valid. The statement end token will be consumed if |
| // present. |
| kAny, |
| }; |
| |
| enum class EmptyExpression { |
| kAllow, // Allow empty expressions. ParseExpression will return null but not an error. |
| kReject, // Reject empty expressions and report an error. |
| }; |
| |
| // Internal version of ParseExpression that takes the current operator precedence being parsed. |
| // |
| // THIS CAN RETURN NULL if the input is empty or starts with a semicolon. |
| // |
| // When recursively calling this function, call with the same precedence as the current expression |
| // for left-associativity (operators evaluated from left-to-right), and one less for |
| // right-associativity. |
| // |
| // The value optionally pointed to by had_statement_end will be set to true if the statement was |
| // explicitly ended as per the rules for StatementEnd::kExplicit. This allows code that passes |
| // StatementEnd::kAny to determine how the statement was ended. |
| fxl::RefPtr<ExprNode> ParseExpression(int precedence, |
| EmptyExpression empty_expr = EmptyExpression::kReject, |
| StatementEnd statement_end = StatementEnd::kNone, |
| bool* had_statement_end = nullptr); |
| |
| // Parses the contents of a block. Stops when a "}" (it is not consumed) or end of input is hit, |
| // depending on the block_end parameter. Blocks accept multiple expressions. |
| fxl::RefPtr<BlockExprNode> ParseBlockContents(BlockEnd block_end); |
| |
| // Parses the name of a symbol or a non-type identifier (e.g. a variable name) starting at |
| // cur_token(). |
| // |
| // This is separate from the regular parsing to simplify the structure. These names can be parsed |
| // linearly (we don't go into templates which is where recursion comes in) so the implementation |
| // is more straightforward, and it's nicer to get the handling out of the general "<" token |
| // handler, for example. |
| // |
| // On error, has_error() will be set and an empty ParseNameResult will be returned (with empty |
| // identifier and type). |
| // |
| // The |expand_types| flag indicates if ParseName() should call ParseType() when it identifies a |
| // type name identifier. This will then handle following type modifiers like "*" and "&&". |
| // External callers will want to set this. This flag is set to false when called by ParseType() to |
| // avoid recursive calls. |
| struct ParseNameResult { |
| ParseNameResult() = default; |
| |
| // On success, always contains the identifier name. |
| ParsedIdentifier ident; |
| |
| // When the result is a type, this will contain the resolved type. When null, the result is a |
| // non-type or an error. |
| fxl::RefPtr<Type> type; |
| }; |
| ParseNameResult ParseName(bool expand_types); |
| |
| // Converts the current token to an identifier component. The current token must be a kName or a |
| // kSpecialName token. On error, the error will be set and an empty ParsedIdentifierComponent will |
| // be returned. This doesn't handle templates, it is called as part of ParseName to do the full |
| // name parsing. |
| ParsedIdentifierComponent GetIdentifierComponent(); |
| |
| // Parses a type starting at cur_token() and returns it. Returns a null type and sets has_error() |
| // on failure. |
| // |
| // If the optional_base is empty, the whole type will be parsed. Examples of things that it will |
| // handle in this case: "const Foo *", "Bar &", "int". |
| // |
| // This may be called by ParseName when it realizes that it just generated a type. This is |
| // necessary to handle stuff like "*" and "&&" that follow the type name and modify it. In this |
| // case, the optional_base would be the type name corresponding to the identifier (e.g. |
| // "my_ns::MyClass") and the tokens at cur_token() might be "* *" or "&&" or something that's not |
| // a valid type modifier at all (which will mark the type parsing complete). |
| fxl::RefPtr<Type> ParseType(fxl::RefPtr<Type> optional_base); |
| |
| // Parses a Rust array type name, which is of the form [Type] or [Type; 24]. |
| fxl::RefPtr<Type> ParseRustArrayType(); |
| |
| // Parses template parameter lists. The "stop_before" parameter indicates how the list is expected |
| // to end (i.e. ">"). Sets the error on failure. |
| std::vector<std::string> ParseTemplateList(ExprTokenType stop_before); |
| |
| // Parses comma-separated lists of expressions. Runs until the given ending token is found |
| // (normally a ')' for a function call). |
| std::vector<fxl::RefPtr<ExprNode>> ParseExpressionList(ExprTokenType stop_before); |
| |
| // These handlers will be passed a token that was just consumed, so the current state of the |
| // parser will point to the *next* token (or the end). |
| fxl::RefPtr<ExprNode> AmpersandPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> BadToken(const ExprToken& token); |
| fxl::RefPtr<ExprNode> BinaryOpInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> DotOrArrowInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> LeftParenPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> LeftBracketPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> LeftParenInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> LeftSquareInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> RustCastInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> QuestionInfix(fxl::RefPtr<ExprNode> left, const ExprToken& token); |
| fxl::RefPtr<ExprNode> LiteralPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> UnaryPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> NamePrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> StarPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> CastPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> SizeofPrefix(const ExprToken& token); |
| fxl::RefPtr<ExprNode> IfPrefix(const ExprToken& token); |
| |
| // Returns true if the next token is the given type. |
| bool LookAhead(ExprTokenType type) const; |
| |
| // Returns the next token or the invalid token if nothing is left. Advances to the next token. |
| const ExprToken& Consume(); |
| |
| // Consumes a token of the given type, returning it if there was one available and the type |
| // matches. Otherwise, sets the error condition using the given message and returns a reference to |
| // an invalid token. |
| // |
| // If the error_token is provided (it's not kInvalid type) the error will be blamed on it. |
| // Otherwise, the next token checked will be blamed. |
| const ExprToken& Consume(ExprTokenType type, const char* error_msg, |
| const ExprToken& error_token = ExprToken()); |
| |
| // Like Consume() but remaps two adjacent ">" tokens to a shift right ">>" operator. This is used |
| // in cases where you need to handle shifts and know that it's not part of an identifier. See |
| // IsCurTokenShiftRight(). |
| ExprToken ConsumeWithShiftTokenConversion(); |
| |
| // Reads a sequence of cv-qualifiers (plus "restrict" for C) and appends to the vector in order. |
| // Only matching tokens are consumed, it stops consuming at the next non-qualifier. |
| // |
| // Duplicate qualifications will trigger errors (has_error() will be set). The input vector is |
| // *not* reset so this can be used to add qualifiers to an existing set while also triggering |
| // errors for duplicates for the additions. |
| void ConsumeCVQualifier(std::vector<DwarfTag>* qual); |
| |
| // Applies the given type modifier tags to the given input in order and returns the newly |
| // qualified type. |
| fxl::RefPtr<Type> ApplyQualifiers(fxl::RefPtr<Type> input, const std::vector<DwarfTag>& qual); |
| |
| void SetError(const ExprToken& token, Err err); |
| void SetError(const ExprToken& token, std::string msg); |
| |
| // Sets the error referencing the current token if there is one. If the current token is at the |
| // end, " Hit the end of input instead." will be appended to the message. |
| void SetErrorAtCur(std::string msg); |
| |
| // Returns true if the current token is the first of a pair of adjacent ">" tokens that might |
| // compose a shift right token (">>"). Because of ambiguity, the tokenizer always tokenizes these |
| // separately and we have to decide based on context what it is. |
| bool IsCurTokenShiftRight() const; |
| bool IsCurTokenShiftRightEquals() const; |
| |
| // Equivalent to cur_token().precedence except this remaps two adjacent ">" to a ">>" precedence. |
| // See IsCurTokenShiftRight(). |
| int CurPrecedenceWithShiftTokenConversion() const; |
| |
| // Call this only if !at_end(). |
| const ExprToken& cur_token() const { return tokens_[cur_]; } |
| |
| bool has_error() const { return err_.has_error(); } |
| bool at_end() const { return cur_ == tokens_.size(); } |
| |
| static const DispatchInfo& DispatchForToken(const ExprToken& token); |
| static DispatchInfo kDispatchInfo[]; |
| |
| ExprLanguage language_; |
| |
| // Possibly null, see constructor. |
| NameLookupCallback name_lookup_callback_; |
| |
| std::vector<ExprToken> tokens_; |
| size_t cur_ = 0; // Current index into tokens_. |
| |
| // On error, the message and token where an error was encountered. |
| Err err_; |
| ExprToken error_token_; |
| |
| // This is a kInvalid token that we can return in error cases without having to reference |
| // something in the tokens_ array. |
| static const ExprToken kInvalidToken; |
| }; |
| |
| } // namespace zxdb |
| |
| #endif // SRC_DEVELOPER_DEBUG_ZXDB_EXPR_EXPR_PARSER_H_ |