| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_ |
| #define SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_ |
| |
#include <iostream>
#include <memory>
#include <ostream>
#include <string>
#include <string_view>

#include "src/lib/fidl_codec/semantic.h"
| |
| namespace fidl_codec { |
| |
| class InterfaceMethod; |
| class LibraryLoader; |
| |
| namespace semantic { |
| |
// A position within a text buffer: the buffer plus an iterator into it.
//
// Only a reference to the buffer is stored, so the string given to the constructor must stay
// alive (and unmodified, to keep the iterator valid) for as long as the Location is used.
class Location {
 public:
  Location(const std::string& buffer, std::string::const_iterator location)
      : buffer_(buffer), location_{location} {}

  // The text the location refers to.
  const std::string& buffer() const {
    return buffer_;
  }

  // The position within buffer().
  std::string::const_iterator location() const {
    return location_;
  }

 private:
  // The text we are parsing (held by reference, not copied).
  const std::string& buffer_;
  // Position within buffer_.
  const std::string::const_iterator location_;
};
| |
| // Handles the parser errors. |
| class ParserErrors { |
| public: |
| explicit ParserErrors(std::ostream& os = std::cerr) : os_(os) {} |
| |
| int error_count() const { return error_count_; } |
| bool has_error() const { return error_count_ > 0; } |
| |
| // Adds a global error (not associated to a location in the buffer). |
| std::ostream& AddError(); |
| |
| // Adds an error at the specified location. |
| std::ostream& AddError(const Location& location); |
| |
| private: |
| // The stream which receives the errors. |
| std::ostream& os_; |
| // The number of errors which have been generated. |
| int error_count_ = 0; |
| }; |
| |
// Every kind of lexical token the lexer can reduce.
enum class LexicalToken {
  kEof,               // The end of the file has been found.
  kIdentifier,        // An identifier; may contain dots when allow_dots_in_identifiers_ is true.
  kString,            // A string (delimited by single quotes).
  kLeftBrace,         // A left brace: {
  kRightBrace,        // A right brace: }
  kLeftParenthesis,   // A left parenthesis: (
  kRightParenthesis,  // A right parenthesis: )
  kColon,             // One colon: :
  kColonColon,        // Two colons: ::
  kComma,             // A comma: ,
  kDot,               // A dot: .
  kEqual,             // The equal sign: =
  kSemicolon,         // A semicolon: ;
  kSlash,             // A slash: /
};
| |
| // Parser for the language which defines semantic rules for FIDL methods. |
| class SemanticParser { |
| public: |
| SemanticParser(LibraryLoader* library_loader, const std::string& buffer, ParserErrors* errors) |
| : library_loader_(library_loader), buffer_(buffer), errors_(errors) { |
| next_ = buffer_.begin(); |
| NextLexicalToken(); |
| } |
| |
| // Returns the location of the current lexical token. |
| Location GetLocation() const { return Location(buffer_, current_location_); } |
| |
| // Adds an error at the current lexical token location. |
| std::ostream& AddError() { return errors_->AddError(GetLocation()); } |
| |
| // Reduce the next lexical token. The parser always has a current not used yet lexical token |
| // reduced by NextLexicalToken. |
| void NextLexicalToken(); |
| |
| // Skips text until a semicolon or a right brace are found. If a semicolon or a right brace are |
| // found, the parsing continues before the semicolon or the right brace. |
| void JumpToSemicolon(); |
| // Skips text until a semicolon or a right brace are found. If a semicolon is found, the parsing |
| // continues after the semicolon. If a right brace is found, the parsing continues before the |
| // right brace. |
| void SkipSemicolon(); |
| // Skips text until a semicolon or a right brace are found. The parsing continue after the |
| // semicolon or the right brace. If an embeded block is found (delimited by left and right |
| // braces), the block is skipped. |
| void SkipBlock(); |
| // Skips text until a right brace is found. The parsing continue after the right brace. If an |
| // embeded block is found (delimited by left and right braces), the block is skipped. |
| void SkipRightBrace(); |
| // Skips text until a right parenthesis is found. The parsing continue after the right |
| // parenthesis. If an embeded block is found (delimited by left and right braces or left and right |
| // parentheses), the block is skipped. If a semicolon is found, the parsing resumes before the |
| // semicolon. |
| void SkipRightParenthesis(); |
| |
| // Helpers to check that we currently have a specified lexical token. |
| bool Is(std::string_view keyword) { return IsIdentifier() && (current_string_ == keyword); } |
| bool IsEof() const { return current_lexical_token_ == LexicalToken::kEof; } |
| bool IsIdentifier() const { return current_lexical_token_ == LexicalToken::kIdentifier; } |
| bool IsString() const { return current_lexical_token_ == LexicalToken::kString; } |
| bool IsLeftBrace() const { return current_lexical_token_ == LexicalToken::kLeftBrace; } |
| bool IsRightBrace() const { return current_lexical_token_ == LexicalToken::kRightBrace; } |
| bool IsRightParenthesis() const { |
| return current_lexical_token_ == LexicalToken::kRightParenthesis; |
| } |
| bool IsColonColon() const { return current_lexical_token_ == LexicalToken::kColonColon; } |
| bool IsDot() const { return current_lexical_token_ == LexicalToken::kDot; } |
| bool IsEqual() const { return current_lexical_token_ == LexicalToken::kEqual; } |
| bool IsSemicolon() const { return current_lexical_token_ == LexicalToken::kSemicolon; } |
| bool IsSlash() const { return current_lexical_token_ == LexicalToken::kSlash; } |
| |
| // Helpers to check and consume a specified lexical token. |
| bool Consume(std::string_view keyword) { |
| bool result = IsIdentifier() && (current_string_ == keyword); |
| if (result) { |
| NextLexicalToken(); |
| } |
| return result; |
| } |
| bool Consume(LexicalToken token) { |
| bool result = current_lexical_token_ == token; |
| if (result) { |
| NextLexicalToken(); |
| } |
| return result; |
| } |
| bool ConsumeLeftBrace() { return Consume(LexicalToken::kLeftBrace); } |
| bool ConsumeRightBrace() { return Consume(LexicalToken::kRightBrace); } |
| bool ConsumeLeftParenthesis() { return Consume(LexicalToken::kLeftParenthesis); } |
| bool ConsumeRightParenthesis() { return Consume(LexicalToken::kRightParenthesis); } |
| bool ConsumeColon() { return Consume(LexicalToken::kColon); } |
| bool ConsumeDot() { return Consume(LexicalToken::kDot); } |
| bool ConsumeEqual() { return Consume(LexicalToken::kEqual); } |
| bool ConsumeSemicolon() { return Consume(LexicalToken::kSemicolon); } |
| bool ConsumeSlash() { return Consume(LexicalToken::kSlash); } |
| |
| // Helpers to check and consume a specified lexical token. If the token is not found, an error is |
| // generated. |
| bool Parse(std::string_view keyword) { |
| bool result = IsIdentifier() && (current_string_ == keyword); |
| if (result) { |
| NextLexicalToken(); |
| } else { |
| AddError() << "Keyword '" << keyword << "' expected.\n"; |
| } |
| return result; |
| } |
| bool Parse(LexicalToken token, std::string_view token_string) { |
| bool result = current_lexical_token_ == token; |
| if (result) { |
| NextLexicalToken(); |
| } else { |
| AddError() << "Symbol '" << token_string << "' expected.\n"; |
| } |
| return result; |
| } |
| bool ParseLeftBrace() { return Parse(LexicalToken::kLeftBrace, "{"); } |
| bool ParseRightBrace() { return Parse(LexicalToken::kRightBrace, "}"); } |
| bool ParseLeftParenthesis() { return Parse(LexicalToken::kLeftParenthesis, "("); } |
| bool ParseRightParenthesis() { return Parse(LexicalToken::kRightParenthesis, ")"); } |
| bool ParseColon() { return Parse(LexicalToken::kColon, ":"); } |
| bool ParseColonColon() { return Parse(LexicalToken::kColonColon, "::"); } |
| bool ParseComma() { return Parse(LexicalToken::kComma, ","); } |
| bool ParseEqual() { return Parse(LexicalToken::kEqual, "="); } |
| bool ParseSemicolon() { return Parse(LexicalToken::kSemicolon, ";"); } |
| |
| // Returns the current string. Escaped characters are resolved. |
| // Then it advances to the next lexical item. |
| std::string ConsumeString(); |
| |
| // Parses a file which contains handle semantic rules. |
| void ParseSemantic(); |
| // Parses a library block. |
| void ParseLibrary(); |
| // Parses a method (semantic rules and short displays). |
| void ParseMethod(InterfaceMethod* method); |
| // Parses an expression to display. |
| std::unique_ptr<DisplayExpression> ParseDisplayExpression(); |
| // Parses an assignment (that is a semantic rule). |
| void ParseAssignment(MethodSemantic* method_semantic); |
| // Parses an expression. |
| std::unique_ptr<Expression> ParseExpression(); |
| // Parses a multiplicative expression (a factor). |
| std::unique_ptr<Expression> ParseMultiplicativeExpression(); |
| // Parses a field access expression. |
| std::unique_ptr<Expression> ParseAccessExpression(); |
| // Parses terminal expressions. |
| std::unique_ptr<Expression> ParseTerminalExpression(); |
| // Parses a handle description: HandleDescription(type, path). |
| std::unique_ptr<Expression> ParseHandleDescription(); |
| |
| private: |
| // Lexical reduction of an identifier. |
| void LexerIdentifier(); |
| |
| // Lexical reduction of a string. |
| void LexerString(); |
| |
| // The library loader for which we are parsing the semantic rules. The field semantic from |
| // protocol methods is assigned when a rule is parsed. |
| LibraryLoader* const library_loader_; |
| // The text buffer we are currently parsing. |
| const std::string& buffer_; |
| // The error handling object. |
| ParserErrors* errors_; |
| // Location in the buffer of the last lexical token reduced by NextLexicalToken. |
| std::string::const_iterator current_location_; |
| // Next location in the buffer which will be analyzed by NextLexicalToken. |
| std::string::const_iterator next_; |
| // Last lexical token reduced by NextLexicalToken. |
| LexicalToken current_lexical_token_ = LexicalToken::kEof; |
| // For LexicalToken::kIdentifier, the value of the identifier. |
| std::string_view current_string_; |
| // When this field is true, LexerIdentifier accepts dots within the identifiers. This is used to |
| // be able to parse library names like "fuchsia.shell". |
| bool allow_dots_in_identifiers_ = false; |
| // True when we are doing error recovery to ignore unknown characters. |
| bool ignore_unknown_characters_ = false; |
| |
| // Used to define a scope for which unknown characters are ignored. |
| class IgnoreUnknownCharacters { |
| public: |
| IgnoreUnknownCharacters(SemanticParser* parser) |
| : parser_(parser), saved_value_(parser_->ignore_unknown_characters_) { |
| parser->ignore_unknown_characters_ = true; |
| } |
| ~IgnoreUnknownCharacters() { parser_->ignore_unknown_characters_ = saved_value_; } |
| |
| private: |
| SemanticParser* const parser_; |
| const bool saved_value_; |
| }; |
| }; |
| |
| } // namespace semantic |
| } // namespace fidl_codec |
| |
| #endif // SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_ |