// src/lib/fidl_codec/semantic_parser.h - fuchsia - Git at Google

 // Copyright 2020 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_
 #define SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_

 #include <iostream>
 #include <memory>
 #include <ostream>
 #include <string>
 #include <string_view>

 #include "src/lib/fidl_codec/semantic.h"

 namespace fidl_codec {

 class ProtocolMethod;
 class LibraryLoader;

 namespace semantic {

 // A position inside a text buffer: a reference to the buffer plus an iterator into it.
 // Only a reference to the buffer is stored, so the buffer must stay alive for as long as the
 // Location is used.
 class Location {
  public:
   // Binds this location to |buffer| at the position designated by |location|.
   Location(const std::string& buffer, std::string::const_iterator location)
       : buffer_{buffer}, location_{location} {}

   // The whole text this location refers to.
   const std::string& buffer() const { return buffer_; }

   // The position within buffer().
   std::string::const_iterator location() const { return location_; }

  private:
   // The text being parsed (not owned).
   const std::string& buffer_;
   // Where, inside buffer_, this location points.
   const std::string::const_iterator location_;
 };

 // Error sink for the parser: holds the stream which receives the errors and the number of
 // errors generated so far.
 class ParserErrors {
  public:
   // |os| is the stream which receives the errors (std::cerr when omitted). Only a reference is
   // kept, so the stream must outlive this object.
   explicit ParserErrors(std::ostream& os = std::cerr) : os_{os} {}

   // Number of errors which have been generated.
   int error_count() const { return error_count_; }

   // True when at least one error has been generated.
   bool has_error() const { return error_count() > 0; }

   // Adds a global error, that is an error which is not tied to a location in the buffer.
   // Callers append the error message to the returned stream.
   std::ostream& AddError();

   // Adds an error tied to |location|. Callers append the error message to the returned stream.
   std::ostream& AddError(const Location& location);

  private:
   // Destination of the errors (not owned).
   std::ostream& os_;
   // How many errors have been generated.
   int error_count_{0};
 };

 // Every kind of lexical token the lexer can reduce.
 enum class LexicalToken {
   // End of the input: there is nothing left to reduce.
   kEof,
   // Identifier. Dots are accepted inside the identifier when
   // SemanticParser::allow_dots_in_identifiers_ is true.
   kIdentifier,
   // String literal, delimited by single quotes.
   kString,
   // {
   kLeftBrace,
   // }
   kRightBrace,
   // (
   kLeftParenthesis,
   // )
   kRightParenthesis,
   // : (a single colon)
   kColon,
   // :: (two colons)
   kColonColon,
   // ,
   kComma,
   // .
   kDot,
   // =
   kEqual,
   // ;
   kSemicolon,
   // /
   kSlash,
 };

 // Parser for the language which defines semantic rules for FIDL methods.
 //
 // The parser keeps a reference to the text buffer (the buffer must therefore outlive the parser)
 // and always holds one current lexical token: the last one reduced by NextLexicalToken, which has
 // not been used yet. Errors are reported through the ParserErrors object given at construction.
 class SemanticParser {
  public:
   // Creates a parser for |buffer| and reduces the first lexical token.
   // |library_loader|, |buffer| and |errors| are not owned: only pointers/references are stored.
   SemanticParser(LibraryLoader* library_loader, const std::string& buffer, ParserErrors* errors)
       : library_loader_(library_loader),
         buffer_(buffer),
         errors_(errors),
         // Both iterators start at the beginning of the buffer so that GetLocation() never reads
         // an uninitialized iterator.
         current_location_(buffer_.begin()),
         next_(buffer_.begin()) {
     NextLexicalToken();
   }

   // Returns the location of the current lexical token.
   Location GetLocation() const { return Location(buffer_, current_location_); }

   // Adds an error at the current lexical token location. The caller appends the error message
   // to the returned stream.
   std::ostream& AddError() { return errors_->AddError(GetLocation()); }

   // Reduces the next lexical token. The parser always has a current, not yet used, lexical token
   // reduced by NextLexicalToken.
   void NextLexicalToken();

   // Skips text until a semicolon or a right brace is found. If a semicolon or a right brace is
   // found, the parsing continues before the semicolon or the right brace.
   void JumpToSemicolon();
   // Skips text until a semicolon or a right brace is found. If a semicolon is found, the parsing
   // continues after the semicolon. If a right brace is found, the parsing continues before the
   // right brace.
   void SkipSemicolon();
   // Skips text until a semicolon or a right brace is found. The parsing continues after the
   // semicolon or the right brace. If an embedded block is found (delimited by left and right
   // braces), the block is skipped.
   void SkipBlock();
   // Skips text until a right brace is found. The parsing continues after the right brace. If an
   // embedded block is found (delimited by left and right braces), the block is skipped.
   void SkipRightBrace();
   // Skips text until a right parenthesis is found. The parsing continues after the right
   // parenthesis. If an embedded block is found (delimited by left and right braces or left and
   // right parentheses), the block is skipped. If a semicolon is found, the parsing resumes before
   // the semicolon.
   void SkipRightParenthesis();

   // Helpers to check that we currently have a specified lexical token. None of them consumes the
   // token.
   // Is(keyword) is true when the current token is an identifier whose text is exactly |keyword|.
   bool Is(std::string_view keyword) const {
     return IsIdentifier() && (current_string_ == keyword);
   }
   bool IsEof() const { return current_lexical_token_ == LexicalToken::kEof; }
   bool IsIdentifier() const { return current_lexical_token_ == LexicalToken::kIdentifier; }
   bool IsString() const { return current_lexical_token_ == LexicalToken::kString; }
   bool IsLeftBrace() const { return current_lexical_token_ == LexicalToken::kLeftBrace; }
   bool IsRightBrace() const { return current_lexical_token_ == LexicalToken::kRightBrace; }
   bool IsRightParenthesis() const {
     return current_lexical_token_ == LexicalToken::kRightParenthesis;
   }
   bool IsColonColon() const { return current_lexical_token_ == LexicalToken::kColonColon; }
   bool IsDot() const { return current_lexical_token_ == LexicalToken::kDot; }
   bool IsEqual() const { return current_lexical_token_ == LexicalToken::kEqual; }
   bool IsSemicolon() const { return current_lexical_token_ == LexicalToken::kSemicolon; }
   bool IsSlash() const { return current_lexical_token_ == LexicalToken::kSlash; }

   // Helpers to check and consume a specified lexical token. When the current token matches, the
   // parser advances to the next token and true is returned. Otherwise nothing is consumed, no
   // error is generated and false is returned.
   bool Consume(std::string_view keyword) {
     if (!Is(keyword)) {
       return false;
     }
     NextLexicalToken();
     return true;
   }
   bool Consume(LexicalToken token) {
     if (current_lexical_token_ != token) {
       return false;
     }
     NextLexicalToken();
     return true;
   }
   bool ConsumeLeftBrace() { return Consume(LexicalToken::kLeftBrace); }
   bool ConsumeRightBrace() { return Consume(LexicalToken::kRightBrace); }
   bool ConsumeLeftParenthesis() { return Consume(LexicalToken::kLeftParenthesis); }
   bool ConsumeRightParenthesis() { return Consume(LexicalToken::kRightParenthesis); }
   bool ConsumeColon() { return Consume(LexicalToken::kColon); }
   bool ConsumeDot() { return Consume(LexicalToken::kDot); }
   bool ConsumeEqual() { return Consume(LexicalToken::kEqual); }
   bool ConsumeSemicolon() { return Consume(LexicalToken::kSemicolon); }
   bool ConsumeSlash() { return Consume(LexicalToken::kSlash); }

   // Helpers to check and consume a specified lexical token. If the token is not found, an error is
   // generated at the current token location, nothing is consumed and false is returned.
   bool Parse(std::string_view keyword) {
     if (Consume(keyword)) {
       return true;
     }
     AddError() << "Keyword '" << keyword << "' expected.\n";
     return false;
   }
   // |token_string| is the textual form of |token|; it is only used for the error message.
   bool Parse(LexicalToken token, std::string_view token_string) {
     if (Consume(token)) {
       return true;
     }
     AddError() << "Symbol '" << token_string << "' expected.\n";
     return false;
   }
   bool ParseLeftBrace() { return Parse(LexicalToken::kLeftBrace, "{"); }
   bool ParseRightBrace() { return Parse(LexicalToken::kRightBrace, "}"); }
   bool ParseLeftParenthesis() { return Parse(LexicalToken::kLeftParenthesis, "("); }
   bool ParseRightParenthesis() { return Parse(LexicalToken::kRightParenthesis, ")"); }
   bool ParseColon() { return Parse(LexicalToken::kColon, ":"); }
   bool ParseColonColon() { return Parse(LexicalToken::kColonColon, "::"); }
   bool ParseComma() { return Parse(LexicalToken::kComma, ","); }
   bool ParseEqual() { return Parse(LexicalToken::kEqual, "="); }
   bool ParseSemicolon() { return Parse(LexicalToken::kSemicolon, ";"); }

   // Returns the current string. Escaped characters are resolved.
   // Then it advances to the next lexical item.
   std::string ConsumeString();

   // Parses a file which contains handle semantic rules.
   void ParseSemantic();
   // Parses a library block.
   void ParseLibrary();
   // Parses a method (semantic rules and short displays).
   void ParseMethod(ProtocolMethod* method);
   // Parses an expression to display.
   std::unique_ptr<DisplayExpression> ParseDisplayExpression();
   // Parses an assignment (that is a semantic rule).
   void ParseAssignment(MethodSemantic* method_semantic);
   // Parses an expression.
   std::unique_ptr<Expression> ParseExpression();
   // Parses a multiplicative expression (a factor).
   std::unique_ptr<Expression> ParseMultiplicativeExpression();
   // Parses a field access expression.
   std::unique_ptr<Expression> ParseAccessExpression();
   // Parses terminal expressions.
   std::unique_ptr<Expression> ParseTerminalExpression();
   // Parses a handle description: HandleDescription(type, path).
   std::unique_ptr<Expression> ParseHandleDescription();

  private:
   // Lexical reduction of an identifier.
   void LexerIdentifier();

   // Lexical reduction of a string.
   void LexerString();

   // The library loader for which we are parsing the semantic rules. The field semantic from
   // protocol methods is assigned when a rule is parsed.
   LibraryLoader* const library_loader_;
   // The text buffer we are currently parsing (not owned).
   const std::string& buffer_;
   // The error handling object (not owned).
   ParserErrors* errors_;
   // Location in the buffer of the last lexical token reduced by NextLexicalToken.
   std::string::const_iterator current_location_;
   // Next location in the buffer which will be analyzed by NextLexicalToken.
   std::string::const_iterator next_;
   // Last lexical token reduced by NextLexicalToken.
   LexicalToken current_lexical_token_ = LexicalToken::kEof;
   // For LexicalToken::kIdentifier, the value of the identifier.
   // NOTE(review): presumably a view into buffer_ - confirm against the lexer implementation.
   std::string_view current_string_;
   // When this field is true, LexerIdentifier accepts dots within the identifiers. This is used to
   // be able to parse library names like "fuchsia.shell".
   bool allow_dots_in_identifiers_ = false;
   // True when we are doing error recovery to ignore unknown characters.
   bool ignore_unknown_characters_ = false;

   // Used to define a scope for which unknown characters are ignored: the constructor saves the
   // parser's ignore_unknown_characters_ flag and sets it to true, the destructor restores the
   // saved value.
   class IgnoreUnknownCharacters {
    public:
     explicit IgnoreUnknownCharacters(SemanticParser* parser)
         : parser_(parser), saved_value_(parser->ignore_unknown_characters_) {
       parser_->ignore_unknown_characters_ = true;
     }
     ~IgnoreUnknownCharacters() { parser_->ignore_unknown_characters_ = saved_value_; }

     // Not copyable: a copy would restore the saved value a second time.
     IgnoreUnknownCharacters(const IgnoreUnknownCharacters&) = delete;
     IgnoreUnknownCharacters& operator=(const IgnoreUnknownCharacters&) = delete;

    private:
     SemanticParser* const parser_;
     const bool saved_value_;
   };
 };

 }  // namespace semantic
 }  // namespace fidl_codec

 #endif  // SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_
	// Copyright 2020 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_
	#define SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_

	#include <iostream>
	#include <memory>
	#include <ostream>
	#include <string>

	#include "src/lib/fidl_codec/semantic.h"

	namespace fidl_codec {

	class ProtocolMethod;
	class LibraryLoader;

	namespace semantic {

	// A position inside a text buffer: a reference to the buffer plus an iterator into it.
	// Only a reference to the buffer is stored, so the buffer must stay alive for as long as the
	// Location is used.
	class Location {
	 public:
	  // Binds this location to |buffer| at the position designated by |location|.
	  Location(const std::string& buffer, std::string::const_iterator location)
	      : buffer_{buffer}, location_{location} {}

	  // The whole text this location refers to.
	  const std::string& buffer() const { return buffer_; }

	  // The position within buffer().
	  std::string::const_iterator location() const { return location_; }

	 private:
	  // The text being parsed (not owned).
	  const std::string& buffer_;
	  // Where, inside buffer_, this location points.
	  const std::string::const_iterator location_;
	};

	// Error sink for the parser: holds the stream which receives the errors and the number of
	// errors generated so far.
	class ParserErrors {
	 public:
	  // |os| is the stream which receives the errors (std::cerr when omitted). Only a reference is
	  // kept, so the stream must outlive this object.
	  explicit ParserErrors(std::ostream& os = std::cerr) : os_{os} {}

	  // Number of errors which have been generated.
	  int error_count() const { return error_count_; }

	  // True when at least one error has been generated.
	  bool has_error() const { return error_count() > 0; }

	  // Adds a global error, that is an error which is not tied to a location in the buffer.
	  // Callers append the error message to the returned stream.
	  std::ostream& AddError();

	  // Adds an error tied to |location|. Callers append the error message to the returned stream.
	  std::ostream& AddError(const Location& location);

	 private:
	  // Destination of the errors (not owned).
	  std::ostream& os_;
	  // How many errors have been generated.
	  int error_count_{0};
	};

	// Every kind of lexical token the lexer can reduce.
	enum class LexicalToken {
	  // End of the input: there is nothing left to reduce.
	  kEof,
	  // Identifier. Dots are accepted inside the identifier when
	  // SemanticParser::allow_dots_in_identifiers_ is true.
	  kIdentifier,
	  // String literal, delimited by single quotes.
	  kString,
	  // {
	  kLeftBrace,
	  // }
	  kRightBrace,
	  // (
	  kLeftParenthesis,
	  // )
	  kRightParenthesis,
	  // : (a single colon)
	  kColon,
	  // :: (two colons)
	  kColonColon,
	  // ,
	  kComma,
	  // .
	  kDot,
	  // =
	  kEqual,
	  // ;
	  kSemicolon,
	  // /
	  kSlash,
	};

	// Parser for the language which defines semantic rules for FIDL methods.
	//
	// The parser keeps a reference to the text buffer (the buffer must therefore outlive the parser)
	// and always holds one current lexical token: the last one reduced by NextLexicalToken, which has
	// not been used yet. Errors are reported through the ParserErrors object given at construction.
	class SemanticParser {
	 public:
	  // Creates a parser for |buffer| and reduces the first lexical token.
	  // |library_loader|, |buffer| and |errors| are not owned: only pointers/references are stored.
	  SemanticParser(LibraryLoader* library_loader, const std::string& buffer, ParserErrors* errors)
	      : library_loader_(library_loader),
	        buffer_(buffer),
	        errors_(errors),
	        // Both iterators start at the beginning of the buffer so that GetLocation() never reads
	        // an uninitialized iterator.
	        current_location_(buffer_.begin()),
	        next_(buffer_.begin()) {
	    NextLexicalToken();
	  }

	  // Returns the location of the current lexical token.
	  Location GetLocation() const { return Location(buffer_, current_location_); }

	  // Adds an error at the current lexical token location. The caller appends the error message
	  // to the returned stream.
	  std::ostream& AddError() { return errors_->AddError(GetLocation()); }

	  // Reduces the next lexical token. The parser always has a current, not yet used, lexical token
	  // reduced by NextLexicalToken.
	  void NextLexicalToken();

	  // Skips text until a semicolon or a right brace is found. If a semicolon or a right brace is
	  // found, the parsing continues before the semicolon or the right brace.
	  void JumpToSemicolon();
	  // Skips text until a semicolon or a right brace is found. If a semicolon is found, the parsing
	  // continues after the semicolon. If a right brace is found, the parsing continues before the
	  // right brace.
	  void SkipSemicolon();
	  // Skips text until a semicolon or a right brace is found. The parsing continues after the
	  // semicolon or the right brace. If an embedded block is found (delimited by left and right
	  // braces), the block is skipped.
	  void SkipBlock();
	  // Skips text until a right brace is found. The parsing continues after the right brace. If an
	  // embedded block is found (delimited by left and right braces), the block is skipped.
	  void SkipRightBrace();
	  // Skips text until a right parenthesis is found. The parsing continues after the right
	  // parenthesis. If an embedded block is found (delimited by left and right braces or left and
	  // right parentheses), the block is skipped. If a semicolon is found, the parsing resumes before
	  // the semicolon.
	  void SkipRightParenthesis();

	  // Helpers to check that we currently have a specified lexical token. None of them consumes the
	  // token.
	  // Is(keyword) is true when the current token is an identifier whose text is exactly |keyword|.
	  bool Is(std::string_view keyword) const {
	    return IsIdentifier() && (current_string_ == keyword);
	  }
	  bool IsEof() const { return current_lexical_token_ == LexicalToken::kEof; }
	  bool IsIdentifier() const { return current_lexical_token_ == LexicalToken::kIdentifier; }
	  bool IsString() const { return current_lexical_token_ == LexicalToken::kString; }
	  bool IsLeftBrace() const { return current_lexical_token_ == LexicalToken::kLeftBrace; }
	  bool IsRightBrace() const { return current_lexical_token_ == LexicalToken::kRightBrace; }
	  bool IsRightParenthesis() const {
	    return current_lexical_token_ == LexicalToken::kRightParenthesis;
	  }
	  bool IsColonColon() const { return current_lexical_token_ == LexicalToken::kColonColon; }
	  bool IsDot() const { return current_lexical_token_ == LexicalToken::kDot; }
	  bool IsEqual() const { return current_lexical_token_ == LexicalToken::kEqual; }
	  bool IsSemicolon() const { return current_lexical_token_ == LexicalToken::kSemicolon; }
	  bool IsSlash() const { return current_lexical_token_ == LexicalToken::kSlash; }

	  // Helpers to check and consume a specified lexical token. When the current token matches, the
	  // parser advances to the next token and true is returned. Otherwise nothing is consumed, no
	  // error is generated and false is returned.
	  bool Consume(std::string_view keyword) {
	    if (!Is(keyword)) {
	      return false;
	    }
	    NextLexicalToken();
	    return true;
	  }
	  bool Consume(LexicalToken token) {
	    if (current_lexical_token_ != token) {
	      return false;
	    }
	    NextLexicalToken();
	    return true;
	  }
	  bool ConsumeLeftBrace() { return Consume(LexicalToken::kLeftBrace); }
	  bool ConsumeRightBrace() { return Consume(LexicalToken::kRightBrace); }
	  bool ConsumeLeftParenthesis() { return Consume(LexicalToken::kLeftParenthesis); }
	  bool ConsumeRightParenthesis() { return Consume(LexicalToken::kRightParenthesis); }
	  bool ConsumeColon() { return Consume(LexicalToken::kColon); }
	  bool ConsumeDot() { return Consume(LexicalToken::kDot); }
	  bool ConsumeEqual() { return Consume(LexicalToken::kEqual); }
	  bool ConsumeSemicolon() { return Consume(LexicalToken::kSemicolon); }
	  bool ConsumeSlash() { return Consume(LexicalToken::kSlash); }

	  // Helpers to check and consume a specified lexical token. If the token is not found, an error is
	  // generated at the current token location, nothing is consumed and false is returned.
	  bool Parse(std::string_view keyword) {
	    if (Consume(keyword)) {
	      return true;
	    }
	    AddError() << "Keyword '" << keyword << "' expected.\n";
	    return false;
	  }
	  // |token_string| is the textual form of |token|; it is only used for the error message.
	  bool Parse(LexicalToken token, std::string_view token_string) {
	    if (Consume(token)) {
	      return true;
	    }
	    AddError() << "Symbol '" << token_string << "' expected.\n";
	    return false;
	  }
	  bool ParseLeftBrace() { return Parse(LexicalToken::kLeftBrace, "{"); }
	  bool ParseRightBrace() { return Parse(LexicalToken::kRightBrace, "}"); }
	  bool ParseLeftParenthesis() { return Parse(LexicalToken::kLeftParenthesis, "("); }
	  bool ParseRightParenthesis() { return Parse(LexicalToken::kRightParenthesis, ")"); }
	  bool ParseColon() { return Parse(LexicalToken::kColon, ":"); }
	  bool ParseColonColon() { return Parse(LexicalToken::kColonColon, "::"); }
	  bool ParseComma() { return Parse(LexicalToken::kComma, ","); }
	  bool ParseEqual() { return Parse(LexicalToken::kEqual, "="); }
	  bool ParseSemicolon() { return Parse(LexicalToken::kSemicolon, ";"); }

	  // Returns the current string. Escaped characters are resolved.
	  // Then it advances to the next lexical item.
	  std::string ConsumeString();

	  // Parses a file which contains handle semantic rules.
	  void ParseSemantic();
	  // Parses a library block.
	  void ParseLibrary();
	  // Parses a method (semantic rules and short displays).
	  void ParseMethod(ProtocolMethod* method);
	  // Parses an expression to display.
	  std::unique_ptr<DisplayExpression> ParseDisplayExpression();
	  // Parses an assignment (that is a semantic rule).
	  void ParseAssignment(MethodSemantic* method_semantic);
	  // Parses an expression.
	  std::unique_ptr<Expression> ParseExpression();
	  // Parses a multiplicative expression (a factor).
	  std::unique_ptr<Expression> ParseMultiplicativeExpression();
	  // Parses a field access expression.
	  std::unique_ptr<Expression> ParseAccessExpression();
	  // Parses terminal expressions.
	  std::unique_ptr<Expression> ParseTerminalExpression();
	  // Parses a handle description: HandleDescription(type, path).
	  std::unique_ptr<Expression> ParseHandleDescription();

	 private:
	  // Lexical reduction of an identifier.
	  void LexerIdentifier();

	  // Lexical reduction of a string.
	  void LexerString();

	  // The library loader for which we are parsing the semantic rules. The field semantic from
	  // protocol methods is assigned when a rule is parsed.
	  LibraryLoader* const library_loader_;
	  // The text buffer we are currently parsing (not owned).
	  const std::string& buffer_;
	  // The error handling object (not owned).
	  ParserErrors* errors_;
	  // Location in the buffer of the last lexical token reduced by NextLexicalToken.
	  std::string::const_iterator current_location_;
	  // Next location in the buffer which will be analyzed by NextLexicalToken.
	  std::string::const_iterator next_;
	  // Last lexical token reduced by NextLexicalToken.
	  LexicalToken current_lexical_token_ = LexicalToken::kEof;
	  // For LexicalToken::kIdentifier, the value of the identifier.
	  // NOTE(review): presumably a view into buffer_ - confirm against the lexer implementation.
	  std::string_view current_string_;
	  // When this field is true, LexerIdentifier accepts dots within the identifiers. This is used to
	  // be able to parse library names like "fuchsia.shell".
	  bool allow_dots_in_identifiers_ = false;
	  // True when we are doing error recovery to ignore unknown characters.
	  bool ignore_unknown_characters_ = false;

	  // Used to define a scope for which unknown characters are ignored: the constructor saves the
	  // parser's ignore_unknown_characters_ flag and sets it to true, the destructor restores the
	  // saved value.
	  class IgnoreUnknownCharacters {
	   public:
	    explicit IgnoreUnknownCharacters(SemanticParser* parser)
	        : parser_(parser), saved_value_(parser->ignore_unknown_characters_) {
	      parser_->ignore_unknown_characters_ = true;
	    }
	    ~IgnoreUnknownCharacters() { parser_->ignore_unknown_characters_ = saved_value_; }

	    // Not copyable: a copy would restore the saved value a second time.
	    IgnoreUnknownCharacters(const IgnoreUnknownCharacters&) = delete;
	    IgnoreUnknownCharacters& operator=(const IgnoreUnknownCharacters&) = delete;

	   private:
	    SemanticParser* const parser_;
	    const bool saved_value_;
	  };
	};

	} // namespace semantic
	} // namespace fidl_codec

	#endif // SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_