// blob: 979e2a3a8d6cdb67097b2bd88644157f9e1d0014 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_
#define SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_
#include <iostream>
#include <memory>
#include <ostream>
#include <string>
#include "src/lib/fidl_codec/semantic.h"
namespace fidl_codec {
class ProtocolMethod;
class LibraryLoader;
namespace semantic {
// A position inside a text buffer: the buffer plus an iterator into it.
//
// Only a reference to the buffer is stored (no copy is made), so the buffer must stay alive for as
// long as the Location is used.
class Location {
 public:
  // |buffer| is the text being parsed and |location| is the position of interest.
  // NOTE(review): |location| is presumably an iterator into |buffer| - this is not checked here.
  Location(const std::string& buffer, std::string::const_iterator location)
      : buffer_(buffer), location_(location) {}

  // The buffer given at construction.
  const std::string& buffer() const {
    return buffer_;
  }

  // The position given at construction.
  std::string::const_iterator location() const {
    return location_;
  }

 private:
  // The text which is being parsed (not owned).
  const std::string& buffer_;

  // Where we are within that text.
  const std::string::const_iterator location_;
};
// Receives the errors generated by the parser and keeps track of how many have been generated.
class ParserErrors {
 public:
  // |os| is the stream which receives the errors (std::cerr when not specified). Only a reference
  // to the stream is stored.
  explicit ParserErrors(std::ostream& os = std::cerr) : os_(os) {}

  // Number of errors generated so far.
  int error_count() const {
    return error_count_;
  }

  // True if at least one error has been generated.
  bool has_error() const {
    return error_count() > 0;
  }

  // Adds a global error (an error which is not tied to a location in the buffer). The error
  // message is written by the caller to the returned stream.
  std::ostream& AddError();

  // Adds an error for the specified location. The error message is written by the caller to the
  // returned stream.
  std::ostream& AddError(const Location& location);

 private:
  // Destination of the errors (not owned).
  std::ostream& os_;

  // How many errors have been generated.
  int error_count_ = 0;
};
// The kinds of lexical tokens the lexer can reduce.
enum class LexicalToken {
  // End of the file.
  kEof,
  // Identifier. When allow_dots_in_identifiers_ is true, an identifier can contain dots.
  kIdentifier,
  // String (delimited by single quotes).
  kString,
  // Left brace: {
  kLeftBrace,
  // Right brace: }
  kRightBrace,
  // Left parenthesis: (
  kLeftParenthesis,
  // Right parenthesis: )
  kRightParenthesis,
  // Single colon: :
  kColon,
  // Double colon: ::
  kColonColon,
  // Comma: ,
  kComma,
  // Dot: .
  kDot,
  // Equal sign: =
  kEqual,
  // Semicolon: ;
  kSemicolon,
  // Slash: /
  kSlash,
};
// Parser for the language which defines semantic rules for FIDL methods.
//
// The parser only stores a reference to the text buffer and a pointer to the error handler: both
// must outlive the parser.
class SemanticParser {
 public:
  // Creates a parser for |buffer|. Errors are reported to |errors|. Both iterators start at the
  // beginning of the buffer and the first lexical token is reduced right away so that a current
  // token is available as soon as the parser is constructed.
  SemanticParser(LibraryLoader* library_loader, const std::string& buffer, ParserErrors* errors)
      : library_loader_(library_loader),
        buffer_(buffer),
        errors_(errors),
        current_location_(buffer_.begin()),
        next_(buffer_.begin()) {
    NextLexicalToken();
  }

  // Returns the location of the current lexical token.
  Location GetLocation() const { return Location(buffer_, current_location_); }

  // Adds an error at the current lexical token location. The error message is written by the
  // caller to the returned stream.
  std::ostream& AddError() { return errors_->AddError(GetLocation()); }

  // Reduces the next lexical token. The parser always has a current, not yet used, lexical token
  // which has been reduced by NextLexicalToken.
  void NextLexicalToken();

  // Skips text until a semicolon or a right brace is found. If a semicolon or a right brace is
  // found, the parsing continues before the semicolon or the right brace.
  void JumpToSemicolon();

  // Skips text until a semicolon or a right brace is found. If a semicolon is found, the parsing
  // continues after the semicolon. If a right brace is found, the parsing continues before the
  // right brace.
  void SkipSemicolon();

  // Skips text until a semicolon or a right brace is found. The parsing continues after the
  // semicolon or the right brace. If an embedded block is found (delimited by left and right
  // braces), the block is skipped.
  void SkipBlock();

  // Skips text until a right brace is found. The parsing continues after the right brace. If an
  // embedded block is found (delimited by left and right braces), the block is skipped.
  void SkipRightBrace();

  // Skips text until a right parenthesis is found. The parsing continues after the right
  // parenthesis. If an embedded block is found (delimited by left and right braces or left and
  // right parentheses), the block is skipped. If a semicolon is found, the parsing resumes before
  // the semicolon.
  void SkipRightParenthesis();

  // Helpers to check that we currently have a specified lexical token. None of them consumes the
  // token.

  // True if the current token is an identifier whose text is exactly |keyword|.
  bool Is(std::string_view keyword) const {
    return IsIdentifier() && (current_string_ == keyword);
  }
  bool IsEof() const { return current_lexical_token_ == LexicalToken::kEof; }
  bool IsIdentifier() const { return current_lexical_token_ == LexicalToken::kIdentifier; }
  bool IsString() const { return current_lexical_token_ == LexicalToken::kString; }
  bool IsLeftBrace() const { return current_lexical_token_ == LexicalToken::kLeftBrace; }
  bool IsRightBrace() const { return current_lexical_token_ == LexicalToken::kRightBrace; }
  bool IsRightParenthesis() const {
    return current_lexical_token_ == LexicalToken::kRightParenthesis;
  }
  bool IsColonColon() const { return current_lexical_token_ == LexicalToken::kColonColon; }
  bool IsDot() const { return current_lexical_token_ == LexicalToken::kDot; }
  bool IsEqual() const { return current_lexical_token_ == LexicalToken::kEqual; }
  bool IsSemicolon() const { return current_lexical_token_ == LexicalToken::kSemicolon; }
  bool IsSlash() const { return current_lexical_token_ == LexicalToken::kSlash; }

  // Helpers to check and consume a specified lexical token. They return true (and advance to the
  // next lexical token) if the current token matches. Otherwise they return false and the current
  // token is left untouched. No error is generated.
  bool Consume(std::string_view keyword) {
    if (!Is(keyword)) {
      return false;
    }
    NextLexicalToken();
    return true;
  }
  bool Consume(LexicalToken token) {
    if (current_lexical_token_ != token) {
      return false;
    }
    NextLexicalToken();
    return true;
  }
  bool ConsumeLeftBrace() { return Consume(LexicalToken::kLeftBrace); }
  bool ConsumeRightBrace() { return Consume(LexicalToken::kRightBrace); }
  bool ConsumeLeftParenthesis() { return Consume(LexicalToken::kLeftParenthesis); }
  bool ConsumeRightParenthesis() { return Consume(LexicalToken::kRightParenthesis); }
  bool ConsumeColon() { return Consume(LexicalToken::kColon); }
  bool ConsumeDot() { return Consume(LexicalToken::kDot); }
  bool ConsumeEqual() { return Consume(LexicalToken::kEqual); }
  bool ConsumeSemicolon() { return Consume(LexicalToken::kSemicolon); }
  bool ConsumeSlash() { return Consume(LexicalToken::kSlash); }

  // Helpers to check and consume a specified lexical token. If the token is not found, an error is
  // generated at the current location, the current token is not consumed and false is returned.
  bool Parse(std::string_view keyword) {
    if (Consume(keyword)) {
      return true;
    }
    AddError() << "Keyword '" << keyword << "' expected.\n";
    return false;
  }
  // |token_string| is the text used to name the expected symbol in the error message.
  bool Parse(LexicalToken token, std::string_view token_string) {
    if (Consume(token)) {
      return true;
    }
    AddError() << "Symbol '" << token_string << "' expected.\n";
    return false;
  }
  bool ParseLeftBrace() { return Parse(LexicalToken::kLeftBrace, "{"); }
  bool ParseRightBrace() { return Parse(LexicalToken::kRightBrace, "}"); }
  bool ParseLeftParenthesis() { return Parse(LexicalToken::kLeftParenthesis, "("); }
  bool ParseRightParenthesis() { return Parse(LexicalToken::kRightParenthesis, ")"); }
  bool ParseColon() { return Parse(LexicalToken::kColon, ":"); }
  bool ParseColonColon() { return Parse(LexicalToken::kColonColon, "::"); }
  bool ParseComma() { return Parse(LexicalToken::kComma, ","); }
  bool ParseEqual() { return Parse(LexicalToken::kEqual, "="); }
  bool ParseSemicolon() { return Parse(LexicalToken::kSemicolon, ";"); }

  // Returns the current string. Escaped characters are resolved.
  // Then it advances to the next lexical item.
  std::string ConsumeString();

  // Parses a file which contains handle semantic rules.
  void ParseSemantic();

  // Parses a library block.
  void ParseLibrary();

  // Parses a method (semantic rules and short displays).
  void ParseMethod(ProtocolMethod* method);

  // Parses an expression to display.
  std::unique_ptr<DisplayExpression> ParseDisplayExpression();

  // Parses an assignment (that is a semantic rule).
  void ParseAssignment(MethodSemantic* method_semantic);

  // Parses an expression.
  std::unique_ptr<Expression> ParseExpression();

  // Parses a multiplicative expression (a factor).
  std::unique_ptr<Expression> ParseMultiplicativeExpression();

  // Parses a field access expression.
  std::unique_ptr<Expression> ParseAccessExpression();

  // Parses terminal expressions.
  std::unique_ptr<Expression> ParseTerminalExpression();

  // Parses a handle description: HandleDescription(type, path).
  std::unique_ptr<Expression> ParseHandleDescription();

 private:
  // Lexical reduction of an identifier.
  void LexerIdentifier();

  // Lexical reduction of a string.
  void LexerString();

  // The library loader for which we are parsing the semantic rules. The field semantic from
  // protocol methods is assigned when a rule is parsed.
  LibraryLoader* const library_loader_;

  // The text buffer we are currently parsing (not owned).
  const std::string& buffer_;

  // The error handling object (not owned).
  ParserErrors* errors_;

  // Location in the buffer of the last lexical token reduced by NextLexicalToken.
  std::string::const_iterator current_location_;

  // Next location in the buffer which will be analyzed by NextLexicalToken.
  std::string::const_iterator next_;

  // Last lexical token reduced by NextLexicalToken.
  LexicalToken current_lexical_token_ = LexicalToken::kEof;

  // For LexicalToken::kIdentifier, the value of the identifier.
  std::string_view current_string_;

  // When this field is true, LexerIdentifier accepts dots within the identifiers. This is used to
  // be able to parse library names like "fuchsia.shell".
  bool allow_dots_in_identifiers_ = false;

  // True when we are doing error recovery to ignore unknown characters.
  bool ignore_unknown_characters_ = false;

  // Used to define a scope for which unknown characters are ignored: the constructor saves the
  // parser's ignore_unknown_characters_ flag and sets it to true; the destructor restores the saved
  // value.
  class IgnoreUnknownCharacters {
   public:
    // Reads the flag through the |parser| argument (not through the parser_ member) so that the
    // initialization does not depend on the declaration order of the members.
    explicit IgnoreUnknownCharacters(SemanticParser* parser)
        : parser_(parser), saved_value_(parser->ignore_unknown_characters_) {
      parser->ignore_unknown_characters_ = true;
    }
    ~IgnoreUnknownCharacters() { parser_->ignore_unknown_characters_ = saved_value_; }

    // Not copyable: a copy would restore the saved value a second time when destroyed.
    IgnoreUnknownCharacters(const IgnoreUnknownCharacters&) = delete;
    IgnoreUnknownCharacters& operator=(const IgnoreUnknownCharacters&) = delete;

   private:
    // The parser whose flag is overridden for the lifetime of this object.
    SemanticParser* const parser_;
    // Value of the flag when this object was created.
    const bool saved_value_;
  };
};
} // namespace semantic
} // namespace fidl_codec
#endif // SRC_LIB_FIDL_CODEC_SEMANTIC_PARSER_H_