| // Copyright 2020 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/developer/shell/parser/parser.h" |
| |
| #include <lib/syslog/cpp/macros.h> |
| |
| #include "src/developer/shell/parser/ast.h" |
| #include "src/developer/shell/parser/combinators.h" |
| #include "src/developer/shell/parser/error.h" |
| #include "src/developer/shell/parser/text_match.h" |
| |
| namespace shell::parser { |
| namespace { |
| |
| // Defined with the token rules below; declared early because the helpers here refer to it. |
| ParseResult Whitespace(ParseResult prefix); |
| |
| // Base case of WSSeq: wraps a single parser so that optional whitespace is accepted on both |
| // sides of it. The variadic overload ends its recursion here, which is how the last parser of a |
| // sequence also gets optional trailing whitespace. |
| fit::function<ParseResult(ParseResult)> WSSeq(fit::function<ParseResult(ParseResult)> first) { |
| auto leading_ws = Maybe(Whitespace); |
| auto trailing_ws = Maybe(Whitespace); |
| return Seq(std::move(leading_ws), std::move(first), std::move(trailing_ws)); |
| } |
| |
| // Recursive case of WSSeq: optional whitespace, then `first`, then whatever WSSeq builds for the |
| // remaining parsers. |
| template <typename... Args> |
| fit::function<ParseResult(ParseResult)> WSSeq(fit::function<ParseResult(ParseResult)> first, |
| Args... args) { |
| auto rest = WSSeq(std::move(args)...); |
| return Seq(Maybe(Whitespace), std::move(first), std::move(rest)); |
| } |
| |
| // Defined with the token rules below; declared early so KW can check what follows a keyword. |
| ParseResult IdentifierCharacter(ParseResult prefix); |
| |
| // Build a parser for the keyword `keyword`, matched via Token<T>. The keyword must not be |
| // directly followed by an identifier character; when it is, the alternative inserts an |
| // "Expected space" error. |
| template <typename T = ast::Terminal> |
| fit::function<ParseResult(ParseResult)> KW(const std::string& keyword) { |
| auto word = Token<T>(keyword); |
| auto boundary = Alt(Not(IdentifierCharacter), ErInsert("Expected space")); |
| return Seq(std::move(word), std::move(boundary)); |
| } |
| |
| // Build a parser that matches the literal `token` via Token<T>. When the token is not present, |
| // the alternative inserts an error reading "Expected '<token>'". |
| template <typename T = ast::Terminal> |
| fit::function<ParseResult(ParseResult)> ExToken(const std::string& token) { |
| std::string message = "Expected '"; |
| message += token; |
| message += "'"; |
| return Alt(Token<T>(token), ErInsert(std::move(message))); |
| } |
| |
| // Token Rules ------------------------------------------------------------------------------------- |
| |
| // Matches a character from the identifier group: a-z, A-Z, 0-9, or underscore. |
| ParseResult IdentifierCharacter(ParseResult prefix) { |
| auto identifier_char = CharGroup("a-zA-Z0-9_"); |
| return identifier_char(std::move(prefix)); |
| } |
| |
| // Parses a run of whitespace into an ast::Whitespace node. The run is one or more of: |
| //   - a space, newline, carriage return, or tab |
| //   - a '#' comment: the '#', any characters other than newline, and the closing newline |
| // Note that the comment form requires the closing newline in order to match. |
| ParseResult Whitespace(ParseResult prefix) { |
| auto blank = AnyChar(" \n\r\t"); |
| auto comment = Seq(Token("#"), ZeroPlus(AnyCharBut("\n")), Token("\n")); |
| auto run = OnePlus(Alt(std::move(blank), std::move(comment))); |
| return NT<ast::Whitespace>(std::move(run))(std::move(prefix)); |
| } |
| |
| // Matches a character from the decimal digit group 0-9. |
| ParseResult Digit(ParseResult prefix) { |
| auto digit = CharGroup("0-9"); |
| return digit(std::move(prefix)); |
| } |
| |
| // Matches a character from the hexadecimal digit group: a-f, A-F, or 0-9. |
| ParseResult HexDigit(ParseResult prefix) { |
| auto hex_digit = CharGroup("a-fA-F0-9"); |
| return hex_digit(std::move(prefix)); |
| } |
| |
| // Parses one or more identifier characters as an ast::UnescapedIdentifier token. |
| ParseResult UnescapedIdentifier(ParseResult prefix) { |
| auto characters = OnePlus(IdentifierCharacter); |
| auto token = Token<ast::UnescapedIdentifier>(std::move(characters)); |
| return token(std::move(prefix)); |
| } |
| |
| // Matches a character that may appear unescaped in a path element: the input must not be at |
| // whitespace, and the character must not be one of the reserved characters in the AnyCharBut |
| // set below. |
| ParseResult PathCharacter(ParseResult prefix) { |
| auto not_whitespace = Not(Whitespace); |
| auto unreserved = AnyCharBut("`&;|/\\()[]{}"); |
| return Seq(std::move(not_whitespace), std::move(unreserved))(std::move(prefix)); |
| } |
| |
| // Parses one piece of a path element. The alternatives, in the order listed, are: |
| //   - a backslash followed by any character, as an ast::PathEscape token |
| //   - a run of one or more path characters, as an ast::PathElement token |
| //   - a backtick-quoted section: an opening backtick, zero or more non-backtick characters as |
| //     an ast::PathElement token, and a closing backtick (ExToken inserts an error if the |
| //     closing backtick is missing) |
| ParseResult PathElement(ParseResult prefix) { |
| auto escaped_char = Token<ast::PathEscape>(Seq(Token("\\"), AnyChar)); |
| auto plain_run = Token<ast::PathElement>(OnePlus(PathCharacter)); |
| auto quoted = |
| Seq(Token<ast::PathEscape>("`"), Token<ast::PathElement>(ZeroPlus(AnyCharBut("`"))), |
| ExToken<ast::PathEscape>("`")); |
| auto element = Alt(std::move(escaped_char), std::move(plain_run), std::move(quoted)); |
| return element(std::move(prefix)); |
| } |
| |
| // Grammar Rules ----------------------------------------------------------------------------------- |
| |
| // Parses an identifier into an ast::Identifier node. |
| //   myVariable |
| // If the input starts with a digit, the error "Identifier cannot begin with a digit" is inserted |
| // ahead of the identifier characters. |
| ParseResult Identifier(ParseResult prefix) { |
| auto leading_check = Alt(Not(Digit), ErInsert("Identifier cannot begin with a digit")); |
| auto identifier = NT<ast::Identifier>(Seq(std::move(leading_check), UnescapedIdentifier)); |
| return identifier(std::move(prefix)); |
| } |
| |
| // Parses a root path: one or more segments, each a "/" separator followed by at least one path |
| // element. There is no trailing slash. |
| //   /foo |
| //   /foo/bar |
| ParseResult RootPath(ParseResult prefix) { |
| auto segment = Seq(Token<ast::PathSeparator>("/"), OnePlus(PathElement)); |
| return OnePlus(std::move(segment))(std::move(prefix)); |
| } |
| |
| // Parses a path into an ast::Path node. |
| //   /foo |
| //   /foo/bar |
| //   /foo/bar/ |
| //   ./foo/bar/ |
| //   ./ |
| //   / |
| //   . |
| // The alternatives, in the order listed, are: |
| //   1. an optional leading ".", a root path, and an optional trailing "/" |
| //   2. an optional leading "." followed by a single "/" |
| //   3. a single "." |
| // NOTE(review): the optional trailing "/" in the first alternative is matched with plain Token |
| // rather than Token<ast::PathSeparator> like the other separators; confirm this is intended. |
| ParseResult Path(ParseResult prefix) { |
| auto rooted = Seq(Maybe(Token<ast::PathElement>(".")), RootPath, Maybe(Token("/"))); |
| auto bare_separator = |
| Seq(Maybe(Token<ast::PathElement>(".")), Token<ast::PathSeparator>("/")); |
| auto current_dir = Token<ast::PathElement>("."); |
| auto path = |
| NT<ast::Path>(Alt(std::move(rooted), std::move(bare_separator), std::move(current_dir))); |
| return path(std::move(prefix)); |
| } |
| |
| // Parses an unadorned decimal integer. |
| //   0 |
| //   12345 |
| //   12_345 |
| // The input is either a lone "0" that is not followed by another digit, or a number that does |
| // not start with "0": a group of digits followed by any number of "_"-prefixed digit groups. |
| // Each digit group is an ast::DecimalGroup token. |
| ParseResult DecimalInteger(ParseResult prefix) { |
| auto zero = Seq(Token<ast::DecimalGroup>("0"), Not(Digit)); |
| auto more_groups = ZeroPlus(Seq(Token("_"), Token<ast::DecimalGroup>(OnePlus(Digit)))); |
| auto nonzero = Seq(Not(Token("0")), Token<ast::DecimalGroup>(OnePlus(Digit)), |
| std::move(more_groups)); |
| return Alt(std::move(zero), std::move(nonzero))(std::move(prefix)); |
| } |
| |
| // Parses a hexadecimal integer introduced by "0x". |
| //   0x1234abcd |
| //   0x12_abcd |
| // After the prefix comes a group of hex digits followed by any number of "_"-prefixed groups. |
| // Each group is an ast::HexGroup token. |
| ParseResult HexInteger(ParseResult prefix) { |
| auto first_group = Token<ast::HexGroup>(OnePlus(HexDigit)); |
| auto more_groups = ZeroPlus(Seq(Token("_"), Token<ast::HexGroup>(OnePlus(HexDigit)))); |
| auto digits = Seq(std::move(first_group), std::move(more_groups)); |
| return Seq(Token("0x"), std::move(digits))(std::move(prefix)); |
| } |
| |
| // Parses an integer into an ast::Integer node. The hexadecimal form is listed ahead of the |
| // decimal form. |
| //   0 |
| //   12345 |
| //   12_345 |
| //   0x1234abcd |
| //   0x12_abcd |
| ParseResult Integer(ParseResult prefix) { |
| // TODO: Binary integers, once we ask the FIDL team about them. |
| auto integer = NT<ast::Integer>(Alt(HexInteger, DecimalInteger)); |
| return integer(std::move(prefix)); |
| } |
| |
| // Parse a Real. Not implemented yet: the input is ignored and ParseResult::kEnd is always |
| // returned. |
| ParseResult Real(ParseResult prefix) { |
| return ParseResult::kEnd; |
| } |
| |
| // Parses an escape sequence inside a string. |
| //   \n |
| //   \r |
| //   \u0000F0 |
| // Recognized forms are \n, \t, backslash-newline, \r, \\, \", and \u followed by |
| // Multi(6, HexDigit) (presumably exactly six hex digits), each as an ast::EscapeSequence token. |
| // Any other backslash falls through to the last alternative, which reports either a bad escape |
| // sequence or an escape sequence at the end of input. |
| ParseResult EscapeSequence(ParseResult prefix) { |
| auto unicode = Token<ast::EscapeSequence>( |
| Seq(Token<ast::EscapeSequence>("\\u"), Multi(6, HexDigit))); |
| auto bad_escape = Seq(Token("\\"), Alt(ErSkip("Bad escape sequence: '\\%MATCH%'", AnyChar), |
| ErInsert("Escape sequence at end of input"))); |
| auto escape = Alt(Token<ast::EscapeSequence>("\\n"), Token<ast::EscapeSequence>("\\t"), |
| Token<ast::EscapeSequence>("\\\n"), Token<ast::EscapeSequence>("\\r"), |
| Token<ast::EscapeSequence>("\\\\"), Token<ast::EscapeSequence>("\\\""), |
| std::move(unicode), std::move(bad_escape)); |
| return escape(std::move(prefix)); |
| } |
| |
| // Parses one entity of a string body: either a run of characters containing no newline, |
| // backslash, or double quote (as an ast::StringEntity token), or an escape sequence. |
| //   The quick brown fox jumped over the lazy dog. |
| ParseResult StringEntity(ParseResult prefix) { |
| auto plain_text = Token<ast::StringEntity>(OnePlus(AnyCharBut("\n\\\""))); |
| return Alt(std::move(plain_text), EscapeSequence)(std::move(prefix)); |
| } |
| |
| // Parses an ordinary double-quoted string literal into an ast::String node. |
| //   "The quick brown fox jumped over the lazy dog." |
| //   "A newline.\nA tab\tA code point\u0000F0" |
| // ExToken inserts an error if the closing quote is missing. |
| ParseResult NormalString(ParseResult prefix) { |
| auto quoted = Seq(Token("\""), ZeroPlus(StringEntity), ExToken("\"")); |
| return NT<ast::String>(std::move(quoted))(std::move(prefix)); |
| } |
| |
| // Parse a string literal. Only ordinary string literals are handled for now. |
| //   "The quick brown fox jumped over the lazy dog." |
| //   "A newline.\nA tab\tA code point\u0000F0" |
| // TODO: Decide on a MultiString syntax we like. |
| ParseResult String(ParseResult prefix) { |
| // Placeholder for when a MultiString rule exists: |
| //   return Alt(NormalString, MultiString)(prefix); |
| ParseResult result = NormalString(std::move(prefix)); |
| return result; |
| } |
| |
| // Parse an Atom (a simple literal value). The alternatives are listed in this order: |
| // identifier, string, real, integer, path. |
| //   "The quick brown fox jumped over the lazy dog." |
| //   0x1234abcd |
| //   my_variable |
| //   3.2156 |
| //   ./some/path |
| ParseResult Atom(ParseResult prefix) { |
| auto atom = Alt(Identifier, String, Real, Integer, Path); |
| return atom(std::move(prefix)); |
| } |
| |
| // LogicalOr is defined further down; it is declared here so the alias below can bind to it. |
| ParseResult LogicalOr(ParseResult prefix); |
| |
| // SimpleExpression is another name for the LogicalOr rule. |
| ParseResult (&SimpleExpression)(ParseResult) = LogicalOr; |
| |
| // Parse a field of an object literal into an ast::Field node: a key (a normal string or an |
| // identifier), a ":" separator, and a simple expression, with optional whitespace around each. |
| // ExToken inserts an error if the ":" is missing. |
| //   foo: 6 |
| //   "bar & grill": "Open now" |
| ParseResult Field(ParseResult prefix) { |
| auto key = Alt(NormalString, Identifier); |
| auto field = NT<ast::Field>( |
| WSSeq(std::move(key), ExToken<ast::FieldSeparator>(":"), SimpleExpression)); |
| return field(std::move(prefix)); |
| } |
| |
| // Parse the body of an object literal: a first field, any number of further fields each |
| // preceded by a ",", and an optional trailing ",". |
| //   foo: 6 |
| //   foo: 6, "bar & grill": "Open now", |
| ParseResult ObjectBody(ParseResult prefix) { |
| auto more_fields = ZeroPlus(WSSeq(ExToken(","), Field)); |
| auto trailing_comma = Maybe(Token(",")); |
| return WSSeq(Field, std::move(more_fields), std::move(trailing_comma))(std::move(prefix)); |
| } |
| |
| // Parse an object literal into an ast::Object node: "{", an optional body, and "}". ExToken |
| // inserts an error if the closing brace is missing. |
| //   {} |
| //   { foo: 6, "bar & grill": "Open now" } |
| //   { foo: { bar: 6 }, "bar & grill": "Open now" } |
| ParseResult Object(ParseResult prefix) { |
| auto braces = WSSeq(Token("{"), Maybe(ObjectBody), ExToken("}")); |
| return NT<ast::Object>(std::move(braces))(std::move(prefix)); |
| } |
| |
| // Parse a Value: currently an object literal or an atom. The final alternative inserts an |
| // "Expected value" error. |
| //   "The quick brown fox jumped over the lazy dog." |
| //   0x1234abcd |
| //   { foo: 3, bar: 6 } |
| ParseResult Value(ParseResult prefix) { |
| /* Eventual full version of this rule is: |
| return Alt(List, Object, Range, Lambda, Parenthetical, Block, If, Atom)(std::move(prefix)); |
| */ |
| auto value = Alt(Object, Atom, ErInsert("Expected value")); |
| return value(std::move(prefix)); |
| } |
| |
| // Placeholder for the lookup rule. Unimplemented: it simply defers to Value. |
| ParseResult Lookup(ParseResult prefix) { |
| return Value(std::move(prefix)); |
| } |
| |
| // Placeholder for the negation rule. Unimplemented: it simply defers to Lookup. |
| ParseResult Negate(ParseResult prefix) { |
| return Lookup(std::move(prefix)); |
| } |
| |
| // Placeholder for the multiplication rule. Unimplemented: it simply defers to Negate. |
| ParseResult Mul(ParseResult prefix) { |
| return Negate(std::move(prefix)); |
| } |
| |
| // Parse an addition or subtraction expression via LAssoc<ast::AddSub>. The first parser is a |
| // Mul operand with optional trailing whitespace; the second is a "+" or "-" operator (an |
| // ast::Operator token) followed by another Mul operand. |
| //   2 + 2 |
| ParseResult Add(ParseResult prefix) { |
| auto operand = Seq(Mul, Maybe(Whitespace)); |
| auto continuation = WSSeq(Token<ast::Operator>(AnyChar("+-")), Mul); |
| auto sum = LAssoc<ast::AddSub>(std::move(operand), std::move(continuation)); |
| return sum(std::move(prefix)); |
| } |
| |
| // Placeholder for the comparison rule. Unimplemented: it simply defers to Add. |
| ParseResult Comparison(ParseResult prefix) { |
| return Add(std::move(prefix)); |
| } |
| |
| // Placeholder for the logical-not rule. Unimplemented: it simply defers to Comparison. |
| ParseResult LogicalNot(ParseResult prefix) { |
| return Comparison(std::move(prefix)); |
| } |
| |
| // Placeholder for the logical-and rule. Unimplemented: it simply defers to LogicalNot. |
| ParseResult LogicalAnd(ParseResult prefix) { |
| return LogicalNot(std::move(prefix)); |
| } |
| |
| // Placeholder for the logical-or rule. Unimplemented: it simply defers to LogicalAnd. |
| ParseResult LogicalOr(ParseResult prefix) { |
| return LogicalAnd(std::move(prefix)); |
| } |
| |
| // Parses an expression into an ast::Expression node. Effectively unimplemented for now: it |
| // accepts a simple expression, and the other alternative inserts an "Expected expression" |
| // error. |
| ParseResult Expression(ParseResult prefix) { |
| // Unimplemented |
| auto body = Alt(SimpleExpression, ErInsert("Expected expression")); |
| return NT<ast::Expression>(std::move(body))(std::move(prefix)); |
| } |
| |
| // Parses a variable declaration into an ast::VariableDecl node: the keyword "var" or "const", |
| // an identifier, "=", and an expression, with optional whitespace around each. |
| //   var foo = 4.5 |
| //   const foo = "Ham sandwich" |
| ParseResult VariableDecl(ParseResult prefix) { |
| auto keyword = Alt(KW<ast::Var>("var"), KW<ast::Const>("const")); |
| auto declaration = WSSeq(std::move(keyword), Identifier, Token("="), Expression); |
| return NT<ast::VariableDecl>(std::move(declaration))(std::move(prefix)); |
| } |
| |
| // Parses the body of a program without creating an AST node for it. The rule is recursive, and |
| // leaving out the node keeps the resulting structure flat. Currently the body is either a |
| // variable declaration, optionally followed by ";" or "&" and more program content, or empty. |
| ParseResult ProgramContent(ParseResult prefix) { |
| /* Eventual full version of this rule is: |
| return Alt(WSSeq(VariableDecl, Maybe(WSSeq(AnyChar(";&", "; or &"), ProgramMeta))), |
| WSSeq(FunctionDecl, Program), |
| WSSeq(Expression, Maybe(WSSeq(AnyChar(";&", "; or &"), ProgramMeta))), |
| Empty)(std::move(prefix)); |
| */ |
| auto separator_then_more = WSSeq(AnyChar(";&"), ProgramContent); |
| auto declaration = WSSeq(VariableDecl, Maybe(std::move(separator_then_more))); |
| return Alt(std::move(declaration), Empty)(std::move(prefix)); |
| } |
| |
| } // namespace |
| |
| // Parses `text` as a whole program and returns the node held by the parse result. The grammar |
| // is an ast::Program node wrapping either the program content followed by EOS, or a fallback |
| // that reports "Unrecoverable parse error" over ZeroPlus(AnyChar). |
| std::shared_ptr<ast::Node> Parse(std::string_view text) { |
| auto whole_program = Seq(ProgramContent, EOS); |
| auto give_up = ErSkip("Unrecoverable parse error", ZeroPlus(AnyChar)); |
| auto program = NT<ast::Program>(Alt(std::move(whole_program), std::move(give_up))); |
| |
| auto res = program(ParseResult(text)); |
| |
| // The fallback alternative is expected to make the parse always yield a result. |
| FX_DCHECK(res) << "Incorrectly handled parse error."; |
| |
| return res.node(); |
| } |
| |
| } // namespace shell::parser |