blob: ffdd2b40e5fb6cfba8fc24159fbd09ed4da47129 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/developer/shell/parser/parser.h"
#include <lib/syslog/cpp/macros.h>
#include "src/developer/shell/parser/ast.h"
#include "src/developer/shell/parser/combinators.h"
#include "src/developer/shell/parser/error.h"
#include "src/developer/shell/parser/text_match.h"
namespace shell::parser {
namespace {
// Forward declaration: the WSSeq helpers below refer to the Whitespace rule before its definition.
ParseResult Whitespace(ParseResult prefix);

// Builds a parser that runs the given parsers one after another. Optional whitespace is accepted
// before every parser and after the last one.
//
// Base case: a single parser padded with optional whitespace on both sides.
fit::function<ParseResult(ParseResult)> WSSeq(fit::function<ParseResult(ParseResult)> first) {
  auto leading = Maybe(Whitespace);
  auto trailing = Maybe(Whitespace);
  return Seq(std::move(leading), std::move(first), std::move(trailing));
}

// Recursive case: optional whitespace, then `first`, then a WSSeq of the remaining parsers (which
// supplies the whitespace handling for everything that follows).
template <typename... Args>
fit::function<ParseResult(ParseResult)> WSSeq(fit::function<ParseResult(ParseResult)> first,
                                              Args... args) {
  auto rest = WSSeq(std::move(args)...);
  return Seq(Maybe(Whitespace), std::move(first), std::move(rest));
}
// Forward declaration: KW needs the IdentifierCharacter rule before its definition.
ParseResult IdentifierCharacter(ParseResult prefix);

// Builds a parser for the keyword `keyword`, matched as a token of type T. The keyword must not
// be directly followed by an identifier character; when it is, the ErInsert("Expected space")
// alternative is taken instead.
template <typename T = ast::Terminal>
fit::function<ParseResult(ParseResult)> KW(const std::string& keyword) {
  auto word = Token<T>(keyword);
  auto boundary = Alt(Not(IdentifierCharacter), ErInsert("Expected space"));
  return Seq(std::move(word), std::move(boundary));
}
// Builds a parser that expects the literal `token`, matched as a token of type T. If the literal
// is not there, the ErInsert alternative inserts an "Expected '<token>'" error instead.
template <typename T = ast::Terminal>
fit::function<ParseResult(ParseResult)> ExToken(const std::string& token) {
  auto expected = Token<T>(token);
  auto missing = ErInsert("Expected '" + token + "'");
  return Alt(std::move(expected), std::move(missing));
}
// Token Rules -------------------------------------------------------------------------------------
// Matches one character from the group a-z, A-Z, 0-9 or '_', i.e. a character that may appear in
// an identifier.
ParseResult IdentifierCharacter(ParseResult prefix) {
  auto matcher = CharGroup("a-zA-Z0-9_");
  return matcher(std::move(prefix));
}
// Matches one or more pieces of whitespace and wraps them in an ast::Whitespace node. A piece is
// either a single space, newline, carriage return or tab, or a '#' comment. A comment runs from
// '#' through the newline that ends it; the newline is required by this rule.
ParseResult Whitespace(ParseResult prefix) {
  auto blank = AnyChar(" \n\r\t");
  auto comment = Seq(Token("#"), ZeroPlus(AnyCharBut("\n")), Token("\n"));
  auto run = OnePlus(Alt(std::move(blank), std::move(comment)));
  return NT<ast::Whitespace>(std::move(run))(std::move(prefix));
}
// Matches one decimal digit (character group 0-9).
ParseResult Digit(ParseResult prefix) {
  auto matcher = CharGroup("0-9");
  return matcher(std::move(prefix));
}
// Matches one hexadecimal digit (character group a-f, A-F, 0-9).
ParseResult HexDigit(ParseResult prefix) {
  auto matcher = CharGroup("a-fA-F0-9");
  return matcher(std::move(prefix));
}
// Matches a run of one or more identifier characters as a single ast::UnescapedIdentifier token.
ParseResult UnescapedIdentifier(ParseResult prefix) {
  auto characters = OnePlus(IdentifierCharacter);
  return Token<ast::UnescapedIdentifier>(std::move(characters))(std::move(prefix));
}
// Matches one character that may appear unescaped in a path element: the position must not be
// the start of something the Whitespace rule matches, and the character must not be one of
// ` & ; | / \ ( ) [ ] { }.
ParseResult PathCharacter(ParseResult prefix) {
  auto not_whitespace = Not(Whitespace);
  auto ordinary = AnyCharBut("`&;|/\\()[]{}");
  return Seq(std::move(not_whitespace), std::move(ordinary))(std::move(prefix));
}
// Matches one piece of a path element. The alternatives, in order, are:
//   - a backslash followed by any character, as an ast::PathEscape token;
//   - a run of one or more path characters, as an ast::PathElement token;
//   - a backtick-quoted section: an opening backtick, any characters other than a backtick, and
//     an expected closing backtick.
ParseResult PathElement(ParseResult prefix) {
  auto escaped = Token<ast::PathEscape>(Seq(Token("\\"), AnyChar));
  auto bare = Token<ast::PathElement>(OnePlus(PathCharacter));
  auto quoted = Seq(Token<ast::PathEscape>("`"), Token<ast::PathElement>(ZeroPlus(AnyCharBut("`"))),
                    ExToken<ast::PathEscape>("`"));
  return Alt(std::move(escaped), std::move(bare), std::move(quoted))(std::move(prefix));
}
// Grammar Rules -----------------------------------------------------------------------------------
// Parses an identifier into an ast::Identifier node.
//   myVariable
// A leading digit is not allowed: when one is present, the "Identifier cannot begin with a digit"
// error alternative is used before the identifier characters are consumed.
ParseResult Identifier(ParseResult prefix) {
  auto leading_check = Alt(Not(Digit), ErInsert("Identifier cannot begin with a digit"));
  auto identifier = Seq(std::move(leading_check), UnescapedIdentifier);
  return NT<ast::Identifier>(std::move(identifier))(std::move(prefix));
}
// Parses a rooted path: one or more segments, each a '/' separator followed by at least one path
// element. A trailing slash is not consumed by this rule.
//   /foo
//   /foo/bar
ParseResult RootPath(ParseResult prefix) {
  auto segment = Seq(Token<ast::PathSeparator>("/"), OnePlus(PathElement));
  return OnePlus(std::move(segment))(std::move(prefix));
}
// Parses a path into an ast::Path node. Accepted shapes:
//   /foo
//   /foo/bar
//   /foo/bar/
//   ./foo/bar/
//   ./
//   /
//   .
// NOTE(review): the optional trailing '/' after a rooted path is matched with the default Token
// type, while every other separator here uses ast::PathSeparator -- confirm this is intended.
ParseResult Path(ParseResult prefix) {
  // Optional leading '.', a rooted path, and an optional trailing slash.
  auto rooted = Seq(Maybe(Token<ast::PathElement>(".")), RootPath, Maybe(Token("/")));
  // Just "/" or "./".
  auto bare_slash = Seq(Maybe(Token<ast::PathElement>(".")), Token<ast::PathSeparator>("/"));
  // A lone ".".
  auto dot = Token<ast::PathElement>(".");
  return NT<ast::Path>(Alt(std::move(rooted), std::move(bare_slash), std::move(dot)))(
      std::move(prefix));
}
// Parses an unadorned decimal integer. Digit groups may be separated by '_'.
//   0
//   12345
//   12_345
// Either a lone "0" that is not followed by another digit, or a number that does not start with
// "0".
ParseResult DecimalInteger(ParseResult prefix) {
  auto zero = Seq(Token<ast::DecimalGroup>("0"), Not(Digit));
  auto more_groups = ZeroPlus(Seq(Token("_"), Token<ast::DecimalGroup>(OnePlus(Digit))));
  auto nonzero = Seq(Not(Token("0")), Token<ast::DecimalGroup>(OnePlus(Digit)),
                     std::move(more_groups));
  return Alt(std::move(zero), std::move(nonzero))(std::move(prefix));
}
// Parses a hexadecimal integer introduced by "0x". Digit groups may be separated by '_'.
//   0x1234abcd
//   0x12_abcd
ParseResult HexInteger(ParseResult prefix) {
  auto first_group = Token<ast::HexGroup>(OnePlus(HexDigit));
  auto more_groups = ZeroPlus(Seq(Token("_"), Token<ast::HexGroup>(OnePlus(HexDigit))));
  auto digits = Seq(std::move(first_group), std::move(more_groups));
  return Seq(Token("0x"), std::move(digits))(std::move(prefix));
}
// Parses an integer into an ast::Integer node. The hexadecimal form is tried before the decimal
// form.
//   0
//   12345
//   12_345
//   0x1234abcd
//   0x12_abcd
ParseResult Integer(ParseResult prefix) {
  // TODO: Binary integers, once we ask the FIDL team about them.
  auto any_integer = Alt(HexInteger, DecimalInteger);
  return NT<ast::Integer>(std::move(any_integer))(std::move(prefix));
}
// Parse a Real (unimplemented). This stub ignores `prefix` and always returns ParseResult::kEnd,
// so no input is ever consumed by this rule.
ParseResult Real(ParseResult prefix) { return ParseResult::kEnd; }
// Parses an escape sequence inside a string literal.
//   \n  \t  \r  \\  \"
//   a backslash followed by a literal newline
//   \u followed by hex digits (Multi(6, HexDigit) -- presumably exactly six)
// Any other backslash is reported as an error: "Bad escape sequence" when a character follows it,
// or "Escape sequence at end of input" when nothing does.
ParseResult EscapeSequence(ParseResult prefix) {
  auto unicode =
      Token<ast::EscapeSequence>(Seq(Token<ast::EscapeSequence>("\\u"), Multi(6, HexDigit)));
  auto malformed = Seq(Token("\\"), Alt(ErSkip("Bad escape sequence: '\\%MATCH%'", AnyChar),
                                        ErInsert("Escape sequence at end of input")));
  return Alt(Token<ast::EscapeSequence>("\\n"), Token<ast::EscapeSequence>("\\t"),
             Token<ast::EscapeSequence>("\\\n"), Token<ast::EscapeSequence>("\\r"),
             Token<ast::EscapeSequence>("\\\\"), Token<ast::EscapeSequence>("\\\""),
             std::move(unicode), std::move(malformed))(std::move(prefix));
}
// Parses one piece of a string body: either a run of ordinary characters (anything except a
// newline, a backslash or a double quote) as an ast::StringEntity token, or an escape sequence.
//   The quick brown fox jumped over the lazy dog.
ParseResult StringEntity(ParseResult prefix) {
  auto plain_text = Token<ast::StringEntity>(OnePlus(AnyCharBut("\n\\\"")));
  return Alt(std::move(plain_text), EscapeSequence)(std::move(prefix));
}
// Parses an ordinary double-quoted string literal into an ast::String node. The closing quote is
// expected; ExToken supplies an error when it is missing.
//   "The quick brown fox jumped over the lazy dog."
//   "A newline.\nA tab\tA code point\u0000F0"
ParseResult NormalString(ParseResult prefix) {
  auto body = ZeroPlus(StringEntity);
  auto quoted = Seq(Token("\""), std::move(body), ExToken("\""));
  return NT<ast::String>(std::move(quoted))(std::move(prefix));
}
// Parses a string literal. Only ordinary strings are handled for now; multiline strings are
// planned but have no syntax yet.
//   "The quick brown fox jumped over the lazy dog."
//   "A newline.\nA tab\tA code point\u0000F0"
// TODO: Decide on a MultiString syntax we like.
ParseResult String(ParseResult prefix) {
  // Intended eventual form: Alt(NormalString, MultiString)(prefix).
  ParseResult result = NormalString(std::move(prefix));
  return result;
}
// Parses an Atom (a simple literal value). The alternatives are tried in this order: Identifier,
// String, Real, Integer, Path.
//   "The quick brown fox jumped over the lazy dog."
//   0x1234abcd
//   my_variable
//   3.2156
//   ./some/path
ParseResult Atom(ParseResult prefix) {
  auto atom = Alt(Identifier, String, Real, Integer, Path);
  return atom(std::move(prefix));
}
// Forward declaration: LogicalOr is referenced by the rules below before its definition.
ParseResult LogicalOr(ParseResult prefix);
// SimpleExpression is another name for the LogicalOr rule (a reference to the same function).
const auto& SimpleExpression = LogicalOr;
// Parses one field of an object literal into an ast::Field node: a key (a string or an
// identifier), an expected ':' separator, and a value expression, with optional whitespace
// around each part.
//   foo: 6
//   "bar & grill": "Open now"
ParseResult Field(ParseResult prefix) {
  auto key = Alt(NormalString, Identifier);
  auto separator = ExToken<ast::FieldSeparator>(":");
  auto field = WSSeq(std::move(key), std::move(separator), SimpleExpression);
  return NT<ast::Field>(std::move(field))(std::move(prefix));
}
// Parses the body of an object literal: a field, any number of further comma-separated fields,
// and an optional trailing comma.
//   foo: 6
//   foo: 6, "bar & grill": "Open now",
ParseResult ObjectBody(ParseResult prefix) {
  auto more_fields = ZeroPlus(WSSeq(ExToken(","), Field));
  auto trailing_comma = Maybe(Token(","));
  auto body = WSSeq(Field, std::move(more_fields), std::move(trailing_comma));
  return body(std::move(prefix));
}
// Parses an object literal into an ast::Object node: '{', an optional body, and an expected '}'.
//   {}
//   { foo: 6, "bar & grill": "Open now" }
//   { foo: { bar: 6 }, "bar & grill": "Open now" }
ParseResult Object(ParseResult prefix) {
  auto braces = WSSeq(Token("{"), Maybe(ObjectBody), ExToken("}"));
  return NT<ast::Object>(std::move(braces))(std::move(prefix));
}
// Parses a Value: an object literal or an atom. When neither matches, the
// ErInsert("Expected value") alternative is used.
//   "The quick brown fox jumped over the lazy dog."
//   0x1234abcd
//   { foo: 3, bar: 6 }
ParseResult Value(ParseResult prefix) {
  /* Eventual full version of this rule is:
  return Alt(List, Object, Range, Lambda, Parenthetical, Block, If, Atom)(std::move(prefix));
  */
  auto value = Alt(Object, Atom, ErInsert("Expected value"));
  return value(std::move(prefix));
}
// Placeholder for the lookup rule (unimplemented): forwards straight to Value.
ParseResult Lookup(ParseResult prefix) {
  ParseResult result = Value(std::move(prefix));
  return result;
}
// Placeholder for the negation rule (unimplemented): forwards straight to Lookup.
ParseResult Negate(ParseResult prefix) {
  ParseResult result = Lookup(std::move(prefix));
  return result;
}
// Placeholder for the multiplication rule (unimplemented): forwards straight to Negate.
ParseResult Mul(ParseResult prefix) {
  ParseResult result = Negate(std::move(prefix));
  return result;
}
// Parses an addition or subtraction expression via LAssoc<ast::AddSub>: a Mul operand followed by
// any '+' or '-' operators, each with another Mul operand.
//   2 + 2
ParseResult Add(ParseResult prefix) {
  // First operand, plus any whitespace that follows it.
  auto operand = Seq(Mul, Maybe(Whitespace));
  // An operator and the operand to its right, with optional whitespace around both.
  auto op_and_operand = WSSeq(Token<ast::Operator>(AnyChar("+-")), Mul);
  return LAssoc<ast::AddSub>(std::move(operand), std::move(op_and_operand))(std::move(prefix));
}
// Placeholder for the comparison rule (unimplemented): forwards straight to Add.
ParseResult Comparison(ParseResult prefix) {
  ParseResult result = Add(std::move(prefix));
  return result;
}
// Placeholder for the logical-not rule (unimplemented): forwards straight to Comparison.
ParseResult LogicalNot(ParseResult prefix) {
  ParseResult result = Comparison(std::move(prefix));
  return result;
}
// Placeholder for the logical-and rule (unimplemented): forwards straight to LogicalNot.
ParseResult LogicalAnd(ParseResult prefix) {
  ParseResult result = LogicalNot(std::move(prefix));
  return result;
}
// Placeholder for the logical-or rule (unimplemented): forwards straight to LogicalAnd.
ParseResult LogicalOr(ParseResult prefix) {
  ParseResult result = LogicalAnd(std::move(prefix));
  return result;
}
// Parses an expression into an ast::Expression node. This is effectively unimplemented right now:
// it accepts a SimpleExpression and otherwise uses the ErInsert("Expected expression")
// alternative.
ParseResult Expression(ParseResult prefix) {
  auto expression = Alt(SimpleExpression, ErInsert("Expected expression"));
  return NT<ast::Expression>(std::move(expression))(std::move(prefix));
}
// Parses a variable declaration into an ast::VariableDecl node: the keyword `var` or `const`, an
// identifier, '=', and an expression, with optional whitespace around each part.
//   var foo = 4.5
//   const foo = "Ham sandwich"
ParseResult VariableDecl(ParseResult prefix) {
  auto keyword = Alt(KW<ast::Var>("var"), KW<ast::Const>("const"));
  auto declaration = WSSeq(std::move(keyword), Identifier, Token("="), Expression);
  return NT<ast::VariableDecl>(std::move(declaration))(std::move(prefix));
}
// Parses the body of a program without creating an AST node of its own. The rule is recursive,
// and skipping the node keeps the resulting structure flat. Currently a program body is either a
// variable declaration, optionally followed by ';' or '&' and more program content, or nothing.
ParseResult ProgramContent(ParseResult prefix) {
  /* Eventual full version of this rule is:
  return Alt(WSSeq(VariableDecl, Maybe(WSSeq(AnyChar(";&", "; or &"), ProgramMeta))),
             WSSeq(FunctionDecl, Program),
             WSSeq(Expression, Maybe(WSSeq(AnyChar(";&", "; or &"), ProgramMeta))),
             Empty)(std::move(prefix));
  */
  auto continuation = Maybe(WSSeq(AnyChar(";&"), ProgramContent));
  auto declaration = WSSeq(VariableDecl, std::move(continuation));
  return Alt(std::move(declaration), Empty)(std::move(prefix));
}
} // namespace
std::shared_ptr<ast::Node> Parse(std::string_view text) {
auto res =
NT<ast::Program>(Alt(Seq(ProgramContent, EOS), ErSkip("Unrecoverable parse error",
ZeroPlus(AnyChar))))(ParseResult(text));
FX_DCHECK(res) << "Incorrectly handled parse error.";
return res.node();
}
} // namespace shell::parser