blob: 7dccc8379c149818fb25d379927c02645a0b6e17 [file] [log] [blame] [edit]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/developer/shell/parser/parser.h"
#include "src/developer/shell/parser/ast.h"
#include "src/developer/shell/parser/combinators.h"
#include "src/developer/shell/parser/text_match.h"
namespace shell::parser {
namespace {
// Forward declaration: the WSSeq helpers below refer to the Whitespace rule before it is defined.
ParseResultStream Whitespace(ParseResultStream prefixes);
// Base case of WSSeq: wraps a single parser so that optional whitespace may appear both before and
// after it. The variadic overload chains into this one for the last parser in the list.
fit::function<ParseResultStream(ParseResultStream)> WSSeq(
    fit::function<ParseResultStream(ParseResultStream)> first) {
  auto leading_space = Maybe(Whitespace);
  auto trailing_space = Maybe(Whitespace);
  return Seq(std::move(leading_space), std::move(first), std::move(trailing_space));
}
// Recursive case of WSSeq: runs `first` (preceded by optional whitespace) and then the
// whitespace-separated sequence built from the remaining parsers.
template <typename... Args>
fit::function<ParseResultStream(ParseResultStream)> WSSeq(
    fit::function<ParseResultStream(ParseResultStream)> first, Args... args) {
  // Build the tail first; it takes care of whitespace around every remaining parser.
  auto rest = WSSeq(std::move(args)...);
  return Seq(Maybe(Whitespace), std::move(first), std::move(rest));
}
// Forward declaration: KW below refers to the IdentifierCharacter rule before it is defined.
ParseResultStream IdentifierCharacter(ParseResultStream prefixes);
// Builds a parser for a keyword: the literal text `keyword`, emitted as a token of type T, combined
// with a Not(IdentifierCharacter) check so that the keyword is not merely the start of a longer
// identifier (e.g. "var" should not match inside "variable").
template <typename T = ast::Terminal>
fit::function<ParseResultStream(ParseResultStream)> KW(const std::string& keyword) {
  auto word = Token<T>(keyword);
  auto word_boundary = Not(IdentifierCharacter);
  return Seq(std::move(word), std::move(word_boundary));
}
// Token Rules -------------------------------------------------------------------------------------
// Matches one character that may appear in an identifier: an ASCII letter, a digit or '_'.
ParseResultStream IdentifierCharacter(ParseResultStream prefixes) {
  auto group = CharGroup("identifier character", "a-zA-Z0-9_");
  return group(std::move(prefixes));
}
// Parses a run of whitespace into an ast::Whitespace node. The run is one or more of:
//   * a space, newline, carriage return or tab, or
//   * a comment: '#', any characters other than newline, then the terminating newline.
// NOTE(review): the comment alternative requires a closing "\n", so a '#' comment on the final line
// of input with no trailing newline does not appear to match -- confirm whether that is intended.
ParseResultStream Whitespace(ParseResultStream prefixes) {
  auto blank = AnyChar("space", " \n\r\t");
  auto comment = Seq(Token("#"), ZeroPlus(AnyCharBut("non-newline", "\n")), Token("\n"));
  auto run = OnePlus(Alt(std::move(blank), std::move(comment)));
  return NT<ast::Whitespace>(std::move(run))(std::move(prefixes));
}
// Matches a single decimal digit (0-9).
ParseResultStream Digit(ParseResultStream prefixes) {
  auto group = CharGroup("digit", "0-9");
  return group(std::move(prefixes));
}
// Matches a single hexadecimal digit (0-9, a-f or A-F).
ParseResultStream HexDigit(ParseResultStream prefixes) {
  auto group = CharGroup("hex digit", "a-fA-F0-9");
  return group(std::move(prefixes));
}
// Parses one or more identifier characters as a single ast::UnescapedIdentifier token. This rule
// does not itself reject a leading digit; Identifier adds that restriction.
ParseResultStream UnescapedIdentifier(ParseResultStream prefixes) {
  auto characters = OnePlus(IdentifierCharacter);
  return Token<ast::UnescapedIdentifier>(std::move(characters))(std::move(prefixes));
}
// Matches one character that may appear unescaped in a path element: anything that is not
// whitespace and is not one of the reserved characters ` & ; | / \ ( ) [ ] { }.
ParseResultStream PathCharacter(ParseResultStream prefixes) {
  auto not_whitespace = Not(Whitespace);
  auto unreserved = AnyCharBut("path character", "`&;|/\\()[]{}");
  return Seq(std::move(not_whitespace), std::move(unreserved))(std::move(prefixes));
}
// Parses one piece of a path element. The alternatives are, in order:
//   1. an escape: a backslash followed by whatever the bare AnyChar parser accepts (presumably any
//      single character), emitted as an ast::PathEscape token;
//   2. a run of one or more unescaped path characters, emitted as an ast::PathElement token;
//   3. a backtick-quoted section: the two backticks are ast::PathEscape tokens and everything in
//      between (possibly nothing) is an ast::PathElement token.
ParseResultStream PathElement(ParseResultStream prefixes) {
  auto escaped = Token<ast::PathEscape>(Seq(Token("\\"), AnyChar));
  auto bare = Token<ast::PathElement>(OnePlus(PathCharacter));
  auto quoted =
      Seq(Token<ast::PathEscape>("`"),
          Token<ast::PathElement>(ZeroPlus(AnyCharBut("character other than '`'", "`"))),
          Token<ast::PathEscape>("`"));
  return Alt(std::move(escaped), std::move(bare), std::move(quoted))(std::move(prefixes));
}
// Grammar Rules -----------------------------------------------------------------------------------
// Parses an identifier into an ast::Identifier node: a run of identifier characters that does not
// begin with a digit.
//     myVariable
ParseResultStream Identifier(ParseResultStream prefixes) {
  auto body = Seq(Not(Digit), UnescapedIdentifier);
  return NT<ast::Identifier>(std::move(body))(std::move(prefixes));
}
// Parses a rooted path: one or more segments, each being a "/" separator followed by at least one
// PathElement. A trailing slash is not consumed here.
//     /foo
//     /foo/bar
ParseResultStream RootPath(ParseResultStream prefixes) {
  auto segment = Seq(Token<ast::PathSeparator>("/"), OnePlus(PathElement));
  return OnePlus(std::move(segment))(std::move(prefixes));
}
// Parses a path into an ast::Path node. Three shapes are tried, in order:
//   1. an optional leading ".", a RootPath, and an optional trailing "/";
//   2. an optional leading "." followed by a lone "/";
//   3. a lone ".".
//     /foo
//     /foo/bar
//     /foo/bar/
//     ./foo/bar/
//     ./
//     /
//     .
// NOTE(review): the optional trailing "/" in the first shape is a plain Token rather than a
// Token<ast::PathSeparator> like every other slash here -- confirm whether that is deliberate.
ParseResultStream Path(ParseResultStream prefixes) {
  auto with_elements = Seq(Maybe(Token<ast::PathElement>(".")), RootPath, Maybe(Token("/")));
  auto slash_only = Seq(Maybe(Token<ast::PathElement>(".")), Token<ast::PathSeparator>("/"));
  auto dot_only = Token<ast::PathElement>(".");
  auto any_shape = Alt(std::move(with_elements), std::move(slash_only), std::move(dot_only));
  return NT<ast::Path>(std::move(any_shape))(std::move(prefixes));
}
// Parses an unadorned decimal integer. Either a lone "0" that is not followed by another digit, or
// a run of digits that does not start with "0", optionally followed by further digit groups
// separated by "_". Every digit group is emitted as an ast::DecimalGroup token.
//     0
//     12345
//     12_345
ParseResultStream DecimalInteger(ParseResultStream prefixes) {
  auto zero = Seq(Token<ast::DecimalGroup>("0"), Not(Digit));
  auto extra_groups = ZeroPlus(Seq(Token("_"), Token<ast::DecimalGroup>(OnePlus(Digit))));
  auto nonzero =
      Seq(Not(Token("0")), Token<ast::DecimalGroup>(OnePlus(Digit)), std::move(extra_groups));
  return Alt(std::move(zero), std::move(nonzero))(std::move(prefixes));
}
// Parses a hexadecimal integer: the prefix "0x", then one or more hex digits, optionally followed
// by further hex digit groups separated by "_". Every digit group is emitted as an ast::HexGroup
// token.
//     0x1234abcd
//     0x12_abcd
ParseResultStream HexInteger(ParseResultStream prefixes) {
  auto first_group = Token<ast::HexGroup>(OnePlus(HexDigit));
  auto extra_groups = ZeroPlus(Seq(Token("_"), Token<ast::HexGroup>(OnePlus(HexDigit))));
  auto digits = Seq(std::move(first_group), std::move(extra_groups));
  return Seq(Token("0x"), std::move(digits))(std::move(prefixes));
}
// Parses an integer literal into an ast::Integer node. The hexadecimal form is tried before the
// decimal form.
//     0
//     12345
//     12_345
//     0x1234abcd
//     0x12_abcd
ParseResultStream Integer(ParseResultStream prefixes) {
  // TODO: Binary integers, once we ask the FIDL team about them.
  auto hex_or_decimal = Alt(HexInteger, DecimalInteger);
  return NT<ast::Integer>(std::move(hex_or_decimal))(std::move(prefixes));
}
// Placeholder for real-number literals. No syntax is implemented yet, so this rule unconditionally
// reports failure on the incoming stream.
ParseResultStream Real(ParseResultStream prefixes) {
  return std::move(prefixes).Fail();
}
// Parses one escape sequence inside a string as an ast::EscapeSequence token. Recognized forms are
// backslash followed by: n, t, a literal newline, r, another backslash, a double quote, or u plus
// exactly six hex digits.
//     \n
//     \r
//     \u0000F0
ParseResultStream EscapeSequence(ParseResultStream prefixes) {
  auto newline = Token<ast::EscapeSequence>("\\n");
  auto tab = Token<ast::EscapeSequence>("\\t");
  auto escaped_line_break = Token<ast::EscapeSequence>("\\\n");
  auto carriage_return = Token<ast::EscapeSequence>("\\r");
  auto backslash = Token<ast::EscapeSequence>("\\\\");
  auto double_quote = Token<ast::EscapeSequence>("\\\"");
  auto code_point =
      Token<ast::EscapeSequence>(Seq(Token<ast::EscapeSequence>("\\u"), Multi(6, HexDigit)));
  // The order of alternatives is kept exactly as the rule was originally written.
  return Alt(std::move(newline), std::move(tab), std::move(escaped_line_break),
             std::move(carriage_return), std::move(backslash), std::move(double_quote),
             std::move(code_point))(std::move(prefixes));
}
// Parses one piece of a string body: either a run of ordinary characters (anything except newline,
// backslash and double quote) emitted as an ast::StringEntity token, or a single escape sequence.
//     The quick brown fox jumped over the lazy dog.
ParseResultStream StringEntity(ParseResultStream prefixes) {
  auto plain_text =
      Token<ast::StringEntity>(OnePlus(AnyCharBut("string body character", "\n\\\"")));
  return Alt(std::move(plain_text), EscapeSequence)(std::move(prefixes));
}
// Parses an ordinary double-quoted string literal into an ast::String node: an opening quote, zero
// or more string entities, and a closing quote.
//     "The quick brown fox jumped over the lazy dog."
//     "A newline.\nA tab\tA code point\u0000F0"
ParseResultStream NormalString(ParseResultStream prefixes) {
  auto quoted = Seq(Token("\""), ZeroPlus(StringEntity), Token("\""));
  return NT<ast::String>(std::move(quoted))(std::move(prefixes));
}
// Parses a string literal. Only ordinary strings are supported for now, so this simply defers to
// NormalString.
//     "The quick brown fox jumped over the lazy dog."
//     "A newline.\nA tab\tA code point\u0000F0"
// TODO: Decide on a MultiString syntax we like.
ParseResultStream String(ParseResultStream prefixes) {
  // Once multiline strings exist this is expected to become:
  //   return Alt(NormalString, MultiString)(prefixes);
  return NormalString(std::move(prefixes));
}
// Parses an Atom (a simple literal value). Alternatives are tried in this order: Identifier,
// String, Real, Integer, Path.
//     "The quick brown fox jumped over the lazy dog."
//     0x1234abcd
//     my_variable
//     ./some/path
// Real is still unimplemented and never matches, so a literal such as 3.2156 is not yet accepted
// as a real number.
ParseResultStream Atom(ParseResultStream prefixes) {
  auto any_atom = Alt(Identifier, String, Real, Integer, Path);
  return any_atom(std::move(prefixes));
}
// Forward declaration: the grammar is recursive (Field refers to Value through SimpleExpression,
// and Value in turn refers to Object), so Value must be declared before it is defined.
ParseResultStream Value(ParseResultStream prefixes);
// SimpleExpression is currently just another name for the Value rule.
const auto& SimpleExpression = Value;
// Parses one field of an object literal into an ast::Field node: a key (an ordinary string or an
// identifier), a ":" separator emitted as ast::FieldSeparator, and a simple expression as the
// value. Whitespace is permitted around each part.
//     foo: 6
//     "bar & grill": "Open now"
ParseResultStream Field(ParseResultStream prefixes) {
  auto key = Alt(NormalString, Identifier);
  auto separator = Token<ast::FieldSeparator>(":");
  auto key_and_value = WSSeq(std::move(key), std::move(separator), SimpleExpression);
  return NT<ast::Field>(std::move(key_and_value))(std::move(prefixes));
}
// Parses the contents of an object literal: a first field, any number of further comma-separated
// fields, and an optional trailing comma. No AST node is created for the body itself.
//     foo: 6
//     foo: 6, "bar & grill": "Open now",
ParseResultStream ObjectBody(ParseResultStream prefixes) {
  auto more_fields = ZeroPlus(WSSeq(Token(","), Field));
  auto trailing_comma = Maybe(Token(","));
  return WSSeq(Field, std::move(more_fields), std::move(trailing_comma))(std::move(prefixes));
}
// Parses an object literal into an ast::Object node: "{", an optional body, and "}", with
// whitespace permitted around each part.
//     {}
//     { foo: 6, "bar & grill": "Open now" }
//     { foo: { bar: 6 }, "bar & grill": "Open now" }
ParseResultStream Object(ParseResultStream prefixes) {
  auto braced = WSSeq(Token("{"), Maybe(ObjectBody), Token("}"));
  return NT<ast::Object>(std::move(braced))(std::move(prefixes));
}
// Parses a Value. At present that is either an object literal or an atom; Object is tried first.
//     "The quick brown fox jumped over the lazy dog."
//     0x1234abcd
//     { foo: 3, bar: 6 }
ParseResultStream Value(ParseResultStream prefixes) {
  /* Eventual full version of this rule is:
  return Alt(List, Object, Range, Lambda, Parenthetical, Block, If, Atom)(std::move(prefixes));
  */
  auto object_or_atom = Alt(Object, Atom);
  return object_or_atom(std::move(prefixes));
}
// Parses an expression into an ast::Expression node. Full expression syntax is not implemented
// yet; for now the node simply wraps a SimpleExpression.
ParseResultStream Expression(ParseResultStream prefixes) {
  // Unimplemented beyond wrapping a simple expression.
  auto wrapped = NT<ast::Expression>(SimpleExpression);
  return wrapped(std::move(prefixes));
}
// Parses a variable declaration into an ast::VariableDecl node: the keyword "var" (ast::Var) or
// "const" (ast::Const), an identifier, "=", and an expression, with whitespace permitted around
// each part.
//     var foo = 4.5
//     const foo = "Ham sandwich"
ParseResultStream VariableDecl(ParseResultStream prefixes) {
  auto keyword = Alt(KW<ast::Var>("var"), KW<ast::Const>("const"));
  auto declaration = WSSeq(std::move(keyword), Identifier, Token("="), Expression);
  return NT<ast::VariableDecl>(std::move(declaration))(std::move(prefixes));
}
// Parses the body of a program without creating an AST node of its own. The rule is recursive, and
// leaving out the node keeps the resulting structure flat. Currently the body is either a variable
// declaration, optionally followed by ';' or '&' and more program content, or nothing at all.
ParseResultStream ProgramContent(ParseResultStream prefixes) {
  /* Eventual full version of this rule is:
  return Alt(WSSeq(VariableDecl, Maybe(WSSeq(AnyChar("; or &", ";&"), ProgramMeta))),
             WSSeq(FunctionDecl, Program),
             WSSeq(Expression, Maybe(WSSeq(AnyChar("; or &", ";&"), ProgramMeta))),
             Empty)(prefixes);
  */
  auto continuation = Maybe(WSSeq(AnyChar("; or &", ";&"), ProgramContent));
  auto declaration = WSSeq(VariableDecl, std::move(continuation));
  return Alt(std::move(declaration), Empty)(std::move(prefixes));
}
} // namespace
// Entry point of the parser. Builds the top-level rule (program content followed by EOS, wrapped
// in an ast::Program node), runs it over `text`, and returns the node of the first result the
// parse stream yields.
std::shared_ptr<ast::Node> Parse(std::string_view text) {
  auto program = NT<ast::Program>(Seq(ProgramContent, EOS));
  return program(text).Next().node();
}
} // namespace shell::parser