| # Copyright 2019 Google LLC |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Routines to generate a shift-reduce parser from the module_ir module.""" |
| |
| import pkgutil |
| |
| from front_end import lr1 |
| from front_end import module_ir |
| from front_end import tokenizer |
| from util import simple_memoizer |
| |
| |
class ParserGenerationError(Exception):
    """Raised when a parser cannot be generated."""
| |
| |
def parse_error_examples(error_example_text):
    """Parses error examples from error_example_text.

    The text is divided into sections by lines of exactly 80 "=" characters;
    everything before the first such line is explanatory text and is ignored.
    Each section consists of an error message, a line of exactly 80 "-"
    characters, and one or more examples separated by "---" lines.

    Within an example, the word "$ERR" marks the token that follows it as the
    error token (the "$ERR" marker itself is removed from the token list), and
    each "$ANY" word is replaced with lr1.ANY_TOKEN.

    Arguments:
      error_example_text: The text of an error example file.

    Returns:
      A list of (tokens, error_token, message, example_text) tuples, suitable
      for passing into generate_parser.

    Raises:
      ParserGenerationError: There is a problem parsing the error examples: a
          section does not have exactly one message and one example part, an
          example fails to tokenize, or an example has no "$ERR" marker or has
          a "$ERR" marker with no token after it.
    """
    section_separator = "\n" + "=" * 80 + "\n"
    message_separator = "\n" + "-" * 80 + "\n"
    example_separator = "\n---\n"

    result = []
    # Everything before the first "======" line is explanatory text: ignore it.
    for error_example in error_example_text.split(section_separator)[1:]:
        message_and_examples = error_example.split(message_separator)
        if len(message_and_examples) != 2:
            raise ParserGenerationError(
                "Expected one error message and one example section in:\n" +
                error_example)
        message, example_text = message_and_examples
        for example in example_text.split(example_separator):
            # TODO(bolms): feed a line number into tokenize, so that
            # tokenization failures refer to the correct line within
            # error_example_text.
            tokens, errors = tokenizer.tokenize(example, "")
            if errors:
                raise ParserGenerationError(str(errors))

            # Replace every "$ANY" placeholder with the wildcard token.
            for index, token in enumerate(tokens):
                if token.symbol == "BadWord" and token.text == "$ANY":
                    tokens[index] = lr1.ANY_TOKEN

            # The token immediately after the first "$ERR" marker is the error
            # token; the marker itself is dropped from the token list.
            for index, token in enumerate(tokens):
                if token.symbol == "BadWord" and token.text == "$ERR":
                    if index + 1 >= len(tokens):
                        # Without this check, a trailing "$ERR" would surface
                        # as a bare IndexError instead of the documented error.
                        raise ParserGenerationError(
                            "Error token marker '$ERR' is not followed by a "
                            "token in:\n" + error_example)
                    error_token = tokens[index + 1]
                    del tokens[index]
                    break
            else:
                raise ParserGenerationError(
                    "No error token marker '$ERR' in:\n" + error_example)

            result.append((tokens, error_token, message.strip(), example))
    return result
| |
| |
def generate_parser(start_symbol, productions, error_examples):
    """Generates a parser from grammar, and applies error_examples.

    Arguments:
      start_symbol: the start symbol of the grammar (a string)
      productions: a list of parser_types.Production in the grammar
      error_examples: A list of (source tokens, error token, error message,
          source text) tuples, in the form returned by parse_error_examples.
          The first three elements are passed to the parser's mark_error
          method; the source text is only used when reporting a failure.

    Returns:
      A parser.

    Raises:
      ParserGenerationError: There is a problem generating the parser: the
          grammar has conflicts, or mark_error reported a problem for one of
          the error examples.
    """
    parser = lr1.Grammar(start_symbol, productions).parser()
    if parser.conflicts:
        raise ParserGenerationError(
            "\n".join(str(conflict) for conflict in parser.conflicts))
    for example in error_examples:
        tokens, error_token, message = example[0], example[1], example[2]
        mark_result = parser.mark_error(tokens, error_token, message)
        if mark_result:
            # example[3] is the original example text, included for context.
            raise ParserGenerationError(
                "error marking example: {}\nExample:\n{}".format(
                    mark_result, example[3]))
    return parser
| |
| |
@simple_memoizer.memoize
def _load_module_parser():
    """Builds the module parser, with the packaged error examples applied.

    The error example text is read from the "error_examples" resource of the
    "front_end" package and decoded as UTF-8.  The function is wrapped in
    simple_memoizer.memoize, so presumably the parser is only built once --
    confirm against simple_memoizer.
    """
    package = "front_end"
    raw_examples = pkgutil.get_data(package, "error_examples")
    examples = parse_error_examples(raw_examples.decode("utf-8"))
    return generate_parser(
        module_ir.START_SYMBOL, module_ir.PRODUCTIONS, examples)
| |
| |
@simple_memoizer.memoize
def _load_expression_parser():
    """Builds the expression parser; no error examples are applied.

    Uses the same productions as the module parser, but starts from
    module_ir.EXPRESSION_START_SYMBOL.
    """
    start_symbol = module_ir.EXPRESSION_START_SYMBOL
    no_error_examples = []
    return generate_parser(
        start_symbol, module_ir.PRODUCTIONS, no_error_examples)
| |
| |
def parse_module(tokens):
    """Parses an Emboss token list into an Emboss module parse tree.

    Arguments:
      tokens: The Emboss token list to parse.

    Returns:
      Whatever the module parser's parse method returns for tokens.
    """
    module_parser = _load_module_parser()
    return module_parser.parse(tokens)
| |
| |
def parse_expression(tokens):
    """Parses an Emboss token list into an expression parse tree.

    Arguments:
      tokens: The Emboss token list to parse.

    Returns:
      Whatever the expression parser's parse method returns for tokens.
    """
    expression_parser = _load_expression_parser()
    return expression_parser.parse(tokens)