| """Tokenizers for three string formatting methods""" |
| |
| from typing import List, Tuple, Optional |
| from typing_extensions import Final |
| from enum import Enum, unique |
| |
| from mypy.checkstrformat import ( |
| parse_format_value, ConversionSpecifier, parse_conversion_specifiers |
| ) |
| from mypy.errors import Errors |
| from mypy.messages import MessageBuilder |
| from mypy.nodes import Context, Expression |
| |
| from mypyc.ir.ops import Value, Integer |
| from mypyc.ir.rtypes import ( |
| c_pyssize_t_rprimitive, is_str_rprimitive, is_int_rprimitive, is_short_int_rprimitive, |
| is_bytes_rprimitive |
| ) |
| from mypyc.irbuild.builder import IRBuilder |
| from mypyc.primitives.bytes_ops import bytes_build_op |
| from mypyc.primitives.int_ops import int_to_str_op |
| from mypyc.primitives.str_ops import str_build_op, str_op |
| |
| |
@unique
class FormatOp(Enum):
    """Compile-time tag for the conversion a format placeholder asks for.

    A FormatOp carries far less information than a ConversionSpecifier.
    Several specifier spellings can request the very same conversion --
    for instance '%s', '{}' and '{:{}}' all mean "turn any object into a
    string" -- and all of them collapse into one FormatOp member.
    """

    # Conversion of any object to a string.
    STR = 's'
    # Integer conversion (the '%d' specifier).
    INT = 'd'
    # Bytes conversion (the '%b' specifier).
    BYTES = 'b'
| |
| |
def generate_format_ops(specifiers: List[ConversionSpecifier]) -> Optional[List[FormatOp]]:
    """Translate each ConversionSpecifier into its FormatOp.

    Distinct specifiers may map onto the same FormatOp.

    Return:
        One FormatOp per specifier, in the same order, or None as soon as
        a specifier with an unsupported (non-empty) sequence is found.
    """
    # TODO: Match specifiers instead of using whole_seq
    known_sequences = {
        '%s': FormatOp.STR,
        '{:{}}': FormatOp.STR,
        '%d': FormatOp.INT,
        '%b': FormatOp.BYTES,
    }

    ops: List[FormatOp] = []
    for specifier in specifiers:
        sequence = specifier.whole_seq
        if not sequence:
            # An empty sequence requests no explicit conversion, so it is
            # treated as a plain string conversion.
            ops.append(FormatOp.STR)
            continue
        op = known_sequences.get(sequence)
        if op is None:
            # Anything else is not supported: give up on the whole format.
            return None
        ops.append(op)
    return ops
| |
| |
def tokenizer_printf_style(format_str: str) -> Optional[Tuple[List[str], List[FormatOp]]]:
    """Split a printf-style format string into literals and FormatOps.

    The conversion specifiers are located with mypy's
    parse_conversion_specifiers(); the text before, between and after
    them becomes the list of literals.

    Return:
        A list of string literals and a list of FormatOps. The literals
        list always holds one more item than there are specifiers.
        Return None if generate_format_ops() rejects a specifier.
    """
    specs: List[ConversionSpecifier] = parse_conversion_specifiers(format_str)
    ops = generate_format_ops(specs)
    if ops is None:
        return None

    pieces: List[str] = []
    cursor = 0
    for spec in specs:
        # Text between the end of the previous specifier and this one.
        pieces.append(format_str[cursor:spec.start_pos])
        cursor = spec.start_pos + len(spec.whole_seq)
    # Whatever follows the last specifier (possibly the empty string).
    pieces.append(format_str[cursor:])

    return pieces, ops
| |
| |
# Placeholder Context handed to parse_format_value() as its context argument.
# It is not expected to be used, because the code being compiled has
# already passed type checking.
EMPTY_CONTEXT: Final = Context()
| |
| |
def tokenizer_format_call(
        format_str: str) -> Optional[Tuple[List[str], List[FormatOp]]]:
    """Split a str.format() format string into literals and FormatOps.

    The specifiers come from parse_format_value(), which is shared with
    mypy. They are used both to cut the literal substrings out of the
    original format string and to derive the `FormatOp` for each
    replacement field.

    Return:
        A list of string literals and a list of FormatOps. The literals
        are interleaved with FormatOps and the length of returned literals
        should be exactly one more than FormatOps.
        Return None if it cannot parse the string.
    """
    # parse_format_value() wants a MessageBuilder; a throwaway one is
    # enough, since it wouldn't be used once the code has passed
    # type checking.
    reporter = MessageBuilder(Errors(), {})
    specs = parse_format_value(format_str, EMPTY_CONTEXT, reporter)
    if specs is None:
        return None
    ops = generate_format_ops(specs)
    if ops is None:
        return None

    raw_pieces: List[str] = []
    cursor = 0
    for spec in specs:
        # The bounds are moved by one on each side so that the enclosing
        # '{' and '}' end up in neither the literal nor the specifier.
        raw_pieces.append(format_str[cursor:spec.start_pos - 1])
        cursor = spec.start_pos + len(spec.whole_seq) + 1
    raw_pieces.append(format_str[cursor:])

    # Doubled braces are escapes for a single literal brace.
    pieces = [piece.replace('{{', '{').replace('}}', '}') for piece in raw_pieces]

    return pieces, ops
| |
| |
def convert_format_expr_to_str(builder: IRBuilder, format_ops: List[FormatOp],
                               exprs: List[Expression], line: int) -> Optional[List[Value]]:
    """Turn each expression into a string Value as directed by its FormatOp.

    `format_ops` and `exprs` are matched up pairwise:
      * FormatOp.STR: str-typed expressions are used as they are, int
        expressions go through 'int_to_str_op' and everything else
        through 'str_op'.
      * FormatOp.INT: only int expressions are accepted; they go through
        'int_to_str_op'.

    Return None when the two lists differ in length, when a FormatOp other
    than STR/INT is met, or when FormatOp.INT is paired with a non-int
    expression. Note that builder.accept() may already have been called
    for earlier expressions by the time None is returned.
    """
    if len(exprs) != len(format_ops):
        return None

    values: List[Value] = []
    for expr, op in zip(exprs, format_ops):
        rtype = builder.node_type(expr)
        if op == FormatOp.STR:
            if is_str_rprimitive(rtype):
                # Already a string: no conversion call needed.
                value = builder.accept(expr)
            else:
                # Pick the specialized int conversion when possible and
                # fall back to the generic one otherwise.
                if is_int_rprimitive(rtype) or is_short_int_rprimitive(rtype):
                    to_str = int_to_str_op
                else:
                    to_str = str_op
                value = builder.call_c(to_str, [builder.accept(expr)], line)
        elif op == FormatOp.INT:
            if not (is_int_rprimitive(rtype) or is_short_int_rprimitive(rtype)):
                return None
            value = builder.call_c(int_to_str_op, [builder.accept(expr)], line)
        else:
            return None
        values.append(value)
    return values
| |
| |
def join_formatted_strings(builder: IRBuilder, literals: Optional[List[str]],
                           substitutions: List[Value], line: int) -> Value:
    """Interleave literals and substitutions and join them with 'str_build_op'.

    `substitutions` holds the values produced by the formatting
    conversions.

    When `literals` is None the substitutions are simply joined. Otherwise
    `literals` holds the literal substrings of the original format string
    and should contain exactly one more item than `substitutions`. Empty
    literals are skipped.

    For example:
        (1) 'This is a %s and the value is %d'
            -> literals: ['This is a ', ' and the value is ', '']
        (2) '{} and the value is {}'
            -> literals: ['', ' and the value is ', '']
    """
    pieces: List[Value] = []

    if literals is None:
        pieces.extend(substitutions)
    else:
        for text, value in zip(literals, substitutions):
            if text:
                pieces.append(builder.load_str(text))
            pieces.append(value)
        # zip() stops before the extra trailing literal; add it by hand.
        tail = literals[-1]
        if tail:
            pieces.append(builder.load_str(tail))

    # Special case for empty string and literal string
    if not pieces:
        return builder.load_str("")
    if not substitutions and len(pieces) == 1:
        return pieces[0]

    # The first argument of str_build_op is the number of PyObject*
    # arguments that follow it.
    count = Integer(len(pieces), c_pyssize_t_rprimitive)
    return builder.call_c(str_build_op, [count] + pieces, line)
| |
| |
def convert_format_expr_to_bytes(builder: IRBuilder, format_ops: List[FormatOp],
                                 exprs: List[Expression], line: int) -> Optional[List[Value]]:
    """Turn each expression into a bytes Value as directed by its FormatOp.

    Only FormatOp.BYTES and FormatOp.STR paired with a bytes-typed
    expression are supported; such expressions are used as they are.

    Return None when the two lists differ in length, or as soon as an
    unsupported FormatOp or a non-bytes expression is met.
    """
    if len(exprs) != len(format_ops):
        return None

    values: List[Value] = []
    for expr, op in zip(exprs, format_ops):
        rtype = builder.node_type(expr)
        # conversion type 's' is an alias of 'b' in bytes formatting
        if op not in (FormatOp.BYTES, FormatOp.STR):
            return None
        if not is_bytes_rprimitive(rtype):
            return None
        values.append(builder.accept(expr))
    return values
| |
| |
def join_formatted_bytes(builder: IRBuilder, literals: List[str],
                         substitutions: List[Value], line: int) -> Value:
    """Interleave literals and substitutions and join them with 'bytes_build_op'.

    `literals` holds the literal substrings of the format string and should
    contain exactly one more item than `substitutions`. Each non-empty
    literal is loaded with builder.load_bytes_from_str_literal(); empty
    literals are skipped.
    """
    pieces: List[Value] = []

    for text, value in zip(literals, substitutions):
        if text:
            pieces.append(builder.load_bytes_from_str_literal(text))
        pieces.append(value)
    # zip() stops before the extra trailing literal; add it by hand.
    tail = literals[-1]
    if tail:
        pieces.append(builder.load_bytes_from_str_literal(tail))

    # Special case for empty bytes and literal
    if not pieces:
        return builder.load_bytes_from_str_literal('')
    if not substitutions and len(pieces) == 1:
        return pieces[0]

    # The first argument of bytes_build_op is the number of items that
    # follow it.
    count = Integer(len(pieces), c_pyssize_t_rprimitive)
    return builder.call_c(bytes_build_op, [count] + pieces, line)