blob: 0b46887811fb5e464ffd767225a7db3ad67ac625 [file] [log] [blame]
"""Tokenizers for three string formatting methods"""
from __future__ import annotations
from enum import Enum, unique
from typing import Final
from mypy.checkstrformat import (
from mypy.errors import Errors
from mypy.messages import MessageBuilder
from mypy.nodes import Context, Expression
from mypy.options import Options
from import Integer, Value
from import (
from mypyc.irbuild.builder import IRBuilder
from mypyc.primitives.bytes_ops import bytes_build_op
from mypyc.primitives.int_ops import int_to_str_op
from mypyc.primitives.str_ops import str_build_op, str_op
@unique
class FormatOp(Enum):
    """FormatOp represents conversion operations of string formatting during
    compile time.

    Compared to ConversionSpecifier, FormatOp has fewer attributes.
    For example, to mark a conversion from any object to string,
    ConversionSpecifier may have several representations, like '%s', '{}'
    or '{:{}}'. However, there would only exist one corresponding FormatOp.
    """

    # Conversion to str ('%s', '{:{}}' or an empty specifier).
    STR = "s"
    # Integer conversion ('%d').
    INT = "d"
    # Bytes conversion ('%b').
    BYTES = "b"
def generate_format_ops(specifiers: list[ConversionSpecifier]) -> list[FormatOp] | None:
    """Convert ConversionSpecifier to FormatOp.

    Different ConversionSpecifiers may share a same FormatOp.

    Args:
        specifiers: The conversion specifiers to translate. Only their
            `whole_seq` attribute is inspected.

    Return:
        A list holding one FormatOp per specifier, in the same order.
        Return None as soon as a specifier has a non-empty `whole_seq`
        that is not one of '%s', '{:{}}', '%d' or '%b'.
    """
    format_ops: list[FormatOp] = []
    for spec in specifiers:
        # TODO: Match specifiers instead of using whole_seq
        if spec.whole_seq in ("%s", "{:{}}"):
            format_op = FormatOp.STR
        elif spec.whole_seq == "%d":
            format_op = FormatOp.INT
        elif spec.whole_seq == "%b":
            format_op = FormatOp.BYTES
        elif spec.whole_seq:
            # Any other non-empty specifier is unsupported: give up on the
            # whole format string rather than produce a partial result.
            return None
        else:
            # An empty whole_seq (presumably a bare '{}') is a plain
            # string conversion.
            format_op = FormatOp.STR
        format_ops.append(format_op)
    return format_ops
def tokenizer_printf_style(format_str: str) -> tuple[list[str], list[FormatOp]] | None:
    """Tokenize a printf-style format string using regex.

    The conversion specifiers are located by parse_conversion_specifiers();
    the text between them becomes the literals.

    Args:
        format_str: The printf-style format string to split.

    Return:
        A list of string literals and a list of FormatOps. The literals
        surround the FormatOps, so there is exactly one more literal than
        there are FormatOps (literals may be empty strings).
        Return None if any specifier cannot be mapped to a FormatOp.
    """
    specifiers: list[ConversionSpecifier] = parse_conversion_specifiers(format_str)
    format_ops = generate_format_ops(specifiers)
    if format_ops is None:
        return None

    literals: list[str] = []
    last_end = 0
    for spec in specifiers:
        cur_start = spec.start_pos
        # Text between the end of the previous specifier and this one.
        literals.append(format_str[last_end:cur_start])
        last_end = cur_start + len(spec.whole_seq)
    # Whatever follows the last specifier (possibly the empty string).
    literals.append(format_str[last_end:])

    return literals, format_ops
# An empty Context, passed as the context argument of parse_format_value()
# by tokenizer_format_call().
# It wouldn't be used since the code has passed the type-checking.
EMPTY_CONTEXT: Final = Context()
def tokenizer_format_call(format_str: str) -> tuple[list[str], list[FormatOp]] | None:
    """Tokenize a str.format() format string.

    The core function parse_format_value() is shared with mypy.
    With these specifiers, we then parse the literal substrings
    of the original format string and convert `ConversionSpecifier`
    to `FormatOp`.

    Args:
        format_str: The str.format() style format string to split.

    Return:
        A list of string literals and a list of FormatOps. The literals
        are interleaved with FormatOps and the length of returned literals
        should be exactly one more than FormatOps.
        Return None if it cannot parse the string.
    """
    # Creates an empty MessageBuilder here.
    # It wouldn't be used since the code has passed the type-checking.
    specifiers = parse_format_value(
        format_str, EMPTY_CONTEXT, MessageBuilder(Errors(Options()), {})
    )
    if specifiers is None:
        return None
    format_ops = generate_format_ops(specifiers)
    if format_ops is None:
        return None

    literals: list[str] = []
    last_end = 0
    for spec in specifiers:
        # Skip { and }: the slice stops one character before start_pos and
        # the next literal starts one character after the specifier ends.
        literals.append(format_str[last_end : spec.start_pos - 1])
        last_end = spec.start_pos + len(spec.whole_seq) + 1
    # Trailing text after the last replacement field (possibly empty).
    literals.append(format_str[last_end:])
    # Deal with escaped {{ and }}
    literals = [x.replace("{{", "{").replace("}}", "}") for x in literals]

    return literals, format_ops
def convert_format_expr_to_str(
    builder: IRBuilder, format_ops: list[FormatOp], exprs: list[Expression], line: int
) -> list[Value] | None:
    """Convert expressions into string literal objects with the guidance
    of FormatOps.

    Args:
        builder: The IR builder used to evaluate the expressions and to
            emit the conversion calls.
        format_ops: The conversion to apply to each expression.
        exprs: The expressions to convert, paired with `format_ops` by
            position.
        line: The line number passed on to the emitted calls.

    Return:
        One converted value per expression, in order.
        Return None when the two lists differ in length, when an INT
        conversion is applied to a non-integer expression, or when a
        FormatOp other than STR or INT is met.
    """
    if len(format_ops) != len(exprs):
        return None

    converted: list[Value] = []
    for x, format_op in zip(exprs, format_ops):
        node_type = builder.node_type(x)
        if format_op == FormatOp.STR:
            if is_str_rprimitive(node_type):
                # Already a str: use the value as it is.
                var_str = builder.accept(x)
            elif is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
                var_str = builder.call_c(int_to_str_op, [builder.accept(x)], line)
            else:
                # Any other type goes through the generic str_op.
                var_str = builder.call_c(str_op, [builder.accept(x)], line)
        elif format_op == FormatOp.INT:
            if is_int_rprimitive(node_type) or is_short_int_rprimitive(node_type):
                var_str = builder.call_c(int_to_str_op, [builder.accept(x)], line)
            else:
                return None
        else:
            return None
        converted.append(var_str)
    return converted
def join_formatted_strings(
    builder: IRBuilder, literals: list[str] | None, substitutions: list[Value], line: int
) -> Value:
    """Merge the list of literals and the list of substitutions
    alternatively using 'str_build_op'.

    `substitutions` is the result value of formatting conversions.

    If the `literals` is set to None, we simply join the substitutions;
    Otherwise, the `literals` is the literal substrings of the original
    format string and its length should be exactly one more than
    substitutions.

    For example:
    (1) 'This is a %s and the value is %d'
        -> literals: ['This is a ', ' and the value is ', '']
    (2) '{} and the value is {}'
        -> literals: ['', ' and the value is ', '']

    Args:
        builder: The IR builder used to load literals and emit the call.
        literals: The literal substrings, or None to join substitutions only.
        substitutions: The already converted values to interleave.
        line: The line number passed on to the emitted call.

    Return:
        The value holding the joined string.
    """
    # The first parameter for str_build_op is the total size of
    # the following PyObject*. It is a placeholder until the list is complete.
    result_list: list[Value] = [Integer(0, c_pyssize_t_rprimitive)]

    if literals is not None:
        for a, b in zip(literals, substitutions):
            # Empty literals are skipped instead of being joined.
            if a:
                result_list.append(builder.load_str(a))
            result_list.append(b)
        # zip() stops at the shorter list, so the final literal is handled here.
        if literals[-1]:
            result_list.append(builder.load_str(literals[-1]))
    else:
        result_list.extend(substitutions)

    # Special case for empty string and literal string
    if len(result_list) == 1:
        return builder.load_str("")
    if not substitutions and len(result_list) == 2:
        return result_list[1]

    result_list[0] = Integer(len(result_list) - 1, c_pyssize_t_rprimitive)
    return builder.call_c(str_build_op, result_list, line)
def convert_format_expr_to_bytes(
    builder: IRBuilder, format_ops: list[FormatOp], exprs: list[Expression], line: int
) -> list[Value] | None:
    """Convert expressions into bytes literal objects with the guidance
    of FormatOps.

    Args:
        builder: The IR builder used to evaluate the expressions.
        format_ops: The conversion to apply to each expression.
        exprs: The expressions to convert, paired with `format_ops` by
            position.
        line: Unused in this function.

    Return:
        One value per expression, in order.
        Return None when the two lists differ in length, when an
        expression is not of bytes type, or when a FormatOp other than
        BYTES or STR is met.
    """
    if len(format_ops) != len(exprs):
        return None

    converted: list[Value] = []
    for x, format_op in zip(exprs, format_ops):
        node_type = builder.node_type(x)
        # conversion type 's' is an alias of 'b' in bytes formatting
        if format_op == FormatOp.BYTES or format_op == FormatOp.STR:
            if is_bytes_rprimitive(node_type):
                var_bytes = builder.accept(x)
            else:
                return None
        else:
            return None
        converted.append(var_bytes)
    return converted
def join_formatted_bytes(
    builder: IRBuilder, literals: list[str], substitutions: list[Value], line: int
) -> Value:
    """Merge the list of literals and the list of substitutions
    alternatively using 'bytes_build_op'.

    Args:
        builder: The IR builder used to load literals and emit the call.
        literals: The literal substrings of the format string; its length
            should be one more than `substitutions`.
        substitutions: The already converted values to interleave.
        line: The line number passed on to the emitted call.

    Return:
        The value holding the joined bytes.
    """
    # The first element is the number of values that follow; it is a
    # placeholder until the list is complete.
    result_list: list[Value] = [Integer(0, c_pyssize_t_rprimitive)]

    for a, b in zip(literals, substitutions):
        # Empty literals are skipped instead of being joined.
        if a:
            result_list.append(builder.load_bytes_from_str_literal(a))
        result_list.append(b)
    # zip() stops at the shorter list, so the final literal is handled here.
    if literals[-1]:
        result_list.append(builder.load_bytes_from_str_literal(literals[-1]))

    # Special case for empty bytes and literal
    if len(result_list) == 1:
        return builder.load_bytes_from_str_literal("")
    if not substitutions and len(result_list) == 2:
        return result_list[1]

    result_list[0] = Integer(len(result_list) - 1, c_pyssize_t_rprimitive)
    return builder.call_c(bytes_build_op, result_list, line)