blob: 1ba5dc98643df998b759d6ed442fac9686b4c18c [file] [log] [blame]
from typing import List, Optional, Tuple, Union
import attr
@attr.s(slots=True)
class Token:
# Type of the token (string, e.g. "paragraph_open")
type: str = attr.ib()
# html tag name, e.g. "p"
tag: str = attr.ib()
# Level change (number in {-1, 0, 1} set), where:
# - `1` means the tag is opening
# - `0` means the tag is self-closing
# - `-1` means the tag is closing
nesting: int = attr.ib()
# Html attributes. Format: `[ [ name1, value1 ], [ name2, value2 ] ]`
attrs: Optional[list] = attr.ib(default=None)
# Source map info. Format: `[ line_begin, line_end ]`
map: Optional[Tuple[int, int]] = attr.ib(default=None)
# nesting level, the same as `state.level`
level: int = attr.ib(default=0)
# An array of child nodes (inline and img tokens)
children: Optional[List["Token"]] = attr.ib(default=None)
# In a case of self-closing tag (code, html, fence, etc.),
# it has contents of this tag.
content: str = attr.ib(default="")
# '*' or '_' for emphasis, fence string for fence, etc.
markup: str = attr.ib(default="")
# fence infostring
info: str = attr.ib(default="")
# A place for plugins to store an arbitrary data
meta: dict = attr.ib(factory=dict)
# True for block-level tokens, false for inline tokens.
# Used in renderer to calculate line breaks
block: bool = attr.ib(default=False)
# If it's true, ignore this element when rendering.
# Used for tight lists to hide paragraphs.
hidden: bool = attr.ib(default=False)
def attrIndex(self, name: str) -> int:
if not self.attrs:
return -1
for i, at in enumerate(self.attrs):
if at[0] == name:
return i
return -1
def attrPush(self, attrData: Tuple[str, str]):
"""Add `[ name, value ]` attribute to list. Init attrs if necessary."""
if self.attrs:
self.attrs.append(attrData)
else:
self.attrs = [attrData]
def attrSet(self, name: str, value: str):
"""Set `name` attribute to `value`. Override old value if exists."""
idx = self.attrIndex(name)
if idx < 0:
self.attrPush([name, value])
else:
self.attrs[idx] = [name, value]
def attrGet(self, name: str) -> str:
""" Get the value of attribute `name`, or null if it does not exist."""
idx = self.attrIndex(name)
if idx >= 0:
return self.attrs[idx][1]
return None
def attrJoin(self, name, value):
"""Join value to existing attribute via space.
Or create new attribute if not exists.
Useful to operate with token classes.
"""
idx = self.attrIndex(name)
if idx < 0:
self.attrPush([name, value])
else:
self.attrs[idx][1] = self.attrs[idx][1] + " " + value
def copy(self):
"""Return a shallow copy of the instance."""
return attr.evolve(self)
def as_dict(self, children=True, filter=None, dict_factory=dict):
"""Return the token as a dict.
:param bool children: Also convert children to dicts
:param filter: A callable whose return code determines whether an
attribute or element is included (``True``) or dropped (``False``). Is
called with the `attr.Attribute` as the first argument and the
value as the second argument.
:param dict_factory: A callable to produce dictionaries from. For
example, to produce ordered dictionaries instead of normal Python
dictionaries, pass in ``collections.OrderedDict``.
"""
return attr.asdict(
self, recurse=children, filter=filter, dict_factory=dict_factory
)
@classmethod
def from_dict(cls, dct):
token = cls(**dct)
if token.children:
token.children = [cls.from_dict(c) for c in token.children]
return token
@attr.s(slots=True)
class NestedTokens:
"""A class that closely resembles a Token,
but for a an opening/closing Token pair, and their containing children.
"""
opening: Token = attr.ib()
closing: Optional[Token] = attr.ib()
children: List[Union[Token, "NestedTokens"]] = attr.ib(factory=list)
def __getattr__(self, name):
return getattr(self.opening, name)
def attrGet(self, name: str) -> str:
""" Get the value of attribute `name`, or null if it does not exist."""
return self.opening.attrGet(name)
def nest_tokens(tokens: List[Token]) -> List[Union[Token, NestedTokens]]:
"""Convert the token stream to a list of tokens and nested tokens.
``NestedTokens`` contain the open and close tokens and a list of children
of all tokens in between (recursively nested)
"""
output = []
tokens = list(reversed(tokens))
while tokens:
token = tokens.pop()
if token.nesting == 0:
token = token.copy()
output.append(token)
if token.children:
token.children = nest_tokens(token.children)
continue
assert token.nesting == 1, token.nesting
nested_tokens = [token]
nesting = 1
while tokens and nesting != 0:
token = tokens.pop()
nested_tokens.append(token)
nesting += token.nesting
if nesting != 0:
raise ValueError(f"unclosed tokens starting {nested_tokens[0]}")
child = NestedTokens(nested_tokens[0], nested_tokens[-1])
output.append(child)
child.children = nest_tokens(nested_tokens[1:-1])
return output