# Source blob: b1c7bdbc7485469644f17d058b398d3a7d806185
from typing import List
from ..token import Token
from ..ruler import StateBase
from ..common.utils import isSpace
class StateBlock(StateBase):
    """Parsing state shared by the block-level rules.

    Holds the source text, its character codes, the output token list and
    a set of per-line caches (``bMarks``, ``eMarks``, ``tShift``, ``sCount``
    and ``bsCount``) that are filled in once by ``__init__``.
    """

    def __init__(
        self, src: str, md, env, tokens: List[Token], srcCharCode: List[int] = None
    ):
        """Store the inputs and build the per-line caches.

        :param src: source text to parse
        :param md: parser instance (stored as ``self.md``)
        :param env: environment object (stored as ``self.env``)
        :param tokens: list that ``push`` appends new tokens to
        :param srcCharCode: optional pre-computed code points of ``src``;
            computed with ``ord`` when omitted
        """
        self.src = src
        if srcCharCode is not None:
            self.srcCharCode = srcCharCode
        else:
            self.srcCharCode = [ord(c) for c in src] if src is not None else []

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks = []  # line begin offsets for fast jumps
        self.eMarks = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift = []
        self.sCount = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        indent_found = False
        start = indent = offset = 0
        # Measure the list that is actually iterated below, so that
        # ``src=None`` (tolerated above) does not fail on ``len(None)``.
        length = len(self.srcCharCode)

        for pos, character in enumerate(self.srcCharCode):
            if not indent_found:
                if isSpace(character):
                    indent += 1
                    if character == 0x09:
                        # a tab advances to the next multiple of 4
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                indent_found = True

            if character == 0x0A or pos == length - 1:
                # A final line without a trailing LF ends one position past
                # its last character.
                line_end = pos if character == 0x0A else pos + 1
                self.bMarks.append(start)
                self.eMarks.append(line_end)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)

                indent_found = False
                indent = 0
                offset = 0
                start = line_end + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

    def __repr__(self):
        """Return a short summary: current line, level and token count."""
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype, tag, nesting):
        """Push new token to "stream".

        The token is created as ``Token(ttype, tag, nesting)``, flagged as a
        block token and appended to ``self.tokens``.  ``self.level`` is
        decremented *before* it is copied to a closing token (``nesting < 0``)
        and incremented *after* it is copied to an opening token
        (``nesting > 0``), so both ends of a pair carry the same level.

        :return: the newly created token
        """
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line):
        """Return True if ``line`` has nothing after its first ``tShift`` characters."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos):
        """Return the index of the first non-empty line at or after ``from_pos``.

        Returns ``self.lineMax`` when every remaining line is empty.
        """
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                # Kept from the original: an out-of-range index stops the
                # scan one line further on instead of propagating.
                from_pos += 1
                break
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position.

        :return: index of the first character at or after ``pos`` that
            ``isSpace`` rejects, or the source length if there is none
        """
        length = len(self.srcCharCode)
        while pos < length:
            if not isSpace(self.srcCharCode[pos]):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces in reverse, starting from given position - 1.

        ``pos`` is an exclusive upper bound: the first character inspected is
        the one at ``pos - 1``.  Scanning never goes below ``minimum``.

        :return: index just after the last non-space character found, or
            ``minimum`` if only spaces lie in ``[minimum, pos)``
        """
        if pos <= minimum:
            return pos
        while pos > minimum:
            # Step back first so that ``pos`` itself (typically a line end
            # mark, possibly equal to the source length) is never read.
            pos -= 1
            if not isSpace(self.srcCharCode[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip char codes from given position.

        :return: index of the first character at or after ``pos`` whose code
            differs from ``code``, or the source length if there is none
        """
        length = len(self.srcCharCode)
        while pos < length:
            if self.srcCharCode[pos] != code:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos, code, minimum):
        """Skip char codes reverse from given position - 1.

        ``pos`` is an exclusive upper bound: the first character inspected is
        the one at ``pos - 1``.  Scanning never goes below ``minimum``.

        :return: index just after the last character that differs from
            ``code``, or ``minimum`` if only ``code`` lies in ``[minimum, pos)``
        """
        if pos <= minimum:
            return pos
        while pos > minimum:
            # Step back first so that ``pos`` itself is never read.
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent, keepLastLF):
        """Cut lines range from source.

        :param begin: index of the first line to include
        :param end: index one past the last line to include
        :param indent: number of leading columns (tabs expanded) to strip
            from every line
        :param keepLastLF: when true, the last line is sliced one character
            past its end mark, like every other line
        :return: the concatenated line slices; ``""`` if ``begin >= end``
        """
        if begin >= end:
            return ""

        queue = []
        for line in range(begin, end):
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            if line + 1 < end or keepLastLF:
                last = self.eMarks[line] + 1
            else:
                last = self.eMarks[line]

            # Consume leading characters until ``indent`` columns are used up.
            while first < last and lineIndent < indent:
                ch = self.srcCharCode[first]
                if isSpace(ch):
                    if ch == 0x09:
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # a non-space character still inside the tShift prefix
                    # counts as a single column
                    lineIndent += 1
                else:
                    break
                first += 1

            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                queue.append(" " * (lineIndent - indent) + self.src[first:last])
            else:
                queue.append(self.src[first:last])

        return "".join(queue)