# blob: 0acc6f11692008afe9cc71b6fd9d00b5f39bee8b [file] [log] [blame]
import re
from ..common.utils import arrayReplaceAt
from .state_core import StateCore
from ..token import Token
# Opening ``<a`` tag at the very start of the string, followed by ``>`` or
# whitespace; letter case is ignored.
LINK_OPEN_RE = re.compile(r"^<a[>\s]", re.IGNORECASE)
# Closing ``</a>`` tag at the very start of the string, with optional
# whitespace before ``>``; letter case is ignored.
LINK_CLOSE_RE = re.compile(r"^</a\s*>", re.IGNORECASE)
# Leading ``http://`` prefix (case-sensitive).
HTTP_RE = re.compile(r"^http://")
# Leading ``mailto:`` prefix (case-sensitive).
MAILTO_RE = re.compile(r"^mailto:")
# Leading ``mailto:`` prefix in any letter case.
TEST_MAILTO_RE = re.compile(r"^mailto:", re.IGNORECASE)
def isLinkOpen(string: str) -> bool:
    """Return True when *string* begins with an opening ``<a`` tag."""
    # The pattern is anchored with ``^`` and compiled without MULTILINE, so
    # matching at position 0 is the only possible hit.
    return LINK_OPEN_RE.match(string) is not None
def isLinkClose(string: str) -> bool:
    """Return True when *string* begins with a closing ``</a>`` tag."""
    # The pattern is anchored with ``^`` and compiled without MULTILINE, so
    # matching at position 0 is the only possible hit.
    return LINK_CLOSE_RE.match(string) is not None
def linkify(state: StateCore) -> None:
    """Replace link-like text inside inline tokens with autolink tokens.

    Does nothing when ``state.md.options.linkify`` is falsy.  Otherwise, for
    every ``inline`` token in ``state.tokens`` whose content passes
    ``state.md.linkify.pretest``, the children are scanned from the end and
    each ``text`` child accepted by ``state.md.linkify.test`` is split into
    ``text`` / ``link_open`` / ``text`` / ``link_close`` tokens.  Text that
    sits inside a markdown link or between raw HTML ``<a>`` ... ``</a>`` tags
    is left untouched.

    :param state: core parser state; the ``children`` attribute of each
        affected inline token is reassigned.
    :raises ModuleNotFoundError: if the option is enabled but
        ``state.md.linkify`` is falsy.
    """
    if not state.md.options.linkify:
        return

    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    linkifier = state.md.linkify

    for blockToken in state.tokens:
        if blockToken.type != "inline" or not linkifier.pretest(
            blockToken.content
        ):
            continue

        tokens = blockToken.children
        htmlLinkLevel = 0

        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]

            # Skip content of markdown links: walk back to the opener.
            if currentToken.type == "link_close":
                i -= 1
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue

            # Skip content of html tag links.  Because the scan runs
            # backwards, a closing tag is seen first and *raises* the depth.
            if currentToken.type == "html_inline":
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue

            if currentToken.type != "text" or not linkifier.test(
                currentToken.content
            ):
                continue

            text = currentToken.content
            # Guard against a matcher that reports "no match" as None.
            links = linkifier.match(text) or []

            # Now split string to nodes
            nodes = []
            level = currentToken.level
            lastPos = 0

            for link in links:
                fullUrl = state.md.normalizeLink(link.url)
                if not state.md.validateLink(fullUrl):
                    continue

                urlText = link.text

                # Linkifier might send raw hostnames like "example.com", where url
                # starts with domain name. So we prepend http:// in those cases,
                # and remove it afterwards.
                if not link.schema:
                    urlText = HTTP_RE.sub(
                        "", state.md.normalizeLinkText("http://" + urlText)
                    )
                # Same trick for bare e-mail addresses: only add the scheme
                # when the visible text does not already start with it.
                elif link.schema == "mailto:" and not TEST_MAILTO_RE.search(
                    urlText
                ):
                    urlText = MAILTO_RE.sub(
                        "", state.md.normalizeLinkText("mailto:" + urlText)
                    )
                else:
                    urlText = state.md.normalizeLinkText(urlText)

                pos = link.index

                # Plain text between the previous link and this one.
                if pos > lastPos:
                    token = Token("text", "", 0)
                    token.content = text[lastPos:pos]
                    token.level = level
                    nodes.append(token)

                token = Token("link_open", "a", 1)
                token.attrs = {"href": fullUrl}
                token.level = level
                level += 1
                token.markup = "linkify"
                token.info = "auto"
                nodes.append(token)

                token = Token("text", "", 0)
                token.content = urlText
                token.level = level
                nodes.append(token)

                token = Token("link_close", "a", -1)
                level -= 1
                token.level = level
                token.markup = "linkify"
                token.info = "auto"
                nodes.append(token)

                lastPos = link.last_index

            # Trailing text after the last link.
            if lastPos < len(text):
                token = Token("text", "", 0)
                token.content = text[lastPos:]
                token.level = level
                nodes.append(token)

            blockToken.children = tokens = arrayReplaceAt(tokens, i, nodes)