| """Convert straight quotation marks to typographic ones |
| """ |
| import re |
| from typing import List |
| |
| from .state_core import StateCore |
| from ..common.utils import charCodeAt |
| from ..common.utils import isWhiteSpace, isPunctChar, isMdAsciiPunct |
| from ..token import Token |
| |
| |
| QUOTE_TEST_RE = re.compile(r"['\"]") |
| QUOTE_RE = re.compile(r"['\"]") |
| APOSTROPHE = "\u2019" # ’ |
| |
| |
| def replaceAt(string: str, index: int, ch: str): |
| # When the index is negative, the behavior is different from the js version. |
| # But basically, the index will not be negative. |
| assert index >= 0 |
| return string[:index] + ch + string[index + 1 :] |
| |
| |
| def process_inlines(tokens: List[Token], state: StateCore): |
| stack = [] |
| |
| for i in range(len(tokens)): |
| token = tokens[i] |
| |
| thisLevel = token.level |
| |
| j = 0 |
| for j in range(len(stack))[::-1]: |
| if stack[j]["level"] <= thisLevel: |
| break |
| else: |
| # When the loop is terminated without a "break". |
| # Subtract 1 to get the same index as the js version. |
| j -= 1 |
| |
| stack = stack[: j + 1] |
| |
| if token.type != "text": |
| continue |
| |
| text = token.content |
| pos = 0 |
| maximum = len(text) |
| |
| while pos < maximum: |
| goto_outer = False |
| lastIndex = pos |
| t = QUOTE_RE.search(text[lastIndex:]) |
| if not t: |
| break |
| |
| canOpen = canClose = True |
| pos = t.start(0) + lastIndex + 1 |
| isSingle = t.group(0) == "'" |
| |
| # Find previous character, |
| # default to space if it's the beginning of the line |
| lastChar = 0x20 |
| |
| if t.start(0) + lastIndex - 1 >= 0: |
| lastChar = charCodeAt(text, t.start(0) + lastIndex - 1) |
| else: |
| for j in range(i)[::-1]: |
| # lastChar defaults to 0x20 |
| if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": |
| break |
| # should skip all tokens except 'text', 'html_inline' or 'code_inline' |
| if not tokens[j].content: |
| continue |
| |
| lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1) |
| break |
| |
| # Find next character, |
| # default to space if it's the end of the line |
| nextChar = 0x20 |
| |
| if pos < maximum: |
| nextChar = charCodeAt(text, pos) |
| else: |
| for j in range(i + 1, len(tokens)): |
| # nextChar defaults to 0x20 |
| if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": |
| break |
| # should skip all tokens except 'text', 'html_inline' or 'code_inline' |
| if not tokens[j].content: |
| continue |
| |
| nextChar = charCodeAt(tokens[j].content, 0) |
| break |
| |
| isLastPunctChar = isMdAsciiPunct(chr(lastChar)) or isPunctChar( |
| chr(lastChar) |
| ) |
| isNextPunctChar = isMdAsciiPunct(chr(nextChar)) or isPunctChar( |
| chr(nextChar) |
| ) |
| |
| isLastWhiteSpace = isWhiteSpace(lastChar) |
| isNextWhiteSpace = isWhiteSpace(nextChar) |
| |
| if isNextWhiteSpace: |
| canOpen = False |
| elif isNextPunctChar: |
| if not (isLastWhiteSpace or isLastPunctChar): |
| canOpen = False |
| |
| if isLastWhiteSpace: |
| canClose = False |
| elif isLastPunctChar: |
| if not (isNextWhiteSpace or isNextPunctChar): |
| canClose = False |
| |
| if nextChar == 0x22 and t.group(0) == '"': # 0x22: " |
| if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 |
| # special case: 1"" - count first quote as an inch |
| canClose = canOpen = False |
| |
| if canOpen and canClose: |
| # Replace quotes in the middle of punctuation sequence, but not |
| # in the middle of the words, i.e.: |
| # |
| # 1. foo " bar " baz - not replaced |
| # 2. foo-"-bar-"-baz - replaced |
| # 3. foo"bar"baz - not replaced |
| canOpen = isLastPunctChar |
| canClose = isNextPunctChar |
| |
| if not canOpen and not canClose: |
| # middle of word |
| if isSingle: |
| token.content = replaceAt( |
| token.content, t.start(0) + lastIndex, APOSTROPHE |
| ) |
| continue |
| |
| if canClose: |
| # this could be a closing quote, rewind the stack to get a match |
| for j in range(len(stack))[::-1]: |
| item = stack[j] |
| if stack[j]["level"] < thisLevel: |
| break |
| if item["single"] == isSingle and stack[j]["level"] == thisLevel: |
| item = stack[j] |
| |
| if isSingle: |
| openQuote = state.md.options.quotes[2] |
| closeQuote = state.md.options.quotes[3] |
| else: |
| openQuote = state.md.options.quotes[0] |
| closeQuote = state.md.options.quotes[1] |
| |
| # replace token.content *before* tokens[item.token].content, |
| # because, if they are pointing at the same token, replaceAt |
| # could mess up indices when quote length != 1 |
| token.content = replaceAt( |
| token.content, t.start(0) + lastIndex, closeQuote |
| ) |
| tokens[item["token"]].content = replaceAt( |
| tokens[item["token"]].content, item["pos"], openQuote |
| ) |
| |
| pos += len(closeQuote) - 1 |
| if item["token"] == i: |
| pos += len(openQuote) - 1 |
| |
| text = token.content |
| maximum = len(text) |
| |
| stack = stack[:j] |
| goto_outer = True |
| break |
| if goto_outer: |
| goto_outer = False |
| continue |
| |
| if canOpen: |
| stack.append( |
| { |
| "token": i, |
| "pos": t.start(0) + lastIndex, |
| "single": isSingle, |
| "level": thisLevel, |
| } |
| ) |
| elif canClose and isSingle: |
| token.content = replaceAt( |
| token.content, t.start(0) + lastIndex, APOSTROPHE |
| ) |
| |
| |
| def smartquotes(state: StateCore): |
| if not state.md.options.typographer: |
| return |
| |
| for token in state.tokens: |
| |
| if token.type != "inline" or not QUOTE_RE.search(token.content): |
| continue |
| |
| process_inlines(token.children, state) |