src/inlines.c - third_party/swift-cmark - Git at Google

 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>

 #include "cmark_ctype.h"
 #include "config.h"
 #include "node.h"
 #include "parser.h"
 #include "references.h"
 #include "cmark.h"
 #include "houdini.h"
 #include "utf8.h"
 #include "scanners.h"
 #include "inlines.h"

 // UTF-8 encodings of the typographic characters substituted by the
 // smart-punctuation handlers in this file.
 static const char *EMDASH = "\xE2\x80\x94";           // U+2014 EM DASH
 static const char *ENDASH = "\xE2\x80\x93";           // U+2013 EN DASH
 static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026 HORIZONTAL ELLIPSIS
 static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
 static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
 static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
 static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019

 // Macros for creating various kinds of simple inline nodes.
 // The first three wrap make_literal() and take the node's cmark_chunk
 // content; the remaining ones wrap make_simple() and take no arguments.
 // Each expands to a call that may evaluate to NULL if allocation fails.
 #define make_str(s) make_literal(CMARK_NODE_TEXT, s)
 #define make_code(s) make_literal(CMARK_NODE_CODE, s)
 #define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s)
 #define make_linebreak() make_simple(CMARK_NODE_LINEBREAK)
 #define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK)
 #define make_emph() make_simple(CMARK_NODE_EMPH)
 #define make_strong() make_simple(CMARK_NODE_STRONG)

 // One entry of the delimiter stack: a doubly linked list whose most
 // recently pushed element is subject.last_delim.
 typedef struct delimiter {
   struct delimiter *previous; // entry pushed before this one (NULL at bottom)
   struct delimiter *next;     // entry pushed after this one (NULL at top)
   cmark_node *inl_text;       // text node holding the delimiter characters
   bufsize_t position;         // subject position recorded when pushed, i.e.
                               // just after the delimiter run was consumed
   unsigned char delim_char;   // '*', '_', '\'', '"', '[' or '!'
   bool can_open;              // may open emphasis / a quotation
   bool can_close;             // may close emphasis / a quotation
   bool active;                // set true on push; cleared on earlier '['
                               // openers once a link has been formed
 } delimiter;

 // Parser state for one run of inline parsing.
 typedef struct {
   cmark_chunk input;           // text being parsed (a non-owning view; see
                                // subject_from_buf)
   bufsize_t pos;               // index of the next unread byte in input
   cmark_reference_map *refmap; // reference definitions used to resolve
                                // reference links; may be NULL
   delimiter *last_delim;       // top of the delimiter stack, NULL when empty
 } subject;

 // True when c is one of the two line-ending bytes: LF or CR.
 static inline bool S_is_line_end_char(char c) {
   switch (c) {
   case '\n':
   case '\r':
     return true;
   default:
     return false;
   }
 }

 // Forward declarations for helpers that are referenced before they are
 // defined further down in this file.
 static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                 delimiter *closer);

 static int parse_inline(subject *subj, cmark_node *parent, int options);

 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
 static bufsize_t subject_find_special_char(subject *subj, int options);

 // Allocate an inline node of type t whose literal content is s.
 // Returns NULL when the allocation fails.
 static inline cmark_node *make_literal(cmark_node_type t, cmark_chunk s) {
   cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));

   if (node == NULL) {
     return NULL;
   }

   node->type = t;
   node->as.literal = s;

   // The node starts out detached from any tree.
   node->parent = NULL;
   node->prev = NULL;
   node->next = NULL;
   node->first_child = NULL;
   node->last_child = NULL;

   // Source-position fields are not used for inlines.
   node->start_line = 0;
   node->start_column = 0;
   node->end_line = 0;

   return node;
 }

 // Allocate an inline node of type t that carries no literal content.
 // Returns NULL when the allocation fails.
 static inline cmark_node *make_simple(cmark_node_type t) {
   cmark_node *node = (cmark_node *)calloc(1, sizeof(*node));

   if (node == NULL) {
     return NULL;
   }

   node->type = t;

   // The node starts out detached from any tree.
   node->parent = NULL;
   node->prev = NULL;
   node->next = NULL;
   node->first_child = NULL;
   node->last_child = NULL;

   // Source-position fields are not used for inlines.
   node->start_line = 0;
   node->start_column = 0;
   node->end_line = 0;

   return node;
 }

 // Build a text node from content, using the HTML-unescaped form when
 // houdini_unescape_html reports success and the original chunk otherwise.
 static cmark_node *make_str_with_entities(cmark_chunk *content) {
   cmark_strbuf unescaped = GH_BUF_INIT;
   cmark_chunk text;

   if (houdini_unescape_html(&unescaped, content->data, content->len)) {
     text = cmark_chunk_buf_detach(&unescaped);
   } else {
     text = *content;
   }

   return make_str(text);
 }

 // Duplicate a chunk by creating a fresh, NUL-terminated copy of its bytes
 // rather than by sharing the buffer the way cmark_chunk_dup does.
 //
 // The returned chunk owns its buffer (alloc == 1). If the allocation fails
 // an empty chunk is returned instead of writing through a NULL pointer.
 static cmark_chunk chunk_clone(cmark_chunk *src) {
   cmark_chunk c = CMARK_CHUNK_EMPTY;
   bufsize_t len = src->len;
   unsigned char *copy = (unsigned char *)malloc(len + 1);

   if (copy == NULL) {
     return c;
   }

   // Empty chunks may carry a NULL data pointer, and passing NULL to memcpy
   // is undefined even for a zero-length copy.
   if (len > 0) {
     memcpy(copy, src->data, len);
   }
   copy[len] = '\0';

   c.data = copy;
   c.len = len;
   c.alloc = 1;

   return c;
 }

 // Produce the destination for an autolink: trims url in place, unescapes
 // it with houdini_unescape_html_f, and prefixes "mailto:" when is_email is
 // nonzero. An empty url (after trimming) yields an empty chunk.
 static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) {
   cmark_strbuf cleaned = GH_BUF_INIT;

   cmark_chunk_trim(url);

   if (url->len != 0) {
     if (is_email) {
       cmark_strbuf_puts(&cleaned, "mailto:");
     }
     houdini_unescape_html_f(&cleaned, url->data, url->len);
     return cmark_chunk_buf_detach(&cleaned);
   }

   cmark_chunk empty = CMARK_CHUNK_EMPTY;
   return empty;
 }

 // Build a link node for an autolink whose visible text is the URL itself.
 // url is the text between the angle brackets; is_email selects the
 // "mailto:" form of the destination.
 // Returns NULL if the link node cannot be allocated.
 static inline cmark_node *make_autolink(cmark_chunk url, int is_email) {
   cmark_node *link = make_simple(CMARK_NODE_LINK);
   if (link == NULL) {
     // make_simple reports allocation failure with NULL; do not write
     // through it.
     return NULL;
   }
   link->as.link.url = cmark_clean_autolink(&url, is_email);
   link->as.link.title = cmark_chunk_literal("");
   cmark_node_append_child(link, make_str_with_entities(&url));
   return link;
 }

 // Initialise e to parse the contents of buffer from the beginning, with an
 // empty delimiter stack. The subject borrows the buffer's storage
 // (alloc == 0) rather than copying it.
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap) {
   // Parser state.
   e->pos = 0;
   e->last_delim = NULL;
   e->refmap = refmap;

   // Non-owning view over the buffer's bytes.
   e->input.alloc = 0;
   e->input.len = buffer->size;
   e->input.data = buffer->ptr;
 }

 // Character predicate (usable with take_while): 1 for a backtick, else 0.
 static inline int isbacktick(int c) {
   return c == '`' ? 1 : 0;
 }

 // Return the byte at the current position, or 0 once the end of input has
 // been reached. Does not advance.
 static inline unsigned char peek_char(subject *subj) {
   if (subj->pos >= subj->input.len) {
     return 0;
   }

   unsigned char current = subj->input.data[subj->pos];
   // NUL bytes should have been stripped out by now; finding one inside the
   // input is a programming error.
   assert(current != 0);
   return current;
 }

 // Return the input byte at index pos. No bounds check is performed.
 static inline unsigned char peek_at(subject *subj, bufsize_t pos) {
   const unsigned char *bytes = subj->input.data;
   return bytes[pos];
 }

 // Return nonzero once the subject has been fully consumed, i.e. when there
 // are no more characters left to read.
 static inline int is_eof(subject *subj) {
   return (subj->input.len <= subj->pos);
 }

 // Advance the subject by one byte.  Doesn't check for eof.
 // The expansion is fully parenthesized so that it behaves as a single
 // expression wherever it is used.
 #define advance(subj) ((subj)->pos += 1)

 // Consume a run of spaces and tabs. Returns true if at least one byte was
 // consumed.
 static inline bool skip_spaces(subject *subj) {
   const bufsize_t start = subj->pos;

   for (;;) {
     unsigned char c = peek_char(subj);
     if (c != ' ' && c != '\t') {
       break;
     }
     advance(subj);
   }

   return subj->pos != start;
 }

 // Consume one line ending (CR, LF or CRLF) if present. Returns true if a
 // line-ending byte was consumed or the subject is at end of input.
 static inline bool skip_line_end(subject *subj) {
   const bufsize_t start = subj->pos;

   if (peek_char(subj) == '\r') {
     advance(subj);
   }
   if (peek_char(subj) == '\n') {
     advance(subj);
   }

   return subj->pos != start || is_eof(subj);
 }

 // Consume bytes for as long as predicate f accepts them and return the
 // consumed span as a chunk referring to the subject's input.
 static inline cmark_chunk take_while(subject *subj, int (*f)(int)) {
   const bufsize_t startpos = subj->pos;

   for (;;) {
     unsigned char c = peek_char(subj);
     if (c == 0 || !(*f)(c)) {
       break;
     }
     advance(subj);
   }

   return cmark_chunk_dup(&subj->input, startpos, subj->pos - startpos);
 }

 // Try to process a backtick code span that began with a run of
 // openticklength backticks (already parsed).  Return 0 if no matching
 // closing run of backticks is found, otherwise return the position in the
 // subject just after the closing backticks.
 //
 // Implemented as a loop rather than by recursion: each backtick run of the
 // wrong length simply starts another iteration, so the stack depth does not
 // grow with the number of such runs in the input.
 static bufsize_t scan_to_closing_backticks(subject *subj,
                                            bufsize_t openticklength) {
   for (;;) {
     unsigned char c;
     bufsize_t numticks = 0;

     // skip everything that is not a backtick
     while ((c = peek_char(subj)) && c != '`') {
       advance(subj);
     }
     if (is_eof(subj)) {
       return 0; // did not find closing ticks
     }

     // measure the backtick run we landed on
     while (peek_char(subj) == '`') {
       advance(subj);
       numticks++;
     }
     if (numticks == openticklength) {
       return (subj->pos);
     }
     // wrong length: keep scanning after this run
   }
 }

 // Parse a code span, or fall back to literal backticks when no closing run
 // exists. Expects the subject to be positioned on a backtick.
 static cmark_node *handle_backticks(subject *subj) {
   cmark_chunk openticks = take_while(subj, isbacktick);
   bufsize_t content_start = subj->pos;
   bufsize_t after_close = scan_to_closing_backticks(subj, openticks.len);

   if (after_close == 0) {
     // No closer: rewind and emit the opening backticks as plain text.
     subj->pos = content_start;
     return make_str(openticks);
   }

   // The code content lies between the opening and the closing runs.
   bufsize_t content_len = after_close - content_start - openticks.len;
   cmark_strbuf code = GH_BUF_INIT;

   cmark_strbuf_set(&code, subj->input.data + content_start, content_len);
   cmark_strbuf_trim(&code);
   cmark_strbuf_normalize_whitespace(&code);

   return make_code(cmark_chunk_buf_detach(&code));
 }

 // Scan a run of the delimiter character c at the current position and
 // return the number of delimiters consumed. Advances the position past the
 // run. For the quote characters ' and " exactly one character is consumed.
 //
 // On return *can_open and *can_close tell whether the run may open or close
 // emphasis (or a quotation). They are derived from whether the run is
 // left- and/or right-flanking, which in turn depends on the code points
 // immediately before and after the run.
 static int scan_delims(subject *subj, unsigned char c, bool *can_open,
                        bool *can_close) {
   int numdelims = 0;
   bufsize_t before_char_pos;
   int32_t after_char = 0;
   int32_t before_char = 0;
   int len;
   bool left_flanking, right_flanking;

   if (subj->pos == 0) {
     // start of input: use 10 ('\n') as the preceding character
     before_char = 10;
   } else {
     before_char_pos = subj->pos - 1;
     // walk back to the beginning of the UTF_8 sequence
     // (bytes of the form 10xxxxxx are continuation bytes):
     while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
       before_char_pos -= 1;
     }
     len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
                            subj->pos - before_char_pos, &before_char);
     if (len == -1) {
       // could not decode: fall back to '\n'
       before_char = 10;
     }
   }

   if (c == '\'' || c == '"') {
     numdelims++;
     advance(subj); // limit to 1 delim for quotes
   } else {
     while (peek_char(subj) == c) {
       numdelims++;
       advance(subj);
     }
   }

   // decode the code point that follows the run
   len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
                          subj->input.len - subj->pos, &after_char);
   if (len == -1) {
     // could not decode (NOTE(review): presumably also covers end of
     // input — confirm against cmark_utf8proc_iterate): use '\n'
     after_char = 10;
   }
   // left-flanking: not followed by whitespace, and not followed by
   // punctuation unless preceded by whitespace or punctuation
   left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                   !(cmark_utf8proc_is_punctuation(after_char) &&
                     !cmark_utf8proc_is_space(before_char) &&
                     !cmark_utf8proc_is_punctuation(before_char));
   // right-flanking: the mirror image of the rule above
   right_flanking =
       numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
       !(cmark_utf8proc_is_punctuation(before_char) &&
         !cmark_utf8proc_is_space(after_char) && !cmark_utf8proc_is_punctuation(after_char));
   if (c == '_') {
     // underscores get the stricter rule: a run that is flanking on both
     // sides only opens/closes when the adjacent character is punctuation
     *can_open = left_flanking &&
                 (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
     *can_close = right_flanking &&
                  (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
   } else if (c == '\'' || c == '"') {
     // quotes: a quote that is flanking on both sides is treated as a closer
     *can_open = left_flanking && !right_flanking;
     *can_close = right_flanking;
   } else {
     *can_open = left_flanking;
     *can_close = right_flanking;
   }
   return numdelims;
 }

 /*
 static void print_delimiters(subject *subj)
 {
         delimiter *delim;
         delim = subj->last_delim;
         while (delim != NULL) {
                 printf("Item at stack pos %p, text pos %d: %d %d %d next(%p)
 prev(%p)\n",
                        (void*)delim, delim->position, delim->delim_char,
                        delim->can_open, delim->can_close,
                        (void*)delim->next, (void*)delim->previous);
                 delim = delim->previous;
         }
 }
 */

 // Unlink delim from the subject's delimiter stack and free it.
 // A NULL delim is ignored.
 static void remove_delimiter(subject *subj, delimiter *delim) {
   if (delim == NULL)
     return;

   delimiter *below = delim->previous;
   delimiter *above = delim->next;

   if (above != NULL) {
     above->previous = below;
   } else {
     // delim is the top of the stack, so the top moves down one entry
     assert(delim == subj->last_delim);
     subj->last_delim = below;
   }

   if (below != NULL) {
     below->next = above;
   }

   free(delim);
 }

 // Push a new, active delimiter entry for character c onto the subject's
 // delimiter stack, recording the current subject position and the text
 // node that holds the delimiter characters. Does nothing if the entry
 // cannot be allocated.
 static void push_delimiter(subject *subj, unsigned char c, bool can_open,
                            bool can_close, cmark_node *inl_text) {
   delimiter *top = subj->last_delim;
   delimiter *entry = (delimiter *)malloc(sizeof(delimiter));

   if (entry == NULL) {
     return;
   }

   entry->inl_text = inl_text;
   entry->position = subj->pos;
   entry->delim_char = c;
   entry->can_open = can_open;
   entry->can_close = can_close;
   entry->active = true;

   // link on top of the current stack
   entry->next = NULL;
   entry->previous = top;
   if (top != NULL) {
     top->next = entry;
   }
   subj->last_delim = entry;
 }

 // Consume a delimiter run of character c (the subject must be positioned
 // on one), emit a text node for it, and record it on the delimiter stack
 // when it can open or close something. Quote characters are only recorded,
 // and only replaced by typographic quotes, when smart is set.
 static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
   bool can_open, can_close;
   bufsize_t numdelims = scan_delims(subj, c, &can_open, &can_close);
   const bool is_quote = (c == '\'' || c == '"');
   cmark_chunk contents;

   if (smart && c == '\'') {
     contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
   } else if (smart && c == '"') {
     contents =
         cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
   } else {
     // the literal delimiter characters that were just consumed
     contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
   }

   cmark_node *inl_text = make_str(contents);

   if ((can_open || can_close) && (smart || !is_quote)) {
     push_delimiter(subj, c, can_open, can_close, inl_text);
   }

   return inl_text;
 }

 // Handle a '-' at the current position. Without smart punctuation, or for
 // a lone hyphen, a literal "-" is produced. Otherwise the whole run of
 // hyphens is converted to a sequence of em dashes followed by en dashes.
 static cmark_node *handle_hyphen(subject *subj, bool smart) {
   int startpos = subj->pos;

   advance(subj);

   if (!(smart && peek_char(subj) == '-')) {
     return make_str(cmark_chunk_literal("-"));
   }

   // smart is known to be true from here on; swallow the rest of the run
   while (peek_char(subj) == '-') {
     advance(subj);
   }

   int numhyphens = subj->pos - startpos;
   int em_count;
   int en_count;

   if (numhyphens % 3 == 0) {
     // divisible by 3: all em dashes
     em_count = numhyphens / 3;
     en_count = 0;
   } else if (numhyphens % 2 == 0) {
     // divisible by 2: all en dashes
     em_count = 0;
     en_count = numhyphens / 2;
   } else if (numhyphens % 3 == 2) {
     // em dashes with one en dash at the end
     em_count = (numhyphens - 2) / 3;
     en_count = 1;
   } else {
     // em dashes with two en dashes at the end
     em_count = (numhyphens - 4) / 3;
     en_count = 2;
   }

   cmark_strbuf buf = GH_BUF_INIT;

   while (em_count-- > 0) {
     cmark_strbuf_puts(&buf, EMDASH);
   }
   while (en_count-- > 0) {
     cmark_strbuf_puts(&buf, ENDASH);
   }

   return make_str(cmark_chunk_buf_detach(&buf));
 }

 // Handle a '.' at the current position. With smart punctuation three
 // consecutive periods become an ellipsis character and two become "..";
 // otherwise a single "." is produced.
 static cmark_node *handle_period(subject *subj, bool smart) {
   advance(subj);

   if (!smart || peek_char(subj) != '.') {
     return make_str(cmark_chunk_literal("."));
   }

   advance(subj); // second period
   if (peek_char(subj) != '.') {
     return make_str(cmark_chunk_literal(".."));
   }

   advance(subj); // third period
   return make_str(cmark_chunk_literal(ELLIPSES));
 }

 // Resolve emphasis and smart quotes for every delimiter that sits above
 // stack_bottom on the delimiter stack, then pop and free all of those
 // delimiters. Pass NULL as stack_bottom to process the entire stack.
 //
 // The stack is walked upward from the first delimiter above stack_bottom.
 // For each potential closer the nearest matching opener below it is
 // searched; '*' and '_' pairs are turned into emphasis nodes by
 // S_insert_emph, while quote closers (and their openers, if any) have
 // their text replaced by typographic quotes.
 static void process_emphasis(subject *subj, delimiter *stack_bottom) {
   delimiter *closer = subj->last_delim;
   delimiter *opener;
   delimiter *old_closer;
   bool opener_found;
   // Per delimiter character: the entry at which a backwards search for an
   // opener stops. Only the four slots initialized below are ever read,
   // because lookups are guarded by the delim_char test further down.
   delimiter *openers_bottom[128];

   // initialize openers_bottom:
   openers_bottom['*'] = stack_bottom;
   openers_bottom['_'] = stack_bottom;
   openers_bottom['\''] = stack_bottom;
   openers_bottom['"'] = stack_bottom;

   // move back to first relevant delim.
   while (closer != NULL && closer->previous != stack_bottom) {
     closer = closer->previous;
   }

   // now move forward, looking for closers, and handling each
   while (closer != NULL) {
     if (closer->can_close &&
         (closer->delim_char == '*' || closer->delim_char == '_' ||
          closer->delim_char == '"' || closer->delim_char == '\'')) {
       // Now look backwards for first matching opener:
       opener = closer->previous;
       opener_found = false;
       while (opener != NULL && opener != stack_bottom &&
              opener != openers_bottom[closer->delim_char]) {
         if (opener->delim_char == closer->delim_char && opener->can_open) {
           opener_found = true;
           break;
         }
         opener = opener->previous;
       }
       // remember the closer: the branches below move `closer` on
       old_closer = closer;
       if (closer->delim_char == '*' || closer->delim_char == '_') {
         if (opener_found) {
           // returns the delimiter at which the forward scan continues
           closer = S_insert_emph(subj, opener, closer);
         } else {
           closer = closer->next;
         }
       } else if (closer->delim_char == '\'') {
         // a closing quote always becomes a right quote; a matched opener
         // becomes the corresponding left quote
         cmark_chunk_free(&closer->inl_text->as.literal);
         closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
         if (opener_found) {
           cmark_chunk_free(&opener->inl_text->as.literal);
           opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
         }
         closer = closer->next;
       } else if (closer->delim_char == '"') {
         cmark_chunk_free(&closer->inl_text->as.literal);
         closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
         if (opener_found) {
           cmark_chunk_free(&opener->inl_text->as.literal);
           opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
         }
         closer = closer->next;
       }
       if (!opener_found) {
         // set lower bound for future searches for openers:
         openers_bottom[old_closer->delim_char] = old_closer->previous;
         if (!old_closer->can_open) {
           // we can remove a closer that can't be an
           // opener, once we've seen there's no
           // matching opener:
           remove_delimiter(subj, old_closer);
         }
       }
     } else {
       closer = closer->next;
     }
   }
   // free all delimiters in list until stack_bottom:
   while (subj->last_delim != stack_bottom) {
     remove_delimiter(subj, subj->last_delim);
   }
 }

 // Turn the inlines between a matched opener and closer into the children
 // of an emphasis (one delimiter used) or strong (two delimiters used) node.
 //
 // The used delimiter characters are removed from the opener's and closer's
 // text nodes by shortening their literals, and every delimiter stacked
 // between the two is discarded. An opener left with no characters is
 // reused as the emph/strong node itself; a closer left with no characters
 // is freed together with its text node.
 //
 // Returns the delimiter at which process_emphasis should continue: the
 // closer itself if it still has characters left, otherwise the delimiter
 // that followed it on the stack (possibly NULL).
 static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                 delimiter *closer) {
   delimiter *delim, *tmp_delim;
   bufsize_t use_delims;
   cmark_node *opener_inl = opener->inl_text;
   cmark_node *closer_inl = closer->inl_text;
   bufsize_t opener_num_chars = opener_inl->as.literal.len;
   bufsize_t closer_num_chars = closer_inl->as.literal.len;
   cmark_node *tmp, *emph, *first_child, *last_child;

   // calculate the actual number of characters used from this closer
   if (closer_num_chars < 3 || opener_num_chars < 3) {
     // use as many as the shorter of the two runs provides
     use_delims = closer_num_chars <= opener_num_chars ? closer_num_chars
                                                       : opener_num_chars;
   } else { // closer and opener both have >= 3 characters
     use_delims = closer_num_chars % 2 == 0 ? 2 : 1;
   }

   // remove used characters from associated inlines.
   opener_num_chars -= use_delims;
   closer_num_chars -= use_delims;
   opener_inl->as.literal.len = opener_num_chars;
   closer_inl->as.literal.len = closer_num_chars;

   // free delimiters between opener and closer
   delim = closer->previous;
   while (delim != NULL && delim != opener) {
     tmp_delim = delim->previous;
     remove_delimiter(subj, delim);
     delim = tmp_delim;
   }

   // the inlines strictly between the two delimiter text nodes
   first_child = opener_inl->next;
   last_child = closer_inl->prev;

   // if opener has 0 characters, remove it and its associated inline
   if (opener_num_chars == 0) {
     // replace empty opener inline with emph
     cmark_chunk_free(&(opener_inl->as.literal));
     emph = opener_inl;
     emph->type = use_delims == 1 ? CMARK_NODE_EMPH : CMARK_NODE_STRONG;
     // remove opener from list
     remove_delimiter(subj, opener);
   } else {
     // create new emph or strong, and splice it in to our inlines
     // between the opener and closer
     // NOTE(review): the result of make_emph()/make_strong() is used
     // without a NULL check, although make_simple can return NULL.
     emph = use_delims == 1 ? make_emph() : make_strong();
     emph->parent = opener_inl->parent;
     emph->prev = opener_inl;
     opener_inl->next = emph;
   }

   // push children below emph
   emph->next = closer_inl;
   closer_inl->prev = emph;
   emph->first_child = first_child;
   emph->last_child = last_child;

   // fix children pointers
   first_child->prev = NULL;
   last_child->next = NULL;
   for (tmp = first_child; tmp != NULL; tmp = tmp->next) {
     tmp->parent = emph;
   }

   // if closer has 0 characters, remove it and its associated inline
   if (closer_num_chars == 0) {
     // remove empty closer inline
     cmark_node_free(closer_inl);
     // remove closer from list
     tmp_delim = closer->next;
     remove_delimiter(subj, closer);
     closer = tmp_delim;
   }

   return closer;
 }

 // Handle a backslash at the current position. A backslash before an ASCII
 // punctuation character yields that character as text; a backslash before
 // a line ending yields a hard line break; any other backslash is literal.
 static cmark_node *handle_backslash(subject *subj) {
   advance(subj); // step over the backslash

   unsigned char escaped = peek_char(subj);

   if (cmark_ispunct(escaped)) {
     bufsize_t escaped_at = subj->pos;
     advance(subj);
     return make_str(cmark_chunk_dup(&subj->input, escaped_at, 1));
   }

   if (!is_eof(subj) && skip_line_end(subj)) {
     return make_linebreak();
   }

   return make_str(cmark_chunk_literal("\\"));
 }

 // Handle an '&' at the current position: if houdini_unescape_ent
 // recognises an entity after it, consume the entity and emit its decoded
 // text; otherwise emit a literal "&".
 static cmark_node *handle_entity(subject *subj) {
   cmark_strbuf decoded = GH_BUF_INIT;

   advance(subj); // step over the '&'

   bufsize_t consumed = houdini_unescape_ent(
       &decoded, subj->input.data + subj->pos, subj->input.len - subj->pos);

   if (consumed != 0) {
     subj->pos += consumed;
     return make_str(cmark_chunk_buf_detach(&decoded));
   }

   return make_str(cmark_chunk_literal("&"));
 }

 // Normalise a link destination: trims url in place, drops one pair of
 // enclosing <...> if present, HTML-unescapes the text and removes
 // backslash escapes. Returns an empty chunk when url is empty after
 // trimming; otherwise a chunk detached from a freshly built buffer.
 cmark_chunk cmark_clean_url(cmark_chunk *url) {
   cmark_strbuf buf = GH_BUF_INIT;

   cmark_chunk_trim(url);

   if (url->len == 0) {
     cmark_chunk empty = CMARK_CHUNK_EMPTY;
     return empty;
   }

   unsigned char *text = url->data;
   bufsize_t text_len = url->len;

   if (text[0] == '<' && text[text_len - 1] == '>') {
     // skip the angle brackets
     text += 1;
     text_len -= 2;
   }

   houdini_unescape_html_f(&buf, text, text_len);
   cmark_strbuf_unescape(&buf);

   return cmark_chunk_buf_detach(&buf);
 }

 // Normalise a link title: drops one pair of enclosing '...', "..." or
 // (...) if present, HTML-unescapes the text and removes backslash escapes.
 // Returns an empty chunk for an empty title; otherwise a chunk detached
 // from a freshly built buffer.
 cmark_chunk cmark_clean_title(cmark_chunk *title) {
   cmark_strbuf buf = GH_BUF_INIT;

   if (title->len == 0) {
     cmark_chunk empty = CMARK_CHUNK_EMPTY;
     return empty;
   }

   unsigned char *text = title->data;
   bufsize_t text_len = title->len;
   unsigned char first = text[0];
   unsigned char last = text[text_len - 1];

   bool enclosed = (first == '\'' && last == '\'') ||
                   (first == '(' && last == ')') ||
                   (first == '"' && last == '"');

   if (enclosed) {
     // skip the surrounding quote characters
     text += 1;
     text_len -= 2;
   }

   houdini_unescape_html_f(&buf, text, text_len);
   cmark_strbuf_unescape(&buf);

   return cmark_chunk_buf_detach(&buf);
 }

 // Handle a '<' at the current position. Tries, in order, a URI autolink,
 // an email autolink and a raw HTML tag; if none matches, a literal "<" is
 // produced and only the '<' itself is consumed.
 static cmark_node *handle_pointy_brace(subject *subj) {
   bufsize_t matched;

   advance(subj); // step over the '<'

   const bufsize_t start = subj->pos;

   // 1. URI autolink (the match includes the closing '>', which is not
   //    part of the link text)
   matched = scan_autolink_uri(&subj->input, start);
   if (matched > 0) {
     cmark_chunk uri = cmark_chunk_dup(&subj->input, start, matched - 1);
     subj->pos = start + matched;
     return make_autolink(uri, 0);
   }

   // 2. email autolink
   matched = scan_autolink_email(&subj->input, start);
   if (matched > 0) {
     cmark_chunk address = cmark_chunk_dup(&subj->input, start, matched - 1);
     subj->pos = start + matched;
     return make_autolink(address, 1);
   }

   // 3. raw HTML tag (the node text includes the leading '<')
   matched = scan_html_tag(&subj->input, start);
   if (matched > 0) {
     cmark_chunk tag = cmark_chunk_dup(&subj->input, start - 1, matched + 1);
     subj->pos = start + matched;
     return make_raw_html(tag);
   }

   // nothing matched: the '<' is ordinary text
   return make_str(cmark_chunk_literal("<"));
 }

 // Parse a link label of the form [text] at the current position.
 // Returns 1 and stores the trimmed text between the brackets in
 // *raw_label on success, leaving the subject just after the ']'.
 // Returns 0, with the position unchanged, otherwise.
 // Unescaped brackets are not allowed inside a label, a backslash followed
 // by ASCII punctuation is skipped as a pair, and a label longer than
 // MAX_LINK_LABEL_LENGTH is rejected. Backticks have no special meaning.
 static int link_label(subject *subj, cmark_chunk *raw_label) {
   const bufsize_t startpos = subj->pos;
   int length = 0;
   unsigned char c;

   if (peek_char(subj) != '[') {
     return 0;
   }
   advance(subj); // step over the '['

   while ((c = peek_char(subj)) && c != '[' && c != ']') {
     advance(subj);
     length++;
     // a backslash also swallows the punctuation character it escapes
     if (c == '\\' && cmark_ispunct(peek_char(subj))) {
       advance(subj);
       length++;
     }
     if (length > MAX_LINK_LABEL_LENGTH) {
       subj->pos = startpos; // rewind
       return 0;
     }
   }

   if (c != ']') {
     subj->pos = startpos; // rewind
     return 0;
   }

   *raw_label =
       cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
   cmark_chunk_trim(raw_label);
   advance(subj); // step over the ']'
   return 1;
 }

 // Handle a ']' at the current position.
 //
 // Looks for the nearest '[' or '![' opener on the delimiter stack and
 // checks whether the bracketed text is followed by an inline destination
 // "(url title)" or can be resolved as a reference through subj->refmap.
 //
 // Returns a text node containing "]" when no link or image is formed.
 // When one is formed, the opener's text node is converted in place into
 // the LINK/IMAGE node, the inlines after it become its children, and NULL
 // is returned because nothing new has to be appended to parent.
 static cmark_node *handle_close_bracket(subject *subj, cmark_node *parent) {
   bufsize_t initial_pos;
   bufsize_t starturl, endurl, starttitle, endtitle, endall;
   bufsize_t n;
   bufsize_t sps;
   cmark_reference *ref;
   bool is_image = false;
   cmark_chunk url_chunk, title_chunk;
   cmark_chunk url, title;
   delimiter *opener;
   cmark_node *link_text;
   cmark_node *inl;
   cmark_chunk raw_label;
   int found_label;

   advance(subj); // advance past ]
   initial_pos = subj->pos;

   // look through list of delimiters for a [ or !
   opener = subj->last_delim;
   while (opener) {
     if (opener->delim_char == '[' || opener->delim_char == '!') {
       break;
     }
     opener = opener->previous;
   }

   if (opener == NULL) {
     return make_str(cmark_chunk_literal("]"));
   }

   if (!opener->active) {
     // take delimiter off stack
     remove_delimiter(subj, opener);
     return make_str(cmark_chunk_literal("]"));
   }

   // If we got here, we matched a potential link/image text.
   is_image = opener->delim_char == '!';
   link_text = opener->inl_text->next;

   // Now we check to see if it's a link/image.

   // First, look for an inline link.
   if (peek_char(subj) == '(' &&
       ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
       ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {

     // try to parse an explicit link:
     starturl = subj->pos + 1 + sps; // after (
     endurl = starturl + n;
     starttitle = endurl + scan_spacechars(&subj->input, endurl);

     // ensure there are spaces btw url and title
     endtitle = (starttitle == endurl)
                    ? starttitle
                    : starttitle + scan_link_title(&subj->input, starttitle);

     endall = endtitle + scan_spacechars(&subj->input, endtitle);

     if (peek_at(subj, endall) == ')') {
       subj->pos = endall + 1;

       // the raw chunks are only views into the input; the cleaned copies
       // are what the node keeps
       url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
       title_chunk =
           cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
       url = cmark_clean_url(&url_chunk);
       title = cmark_clean_title(&title_chunk);
       cmark_chunk_free(&url_chunk);
       cmark_chunk_free(&title_chunk);
       goto match;

     } else {
       goto noMatch;
     }
   }

   // Next, look for a following [link label] that matches in refmap.
   // skip spaces
   subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
   raw_label = cmark_chunk_literal("");
   found_label = link_label(subj, &raw_label);
   if (!found_label || raw_label.len == 0) {
     // no explicit label (or an empty one): the bracketed link text itself
     // serves as the label
     cmark_chunk_free(&raw_label);
     raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                 initial_pos - opener->position - 1);
   }

   if (!found_label) {
     // If we have a shortcut reference link, back up
     // to before the spaces we skipped.
     subj->pos = initial_pos;
   }

   ref = cmark_reference_lookup(subj->refmap, &raw_label);
   cmark_chunk_free(&raw_label);

   if (ref != NULL) { // found
     // copy, so that the node owns its url and title
     url = chunk_clone(&ref->url);
     title = chunk_clone(&ref->title);
     goto match;
   } else {
     goto noMatch;
   }

 noMatch:
   // If we fall through to here, it means we didn't match a link:
   remove_delimiter(subj, opener); // remove this opener from delimiter list
   subj->pos = initial_pos;
   return make_str(cmark_chunk_literal("]"));

 match:
   // convert the opener's text node into the link/image node
   inl = opener->inl_text;
   inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK;
   cmark_chunk_free(&inl->as.literal);
   inl->first_child = link_text;
   // resolve emphasis inside the link text (delimiters above the opener)
   process_emphasis(subj, opener);
   inl->as.link.url = url;
   inl->as.link.title = title;
   inl->next = NULL;
   if (link_text) {
     // re-parent the former siblings as children of the link
     cmark_node *tmp;
     link_text->prev = NULL;
     for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
       tmp->parent = inl;
     }
     tmp->parent = inl;
     inl->last_child = tmp;
   }
   parent->last_child = inl;

   // Now, if we have a link, we also want to deactivate earlier link
   // delimiters. (This code can be removed if we decide to allow links
   // inside links.)
   remove_delimiter(subj, opener);
   if (!is_image) {
     opener = subj->last_delim;
     while (opener != NULL) {
       if (opener->delim_char == '[') {
         if (!opener->active) {
           // stop at the first '[' that is already inactive
           break;
         } else {
           opener->active = false;
         }
       }
       opener = opener->previous;
     }
   }

   return NULL;
 }

 // Handle a CR or LF at the current position: consume the line ending
 // (CR, LF or CRLF) and any spaces or tabs that start the next line.
 // Produces a hard line break when the line ending was preceded by at
 // least two spaces, and a soft break otherwise.
 static cmark_node *handle_newline(subject *subj) {
   const bufsize_t nlpos = subj->pos;

   // step over cr, crlf, or lf
   if (peek_at(subj, subj->pos) == '\r') {
     advance(subj);
   }
   if (peek_at(subj, subj->pos) == '\n') {
     advance(subj);
   }

   // leading whitespace of the next line is dropped
   skip_spaces(subj);

   bool two_trailing_spaces = nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
                              peek_at(subj, nlpos - 2) == ' ';

   return two_trailing_spaces ? make_linebreak() : make_softbreak();
 }

 // Starting one byte after the current position, return the index of the
 // next byte that needs special handling by parse_inline, or the input
 // length if there is none. The smart-punctuation characters only count
 // when CMARK_OPT_SMART is set in options.
 static bufsize_t subject_find_special_char(subject *subj, int options) {
   // Bytes that always start a new inline: \r \n \ ` & _ * [ ] < !
   static const int8_t SPECIAL_CHARS[256] = {
       ['\n'] = 1, ['\r'] = 1, ['!'] = 1, ['&'] = 1, ['*'] = 1, ['<'] = 1,
       ['['] = 1,  ['\\'] = 1, [']'] = 1, ['_'] = 1, ['`'] = 1,
   };

   // Additional bytes that matter for smart punctuation: " ' . -
   static const char SMART_PUNCT_CHARS[256] = {
       ['"'] = 1,
       ['\''] = 1,
       ['-'] = 1,
       ['.'] = 1,
   };

   const bool smart = (options & CMARK_OPT_SMART) != 0;
   bufsize_t n;

   for (n = subj->pos + 1; n < subj->input.len; n++) {
     unsigned char byte = subj->input.data[n];

     if (SPECIAL_CHARS[byte] || (smart && SMART_PUNCT_CHARS[byte])) {
       return n;
     }
   }

   return subj->input.len;
 }

 // Parse a single inline at the current position, advance the subject past
 // it, and append the resulting node (if any) to parent.
 // Returns 0 when the subject is exhausted and nothing was parsed,
 // 1 otherwise.
 static int parse_inline(subject *subj, cmark_node *parent, int options) {
   const bool smart = (options & CMARK_OPT_SMART) != 0;
   cmark_node *new_inl = NULL;
   unsigned char c = peek_char(subj);

   if (c == 0) {
     return 0;
   }

   // dispatch on the first byte of the inline
   switch (c) {
   case '\r':
   case '\n':
     new_inl = handle_newline(subj);
     break;
   case '`':
     new_inl = handle_backticks(subj);
     break;
   case '\\':
     new_inl = handle_backslash(subj);
     break;
   case '&':
     new_inl = handle_entity(subj);
     break;
   case '<':
     new_inl = handle_pointy_brace(subj);
     break;
   case '*':
   case '_':
   case '\'':
   case '"':
     new_inl = handle_delim(subj, c, smart);
     break;
   case '-':
     new_inl = handle_hyphen(subj, smart);
     break;
   case '.':
     new_inl = handle_period(subj, smart);
     break;
   case '[':
     // potential link opener: emit "[" and remember it on the stack
     advance(subj);
     new_inl = make_str(cmark_chunk_literal("["));
     push_delimiter(subj, '[', true, false, new_inl);
     break;
   case ']':
     // may return NULL when a link/image was built in place
     new_inl = handle_close_bracket(subj, parent);
     break;
   case '!':
     advance(subj);
     if (peek_char(subj) != '[') {
       new_inl = make_str(cmark_chunk_literal("!"));
       break;
     }
     // potential image opener: emit "![" and remember it on the stack
     advance(subj);
     new_inl = make_str(cmark_chunk_literal("!["));
     push_delimiter(subj, '!', false, true, new_inl);
     break;
   default: {
     // plain text up to the next special character
     bufsize_t endpos = subject_find_special_char(subj, options);
     cmark_chunk contents =
         cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
     subj->pos = endpos;

     // if we're at a newline, strip trailing spaces.
     if (S_is_line_end_char(peek_char(subj))) {
       cmark_chunk_rtrim(&contents);
     }

     new_inl = make_str(contents);
     break;
   }
   }

   if (new_inl != NULL) {
     cmark_node_append_child(parent, new_inl);
   }

   return 1;
 }

 // Parse parent's string_content (with trailing whitespace trimmed) into
 // inline nodes, appending them as children of parent, then resolve any
 // emphasis delimiters that are still pending.
 extern void cmark_parse_inlines(cmark_node *parent, cmark_reference_map *refmap,
                                 int options) {
   subject subj;

   subject_from_buf(&subj, &parent->string_content, refmap);
   cmark_chunk_rtrim(&subj.input);

   while (!is_eof(&subj)) {
     if (!parse_inline(&subj, parent, options)) {
       break;
     }
   }

   // NULL: process the whole delimiter stack
   process_emphasis(&subj, NULL);
 }

 // Skip optional spaces/tabs, then at most one line ending, then any
 // spaces/tabs that follow that line ending.
 static void spnl(subject *subj) {
   skip_spaces(subj);

   if (!skip_line_end(subj)) {
     return;
   }

   skip_spaces(subj);
 }

 // Parse a link reference definition of the form
 //     [label]: url "optional title"
 // from the start of input. Assumes the string begins with a '['
 // character. On success the reference is added to refmap.
 //
 // Returns 0 if no reference definition is found, otherwise the position
 // in input just after the definition.
 //
 // A title that is not followed by the end of the line cannot belong to the
 // definition. In that case the definition is retried without a title: the
 // position is rewound to before the title and the title is discarded, so
 // that text which is not consumed is not also stored in the reference.
 bufsize_t cmark_parse_reference_inline(cmark_strbuf *input,
                                        cmark_reference_map *refmap) {
   subject subj;

   cmark_chunk lab;
   cmark_chunk url;
   cmark_chunk title;

   bufsize_t matchlen = 0;
   bufsize_t beforetitle;

   subject_from_buf(&subj, input, NULL);

   // parse label:
   if (!link_label(&subj, &lab) || lab.len == 0)
     return 0;

   // colon:
   if (peek_char(&subj) == ':') {
     advance(&subj);
   } else {
     return 0;
   }

   // parse link url:
   spnl(&subj);
   matchlen = scan_link_url(&subj.input, subj.pos);
   if (matchlen) {
     url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
     subj.pos += matchlen;
   } else {
     return 0;
   }

   // parse optional link_title
   beforetitle = subj.pos;
   spnl(&subj);
   matchlen = scan_link_title(&subj.input, subj.pos);
   if (matchlen) {
     title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
     subj.pos += matchlen;
   } else {
     subj.pos = beforetitle;
     title = cmark_chunk_literal("");
   }

   // parse final spaces and newline:
   skip_spaces(&subj);
   if (!skip_line_end(&subj)) {
     if (matchlen) { // try rewinding before title
       subj.pos = beforetitle;
       // the would-be title is not part of the definition after all
       title = cmark_chunk_literal("");
       skip_spaces(&subj);
       if (!skip_line_end(&subj)) {
         return 0;
       }
     } else {
       return 0;
     }
   }
   // insert reference into refmap
   cmark_reference_create(refmap, &lab, &url, &title);
   return subj.pos;
 }