blob: cbf99c1b48a35cc6c9f17d37bc566159be9dd98a [file] [log] [blame]
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
// Functions to convert cmark_nodes to commonmark strings.
static inline void outc(cmark_renderer *renderer, cmark_escaping escape,
int32_t c, unsigned char nextc) {
bool needs_escaping = false;
char encoded[20];
needs_escaping =
escape != LITERAL &&
((escape == NORMAL &&
(c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
c == '>' || c == '\\' || c == '`' || c == '!' ||
(c == '&' && isalpha(nextc)) || (c == '!' && nextc == '[') ||
(renderer->begin_line && (c == '-' || c == '+' || c == '=')) ||
((c == '.' || c == ')') &&
isdigit(renderer->buffer->ptr[renderer->buffer->size - 1])))) ||
(escape == URL && (c == '`' || c == '<' || c == '>' || isspace(c) ||
c == '\\' || c == ')' || c == '(')) ||
(escape == TITLE &&
(c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
if (needs_escaping) {
if (isspace(c)) {
// use percent encoding for spaces
sprintf(encoded, "%%%2x", c);
cmark_strbuf_puts(renderer->buffer, encoded);
renderer->column += 3;
} else {
cmark_render_ascii(renderer, "\\");
cmark_render_code_point(renderer, c);
}
} else {
cmark_render_code_point(renderer, c);
}
}
static int longest_backtick_sequence(const char *code) {
int longest = 0;
int current = 0;
size_t i = 0;
size_t code_len = safe_strlen(code);
while (i <= code_len) {
if (code[i] == '`') {
current++;
} else {
if (current > longest) {
longest = current;
}
current = 0;
}
i++;
}
return longest;
}
static int shortest_unused_backtick_sequence(const char *code) {
int32_t used = 1;
int current = 0;
size_t i = 0;
size_t code_len = safe_strlen(code);
while (i <= code_len) {
if (code[i] == '`') {
current++;
} else {
if (current) {
used |= (1 << current);
}
current = 0;
}
i++;
}
// return number of first bit that is 0:
i = 0;
while (used & 1) {
used = used >> 1;
i++;
}
return (int)i;
}
static bool is_autolink(cmark_node *node) {
cmark_chunk *title;
cmark_chunk *url;
cmark_node *link_text;
char *realurl;
int realurllen;
if (node->type != CMARK_NODE_LINK) {
return false;
}
url = &node->as.link.url;
if (url->len == 0 || scan_scheme(url, 0) == 0) {
return false;
}
title = &node->as.link.title;
// if it has a title, we can't treat it as an autolink:
if (title->len > 0) {
return false;
}
link_text = node->first_child;
cmark_consolidate_text_nodes(link_text);
realurl = (char *)url->data;
realurllen = url->len;
if (strncmp(realurl, "mailto:", 7) == 0) {
realurl += 7;
realurllen -= 7;
}
return (realurllen == link_text->as.literal.len &&
strncmp(realurl, (char *)link_text->as.literal.data,
link_text->as.literal.len) == 0);
}
// if node is a block node, returns node.
// otherwise returns first block-level node that is an ancestor of node.
static cmark_node *get_containing_block(cmark_node *node) {
while (node && (node->type < CMARK_NODE_FIRST_BLOCK ||
node->type > CMARK_NODE_LAST_BLOCK)) {
node = node->parent;
}
return node;
}
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *tmp;
int list_number;
cmark_delim_type list_delim;
int numticks;
int i;
bool entering = (ev_type == CMARK_EVENT_ENTER);
const char *info, *code, *title;
size_t info_len, code_len;
char listmarker[20];
char *emph_delim;
bufsize_t marker_width;
// Don't adjust tight list status til we've started the list.
// Otherwise we loose the blank line between a paragraph and
// a following list.
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
tmp = get_containing_block(node);
renderer->in_tight_list_item =
(tmp->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent)) ||
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent->parent));
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
LIT("> ");
cmark_strbuf_puts(renderer->prefix, "> ");
} else {
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
BLANKLINE();
}
break;
case CMARK_NODE_LIST:
if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
node->next->type == CMARK_NODE_LIST)) {
// this ensures 2 blank lines after list,
// if before code block or list:
LIT("\n");
}
break;
case CMARK_NODE_ITEM:
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
marker_width = 2;
} else {
list_number = cmark_node_get_list_start(node->parent);
list_delim = cmark_node_get_list_delim(node->parent);
tmp = node;
while (tmp->prev) {
tmp = tmp->prev;
list_number += 1;
}
// we ensure a width of at least 4 so
// we get nice transition from single digits
// to double
sprintf(listmarker, "%d%s%s", list_number,
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
list_number < 10 ? " " : " ");
marker_width = safe_strlen(listmarker);
}
if (entering) {
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
LIT("* ");
cmark_strbuf_puts(renderer->prefix, " ");
} else {
LIT(listmarker);
for (i = marker_width; i--;) {
cmark_strbuf_putc(renderer->prefix, ' ');
}
}
} else {
cmark_strbuf_truncate(renderer->prefix,
renderer->prefix->size - marker_width);
CR();
}
break;
case CMARK_NODE_HEADER:
if (entering) {
for (i = cmark_node_get_header_level(node); i > 0; i--) {
LIT("#");
}
LIT(" ");
renderer->no_wrap = true;
} else {
renderer->no_wrap = false;
BLANKLINE();
}
break;
case CMARK_NODE_CODE_BLOCK:
BLANKLINE();
info = cmark_node_get_fence_info(node);
info_len = safe_strlen(info);
code = cmark_node_get_literal(node);
code_len = safe_strlen(code);
// use indented form if no info, and code doesn't
// begin or end with a blank line, and code isn't
// first thing in a list item
if (info_len == 0 &&
(code_len > 2 && !isspace(code[0]) &&
!(isspace(code[code_len - 1]) && isspace(code[code_len - 2]))) &&
!(node->prev == NULL && node->parent &&
node->parent->type == CMARK_NODE_ITEM)) {
LIT(" ");
cmark_strbuf_puts(renderer->prefix, " ");
OUT(cmark_node_get_literal(node), false, LITERAL);
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
} else {
numticks = longest_backtick_sequence(code) + 1;
if (numticks < 3) {
numticks = 3;
}
for (i = 0; i < numticks; i++) {
LIT("`");
}
LIT(" ");
OUT(info, false, LITERAL);
CR();
OUT(cmark_node_get_literal(node), false, LITERAL);
CR();
for (i = 0; i < numticks; i++) {
LIT("`");
}
}
BLANKLINE();
break;
case CMARK_NODE_HTML:
BLANKLINE();
OUT(cmark_node_get_literal(node), false, LITERAL);
BLANKLINE();
break;
case CMARK_NODE_HRULE:
BLANKLINE();
LIT("-----");
BLANKLINE();
break;
case CMARK_NODE_PARAGRAPH:
if (!entering) {
BLANKLINE();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), true, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
if (!(CMARK_OPT_HARDBREAKS & options)) {
LIT("\\");
}
CR();
break;
case CMARK_NODE_SOFTBREAK:
if (renderer->width == 0 && !(CMARK_OPT_HARDBREAKS & options)) {
CR();
} else {
OUT(" ", true, LITERAL);
}
break;
case CMARK_NODE_CODE:
code = cmark_node_get_literal(node);
code_len = safe_strlen(code);
numticks = shortest_unused_backtick_sequence(code);
for (i = 0; i < numticks; i++) {
LIT("`");
}
if (code_len == 0 || code[0] == '`') {
LIT(" ");
}
OUT(cmark_node_get_literal(node), true, LITERAL);
if (code_len == 0 || code[code_len - 1] == '`') {
LIT(" ");
}
for (i = 0; i < numticks; i++) {
LIT("`");
}
break;
case CMARK_NODE_INLINE_HTML:
OUT(cmark_node_get_literal(node), false, LITERAL);
break;
case CMARK_NODE_STRONG:
if (entering) {
LIT("**");
} else {
LIT("**");
}
break;
case CMARK_NODE_EMPH:
// If we have EMPH(EMPH(x)), we need to use *_x_*
// because **x** is STRONG(x):
if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
node->next == NULL && node->prev == NULL) {
emph_delim = "_";
} else {
emph_delim = "*";
}
if (entering) {
LIT(emph_delim);
} else {
LIT(emph_delim);
}
break;
case CMARK_NODE_LINK:
if (is_autolink(node)) {
if (entering) {
LIT("<");
if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
LIT((const char *)cmark_node_get_url(node) + 7);
} else {
LIT((const char *)cmark_node_get_url(node));
}
LIT(">");
// return signal to skip contents of node...
return 0;
}
} else {
if (entering) {
LIT("[");
} else {
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
if (safe_strlen(title) > 0) {
LIT(" \"");
OUT(title, false, TITLE);
LIT("\"");
}
LIT(")");
}
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
LIT("![");
} else {
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
if (safe_strlen(title) > 0) {
OUT(" \"", true, LITERAL);
OUT(title, false, TITLE);
LIT("\"");
}
LIT(")");
}
break;
default:
assert(false);
break;
}
return 1;
}
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
if (options & CMARK_OPT_HARDBREAKS) {
// disable breaking on width, since it has
// a different meaning with OPT_HARDBREAKS
width = 0;
}
return cmark_render(root, options, width, outc, S_render_node);
}