| /* |
| * Copyright (c) 2011, Vicent Marti |
| * |
| * Permission to use, copy, modify, and distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| */ |
| |
| #include "buffer.h" |
| #include "html.h" |
| |
| #include <string.h> |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <ctype.h> |
| |
| #if defined(_WIN32) |
| #define snprintf _snprintf |
| #endif |
| |
/*
 * Quote state shared by the callbacks during one smartypants pass.
 * Each flag is toggled every time a quote of that kind is emitted.
 */
struct smartypants_data {
	int in_squote;	/* non-zero while a single quote is open */
	int in_dquote;	/* non-zero while a double quote is open */
};
| |
/*
 * Forward declarations of the per-character callbacks, listed in the order
 * of their dispatch codes. Every callback shares one signature: output
 * buffer, quote state, the byte preceding the trigger (0 at start of input),
 * a pointer to the trigger byte, and the number of bytes available from it.
 */
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
| |
| static size_t (*smartypants_cb_ptrs[]) |
| (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) = |
| { |
| NULL, /* 0 */ |
| smartypants_cb__dash, /* 1 */ |
| smartypants_cb__parens, /* 2 */ |
| smartypants_cb__squote, /* 3 */ |
| smartypants_cb__dquote, /* 4 */ |
| smartypants_cb__amp, /* 5 */ |
| smartypants_cb__period, /* 6 */ |
| smartypants_cb__number, /* 7 */ |
| smartypants_cb__ltag, /* 8 */ |
| smartypants_cb__backtick, /* 9 */ |
| smartypants_cb__escape, /* 10 */ |
| }; |
| |
/*
 * Maps every input byte to an action code (an index into the callback
 * table). Bytes not listed here are zero, i.e. copied through untouched.
 */
static const uint8_t smartypants_cb_chars[256] = {
	['-']  = 1,
	['(']  = 2,
	['\''] = 3,
	['"']  = 4,
	['&']  = 5,
	['.']  = 6,
	['1']  = 7,
	['3']  = 7,
	['<']  = 8,
	['`']  = 9,
	['\\'] = 10,
};
| |
/*
 * Returns 1 when `c` can delimit a word: the 0 sentinel used for
 * "no character", whitespace, or punctuation. Returns 0 otherwise.
 */
static inline int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
| |
/*
 * Tries to emit a curly-quote entity of the form "&[lr][sd]quo;".
 *
 * ob            - output buffer
 * previous_char - byte before the quote (0 if none)
 * next_char     - byte after the quote (0 if none)
 * quote         - entity letter, 's' for single or 'd' for double
 * is_open       - in/out flag: non-zero while a quote of this kind is open
 *
 * A closing quote must be followed by a word boundary and an opening quote
 * must be preceded by one. Returns 1 and toggles *is_open when an entity
 * was written; returns 0 and writes nothing otherwise.
 */
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	const int closing = (*is_open != 0);
	const uint8_t neighbour = closing ? next_char : previous_char;
	char entity[8];	/* "&ldquo;" is 7 bytes plus the terminator */

	if (!word_boundary(neighbour))
		return 0;

	snprintf(entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	bufputs(ob, entity);
	return 1;
}
| |
| static size_t |
| smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) |
| { |
| if (size >= 2) { |
| uint8_t t1 = tolower(text[1]); |
| |
| if (t1 == '\'') { |
| if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) |
| return 1; |
| } |
| |
| if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && |
| (size == 3 || word_boundary(text[2]))) { |
| BUFPUTSL(ob, "’"); |
| return 0; |
| } |
| |
| if (size >= 3) { |
| uint8_t t2 = tolower(text[2]); |
| |
| if (((t1 == 'r' && t2 == 'e') || |
| (t1 == 'l' && t2 == 'l') || |
| (t1 == 'v' && t2 == 'e')) && |
| (size == 4 || word_boundary(text[3]))) { |
| BUFPUTSL(ob, "’"); |
| return 0; |
| } |
| } |
| } |
| |
| if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote)) |
| return 0; |
| |
| bufputc(ob, text[0]); |
| return 0; |
| } |
| |
/*
 * Callback for `(`. Replaces (c), (r) and (tm), case-insensitively, with
 * the copyright, registered and trademark entities; any other `(` is
 * copied through unchanged.
 *
 * Entities are written as ASCII named references so the output does not
 * depend on the encoding of this source file or of the document.
 *
 * Returns the number of bytes consumed after text[0].
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		uint8_t t1 = tolower(text[1]);
		uint8_t t2 = tolower(text[2]);

		if (t1 == 'c' && t2 == ')') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (t1 == 'r' && t2 == ')') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
| |
/*
 * Callback for `-`. Three dashes become an em dash, two become an en dash,
 * and a single dash is copied through unchanged.
 *
 * Entities are written as ASCII named references so the output does not
 * depend on the encoding of this source file or of the document.
 *
 * Returns the number of bytes consumed after text[0].
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '-' && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (size >= 2 && text[1] == '-') {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
| |
| static size_t |
| smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) |
| { |
| if (size >= 6 && memcmp(text, """, 6) == 0) { |
| if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote)) |
| return 5; |
| } |
| |
| if (size >= 4 && memcmp(text, "�", 4) == 0) |
| return 3; |
| |
| bufputc(ob, '&'); |
| return 0; |
| } |
| |
/*
 * Callback for `.`. Both "..." and ". . ." become an ellipsis entity; any
 * other period is copied through unchanged.
 *
 * The entity is written as an ASCII named reference so the output does not
 * depend on the encoding of this source file or of the document.
 *
 * Returns the number of bytes consumed after text[0].
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '.' && text[2] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
| |
| static size_t |
| smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) |
| { |
| if (size >= 2 && text[1] == '`') { |
| if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
/*
 * Callback for the digits `1` and `3`. When the digit starts a word, the
 * fractions 1/2, 1/4 and 3/4 are replaced by their entities, provided the
 * fraction is followed by the end of input or a word boundary. "1/4th" and
 * "3/4ths" are also accepted, leaving the suffix in the input. Any other
 * digit is copied through unchanged.
 *
 * Entities are written as ASCII named references so the output does not
 * depend on the encoding of this source file or of the document.
 *
 * Returns the number of bytes consumed after text[0].
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3) {
		if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
			if (size == 3 || word_boundary(text[3])) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		}

		if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		}

		if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
			if (size == 3 || word_boundary(text[3]) ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
| |
| static size_t |
| smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) |
| { |
| if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote)) |
| BUFPUTSL(ob, """); |
| |
| return 0; |
| } |
| |
| static size_t |
| smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) |
| { |
| static const char *skip_tags[] = { |
| "pre", "code", "var", "samp", "kbd", "math", "script", "style" |
| }; |
| static const size_t skip_tags_count = 8; |
| |
| size_t tag, i = 0; |
| |
| while (i < size && text[i] != '>') |
| i++; |
| |
| for (tag = 0; tag < skip_tags_count; ++tag) { |
| if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN) |
| break; |
| } |
| |
| if (tag < skip_tags_count) { |
| for (;;) { |
| while (i < size && text[i] != '<') |
| i++; |
| |
| if (i == size) |
| break; |
| |
| if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE) |
| break; |
| |
| i++; |
| } |
| |
| while (i < size && text[i] != '>') |
| i++; |
| } |
| |
| bufput(ob, text, i + 1); |
| return i; |
| } |
| |
/*
 * Callback for `\`. A backslash followed by one of \ " ' . - ` writes that
 * character literally and consumes both bytes, which prevents the
 * substitution the character would otherwise trigger. Any other backslash,
 * including one that is the last byte of the input, is copied through
 * unchanged.
 *
 * Returns the number of bytes consumed after text[0].
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* trailing backslash: keep it rather than dropping it */
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
| |
#if 0
/*
 * Disabled (never compiled) table of substitutions: trigger byte, input
 * pattern, replacement entity, and number of extra bytes to skip.
 * NOTE(review): `<` and `>` inside patterns presumably stand for word
 * boundaries, and some rows differ from the live callbacks (e.g. the dash
 * rules) -- confirm before re-enabling.
 */
static struct {
	uint8_t c0;
	const uint8_t *pattern;
	const uint8_t *entity;
	int skip;
} smartypants_subs[] = {
	{ '\'', "'s>",      "&rsquo;",  0 },
	{ '\'', "'t>",      "&rsquo;",  0 },
	{ '\'', "'re>",     "&rsquo;",  0 },
	{ '\'', "'ll>",     "&rsquo;",  0 },
	{ '\'', "'ve>",     "&rsquo;",  0 },
	{ '\'', "'m>",      "&rsquo;",  0 },
	{ '\'', "'d>",      "&rsquo;",  0 },
	{ '-',  "--",       "&mdash;",  1 },
	{ '-',  "<->",      "&ndash;",  0 },
	{ '.',  "...",      "&hellip;", 2 },
	{ '.',  ". . .",    "&hellip;", 4 },
	{ '(',  "(c)",      "&copy;",   2 },
	{ '(',  "(r)",      "&reg;",    2 },
	{ '(',  "(tm)",     "&trade;",  3 },
	{ '3',  "<3/4>",    "&frac34;", 2 },
	{ '3',  "<3/4ths>", "&frac34;", 2 },
	{ '1',  "<1/2>",    "&frac12;", 2 },
	{ '1',  "<1/4>",    "&frac14;", 2 },
	{ '1',  "<1/4th>",  "&frac14;", 2 },
	{ '&',  "&#0;",     0,          3 },
};
#endif
| |
| void |
| sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size) |
| { |
| size_t i; |
| struct smartypants_data smrt = {0, 0}; |
| |
| if (!text) |
| return; |
| |
| bufgrow(ob, size); |
| |
| for (i = 0; i < size; ++i) { |
| size_t org; |
| uint8_t action = 0; |
| |
| org = i; |
| while (i < size && (action = smartypants_cb_chars[text[i]]) == 0) |
| i++; |
| |
| if (i > org) |
| bufput(ob, text + org, i - org); |
| |
| if (i < size) { |
| i += smartypants_cb_ptrs[(int)action] |
| (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i); |
| } |
| } |
| } |
| |
| |