// blob: b31227f4fae5256285deaf3c4b1c7ac7afeac9f7 [file] [log] [blame]
use crate::decor::InternalString;
use crate::parser::errors::CustomError;
use crate::parser::trivia::{newline, ws, ws_newlines};
use combine::error::{Commit, Info};
use combine::parser::char::char;
use combine::parser::range::{range, take, take_while};
use combine::stream::RangeStream;
use combine::*;
use std::char;
// ;; String
// string = ml-basic-string / basic-string / ml-literal-string / literal-string
//
// Parses any of the four TOML string forms into an `InternalString`.
// Each multi-line form is tried before its single-line counterpart because the
// three-character delimiters start with the single-character ones.
parse!(string() -> InternalString, {
    // A literal string is returned as a borrowed slice; convert it to the common output type.
    let literal = literal_string().map(|raw: &'a str| raw.into());
    choice((
        ml_basic_string(),
        basic_string(),
        ml_literal_string(),
        literal,
    ))
});
// basic-unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
/// Reports whether `c` may appear verbatim inside a basic string: any character from
/// U+0020 upward except the quotation mark (U+0022) and the backslash (U+005C).
#[inline]
fn is_basic_unescaped(c: char) -> bool {
    c >= '\u{20}' && c != '\u{22}' && c != '\u{5C}'
}
// escaped = escape ( %x22 / ; " quotation mark U+0022
// %x5C / ; \ reverse solidus U+005C
// %x2F / ; / solidus U+002F
// %x62 / ; b backspace U+0008
// %x66 / ; f form feed U+000C
// %x6E / ; n line feed U+000A
// %x72 / ; r carriage return U+000D
// %x74 / ; t tab U+0009
// %x75 4HEXDIG / ; uXXXX U+XXXX
// %x55 8HEXDIG ) ; UXXXXXXXX U+XXXXXXXX
/// Reports whether `c` is one of the characters allowed directly after the backslash
/// of an escape sequence.
#[inline]
fn is_escape_char(c: char) -> bool {
    const SELECTORS: [char; 10] = ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u', 'U'];
    SELECTORS.contains(&c)
}
// Parses the part of an escape sequence that follows the backslash and yields the
// character it denotes. The selector character is consumed first; `u` and `U` then
// hand over to `hexescape` for 4 or 8 further characters, every other selector maps
// directly to a single character.
parse!(escape() -> char, {
    satisfy(is_escape_char)
        .message("While parsing escape sequence")
        .then(|selector| {
            parser(move |input| {
                // Only the two Unicode forms need to read more input.
                let hex_len = match selector {
                    'u' => Some(4),
                    'U' => Some(8),
                    _ => None,
                };
                if let Some(len) = hex_len {
                    return hexescape(len).parse_stream(input).into_result();
                }
                let decoded = match selector {
                    'b' => '\u{8}',
                    'f' => '\u{c}',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    // ['\\', '"', '/'] stand for themselves
                    other => other,
                };
                // Nothing beyond the selector is read for these escapes.
                Ok((decoded, Commit::Peek(())))
            })
        })
});
// Reads the `n` characters that follow `\u` (n = 4) or `\U` (n = 8) and decodes them
// as a hexadecimal Unicode scalar value.
//
// Fails with a `ParseIntError` when the text is not made of hex digits only, and with
// `CustomError::InvalidHexEscape` when the number is not a valid `char`
// (`char::from_u32` returns `None`).
parse!(hexescape(n: usize) -> char, {
    take(*n)
        .and_then(|s: &'a str| {
            // `from_str_radix` tolerates a leading `+`, which is not a HEXDIG: without this
            // guard `\u+041` would be decoded as U+0041. Parsing the bare sign instead
            // yields the ordinary invalid-digit `ParseIntError` for such input.
            let digits = if s.starts_with('+') { "+" } else { s };
            u32::from_str_radix(digits, 16)
        })
        .and_then(|h| char::from_u32(h).ok_or(CustomError::InvalidHexEscape(h)))
});
// escape = %x5C ; \
/// Backslash: the character that introduces an escape sequence in basic strings.
const ESCAPE: char = '\\';
// basic-char = basic-unescaped / escaped
//
// Parses one logical character of a basic string: either a character that may appear
// verbatim, or a backslash followed by an escape sequence (decoded by `escape`).
parse!(basic_char() -> char, {
    satisfy(|c| is_basic_unescaped(c) || c == ESCAPE)
        .then(|first: char| parser(move |input| {
            if first == ESCAPE {
                // The backslash is already consumed; decode what follows it.
                escape().parse_stream(input).into_result()
            } else {
                Ok((first, Commit::Peek(())))
            }
        }))
});
// quotation-mark = %x22 ; "
/// Double quote: opens and closes a single-line basic string.
const QUOTATION_MARK: char = '"';
// basic-string = quotation-mark *basic-char quotation-mark
//
// Parses a single-line basic string and returns its content with escapes decoded.
parse!(basic_string() -> InternalString, {
    let opening = char(QUOTATION_MARK);
    let closing = char(QUOTATION_MARK);
    // Collect the decoded characters between the quotes.
    let content = many(basic_char());
    between(opening, closing, content)
        .message("While parsing a Basic String")
});
// ;; Multiline Basic String
// ml-basic-unescaped = %x20-5B / %x5D-10FFFF
/// Reports whether `c` may appear verbatim inside a multi-line basic string: any
/// character from U+0020 upward except the backslash (U+005C). Unlike the single-line
/// form, a lone quotation mark is allowed.
#[inline]
fn is_ml_basic_unescaped(c: char) -> bool {
    c >= '\u{20}' && c != '\u{5C}'
}
// ml-basic-string-delim = 3quotation-mark
/// Three double quotes: opens and closes a multi-line basic string.
const ML_BASIC_STRING_DELIM: &str = "\"\"\"";
// ml-basic-char = ml-basic-unescaped / escaped
//
// Parses one logical character of a multi-line basic string: either a character that
// may appear verbatim, or a backslash followed by an escape sequence (decoded by `escape`).
parse!(ml_basic_char() -> char, {
    satisfy(|c| is_ml_basic_unescaped(c) || c == ESCAPE)
        .then(|first: char| parser(move |input| {
            if first == ESCAPE {
                // The backslash is already consumed; decode what follows it.
                escape().parse_stream(input).into_result()
            } else {
                Ok((first, Commit::Peek(())))
            }
        }))
});
// When the last non-whitespace character on a line is a \,
// it will be trimmed along with all whitespace
// (including newlines) up to the next non-whitespace
// character or closing delimiter.
//
// Skips zero or more such line continuations and produces nothing. Each continuation
// is wrapped in `attempt`, so a backslash that does not start one is left unconsumed.
parse!(try_eat_escaped_newline() -> (), {
    // One continuation: the backslash, then `ws` and `ws_newlines` from the trivia module
    // (presumably trailing blanks followed by the newline-led whitespace run).
    let continuation = (
        char(ESCAPE),
        ws(),
        ws_newlines(),
    );
    skip_many(attempt(continuation))
});
// ml-basic-body = *( ( escape ws-newline ) / ml-basic-char / newline )
//
// Parses the content of a multi-line basic string, stopping in front of the closing
// delimiter, and returns it with escapes decoded and line continuations removed.
parse!(ml_basic_body() -> InternalString, {
    // Guard that succeeds only while the closing delimiter is not next in the input.
    let before_delim = not_followed_by(range(ML_BASIC_STRING_DELIM).map(Info::Range));
    // `TOML parsers should feel free to normalize newline
    // to whatever makes sense for their platform.`
    let body_char = choice((
        newline(),
        ml_basic_char(),
    ));
    // Every kept character may be followed by line continuations, which are dropped.
    let element = before_delim
        .with(body_char)
        .skip(try_eat_escaped_newline());

    // A newline immediately following the opening delimiter will be trimmed.
    optional(newline())
        .skip(try_eat_escaped_newline())
        .with(many(element))
});
// ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim
//
// Parses a complete multi-line basic string, delimiters included, and returns its body.
parse!(ml_basic_string() -> InternalString, {
    let opening = range(ML_BASIC_STRING_DELIM);
    let closing = range(ML_BASIC_STRING_DELIM);
    between(opening, closing, ml_basic_body())
        .message("While parsing a Multiline Basic String")
});
// ;; Literal String
// apostrophe = %x27 ; ' apostrophe
/// Single quote: opens and closes a single-line literal string.
const APOSTROPHE: char = '\'';
// literal-char = %x09 / %x20-26 / %x28-10FFFF
/// Reports whether `c` may appear inside a single-line literal string: a tab, or any
/// character from U+0020 upward except the apostrophe (U+0027).
#[inline]
fn is_literal_char(c: char) -> bool {
    c == '\u{09}' || (c >= '\u{20}' && c != '\u{27}')
}
// literal-string = apostrophe *literal-char apostrophe
//
// Parses a single-line literal string and returns the text between the apostrophes
// as a slice of the input; no escape processing takes place.
parse!(literal_string() -> &'a str, {
    let opening = char(APOSTROPHE);
    let closing = char(APOSTROPHE);
    let raw = take_while(is_literal_char);
    between(opening, closing, raw)
        .message("While parsing a Literal String")
});
// ;; Multiline Literal String
// ml-literal-string-delim = 3apostrophe
/// Three single quotes: opens and closes a multi-line literal string.
const ML_LITERAL_STRING_DELIM: &str = "'''";
// ml-literal-char = %x09 / %x20-10FFFF
/// Reports whether `c` may appear inside a multi-line literal string: a tab or any
/// character from U+0020 upward (newlines are handled separately by the body parser).
#[inline]
fn is_ml_literal_char(c: char) -> bool {
    c == '\u{09}' || c >= '\u{20}'
}
// ml-literal-body = *( ml-literal-char / newline )
//
// Parses the content of a multi-line literal string, stopping in front of the closing
// delimiter. Characters are taken as written; there is no escape processing.
parse!(ml_literal_body() -> InternalString, {
    // Guard that succeeds only while the closing delimiter is not next in the input.
    let before_delim = not_followed_by(range(ML_LITERAL_STRING_DELIM).map(Info::Range));
    // `TOML parsers should feel free to normalize newline
    // to whatever makes sense for their platform.`
    let body_char = choice((
        newline(),
        satisfy(is_ml_literal_char),
    ));

    // A newline immediately following the opening delimiter will be trimmed.
    optional(newline())
        .with(many(before_delim.with(body_char)))
});
// ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim
//
// Parses a complete multi-line literal string, delimiters included, and returns its body.
parse!(ml_literal_string() -> InternalString, {
    let opening = range(ML_LITERAL_STRING_DELIM);
    let closing = range(ML_LITERAL_STRING_DELIM);
    between(opening, closing, ml_literal_body())
        .message("While parsing a Multiline Literal String")
});