blob: 9f158e60813fa19fee72c4526b234900ffb050e3 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! Defines the lexer for the Documentation Compiler.
//! It extracts all the lexical items (with their location) from a source text (Documentation).
use crate::source::Location;
use crate::source::Source;
use crate::DocCompiler;
use std::rc::Rc;
use std::str::CharIndices;
/// Defines all the lexical items we can parse.
#[derive(Clone)]
pub enum LexicalContent {
    /// A number. For example:
    /// - 1234
    /// - 0xabcd
    Number(String),
    /// A name from an English text point of view. For example:
    /// - Hello
    /// - heap-specific
    /// - doesn't
    Name(String),
    /// A reference to an existing applicative concept like a method name, a field name, ...
    /// A reference is held between back quotes.
    Reference(String),
    /// A block of text between two triple back quotes.
    CodeBlock(String),
    /// A text between single quotes. The left quote must be after a white space or at the
    /// beginning of the text. The text inside the quotes can't start with a space (in that
    /// particular case, it's a stand alone single quote).
    SingleQuoteString(String),
    /// A text between double quotes. The text inside the quotes can't start with a space (in
    /// that particular case, it's a stand alone double quote).
    DoubleQuoteString(String),
    /// A standalone single quote.
    SingleQuote,
    /// A standalone double quote.
    DoubleQuote,
    /// A comma.
    Comma,
    /// A semicolon.
    Semicolon,
    /// The plus character.
    Plus,
    /// The minus character.
    Minus,
    /// The asterisk character.
    Asterisk,
    /// The slash character (not immediately following a name).
    Slash,
    /// The percent character.
    Percent,
    /// The backslash character.
    BackSlash,
    /// The ampersand character.
    Ampersand,
    /// The hash (number sign) character.
    Hash,
    /// Two hash characters in a row.
    HashHash,
    /// The pipe character.
    Pipe,
    /// The tilde character.
    Tilde,
    /// The caret character.
    Caret,
    /// The dollar character.
    Dollar,
    /// The at sign character.
    AtSign,
    /// The unicode paragraph character.
    Paragraph,
    /// The equal character.
    Equal,
    /// Two equal characters in a row.
    EqualEqual,
    /// The left angle bracket.
    LowerThan,
    /// The left angle bracket followed by an equal.
    LowerOrEqual,
    /// The right angle bracket.
    GreaterThan,
    /// The right angle bracket followed by an equal.
    GreaterOrEqual,
    /// A left parenthesis.
    LeftParenthesis,
    /// A Right parenthesis.
    RightParenthesis,
    /// A left bracket (square).
    LeftBracket,
    /// A Right bracket (square).
    RightBracket,
    /// A left brace (curly bracket).
    LeftBrace,
    /// A Right brace (curly bracket).
    RightBrace,
    /// A standalone unicode symbol like: ⮬ or ⮯.
    UnicodeCharacter(char),
    /// The end of an english sentence.
    /// For example ".", ":", "!", "?".
    EndOfSentence(char),
    /// Some consecutive blank spaces.
    Spaces(u32),
    /// Some consecutive new lines.
    NewLines(u32),
    /// The end of the documentation. The parser generates one and only one EndOfInput.
    /// This is always the last item in the list.
    EndOfInput,
}
/// Defines a lexical item (item + location in the source file).
#[derive(Clone)]
pub struct LexicalItem {
    // Where the item appears in the source text (used for error reporting).
    pub location: Location,
    // What kind of lexical item this is (and its text when applicable).
    pub content: LexicalContent,
}
/// Parses some documentation and extract all the lexical items found in the documentation.
///
/// Returns the reduced items.
pub fn reduce_lexems(compiler: &mut DocCompiler, source: &Rc<Source>) -> Option<Vec<LexicalItem>> {
let mut items: Vec<LexicalItem> = Vec::new();
let mut ok = true;
let mut iter = source.text.char_indices();
let mut current = iter.next();
while let Some((index, character)) = current {
current = match character {
'0'..='9' => reduce_number_or_name(&mut items, &source, index, character, &mut iter),
'a'..='z' | 'A'..='Z' | '_' => reduce_name(&mut items, &source, index, &mut iter),
'`' => reduce_back_quote(compiler, &mut items, &source, &mut iter),
'\'' => {
reduce_single_quote_string(compiler, &mut items, &source, index, &mut iter, &mut ok)
}
'"' => {
reduce_double_quote_string(compiler, &mut items, &source, index, &mut iter, &mut ok)
}
',' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Comma,
),
';' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Semicolon,
),
'+' => {
reduce_single_character(&mut items, &source, index, &mut iter, LexicalContent::Plus)
}
'-' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Minus,
),
'*' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Asterisk,
),
'/' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Slash,
),
'%' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Percent,
),
'\\' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::BackSlash,
),
'&' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Ampersand,
),
'#' => reduce_one_or_two_characters(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Hash,
'#',
LexicalContent::HashHash,
),
'|' => {
reduce_single_character(&mut items, &source, index, &mut iter, LexicalContent::Pipe)
}
'~' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Tilde,
),
'^' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Caret,
),
'$' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Dollar,
),
'@' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::AtSign,
),
'§' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Paragraph,
),
'=' => reduce_one_or_two_characters(
&mut items,
&source,
index,
&mut iter,
LexicalContent::Equal,
'=',
LexicalContent::EqualEqual,
),
'<' => reduce_one_or_two_characters(
&mut items,
&source,
index,
&mut iter,
LexicalContent::LowerThan,
'=',
LexicalContent::LowerOrEqual,
),
'>' => reduce_one_or_two_characters(
&mut items,
&source,
index,
&mut iter,
LexicalContent::GreaterThan,
'=',
LexicalContent::GreaterOrEqual,
),
'(' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::LeftParenthesis,
),
')' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::RightParenthesis,
),
'[' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::LeftBracket,
),
']' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::RightBracket,
),
'{' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::LeftBrace,
),
'}' => reduce_single_character(
&mut items,
&source,
index,
&mut iter,
LexicalContent::RightBrace,
),
'⮬' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮯' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮫' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮨' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮭' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮮' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮪' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⮩' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'↵' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
'⌘' => reduce_unicode_character(&mut items, &source, index, &mut iter, character),
// Reduces a sentence end character.
'.' | ':' | '!' | '?' => {
items.push(LexicalItem {
location: Location { source: Rc::clone(&source), start: index, end: index + 1 },
content: LexicalContent::EndOfSentence(character),
});
iter.next()
}
' ' => reduce_spaces(&mut items, &source, index, &mut iter),
'\n' => reduce_new_lines(&mut items, &source, index, &mut iter),
// Unknown character.
_ => {
ok = false;
compiler.add_error(
&Location { source: Rc::clone(&source), start: index, end: index },
format!("Unknown character <{}>", character),
);
iter.next()
}
}
}
// We reached the end of the text. We add a EndOfInput item. This way, the next stage won't
// need to check if we are at the end of the vector when trying to reduce something.
items.push(LexicalItem {
location: Location {
source: Rc::clone(&source),
start: source.text.len(),
end: source.text.len(),
},
content: LexicalContent::EndOfInput,
});
if ok {
Some(items)
} else {
None
}
}
/// Reduces a number.
///
/// For example:
/// - 1234
/// - 0xabcd
///
/// If we find characters which are not valid for a number but valid for a name then, the whole
/// sequence of characters (including the leading digits) is reduced to a Name.
///
/// For example:
/// - 123abc
fn reduce_number_or_name(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    first_character: char,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    let mut is_hexadecimal = false;
    let mut is_number = true;
    let mut next = iter.next();
    // A leading "0x" (or "0X") switches to hexadecimal mode.
    if first_character == '0' {
        if let Some((_, 'x' | 'X')) = next {
            is_hexadecimal = true;
            next = iter.next();
        }
    }
    // Consume every character which could belong to a number or to a name; the first one
    // which belongs to neither (or the end of the text) ends the item.
    let end = loop {
        match next {
            Some((_, '0'..='9')) => {}
            // Hexadecimal digits only count as digits after a "0x" prefix.
            Some((_, 'a'..='f' | 'A'..='F')) if is_hexadecimal => {}
            // Any other name character downgrades the item from a number to a name.
            Some((_, 'a'..='z' | 'A'..='Z' | '_' | '-' | '\'')) => is_number = false,
            Some((index, _)) => break index,
            None => break source.text.len(),
        }
        next = iter.next();
    };
    let text = source.text[start..end].to_string();
    items.push(LexicalItem {
        location: Location { source: Rc::clone(source), start, end },
        // Only digits (potentially hexadecimal) found => Number; otherwise => Name.
        content: if is_number {
            LexicalContent::Number(text)
        } else {
            LexicalContent::Name(text)
        },
    });
    next
}
/// Reduces a name.
///
/// A name is a valid name from the English language point of view.
///
/// Examples of valid names:
/// - Hello
/// - heap-specific
/// - doesn't
fn reduce_name(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    let mut next = iter.next();
    // Consume every character which can belong to a name; the first one which can't (or the
    // end of the text) ends the name.
    let end = loop {
        match next {
            Some((_, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'')) => {}
            Some((index, _)) => break index,
            None => break source.text.len(),
        }
        next = iter.next();
    };
    items.push(LexicalItem {
        location: Location { source: Rc::clone(source), start, end },
        content: LexicalContent::Name(source.text[start..end].to_string()),
    });
    next
}
/// Reduces an item which starts with a back quote.
///
/// It can be a reference (text between two back quotes).
/// It can be a code block (text between two triple back quotes).
///
/// The opening back quote has already been consumed by the caller. Returns the first
/// character which follows the reduced item.
fn reduce_back_quote(
    compiler: &mut DocCompiler,
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    let mut current = iter.next();
    // Start of the reference text: the first character after the opening back quote.
    // NOTE(review): when the back quote is the very last character, `start` falls back to 0,
    // which points the "Unterminated reference" error at the beginning of the text — confirm
    // this is intended.
    let start = if let Some((index, _)) = current { index } else { 0 };
    // A first back quote has already been reduced. Look for a second one.
    if let Some((_, character)) = current {
        if character == '`' {
            current = iter.next();
            // Two back quotes have already been reduced. Look for a third one.
            let end = if let Some((index, character)) = current {
                if character == '`' {
                    // Three back quotes have been found. That means this is a code block.
                    return reduce_code_block(compiler, items, source, iter);
                }
                index
            } else {
                source.text.len()
            };
            // Only two back quotes have been found. It would be an empty reference. That means
            // an error.
            compiler.add_error(
                &Location { source: Rc::clone(&source), start, end },
                "Empty reference".to_owned(),
            );
            return current;
        }
    }
    // Exactly one opening back quote: scan the reference text up to the closing back quote.
    loop {
        match current {
            Some((index, character)) => match character {
                '`' => {
                    // Closing back quote found: the reference is the text between the quotes.
                    items.push(LexicalItem {
                        location: Location { source: Rc::clone(&source), start, end: index },
                        content: LexicalContent::Reference(source.text[start..index].to_string()),
                    });
                    current = iter.next();
                    break;
                }
                _ => {}
            },
            None => {
                // The end of the text was reached before the closing back quote.
                compiler.add_error(
                    &Location { source: Rc::clone(&source), start, end: source.text.len() },
                    "Unterminated reference".to_owned(),
                );
                break;
            }
        }
        current = iter.next();
    }
    current
}
/// Reduces a block of text within two triple back quotes.
///
/// The opening triple back quotes have already been reduced.
///
/// Returns the first character which follows the code block (or the error).
fn reduce_code_block(
    compiler: &mut DocCompiler,
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    let mut current = iter.next();
    // NOTE(review): when the opening quotes end the text, `start` falls back to 0, which makes
    // the error location below point at the beginning of the text — confirm intended.
    let start = if let Some((index, _)) = current { index } else { 0 };
    // Skip the characters between the opening three back quotes and the end of the line (these
    // characters define the code syntax to use).
    loop {
        match current {
            Some((_, character)) => {
                if character == '\n' {
                    current = iter.next();
                    break;
                }
            }
            None => {
                // End of text reached while still on the opening line.
                compiler.add_error(
                    &Location { source: Rc::clone(&source), start, end: source.text.len() },
                    "Unterminated code block".to_owned(),
                );
                return current;
            }
        }
        current = iter.next();
    }
    // Reduce the code block. The end of the block is reached when three back quotes followed
    // by a new line are found at the beginning of a line.
    // If three back quotes are found at the beginning of a line without a following new line,
    // this is an error.
    let start_block = if let Some((index, _)) = current { index } else { 0 };
    loop {
        match current {
            Some((_, character)) => {
                // Skip forward until the end of the current line.
                if character != '\n' {
                    current = iter.next();
                    continue;
                }
                // A new line is found.
                current = iter.next();
                if current.is_none() {
                    continue;
                }
                // `end_block` is the position right after the last new line of the block; it is
                // only used when the closing quotes are confirmed below.
                let (end_block, first_quote) = current.unwrap();
                if first_quote != '`' {
                    continue;
                }
                // A first back quote is found.
                current = iter.next();
                if current.is_none() {
                    continue;
                }
                let (_, second_quote) = current.unwrap();
                if second_quote != '`' {
                    continue;
                }
                // A second back quote is found.
                current = iter.next();
                if current.is_none() {
                    continue;
                }
                let (_, third_quote) = current.unwrap();
                if third_quote != '`' {
                    continue;
                }
                // A third back quote is found.
                current = iter.next();
                let new_line_pos = if let Some((index, ending_new_line)) = current {
                    if ending_new_line == '\n' {
                        // A following new line is found. We have reduced a code block.
                        items.push(LexicalItem {
                            location: Location {
                                source: Rc::clone(&source),
                                start: start_block,
                                end: end_block,
                            },
                            content: LexicalContent::CodeBlock(
                                source.text[start_block..end_block].to_string(),
                            ),
                        });
                        return iter.next();
                    }
                    index
                } else {
                    source.text.len()
                };
                // The three back quotes are not followed by a new line. This is an error.
                compiler.add_error(
                    &Location {
                        source: Rc::clone(&source),
                        start: new_line_pos,
                        end: new_line_pos,
                    },
                    "New line expected to end the code block".to_owned(),
                );
                return current;
            }
            None => {
                // End of text reached before the closing triple back quotes.
                compiler.add_error(
                    &Location { source: Rc::clone(&source), start, end: source.text.len() },
                    "Unterminated code block".to_owned(),
                );
                break;
            }
        }
    }
    current
}
/// Reduces a string between single quotes, a single quote or a name.
///
/// If the first single quote is not after a white space or at the beginning of the text then,
/// a Name is returned.
/// For example, with the input: `a_reference`'s
/// This function is called to reduce 's and the result is Name("'s").
///
/// If the first single quote is followed by a space, the result is SingleQuote.
/// For example, the following text is valid: A ' is valid.
fn reduce_single_quote_string(
    compiler: &mut DocCompiler,
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
    ok: &mut bool,
) -> Option<(usize, char)> {
    // A quote which is neither at the beginning of the text nor preceded by a blank or a new
    // line starts a name (like the 's in `a_reference`'s).
    match items.last() {
        None => {}
        Some(LexicalItem { content: LexicalContent::Spaces(_), .. })
        | Some(LexicalItem { content: LexicalContent::NewLines(_), .. }) => {}
        Some(_) => return reduce_name(items, source, start, iter),
    }
    let mut current = iter.next();
    // A quote immediately followed by a blank or a new line is a standalone single quote.
    let text_start = match current {
        Some((_, ' ')) | Some((_, '\n')) => {
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end: start },
                content: LexicalContent::SingleQuote,
            });
            return current;
        }
        Some((index, _)) => index,
        None => 0,
    };
    // Scan the string content until the closing single quote.
    while let Some((index, character)) = current {
        if character == '\'' {
            current = iter.next();
            // The item ends right after the closing quote.
            let end = current.map_or(source.text.len(), |(next_index, _)| next_index);
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end },
                content: LexicalContent::SingleQuoteString(
                    source.text[text_start..index].to_string(),
                ),
            });
            return current;
        }
        current = iter.next();
    }
    // No closing quote before the end of the text: this is an error.
    *ok = false;
    compiler.add_error(
        &Location { source: Rc::clone(source), start, end: source.text.len() },
        "Unterminated string (character <'> expected).".to_owned(),
    );
    current
}
/// Reduces a string between double quotes or a double quote.
///
/// If the first double quote is followed by a space, the result is DoubleQuote.
/// For example, the following text is valid: A " is valid.
fn reduce_double_quote_string(
    compiler: &mut DocCompiler,
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
    ok: &mut bool,
) -> Option<(usize, char)> {
    let mut current = iter.next();
    // A quote immediately followed by a blank or a new line is a standalone double quote.
    let text_start = match current {
        Some((_, ' ')) | Some((_, '\n')) => {
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end: start },
                content: LexicalContent::DoubleQuote,
            });
            return current;
        }
        Some((index, _)) => index,
        None => 0,
    };
    // Scan the string content until the closing double quote.
    while let Some((index, character)) = current {
        if character == '"' {
            current = iter.next();
            // The item ends right after the closing quote.
            let end = current.map_or(source.text.len(), |(next_index, _)| next_index);
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end },
                content: LexicalContent::DoubleQuoteString(
                    source.text[text_start..index].to_string(),
                ),
            });
            return current;
        }
        current = iter.next();
    }
    // No closing quote before the end of the text: this is an error.
    *ok = false;
    compiler.add_error(
        &Location { source: Rc::clone(source), start, end: source.text.len() },
        "Unterminated string (character <\"> expected).".to_owned(),
    );
    current
}
/// Reduces a single character.
///
/// The caller already knows which character it is: we only record `content` at `start` and
/// hand back the next character to reduce.
fn reduce_single_character(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
    content: LexicalContent,
) -> Option<(usize, char)> {
    let location = Location { source: Rc::clone(source), start, end: start };
    items.push(LexicalItem { location, content });
    iter.next()
}
/// Reduces one or two characters.
///
/// Generates `two_character_content` when the character which follows `start` is
/// `second_character`; otherwise generates `one_character_content` for the first character
/// alone.
fn reduce_one_or_two_characters(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
    one_character_content: LexicalContent,
    second_character: char,
    two_character_content: LexicalContent,
) -> Option<(usize, char)> {
    match iter.next() {
        Some((_, character)) if character == second_character => {
            // Two-character item: it ends right after its second character.
            let after = iter.next();
            let end = after.map_or(source.text.len(), |(index, _)| index);
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end },
                content: two_character_content,
            });
            after
        }
        other => {
            // One-character item only; `other` is the first unconsumed character.
            items.push(LexicalItem {
                location: Location { source: Rc::clone(source), start, end: start },
                content: one_character_content,
            });
            other
        }
    }
}
/// Reduces a single standalone unicode character (like ⮬ or ⌘).
fn reduce_unicode_character(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
    character: char,
) -> Option<(usize, char)> {
    let location = Location { source: Rc::clone(source), start, end: start };
    items.push(LexicalItem { location, content: LexicalContent::UnicodeCharacter(character) });
    iter.next()
}
/// Reduces consecutive blank spaces (at least one).
///
/// The first space has already been consumed by the caller.
fn reduce_spaces(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    // Count starts at 1: the caller already consumed the first space.
    let mut count: u32 = 1;
    let mut next = iter.next();
    let end = loop {
        match next {
            Some((_, ' ')) => count += 1,
            Some((index, _)) => break index,
            None => break source.text.len(),
        }
        next = iter.next();
    };
    items.push(LexicalItem {
        location: Location { source: Rc::clone(source), start, end },
        content: LexicalContent::Spaces(count),
    });
    next
}
/// Reduces new lines (at least one).
///
/// Consecutive new lines are grouped together.
/// A count greater than 1 means one new line followed by (count - 1) blank lines.
fn reduce_new_lines(
    items: &mut Vec<LexicalItem>,
    source: &Rc<Source>,
    start: usize,
    iter: &mut CharIndices<'_>,
) -> Option<(usize, char)> {
    // Count starts at 1: the caller already consumed the first new line.
    let mut count: u32 = 1;
    let mut next = iter.next();
    let end = loop {
        match next {
            Some((_, '\n')) => count += 1,
            Some((index, _)) => break index,
            None => break source.text.len(),
        }
        next = iter.next();
    };
    items.push(LexicalItem {
        location: Location { source: Rc::clone(source), start, end },
        content: LexicalContent::NewLines(count),
    });
    next
}
#[cfg(test)]
mod test {
use crate::lexer::reduce_lexems;
use crate::source::Source;
use crate::utils::test::lexical_items_to_errors;
use crate::DocCompiler;
use std::rc::Rc;
#[test]
fn lexer_ok() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"Some documentation.\n".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
assert!(compiler.errors.is_empty());
}
#[test]
fn lexer_numbers() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"1234 0x789abc 123abc 0x78abg".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
1234 0x789abc 123abc 0x78abg
^^^^
sdk/foo/foo.fidl: 10:4: Number <1234>
1234 0x789abc 123abc 0x78abg
^^^^^^^^
sdk/foo/foo.fidl: 10:9: Number <0x789abc>
1234 0x789abc 123abc 0x78abg
^^^^^^
sdk/foo/foo.fidl: 10:18: Name <123abc>
1234 0x789abc 123abc 0x78abg
^^^^^^^
sdk/foo/foo.fidl: 10:25: Name <0x78abg>
"
);
}
#[test]
fn lexer_names() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"It's correct to use heap-specific.\n".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
It's correct to use heap-specific.
^^^^
sdk/foo/foo.fidl: 10:4: Name <It's>
It's correct to use heap-specific.
^^^^^^^
sdk/foo/foo.fidl: 10:9: Name <correct>
It's correct to use heap-specific.
^^
sdk/foo/foo.fidl: 10:17: Name <to>
It's correct to use heap-specific.
^^^
sdk/foo/foo.fidl: 10:20: Name <use>
It's correct to use heap-specific.
^^^^^^^^^^^^^
sdk/foo/foo.fidl: 10:24: Name <heap-specific>
It's correct to use heap-specific.
^
sdk/foo/foo.fidl: 10:37: EndOfSentence <.>
"
);
}
#[test]
fn lexer_reference() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"`xyz` isn't `abc`.".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
`xyz` isn't `abc`.
^^^
sdk/foo/foo.fidl: 10:5: Reference <xyz>
`xyz` isn't `abc`.
^^^^^
sdk/foo/foo.fidl: 10:10: Name <isn't>
`xyz` isn't `abc`.
^^^
sdk/foo/foo.fidl: 10:17: Reference <abc>
`xyz` isn't `abc`.
^
sdk/foo/foo.fidl: 10:21: EndOfSentence <.>
"
);
}
#[test]
fn lexer_reference_with_apostrophe() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "`xyz`'s.".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
`xyz`'s.
^^^
sdk/foo/foo.fidl: 10:5: Reference <xyz>
`xyz`'s.
^^
sdk/foo/foo.fidl: 10:9: Name <'s>
`xyz`'s.
^
sdk/foo/foo.fidl: 10:11: EndOfSentence <.>
"
);
}
#[test]
fn lexer_reference_with_apostrophe_at_the_end() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "`xyz`'s".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
`xyz`'s
^^^
sdk/foo/foo.fidl: 10:5: Reference <xyz>
`xyz`'s
^^
sdk/foo/foo.fidl: 10:9: Name <'s>
"
);
}
#[test]
fn lexer_empty_reference() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "`` abc".to_owned()));
let _items = reduce_lexems(&mut compiler, &source);
assert_eq!(
compiler.errors,
"\
`` abc
^
sdk/foo/foo.fidl: 10:5: Empty reference
"
);
}
#[test]
fn lexer_code_block() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"```c++\nint a;\n```\n".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
int a;
^^^^^^
sdk/foo/foo.fidl: 11:4: CodeBlock <int a;
>
"
);
}
#[test]
fn lexer_unterminated_code_block_1() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "```c++".to_owned()));
let _items = reduce_lexems(&mut compiler, &source);
assert_eq!(
compiler.errors,
"\
```c++
^^^
sdk/foo/foo.fidl: 10:7: Unterminated code block
"
);
}
#[test]
fn lexer_unterminated_code_block_2() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"```c++\nint a;\n".to_owned(),
));
let _items = reduce_lexems(&mut compiler, &source);
assert_eq!(
compiler.errors,
"\
```c++
^^^
sdk/foo/foo.fidl: 10:7: Unterminated code block
"
);
}
#[test]
fn lexer_code_block_missing_ending_new_line_1() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"```c++\nint a;\n```".to_owned(),
));
let _items = reduce_lexems(&mut compiler, &source);
assert_eq!(
compiler.errors,
"\
```
^
sdk/foo/foo.fidl: 12:7: New line expected to end the code block
"
);
}
#[test]
fn lexer_code_block_missing_ending_new_line_2() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"```c++\nint a;\n```xyz".to_owned(),
));
let _items = reduce_lexems(&mut compiler, &source);
assert_eq!(
compiler.errors,
"\
```xyz
^
sdk/foo/foo.fidl: 12:7: New line expected to end the code block
"
);
}
#[test]
fn lexer_single_quotes() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"'abcd' ' abc's 'xyz' '' `abc`'s".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
'abcd' ' abc's 'xyz' '' `abc`'s
^^^^^^
sdk/foo/foo.fidl: 10:4: SingleQuoteString <abcd>
'abcd' ' abc's 'xyz' '' `abc`'s
^
sdk/foo/foo.fidl: 10:11: SingleQuote
'abcd' ' abc's 'xyz' '' `abc`'s
^^^^^
sdk/foo/foo.fidl: 10:13: Name <abc's>
'abcd' ' abc's 'xyz' '' `abc`'s
^^^^^
sdk/foo/foo.fidl: 10:19: SingleQuoteString <xyz>
'abcd' ' abc's 'xyz' '' `abc`'s
^^
sdk/foo/foo.fidl: 10:25: SingleQuoteString <>
'abcd' ' abc's 'xyz' '' `abc`'s
^^^
sdk/foo/foo.fidl: 10:29: Reference <abc>
'abcd' ' abc's 'xyz' '' `abc`'s
^^
sdk/foo/foo.fidl: 10:33: Name <'s>
"
);
}
#[test]
/// Checks that we can have a backslashes including at the end of a string
fn lexer_single_quotes_with_backslash() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "'xxx\\ \\'".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
'xxx\\ \\'
^^^^^^^^
sdk/foo/foo.fidl: 10:4: SingleQuoteString <xxx\\ \\>
"
);
}
#[test]
fn lexer_unterminated_single_quotes_1() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "'abcd".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(items.is_none());
assert_eq!(
compiler.errors,
"\
'abcd
^^^^^
sdk/foo/foo.fidl: 10:4: Unterminated string (character <'> expected).
"
);
}
#[test]
fn lexer_unterminated_single_quotes_2() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "'abcd\\".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(items.is_none());
assert_eq!(
compiler.errors,
"\
'abcd\\
^^^^^^
sdk/foo/foo.fidl: 10:4: Unterminated string (character <'> expected).
"
);
}
#[test]
fn lexer_double_quotes() {
let mut compiler = DocCompiler::new();
let source = Rc::new(Source::new(
"sdk/foo/foo.fidl".to_owned(),
10,
4,
"\"abcd\" \" \"\" \"xyz\"".to_owned(),
));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
\"abcd\" \" \"\" \"xyz\"
^^^^^^
sdk/foo/foo.fidl: 10:4: DoubleQuoteString <abcd>
\"abcd\" \" \"\" \"xyz\"
^
sdk/foo/foo.fidl: 10:11: DoubleQuote
\"abcd\" \" \"\" \"xyz\"
^^
sdk/foo/foo.fidl: 10:13: DoubleQuoteString <>
\"abcd\" \" \"\" \"xyz\"
^^^^^
sdk/foo/foo.fidl: 10:16: DoubleQuoteString <xyz>
"
);
}
#[test]
/// Checks that we can have a backslashes including at the end of a string
fn lexer_double_quotes_with_backslash() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "\"xxx\\ \\\"".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(!items.is_none());
lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
assert_eq!(
compiler.errors,
"\
\"xxx\\ \\\"
^^^^^^^^
sdk/foo/foo.fidl: 10:4: DoubleQuoteString <xxx\\ \\>
"
);
}
#[test]
fn lexer_unterminated_double_quotes_1() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "\"abcd".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(items.is_none());
assert_eq!(
compiler.errors,
"\
\"abcd
^^^^^
sdk/foo/foo.fidl: 10:4: Unterminated string (character <\"> expected).
"
);
}
#[test]
fn lexer_unterminated_double_quotes_2() {
let mut compiler = DocCompiler::new();
let source =
Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, "\"abcd\\".to_owned()));
let items = reduce_lexems(&mut compiler, &source);
assert!(items.is_none());
assert_eq!(
compiler.errors,
"\
\"abcd\\
^^^^^^
sdk/foo/foo.fidl: 10:4: Unterminated string (character <\"> expected).
"
);
}
#[test]
/// Checks that every supported one- and two-character symbol is recognized as
/// its own lexical item with the correct column in the reported location.
fn lexer_symbols_and_punctuation() {
    let mut compiler = DocCompiler::new();
    let source = Rc::new(Source::new(
        "sdk/foo/foo.fidl".to_owned(),
        10,
        4,
        ", ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\".to_owned(),
    ));
    let items = reduce_lexems(&mut compiler, &source);
    // All symbols are known, so lexing must succeed.
    assert!(items.is_some());
    lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
    assert_eq!(
        compiler.errors,
        "\
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:4: Comma
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:6: Semicolon
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:8: Plus
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:10: Minus
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:12: Asterisk
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:14: Slash
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:16: Percent
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:18: Ampersand
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:20: Hash
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^^
sdk/foo/foo.fidl: 10:22: HashHash
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:25: Pipe
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:27: Tilde
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:29: Caret
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:31: Dollar
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:33: AtSign
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:35: Paragraph
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:37: Equal
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^^
sdk/foo/foo.fidl: 10:39: EqualEqual
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:42: LowerThan
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^^
sdk/foo/foo.fidl: 10:44: LowerOrEqual
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:47: GreaterThan
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^^
sdk/foo/foo.fidl: 10:49: GreaterOrEqual
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:52: LeftParenthesis
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:54: RightParenthesis
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:56: LeftBracket
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:58: RightBracket
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:60: LeftBrace
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:62: RightBrace
, ; + - * / % & # ## | ~ ^ $ @ § = == < <= > >= ( ) [ ] { } \\
^
sdk/foo/foo.fidl: 10:64: BackSlash
"
    );
}
#[test]
/// Checks that non-ASCII symbol characters are each lexed as a standalone
/// UnicodeCharacter item with the correct location.
fn lexer_unicode_symbols() {
    let mut compiler = DocCompiler::new();
    let source = Rc::new(Source::new(
        "sdk/foo/foo.fidl".to_owned(),
        10,
        4,
        "⮬ ⮯ ⮫ ⮨ ⮭ ⮮ ⮪ ⮩ ↵ ⌘".to_owned(),
    ));
    let items = reduce_lexems(&mut compiler, &source);
    // Unicode symbols are accepted, so lexing must succeed.
    assert!(items.is_some());
    lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
    assert_eq!(
        compiler.errors,
        "\
^
sdk/foo/foo.fidl: 10:4: UnicodeCharacter <⮬>
^
sdk/foo/foo.fidl: 10:6: UnicodeCharacter <⮯>
^
sdk/foo/foo.fidl: 10:8: UnicodeCharacter <⮫>
^
sdk/foo/foo.fidl: 10:10: UnicodeCharacter <⮨>
^
sdk/foo/foo.fidl: 10:12: UnicodeCharacter <⮭>
^
sdk/foo/foo.fidl: 10:14: UnicodeCharacter <⮮>
^
sdk/foo/foo.fidl: 10:16: UnicodeCharacter <⮪>
^
sdk/foo/foo.fidl: 10:18: UnicodeCharacter <⮩>
^
sdk/foo/foo.fidl: 10:20: UnicodeCharacter <↵>
^
sdk/foo/foo.fidl: 10:22: UnicodeCharacter <⌘>
"
    );
}
#[test]
/// Checks that '.', '?', '!' and ':' following a name are each lexed as an
/// EndOfSentence item carrying the punctuation character.
fn lexer_end_of_sentence() {
    let mut compiler = DocCompiler::new();
    let source = Rc::new(Source::new(
        "sdk/foo/foo.fidl".to_owned(),
        10,
        4,
        "Aa. Bb? Cc! Dd:".to_owned(),
    ));
    let items = reduce_lexems(&mut compiler, &source);
    // Every character is valid, so lexing must succeed.
    assert!(items.is_some());
    lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ false);
    assert_eq!(
        compiler.errors,
        "\
Aa. Bb? Cc! Dd:
^^
sdk/foo/foo.fidl: 10:4: Name <Aa>
Aa. Bb? Cc! Dd:
^
sdk/foo/foo.fidl: 10:6: EndOfSentence <.>
Aa. Bb? Cc! Dd:
^^
sdk/foo/foo.fidl: 10:8: Name <Bb>
Aa. Bb? Cc! Dd:
^
sdk/foo/foo.fidl: 10:10: EndOfSentence <?>
Aa. Bb? Cc! Dd:
^^
sdk/foo/foo.fidl: 10:12: Name <Cc>
Aa. Bb? Cc! Dd:
^
sdk/foo/foo.fidl: 10:14: EndOfSentence <!>
Aa. Bb? Cc! Dd:
^^
sdk/foo/foo.fidl: 10:16: Name <Dd>
Aa. Bb? Cc! Dd:
^
sdk/foo/foo.fidl: 10:18: EndOfSentence <:>
"
    );
}
#[test]
/// Checks that runs of spaces and of new lines are lexed as counted items
/// (Spaces (n) / NewLines (n)) when `with_spaces` is true, and that new
/// lines advance the reported source line number.
fn lexer_spaces_and_new_lines() {
    let mut compiler = DocCompiler::new();
    let source = Rc::new(Source::new(
        "sdk/foo/foo.fidl".to_owned(),
        10,
        4,
        "Some documentation.\n\n\nAnd spaces.".to_owned(),
    ));
    let items = reduce_lexems(&mut compiler, &source);
    // Every character is valid, so lexing must succeed.
    assert!(items.is_some());
    // with_spaces=true so that Spaces/NewLines items appear in the output.
    lexical_items_to_errors(&mut compiler, &items.unwrap(), /*with_spaces=*/ true);
    assert_eq!(
        compiler.errors,
        "\
Some documentation.
^^^^
sdk/foo/foo.fidl: 10:4: Name <Some>
Some documentation.
^^^^^
sdk/foo/foo.fidl: 10:8: Spaces (5)
Some documentation.
^^^^^^^^^^^^^
sdk/foo/foo.fidl: 10:13: Name <documentation>
Some documentation.
^
sdk/foo/foo.fidl: 10:26: EndOfSentence <.>
Some documentation.
^
sdk/foo/foo.fidl: 10:27: NewLines (3)
And spaces.
^^^
sdk/foo/foo.fidl: 13:4: Name <And>
And spaces.
^^^
sdk/foo/foo.fidl: 13:7: Spaces (3)
And spaces.
^^^^^^
sdk/foo/foo.fidl: 13:10: Name <spaces>
And spaces.
^
sdk/foo/foo.fidl: 13:16: EndOfSentence <.>
And spaces.
^
sdk/foo/foo.fidl: 13:17: End
"
    );
}
#[test]
/// Verifies that a character outside the supported set (here an en dash)
/// makes lexing fail with an "Unknown character" error.
fn lexer_bad_character() {
    let mut compiler = DocCompiler::new();
    let text = "En dash —.\n".to_owned();
    let source = Rc::new(Source::new("sdk/foo/foo.fidl".to_owned(), 10, 4, text));
    // The en dash is not a known lexical item: lexing must fail.
    assert!(reduce_lexems(&mut compiler, &source).is_none());
    assert_eq!(
        compiler.errors,
        "\
En dash —.
^
sdk/foo/foo.fidl: 10:12: Unknown character <—>
"
    );
}
}