blob: d5ffc0de37a4563d9554f986ce4e623cfd9b326f [file] [log] [blame]
// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! parser handles parsing markdown via pulldown_cmark into higher level elements.
use pulldown_cmark::Event::{
self, Code, End, FootnoteReference, HardBreak, Html, SoftBreak, Start, TaskListMarker, Text,
};
use pulldown_cmark::{CodeBlockKind, CowStr, Tag};
use std::ops::Range;
use crate::md_element::{DocContext, Element};
pub(crate) fn element_from_event<'a>(
event: Event<'a>,
range: Range<usize>,
doc_context: &mut DocContext<'a>,
) -> Element<'a> {
let span = doc_context.span(&range);
let newlines = span.chars().filter(|c| c == &'\n').count();
let element = match event {
// Start indicates the start of a Tag.
Start(Tag::Heading(_, _, _)) => {
let mut element = parse_tag_element(event, doc_context);
// update the line number in doc_context, returning the line_number of the element.
element.set_line_num(update_line_number(doc_context, span, newlines));
element
}
Start(Tag::Paragraph) => {
let element = parse_tag_element(event, doc_context);
doc_context.line_num += newlines;
element
}
Start(_) => parse_tag_element(event, doc_context),
End(_) => {
//should not happen
panic!("Got End event unexpectedly: {:?}", event)
}
Text(text) => {
let mut element = Element::Text(text, doc_context.line());
// update the line number in doc_context, returning the line_number of the element.
element.set_line_num(update_line_number(doc_context, span, newlines));
element
}
Code(text) => {
let mut element = Element::Code(text, doc_context.line());
// update the line number in doc_context, returning the line_number of the element.
element.set_line_num(update_line_number(doc_context, span, newlines));
element
}
Html(text) => {
let mut element = Element::Html(text, doc_context.line());
// update the line number in doc_context, returning the line_number of the element.
element.set_line_num(update_line_number(doc_context, span, newlines));
element
}
FootnoteReference(text) => Element::FootnoteReference(text, doc_context.line()),
HardBreak => {
let element = Element::HardBreak(doc_context.line());
doc_context.line_num += newlines;
element
}
SoftBreak => {
let element = Element::SoftBreak(doc_context.line());
// Don't increment line_num here, it is handled in the Text event.
element
}
Event::Rule => {
let element = Element::Rule(doc_context.line());
doc_context.line_num += newlines;
element
}
TaskListMarker(checked) => Element::TaskListMarker(checked, doc_context.line()),
};
element
}
fn update_line_number<'a>(doc_context: &mut DocContext<'a>, span: &str, newlines: usize) -> usize {
let current_line_num = doc_context.line_num;
if let Some(line_num) = doc_context.line_number_of(span) {
doc_context.line_num = line_num + 1;
return line_num;
} else {
doc_context.line_num += newlines;
}
current_line_num
}
fn parse_tag_element<'a>(event: Event<'a>, doc_context: &mut DocContext<'a>) -> Element<'a> {
match event {
Start(Tag::Paragraph) => {
let block = read_block(Tag::Paragraph, doc_context);
doc_context.line_num += 1;
block
}
Start(Tag::Heading(level, id, classes)) => {
let block = read_block(Tag::Heading(level, id, classes), doc_context);
doc_context.line_num += 1;
block
}
Start(Tag::BlockQuote) => {
let block = read_block(Tag::BlockQuote, doc_context);
block
}
Start(Tag::CodeBlock(CodeBlockKind::Fenced(code))) => {
let block = read_codeblock(code, doc_context);
block
}
Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
let block = read_block(Tag::CodeBlock(CodeBlockKind::Indented), doc_context);
block
}
Start(Tag::List(starting)) => {
let block = read_list(starting, doc_context);
block
}
Start(Tag::Item) => {
let block = read_block(Tag::Item, doc_context);
doc_context.line_num += 1;
block
}
Start(Tag::FootnoteDefinition(text)) => {
read_block(Tag::FootnoteDefinition(text), doc_context)
}
Start(Tag::Table(alignment)) => read_block(Tag::Table(alignment), doc_context),
Start(Tag::TableCell) => read_block(Tag::TableCell, doc_context),
Start(Tag::TableHead) => read_block(Tag::TableHead, doc_context),
Start(Tag::TableRow) => read_block(Tag::TableRow, doc_context),
Start(Tag::Emphasis) => read_block(Tag::Emphasis, doc_context),
Start(Tag::Strong) => read_block(Tag::Strong, doc_context),
Start(Tag::Strikethrough) => read_block(Tag::Strikethrough, doc_context),
Start(Tag::Link(link_type, link_url, title)) => {
read_link(link_type, link_url, title, doc_context)
}
Start(Tag::Image(link_type, link_url, title)) => {
read_image(link_type, link_url, title, doc_context)
}
End(tag) => panic!("Unexpected tag: {:?}", tag),
Text(text) => {
let element = Element::Text(text, doc_context.line());
element
}
Code(text) => Element::Code(text, doc_context.line()),
Html(text) => Element::Html(text, doc_context.line()),
FootnoteReference(text) => Element::FootnoteReference(text, doc_context.line()),
HardBreak => Element::HardBreak(doc_context.line()),
SoftBreak => Element::SoftBreak(doc_context.line()),
Event::Rule => Element::Rule(doc_context.line()),
TaskListMarker(checked) => Element::TaskListMarker(checked, doc_context.line()),
}
}
fn read_image<'a>(
link_type: pulldown_cmark::LinkType,
link_url: pulldown_cmark::CowStr<'a>,
title: pulldown_cmark::CowStr<'a>,
doc_context: &mut DocContext<'a>,
) -> Element<'a> {
let mut contents = vec![];
while let Some((event, range)) = doc_context.parser.next() {
match event {
End(Tag::Image(link_type, link_url, title)) => {
return Element::Image(link_type, link_url, title, contents, doc_context.line())
}
_ => contents.push(element_from_event(event, range, doc_context)),
};
}
// Using :? since link_type does not implement Display.
// This should not happen, so panic is the way to go.
panic!("{:?} {} {} has no end?", link_type, link_url, title);
}
fn read_link<'a>(
link_type: pulldown_cmark::LinkType,
link_url: pulldown_cmark::CowStr<'a>,
title: pulldown_cmark::CowStr<'a>,
doc_context: &mut DocContext<'a>,
) -> Element<'a> {
let mut contents = vec![];
// Using loop here vs. while let so that it is easy to
// catch the situation where the End tag is not found
while let Some((event, range)) = doc_context.parser.next() {
match event {
End(Tag::Link(link_type, link_url, title)) => {
return Element::Link(link_type, link_url, title, contents, doc_context.line())
}
_ => contents.push(element_from_event(event, range, doc_context)),
};
}
// This should not happen, so not sure how to handle it happening?
panic!("{:?} {} {} has no end?", link_type, link_url, title);
}
fn read_list<'a>(starting: Option<u64>, doc_context: &mut DocContext<'a>) -> Element<'a> {
let mut items = vec![];
let start = doc_context.line();
while let Some((event, range)) = doc_context.parser.next() {
match event {
End(Tag::List(starting)) => return Element::List(starting, items, start),
_ => items.push(element_from_event(event, range, doc_context)),
};
}
// This should not happen, so panic.
panic!("{:?} has no end?", starting);
}
fn read_codeblock<'a>(code: CowStr<'a>, doc_context: &mut DocContext<'a>) -> Element<'a> {
let mut elements = vec![];
let start = doc_context.line();
while let Some((event, range)) = doc_context.parser.next() {
match event {
End(Tag::CodeBlock(CodeBlockKind::Fenced(code))) => {
return Element::CodeBlock(code, elements, start)
}
_ => elements.push(element_from_event(event, range, doc_context)),
};
}
// This should not happen, so panic.
panic!("{:?} has no end?", code);
}
fn read_block<'a>(block_type: Tag<'a>, doc_context: &mut DocContext<'a>) -> Element<'a> {
let mut elements = vec![];
let start = doc_context.line();
while let Some((event, range)) = doc_context.parser.next() {
match event {
End(block_type) => {
let block = Element::Block(block_type, elements, start);
return block;
}
_ => elements.push(element_from_event(event, range, doc_context)),
};
}
// This should not happen, so panic.
panic!("{:?} has no end?", block_type);
}
#[cfg(test)]
mod test {
use super::*;
use crate::DocLine;
use pulldown_cmark::CowStr::Borrowed;
use pulldown_cmark::{HeadingLevel, LinkType};
use std::path::PathBuf;
#[test]
fn test_basic_elements() {
let doc_line1 = DocLine { line_num: 1, file_name: PathBuf::from("test1") };
let doc_line2 = DocLine { line_num: 2, file_name: PathBuf::from("test1") };
let test_data = [
(
"This is a string",
vec![Element::Block(
Tag::Paragraph,
vec![Element::Text(Borrowed("This is a string"), doc_line1.clone())],
doc_line1.clone(),
)],
),
(
"`This is code`",
vec![Element::Block(
Tag::Paragraph,
vec![Element::Code(Borrowed("This is code"), doc_line1.clone())],
doc_line1.clone(),
)],
),
("<b>", vec![Element::Html(Borrowed("<b>"), doc_line1.clone())]),
(
"# Header 1",
vec![Element::Block(
Tag::Heading(HeadingLevel::H1, None, vec![]),
vec![Element::Text(Borrowed("Header 1"), doc_line1.clone())],
doc_line1.clone(),
)],
),
(
r##"# Header 1
Multiline
"##,
vec![
Element::Block(
Tag::Heading(HeadingLevel::H1, None, vec![]),
vec![Element::Text(Borrowed("Header 1"), doc_line1.clone())],
doc_line1.clone(),
),
Element::Block(
Tag::Paragraph,
vec![Element::Text(Borrowed("Multiline"), doc_line2.clone())],
doc_line2,
),
],
),
(
"[link text](http://to.my/webpage)",
vec![Element::Block(
Tag::Paragraph,
vec![Element::Link(
LinkType::Inline,
Borrowed("http://to.my/webpage"),
Borrowed(""),
vec![Element::Text(Borrowed("link text"), doc_line1.clone())],
doc_line1.clone(),
)],
doc_line1,
)],
),
];
for (input, expected) in test_data {
let callback = &mut |broken_link: pulldown_cmark::BrokenLink<'_>| {
DocContext::handle_broken_link(broken_link, input)
};
let ctx = DocContext::new(PathBuf::from("test1"), input, Some(callback));
let actual: Vec<Element<'_>> = ctx.collect();
assert_eq!(actual.len(), expected.len());
let mut expected_iter = expected.iter();
for a in actual {
if let Some(b) = expected_iter.next() {
assert_eq!(&a, b);
}
}
}
}
}