rustdoc: check code block syntax in early pass
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 87e979b..29a4138 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -25,40 +25,51 @@
tooltip: Option<(&str, &str)>,
) -> String {
debug!("highlighting: ================\n{}\n==============", src);
- let sess = parse::ParseSess::new(FilePathMapping::empty());
- let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
- src.to_string());
-
let mut out = Vec::new();
if let Some((tooltip, class)) = tooltip {
write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
class='tooltiptext'>{}</span></div></div>",
class, tooltip).unwrap();
}
- write_header(class, &mut out).unwrap();
- let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
- Ok(l) => l,
- Err(_) => {
- let first_line = src.lines().next().unwrap_or_else(|| "");
- let mut err = sess.span_diagnostic
- .struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
- (Ignoring this codeblock)",
- first_line));
- err.emit();
- return String::new();
+ let sess = parse::ParseSess::new(FilePathMapping::empty());
+ let fm = sess.source_map().new_source_file(
+ FileName::Custom(String::from("rustdoc-highlighting")),
+ src.to_owned(),
+ );
+ let highlight_result =
+ lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
+ let mut classifier = Classifier::new(lexer, sess.source_map());
+
+ let mut highlighted_source = vec![];
+ if classifier.write_source(&mut highlighted_source).is_err() {
+ Err(classifier.lexer.buffer_fatal_errors())
+ } else {
+ Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+ }
+ });
+
+ match highlight_result {
+ Ok(highlighted_source) => {
+ write_header(class, &mut out).unwrap();
+ write!(out, "{}", highlighted_source).unwrap();
+ if let Some(extension) = extension {
+ write!(out, "{}", extension).unwrap();
+ }
+ write_footer(&mut out).unwrap();
}
- };
- let mut classifier = Classifier::new(lexer, sess.source_map());
- if classifier.write_source(&mut out).is_err() {
- classifier.lexer.emit_fatal_errors();
- return format!("<pre>{}</pre>", src);
+ Err(errors) => {
+ // If errors are encountered while trying to highlight, cancel the errors and just emit
+ // the unhighlighted source. The errors will have already been reported in the
+ // `check-code-block-syntax` pass.
+ for mut error in errors {
+ error.cancel();
+ }
+
+ write!(out, "<pre><code>{}</code></pre>", src).unwrap();
+ }
}
- if let Some(extension) = extension {
- write!(out, "{}", extension).unwrap();
- }
- write_footer(&mut out).unwrap();
String::from_utf8_lossy(&out[..]).into_owned()
}
@@ -151,6 +162,17 @@
}
}
+enum HighlightError {
+ LexError,
+ IoError(io::Error),
+}
+
+impl From<io::Error> for HighlightError {
+ fn from(err: io::Error) -> Self {
+ HighlightError::IoError(err)
+ }
+}
+
impl<'a> Classifier<'a> {
fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
Classifier {
@@ -162,17 +184,11 @@
}
}
- /// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
- fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
+ /// Gets the next token out of the lexer.
+ fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
match self.lexer.try_next_token() {
Ok(tas) => Ok(tas),
- Err(_) => {
- let mut err = self.lexer.sess.span_diagnostic
- .struct_warn("Backing out of syntax highlighting");
- err.note("You probably did not intend to render this as a rust code-block");
- err.emit();
- Err(io::Error::new(io::ErrorKind::Other, ""))
- }
+ Err(_) => Err(HighlightError::LexError),
}
}
@@ -185,7 +201,7 @@
/// source.
fn write_source<W: Writer>(&mut self,
out: &mut W)
- -> io::Result<()> {
+ -> Result<(), HighlightError> {
loop {
let next = self.try_next_token()?;
if next.tok == token::Eof {
@@ -202,7 +218,7 @@
fn write_token<W: Writer>(&mut self,
out: &mut W,
tas: TokenAndSpan)
- -> io::Result<()> {
+ -> Result<(), HighlightError> {
let klass = match tas.tok {
token::Shebang(s) => {
out.string(Escape(&s.as_str()), Class::None)?;
@@ -341,7 +357,9 @@
// Anything that didn't return above is the simple case where we the
// class just spans a single token, so we can use the `string` method.
- out.string(Escape(&self.snip(tas.sp)), klass)
+ out.string(Escape(&self.snip(tas.sp)), klass)?;
+
+ Ok(())
}
// Helper function to get a snippet from the source_map.
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index 05a9a2d..6b7f540 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -919,6 +919,115 @@
links
}
+#[derive(Debug)]
+crate struct RustCodeBlock {
+ /// The range in the markdown that the code block occupies. Note that this includes the fences
+ /// for fenced code blocks.
+ pub range: Range<usize>,
+ /// The range in the markdown that the code within the code block occupies.
+ pub code: Range<usize>,
+ pub is_fenced: bool,
+ pub syntax: Option<String>,
+}
+
+/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
+/// untagged (and assumed to be rust).
+crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
+ let mut code_blocks = vec![];
+
+ if md.is_empty() {
+ return code_blocks;
+ }
+
+ let mut opts = Options::empty();
+ opts.insert(OPTION_ENABLE_TABLES);
+ opts.insert(OPTION_ENABLE_FOOTNOTES);
+ let mut p = Parser::new_ext(md, opts);
+
+ let mut code_block_start = 0;
+ let mut code_start = 0;
+ let mut is_fenced = false;
+ let mut previous_offset = 0;
+ let mut in_rust_code_block = false;
+ while let Some(event) = p.next() {
+ let offset = p.get_offset();
+
+ match event {
+ Event::Start(Tag::CodeBlock(syntax)) => {
+ let lang_string = if syntax.is_empty() {
+ LangString::all_false()
+ } else {
+ LangString::parse(&*syntax, ErrorCodes::Yes)
+ };
+
+ if lang_string.rust {
+ in_rust_code_block = true;
+
+ code_start = offset;
+ code_block_start = match md[previous_offset..offset].find("```") {
+ Some(fence_idx) => {
+ is_fenced = true;
+ previous_offset + fence_idx
+ }
+ None => offset,
+ };
+ }
+ }
+ Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
+ in_rust_code_block = false;
+
+ let code_block_end = if is_fenced {
+ let fence_str = &md[previous_offset..offset]
+ .chars()
+ .rev()
+ .collect::<String>();
+ fence_str
+ .find("```")
+ .map(|fence_idx| offset - fence_idx)
+ .unwrap_or_else(|| offset)
+ } else if md
+ .as_bytes()
+ .get(offset)
+ .map(|b| *b == b'\n')
+ .unwrap_or_default()
+ {
+ offset - 1
+ } else {
+ offset
+ };
+
+ let code_end = if is_fenced {
+ previous_offset
+ } else {
+ code_block_end
+ };
+
+ code_blocks.push(RustCodeBlock {
+ is_fenced,
+ range: Range {
+ start: code_block_start,
+ end: code_block_end,
+ },
+ code: Range {
+ start: code_start,
+ end: code_end,
+ },
+ syntax: if !syntax.is_empty() {
+ Some(syntax.into_owned())
+ } else {
+ None
+ },
+ });
+ }
+ _ => (),
+ }
+
+ previous_offset = offset;
+ }
+
+ code_blocks
+}
+
#[derive(Clone, Default, Debug)]
pub struct IdMap {
map: FxHashMap<String, usize>,
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
index 1b6d7e8..6e7141f 100644
--- a/src/librustdoc/lib.rs
+++ b/src/librustdoc/lib.rs
@@ -3,6 +3,7 @@
html_root_url = "https://doc.rust-lang.org/nightly/",
html_playground_url = "https://play.rust-lang.org/")]
+#![feature(bind_by_move_pattern_guards)]
#![feature(rustc_private)]
#![feature(box_patterns)]
#![feature(box_syntax)]
diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs
new file mode 100644
index 0000000..a013cc3
--- /dev/null
+++ b/src/librustdoc/passes/check_code_block_syntax.rs
@@ -0,0 +1,109 @@
+use errors::Applicability;
+use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
+use syntax::parse::{ParseSess, token};
+use syntax::source_map::FilePathMapping;
+use syntax_pos::FileName;
+
+use clean;
+use core::DocContext;
+use fold::DocFolder;
+use html::markdown::{self, RustCodeBlock};
+use passes::Pass;
+
+pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
+ Pass::early("check-code-block-syntax", check_code_block_syntax,
+ "validates syntax inside Rust code blocks");
+
+pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
+ SyntaxChecker { cx }.fold_crate(krate)
+}
+
+struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
+ cx: &'a DocContext<'a, 'tcx, 'rcx>,
+}
+
+impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
+ fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
+ let sess = ParseSess::new(FilePathMapping::empty());
+ let source_file = sess.source_map().new_source_file(
+ FileName::Custom(String::from("doctest")),
+ dox[code_block.code].to_owned(),
+ );
+
+ let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+ while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
+ if tok == token::Eof {
+ break;
+ }
+ }
+
+ let errors = lexer.buffer_fatal_errors();
+
+ if !errors.is_empty() {
+ Err(errors)
+ } else {
+ Ok(())
+ }
+ });
+
+ if let Err(errors) = errors {
+ let mut diag = if let Some(sp) =
+ super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
+ {
+ let mut diag = self
+ .cx
+ .sess()
+ .struct_span_warn(sp, "could not parse code block as Rust code");
+
+ for mut err in errors {
+ diag.note(&format!("error from rustc: {}", err.message()));
+ err.cancel();
+ }
+
+ if code_block.syntax.is_none() && code_block.is_fenced {
+ let sp = sp.from_inner_byte_pos(0, 3);
+ diag.span_suggestion_with_applicability(
+ sp,
+ "mark blocks that do not contain Rust code as text",
+ String::from("```text"),
+ Applicability::MachineApplicable,
+ );
+ }
+
+ diag
+ } else {
+ // We couldn't calculate the span of the markdown block that had the error, so our
+ // diagnostics are going to be a bit lacking.
+ let mut diag = self.cx.sess().struct_span_warn(
+ super::span_of_attrs(&item.attrs),
+ "doc comment contains an invalid Rust code block",
+ );
+
+ for mut err in errors {
+ // Don't bother reporting the error, because we can't show where it happened.
+ err.cancel();
+ }
+
+ if code_block.syntax.is_none() && code_block.is_fenced {
+ diag.help("mark blocks that do not contain Rust code as text: ```text");
+ }
+
+ diag
+ };
+
+ diag.emit();
+ }
+ }
+}
+
+impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
+ fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
+ if let Some(dox) = &item.attrs.collapsed_doc_value() {
+ for code_block in markdown::rust_code_blocks(&dox) {
+ self.check_rust_syntax(&item, &dox, code_block);
+ }
+ }
+
+ self.fold_item_recur(item)
+ }
+}
diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs
index 56d6671..3d6096b 100644
--- a/src/librustdoc/passes/collect_intra_doc_links.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links.rs
@@ -6,7 +6,7 @@
use syntax::ast::{self, Ident, NodeId};
use syntax::feature_gate::UnstableFeatures;
use syntax::symbol::Symbol;
-use syntax_pos::{self, DUMMY_SP};
+use syntax_pos::DUMMY_SP;
use std::ops::Range;
@@ -16,6 +16,7 @@
use clean::*;
use passes::{look_for_tests, Pass};
+use super::span_of_attrs;
pub const COLLECT_INTRA_DOC_LINKS: Pass =
Pass::early("collect-intra-doc-links", collect_intra_doc_links,
@@ -440,15 +441,6 @@
None
}
-pub fn span_of_attrs(attrs: &Attributes) -> syntax_pos::Span {
- if attrs.doc_strings.is_empty() {
- return DUMMY_SP;
- }
- let start = attrs.doc_strings[0].span();
- let end = attrs.doc_strings.last().expect("No doc strings provided").span();
- start.to(end)
-}
-
/// Reports a resolution failure diagnostic.
///
/// If we cannot find the exact source span of the resolution failure, we use the span of the
diff --git a/src/librustdoc/passes/mod.rs b/src/librustdoc/passes/mod.rs
index 23581d0..c9a3a2c 100644
--- a/src/librustdoc/passes/mod.rs
+++ b/src/librustdoc/passes/mod.rs
@@ -8,7 +8,7 @@
use std::mem;
use std::fmt;
use syntax::ast::NodeId;
-use syntax_pos::Span;
+use syntax_pos::{DUMMY_SP, Span};
use std::ops::Range;
use clean::{self, GetDefId, Item};
@@ -18,8 +18,6 @@
use html::markdown::{find_testable_code, ErrorCodes, LangString};
-use self::collect_intra_doc_links::span_of_attrs;
-
mod collapse_docs;
pub use self::collapse_docs::COLLAPSE_DOCS;
@@ -47,6 +45,9 @@
mod collect_trait_impls;
pub use self::collect_trait_impls::COLLECT_TRAIT_IMPLS;
+mod check_code_block_syntax;
+pub use self::check_code_block_syntax::CHECK_CODE_BLOCK_SYNTAX;
+
/// Represents a single pass.
#[derive(Copy, Clone)]
pub enum Pass {
@@ -137,6 +138,7 @@
STRIP_PRIV_IMPORTS,
PROPAGATE_DOC_CFG,
COLLECT_INTRA_DOC_LINKS,
+ CHECK_CODE_BLOCK_SYNTAX,
COLLECT_TRAIT_IMPLS,
];
@@ -147,6 +149,7 @@
"strip-hidden",
"strip-private",
"collect-intra-doc-links",
+ "check-code-block-syntax",
"collapse-docs",
"unindent-comments",
"propagate-doc-cfg",
@@ -158,6 +161,7 @@
"check-private-items-doc-tests",
"strip-priv-imports",
"collect-intra-doc-links",
+ "check-code-block-syntax",
"collapse-docs",
"unindent-comments",
"propagate-doc-cfg",
@@ -399,6 +403,16 @@
}
}
+/// Return a span encompassing all the given attributes.
+crate fn span_of_attrs(attrs: &clean::Attributes) -> Span {
+ if attrs.doc_strings.is_empty() {
+ return DUMMY_SP;
+ }
+ let start = attrs.doc_strings[0].span();
+ let end = attrs.doc_strings.last().expect("No doc strings provided").span();
+ start.to(end)
+}
+
/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
///
/// This method will return `None` if we cannot construct a span from the source map or if the
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index ecb34e4..53650bd 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -238,19 +238,6 @@
sr
}
- pub fn new_without_err(sess: &'a ParseSess,
- source_file: Lrc<syntax_pos::SourceFile>,
- override_span: Option<Span>,
- prepend_error_text: &str) -> Result<Self, ()> {
- let mut sr = StringReader::new_raw(sess, source_file, override_span);
- if sr.advance_token().is_err() {
- eprintln!("{}", prepend_error_text);
- sr.emit_fatal_errors();
- return Err(());
- }
- Ok(sr)
- }
-
pub fn new_or_buffered_errs(sess: &'a ParseSess,
source_file: Lrc<syntax_pos::SourceFile>,
override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
diff --git a/src/test/rustdoc-ui/invalid-syntax.rs b/src/test/rustdoc-ui/invalid-syntax.rs
index 537816b..924e038 100644
--- a/src/test/rustdoc-ui/invalid-syntax.rs
+++ b/src/test/rustdoc-ui/invalid-syntax.rs
@@ -1,7 +1,66 @@
// compile-pass
-// compile-flags: --error-format=human
/// ```
/// \__________pkt->size___________/ \_result->size_/ \__pkt->size__/
/// ```
pub fn foo() {}
+
+/// ```
+/// |
+/// LL | use foobar::Baz;
+/// | ^^^^^^ did you mean `baz::foobar`?
+/// ```
+pub fn bar() {}
+
+/// ```
+/// valid
+/// ```
+///
+/// ```
+/// \_
+/// ```
+///
+/// ```text
+/// "invalid
+/// ```
+pub fn valid_and_invalid() {}
+
+/// This is a normal doc comment, but...
+///
+/// There's a code block with bad syntax in it:
+///
+/// ```rust
+/// \_
+/// ```
+///
+/// Good thing we tested it!
+pub fn baz() {}
+
+/// Indented block start
+///
+/// code with bad syntax
+/// \_
+///
+/// Indented block end
+pub fn quux() {}
+
+/// Unclosed fence
+///
+/// ```
+/// slkdjf
+pub fn xyzzy() {}
+
+/// Indented code that contains a fence
+///
+/// ```
+pub fn blah() {}
+
+/// ```edition2018
+/// \_
+/// ```
+pub fn blargh() {}
+
+#[doc = "```"]
+/// \_
+#[doc = "```"]
+pub fn crazy_attrs() {}
diff --git a/src/test/rustdoc-ui/invalid-syntax.stderr b/src/test/rustdoc-ui/invalid-syntax.stderr
index b566133..1080038 100644
--- a/src/test/rustdoc-ui/invalid-syntax.stderr
+++ b/src/test/rustdoc-ui/invalid-syntax.stderr
@@ -1,10 +1,97 @@
-Output from rustc:
-error: unknown start of token: /
- --> <stdin>:1:1
- |
-1 | /__________pkt->size___________/ /_result->size_/ /__pkt->size__/
- | ^
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:3:5
+ |
+LL | /// ```
+ | _____^
+LL | | /// /__________pkt->size___________/ /_result->size_/ /__pkt->size__/
+LL | | /// ```
+ | |_______^
+ |
+ = note: error from rustc: unknown start of token: /
+help: mark blocks that do not contain Rust code as text
+ |
+LL | /// ```text
+ | ^^^^^^^
-warning: Invalid doc comment starting with: `/__________pkt->size___________/ /_result->size_/ /__pkt->size__/`
-(Ignoring this codeblock)
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:8:5
+ |
+LL | /// ```
+ | _____^
+LL | | /// |
+LL | | /// LL | use foobar::Baz;
+LL | | /// | ^^^^^^ did you mean `baz::foobar`?
+LL | | /// ```
+ | |_______^
+ |
+ = note: error from rustc: unknown start of token: `
+help: mark blocks that do not contain Rust code as text
+ |
+LL | /// ```text
+ | ^^^^^^^
+
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:19:5
+ |
+LL | /// ```
+ | _____^
+LL | | /// /_
+LL | | /// ```
+ | |_______^
+ |
+ = note: error from rustc: unknown start of token: /
+help: mark blocks that do not contain Rust code as text
+ |
+LL | /// ```text
+ | ^^^^^^^
+
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:32:5
+ |
+LL | /// ```rust
+ | _____^
+LL | | /// /_
+LL | | /// ```
+ | |_______^
+ |
+ = note: error from rustc: unknown start of token: /
+
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:41:9
+ |
+LL | /// code with bad syntax
+ | _________^
+LL | | /// /_
+ | |__________^
+ |
+ = note: error from rustc: unknown start of token: /
+
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:55:9
+ |
+LL | /// ```
+ | ^^^
+ |
+ = note: error from rustc: unknown start of token: `
+
+warning: could not parse code block as Rust code
+ --> $DIR/invalid-syntax.rs:58:5
+ |
+LL | /// ```edition2018
+ | _____^
+LL | | /// /_
+LL | | /// ```
+ | |_______^
+ |
+ = note: error from rustc: unknown start of token: /
+
+warning: doc comment contains an invalid Rust code block
+ --> $DIR/invalid-syntax.rs:63:1
+ |
+LL | / #[doc = "```"]
+LL | | /// /_
+LL | | #[doc = "```"]
+ | |______________^
+ |
+ = help: mark blocks that do not contain Rust code as text: ```text
diff --git a/src/test/rustdoc/bad-codeblock-syntax.rs b/src/test/rustdoc/bad-codeblock-syntax.rs
new file mode 100644
index 0000000..0ab2f68
--- /dev/null
+++ b/src/test/rustdoc/bad-codeblock-syntax.rs
@@ -0,0 +1,27 @@
+// @has bad_codeblock_syntax/fn.foo.html
+// @has - '//*[@class="docblock"]/pre/code' '\_'
+/// ```
+/// \_
+/// ```
+pub fn foo() {}
+
+// @has bad_codeblock_syntax/fn.bar.html
+// @has - '//*[@class="docblock"]/pre/code' '`baz::foobar`'
+/// ```
+/// `baz::foobar`
+/// ```
+pub fn bar() {}
+
+// @has bad_codeblock_syntax/fn.quux.html
+// @has - '//*[@class="docblock"]/pre/code' '\_'
+/// ```rust
+/// \_
+/// ```
+pub fn quux() {}
+
+// @has bad_codeblock_syntax/fn.ok.html
+// @has - '//*[@class="docblock"]/pre/code[@class="language-text"]' '\_'
+/// ```text
+/// \_
+/// ```
+pub fn ok() {}