Rollup merge of #141666 - lolbinarycat:rustdoc-source_span_for_markdown_range-bug-141665, r=GuillaumeGomez
source_span_for_markdown_range: fix utf8 violation
It is non-trivial to reproduce this bug through rustdoc, which uses this function less than Clippy does, so the regression test was added as a unit test instead of an integration test.
Fixes https://github.com/rust-lang/rust/issues/141665
r? ``@GuillaumeGomez``
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
index 01bb132..fa839d2 100644
--- a/compiler/rustc_resolve/src/rustdoc.rs
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -12,10 +12,14 @@
use rustc_data_structures::unord::UnordSet;
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
+use rustc_span::source_map::SourceMap;
use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
use thin_vec::ThinVec;
use tracing::{debug, trace};
+#[cfg(test)]
+mod tests;
+
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
/// A doc fragment created from a `///` or `//!` doc comment.
@@ -532,9 +536,19 @@ pub fn source_span_for_markdown_range(
md_range: &Range<usize>,
fragments: &[DocFragment],
) -> Option<Span> {
+ let map = tcx.sess.source_map();
+ source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
+}
+
+// Inner function taking only a `SourceMap` (no `TyCtxt`), so it can be unit-tested.
+pub fn source_span_for_markdown_range_inner(
+ map: &SourceMap,
+ markdown: &str,
+ md_range: &Range<usize>,
+ fragments: &[DocFragment],
+) -> Option<Span> {
use rustc_span::BytePos;
- let map = tcx.sess.source_map();
if let &[fragment] = &fragments
&& fragment.kind == DocFragmentKind::RawDoc
&& let Ok(snippet) = map.span_to_snippet(fragment.span)
@@ -570,7 +584,13 @@ pub fn source_span_for_markdown_range(
{
// If there is either a match in a previous fragment, or
// multiple matches in this fragment, there is ambiguity.
- if match_data.is_none() && !snippet[match_start + 1..].contains(pat) {
+ // The snippet cannot be empty, because it matches the
+ // pattern, which is checked to be non-empty.
+ if match_data.is_none()
+ && !snippet.as_bytes()[match_start + 1..]
+ .windows(pat.len())
+ .any(|s| s == pat.as_bytes())
+ {
match_data = Some((i, match_start));
} else {
// Heirustic produced ambiguity, return nothing.
diff --git a/compiler/rustc_resolve/src/rustdoc/tests.rs b/compiler/rustc_resolve/src/rustdoc/tests.rs
new file mode 100644
index 0000000..221ac90
--- /dev/null
+++ b/compiler/rustc_resolve/src/rustdoc/tests.rs
@@ -0,0 +1,50 @@
+use std::path::PathBuf;
+
+use rustc_span::source_map::{FilePathMapping, SourceMap};
+use rustc_span::symbol::sym;
+use rustc_span::{BytePos, Span};
+
+use super::{DocFragment, DocFragmentKind, source_span_for_markdown_range_inner};
+
+#[test]
+fn single_backtick() {
+ let sm = SourceMap::new(FilePathMapping::empty());
+ sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "`"] fn foo() {}"#.to_string());
+ let span = source_span_for_markdown_range_inner(
+ &sm,
+ "`",
+ &(0..1),
+ &[DocFragment {
+ span: Span::with_root_ctxt(BytePos(8), BytePos(11)),
+ item_id: None,
+ kind: DocFragmentKind::RawDoc,
+ doc: sym::empty, // unused placeholder
+ indent: 0,
+ }],
+ )
+ .unwrap();
+ assert_eq!(span.lo(), BytePos(9));
+ assert_eq!(span.hi(), BytePos(10));
+}
+
+#[test]
+fn utf8() {
+ // regression test for https://github.com/rust-lang/rust/issues/141665
+ let sm = SourceMap::new(FilePathMapping::empty());
+ sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "⚠"] fn foo() {}"#.to_string());
+ let span = source_span_for_markdown_range_inner(
+ &sm,
+ "⚠",
+ &(0..3),
+ &[DocFragment {
+ span: Span::with_root_ctxt(BytePos(8), BytePos(14)),
+ item_id: None,
+ kind: DocFragmentKind::RawDoc,
+ doc: sym::empty, // unused placeholder
+ indent: 0,
+ }],
+ )
+ .unwrap();
+ assert_eq!(span.lo(), BytePos(9));
+ assert_eq!(span.hi(), BytePos(12));
+}