Rollup merge of #141666 - lolbinarycat:rustdoc-source_span_for_markdown_range-bug-141665, r=GuillaumeGomez

source_span_for_markdown_range: fix utf8 violation

it is non-trivial to reproduce this bug through rustdoc, which uses this function less than clippy, so the regression test was added as a unit test instead of an integration test.

fixes https://github.com/rust-lang/rust/issues/141665

r? ``@GuillaumeGomez``
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
index 01bb132..fa839d2 100644
--- a/compiler/rustc_resolve/src/rustdoc.rs
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -12,10 +12,14 @@
 use rustc_data_structures::unord::UnordSet;
 use rustc_middle::ty::TyCtxt;
 use rustc_span::def_id::DefId;
+use rustc_span::source_map::SourceMap;
 use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
 use thin_vec::ThinVec;
 use tracing::{debug, trace};
 
+#[cfg(test)]
+mod tests;
+
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum DocFragmentKind {
     /// A doc fragment created from a `///` or `//!` doc comment.
@@ -532,9 +536,19 @@ pub fn source_span_for_markdown_range(
     md_range: &Range<usize>,
     fragments: &[DocFragment],
 ) -> Option<Span> {
+    let map = tcx.sess.source_map();
+    source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
+}
+
+// inner function used for unit testing
+pub fn source_span_for_markdown_range_inner(
+    map: &SourceMap,
+    markdown: &str,
+    md_range: &Range<usize>,
+    fragments: &[DocFragment],
+) -> Option<Span> {
     use rustc_span::BytePos;
 
-    let map = tcx.sess.source_map();
     if let &[fragment] = &fragments
         && fragment.kind == DocFragmentKind::RawDoc
         && let Ok(snippet) = map.span_to_snippet(fragment.span)
@@ -570,7 +584,13 @@ pub fn source_span_for_markdown_range(
             {
                 // If there is either a match in a previous fragment, or
                 // multiple matches in this fragment, there is ambiguity.
-                if match_data.is_none() && !snippet[match_start + 1..].contains(pat) {
+                // the snippet cannot be zero-sized, because it matches
+                // the pattern, which is checked to not be zero sized.
+                if match_data.is_none()
+                    && !snippet.as_bytes()[match_start + 1..]
+                        .windows(pat.len())
+                        .any(|s| s == pat.as_bytes())
+                {
                     match_data = Some((i, match_start));
                 } else {
                     // Heirustic produced ambiguity, return nothing.
diff --git a/compiler/rustc_resolve/src/rustdoc/tests.rs b/compiler/rustc_resolve/src/rustdoc/tests.rs
new file mode 100644
index 0000000..221ac90
--- /dev/null
+++ b/compiler/rustc_resolve/src/rustdoc/tests.rs
@@ -0,0 +1,50 @@
+use std::path::PathBuf;
+
+use rustc_span::source_map::{FilePathMapping, SourceMap};
+use rustc_span::symbol::sym;
+use rustc_span::{BytePos, Span};
+
+use super::{DocFragment, DocFragmentKind, source_span_for_markdown_range_inner};
+
+#[test]
+fn single_backtick() {
+    let sm = SourceMap::new(FilePathMapping::empty());
+    sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "`"] fn foo() {}"#.to_string());
+    let span = source_span_for_markdown_range_inner(
+        &sm,
+        "`",
+        &(0..1),
+        &[DocFragment {
+            span: Span::with_root_ctxt(BytePos(8), BytePos(11)),
+            item_id: None,
+            kind: DocFragmentKind::RawDoc,
+            doc: sym::empty, // unused placeholder
+            indent: 0,
+        }],
+    )
+    .unwrap();
+    assert_eq!(span.lo(), BytePos(9));
+    assert_eq!(span.hi(), BytePos(10));
+}
+
+#[test]
+fn utf8() {
+    // regression test for https://github.com/rust-lang/rust/issues/141665
+    let sm = SourceMap::new(FilePathMapping::empty());
+    sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "⚠"] fn foo() {}"#.to_string());
+    let span = source_span_for_markdown_range_inner(
+        &sm,
+        "⚠",
+        &(0..3),
+        &[DocFragment {
+            span: Span::with_root_ctxt(BytePos(8), BytePos(14)),
+            item_id: None,
+            kind: DocFragmentKind::RawDoc,
+            doc: sym::empty, // unused placeholder
+            indent: 0,
+        }],
+    )
+    .unwrap();
+    assert_eq!(span.lo(), BytePos(9));
+    assert_eq!(span.hi(), BytePos(12));
+}