crates/parser/src/frontmatter.rs - third_party/github.com/rust-lang/rust-analyzer - Git at Google

 // Copied from https://github.com/rust-lang/cargo/blob/367fd9f213750cd40317803dd0a5a3ce3f0c676d/src/cargo/util/frontmatter.rs
 #![expect(dead_code)] // avoid editing
 #![expect(unreachable_pub)] // avoid editing
 #![expect(clippy::useless_format)] // avoid editing

 type Span = std::ops::Range<usize>;

 /// A source file split into an optional shebang line, an optional frontmatter block, and the
 /// remaining content.
 ///
 /// Every `Span` stored here is a byte range into `raw`; the accessor methods slice `raw` with
 /// these ranges.
 #[derive(Debug)]
 pub struct ScriptSource<'s> {
     /// The full file
     raw: &'s str,
     /// The `#!/usr/bin/env cargo` line, if present (the span includes the line's trailing
     /// newline when there is one)
     shebang: Option<Span>,
     /// The code fence opener (`---`), i.e. the leading run of three or more `-`
     open: Option<Span>,
     /// Trailing text after `ScriptSource::open` that identifies the meaning of
     /// `ScriptSource::frontmatter`; surrounding spaces and tabs are excluded and an empty
     /// infostring is stored as `None`
     info: Option<Span>,
     /// The lines between `ScriptSource::open` and `ScriptSource::close`, starting right after
     /// the opener line's newline and ending right after the newline that precedes the closer
     frontmatter: Option<Span>,
     /// The code fence closer (`---`)
     close: Option<Span>,
     /// All content after the frontmatter and shebang (the whole file when neither is present)
     content: Span,
 }

 impl<'s> ScriptSource<'s> {
     /// Splits `raw` into its shebang line, frontmatter block, and remaining content.
     ///
     /// Blank lines may separate the shebang from the frontmatter. When the line that follows
     /// does not start with `-`, the file has no frontmatter and only `shebang` and `content`
     /// are populated.
     ///
     /// # Errors
     ///
     /// Returns a [`FrontmatterError`] when
     /// - the opening fence is only one or two `-` long,
     /// - the opening line or the frontmatter body is never followed by a matching fence,
     /// - the closing fence has fewer or more `-` than the opening fence,
     /// - anything other than spaces or tabs follows the closing fence on its line, or
     /// - another run of `-` starts the line after the frontmatter (blank lines skipped).
     pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
         use winnow::stream::FindSlice as _;
         use winnow::stream::Location as _;
         use winnow::stream::Offset as _;
         use winnow::stream::Stream as _;

         let content_end = raw.len();
         let mut source = Self {
             raw,
             shebang: None,
             open: None,
             info: None,
             frontmatter: None,
             close: None,
             content: 0..content_end,
         };

         let mut input = winnow::stream::LocatingSlice::new(raw);

         if let Some(shebang_end) = strip_shebang(input.as_ref()) {
             let shebang_start = input.current_token_start();
             let _ = input.next_slice(shebang_end);
             let shebang_end = input.current_token_start();
             source.shebang = Some(shebang_start..shebang_end);
             source.content = shebang_end..content_end;
         }

         // Whitespace may precede a frontmatter but must end with a newline
         if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
             let _ = input.next_slice(nl_end);
         }

         // Opens with a line that starts with 3 or more `-` followed by an optional identifier
         const FENCE_CHAR: char = '-';
         let fence_length = input
             .as_ref()
             .char_indices()
             .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
             .unwrap_or_else(|| input.eof_offset());
         let open_start = input.current_token_start();
         let fence_pattern = input.next_slice(fence_length);
         let open_end = input.current_token_start();
         match fence_length {
             0 => {
                 // No fence at all: the file simply has no frontmatter.
                 return Ok(source);
             }
             1 | 2 => {
                 // either not a frontmatter or invalid frontmatter opening
                 return Err(FrontmatterError::new(
                     format!(
                         "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
                     ),
                     raw.len()..raw.len(),
                 )
                 .push_visible_span(open_start..open_end));
             }
             _ => {}
         }
         source.open = Some(open_start..open_end);
         let Some(info_nl) = input.find_slice("\n") else {
             return Err(FrontmatterError::new(
                 format!("unclosed frontmatter; expected `{fence_pattern}`"),
                 raw.len()..raw.len(),
             )
             .push_visible_span(open_start..open_end));
         };
         let info = input.next_slice(info_nl.start);
         let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
         let info = info.trim_matches(is_horizontal_whitespace);
         if !info.is_empty() {
             let info_start = info.offset_from(&raw);
             let info_end = info_start + info.len();
             source.info = Some(info_start..info_end);
         }

         // Ends with a line that starts with a matching number of `-` only followed by whitespace
         let nl_fence_pattern = format!("\n{fence_pattern}");
         let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
             // No full-length closer exists; look for the longest shorter fence so the error can
             // point at it instead of only reporting an unclosed frontmatter.
             for len in (2..(nl_fence_pattern.len() - 1)).rev() {
                 let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
                     continue;
                 };
                 let _ = input.next_slice(frontmatter_nl.start + 1);
                 let close_start = input.current_token_start();
                 // The prefix that matched holds only `len - 1` dashes, so the line may carry
                 // fewer than `len` of them. Count the dashes that are really there rather than
                 // consuming a fixed `len` bytes, which could step past the fence (or past the
                 // end of the input) and would under-report the difference. The count is always
                 // below `fence_length` because the full-length pattern was not found above.
                 let close_length =
                     input.as_ref().chars().take_while(|c| *c == FENCE_CHAR).count();
                 let _ = input.next_slice(close_length);
                 let close_end = input.current_token_start();
                 let fewer_dashes = fence_length - close_length;
                 return Err(FrontmatterError::new(
                     format!(
                         "closing code fence has {fewer_dashes} less `-` than the opening fence"
                     ),
                     close_start..close_end,
                 )
                 .push_visible_span(open_start..open_end));
             }
             return Err(FrontmatterError::new(
                 format!("unclosed frontmatter; expected `{fence_pattern}`"),
                 raw.len()..raw.len(),
             )
             .push_visible_span(open_start..open_end));
         };
         let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
         let _ = input.next_slice(frontmatter_nl.start + 1);
         let frontmatter_end = input.current_token_start();
         source.frontmatter = Some(frontmatter_start..frontmatter_end);
         let close_start = input.current_token_start();
         let _ = input.next_slice(fence_length);
         let close_end = input.current_token_start();
         source.close = Some(close_start..close_end);

         // Consume the rest of the closing line (through its newline, or to the end of input).
         let nl = input.find_slice("\n");
         let after_closing_fence =
             input.next_slice(nl.map(|span| span.end).unwrap_or_else(|| input.eof_offset()));
         let content_start = input.current_token_start();
         let extra_dashes = after_closing_fence.chars().take_while(|b| *b == FENCE_CHAR).count();
         if 0 < extra_dashes {
             let extra_start = close_end;
             let extra_end = extra_start + extra_dashes;
             return Err(FrontmatterError::new(
                 format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
                 extra_start..extra_end,
             )
             .push_visible_span(open_start..open_end));
         } else {
             let after_closing_fence = strip_newline(after_closing_fence);
             let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
             if !after_closing_fence.is_empty() {
                 // extra characters beyond the original fence pattern
                 let after_start = after_closing_fence.offset_from(&raw);
                 let after_end = after_start + after_closing_fence.len();
                 return Err(FrontmatterError::new(
                     format!("unexpected characters after frontmatter close"),
                     after_start..after_end,
                 )
                 .push_visible_span(open_start..open_end));
             }
         }

         source.content = content_start..content_end;

         // A second fence directly after the first frontmatter (blank lines aside) is rejected.
         if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
             let _ = input.next_slice(nl_end);
         }
         let fence_length = input
             .as_ref()
             .char_indices()
             .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
             .unwrap_or_else(|| input.eof_offset());
         if 0 < fence_length {
             let fence_start = input.current_token_start();
             let fence_end = fence_start + fence_length;
             return Err(FrontmatterError::new(
                 format!("only one frontmatter is supported"),
                 fence_start..fence_end,
             )
             .push_visible_span(open_start..open_end)
             .push_visible_span(close_start..close_end));
         }

         Ok(source)
     }

     /// The text of the shebang line, if one was found.
     pub fn shebang(&self) -> Option<&'s str> {
         self.shebang.clone().map(|span| &self.raw[span])
     }

     /// Byte range of the shebang line, if one was found.
     pub fn shebang_span(&self) -> Option<Span> {
         self.shebang.clone()
     }

     /// Byte range of the opening fence, if a frontmatter was found.
     pub fn open_span(&self) -> Option<Span> {
         self.open.clone()
     }

     /// The infostring that follows the opening fence, if it is non-empty.
     pub fn info(&self) -> Option<&'s str> {
         self.info.clone().map(|span| &self.raw[span])
     }

     /// Byte range of the infostring, if it is non-empty.
     pub fn info_span(&self) -> Option<Span> {
         self.info.clone()
     }

     /// The text between the opening and closing fence lines, if a frontmatter was found.
     pub fn frontmatter(&self) -> Option<&'s str> {
         self.frontmatter.clone().map(|span| &self.raw[span])
     }

     /// Byte range of the frontmatter body, if a frontmatter was found.
     pub fn frontmatter_span(&self) -> Option<Span> {
         self.frontmatter.clone()
     }

     /// Byte range of the closing fence, if a frontmatter was found.
     pub fn close_span(&self) -> Option<Span> {
         self.close.clone()
     }

     /// Everything after the shebang and frontmatter.
     pub fn content(&self) -> &'s str {
         &self.raw[self.content.clone()]
     }

     /// Byte range of everything after the shebang and frontmatter.
     pub fn content_span(&self) -> Span {
         self.content.clone()
     }
 }

 /// Returns the byte index just past the shebang line, or `None` when `input` has no shebang.
 pub fn strip_shebang(input: &str) -> Option<usize> {
     // Mirrors `strip_shebang` in rust-lang/rust's compiler/rustc_lexer/src/lib.rs.
     // Only a literal `#!` at the very start counts; no leading whitespace is tolerated and
     // platform-specific shebang restrictions are deliberately ignored.
     let rest = input.strip_prefix("#!")?;

     // `#!` followed (after whitespace) by `[` may be valid Rust code, so it is treated as Rust
     // code rather than as a shebang.
     //
     // NOTE: rustc also skips line and block comments here; that is intentionally not replicated
     // to keep this function unaware of Rust grammar.
     if rest.trim_start().starts_with('[') {
         return None;
     }

     // The shebang runs through the first newline, or to the end of the input without one.
     let line_end = match input.find('\n') {
         Some(newline) => newline + 1,
         None => input.len(),
     };
     Some(line_end)
 }

 /// Returns the byte index just past the last newline within the leading whitespace of `input`.
 ///
 /// Yields `None` when `input` does not start with whitespace or when its leading whitespace
 /// contains no newline.
 pub fn strip_ws_lines(input: &str) -> Option<usize> {
     let leading_ws = match input.find(|c| !is_whitespace(c)) {
         Some(0) => return None,
         Some(first_non_ws) => &input[..first_non_ws],
         // Nothing but whitespace (or nothing at all).
         None => input,
     };

     // Only whole lines are stripped: stop right after the final newline of the whitespace run.
     leading_ws.rfind('\n').map(|newline| newline + 1)
 }

 /// True if `c` is considered a whitespace according to Rust language definition.
 /// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
 /// for definitions of these classes.
 fn is_whitespace(c: char) -> bool {
     // This is Pattern_White_Space.
     //
     // The set does not change between Unicode versions, so the code points are hard-coded.
     matches!(
         c,
         // tab (\t), line feed (\n), vertical tab, form feed, carriage return (\r)
         '\u{0009}'..='\u{000D}'
         // space
         | '\u{0020}'
         // next line (from latin1)
         | '\u{0085}'
         // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
         | '\u{200E}'..='\u{200F}'
         // LINE SEPARATOR, PARAGRAPH SEPARATOR
         | '\u{2028}'..='\u{2029}'
     )
 }

 /// True if `c` is a horizontal whitespace character, i.e. a tab or a space.
 fn is_horizontal_whitespace(c: char) -> bool {
     // Only the horizontal members of the whitespace set accepted by `is_whitespace`; the values
     // are hard-coded because that set is stable across Unicode versions.
     matches!(c, '\t' | ' ')
 }

 /// Removes one trailing line terminator (`\r\n` or `\n`) from `text`, if present.
 fn strip_newline(text: &str) -> &str {
     let Some(line) = text.strip_suffix('\n') else {
         // A lone `\r` is not treated as a line terminator.
         return text;
     };
     line.strip_suffix('\r').unwrap_or(line)
 }

 /// Error produced when a frontmatter block is malformed.
 #[derive(Debug)]
 pub struct FrontmatterError {
     /// Human-readable description of the problem; also the `Display` output
     message: String,
     /// The span the error is primarily about
     primary_span: Span,
     /// Additional spans attached with `push_visible_span`, in insertion order
     visible_spans: Vec<Span>,
 }

 impl FrontmatterError {
     /// Creates an error with the given message and primary span and no visible spans.
     pub fn new(message: impl Into<String>, span: Span) -> Self {
         let message = message.into();
         Self { message, primary_span: span, visible_spans: Vec::new() }
     }

     /// Appends `span` to the visible spans and hands the error back for chaining.
     pub fn push_visible_span(mut self, span: Span) -> Self {
         self.visible_spans.push(span);
         self
     }

     /// The error message.
     pub fn message(&self) -> &str {
         &self.message
     }

     /// A copy of the primary span.
     pub fn primary_span(&self) -> Span {
         self.primary_span.clone()
     }

     /// The visible spans, in the order they were pushed.
     pub fn visible_spans(&self) -> &[Span] {
         self.visible_spans.as_slice()
     }
 }

 impl std::fmt::Display for FrontmatterError {
     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         // Delegate to the message so formatter flags (width, padding, ...) keep working.
         std::fmt::Display::fmt(&self.message, fmt)
     }
 }

 impl std::error::Error for FrontmatterError {}
	// Copied from https://github.com/rust-lang/cargo/blob/367fd9f213750cd40317803dd0a5a3ce3f0c676d/src/cargo/util/frontmatter.rs
	#![expect(dead_code)] // avoid editing
	#![expect(unreachable_pub)] // avoid editing
	#![expect(clippy::useless_format)] // avoid editing

	type Span = std::ops::Range<usize>;

	/// A source file split into an optional shebang line, an optional frontmatter block, and the
	/// remaining content.
	///
	/// Every `Span` stored here is a byte range into `raw`; the accessor methods slice `raw` with
	/// these ranges.
	#[derive(Debug)]
	pub struct ScriptSource<'s> {
	/// The full file
	raw: &'s str,
	/// The `#!/usr/bin/env cargo` line, if present (the span includes the line's trailing
	/// newline when there is one)
	shebang: Option<Span>,
	/// The code fence opener (`---`), i.e. the leading run of three or more `-`
	open: Option<Span>,
	/// Trailing text after `ScriptSource::open` that identifies the meaning of
	/// `ScriptSource::frontmatter`; surrounding spaces and tabs are excluded and an empty
	/// infostring is stored as `None`
	info: Option<Span>,
	/// The lines between `ScriptSource::open` and `ScriptSource::close`, starting right after
	/// the opener line's newline and ending right after the newline that precedes the closer
	frontmatter: Option<Span>,
	/// The code fence closer (`---`)
	close: Option<Span>,
	/// All content after the frontmatter and shebang (the whole file when neither is present)
	content: Span,
	}

	impl<'s> ScriptSource<'s> {
	pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
	use winnow::stream::FindSlice as _;
	use winnow::stream::Location as _;
	use winnow::stream::Offset as _;
	use winnow::stream::Stream as _;

	let content_end = raw.len();
	let mut source = Self {
	raw,
	shebang: None,
	open: None,
	info: None,
	frontmatter: None,
	close: None,
	content: 0..content_end,
	};

	let mut input = winnow::stream::LocatingSlice::new(raw);

	if let Some(shebang_end) = strip_shebang(input.as_ref()) {
	let shebang_start = input.current_token_start();
	let _ = input.next_slice(shebang_end);
	let shebang_end = input.current_token_start();
	source.shebang = Some(shebang_start..shebang_end);
	source.content = shebang_end..content_end;
	}

	// Whitespace may precede a frontmatter but must end with a newline
	if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
	let _ = input.next_slice(nl_end);
	}

	// Opens with a line that starts with 3 or more `-` followed by an optional identifier
	const FENCE_CHAR: char = '-';
	let fence_length = input
	.as_ref()
	.char_indices()
	.find_map(\|(i, c)\| (c != FENCE_CHAR).then_some(i))
	.unwrap_or_else(\|\| input.eof_offset());
	let open_start = input.current_token_start();
	let fence_pattern = input.next_slice(fence_length);
	let open_end = input.current_token_start();
	match fence_length {
	0 => {
	return Ok(source);
	}
	1 \| 2 => {
	// either not a frontmatter or invalid frontmatter opening
	return Err(FrontmatterError::new(
	format!(
	"found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
	),
	raw.len()..raw.len(),
	).push_visible_span(open_start..open_end));
	}
	_ => {}
	}
	source.open = Some(open_start..open_end);
	let Some(info_nl) = input.find_slice("\n") else {
	return Err(FrontmatterError::new(
	format!("unclosed frontmatter; expected `{fence_pattern}`"),
	raw.len()..raw.len(),
	)
	.push_visible_span(open_start..open_end));
	};
	let info = input.next_slice(info_nl.start);
	let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
	let info = info.trim_matches(is_horizontal_whitespace);
	if !info.is_empty() {
	let info_start = info.offset_from(&raw);
	let info_end = info_start + info.len();
	source.info = Some(info_start..info_end);
	}

	// Ends with a line that starts with a matching number of `-` only followed by whitespace
	let nl_fence_pattern = format!("\n{fence_pattern}");
	let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
	for len in (2..(nl_fence_pattern.len() - 1)).rev() {
	let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
	continue;
	};
	let _ = input.next_slice(frontmatter_nl.start + 1);
	let close_start = input.current_token_start();
	let _ = input.next_slice(len);
	let close_end = input.current_token_start();
	let fewer_dashes = fence_length - len;
	return Err(FrontmatterError::new(
	format!(
	"closing code fence has {fewer_dashes} less `-` than the opening fence"
	),
	close_start..close_end,
	)
	.push_visible_span(open_start..open_end));
	}
	return Err(FrontmatterError::new(
	format!("unclosed frontmatter; expected `{fence_pattern}`"),
	raw.len()..raw.len(),
	)
	.push_visible_span(open_start..open_end));
	};
	let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
	let _ = input.next_slice(frontmatter_nl.start + 1);
	let frontmatter_end = input.current_token_start();
	source.frontmatter = Some(frontmatter_start..frontmatter_end);
	let close_start = input.current_token_start();
	let _ = input.next_slice(fence_length);
	let close_end = input.current_token_start();
	source.close = Some(close_start..close_end);

	let nl = input.find_slice("\n");
	let after_closing_fence =
	input.next_slice(nl.map(\|span\| span.end).unwrap_or_else(\|\| input.eof_offset()));
	let content_start = input.current_token_start();
	let extra_dashes = after_closing_fence.chars().take_while(\|b\| *b == FENCE_CHAR).count();
	if 0 < extra_dashes {
	let extra_start = close_end;
	let extra_end = extra_start + extra_dashes;
	return Err(FrontmatterError::new(
	format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
	extra_start..extra_end,
	)
	.push_visible_span(open_start..open_end));
	} else {
	let after_closing_fence = strip_newline(after_closing_fence);
	let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
	if !after_closing_fence.is_empty() {
	// extra characters beyond the original fence pattern
	let after_start = after_closing_fence.offset_from(&raw);
	let after_end = after_start + after_closing_fence.len();
	return Err(FrontmatterError::new(
	format!("unexpected characters after frontmatter close"),
	after_start..after_end,
	)
	.push_visible_span(open_start..open_end));
	}
	}

	source.content = content_start..content_end;

	if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
	let _ = input.next_slice(nl_end);
	}
	let fence_length = input
	.as_ref()
	.char_indices()
	.find_map(\|(i, c)\| (c != FENCE_CHAR).then_some(i))
	.unwrap_or_else(\|\| input.eof_offset());
	if 0 < fence_length {
	let fence_start = input.current_token_start();
	let fence_end = fence_start + fence_length;
	return Err(FrontmatterError::new(
	format!("only one frontmatter is supported"),
	fence_start..fence_end,
	)
	.push_visible_span(open_start..open_end)
	.push_visible_span(close_start..close_end));
	}

	Ok(source)
	}

	pub fn shebang(&self) -> Option<&'s str> {
	self.shebang.clone().map(\|span\| &self.raw[span])
	}

	pub fn shebang_span(&self) -> Option<Span> {
	self.shebang.clone()
	}

	pub fn open_span(&self) -> Option<Span> {
	self.open.clone()
	}

	pub fn info(&self) -> Option<&'s str> {
	self.info.clone().map(\|span\| &self.raw[span])
	}

	pub fn info_span(&self) -> Option<Span> {
	self.info.clone()
	}

	pub fn frontmatter(&self) -> Option<&'s str> {
	self.frontmatter.clone().map(\|span\| &self.raw[span])
	}

	pub fn frontmatter_span(&self) -> Option<Span> {
	self.frontmatter.clone()
	}

	pub fn close_span(&self) -> Option<Span> {
	self.close.clone()
	}

	pub fn content(&self) -> &'s str {
	&self.raw[self.content.clone()]
	}

	pub fn content_span(&self) -> Span {
	self.content.clone()
	}
	}

	/// Returns the byte index just past the shebang line, or `None` when `input` has no shebang.
	pub fn strip_shebang(input: &str) -> Option<usize> {
	    // Mirrors `strip_shebang` in rust-lang/rust's compiler/rustc_lexer/src/lib.rs.
	    // Only a literal `#!` at the very start counts; no leading whitespace is tolerated and
	    // platform-specific shebang restrictions are deliberately ignored.
	    let rest = input.strip_prefix("#!")?;

	    // `#!` followed (after whitespace) by `[` may be valid Rust code, so it is treated as Rust
	    // code rather than as a shebang.
	    //
	    // NOTE: rustc also skips line and block comments here; that is intentionally not replicated
	    // to keep this function unaware of Rust grammar.
	    if rest.trim_start().starts_with('[') {
	        return None;
	    }

	    // The shebang runs through the first newline, or to the end of the input without one.
	    let line_end = match input.find('\n') {
	        Some(newline) => newline + 1,
	        None => input.len(),
	    };
	    Some(line_end)
	}

	/// Returns the index after any lines with only whitespace, if present
	pub fn strip_ws_lines(input: &str) -> Option<usize> {
	let ws_end = input.find(\|c\| !is_whitespace(c)).unwrap_or(input.len());
	if ws_end == 0 {
	return None;
	}

	let nl_start = input[0..ws_end].rfind('\n')?;
	let nl_end = nl_start + 1;
	Some(nl_end)
	}

	/// True if `c` is considered a whitespace according to Rust language definition.
	/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
	/// for definitions of these classes.
	fn is_whitespace(c: char) -> bool {
	    // This is Pattern_White_Space.
	    //
	    // The set does not change between Unicode versions, so the code points are hard-coded.
	    matches!(
	        c,
	        // tab (\t), line feed (\n), vertical tab, form feed, carriage return (\r)
	        '\u{0009}'..='\u{000D}'
	        // space
	        | '\u{0020}'
	        // next line (from latin1)
	        | '\u{0085}'
	        // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
	        | '\u{200E}'..='\u{200F}'
	        // LINE SEPARATOR, PARAGRAPH SEPARATOR
	        | '\u{2028}'..='\u{2029}'
	    )
	}

	/// True if `c` is a horizontal whitespace character, i.e. a tab or a space.
	fn is_horizontal_whitespace(c: char) -> bool {
	    // Only the horizontal members of the whitespace set accepted by `is_whitespace`; the values
	    // are hard-coded because that set is stable across Unicode versions.
	    matches!(c, '\t' | ' ')
	}

	/// Removes one trailing line terminator (`\r\n` or `\n`) from `text`, if present.
	fn strip_newline(text: &str) -> &str {
	    let Some(line) = text.strip_suffix('\n') else {
	        // A lone `\r` is not treated as a line terminator.
	        return text;
	    };
	    line.strip_suffix('\r').unwrap_or(line)
	}

	/// Error produced when a frontmatter block is malformed.
	#[derive(Debug)]
	pub struct FrontmatterError {
	    /// Human-readable description of the problem; also the `Display` output
	    message: String,
	    /// The span the error is primarily about
	    primary_span: Span,
	    /// Additional spans attached with `push_visible_span`, in insertion order
	    visible_spans: Vec<Span>,
	}

	impl FrontmatterError {
	    /// Creates an error with the given message and primary span and no visible spans.
	    pub fn new(message: impl Into<String>, span: Span) -> Self {
	        let message = message.into();
	        Self { message, primary_span: span, visible_spans: Vec::new() }
	    }

	    /// Appends `span` to the visible spans and hands the error back for chaining.
	    pub fn push_visible_span(mut self, span: Span) -> Self {
	        self.visible_spans.push(span);
	        self
	    }

	    /// The error message.
	    pub fn message(&self) -> &str {
	        &self.message
	    }

	    /// A copy of the primary span.
	    pub fn primary_span(&self) -> Span {
	        self.primary_span.clone()
	    }

	    /// The visible spans, in the order they were pushed.
	    pub fn visible_spans(&self) -> &[Span] {
	        self.visible_spans.as_slice()
	    }
	}

	impl std::fmt::Display for FrontmatterError {
	    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
	        // Delegate to the message so formatter flags (width, padding, ...) keep working.
	        std::fmt::Display::fmt(&self.message, fmt)
	    }
	}

	impl std::error::Error for FrontmatterError {}