| //! Lexing `&str` into a sequence of Rust tokens. |
| //! |
//! Note that strictly speaking the parser in this crate is not required to work
//! on tokens which originated from text. Macros, for example, can synthesize tokens
//! out of thin air. So, ideally, the lexer should be an orthogonal crate. It is,
//! however, convenient to include a text-based lexer here!
| //! |
| //! Note that these tokens, unlike the tokens we feed into the parser, do |
| //! include info about comments and whitespace. |
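//!
//! A minimal usage sketch (assuming this crate publicly re-exports `Edition`,
//! `LexedStr`, and `SyntaxKind` under the crate name `parser`):
//!
//! ```
//! use parser::{Edition, LexedStr, SyntaxKind};
//!
//! let lexed = LexedStr::new(Edition::Edition2021, "let x = 92; // tail");
//! // Unlike parser input, whitespace and comments show up as tokens.
//! assert!((0..lexed.len()).any(|i| lexed.kind(i) == SyntaxKind::WHITESPACE));
//! assert!((0..lexed.len()).any(|i| lexed.kind(i) == SyntaxKind::COMMENT));
//! assert!(lexed.errors().next().is_none());
//! ```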
| |
| use std::ops; |
| |
| use rustc_literal_escaper::{ |
| EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, |
| unescape_str, |
| }; |
| |
| use crate::{ |
| Edition, |
| SyntaxKind::{self, *}, |
| T, |
| }; |
| |
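/// The output of lexing: parallel `kind`/`start` vectors (terminated by an extra
/// `EOF` entry so every token's byte range can be computed), plus lex errors
/// sorted by token index.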
| pub struct LexedStr<'a> { |
| text: &'a str, |
| kind: Vec<SyntaxKind>, |
| start: Vec<u32>, |
| error: Vec<LexError>, |
| } |
| |
| struct LexError { |
| msg: String, |
| token: u32, |
| } |
| |
| impl<'a> LexedStr<'a> { |
| pub fn new(edition: Edition, text: &'a str) -> LexedStr<'a> { |
| let _p = tracing::info_span!("LexedStr::new").entered(); |
| let mut conv = Converter::new(edition, text); |
| if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { |
| conv.push(SHEBANG, shebang_len, Vec::new()); |
| }; |
| |
        // Re-create the tokenizer from scratch for every token because `GuardedStrPrefix`
        // is a single token in the lexer, but we want to split it into two in editions
        // before 2024 (see the sketch tests at the end of this file).
| while let Some(token) = |
| rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next() |
| { |
| let token_text = &text[conv.offset..][..token.len as usize]; |
| |
| conv.extend_token(&token.kind, token_text); |
| } |
| |
| conv.finalize_with_eof() |
| } |
| |
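    /// Lexes `text` as a single token.
    ///
    /// Returns the token's `SyntaxKind` together with an optional lex error message,
    /// or `None` if `text` is empty or does not lex to exactly one token.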
| pub fn single_token(edition: Edition, text: &'a str) -> Option<(SyntaxKind, Option<String>)> { |
| if text.is_empty() { |
| return None; |
| } |
| |
| let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?; |
| if token.len as usize != text.len() { |
| return None; |
| } |
| |
| let mut conv = Converter::new(edition, text); |
| conv.extend_token(&token.kind, text); |
| match &*conv.res.kind { |
| [kind] => Some((*kind, conv.res.error.pop().map(|it| it.msg))), |
| _ => None, |
| } |
| } |
| |
| pub fn as_str(&self) -> &str { |
| self.text |
| } |
| |
| pub fn len(&self) -> usize { |
| self.kind.len() - 1 |
| } |
| |
| pub fn is_empty(&self) -> bool { |
| self.len() == 0 |
| } |
| |
| pub fn kind(&self, i: usize) -> SyntaxKind { |
| assert!(i < self.len()); |
| self.kind[i] |
| } |
| |
| pub fn text(&self, i: usize) -> &str { |
| self.range_text(i..i + 1) |
| } |
| |
| pub fn range_text(&self, r: ops::Range<usize>) -> &str { |
| assert!(r.start < r.end && r.end <= self.len()); |
| let lo = self.start[r.start] as usize; |
| let hi = self.start[r.end] as usize; |
| &self.text[lo..hi] |
| } |
| |
| // Naming is hard. |
| pub fn text_range(&self, i: usize) -> ops::Range<usize> { |
| assert!(i < self.len()); |
| let lo = self.start[i] as usize; |
| let hi = self.start[i + 1] as usize; |
| lo..hi |
| } |
| pub fn text_start(&self, i: usize) -> usize { |
| assert!(i <= self.len()); |
| self.start[i] as usize |
| } |
| pub fn text_len(&self, i: usize) -> usize { |
| assert!(i < self.len()); |
| let r = self.text_range(i); |
| r.end - r.start |
| } |
| |
| pub fn error(&self, i: usize) -> Option<&str> { |
| assert!(i < self.len()); |
| let err = self.error.binary_search_by_key(&(i as u32), |i| i.token).ok()?; |
| Some(self.error[err].msg.as_str()) |
| } |
| |
| pub fn errors(&self) -> impl Iterator<Item = (usize, &str)> + '_ { |
| self.error.iter().map(|it| (it.token as usize, it.msg.as_str())) |
| } |
| |
| fn push(&mut self, kind: SyntaxKind, offset: usize) { |
| self.kind.push(kind); |
| self.start.push(offset as u32); |
| } |
| } |
| |
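/// Incrementally builds a [`LexedStr`], converting `rustc_lexer` tokens into
/// `SyntaxKind`s while tracking the current byte `offset` and collecting errors.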
| struct Converter<'a> { |
| res: LexedStr<'a>, |
| offset: usize, |
| edition: Edition, |
| } |
| |
| impl<'a> Converter<'a> { |
| fn new(edition: Edition, text: &'a str) -> Self { |
| Self { |
| res: LexedStr { text, kind: Vec::new(), start: Vec::new(), error: Vec::new() }, |
| offset: 0, |
| edition, |
| } |
| } |
| |
    /// Heuristically checks whether one of the last few lexed `STRING` tokens looks
    /// unterminated, i.e. appears to have swallowed following code: a `(` together
    /// with either a `//` comment or a `;` at the end of a line.
| fn has_likely_unterminated_string(&self) -> bool { |
| let Some(last_idx) = self.res.kind.len().checked_sub(1) else { return false }; |
| |
| for i in (0..=last_idx).rev().take(5) { |
| if self.res.kind[i] == STRING { |
| let start = self.res.start[i] as usize; |
| let end = self.res.start.get(i + 1).map(|&s| s as usize).unwrap_or(self.offset); |
| let content = &self.res.text[start..end]; |
| |
| if content.contains('(') && (content.contains("//") || content.contains(";\n")) { |
| return true; |
| } |
| } |
| } |
| false |
| } |
| |
| fn finalize_with_eof(mut self) -> LexedStr<'a> { |
| self.res.push(EOF, self.offset); |
| self.res |
| } |
| |
| fn push(&mut self, kind: SyntaxKind, len: usize, errors: Vec<String>) { |
| self.res.push(kind, self.offset); |
| self.offset += len; |
| |
| for msg in errors { |
| if !msg.is_empty() { |
| self.res.error.push(LexError { msg, token: self.res.len() as u32 }); |
| } |
| } |
| } |
| |
| fn extend_token(&mut self, kind: &rustc_lexer::TokenKind, mut token_text: &str) { |
        // A note on an intended tradeoff:
        // We drop some useful information here (see patterns with double dots `..`).
        // Storing that info in `SyntaxKind` is not possible because it has to fit
        // into a `u16`, a requirement that comes from `rowan::SyntaxKind`.
| let mut errors: Vec<String> = vec![]; |
| |
| let syntax_kind = { |
| match kind { |
| rustc_lexer::TokenKind::LineComment { doc_style: _ } => COMMENT, |
| rustc_lexer::TokenKind::BlockComment { doc_style: _, terminated } => { |
| if !terminated { |
| errors.push( |
| "Missing trailing `*/` symbols to terminate the block comment".into(), |
| ); |
| } |
| COMMENT |
| } |
| |
| rustc_lexer::TokenKind::Frontmatter { |
| has_invalid_preceding_whitespace, |
| invalid_infostring, |
| } => { |
| if *has_invalid_preceding_whitespace { |
| errors.push("invalid preceding whitespace for frontmatter opening".into()); |
| } else if *invalid_infostring { |
| errors.push("invalid infostring for frontmatter".into()); |
| } |
| FRONTMATTER |
| } |
| |
| rustc_lexer::TokenKind::Whitespace => WHITESPACE, |
| |
| rustc_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE, |
| rustc_lexer::TokenKind::Ident => { |
| SyntaxKind::from_keyword(token_text, self.edition).unwrap_or(IDENT) |
| } |
| rustc_lexer::TokenKind::InvalidIdent => { |
| errors.push("Ident contains invalid characters".into()); |
| IDENT |
| } |
| |
| rustc_lexer::TokenKind::RawIdent => IDENT, |
| |
| rustc_lexer::TokenKind::GuardedStrPrefix if self.edition.at_least_2024() => { |
| // FIXME: rustc does something better for recovery. |
| errors.push("Invalid string literal (reserved syntax)".into()); |
| ERROR |
| } |
| rustc_lexer::TokenKind::GuardedStrPrefix => { |
                    // The token is `#"` or `##`. Push only a 1-byte `#` here; the outer
                    // loop in `LexedStr::new` re-lexes the remainder.
| token_text = &token_text[1..]; |
| POUND |
| } |
| |
| rustc_lexer::TokenKind::Literal { kind, .. } => { |
| self.extend_literal(token_text.len(), kind); |
| return; |
| } |
| |
| rustc_lexer::TokenKind::Lifetime { starts_with_number } => { |
| if *starts_with_number { |
| errors.push("Lifetime name cannot start with a number".into()); |
| } |
| LIFETIME_IDENT |
| } |
| rustc_lexer::TokenKind::UnknownPrefixLifetime => { |
| errors.push("Unknown lifetime prefix".into()); |
| LIFETIME_IDENT |
| } |
| rustc_lexer::TokenKind::RawLifetime => LIFETIME_IDENT, |
| |
| rustc_lexer::TokenKind::Semi => T![;], |
| rustc_lexer::TokenKind::Comma => T![,], |
| rustc_lexer::TokenKind::Dot => T![.], |
| rustc_lexer::TokenKind::OpenParen => T!['('], |
| rustc_lexer::TokenKind::CloseParen => T![')'], |
| rustc_lexer::TokenKind::OpenBrace => T!['{'], |
| rustc_lexer::TokenKind::CloseBrace => T!['}'], |
| rustc_lexer::TokenKind::OpenBracket => T!['['], |
| rustc_lexer::TokenKind::CloseBracket => T![']'], |
| rustc_lexer::TokenKind::At => T![@], |
| rustc_lexer::TokenKind::Pound => T![#], |
| rustc_lexer::TokenKind::Tilde => T![~], |
| rustc_lexer::TokenKind::Question => T![?], |
| rustc_lexer::TokenKind::Colon => T![:], |
| rustc_lexer::TokenKind::Dollar => T![$], |
| rustc_lexer::TokenKind::Eq => T![=], |
| rustc_lexer::TokenKind::Bang => T![!], |
| rustc_lexer::TokenKind::Lt => T![<], |
| rustc_lexer::TokenKind::Gt => T![>], |
| rustc_lexer::TokenKind::Minus => T![-], |
| rustc_lexer::TokenKind::And => T![&], |
| rustc_lexer::TokenKind::Or => T![|], |
| rustc_lexer::TokenKind::Plus => T![+], |
| rustc_lexer::TokenKind::Star => T![*], |
| rustc_lexer::TokenKind::Slash => T![/], |
| rustc_lexer::TokenKind::Caret => T![^], |
| rustc_lexer::TokenKind::Percent => T![%], |
| rustc_lexer::TokenKind::Unknown => ERROR, |
| rustc_lexer::TokenKind::UnknownPrefix if token_text == "builtin" => IDENT, |
| rustc_lexer::TokenKind::UnknownPrefix => { |
| let has_unterminated = self.has_likely_unterminated_string(); |
| |
| let error_msg = if has_unterminated { |
| format!( |
| "unknown literal prefix `{token_text}` (note: check for unterminated string literal)" |
| ) |
| } else { |
| "unknown literal prefix".to_owned() |
| }; |
| errors.push(error_msg); |
| IDENT |
| } |
| rustc_lexer::TokenKind::Eof => EOF, |
| } |
| }; |
| |
| self.push(syntax_kind, token_text.len(), errors); |
| } |
| |
| fn extend_literal(&mut self, len: usize, kind: &rustc_lexer::LiteralKind) { |
| let invalid_raw_msg = String::from("Invalid raw string literal"); |
| |
| let mut errors = vec![]; |
| let mut no_end_quote = |c: char, kind: &str| { |
| errors.push(format!("Missing trailing `{c}` symbol to terminate the {kind} literal")); |
| }; |
| |
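        // For the quoted literal kinds below, slice out the text between the opening
        // and the closing quote (skipping any `b`/`c` prefix) and run the matching
        // unescape function on it, collecting escape errors.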
| let syntax_kind = match *kind { |
| rustc_lexer::LiteralKind::Int { empty_int, base: _ } => { |
| if empty_int { |
| errors.push("Missing digits after the integer base prefix".into()); |
| } |
| INT_NUMBER |
| } |
| rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => { |
| if empty_exponent { |
| errors.push("Missing digits after the exponent symbol".into()); |
| } |
| FLOAT_NUMBER |
| } |
| rustc_lexer::LiteralKind::Char { terminated } => { |
| if !terminated { |
| no_end_quote('\'', "character"); |
| } else { |
| let text = &self.res.text[self.offset + 1..][..len - 1]; |
| let text = &text[..text.rfind('\'').unwrap()]; |
| if let Err(e) = unescape_char(text) { |
| errors.push(err_to_msg(e, Mode::Char)); |
| } |
| } |
| CHAR |
| } |
| rustc_lexer::LiteralKind::Byte { terminated } => { |
| if !terminated { |
| no_end_quote('\'', "byte"); |
| } else { |
| let text = &self.res.text[self.offset + 2..][..len - 2]; |
| let text = &text[..text.rfind('\'').unwrap()]; |
| if let Err(e) = unescape_byte(text) { |
| errors.push(err_to_msg(e, Mode::Byte)); |
| } |
| } |
| BYTE |
| } |
| rustc_lexer::LiteralKind::Str { terminated } => { |
| if !terminated { |
| no_end_quote('"', "string"); |
| } else { |
| let text = &self.res.text[self.offset + 1..][..len - 1]; |
| let text = &text[..text.rfind('"').unwrap()]; |
| unescape_str(text, |_, res| { |
| if let Err(e) = res { |
| errors.push(err_to_msg(e, Mode::Str)); |
| } |
| }); |
| } |
| STRING |
| } |
| rustc_lexer::LiteralKind::ByteStr { terminated } => { |
| if !terminated { |
| no_end_quote('"', "byte string"); |
| } else { |
| let text = &self.res.text[self.offset + 2..][..len - 2]; |
| let text = &text[..text.rfind('"').unwrap()]; |
| unescape_byte_str(text, |_, res| { |
| if let Err(e) = res { |
| errors.push(err_to_msg(e, Mode::ByteStr)); |
| } |
| }); |
| } |
| BYTE_STRING |
| } |
| rustc_lexer::LiteralKind::CStr { terminated } => { |
| if !terminated { |
| no_end_quote('"', "C string") |
| } else { |
| let text = &self.res.text[self.offset + 2..][..len - 2]; |
| let text = &text[..text.rfind('"').unwrap()]; |
| unescape_c_str(text, |_, res| { |
| if let Err(e) = res { |
| errors.push(err_to_msg(e, Mode::CStr)); |
| } |
| }); |
| } |
| C_STRING |
| } |
| rustc_lexer::LiteralKind::RawStr { n_hashes } => { |
| if n_hashes.is_none() { |
| errors.push(invalid_raw_msg); |
| } |
| STRING |
| } |
| rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { |
| if n_hashes.is_none() { |
| errors.push(invalid_raw_msg); |
| } |
| BYTE_STRING |
| } |
| rustc_lexer::LiteralKind::RawCStr { n_hashes } => { |
| if n_hashes.is_none() { |
| errors.push(invalid_raw_msg); |
| } |
| C_STRING |
| } |
| }; |
| |
| self.push(syntax_kind, len, errors); |
| } |
| } |
| |
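// Maps an `EscapeError` to a user-facing message. Empty strings mark cases that
// are deliberately not reported; `Converter::push` filters them out.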
| fn err_to_msg(error: EscapeError, mode: Mode) -> String { |
| match error { |
| EscapeError::ZeroChars => "empty character literal", |
| EscapeError::MoreThanOneChar => "character literal may only contain one codepoint", |
| EscapeError::LoneSlash => "", |
| EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => { |
| "unknown byte escape" |
| } |
| EscapeError::InvalidEscape => "unknown character escape", |
| EscapeError::BareCarriageReturn => "", |
| EscapeError::BareCarriageReturnInRawString => "", |
| EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped", |
| EscapeError::EscapeOnlyChar => "character constant must be escaped", |
| EscapeError::TooShortHexEscape => "numeric character escape is too short", |
| EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape", |
| EscapeError::OutOfRangeHexEscape => "out of range hex escape", |
| EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence", |
| EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape", |
| EscapeError::EmptyUnicodeEscape => "empty unicode escape", |
| EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape", |
| EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape", |
| EscapeError::OverlongUnicodeEscape => "overlong unicode escape", |
| EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape", |
| EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape", |
| EscapeError::UnicodeEscapeInByte => "unicode escape in byte string", |
| EscapeError::NonAsciiCharInByte if mode == Mode::Byte => { |
| "non-ASCII character in byte literal" |
| } |
| EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => { |
| "non-ASCII character in byte string literal" |
| } |
| EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal", |
| EscapeError::NulInCStr => "null character in C string literal", |
| EscapeError::UnskippedWhitespaceWarning => "", |
| EscapeError::MultipleSkippedLinesWarning => "", |
| } |
| .into() |
| } |