//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

use std::fmt;

use intern::Symbol;
use rustc_hash::{FxHashMap, FxHashSet};
use span::{Edition, SpanAnchor, SpanData, SpanMap};
use stdx::{format_to, never, non_empty_vec::NonEmptyVec};
use syntax::{
    ast::{self, make::tokens::doc_comment},
    format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
    SyntaxKind::{self, *},
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
};
use tt::{
    buffer::{Cursor, TokenBuffer},
    iter::TtIter,
    token_to_literal,
};

use crate::to_parser_input::to_parser_input;

#[cfg(test)]
mod tests;

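/// Maps a text range of the node being converted to the span to use for it in
/// the produced token tree.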
pub trait SpanMapper<S> {
    fn span_for(&self, range: TextRange) -> S;
}

impl<S> SpanMapper<SpanData<S>> for SpanMap<S>
where
    SpanData<S>: Copy,
{
    fn span_for(&self, range: TextRange) -> SpanData<S> {
        self.span_at(range.start())
    }
}

impl<S: Copy, SM: SpanMapper<S>> SpanMapper<S> for &SM {
    fn span_for(&self, range: TextRange) -> S {
        SM::span_for(self, range)
    }
}

/// Dummy things for testing where spans don't matter.
pub(crate) mod dummy_test_span_utils {

    use span::{Span, SyntaxContextId};

    use super::*;

    pub const DUMMY: Span = Span {
        range: TextRange::empty(TextSize::new(0)),
        anchor: span::SpanAnchor {
            file_id: span::EditionedFileId::new(
                span::FileId::from_raw(0xe4e4e),
                span::Edition::CURRENT,
            ),
            ast_id: span::ROOT_ERASED_FILE_AST_ID,
        },
        ctx: SyntaxContextId::ROOT,
    };

    pub struct DummyTestSpanMap;

    impl SpanMapper<Span> for DummyTestSpanMap {
        fn span_for(&self, range: syntax::TextRange) -> Span {
            Span {
                range,
                anchor: span::SpanAnchor {
                    file_id: span::EditionedFileId::new(
                        span::FileId::from_raw(0xe4e4e),
                        span::Edition::CURRENT,
                    ),
                    ast_id: span::ROOT_ERASED_FILE_AST_ID,
                },
                ctx: SyntaxContextId::ROOT,
            }
        }
    }
}

/// Doc comment desugaring differs between mbe and proc-macros.
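///
/// For example, `/// docs` desugars to the tokens of `#[doc = r" docs"]` under
/// [`DocCommentDesugarMode::Mbe`], but to `#[doc = " docs"]` under
/// [`DocCommentDesugarMode::ProcMacro`] (see [`desugar_doc_comment_text`]).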
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum DocCommentDesugarMode {
    /// Desugars doc comments as quoted raw strings
    Mbe,
    /// Desugars doc comments as quoted strings
    ProcMacro,
}

/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans.
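///
/// A minimal usage sketch (doctest ignored; uses the test-only
/// [`dummy_test_span_utils`] items defined above):
///
/// ```ignore
/// let parse = syntax::SourceFile::parse("fn f() {}", span::Edition::CURRENT);
/// let tt = syntax_node_to_token_tree(
///     &parse.syntax_node(),
///     dummy_test_span_utils::DummyTestSpanMap,
///     dummy_test_span_utils::DUMMY,
///     DocCommentDesugarMode::ProcMacro,
/// );
/// ```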
pub fn syntax_node_to_token_tree<Ctx, SpanMap>(
    node: &SyntaxNode,
    map: SpanMap,
    span: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
) -> tt::Subtree<SpanData<Ctx>>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    SpanMap: SpanMapper<SpanData<Ctx>>,
{
    let mut c = Converter::new(node, map, Default::default(), Default::default(), span, mode);
    convert_tokens(&mut c)
}

/// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the
/// subtree's spans. Additionally, the `append` and `remove` parameters allow extra tokens to be
/// injected into or hidden from the output.
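///
/// A sketch (doctest ignored; `elem_a` and `elem_b` are hypothetical
/// [`SyntaxElement`]s taken from `node`, and `extra_leaf` a prebuilt [`tt::Leaf`]):
///
/// ```ignore
/// let mut append = FxHashMap::default();
/// append.insert(elem_a.clone(), vec![extra_leaf]); // splice a leaf in after `elem_a`
/// let mut remove = FxHashSet::default();
/// remove.insert(elem_b); // hide `elem_b` from the output
/// let tt = syntax_node_to_token_tree_modified(
///     &node, map, append, remove, call_site, DocCommentDesugarMode::ProcMacro,
/// );
/// ```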
pub fn syntax_node_to_token_tree_modified<Ctx, SpanMap>(
    node: &SyntaxNode,
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<SpanData<Ctx>>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: SpanData<Ctx>,
    mode: DocCommentDesugarMode,
) -> tt::Subtree<SpanData<Ctx>>
where
    SpanMap: SpanMapper<SpanData<Ctx>>,
    SpanData<Ctx>: Copy + fmt::Debug,
{
    let mut c = Converter::new(node, map, append, remove, call_site, mode);
    convert_tokens(&mut c)
}

// The following items are what a `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>) -> token_tree_to_expr
// * Pat(P<ast::Pat>) -> token_tree_to_pat
// * Ty(P<ast::Ty>) -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>) -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>) -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)

/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`].
/// The produced `SpanMap` contains a mapping from the syntax node's offsets to the subtree's spans.
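///
/// A round-trip sketch (doctest ignored), pairing this with
/// [`syntax_node_to_token_tree`]:
///
/// ```ignore
/// let (parse, span_map) = token_tree_to_syntax_node(
///     &tt,
///     parser::TopEntryPoint::SourceFile,
///     parser::Edition::CURRENT,
/// );
/// let node = parse.syntax_node();
/// ```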
pub fn token_tree_to_syntax_node<Ctx>(
    tt: &tt::Subtree<SpanData<Ctx>>,
    entry_point: parser::TopEntryPoint,
    edition: parser::Edition,
) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
where
    SpanData<Ctx>: Copy + fmt::Debug,
{
    let buffer = match tt {
        tt::Subtree {
            delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. },
            token_trees,
        } => TokenBuffer::from_tokens(token_trees),
        _ => TokenBuffer::from_subtree(tt),
    };
    let parser_input = to_parser_input(edition, &buffer);
    let parser_output = entry_point.parse(&parser_input, edition);
    let mut tree_sink = TtTreeSink::new(buffer.begin());
    for event in parser_output.iter() {
        match event {
            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                tree_sink.token(kind, n_raw_tokens)
            }
            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
                tree_sink.float_split(has_pseudo_dot)
            }
            parser::Step::Enter { kind } => tree_sink.start_node(kind),
            parser::Step::Exit => tree_sink.finish_node(),
            parser::Step::Error { msg } => tree_sink.error(msg.to_owned()),
        }
    }
    tree_sink.finish()
}

/// Converts a string to a `TokenTree`. The spans of the produced subtree will be anchored to the
/// provided anchor with the given context.
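///
/// Returns `None` if the input fails to lex. A sketch (doctest ignored; the
/// anchor is the dummy one used by the tests below):
///
/// ```ignore
/// let tt = parse_to_token_tree(
///     Edition::CURRENT,
///     dummy_test_span_utils::DUMMY.anchor,
///     span::SyntaxContextId::ROOT,
///     "struct S;",
/// )
/// .expect("input lexes without errors");
/// ```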
pub fn parse_to_token_tree<Ctx>(
    edition: Edition,
    anchor: SpanAnchor,
    ctx: Ctx,
    text: &str,
) -> Option<tt::Subtree<SpanData<Ctx>>>
where
    SpanData<Ctx>: Copy + fmt::Debug,
    Ctx: Copy,
{
    let lexed = parser::LexedStr::new(edition, text);
    if lexed.errors().next().is_some() {
        return None;
    }
    let mut conv =
        RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro };
    Some(convert_tokens(&mut conv))
}

/// Converts a string to a `TokenTree`. The passed span is used for all spans of the produced subtree.
pub fn parse_to_token_tree_static_span<S>(
    edition: Edition,
    span: S,
    text: &str,
) -> Option<tt::Subtree<S>>
where
    S: Copy + fmt::Debug,
{
    let lexed = parser::LexedStr::new(edition, text);
    if lexed.errors().next().is_some() {
        return None;
    }
    let mut conv =
        StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro };
    Some(convert_tokens(&mut conv))
}

/// Splits a token tree into a list of expressions separated by `sep`, i.e. matching `$($e:expr)SEP*`.
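///
/// For example, given the token trees of `1 + 1, 2, 3` with `sep == ','`, this
/// returns three subtrees covering `1 + 1`, `2`, and `3` respectively.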
pub fn parse_exprs_with_sep(
    tt: &tt::Subtree<span::Span>,
    sep: char,
    span: span::Span,
    edition: Edition,
) -> Vec<tt::Subtree<span::Span>> {
    if tt.token_trees.is_empty() {
        return Vec::new();
    }

    let mut iter = TtIter::new(tt);
    let mut res = Vec::new();

    while iter.peek_n(0).is_some() {
        let expanded = crate::expect_fragment(
            &mut iter,
            parser::PrefixEntryPoint::Expr,
            edition,
            tt::DelimSpan { open: tt.delimiter.open, close: tt.delimiter.close },
        );

        res.push(match expanded.value {
            None => break,
            Some(tt) => tt.subtree_or_wrap(tt::DelimSpan { open: span, close: span }),
        });

        let mut fork = iter.clone();
        if fork.expect_char(sep).is_err() {
            break;
        }
        iter = fork;
    }

    if iter.peek_n(0).is_some() {
        res.push(tt::Subtree {
            delimiter: tt::Delimiter::invisible_spanned(span),
            token_trees: iter.cloned().collect(),
        });
    }

    res
}

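/// Core conversion loop shared by all [`TokenConverter`]s: it walks the token
/// stream, opens a new subtree builder on `(`/`{`/`[`, closes it on the
/// matching delimiter, and converts everything else into punct, ident, or
/// literal leaves (desugaring doc comments and splitting lifetimes along the
/// way).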
fn convert_tokens<S, C>(conv: &mut C) -> tt::Subtree<S>
where
    C: TokenConverter<S>,
    S: Copy + fmt::Debug,
    C::Token: fmt::Debug,
{
    let entry = tt::SubtreeBuilder {
        delimiter: tt::Delimiter::invisible_spanned(conv.call_site()),
        token_trees: vec![],
    };
    let mut stack = NonEmptyVec::new(entry);

    while let Some((token, abs_range)) = conv.bump() {
        let tt::SubtreeBuilder { delimiter, token_trees } = stack.last_mut();

        let tt = match token.as_leaf() {
            Some(leaf) => tt::TokenTree::Leaf(leaf.clone()),
            None => match token.kind(conv) {
                // Desugar doc comments into doc attributes
                COMMENT => {
                    let span = conv.span_for(abs_range);
                    if let Some(tokens) = conv.convert_doc_comment(&token, span) {
                        token_trees.extend(tokens);
                    }
                    continue;
                }
                kind if kind.is_punct() && kind != UNDERSCORE => {
                    let expected = match delimiter.kind {
                        tt::DelimiterKind::Parenthesis => Some(T![')']),
                        tt::DelimiterKind::Brace => Some(T!['}']),
                        tt::DelimiterKind::Bracket => Some(T![']']),
                        tt::DelimiterKind::Invisible => None,
                    };

                    // The current token is the closing delimiter we expect; fix up the closing
                    // span and end the subtree here.
                    if matches!(expected, Some(expected) if expected == kind) {
                        if let Some(mut subtree) = stack.pop() {
                            subtree.delimiter.close = conv.span_for(abs_range);
                            stack.last_mut().token_trees.push(subtree.build().into());
                        }
                        continue;
                    }

                    let delim = match kind {
                        T!['('] => Some(tt::DelimiterKind::Parenthesis),
                        T!['{'] => Some(tt::DelimiterKind::Brace),
                        T!['['] => Some(tt::DelimiterKind::Bracket),
                        _ => None,
                    };

                    // Start a new subtree
                    if let Some(kind) = delim {
                        let open = conv.span_for(abs_range);
                        stack.push(tt::SubtreeBuilder {
                            delimiter: tt::Delimiter {
                                open,
                                // will be overwritten on subtree close above
                                close: open,
                                kind,
                            },
                            token_trees: vec![],
                        });
                        continue;
                    }

                    let spacing = match conv.peek().map(|next| next.kind(conv)) {
                        Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                        _ => tt::Spacing::Alone,
                    };
                    let Some(char) = token.to_char(conv) else {
                        panic!("Token from lexer must be single char: token = {token:#?}")
                    };
                    tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) })
                        .into()
                }
                kind => {
                    macro_rules! make_ident {
                        () => {
                            tt::Ident {
                                span: conv.span_for(abs_range),
                                sym: Symbol::intern(&token.to_text(conv)),
                                is_raw: tt::IdentIsRaw::No,
                            }
                            .into()
                        };
                    }
                    let leaf: tt::Leaf<_> = match kind {
                        T![true] | T![false] => make_ident!(),
                        IDENT => {
                            let text = token.to_text(conv);
                            tt::Ident::new(&text, conv.span_for(abs_range)).into()
                        }
                        UNDERSCORE => make_ident!(),
                        k if k.is_keyword() => make_ident!(),
                        k if k.is_literal() => {
                            let text = token.to_text(conv);
                            let span = conv.span_for(abs_range);
                            token_to_literal(&text, span).into()
                        }
                        LIFETIME_IDENT => {
                            let apostrophe = tt::Leaf::from(tt::Punct {
                                char: '\'',
                                spacing: tt::Spacing::Joint,
                                span: conv
                                    .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))),
                            });
                            token_trees.push(apostrophe.into());

                            let ident = tt::Leaf::from(tt::Ident {
                                sym: Symbol::intern(&token.to_text(conv)[1..]),
                                span: conv.span_for(TextRange::new(
                                    abs_range.start() + TextSize::of('\''),
                                    abs_range.end(),
                                )),
                                is_raw: tt::IdentIsRaw::No,
                            });
                            token_trees.push(ident.into());
                            continue;
                        }
                        _ => continue,
                    };

                    leaf.into()
                }
            },
        };

        token_trees.push(tt);
    }

    // If we get here, we've consumed all input tokens.
    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
    // Merge them so we're left with one.
    while let Some(entry) = stack.pop() {
        let parent = stack.last_mut();

        let leaf: tt::Leaf<_> = tt::Punct {
            span: entry.delimiter.open,
            char: match entry.delimiter.kind {
                tt::DelimiterKind::Parenthesis => '(',
                tt::DelimiterKind::Brace => '{',
                tt::DelimiterKind::Bracket => '[',
                tt::DelimiterKind::Invisible => '$',
            },
            spacing: tt::Spacing::Alone,
        }
        .into();
        parent.token_trees.push(leaf.into());
        parent.token_trees.extend(entry.token_trees);
    }

    let subtree = stack.into_last().build();
    if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
        first.clone()
    } else {
        subtree
    }
}

fn is_single_token_op(kind: SyntaxKind) -> bool {
    matches!(
        kind,
        EQ | L_ANGLE
            | R_ANGLE
            | BANG
            | AMP
            | PIPE
            | TILDE
            | AT
            | DOT
            | COMMA
            | SEMICOLON
            | COLON
            | POUND
            | DOLLAR
            | QUESTION
            | PLUS
            | MINUS
            | STAR
            | SLASH
            | PERCENT
            | CARET
            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
            // identifier.
            | LIFETIME_IDENT
    )
}

/// Returns the textual content of a doc comment block as a quoted string.
/// That is, it strips the leading `///` (or `/**`, etc.) and the trailing `*/`, and then quotes
/// the result, which is needed to convert it into a `tt::Literal`.
///
/// Note that proc-macros desugar with string literals, whereas macro_rules macros desugar with
/// raw string literals.
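///
/// A sketch of the raw-string hash counting (doctest ignored):
///
/// ```ignore
/// let (sym, kind) =
///     desugar_doc_comment_text(r##" has "# inside "##, DocCommentDesugarMode::Mbe);
/// // Two `#`s are needed so that the embedded `"#` cannot terminate the raw string.
/// assert_eq!(kind, tt::LitKind::StrRaw(2));
/// assert_eq!(sym.as_str(), r##" has "# inside "##);
/// ```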
pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Symbol, tt::LitKind) {
    match mode {
        DocCommentDesugarMode::Mbe => {
            let mut num_of_hashes = 0;
            let mut count = 0;
            for ch in text.chars() {
                count = match ch {
                    '"' => 1,
                    '#' if count > 0 => count + 1,
                    _ => 0,
                };
                num_of_hashes = num_of_hashes.max(count);
            }

            // Quote raw string with delimiters
            (Symbol::intern(text), tt::LitKind::StrRaw(num_of_hashes))
        }
        // Quote string with delimiters
        DocCommentDesugarMode::ProcMacro => {
            (Symbol::intern(&format_smolstr!("{}", text.escape_debug())), tt::LitKind::Str)
        }
    }
}

fn convert_doc_comment<S: Copy>(
    token: &syntax::SyntaxToken,
    span: S,
    mode: DocCommentDesugarMode,
) -> Option<Vec<tt::TokenTree<S>>> {
    cov_mark::hit!(test_meta_doc_comments);
    let comment = ast::Comment::cast(token.clone())?;
    let doc = comment.kind().doc?;

    let mk_ident = |s: &str| {
        tt::TokenTree::from(tt::Leaf::from(tt::Ident {
            sym: Symbol::intern(s),
            span,
            is_raw: tt::IdentIsRaw::No,
        }))
    };

    let mk_punct = |c: char| {
        tt::TokenTree::from(tt::Leaf::from(tt::Punct {
            char: c,
            spacing: tt::Spacing::Alone,
            span,
        }))
    };

    let mk_doc_literal = |comment: &ast::Comment| {
        let prefix_len = comment.prefix().len();
        let mut text = &comment.text()[prefix_len..];

        // Remove ending "*/"
        if comment.kind().shape == ast::CommentShape::Block {
            text = &text[0..text.len() - 2];
        }
        let (text, kind) = desugar_doc_comment_text(text, mode);
        let lit = tt::Literal { symbol: text, span, kind, suffix: None };

        tt::TokenTree::from(tt::Leaf::from(lit))
    };

    // Make `doc = " Comments"`
    let meta_tkns = Box::new([mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)]);

    // Make `#[...]` (or `#![...]` for inner doc comments)
    let mut token_trees = Vec::with_capacity(3);
    token_trees.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        token_trees.push(mk_punct('!'));
    }
    token_trees.push(tt::TokenTree::from(tt::Subtree {
        delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket },
        token_trees: meta_tkns,
    }));

    Some(token_trees)
}

/// A raw token (straight from the lexer) converter.
struct RawConverter<'a, Ctx> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    anchor: SpanAnchor,
    ctx: Ctx,
    mode: DocCommentDesugarMode,
}
/// A raw token (straight from the lexer) converter that gives every token the same span.
struct StaticRawConverter<'a, S> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    span: S,
    mode: DocCommentDesugarMode,
}

trait SrcToken<Ctx, S> {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        None
    }
}

trait TokenConverter<S>: Sized {
    type Token: SrcToken<Self, S>;

    fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>>;

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn span_for(&self, range: TextRange) -> S;

    fn call_site(&self) -> S;
}

impl<S, Ctx> SrcToken<RawConverter<'_, Ctx>, S> for usize {
    fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}

impl<S: Copy> SrcToken<StaticRawConverter<'_, S>, S> for usize {
    fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
}

impl<Ctx: Copy> TokenConverter<SpanData<Ctx>> for RawConverter<'_, Ctx>
where
    SpanData<Ctx>: Copy,
{
    type Token = usize;

    fn convert_doc_comment(
        &self,
        &token: &usize,
        span: SpanData<Ctx>,
    ) -> Option<Vec<tt::TokenTree<SpanData<Ctx>>>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, range: TextRange) -> SpanData<Ctx> {
        SpanData { range, anchor: self.anchor, ctx: self.ctx }
    }

    fn call_site(&self) -> SpanData<Ctx> {
        SpanData { range: TextRange::empty(0.into()), anchor: self.anchor, ctx: self.ctx }
    }
}

impl<S> TokenConverter<S> for StaticRawConverter<'_, S>
where
    S: Copy,
{
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: S) -> Option<Vec<tt::TokenTree<S>>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span, self.mode)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?);

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn span_for(&self, _: TextRange) -> S {
        self.span
    }

    fn call_site(&self) -> S {
        self.span
    }
}

struct Converter<SpanMap, S> {
    current: Option<SyntaxToken>,
    current_leaves: Vec<tt::Leaf<S>>,
    preorder: PreorderWithTokens,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
    /// Used to make the emitted text ranges in the spans relative to the span anchor.
    map: SpanMap,
    append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
    remove: FxHashSet<SyntaxElement>,
    call_site: S,
    mode: DocCommentDesugarMode,
}

impl<SpanMap, S> Converter<SpanMap, S> {
    fn new(
        node: &SyntaxNode,
        map: SpanMap,
        append: FxHashMap<SyntaxElement, Vec<tt::Leaf<S>>>,
        remove: FxHashSet<SyntaxElement>,
        call_site: S,
        mode: DocCommentDesugarMode,
    ) -> Self {
        let mut this = Converter {
            current: None,
            preorder: node.preorder_with_tokens(),
            range: node.text_range(),
            punct_offset: None,
            map,
            append,
            remove,
            call_site,
            current_leaves: vec![],
            mode,
        };
        let first = this.next_token();
        this.current = first;
        this
    }

    fn next_token(&mut self) -> Option<SyntaxToken> {
        while let Some(ev) = self.preorder.next() {
            match ev {
                WalkEvent::Enter(token) => {
                    if self.remove.contains(&token) {
                        match token {
                            syntax::NodeOrToken::Token(_) => {
                                continue;
                            }
                            node => {
                                self.preorder.skip_subtree();
                                if let Some(mut v) = self.append.remove(&node) {
                                    v.reverse();
                                    self.current_leaves.extend(v);
                                    return None;
                                }
                            }
                        }
                    } else if let syntax::NodeOrToken::Token(token) = token {
                        return Some(token);
                    }
                }
                WalkEvent::Leave(ele) => {
                    if let Some(mut v) = self.append.remove(&ele) {
                        v.reverse();
                        self.current_leaves.extend(v);
                        return None;
                    }
                }
            }
        }
        None
    }
}

#[derive(Debug)]
enum SynToken<S> {
    Ordinary(SyntaxToken),
    Punct { token: SyntaxToken, offset: usize },
    Leaf(tt::Leaf<S>),
}

impl<S> SynToken<S> {
    fn token(&self) -> &SyntaxToken {
        match self {
            SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it,
            SynToken::Leaf(_) => unreachable!(),
        }
    }
}

impl<SpanMap, S> SrcToken<Converter<SpanMap, S>, S> for SynToken<S> {
    fn kind(&self, _ctx: &Converter<SpanMap, S>) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punct { token, offset: i } => {
                SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap()
            }
            SynToken::Leaf(_) => {
                never!();
                SyntaxKind::ERROR
            }
        }
    }
    fn to_char(&self, _ctx: &Converter<SpanMap, S>) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i),
            SynToken::Leaf(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter<SpanMap, S>) -> SmolStr {
        match self {
            SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(),
            SynToken::Leaf(_) => {
                never!();
                "".into()
            }
        }
    }
    fn as_leaf(&self) -> Option<&tt::Leaf<S>> {
        match self {
            SynToken::Ordinary(_) | SynToken::Punct { .. } => None,
            SynToken::Leaf(it) => Some(it),
        }
    }
}

impl<S, SpanMap> TokenConverter<S> for Converter<SpanMap, S>
where
    S: Copy,
    SpanMap: SpanMapper<S>,
{
    type Token = SynToken<S>;
    fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option<Vec<tt::TokenTree<S>>> {
        convert_doc_comment(token.token(), span, self.mode)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((
                    SynToken::Punct { token: punct, offset: u32::from(offset) as usize },
                    range,
                ));
            }
        }

        if let Some(leaf) = self.current_leaves.pop() {
            if self.current_leaves.is_empty() {
                self.current = self.next_token();
            }
            return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0))));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        self.current = self.next_token();
        let token = if curr.kind().is_punct() {
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punct { token: curr, offset: 0_usize }, range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punct { token: punct, offset: usize::from(offset) });
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punct { token: curr, offset: 0_usize }
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn span_for(&self, range: TextRange) -> S {
        self.map.span_for(range)
    }
    fn call_site(&self) -> S {
        self.call_site
    }
}

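/// Sink that receives parser events and rebuilds a [`SyntaxNode`] (plus the
/// accompanying [`SpanMap`]) from the underlying [`TokenBuffer`].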
struct TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    buf: String,
    cursor: Cursor<'a, SpanData<Ctx>>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: SpanMap<Ctx>,
}

impl<'a, Ctx> TtTreeSink<'a, Ctx>
where
    SpanData<Ctx>: Copy,
{
    fn new(cursor: Cursor<'a, SpanData<Ctx>>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: SpanMap::empty(),
        }
    }

    fn finish(mut self) -> (Parse<SyntaxNode>, SpanMap<Ctx>) {
        self.token_map.finish();
        (self.inner.finish(), self.token_map)
    }
}

fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
    let texts = match d {
        tt::DelimiterKind::Parenthesis => "()",
        tt::DelimiterKind::Brace => "{}",
        tt::DelimiterKind::Bracket => "[]",
        tt::DelimiterKind::Invisible => return None,
    };

    // `idx` selects the opening (0) or closing (1) character of the two-byte string.
    let idx = closing as usize;
    Some(&texts[idx..texts.len() - (1 - idx)])
}

impl<Ctx> TtTreeSink<'_, Ctx>
where
    SpanData<Ctx>: Copy + fmt::Debug,
{
    /// Parses a float literal as if it were one or two name-ref nodes with a dot in between.
    /// This occurs when a float literal is used as a field access.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        let (text, span) = match self.cursor.token_tree() {
            Some(tt::buffer::TokenTreeRef::Leaf(
                tt::Leaf::Literal(tt::Literal {
                    symbol: text,
                    span,
                    kind: tt::LitKind::Float,
                    suffix: _,
                }),
                _,
            )) => (text.as_str(), *span),
            tt => unreachable!("{tt:?}"),
        };
        // FIXME: Span splitting
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());

                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();
                self.token_map.push(self.text_pos + TextSize::of(left), span);
                // Here we move the exit up; the original exit has been deleted in the process.
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");
                self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span);

                if has_pseudo_dot {
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    assert!(!right.is_empty(), "{left}.{right}");
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.token_map.push(self.text_pos + TextSize::of(text), span);
                    self.inner.finish_node();

                    // the parser creates an unbalanced start node, we are required to close it here
                    self.inner.finish_node();
                }
                self.text_pos += TextSize::of(text);
            }
            None => unreachable!(),
        }
        self.cursor = self.cursor.bump();
    }

    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == LIFETIME_IDENT {
            // A lifetime was emitted as two tokens (`'` + ident) by the converter,
            // so consume two tokens from the buffer here.
            n_tokens = 2;
        }

        let mut last = self.cursor;
        'tokens: for _ in 0..n_tokens {
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last = self.cursor;
            let (text, span) = loop {
                break match self.cursor.token_tree() {
                    Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => match leaf {
                        tt::Leaf::Ident(ident) => {
                            if ident.is_raw.yes() {
                                self.buf.push_str("r#");
                                self.text_pos += TextSize::of("r#");
                            }
                            let r = (ident.sym.as_str(), ident.span);
                            self.cursor = self.cursor.bump();
                            r
                        }
                        tt::Leaf::Punct(punct) => {
                            assert!(punct.char.is_ascii());
                            tmp = punct.char as u8;
                            let r = (
                                std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                punct.span,
                            );
                            self.cursor = self.cursor.bump();
                            r
                        }
                        tt::Leaf::Literal(lit) => {
                            let buf_l = self.buf.len();
                            format_to!(self.buf, "{lit}");
                            debug_assert_ne!(self.buf.len() - buf_l, 0);
                            self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32);
                            self.token_map.push(self.text_pos, lit.span);
                            self.cursor = self.cursor.bump();
                            continue 'tokens;
                        }
                    },
                    Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
                        self.cursor = self.cursor.subtree().unwrap();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => (it, subtree.delimiter.open),
                            None => continue,
                        }
                    }
                    None => {
                        let parent = self.cursor.end().unwrap();
                        self.cursor = self.cursor.bump();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => (it, parent.delimiter.close),
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
            self.token_map.push(self.text_pos, span);
        }

        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it.
        // Add whitespace between adjacent puncts
        let next = last.bump();
        if let (
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)),
        ) = (last.token_tree(), next.token_tree())
        {
            // Note: we assume that a semicolon is always the last token of its line elsewhere in
            // rust-analyzer, so we don't add whitespace after it.
            //
            // When `next` is a `Punct` of `'`, that's part of a lifetime identifier, so we don't
            // need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
                self.token_map.push(self.text_pos, curr.span);
            }
        }
    }

    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }

    fn finish_node(&mut self) {
        self.inner.finish_node();
    }

    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }
}