| //! This module provides a way to construct a `File`. |
| //! It is intended to be completely decoupled from the |
| //! parser, so as to allow to evolve the tree representation |
| //! and the parser algorithm independently. |
| use std::mem; |
| |
| use crate::{ |
| SyntaxKind::{self, *}, |
| output::Output, |
| }; |
| |
| /// `Parser` produces a flat list of `Event`s. |
| /// They are converted to a tree-structure in |
| /// a separate pass, via `TreeBuilder`. |
| #[derive(Debug, PartialEq)] |
| pub(crate) enum Event { |
| /// This event signifies the start of the node. |
| /// It should be either abandoned (in which case the |
| /// `kind` is `TOMBSTONE`, and the event is ignored), |
| /// or completed via a `Finish` event. |
| /// |
| /// All tokens between a `Start` and a `Finish` would |
| /// become the children of the respective node. |
| /// |
| /// For left-recursive syntactic constructs, the parser produces |
| /// a child node before it sees a parent. `forward_parent` |
| /// saves the position of current event's parent. |
| /// |
| /// Consider this path |
| /// |
| /// foo::bar |
| /// |
| /// The events for it would look like this: |
| /// |
| /// ```text |
| /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH |
| /// | /\ |
| /// | | |
| /// +------forward-parent------+ |
| /// ``` |
| /// |
| /// And the tree would look like this |
| /// |
| /// ```text |
| /// +--PATH---------+ |
| /// | | | |
| /// | | | |
| /// | '::' 'bar' |
| /// | |
| /// PATH |
| /// | |
| /// 'foo' |
| /// ``` |
| /// |
| /// See also `CompletedMarker::precede`. |
| Start { |
| kind: SyntaxKind, |
| forward_parent: Option<u32>, |
| }, |
| |
| /// Complete the previous `Start` event |
| Finish, |
| |
| /// Produce a single leaf-element. |
| /// `n_raw_tokens` is used to glue complex contextual tokens. |
| /// For example, lexer tokenizes `>>` as `>`, `>`, and |
| /// `n_raw_tokens = 2` is used to produced a single `>>`. |
| Token { |
| kind: SyntaxKind, |
| n_raw_tokens: u8, |
| }, |
| /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal |
| /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge. |
| /// This event instructs whatever consumes the events to split the float literal into |
| /// the corresponding parts. |
| FloatSplitHack { |
| ends_in_dot: bool, |
| }, |
| Error { |
| msg: String, |
| }, |
| } |
| |
| impl Event { |
| pub(crate) fn tombstone() -> Self { |
| Event::Start { kind: TOMBSTONE, forward_parent: None } |
| } |
| } |
| |
| /// Generate the syntax tree with the control of events. |
| pub(super) fn process(mut events: Vec<Event>) -> Output { |
| let mut res = Output::default(); |
| let mut forward_parents = Vec::new(); |
| |
| for i in 0..events.len() { |
| match mem::replace(&mut events[i], Event::tombstone()) { |
| Event::Start { kind, forward_parent } => { |
| // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, |
| // in the normal control flow, the parent-child relation: `A -> B -> C`, |
| // while with the magic forward_parent, it writes: `C <- B <- A`. |
| |
| // append `A` into parents. |
| forward_parents.push(kind); |
| let mut idx = i; |
| let mut fp = forward_parent; |
| while let Some(fwd) = fp { |
| idx += fwd as usize; |
| // append `A`'s forward_parent `B` |
| fp = match mem::replace(&mut events[idx], Event::tombstone()) { |
| Event::Start { kind, forward_parent } => { |
| forward_parents.push(kind); |
| forward_parent |
| } |
| _ => unreachable!(), |
| }; |
| // append `B`'s forward_parent `C` in the next stage. |
| } |
| |
| for kind in forward_parents.drain(..).rev() { |
| if kind != TOMBSTONE { |
| res.enter_node(kind); |
| } |
| } |
| } |
| Event::Finish => res.leave_node(), |
| Event::Token { kind, n_raw_tokens } => { |
| res.token(kind, n_raw_tokens); |
| } |
| Event::FloatSplitHack { ends_in_dot } => { |
| res.float_split_hack(ends_in_dot); |
| let ev = mem::replace(&mut events[i + 1], Event::tombstone()); |
| assert!(matches!(ev, Event::Finish), "{ev:?}"); |
| } |
| Event::Error { msg } => res.error(msg), |
| } |
| } |
| |
| res |
| } |