| //! This module provides a way to construct a `File`. |
| //! It is intended to be completely decoupled from the |
| //! parser, so as to allow to evolve the tree representation |
| //! and the parser algorithm independently. |
| use std::{mem, num::NonZeroU32}; |
| |
| use crate::{ |
| SyntaxKind::{self, *}, |
| output::Output, |
| }; |
| |
| /// `Parser` produces a flat list of `Event`s. |
| /// They are converted to a tree-structure in |
| /// a separate pass, via `TreeBuilder`. |
| /// |
| /// Kept to 8 bytes: error messages live in a side table on the `Parser` |
| /// (the `errors` vec) and `Event::Error` only stores an index into it. |
| /// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away |
| /// (the offset is always ≥ 1 because the forward parent sits later in the |
| /// event stream). |
| #[derive(Debug, PartialEq, Clone, Copy)] |
| pub(crate) enum Event { |
| /// This event signifies the start of the node. |
| /// It should be either abandoned (in which case the |
| /// `kind` is `TOMBSTONE`, and the event is ignored), |
| /// or completed via a `Finish` event. |
| /// |
| /// All tokens between a `Start` and a `Finish` would |
| /// become the children of the respective node. |
| /// |
| /// For left-recursive syntactic constructs, the parser produces |
| /// a child node before it sees a parent. `forward_parent` |
| /// saves the position of current event's parent. |
| /// |
| /// Consider this path |
| /// |
| /// foo::bar |
| /// |
| /// The events for it would look like this: |
| /// |
| /// ```text |
| /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH |
| /// | /\ |
| /// | | |
| /// +------forward-parent------+ |
| /// ``` |
| /// |
| /// And the tree would look like this |
| /// |
| /// ```text |
| /// +--PATH---------+ |
| /// | | | |
| /// | | | |
| /// | '::' 'bar' |
| /// | |
| /// PATH |
| /// | |
| /// 'foo' |
| /// ``` |
| /// |
| /// See also `CompletedMarker::precede`. |
| Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> }, |
| |
| /// Complete the previous `Start` event |
| Finish, |
| |
| /// Produce a single leaf-element. |
| /// `n_raw_tokens` is used to glue complex contextual tokens. |
| /// For example, lexer tokenizes `>>` as `>`, `>`, and |
| /// `n_raw_tokens = 2` is used to produced a single `>>`. |
| Token { kind: SyntaxKind, n_raw_tokens: u8 }, |
| /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal |
| /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge. |
| /// This event instructs whatever consumes the events to split the float literal into |
| /// the corresponding parts. |
| FloatSplitHack { ends_in_dot: bool }, |
| /// Index into the parser's side `errors` vec. |
| Error { err: u32 }, |
| } |
| |
| impl Event { |
| pub(crate) fn tombstone() -> Self { |
| Event::Start { kind: TOMBSTONE, forward_parent: None } |
| } |
| } |
| |
| /// Generate the syntax tree with the control of events. `errors` is the |
| /// side table of error messages built up alongside the `events` stream. |
| pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output { |
| // Each event becomes roughly one u32 in Output, so preallocate to avoid |
| // the amortized grow-one churn we used to see in Output::enter_node. |
| let mut res = Output::with_event_capacity(events.len()); |
| let mut forward_parents = Vec::new(); |
| |
| for i in 0..events.len() { |
| match events[i] { |
| Event::Start { kind, forward_parent } => { |
| // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, |
| // in the normal control flow, the parent-child relation: `A -> B -> C`, |
| // while with the magic forward_parent, it writes: `C <- B <- A`. |
| |
| // append `A` into parents. |
| forward_parents.push(kind); |
| let mut idx = i; |
| let mut fp = forward_parent; |
| while let Some(fwd) = fp { |
| idx += fwd.get() as usize; |
| // append `A`'s forward_parent `B` |
| fp = match mem::replace(&mut events[idx], Event::tombstone()) { |
| Event::Start { kind, forward_parent } => { |
| forward_parents.push(kind); |
| forward_parent |
| } |
| _ => unreachable!(), |
| }; |
| // append `B`'s forward_parent `C` in the next stage. |
| } |
| |
| for kind in forward_parents.drain(..).rev() { |
| if kind != TOMBSTONE { |
| res.enter_node(kind); |
| } |
| } |
| } |
| Event::Finish => res.leave_node(), |
| Event::Token { kind, n_raw_tokens } => { |
| res.token(kind, n_raw_tokens); |
| } |
| Event::FloatSplitHack { ends_in_dot } => { |
| res.float_split_hack(ends_in_dot); |
| let ev = mem::replace(&mut events[i + 1], Event::tombstone()); |
| assert!(matches!(ev, Event::Finish), "{ev:?}"); |
| } |
| Event::Error { err } => { |
| // Move the string out of the side table; each index is visited |
| // exactly once, so swapping with an empty String is cheap and |
| // avoids any clone. |
| let msg = mem::take(&mut errors[err as usize]); |
| res.error(msg); |
| } |
| } |
| } |
| |
| res |
| } |