| //! See [`Parser`]. |
| |
| use std::cell::Cell; |
| |
| use drop_bomb::DropBomb; |
| use limit::Limit; |
| |
| use crate::{ |
| event::Event, |
| input::Input, |
| Edition, |
| SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, |
| TokenSet, T, |
| }; |
| |
| /// `Parser` struct provides the low-level API for |
| /// navigating through the stream of tokens and |
| /// constructing the parse tree. The actual parsing |
| /// happens in the [`grammar`](super::grammar) module. |
| /// |
| /// However, the result of this `Parser` is not a real |
| /// tree, but rather a flat stream of events of the form |
| /// "start expression, consume number literal, |
| /// finish expression". See `Event` docs for more. |
| pub(crate) struct Parser<'t> { |
| inp: &'t Input, |
| pos: usize, |
| events: Vec<Event>, |
| steps: Cell<u32>, |
| edition: Edition, |
| } |
| |
| static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000); |
| |
| impl<'t> Parser<'t> { |
| pub(super) fn new(inp: &'t Input, edition: Edition) -> Parser<'t> { |
| Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0), edition } |
| } |
| |
| pub(crate) fn finish(self) -> Vec<Event> { |
| self.events |
| } |
| |
| /// Returns the kind of the current token. |
| /// If parser has already reached the end of input, |
| /// the special `EOF` kind is returned. |
| pub(crate) fn current(&self) -> SyntaxKind { |
| self.nth(0) |
| } |
| |
| /// Lookahead operation: returns the kind of the next nth |
| /// token. |
| pub(crate) fn nth(&self, n: usize) -> SyntaxKind { |
| assert!(n <= 3); |
| |
| let steps = self.steps.get(); |
| assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck"); |
| self.steps.set(steps + 1); |
| |
| self.inp.kind(self.pos + n) |
| } |
| |
| /// Checks if the current token is `kind`. |
| pub(crate) fn at(&self, kind: SyntaxKind) -> bool { |
| self.nth_at(0, kind) |
| } |
| |
| pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { |
| match kind { |
| T![-=] => self.at_composite2(n, T![-], T![=]), |
| T![->] => self.at_composite2(n, T![-], T![>]), |
| T![::] => self.at_composite2(n, T![:], T![:]), |
| T![!=] => self.at_composite2(n, T![!], T![=]), |
| T![..] => self.at_composite2(n, T![.], T![.]), |
| T![*=] => self.at_composite2(n, T![*], T![=]), |
| T![/=] => self.at_composite2(n, T![/], T![=]), |
| T![&&] => self.at_composite2(n, T![&], T![&]), |
| T![&=] => self.at_composite2(n, T![&], T![=]), |
| T![%=] => self.at_composite2(n, T![%], T![=]), |
| T![^=] => self.at_composite2(n, T![^], T![=]), |
| T![+=] => self.at_composite2(n, T![+], T![=]), |
| T![<<] => self.at_composite2(n, T![<], T![<]), |
| T![<=] => self.at_composite2(n, T![<], T![=]), |
| T![==] => self.at_composite2(n, T![=], T![=]), |
| T![=>] => self.at_composite2(n, T![=], T![>]), |
| T![>=] => self.at_composite2(n, T![>], T![=]), |
| T![>>] => self.at_composite2(n, T![>], T![>]), |
| T![|=] => self.at_composite2(n, T![|], T![=]), |
| T![||] => self.at_composite2(n, T![|], T![|]), |
| |
| T![...] => self.at_composite3(n, T![.], T![.], T![.]), |
| T![..=] => self.at_composite3(n, T![.], T![.], T![=]), |
| T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), |
| T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), |
| |
| _ => self.inp.kind(self.pos + n) == kind, |
| } |
| } |
| |
| /// Consume the next token if `kind` matches. |
| pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { |
| if !self.at(kind) { |
| return false; |
| } |
| let n_raw_tokens = match kind { |
| T![-=] |
| | T![->] |
| | T![::] |
| | T![!=] |
| | T![..] |
| | T![*=] |
| | T![/=] |
| | T![&&] |
| | T![&=] |
| | T![%=] |
| | T![^=] |
| | T![+=] |
| | T![<<] |
| | T![<=] |
| | T![==] |
| | T![=>] |
| | T![>=] |
| | T![>>] |
| | T![|=] |
| | T![||] => 2, |
| |
| T![...] | T![..=] | T![<<=] | T![>>=] => 3, |
| _ => 1, |
| }; |
| self.do_bump(kind, n_raw_tokens); |
| true |
| } |
| |
| fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { |
| self.inp.kind(self.pos + n) == k1 |
| && self.inp.kind(self.pos + n + 1) == k2 |
| && self.inp.is_joint(self.pos + n) |
| } |
| |
| fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { |
| self.inp.kind(self.pos + n) == k1 |
| && self.inp.kind(self.pos + n + 1) == k2 |
| && self.inp.kind(self.pos + n + 2) == k3 |
| && self.inp.is_joint(self.pos + n) |
| && self.inp.is_joint(self.pos + n + 1) |
| } |
| |
| /// Checks if the current token is in `kinds`. |
| pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { |
| kinds.contains(self.current()) |
| } |
| |
| /// Checks if the current token is contextual keyword `kw`. |
| pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool { |
| self.inp.contextual_kind(self.pos) == kw |
| } |
| |
| /// Checks if the nth token is contextual keyword `kw`. |
| pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool { |
| self.inp.contextual_kind(self.pos + n) == kw |
| } |
| |
| /// Starts a new node in the syntax tree. All nodes and tokens |
| /// consumed between the `start` and the corresponding `Marker::complete` |
| /// belong to the same node. |
| pub(crate) fn start(&mut self) -> Marker { |
| let pos = self.events.len() as u32; |
| self.push_event(Event::tombstone()); |
| Marker::new(pos) |
| } |
| |
| /// Consume the next token. Panics if the parser isn't currently at `kind`. |
| pub(crate) fn bump(&mut self, kind: SyntaxKind) { |
| assert!(self.eat(kind)); |
| } |
| |
| /// Advances the parser by one token |
| pub(crate) fn bump_any(&mut self) { |
| let kind = self.nth(0); |
| if kind == EOF { |
| return; |
| } |
| self.do_bump(kind, 1); |
| } |
| |
| /// Advances the parser by one token |
| pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) { |
| assert!(self.at(SyntaxKind::FLOAT_NUMBER)); |
| // we have parse `<something>.` |
| // `<something>`.0.1 |
| // here we need to insert an extra event |
| // |
| // `<something>`. 0. 1; |
| // here we need to change the follow up parse, the return value will cause us to emulate a dot |
| // the actual splitting happens later |
| let ends_in_dot = !self.inp.is_joint(self.pos); |
| if !ends_in_dot { |
| let new_marker = self.start(); |
| let idx = marker.pos as usize; |
| match &mut self.events[idx] { |
| Event::Start { forward_parent, kind } => { |
| *kind = SyntaxKind::FIELD_EXPR; |
| *forward_parent = Some(new_marker.pos - marker.pos); |
| } |
| _ => unreachable!(), |
| } |
| marker.bomb.defuse(); |
| marker = new_marker; |
| }; |
| self.pos += 1; |
| self.push_event(Event::FloatSplitHack { ends_in_dot }); |
| (ends_in_dot, marker) |
| } |
| |
| /// Advances the parser by one token, remapping its kind. |
| /// This is useful to create contextual keywords from |
| /// identifiers. For example, the lexer creates a `union` |
| /// *identifier* token, but the parser remaps it to the |
| /// `union` keyword, and keyword is what ends up in the |
| /// final tree. |
| pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { |
| if self.nth(0) == EOF { |
| // FIXME: panic!? |
| return; |
| } |
| self.do_bump(kind, 1); |
| } |
| |
| /// Emit error with the `message` |
| /// FIXME: this should be much more fancy and support |
| /// structured errors with spans and notes, like rustc |
| /// does. |
| pub(crate) fn error<T: Into<String>>(&mut self, message: T) { |
| let msg = message.into(); |
| self.push_event(Event::Error { msg }); |
| } |
| |
| /// Consume the next token if it is `kind` or emit an error |
| /// otherwise. |
| pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { |
| if self.eat(kind) { |
| return true; |
| } |
| self.error(format!("expected {kind:?}")); |
| false |
| } |
| |
| /// Create an error node and consume the next token. |
| pub(crate) fn err_and_bump(&mut self, message: &str) { |
| self.err_recover(message, TokenSet::EMPTY); |
| } |
| |
| /// Create an error node and consume the next token. |
| pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { |
| if matches!(self.current(), T!['{'] | T!['}']) { |
| self.error(message); |
| return; |
| } |
| |
| if self.at_ts(recovery) { |
| self.error(message); |
| return; |
| } |
| |
| let m = self.start(); |
| self.error(message); |
| self.bump_any(); |
| m.complete(self, ERROR); |
| } |
| |
| fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { |
| self.pos += n_raw_tokens as usize; |
| self.steps.set(0); |
| self.push_event(Event::Token { kind, n_raw_tokens }); |
| } |
| |
| fn push_event(&mut self, event: Event) { |
| self.events.push(event); |
| } |
| |
| pub(crate) fn edition(&self) -> Edition { |
| self.edition |
| } |
| } |
| |
| /// See [`Parser::start`]. |
| pub(crate) struct Marker { |
| pos: u32, |
| bomb: DropBomb, |
| } |
| |
| impl Marker { |
| fn new(pos: u32) -> Marker { |
| Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } |
| } |
| |
| /// Finishes the syntax tree node and assigns `kind` to it, |
| /// and mark the create a `CompletedMarker` for possible future |
| /// operation like `.precede()` to deal with forward_parent. |
| pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker { |
| self.bomb.defuse(); |
| let idx = self.pos as usize; |
| match &mut p.events[idx] { |
| Event::Start { kind: slot, .. } => { |
| *slot = kind; |
| } |
| _ => unreachable!(), |
| } |
| p.push_event(Event::Finish); |
| CompletedMarker::new(self.pos, kind) |
| } |
| |
| /// Abandons the syntax tree node. All its children |
| /// are attached to its parent instead. |
| pub(crate) fn abandon(mut self, p: &mut Parser<'_>) { |
| self.bomb.defuse(); |
| let idx = self.pos as usize; |
| if idx == p.events.len() - 1 { |
| match p.events.pop() { |
| Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), |
| _ => unreachable!(), |
| } |
| } |
| } |
| } |
| |
| pub(crate) struct CompletedMarker { |
| pos: u32, |
| kind: SyntaxKind, |
| } |
| |
| impl CompletedMarker { |
| fn new(pos: u32, kind: SyntaxKind) -> Self { |
| CompletedMarker { pos, kind } |
| } |
| |
| /// This method allows to create a new node which starts |
| /// *before* the current one. That is, parser could start |
| /// node `A`, then complete it, and then after parsing the |
| /// whole `A`, decide that it should have started some node |
| /// `B` before starting `A`. `precede` allows to do exactly |
| /// that. See also docs about |
| /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent). |
| /// |
| /// Given completed events `[START, FINISH]` and its corresponding |
| /// `CompletedMarker(pos: 0, _)`. |
| /// Append a new `START` events as `[START, FINISH, NEWSTART]`, |
| /// then mark `NEWSTART` as `START`'s parent with saving its relative |
| /// distance to `NEWSTART` into forward_parent(=2 in this case); |
| pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker { |
| let new_pos = p.start(); |
| let idx = self.pos as usize; |
| match &mut p.events[idx] { |
| Event::Start { forward_parent, .. } => { |
| *forward_parent = Some(new_pos.pos - self.pos); |
| } |
| _ => unreachable!(), |
| } |
| new_pos |
| } |
| |
| /// Extends this completed marker *to the left* up to `m`. |
| pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker { |
| m.bomb.defuse(); |
| let idx = m.pos as usize; |
| match &mut p.events[idx] { |
| Event::Start { forward_parent, .. } => { |
| *forward_parent = Some(self.pos - m.pos); |
| } |
| _ => unreachable!(), |
| } |
| self |
| } |
| |
| pub(crate) fn kind(&self) -> SyntaxKind { |
| self.kind |
| } |
| } |