| //! Defines input for code generation process. |
| |
| use quote::ToTokens; |
| |
| use crate::codegen::grammar::to_upper_snake_case; |
| |
| #[derive(Copy, Clone, Debug)] |
| pub(crate) struct KindsSrc { |
| pub(crate) punct: &'static [(&'static str, &'static str)], |
| pub(crate) keywords: &'static [&'static str], |
| pub(crate) contextual_keywords: &'static [&'static str], |
| pub(crate) literals: &'static [&'static str], |
| pub(crate) tokens: &'static [&'static str], |
| pub(crate) nodes: &'static [&'static str], |
| pub(crate) _enums: &'static [&'static str], |
| pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)], |
| } |
| |
/// Rust language editions, declared from oldest to newest.
///
/// The derived `Ord`/`PartialOrd` follow declaration order, so
/// `Edition2015 < Edition2018 < Edition2021 < Edition2024`.
///
/// Visibility is `pub(crate)` so that it matches the `pub(crate)` field
/// `KindsSrc::edition_dependent_keywords`, whose type mentions this enum.
// Only `Edition2018` and `Edition2024` are constructed by the tables in this file,
// which is presumably why `dead_code` has to be allowed for the other variants.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Edition {
    Edition2015,
    Edition2018,
    Edition2021,
    Edition2024,
}
| |
| impl ToTokens for Edition { |
| fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { |
| match self { |
| Edition::Edition2015 => { |
| tokens.extend(quote::quote! { Edition::Edition2015 }); |
| } |
| Edition::Edition2018 => { |
| tokens.extend(quote::quote! { Edition::Edition2018 }); |
| } |
| Edition::Edition2021 => { |
| tokens.extend(quote::quote! { Edition::Edition2021 }); |
| } |
| Edition::Edition2024 => { |
| tokens.extend(quote::quote! { Edition::Edition2024 }); |
| } |
| } |
| } |
| } |
| |
/// Punctuation tokens of the language as `(source text, kind name)` pairs.
///
/// `generate_kind_src` panics if the grammar uses punctuation missing from this
/// table, or if an entry other than the first is never used by the grammar.
const PUNCT: &[(&str, &str)] = &[
    // `$` must stay first: `generate_kind_src` unconditionally marks index 0 as
    // used, so the dollar sign does not have to appear in the grammar.
    ("$", "DOLLAR"),
    (";", "SEMICOLON"),
    (",", "COMMA"),
    ("(", "L_PAREN"),
    (")", "R_PAREN"),
    ("{", "L_CURLY"),
    ("}", "R_CURLY"),
    ("[", "L_BRACK"),
    ("]", "R_BRACK"),
    ("<", "L_ANGLE"),
    (">", "R_ANGLE"),
    ("@", "AT"),
    ("#", "POUND"),
    ("~", "TILDE"),
    ("?", "QUESTION"),
    ("&", "AMP"),
    ("|", "PIPE"),
    ("+", "PLUS"),
    ("*", "STAR"),
    ("/", "SLASH"),
    ("^", "CARET"),
    ("%", "PERCENT"),
    ("_", "UNDERSCORE"),
    (".", "DOT"),
    ("..", "DOT2"),
    ("...", "DOT3"),
    ("..=", "DOT2EQ"),
    (":", "COLON"),
    ("::", "COLON2"),
    ("=", "EQ"),
    ("==", "EQ2"),
    ("=>", "FAT_ARROW"),
    ("!", "BANG"),
    ("!=", "NEQ"),
    ("-", "MINUS"),
    ("->", "THIN_ARROW"),
    ("<=", "LTEQ"),
    (">=", "GTEQ"),
    ("+=", "PLUSEQ"),
    ("-=", "MINUSEQ"),
    ("|=", "PIPEEQ"),
    ("&=", "AMPEQ"),
    ("^=", "CARETEQ"),
    ("/=", "SLASHEQ"),
    ("*=", "STAREQ"),
    ("%=", "PERCENTEQ"),
    ("&&", "AMP2"),
    ("||", "PIPE2"),
    ("<<", "SHL"),
    (">>", "SHR"),
    ("<<=", "SHLEQ"),
    (">>=", "SHREQ"),
];
/// Token kinds that always exist; `generate_kind_src` starts its token list
/// from these and appends the `#`-prefixed tokens found in the grammar.
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// Commented-out alternative list (presumably an earlier revision; confirm or delete):
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"]
| |
/// Name of the grammar token that `generate_kind_src` skips during classification.
const EOF: &str = "EOF";
| |
/// Words that `generate_kind_src` always adds to the keyword list, whether or not
/// the grammar mentions them.
const RESERVED: &[&str] = &[
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
    "virtual", "yield",
];
/// Keywords that are keywords only in specific parse contexts.
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] =
    &["macro_rules", "union", "default", "raw", "dyn", "auto", "yeet", "safe"];
/// Contextual keywords used for special (builtin) macro expansions.
///
/// `generate_kind_src` merges these with `CONTEXTUAL_KEYWORDS`.
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
    "asm",
    "naked_asm",
    "global_asm",
    "att_syntax",
    "builtin",
    "clobber_abi",
    "format_args",
    // "in", (left disabled; presumably because `in` is handled as a regular keyword, confirm)
    "inlateout",
    "inout",
    "label",
    "lateout",
    "may_unwind",
    "nomem",
    "noreturn",
    "nostack",
    "offset_of",
    "options",
    "out",
    "preserves_flags",
    "pure",
    // "raw", (left disabled; `raw` is already listed in `CONTEXTUAL_KEYWORDS`)
    "readonly",
    "sym",
];
| |
| // keywords that are keywords depending on the edition |
| const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[ |
| ("try", Edition::Edition2018), |
| ("dyn", Edition::Edition2018), |
| ("async", Edition::Edition2018), |
| ("await", Edition::Edition2018), |
| ("gen", Edition::Edition2024), |
| ]; |
| |
| pub(crate) fn generate_kind_src( |
| nodes: &[AstNodeSrc], |
| enums: &[AstEnumSrc], |
| grammar: &ungrammar::Grammar, |
| ) -> KindsSrc { |
| let mut contextual_keywords: Vec<&_> = |
| CONTEXTUAL_KEYWORDS.iter().chain(CONTEXTUAL_BUILTIN_KEYWORDS).copied().collect(); |
| |
| let mut keywords: Vec<&_> = Vec::new(); |
| let mut tokens: Vec<&_> = TOKENS.to_vec(); |
| let mut literals: Vec<&_> = Vec::new(); |
| let mut used_puncts = vec![false; PUNCT.len()]; |
| // Mark $ as used |
| used_puncts[0] = true; |
| grammar.tokens().for_each(|token| { |
| let name = &*grammar[token].name; |
| if name == EOF { |
| return; |
| } |
| match name.split_at(1) { |
| ("@", lit) if !lit.is_empty() => { |
| literals.push(String::leak(to_upper_snake_case(lit))); |
| } |
| ("#", token) if !token.is_empty() => { |
| tokens.push(String::leak(to_upper_snake_case(token))); |
| } |
| _ if contextual_keywords.contains(&name) => {} |
| _ if name.chars().all(char::is_alphabetic) => { |
| keywords.push(String::leak(name.to_owned())); |
| } |
| _ => { |
| let idx = PUNCT |
| .iter() |
| .position(|(punct, _)| punct == &name) |
| .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}")); |
| used_puncts[idx] = true; |
| } |
| } |
| }); |
| PUNCT.iter().zip(used_puncts).filter(|(_, used)| !used).for_each(|((punct, _), _)| { |
| panic!("Punctuation {punct:?} is not used in grammar"); |
| }); |
| keywords.extend(RESERVED.iter().copied()); |
| keywords.sort(); |
| keywords.dedup(); |
| contextual_keywords.sort(); |
| contextual_keywords.dedup(); |
| let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec(); |
| edition_dependent_keywords.sort(); |
| edition_dependent_keywords.dedup(); |
| |
| keywords.retain(|&it| !contextual_keywords.contains(&it)); |
| keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it)); |
| |
| // we leak things here for simplicity, that way we don't have to deal with lifetimes |
| // The execution is a one shot job so thats fine |
| let nodes = nodes |
| .iter() |
| .map(|it| &it.name) |
| .map(|it| to_upper_snake_case(it)) |
| .map(String::leak) |
| .map(|it| &*it) |
| .collect(); |
| let nodes = Vec::leak(nodes); |
| nodes.sort(); |
| let enums = enums |
| .iter() |
| .map(|it| &it.name) |
| .map(|it| to_upper_snake_case(it)) |
| .map(String::leak) |
| .map(|it| &*it) |
| .collect(); |
| let enums = Vec::leak(enums); |
| enums.sort(); |
| let keywords = Vec::leak(keywords); |
| let contextual_keywords = Vec::leak(contextual_keywords); |
| let edition_dependent_keywords = Vec::leak(edition_dependent_keywords); |
| let literals = Vec::leak(literals); |
| literals.sort(); |
| let tokens = Vec::leak(tokens); |
| tokens.sort(); |
| |
| KindsSrc { |
| punct: PUNCT, |
| nodes, |
| _enums: enums, |
| keywords, |
| contextual_keywords, |
| edition_dependent_keywords, |
| literals, |
| tokens, |
| } |
| } |
| |
| #[derive(Default, Debug)] |
| pub(crate) struct AstSrc { |
| pub(crate) tokens: Vec<String>, |
| pub(crate) nodes: Vec<AstNodeSrc>, |
| pub(crate) enums: Vec<AstEnumSrc>, |
| } |
| |
| #[derive(Debug)] |
| pub(crate) struct AstNodeSrc { |
| pub(crate) doc: Vec<String>, |
| pub(crate) name: String, |
| pub(crate) traits: Vec<String>, |
| pub(crate) fields: Vec<Field>, |
| } |
| |
| #[derive(Debug, Eq, PartialEq)] |
| pub(crate) enum Field { |
| Token(String), |
| Node { name: String, ty: String, cardinality: Cardinality }, |
| } |
| |
/// How many times a node field may occur.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
    /// The field is optional (presumably at most one occurrence; confirm).
    Optional,
    /// The field may occur many times.
    Many,
}
| |
/// Description of a single AST enum.
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
    /// Documentation text for the enum (presumably one entry per line; confirm).
    pub(crate) doc: Vec<String>,
    /// Enum name; `generate_kind_src` converts it to UPPER_SNAKE_CASE.
    pub(crate) name: String,
    /// Names of traits associated with the enum (exact use is not visible in this file).
    pub(crate) traits: Vec<String>,
    /// Names of the enum's variants.
    pub(crate) variants: Vec<String>,
}