| // pest. The Elegant Parser |
| // Copyright (c) 2018 DragoČ™ Tiselice |
| // |
| // Licensed under the Apache License, Version 2.0 |
| // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
| // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. All files in the project carrying such notice may not be copied, |
| // modified, or distributed except according to those terms. |
| |
| use ast::*; |
| use std::collections::HashMap; |
| |
| #[cfg(test)] |
| macro_rules! box_tree { |
| ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => ( |
| $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ ) |
| ); |
| ($expr:expr) => ($expr); |
| } |
| |
| mod concatenator; |
| mod factorizer; |
| mod restorer; |
| mod rotater; |
| mod skipper; |
| mod unroller; |
| |
| pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> { |
| let optimized: Vec<OptimizedRule> = rules |
| .into_iter() |
| .map(rotater::rotate) |
| .map(skipper::skip) |
| .map(unroller::unroll) |
| .map(concatenator::concatenate) |
| .map(factorizer::factor) |
| .map(rule_to_optimized_rule) |
| .collect(); |
| |
| let rules = to_hash_map(&optimized); |
| optimized |
| .into_iter() |
| .map(|rule| restorer::restore_on_err(rule, &rules)) |
| .collect() |
| } |
| |
| fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule { |
| fn to_optimized(expr: Expr) -> OptimizedExpr { |
| match expr { |
| Expr::Str(string) => OptimizedExpr::Str(string), |
| Expr::Insens(string) => OptimizedExpr::Insens(string), |
| Expr::Range(start, end) => OptimizedExpr::Range(start, end), |
| Expr::Ident(ident) => OptimizedExpr::Ident(ident), |
| Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end), |
| Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))), |
| Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))), |
| Expr::Seq(lhs, rhs) => { |
| OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) |
| } |
| Expr::Choice(lhs, rhs) => { |
| OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) |
| } |
| Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))), |
| Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))), |
| Expr::Skip(strings) => OptimizedExpr::Skip(strings), |
| Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))), |
| Expr::RepOnce(_) |
| | Expr::RepExact(..) |
| | Expr::RepMin(..) |
| | Expr::RepMax(..) |
| | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"), |
| } |
| } |
| |
| OptimizedRule { |
| name: rule.name, |
| ty: rule.ty, |
| expr: to_optimized(rule.expr), |
| } |
| } |
| |
| fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> { |
| rules |
| .iter() |
| .map(|r| (r.name.clone(), r.expr.clone())) |
| .collect() |
| } |
| |
| #[derive(Clone, Debug, Eq, PartialEq)] |
| pub struct OptimizedRule { |
| pub name: String, |
| pub ty: RuleType, |
| pub expr: OptimizedExpr, |
| } |
| |
| #[derive(Clone, Debug, Eq, PartialEq)] |
| pub enum OptimizedExpr { |
| Str(String), |
| Insens(String), |
| Range(String, String), |
| Ident(String), |
| PeekSlice(i32, Option<i32>), |
| PosPred(Box<OptimizedExpr>), |
| NegPred(Box<OptimizedExpr>), |
| Seq(Box<OptimizedExpr>, Box<OptimizedExpr>), |
| Choice(Box<OptimizedExpr>, Box<OptimizedExpr>), |
| Opt(Box<OptimizedExpr>), |
| Rep(Box<OptimizedExpr>), |
| Skip(Vec<String>), |
| Push(Box<OptimizedExpr>), |
| RestoreOnErr(Box<OptimizedExpr>), |
| } |
| |
| impl OptimizedExpr { |
| pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { |
| OptimizedExprTopDownIterator::new(self) |
| } |
| |
| pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr |
| where |
| F: FnMut(OptimizedExpr) -> OptimizedExpr, |
| { |
| fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr |
| where |
| F: FnMut(OptimizedExpr) -> OptimizedExpr, |
| { |
| let expr = f(expr); |
| |
| match expr { |
| // TODO: Use box syntax when it gets stabilized. |
| OptimizedExpr::PosPred(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::PosPred(mapped) |
| } |
| OptimizedExpr::NegPred(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::NegPred(mapped) |
| } |
| OptimizedExpr::Seq(lhs, rhs) => { |
| let mapped_lhs = Box::new(map_internal(*lhs, f)); |
| let mapped_rhs = Box::new(map_internal(*rhs, f)); |
| OptimizedExpr::Seq(mapped_lhs, mapped_rhs) |
| } |
| OptimizedExpr::Choice(lhs, rhs) => { |
| let mapped_lhs = Box::new(map_internal(*lhs, f)); |
| let mapped_rhs = Box::new(map_internal(*rhs, f)); |
| OptimizedExpr::Choice(mapped_lhs, mapped_rhs) |
| } |
| OptimizedExpr::Rep(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Rep(mapped) |
| } |
| OptimizedExpr::Opt(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Opt(mapped) |
| } |
| OptimizedExpr::Push(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Push(mapped) |
| } |
| expr => expr, |
| } |
| } |
| |
| map_internal(self, &mut f) |
| } |
| |
| pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr |
| where |
| F: FnMut(OptimizedExpr) -> OptimizedExpr, |
| { |
| fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr |
| where |
| F: FnMut(OptimizedExpr) -> OptimizedExpr, |
| { |
| let mapped = match expr { |
| OptimizedExpr::PosPred(expr) => { |
| // TODO: Use box syntax when it gets stabilized. |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::PosPred(mapped) |
| } |
| OptimizedExpr::NegPred(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::NegPred(mapped) |
| } |
| OptimizedExpr::Seq(lhs, rhs) => { |
| let mapped_lhs = Box::new(map_internal(*lhs, f)); |
| let mapped_rhs = Box::new(map_internal(*rhs, f)); |
| OptimizedExpr::Seq(mapped_lhs, mapped_rhs) |
| } |
| OptimizedExpr::Choice(lhs, rhs) => { |
| let mapped_lhs = Box::new(map_internal(*lhs, f)); |
| let mapped_rhs = Box::new(map_internal(*rhs, f)); |
| OptimizedExpr::Choice(mapped_lhs, mapped_rhs) |
| } |
| OptimizedExpr::Rep(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Rep(mapped) |
| } |
| OptimizedExpr::Opt(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Opt(mapped) |
| } |
| OptimizedExpr::Push(expr) => { |
| let mapped = Box::new(map_internal(*expr, f)); |
| OptimizedExpr::Push(mapped) |
| } |
| expr => expr, |
| }; |
| |
| f(mapped) |
| } |
| |
| map_internal(self, &mut f) |
| } |
| } |
| |
| pub struct OptimizedExprTopDownIterator { |
| current: Option<OptimizedExpr>, |
| next: Option<OptimizedExpr>, |
| right_branches: Vec<OptimizedExpr>, |
| } |
| |
| impl OptimizedExprTopDownIterator { |
| pub fn new(expr: &OptimizedExpr) -> Self { |
| let mut iter = OptimizedExprTopDownIterator { |
| current: None, |
| next: None, |
| right_branches: vec![], |
| }; |
| iter.iterate_expr(expr.clone()); |
| iter |
| } |
| |
| fn iterate_expr(&mut self, expr: OptimizedExpr) { |
| self.current = Some(expr.clone()); |
| match expr { |
| OptimizedExpr::Seq(lhs, rhs) => { |
| self.right_branches.push(*rhs); |
| self.next = Some(*lhs); |
| } |
| OptimizedExpr::Choice(lhs, rhs) => { |
| self.right_branches.push(*rhs); |
| self.next = Some(*lhs); |
| } |
| OptimizedExpr::PosPred(expr) |
| | OptimizedExpr::NegPred(expr) |
| | OptimizedExpr::Rep(expr) |
| | OptimizedExpr::Opt(expr) |
| | OptimizedExpr::Push(expr) => { |
| self.next = Some(*expr); |
| } |
| _ => { |
| self.next = None; |
| } |
| } |
| } |
| } |
| |
| impl Iterator for OptimizedExprTopDownIterator { |
| type Item = OptimizedExpr; |
| |
| fn next(&mut self) -> Option<Self::Item> { |
| let result = self.current.take(); |
| |
| if let Some(expr) = self.next.take() { |
| self.iterate_expr(expr); |
| } else if let Some(expr) = self.right_branches.pop() { |
| self.iterate_expr(expr); |
| } |
| |
| result |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| #[test] |
| fn rotate() { |
| let rules = { |
| use ast::Expr::*; |
| vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Normal, |
| expr: box_tree!(Choice( |
| Choice( |
| Choice(Str(String::from("a")), Str(String::from("b"))), |
| Str(String::from("c")) |
| ), |
| Str(String::from("d")) |
| )), |
| }] |
| }; |
| let rotated = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Normal, |
| expr: box_tree!(Choice( |
| Str(String::from("a")), |
| Choice( |
| Str(String::from("b")), |
| Choice(Str(String::from("c")), Str(String::from("d"))) |
| ) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), rotated); |
| } |
| |
| #[test] |
| fn skip() { |
| let rules = { |
| use ast::Expr::*; |
| vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Rep(Seq( |
| NegPred(Choice(Str(String::from("a")), Str(String::from("b")))), |
| Ident(String::from("ANY")) |
| ))), |
| }] |
| }; |
| let skipped = vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]), |
| }]; |
| |
| assert_eq!(optimize(rules), skipped); |
| } |
| |
| #[test] |
| fn concat_strings() { |
| let rules = { |
| use ast::Expr::*; |
| vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Seq( |
| Seq(Str(String::from("a")), Str(String::from("b"))), |
| Seq(Str(String::from("c")), Str(String::from("d"))) |
| )), |
| }] |
| }; |
| let concatenated = vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: OptimizedExpr::Str(String::from("abcd")), |
| }]; |
| |
| assert_eq!(optimize(rules), concatenated); |
| } |
| |
| #[test] |
| fn unroll_loop_exact() { |
| let rules = vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3), |
| }]; |
| let unrolled = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Seq( |
| Ident(String::from("a")), |
| Seq(Ident(String::from("a")), Ident(String::from("a"))) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), unrolled); |
| } |
| |
| #[test] |
| fn unroll_loop_max() { |
| let rules = vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3), |
| }]; |
| let unrolled = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Seq( |
| Opt(Str(String::from("a"))), |
| Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a")))) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), unrolled); |
| } |
| |
| #[test] |
| fn unroll_loop_min() { |
| let rules = vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2), |
| }]; |
| let unrolled = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Seq( |
| Str(String::from("a")), |
| Seq(Str(String::from("a")), Rep(Str(String::from("a")))) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), unrolled); |
| } |
| |
| #[test] |
| fn unroll_loop_min_max() { |
| let rules = vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3), |
| }]; |
| let unrolled = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| /* TODO possible room for improvement here: |
| * if the sequences were rolled out in the opposite |
| * order, we could further optimize the strings |
| * in cases like this. |
| Str(String::from(("aa")), |
| Opt(Str(String::from("a"))) |
| */ |
| expr: box_tree!(Seq( |
| Str(String::from("a")), |
| Seq(Str(String::from("a")), Opt(Str(String::from("a")))) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), unrolled); |
| } |
| |
| #[test] |
| fn concat_insensitive_strings() { |
| let rules = { |
| use ast::Expr::*; |
| vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: box_tree!(Seq( |
| Seq(Insens(String::from("a")), Insens(String::from("b"))), |
| Seq(Insens(String::from("c")), Insens(String::from("d"))) |
| )), |
| }] |
| }; |
| let concatenated = vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Atomic, |
| expr: OptimizedExpr::Insens(String::from("abcd")), |
| }]; |
| |
| assert_eq!(optimize(rules), concatenated); |
| } |
| |
| #[test] |
| fn long_common_sequence() { |
| let rules = { |
| use ast::Expr::*; |
| vec![Rule { |
| name: "rule".to_owned(), |
| ty: RuleType::Silent, |
| expr: box_tree!(Choice( |
| Seq( |
| Ident(String::from("a")), |
| Seq(Ident(String::from("b")), Ident(String::from("c"))) |
| ), |
| Seq( |
| Seq(Ident(String::from("a")), Ident(String::from("b"))), |
| Ident(String::from("d")) |
| ) |
| )), |
| }] |
| }; |
| let optimized = { |
| use optimizer::OptimizedExpr::*; |
| vec![OptimizedRule { |
| name: "rule".to_owned(), |
| ty: RuleType::Silent, |
| expr: box_tree!(Seq( |
| Ident(String::from("a")), |
| Seq( |
| Ident(String::from("b")), |
| Choice(Ident(String::from("c")), Ident(String::from("d"))) |
| ) |
| )), |
| }] |
| }; |
| |
| assert_eq!(optimize(rules), optimized); |
| } |
| } |