blob: 7013c43f9eb95fc53dd45ce11bc9c96d6ef201bc [file] [log] [blame]
// pest. The Elegant Parser
// Copyright (c) 2018 DragoČ™ Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use ast::*;
use std::collections::HashMap;
#[cfg(test)]
macro_rules! box_tree {
( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => (
$node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ )
);
($expr:expr) => ($expr);
}
mod concatenator;
mod factorizer;
mod restorer;
mod rotater;
mod skipper;
mod unroller;
pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
let optimized: Vec<OptimizedRule> = rules
.into_iter()
.map(rotater::rotate)
.map(skipper::skip)
.map(unroller::unroll)
.map(concatenator::concatenate)
.map(factorizer::factor)
.map(rule_to_optimized_rule)
.collect();
let rules = to_hash_map(&optimized);
optimized
.into_iter()
.map(|rule| restorer::restore_on_err(rule, &rules))
.collect()
}
fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
fn to_optimized(expr: Expr) -> OptimizedExpr {
match expr {
Expr::Str(string) => OptimizedExpr::Str(string),
Expr::Insens(string) => OptimizedExpr::Insens(string),
Expr::Range(start, end) => OptimizedExpr::Range(start, end),
Expr::Ident(ident) => OptimizedExpr::Ident(ident),
Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
Expr::Seq(lhs, rhs) => {
OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
}
Expr::Choice(lhs, rhs) => {
OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
}
Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
Expr::Skip(strings) => OptimizedExpr::Skip(strings),
Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
Expr::RepOnce(_)
| Expr::RepExact(..)
| Expr::RepMin(..)
| Expr::RepMax(..)
| Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"),
}
}
OptimizedRule {
name: rule.name,
ty: rule.ty,
expr: to_optimized(rule.expr),
}
}
fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
rules
.iter()
.map(|r| (r.name.clone(), r.expr.clone()))
.collect()
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OptimizedRule {
pub name: String,
pub ty: RuleType,
pub expr: OptimizedExpr,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum OptimizedExpr {
Str(String),
Insens(String),
Range(String, String),
Ident(String),
PeekSlice(i32, Option<i32>),
PosPred(Box<OptimizedExpr>),
NegPred(Box<OptimizedExpr>),
Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
Opt(Box<OptimizedExpr>),
Rep(Box<OptimizedExpr>),
Skip(Vec<String>),
Push(Box<OptimizedExpr>),
RestoreOnErr(Box<OptimizedExpr>),
}
impl OptimizedExpr {
pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
OptimizedExprTopDownIterator::new(self)
}
pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
where
F: FnMut(OptimizedExpr) -> OptimizedExpr,
{
fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
where
F: FnMut(OptimizedExpr) -> OptimizedExpr,
{
let expr = f(expr);
match expr {
// TODO: Use box syntax when it gets stabilized.
OptimizedExpr::PosPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::PosPred(mapped)
}
OptimizedExpr::NegPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::NegPred(mapped)
}
OptimizedExpr::Seq(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
}
OptimizedExpr::Choice(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
}
OptimizedExpr::Rep(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Rep(mapped)
}
OptimizedExpr::Opt(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Opt(mapped)
}
OptimizedExpr::Push(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Push(mapped)
}
expr => expr,
}
}
map_internal(self, &mut f)
}
pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
where
F: FnMut(OptimizedExpr) -> OptimizedExpr,
{
fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
where
F: FnMut(OptimizedExpr) -> OptimizedExpr,
{
let mapped = match expr {
OptimizedExpr::PosPred(expr) => {
// TODO: Use box syntax when it gets stabilized.
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::PosPred(mapped)
}
OptimizedExpr::NegPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::NegPred(mapped)
}
OptimizedExpr::Seq(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
}
OptimizedExpr::Choice(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
}
OptimizedExpr::Rep(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Rep(mapped)
}
OptimizedExpr::Opt(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Opt(mapped)
}
OptimizedExpr::Push(expr) => {
let mapped = Box::new(map_internal(*expr, f));
OptimizedExpr::Push(mapped)
}
expr => expr,
};
f(mapped)
}
map_internal(self, &mut f)
}
}
pub struct OptimizedExprTopDownIterator {
current: Option<OptimizedExpr>,
next: Option<OptimizedExpr>,
right_branches: Vec<OptimizedExpr>,
}
impl OptimizedExprTopDownIterator {
pub fn new(expr: &OptimizedExpr) -> Self {
let mut iter = OptimizedExprTopDownIterator {
current: None,
next: None,
right_branches: vec![],
};
iter.iterate_expr(expr.clone());
iter
}
fn iterate_expr(&mut self, expr: OptimizedExpr) {
self.current = Some(expr.clone());
match expr {
OptimizedExpr::Seq(lhs, rhs) => {
self.right_branches.push(*rhs);
self.next = Some(*lhs);
}
OptimizedExpr::Choice(lhs, rhs) => {
self.right_branches.push(*rhs);
self.next = Some(*lhs);
}
OptimizedExpr::PosPred(expr)
| OptimizedExpr::NegPred(expr)
| OptimizedExpr::Rep(expr)
| OptimizedExpr::Opt(expr)
| OptimizedExpr::Push(expr) => {
self.next = Some(*expr);
}
_ => {
self.next = None;
}
}
}
}
impl Iterator for OptimizedExprTopDownIterator {
type Item = OptimizedExpr;
fn next(&mut self) -> Option<Self::Item> {
let result = self.current.take();
if let Some(expr) = self.next.take() {
self.iterate_expr(expr);
} else if let Some(expr) = self.right_branches.pop() {
self.iterate_expr(expr);
}
result
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rotate() {
let rules = {
use ast::Expr::*;
vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Choice(
Choice(
Choice(Str(String::from("a")), Str(String::from("b"))),
Str(String::from("c"))
),
Str(String::from("d"))
)),
}]
};
let rotated = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Choice(
Str(String::from("a")),
Choice(
Str(String::from("b")),
Choice(Str(String::from("c")), Str(String::from("d")))
)
)),
}]
};
assert_eq!(optimize(rules), rotated);
}
#[test]
fn skip() {
let rules = {
use ast::Expr::*;
vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Rep(Seq(
NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
Ident(String::from("ANY"))
))),
}]
};
let skipped = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
}];
assert_eq!(optimize(rules), skipped);
}
#[test]
fn concat_strings() {
let rules = {
use ast::Expr::*;
vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Seq(
Seq(Str(String::from("a")), Str(String::from("b"))),
Seq(Str(String::from("c")), Str(String::from("d")))
)),
}]
};
let concatenated = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: OptimizedExpr::Str(String::from("abcd")),
}];
assert_eq!(optimize(rules), concatenated);
}
#[test]
fn unroll_loop_exact() {
let rules = vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
}];
let unrolled = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Seq(
Ident(String::from("a")),
Seq(Ident(String::from("a")), Ident(String::from("a")))
)),
}]
};
assert_eq!(optimize(rules), unrolled);
}
#[test]
fn unroll_loop_max() {
let rules = vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
}];
let unrolled = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Seq(
Opt(Str(String::from("a"))),
Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
)),
}]
};
assert_eq!(optimize(rules), unrolled);
}
#[test]
fn unroll_loop_min() {
let rules = vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
}];
let unrolled = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Seq(
Str(String::from("a")),
Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
)),
}]
};
assert_eq!(optimize(rules), unrolled);
}
#[test]
fn unroll_loop_min_max() {
let rules = vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
}];
let unrolled = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
/* TODO possible room for improvement here:
* if the sequences were rolled out in the opposite
* order, we could further optimize the strings
* in cases like this.
Str(String::from(("aa")),
Opt(Str(String::from("a")))
*/
expr: box_tree!(Seq(
Str(String::from("a")),
Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
)),
}]
};
assert_eq!(optimize(rules), unrolled);
}
#[test]
fn concat_insensitive_strings() {
let rules = {
use ast::Expr::*;
vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: box_tree!(Seq(
Seq(Insens(String::from("a")), Insens(String::from("b"))),
Seq(Insens(String::from("c")), Insens(String::from("d")))
)),
}]
};
let concatenated = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Atomic,
expr: OptimizedExpr::Insens(String::from("abcd")),
}];
assert_eq!(optimize(rules), concatenated);
}
#[test]
fn long_common_sequence() {
let rules = {
use ast::Expr::*;
vec![Rule {
name: "rule".to_owned(),
ty: RuleType::Silent,
expr: box_tree!(Choice(
Seq(
Ident(String::from("a")),
Seq(Ident(String::from("b")), Ident(String::from("c")))
),
Seq(
Seq(Ident(String::from("a")), Ident(String::from("b"))),
Ident(String::from("d"))
)
)),
}]
};
let optimized = {
use optimizer::OptimizedExpr::*;
vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Silent,
expr: box_tree!(Seq(
Ident(String::from("a")),
Seq(
Ident(String::from("b")),
Choice(Ident(String::from("c")), Ident(String::from("d")))
)
)),
}]
};
assert_eq!(optimize(rules), optimized);
}
}