// src/libsyntax/util/parser_testing.rs - third_party/rust - Git at Google

 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 use ast;
 use parse::{new_parse_sess};
 use parse::{ParseSess,string_to_filemap,filemap_to_tts};
 use parse::{new_parser_from_source_str};
 use parse::parser::Parser;
 use parse::token;

 // turn a source string into token trees. a fresh ParseSess is created and
 // the string is registered under the made-up filename "bogofile"; both the
 // token trees and that ParseSess are returned.
 pub fn string_to_tts_and_sess (source_str : @str) -> (~[ast::token_tree],@mut ParseSess) {
     let sess = new_parse_sess(None);
     let filemap = string_to_filemap(sess,source_str,@"bogofile");
     let tts = filemap_to_tts(sess,filemap);
     (tts,sess)
 }

 // turn a source string into token trees (made-up filename); the ParseSess
 // produced along the way is dropped.
 pub fn string_to_tts(source_str : @str) -> ~[ast::token_tree] {
     match string_to_tts_and_sess(source_str) {
         (tts,_) => tts
     }
 }

 // build a parser over the source string inside a fresh ParseSess, using the
 // made-up filename "bogofile" and an empty first vector argument to
 // new_parser_from_source_str; returns the parser together with the ParseSess.
 pub fn string_to_parser_and_sess(source_str: @str) -> (Parser,@mut ParseSess) {
     let sess = new_parse_sess(None);
     let parser = new_parser_from_source_str(sess,~[],@"bogofile",source_str);
     (parser,sess)
 }

 // map a string to a parser; the ParseSess created for it is not returned.
 pub fn string_to_parser(source_str: @str) -> Parser {
     match string_to_parser_and_sess(source_str) {
         (parser,_) => parser
     }
 }

 // build a parser for `s`, run `f` on it, and call abort_if_errors() on the
 // parser before handing back whatever `f` produced.
 fn with_error_checking_parse<T>(s: @str, f: &fn(&mut Parser) -> T) -> T {
     let mut parser = string_to_parser(s);
     let result = f(&mut parser);
     // only return the result once the parser has had its errors checked
     parser.abort_if_errors();
     result
 }

 // parse a string as a crate (parse_crate_mod), with error checking.
 pub fn string_to_crate (source_str : @str) -> @ast::Crate {
     with_error_checking_parse(source_str, |p| p.parse_crate_mod())
 }

 // parse a string as a crate and return it together with the ParseSess the
 // parser was created in.
 // NOTE(review): unlike string_to_crate, abort_if_errors() is not called
 // here -- confirm that this is intended.
 pub fn string_to_crate_and_sess (source_str : @str) -> (@ast::Crate,@mut ParseSess) {
     let (parser,sess) = string_to_parser_and_sess(source_str);
     let parsed = parser.parse_crate_mod();
     (parsed,sess)
 }

 // parse a string as an expression (parse_expr), with error checking.
 pub fn string_to_expr (source_str : @str) -> @ast::Expr {
     with_error_checking_parse(source_str, |p| p.parse_expr())
 }

 // parse a string as an item, with error checking. parse_item is handed an
 // empty vector; its Option result is passed through unchanged.
 pub fn string_to_item (source_str : @str) -> Option<@ast::item> {
     with_error_checking_parse(source_str, |p| p.parse_item(~[]))
 }

 // parse a string as a statement, with error checking. parse_stmt is handed
 // an empty vector.
 pub fn string_to_stmt(source_str : @str) -> @ast::Stmt {
     with_error_checking_parse(source_str, |p| p.parse_stmt(~[]))
 }

 // parse a string, return a pat. like the other string_to_* helpers, the
 // parse goes through with_error_checking_parse, so abort_if_errors() is
 // called on the parser once the pattern has been parsed.
 pub fn string_to_pat(source_str : @str) -> @ast::Pat {
     do with_error_checking_parse(source_str) |p| {
         p.parse_pat()
     }
 }

 // convert a vector of strings to a vector of ast::idents, one
 // token::str_to_ident call per element, preserving order.
 pub fn strs_to_idents(ids: ~[&str]) -> ~[ast::Ident] {
     let mut idents = ~[];
     for s in ids.iter() {
         idents.push(token::str_to_ident(*s));
     }
     idents
 }

 // does the given string match the pattern? whitespace in the first string
 // may be deleted or replaced with other whitespace to match the pattern.
 // this function is unicode-ignorant; fortunately, the careful design of
 // UTF-8 mitigates this ignorance.  In particular, this function only collapses
 // sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode
 // chars. Unsurprisingly, it doesn't do NKF-normalization(?).
 pub fn matches_codepattern(a : &str, b : &str) -> bool {
     let mut idx_a = 0;
     let mut idx_b = 0;
     loop {
         if (idx_a == a.len() && idx_b == b.len()) {
             return true;
         }
         else if (idx_a == a.len()) {return false;}
         else if (idx_b == b.len()) {
             // maybe the stuff left in a is all ws?
             if (is_whitespace(a.char_at(idx_a))) {
                 return (scan_for_non_ws_or_end(a,idx_a) == a.len());
             } else {
                 return false;
             }
         }
         // ws in both given and pattern:
         else if (is_whitespace(a.char_at(idx_a))
            && is_whitespace(b.char_at(idx_b))) {
             idx_a = scan_for_non_ws_or_end(a,idx_a);
             idx_b = scan_for_non_ws_or_end(b,idx_b);
         }
         // ws in given only:
         else if (is_whitespace(a.char_at(idx_a))) {
             idx_a = scan_for_non_ws_or_end(a,idx_a);
         }
         // *don't* silently eat ws in expected only.
         else if (a.char_at(idx_a) == b.char_at(idx_b)) {
             idx_a += 1;
             idx_b += 1;
         }
         else {
             return false;
         }
     }
 }

 // given a string and an index, return the first uint >= idx
 // that is a non-ws-char or is outside of the legal range of
 // the string.
 fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint {
     let mut i = idx;
     let len = a.len();
     while ((i < len) && (is_whitespace(a.char_at(i)))) {
         i += 1;
     }
     i
 }

 // copied from lexer: true exactly for ' ', '\t', '\r' and '\n'.
 pub fn is_whitespace(c: char) -> bool {
     match c {
         ' ' | '\t' | '\r' | '\n' => true,
         _ => false
     }
 }

 #[cfg(test)]
 mod test {
     use super::*;

     // matches_codepattern is equality modulo whitespace, where only the
     // first (given) string may have whitespace dropped or collapsed.
     #[test] fn eqmodws() {
         assert!(matches_codepattern("",""));
         assert!(!matches_codepattern("","a"));
         assert!(!matches_codepattern("a",""));
         assert!(matches_codepattern("a","a"));
         assert!(matches_codepattern("a b","a   \n\t\r  b"));
         // trailing ws is fine in the given string...
         assert!(matches_codepattern("a b ","a   \n\t\r  b"));
         // ...but not in the pattern.
         assert!(!matches_codepattern("a b","a   \n\t\r  b "));
         assert!(matches_codepattern("a   b","a b"));
         assert!(!matches_codepattern("ab","a b"));
         assert!(matches_codepattern("a   b","ab"));
     }
 }
	// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
	// file at the top-level directory of this distribution and at
	// http://rust-lang.org/COPYRIGHT.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	use ast;
	use parse::{new_parse_sess};
	use parse::{ParseSess,string_to_filemap,filemap_to_tts};
	use parse::{new_parser_from_source_str};
	use parse::parser::Parser;
	use parse::token;

	// turn a source string into token trees. a fresh ParseSess is created and
	// the string is registered under the made-up filename "bogofile"; both the
	// token trees and that ParseSess are returned.
	pub fn string_to_tts_and_sess (source_str : @str) -> (~[ast::token_tree],@mut ParseSess) {
	    let sess = new_parse_sess(None);
	    let filemap = string_to_filemap(sess,source_str,@"bogofile");
	    let tts = filemap_to_tts(sess,filemap);
	    (tts,sess)
	}

	// turn a source string into token trees (made-up filename); the ParseSess
	// produced along the way is dropped.
	pub fn string_to_tts(source_str : @str) -> ~[ast::token_tree] {
	    match string_to_tts_and_sess(source_str) {
	        (tts,_) => tts
	    }
	}

	// build a parser over the source string inside a fresh ParseSess, using the
	// made-up filename "bogofile" and an empty first vector argument to
	// new_parser_from_source_str; returns the parser together with the ParseSess.
	pub fn string_to_parser_and_sess(source_str: @str) -> (Parser,@mut ParseSess) {
	    let sess = new_parse_sess(None);
	    let parser = new_parser_from_source_str(sess,~[],@"bogofile",source_str);
	    (parser,sess)
	}

	// map a string to a parser; the ParseSess created for it is not returned.
	pub fn string_to_parser(source_str: @str) -> Parser {
	    match string_to_parser_and_sess(source_str) {
	        (parser,_) => parser
	    }
	}

	// build a parser for `s`, run `f` on it, and call abort_if_errors() on the
	// parser before handing back whatever `f` produced.
	fn with_error_checking_parse<T>(s: @str, f: &fn(&mut Parser) -> T) -> T {
	    let mut parser = string_to_parser(s);
	    let result = f(&mut parser);
	    // only return the result once the parser has had its errors checked
	    parser.abort_if_errors();
	    result
	}

	// parse a string as a crate (parse_crate_mod), with error checking.
	pub fn string_to_crate (source_str : @str) -> @ast::Crate {
	    do with_error_checking_parse(source_str) |p| {
	        p.parse_crate_mod()
	    }
	}

	// parse a string as a crate and return it together with the ParseSess the
	// parser was created in.
	// NOTE(review): unlike string_to_crate, abort_if_errors() is not called
	// here -- confirm that this is intended.
	pub fn string_to_crate_and_sess (source_str : @str) -> (@ast::Crate,@mut ParseSess) {
	    let (parser,sess) = string_to_parser_and_sess(source_str);
	    let parsed = parser.parse_crate_mod();
	    (parsed,sess)
	}

	// parse a string as an expression (parse_expr), with error checking.
	pub fn string_to_expr (source_str : @str) -> @ast::Expr {
	    do with_error_checking_parse(source_str) |p| {
	        p.parse_expr()
	    }
	}

	// parse a string as an item, with error checking. parse_item is handed an
	// empty vector; its Option result is passed through unchanged.
	pub fn string_to_item (source_str : @str) -> Option<@ast::item> {
	    do with_error_checking_parse(source_str) |p| {
	        p.parse_item(~[])
	    }
	}

	// parse a string as a statement, with error checking. parse_stmt is handed
	// an empty vector.
	pub fn string_to_stmt(source_str : @str) -> @ast::Stmt {
	    do with_error_checking_parse(source_str) |p| {
	        p.parse_stmt(~[])
	    }
	}

	// parse a string, return a pat. like the other string_to_* helpers, the
	// parse goes through with_error_checking_parse, so abort_if_errors() is
	// called on the parser once the pattern has been parsed.
	pub fn string_to_pat(source_str : @str) -> @ast::Pat {
	    do with_error_checking_parse(source_str) |p| {
	        p.parse_pat()
	    }
	}

	// convert a vector of strings to a vector of ast::idents, one
	// token::str_to_ident call per element.
	pub fn strs_to_idents(ids: ~[&str]) -> ~[ast::Ident] {
	    ids.map(|u| token::str_to_ident(*u))
	}

	// does the given string match the pattern? whitespace in the first string
	// may be deleted or replaced with other whitespace to match the pattern.
	// this function is unicode-ignorant; fortunately, the careful design of
	// UTF-8 mitigates this ignorance. In particular, this function only collapses
	// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode
	// chars. Unsurprisingly, it doesn't do NKF-normalization(?).
	pub fn matches_codepattern(a : &str, b : &str) -> bool {
	let mut idx_a = 0;
	let mut idx_b = 0;
	loop {
	if (idx_a == a.len() && idx_b == b.len()) {
	return true;
	}
	else if (idx_a == a.len()) {return false;}
	else if (idx_b == b.len()) {
	// maybe the stuff left in a is all ws?
	if (is_whitespace(a.char_at(idx_a))) {
	return (scan_for_non_ws_or_end(a,idx_a) == a.len());
	} else {
	return false;
	}
	}
	// ws in both given and pattern:
	else if (is_whitespace(a.char_at(idx_a))
	&& is_whitespace(b.char_at(idx_b))) {
	idx_a = scan_for_non_ws_or_end(a,idx_a);
	idx_b = scan_for_non_ws_or_end(b,idx_b);
	}
	// ws in given only:
	else if (is_whitespace(a.char_at(idx_a))) {
	idx_a = scan_for_non_ws_or_end(a,idx_a);
	}
	// don't silently eat ws in expected only.
	else if (a.char_at(idx_a) == b.char_at(idx_b)) {
	idx_a += 1;
	idx_b += 1;
	}
	else {
	return false;
	}
	}
	}

	// given a string and an index, return the first uint >= idx
	// that is a non-ws-char or is outside of the legal range of
	// the string.
	fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint {
	let mut i = idx;
	let len = a.len();
	while ((i < len) && (is_whitespace(a.char_at(i)))) {
	i += 1;
	}
	i
	}

	// copied from lexer: true exactly for ' ', '\t', '\r' and '\n'.
	pub fn is_whitespace(c: char) -> bool {
	    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
	}

	#[cfg(test)]
	mod test {
	    use super::*;

	    // matches_codepattern is equality modulo whitespace, where only the
	    // first (given) string may have whitespace dropped or collapsed. the
	    // multi-space literals matter: they exercise collapsing of ws runs.
	    #[test] fn eqmodws() {
	        assert_eq!(matches_codepattern("",""),true);
	        assert_eq!(matches_codepattern("","a"),false);
	        assert_eq!(matches_codepattern("a",""),false);
	        assert_eq!(matches_codepattern("a","a"),true);
	        assert_eq!(matches_codepattern("a b","a   \n\t\r  b"),true);
	        assert_eq!(matches_codepattern("a b ","a   \n\t\r  b"),true);
	        assert_eq!(matches_codepattern("a b","a   \n\t\r  b "),false);
	        assert_eq!(matches_codepattern("a   b","a b"),true);
	        assert_eq!(matches_codepattern("ab","a b"),false);
	        assert_eq!(matches_codepattern("a   b","ab"),true);
	    }
	}