src/libsyntax/parse/comments.rs - third_party/rust - Git at Google

 use io::println;//XXXXXXXXxxx
 use io::ReaderUtil;
 use util::interner;
 use lexer::{string_reader, bump, is_eof, nextch,
                is_whitespace, get_str_from, reader};

 export cmnt;
 export lit;
 export cmnt_style;
 export gather_comments_and_literals;
 export is_doc_comment, doc_comment_style, strip_doc_comment_decoration;
 export isolated, trailing, mixed, blank_line;

 /// Placement of a comment relative to the code around it.
 enum cmnt_style {
     isolated, // No code on either side of each line of the comment
     trailing, // Code exists to the left of the comment
     mixed, // Code before /* foo */ and after the comment
     blank_line, // Just a manual blank line "\n\n", for layout
 }

 /// Equality for comment styles: two styles are equal exactly when their
 /// variants cast to the same `uint`.
 impl cmnt_style : cmp::Eq {
     pure fn eq(other: &cmnt_style) -> bool {
         let lhs = self as uint;
         let rhs = (*other) as uint;
         lhs == rhs
     }
     pure fn ne(other: &cmnt_style) -> bool {
         let lhs = self as uint;
         let rhs = (*other) as uint;
         lhs != rhs
     }
 }

 /// A gathered comment: its placement style, its text split into lines, and
 /// the reader's `chpos` recorded when the comment was encountered.
 type cmnt = {style: cmnt_style, lines: ~[~str], pos: uint};

 /// Reports whether `s` begins with one of the four doc-comment openers:
 /// three slashes, two slashes and a bang, slash-star-star, or
 /// slash-star-bang.
 fn is_doc_comment(s: ~str) -> bool {
     if s.starts_with(~"///") { return true; }
     if s.starts_with(~"//!") { return true; }
     if s.starts_with(~"/**") { return true; }
     return s.starts_with(~"/*!");
 }

 /// Classifies a doc-comment as an inner or an outer attribute.
 ///
 /// Openers containing a bang yield `ast::attr_inner`; every other
 /// doc-comment yields `ast::attr_outer`. Asserts that `comment` satisfies
 /// `is_doc_comment`.
 fn doc_comment_style(comment: ~str) -> ast::attr_style {
     assert is_doc_comment(comment);
     let is_inner = comment.starts_with(~"//!") ||
         comment.starts_with(~"/*!");
     if is_inner { return ast::attr_inner; }
     return ast::attr_outer;
 }

 /// Removes the comment markers from a doc-comment and returns its text.
 ///
 /// A comment starting with two slashes loses its first three bytes and is
 /// then trimmed. A comment starting with slash-star loses its three-byte
 /// opener and two-byte closer; what remains is split into lines, blank
 /// lines at either end are dropped, and a shared left margin is removed in
 /// three passes (tabs/spaces, at most one column of `*`, tabs/spaces again)
 /// before the lines are rejoined with "\n". A block comment too short to
 /// hold any text yields the empty string. Any other input fails with
 /// "not a doc-comment: ...".
 fn strip_doc_comment_decoration(comment: ~str) -> ~str {

     /// remove whitespace-only lines from the start/end of `lines`
     fn vertical_trim(lines: ~[~str]) -> ~[~str] {
         let mut i = 0u, j = lines.len();
         while i < j && lines[i].trim().is_empty() {
             i += 1u;
         }
         while j > i && lines[j - 1u].trim().is_empty() {
             j -= 1u;
         }
         return lines.slice(i, j);
     }

     // drop leftmost columns that contain only values in chars
     //
     // `i` ends up as the smallest char index, capped by `max`, at which
     // some non-blank line holds a character that is not in `chars`; that
     // many leading chars are then cut from every line.
     fn block_trim(lines: ~[~str], chars: ~str, max: Option<uint>) -> ~[~str] {

         let mut i = max.get_default(uint::max_value);
         for lines.each |line| {
             // Blank lines do not constrain the margin.
             if line.trim().is_empty() {
                 loop;
             }
             for line.each_chari |j, c| {
                 if j >= i {
                     break;
                 }
                 if !chars.contains_char(c) {
                     i = j;
                     break;
                 }
             }
         }

         return do lines.map |line| {
             let chars = str::chars(*line);
             // A line shorter than the margin has nothing left after the cut.
             if i > chars.len() {
                 ~""
             } else {
                 str::from_chars(chars.slice(i, chars.len()))
             }
         };
     }

     if comment.starts_with(~"//") {
         return comment.slice(3u, comment.len()).trim();
     }

     if comment.starts_with(~"/*") {
         // Opener (3 bytes) plus closer (2 bytes) need at least 5 bytes.
         // Shorter input, such as the 4-byte empty block comment that still
         // passes is_doc_comment, would ask slice for a range whose start
         // lies past its end, so it is answered with empty text instead.
         if comment.len() < 5u {
             return ~"";
         }
         let lines = str::lines_any(comment.slice(3u, comment.len() - 2u));
         let lines = vertical_trim(lines);
         let lines = block_trim(lines, ~"\t ", None);
         let lines = block_trim(lines, ~"*", Some(1u));
         let lines = block_trim(lines, ~"\t ", None);
         return str::connect(lines, ~"\n");
     }

     fail ~"not a doc-comment: " + comment;
 }

 /// Collects characters from the reader up to, but not including, the next
 /// '\n' or end of input, and returns them. A terminating '\n' is consumed.
 fn read_to_eol(rdr: string_reader) -> ~str {
     let mut line = ~"";
     loop {
         if rdr.curr == '\n' || is_eof(rdr) { break; }
         str::push_char(&mut line, rdr.curr);
         bump(rdr);
     }
     // Step past the newline so the reader starts on the following line.
     if rdr.curr == '\n' { bump(rdr); }
     return line;
 }

 /// Reads the rest of the current line and returns it, asserting that it
 /// starts with either two slashes or with "#!".
 fn read_one_line_comment(rdr: string_reader) -> ~str {
     let line = read_to_eol(rdr);
     let slash = '/' as u8;
     let is_line_comment = line[0] == slash && line[1] == slash;
     let is_shebang = line[0] == '#' as u8 && line[1] == '!' as u8;
     assert (is_line_comment || is_shebang);
     return line;
 }

 /// Advances the reader over whitespace, stopping at the first '\n', the
 /// first non-whitespace character, or end of input.
 fn consume_non_eol_whitespace(rdr: string_reader) {
     loop {
         if !is_whitespace(rdr.curr) { break; }
         if rdr.curr == '\n' || is_eof(rdr) { break; }
         bump(rdr);
     }
 }

 /// Appends a `blank_line` entry with no text to `comments`, positioned at
 /// the reader's current `chpos`.
 fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
     debug!(">>> blank-line comment");
     let no_lines: ~[~str] = ~[];
     let entry = {style: blank_line, lines: no_lines, pos: rdr.chpos};
     comments.push(entry);
 }

 /// Advances the reader over all whitespace, newlines included. Each '\n'
 /// found at column 0 is recorded in `comments` as a blank line.
 fn consume_whitespace_counting_blank_lines(rdr: string_reader,
                                            comments: &mut ~[cmnt]) {
     loop {
         if !is_whitespace(rdr.curr) || is_eof(rdr) { break; }
         let at_blank_line = rdr.col == 0u && rdr.curr == '\n';
         if at_blank_line {
             push_blank_line_comment(rdr, comments);
         }
         bump(rdr);
     }
 }


 fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
                                             comments: &mut ~[cmnt]) {
     debug!(">>> shebang comment");
     let p = rdr.chpos;
     debug!("<<< shebang comment");
     comments.push({
         style: if code_to_the_left { trailing } else { isolated },
         lines: ~[read_one_line_comment(rdr)],
         pos: p
     });
 }

 fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
                                           comments: &mut ~[cmnt]) {
     debug!(">>> line comments");
     let p = rdr.chpos;
     let mut lines: ~[~str] = ~[];
     while rdr.curr == '/' && nextch(rdr) == '/' {
         let line = read_one_line_comment(rdr);
         log(debug, line);
         if is_doc_comment(line) { // doc-comments are not put in comments
             break;
         }
         lines.push(line);
         consume_non_eol_whitespace(rdr);
     }
     debug!("<<< line comments");
     if !lines.is_empty() {
         comments.push({
             style: if code_to_the_left { trailing } else { isolated },
             lines: lines,
             pos: p
         });
     }
 }

 /// Reports whether every byte of `s` in the index range [begin, end) is
 /// whitespace according to `is_whitespace`. An empty range, including one
 /// where `begin` is not below `end`, yields true.
 fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool {
     let mut i: uint = begin;
     // `<` rather than `!=`: a start index past `end` must not send the
     // scan walking off the end of the string.
     while i < end {
         if !is_whitespace(s[i] as char) { return false; }
         i += 1u;
     }
     return true;
 }

 /// Appends `s` to `lines` after removing its first `col` bytes, provided
 /// those bytes are all whitespace.
 ///
 /// A line with non-whitespace inside that prefix is pushed unchanged; an
 /// all-whitespace line no longer than `col` is pushed as the empty string.
 fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
                                         s: ~str, col: uint) {
     let len = str::len(s);
     let mut s1;
     if !all_whitespace(s, 0u, uint::min(len, col)) {
         s1 = s;
     } else if col >= len {
         s1 = ~"";
     } else {
         s1 = str::slice(s, col, len);
     }
     log(debug, ~"pushing line: " + s1);
     lines.push(s1);
 }

 /// Reads a block comment at the reader's current position and appends it
 /// to `comments`; block doc-comments are skipped without being recorded.
 ///
 /// Nested open/close pairs are tracked with a depth counter, and reaching
 /// end of input inside the comment is reported through the reader's
 /// `fatal`. The entry is `mixed` when the comment fits on one line and is
 /// followed by more text on that line; otherwise it is `trailing` or
 /// `isolated` according to `code_to_the_left`.
 fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
                                           comments: &mut ~[cmnt]) {
     debug!(">>> block comment");
     let p = rdr.chpos;
     let mut lines: ~[~str] = ~[];
     // Column at which the comment opens; a stored line whose first `col`
     // bytes are all whitespace has that prefix removed.
     let mut col: uint = rdr.col;
     // Two bumps: past the opener that consume_comment matched before
     // calling.
     bump(rdr);
     bump(rdr);

     // doc-comments are not really comments, they are attributes
     if rdr.curr == '*' || rdr.curr == '!' {
         // Scan to the first closer; nesting is not tracked on this path.
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
             bump(rdr);
         }
         // Step past the closer unless input ran out first.
         if !is_eof(rdr) {
             bump(rdr);
             bump(rdr);
         }
         return;
     }

     // The opener was already consumed, so the first line is seeded with it.
     let mut curr_line = ~"/*";
     // Nesting depth; the comment ends when it drops back to zero.
     let mut level: int = 1;
     while level > 0 {
         debug!("=== block comment level %d", level);
         if is_eof(rdr) {(rdr as reader).fatal(~"unterminated block comment");}
         if rdr.curr == '\n' {
             // End of a source line: store it and start a fresh one.
             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
             curr_line = ~"";
             bump(rdr);
         } else {
             str::push_char(&mut curr_line, rdr.curr);
             if rdr.curr == '/' && nextch(rdr) == '*' {
                 // Nested opener: the '/' is already pushed, add the '*'.
                 bump(rdr);
                 bump(rdr);
                 curr_line += ~"*";
                 level += 1;
             } else {
                 if rdr.curr == '*' && nextch(rdr) == '/' {
                     // Closer: the '*' is already pushed, add the '/'.
                     bump(rdr);
                     bump(rdr);
                     curr_line += ~"/";
                     level -= 1;
                 } else { bump(rdr); }
             }
         }
     }
     // Store the final (possibly only) line if it holds anything.
     if str::len(curr_line) != 0 {
         trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
     }
     let mut style = if code_to_the_left { trailing } else { isolated };
     consume_non_eol_whitespace(rdr);
     // One-line comment with something after it on the same line.
     if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u {
         style = mixed;
     }
     debug!("<<< block comment");
     comments.push({style: style, lines: lines, pos: p});
 }

 /// Reports whether the reader's current and next characters open a line
 /// comment, a block comment, or a "#!" line.
 fn peeking_at_comment(rdr: string_reader) -> bool {
     if rdr.curr == '/' && nextch(rdr) == '/' { return true; }
     if rdr.curr == '/' && nextch(rdr) == '*' { return true; }
     return rdr.curr == '#' && nextch(rdr) == '!';
 }

 /// Dispatches on the two characters at the reader's position to the
 /// matching line, block, or shebang reader. Fails when the reader is not
 /// positioned at any of the three openers.
 fn consume_comment(rdr: string_reader, code_to_the_left: bool,
                    comments: &mut ~[cmnt]) {
     debug!(">>> consume comment");
     let opens_line = rdr.curr == '/' && nextch(rdr) == '/';
     if opens_line {
         read_line_comments(rdr, code_to_the_left, comments);
     } else {
         let opens_block = rdr.curr == '/' && nextch(rdr) == '*';
         if opens_block {
             read_block_comment(rdr, code_to_the_left, comments);
         } else {
             let opens_shebang = rdr.curr == '#' && nextch(rdr) == '!';
             if !opens_shebang { fail; }
             read_shebang_comment(rdr, code_to_the_left, comments);
         }
     }
     debug!("<<< consume comment");
 }

 /// A literal token found in the source: its text as returned by
 /// `get_str_from`, and the `lo` position of the token's span.
 type lit = {lit: ~str, pos: uint};

 /// Reads all of `srdr`, lexes it as the file `path`, and returns the
 /// comments and literal tokens found, each in source order.
 ///
 /// Before every token, whitespace and comments are consumed; comments and
 /// blank lines go into `cmnts`. A comment counts as having code to its
 /// left only if a token has already been read and no newline was crossed
 /// since. Each literal token's source text and span start go into `lits`.
 fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                 path: ~str,
                                 srdr: io::Reader) ->
    {cmnts: ~[cmnt], lits: ~[lit]} {
     let src = @str::from_bytes(srdr.read_whole_stream());
     let itr = parse::token::mk_fake_ident_interner();
     let filemap = codemap::new_filemap(path, src, 0u, 0u);
     let rdr = lexer::new_low_level_string_reader(span_diagnostic,
                                                  filemap, itr);

     let mut cmnts: ~[cmnt] = ~[];
     let mut lits: ~[lit] = ~[];
     let mut seen_token: bool = false;
     while !is_eof(rdr) {
         // Leading whitespace and comments in front of the next token.
         let mut code_to_the_left = seen_token;
         consume_non_eol_whitespace(rdr);
         if rdr.curr == '\n' {
             code_to_the_left = false;
             consume_whitespace_counting_blank_lines(rdr, &mut cmnts);
         }
         while peeking_at_comment(rdr) {
             consume_comment(rdr, code_to_the_left, &mut cmnts);
             consume_whitespace_counting_blank_lines(rdr, &mut cmnts);
         }

         let bstart = rdr.pos;
         rdr.next_token();
         //discard, and look ahead; we're working with internal state
         let {tok: tok, sp: sp} = rdr.peek();
         if !token::is_lit(tok) {
             log(debug, ~"tok: " + token::to_str(rdr.interner, tok));
         } else {
             let text = get_str_from(rdr, bstart);
             lits.push({lit: text, pos: sp.lo});
             log(debug, ~"tok lit: " + text);
         }
         seen_token = true;
     }
     return {cmnts: cmnts, lits: lits};
 }
	use io::println;//XXXXXXXXxxx
	use io::ReaderUtil;
	use util::interner;
	use lexer::{string_reader, bump, is_eof, nextch,
	is_whitespace, get_str_from, reader};

	export cmnt;
	export lit;
	export cmnt_style;
	export gather_comments_and_literals;
	export is_doc_comment, doc_comment_style, strip_doc_comment_decoration;
	export isolated, trailing, mixed, blank_line;

	/// Placement of a comment relative to the code around it.
	enum cmnt_style {
	isolated, // No code on either side of each line of the comment
	trailing, // Code exists to the left of the comment
	mixed, // Code before /* foo */ and after the comment
	blank_line, // Just a manual blank line "\n\n", for layout
	}

	/// Equality for comment styles, decided by comparing the variants after
	/// casting both to `uint`.
	impl cmnt_style : cmp::Eq {
	pure fn eq(other: &cmnt_style) -> bool {
	(self as uint) == ((*other) as uint)
	}
	pure fn ne(other: &cmnt_style) -> bool {
	(self as uint) != ((*other) as uint)
	}
	}

	/// A gathered comment: its placement style, its text split into lines,
	/// and the reader's `chpos` recorded when the comment was encountered.
	type cmnt = {style: cmnt_style, lines: ~[~str], pos: uint};

	fn is_doc_comment(s: ~str) -> bool {
	s.starts_with(~"///") \|\|
	s.starts_with(~"//!") \|\|
	s.starts_with(~"/**") \|\|
	s.starts_with(~"/*!")
	}

	/// Classifies a doc-comment as an inner or an outer attribute.
	///
	/// Openers containing a bang yield `ast::attr_inner`; every other
	/// doc-comment yields `ast::attr_outer`. Asserts that `comment`
	/// satisfies `is_doc_comment`.
	fn doc_comment_style(comment: ~str) -> ast::attr_style {
	    assert is_doc_comment(comment);
	    if comment.starts_with(~"//!") || comment.starts_with(~"/*!") {
	        ast::attr_inner
	    } else {
	        ast::attr_outer
	    }
	}

	/// Removes the comment markers from a doc-comment and returns its text.
	///
	/// A comment starting with two slashes loses its first three bytes and
	/// is then trimmed. A comment starting with slash-star loses its
	/// three-byte opener and two-byte closer; what remains is split into
	/// lines, blank lines at either end are dropped, and a shared left
	/// margin is removed in three passes (tabs/spaces, at most one column of
	/// `*`, tabs/spaces again) before the lines are rejoined with "\n". A
	/// block comment too short to hold any text yields the empty string.
	/// Any other input fails with "not a doc-comment: ...".
	fn strip_doc_comment_decoration(comment: ~str) -> ~str {

	    /// remove whitespace-only lines from the start/end of `lines`
	    fn vertical_trim(lines: ~[~str]) -> ~[~str] {
	        let mut i = 0u, j = lines.len();
	        while i < j && lines[i].trim().is_empty() {
	            i += 1u;
	        }
	        while j > i && lines[j - 1u].trim().is_empty() {
	            j -= 1u;
	        }
	        return lines.slice(i, j);
	    }

	    // drop leftmost columns that contain only values in chars
	    //
	    // `i` ends up as the smallest char index, capped by `max`, at which
	    // some non-blank line holds a character that is not in `chars`; that
	    // many leading chars are then cut from every line.
	    fn block_trim(lines: ~[~str], chars: ~str, max: Option<uint>) -> ~[~str] {

	        let mut i = max.get_default(uint::max_value);
	        for lines.each |line| {
	            // Blank lines do not constrain the margin.
	            if line.trim().is_empty() {
	                loop;
	            }
	            for line.each_chari |j, c| {
	                if j >= i {
	                    break;
	                }
	                if !chars.contains_char(c) {
	                    i = j;
	                    break;
	                }
	            }
	        }

	        return do lines.map |line| {
	            let chars = str::chars(*line);
	            // A line shorter than the margin has nothing left after the cut.
	            if i > chars.len() {
	                ~""
	            } else {
	                str::from_chars(chars.slice(i, chars.len()))
	            }
	        };
	    }

	    if comment.starts_with(~"//") {
	        return comment.slice(3u, comment.len()).trim();
	    }

	    if comment.starts_with(~"/*") {
	        // Opener (3 bytes) plus closer (2 bytes) need at least 5 bytes.
	        // Shorter input, such as the 4-byte empty block comment that
	        // still passes is_doc_comment, would ask slice for a range whose
	        // start lies past its end, so it is answered with empty text.
	        if comment.len() < 5u {
	            return ~"";
	        }
	        let lines = str::lines_any(comment.slice(3u, comment.len() - 2u));
	        let lines = vertical_trim(lines);
	        let lines = block_trim(lines, ~"\t ", None);
	        let lines = block_trim(lines, ~"*", Some(1u));
	        let lines = block_trim(lines, ~"\t ", None);
	        return str::connect(lines, ~"\n");
	    }

	    fail ~"not a doc-comment: " + comment;
	}

	/// Collects characters from the reader up to, but not including, the
	/// next '\n' or end of input, and returns them. A terminating '\n' is
	/// consumed.
	fn read_to_eol(rdr: string_reader) -> ~str {
	let mut val = ~"";
	while rdr.curr != '\n' && !is_eof(rdr) {
	str::push_char(&mut val, rdr.curr);
	bump(rdr);
	}
	// Step past the newline so the reader starts on the following line.
	if rdr.curr == '\n' { bump(rdr); }
	return val;
	}

	fn read_one_line_comment(rdr: string_reader) -> ~str {
	let val = read_to_eol(rdr);
	assert ((val[0] == '/' as u8 && val[1] == '/' as u8) \|\|
	(val[0] == '#' as u8 && val[1] == '!' as u8));
	return val;
	}

	/// Advances the reader over whitespace, stopping at the first '\n', the
	/// first non-whitespace character, or end of input.
	fn consume_non_eol_whitespace(rdr: string_reader) {
	while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
	bump(rdr);
	}
	}

	/// Appends a `blank_line` entry with no text to `comments`, positioned
	/// at the reader's current `chpos`.
	fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
	debug!(">>> blank-line comment");
	let v: ~[~str] = ~[];
	comments.push({style: blank_line, lines: v, pos: rdr.chpos});
	}

	/// Advances the reader over all whitespace, newlines included. Each
	/// '\n' found at column 0 is recorded in `comments` as a blank line.
	fn consume_whitespace_counting_blank_lines(rdr: string_reader,
	comments: &mut ~[cmnt]) {
	while is_whitespace(rdr.curr) && !is_eof(rdr) {
	// A newline at column 0 means the line held nothing at all.
	if rdr.col == 0u && rdr.curr == '\n' {
	push_blank_line_comment(rdr, comments);
	}
	bump(rdr);
	}
	}


	fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> shebang comment");
	let p = rdr.chpos;
	debug!("<<< shebang comment");
	comments.push({
	style: if code_to_the_left { trailing } else { isolated },
	lines: ~[read_one_line_comment(rdr)],
	pos: p
	});
	}

	/// Reads a run of consecutive line comments and appends them to
	/// `comments` as a single entry.
	///
	/// The run ends at the first line that does not start with two slashes,
	/// or at the first doc-comment line, which is consumed but not stored.
	/// Nothing is appended when no lines were collected.
	fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> line comments");
	let p = rdr.chpos;
	let mut lines: ~[~str] = ~[];
	while rdr.curr == '/' && nextch(rdr) == '/' {
	let line = read_one_line_comment(rdr);
	log(debug, line);
	if is_doc_comment(line) { // doc-comments are not put in comments
	break;
	}
	lines.push(line);
	// Skip indentation so the next line's opener can be tested.
	consume_non_eol_whitespace(rdr);
	}
	debug!("<<< line comments");
	if !lines.is_empty() {
	comments.push({
	style: if code_to_the_left { trailing } else { isolated },
	lines: lines,
	pos: p
	});
	}
	}

	/// Reports whether every byte of `s` in the index range [begin, end) is
	/// whitespace according to `is_whitespace`. An empty range, including
	/// one where `begin` is not below `end`, yields true.
	fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool {
	    let mut i: uint = begin;
	    // `<` rather than `!=`: a start index past `end` must not send the
	    // scan walking off the end of the string.
	    while i < end {
	        if !is_whitespace(s[i] as char) { return false; }
	        i += 1u;
	    }
	    return true;
	}

	/// Appends `s` to `lines` after removing its first `col` bytes, provided
	/// those bytes are all whitespace.
	///
	/// A line with non-whitespace inside that prefix is pushed unchanged; an
	/// all-whitespace line no longer than `col` is pushed as the empty
	/// string.
	fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
	s: ~str, col: uint) {
	let mut s1;
	let len = str::len(s);
	// Only the first min(len, col) bytes are inspected.
	if all_whitespace(s, 0u, uint::min(len, col)) {
	if col < len {
	s1 = str::slice(s, col, len);
	} else { s1 = ~""; }
	} else { s1 = s; }
	log(debug, ~"pushing line: " + s1);
	lines.push(s1);
	}

	fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> block comment");
	let p = rdr.chpos;
	let mut lines: ~[~str] = ~[];
	let mut col: uint = rdr.col;
	bump(rdr);
	bump(rdr);

	// doc-comments are not really comments, they are attributes
	if rdr.curr == '*' \|\| rdr.curr == '!' {
	while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
	bump(rdr);
	}
	if !is_eof(rdr) {
	bump(rdr);
	bump(rdr);
	}
	return;
	}

	let mut curr_line = ~"/*";
	let mut level: int = 1;
	while level > 0 {
	debug!("=== block comment level %d", level);
	if is_eof(rdr) {(rdr as reader).fatal(~"unterminated block comment");}
	if rdr.curr == '\n' {
	trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
	curr_line = ~"";
	bump(rdr);
	} else {
	str::push_char(&mut curr_line, rdr.curr);
	if rdr.curr == '/' && nextch(rdr) == '*' {
	bump(rdr);
	bump(rdr);
	curr_line += ~"*";
	level += 1;
	} else {
	if rdr.curr == '*' && nextch(rdr) == '/' {
	bump(rdr);
	bump(rdr);
	curr_line += ~"/";
	level -= 1;
	} else { bump(rdr); }
	}
	}
	}
	if str::len(curr_line) != 0 {
	trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
	}
	let mut style = if code_to_the_left { trailing } else { isolated };
	consume_non_eol_whitespace(rdr);
	if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u {
	style = mixed;
	}
	debug!("<<< block comment");
	comments.push({style: style, lines: lines, pos: p});
	}

	fn peeking_at_comment(rdr: string_reader) -> bool {
	return ((rdr.curr == '/' && nextch(rdr) == '/') \|\|
	(rdr.curr == '/' && nextch(rdr) == '*')) \|\|
	(rdr.curr == '#' && nextch(rdr) == '!');
	}

	/// Dispatches on the two characters at the reader's position to the
	/// matching line, block, or shebang reader. Fails when the reader is not
	/// positioned at any of the three openers.
	fn consume_comment(rdr: string_reader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> consume comment");
	if rdr.curr == '/' && nextch(rdr) == '/' {
	read_line_comments(rdr, code_to_the_left, comments);
	} else if rdr.curr == '/' && nextch(rdr) == '*' {
	read_block_comment(rdr, code_to_the_left, comments);
	} else if rdr.curr == '#' && nextch(rdr) == '!' {
	read_shebang_comment(rdr, code_to_the_left, comments);
	} else { fail; }
	debug!("<<< consume comment");
	}

	/// A literal token found in the source: its text as returned by
	/// `get_str_from`, and the `lo` position of the token's span.
	type lit = {lit: ~str, pos: uint};

	/// Reads all of `srdr`, lexes it as the file `path`, and returns the
	/// comments and literal tokens found, each in source order.
	///
	/// Before every token, whitespace and comments are consumed; comments
	/// and blank lines go into `cmnts`. Each literal token's source text and
	/// span start go into `lits`.
	fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
	path: ~str,
	srdr: io::Reader) ->
	{cmnts: ~[cmnt], lits: ~[lit]} {
	let src = @str::from_bytes(srdr.read_whole_stream());
	let itr = parse::token::mk_fake_ident_interner();
	let rdr = lexer::new_low_level_string_reader
	(span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr);

	let mut comments: ~[cmnt] = ~[];
	let mut literals: ~[lit] = ~[];
	let mut first_read: bool = true;
	while !is_eof(rdr) {
	// This inner loop always exits through the `break` at its end, so its
	// body runs exactly once per outer iteration.
	loop {
	// A comment has code to its left only once a token has been read and
	// no newline has been crossed since.
	let mut code_to_the_left = !first_read;
	consume_non_eol_whitespace(rdr);
	if rdr.curr == '\n' {
	code_to_the_left = false;
	consume_whitespace_counting_blank_lines(rdr, &mut comments);
	}
	while peeking_at_comment(rdr) {
	consume_comment(rdr, code_to_the_left, &mut comments);
	consume_whitespace_counting_blank_lines(rdr, &mut comments);
	}
	break;
	}


	// Remember where the token starts so its source text can be recovered.
	let bstart = rdr.pos;
	rdr.next_token();
	//discard, and look ahead; we're working with internal state
	let {tok: tok, sp: sp} = rdr.peek();
	if token::is_lit(tok) {
	let s = get_str_from(rdr, bstart);
	literals.push({lit: s, pos: sp.lo});
	log(debug, ~"tok lit: " + s);
	} else {
	log(debug, ~"tok: " + token::to_str(rdr.interner, tok));
	}
	first_read = false;
	}
	return {cmnts: comments, lits: literals};
	}