src/libsyntax/parse/comments.rs - third_party/rust - Git at Google

 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 use ast;
 use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
 use parse::lexer::{is_whitespace, with_str_from, reader};
 use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
 use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
 use parse::lexer;
 use parse::token;
 use parse::token::{get_ident_interner};

 use std::io;
 use std::str;
 use std::uint;

 #[deriving(Clone, Eq)]
 pub enum cmnt_style {
     isolated, // No code on either side of each line of the comment
     trailing, // Code exists to the left of the comment
     mixed, // Code before /* foo */ and after the comment
     blank_line, // Just a manual blank line "\n\n", for layout
 }

 #[deriving(Clone)]
 pub struct cmnt {
     style: cmnt_style,
     lines: ~[~str],
     pos: BytePos
 }

 pub fn is_doc_comment(s: &str) -> bool {
     (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
     s.starts_with("//!") ||
     (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
     s.starts_with("/*!")
 }

 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
     assert!(is_doc_comment(comment));
     if comment.starts_with("//!") || comment.starts_with("/*!") {
         ast::AttrInner
     } else {
         ast::AttrOuter
     }
 }

 pub fn strip_doc_comment_decoration(comment: &str) -> ~str {

     /// remove whitespace-only lines from the start/end of lines
     fn vertical_trim(lines: ~[~str]) -> ~[~str] {
         let mut i = 0u;
         let mut j = lines.len();
         // first line of all-stars should be omitted
         if lines.len() > 0 && lines[0].iter().all(|c| c == '*') {
             i += 1;
         }
         while i < j && lines[i].trim().is_empty() {
             i += 1;
         }
         // like the first, a last line of all stars should be omitted
         if j > i && lines[j - 1].iter().skip(1).all(|c| c == '*') {
             j -= 1;
         }
         while j > i && lines[j - 1].trim().is_empty() {
             j -= 1;
         }
         return lines.slice(i, j).to_owned();
     }

     /// remove a "[ \t]*\*" block from each line, if possible
     fn horizontal_trim(lines: ~[~str]) -> ~[~str] {
         let mut i = uint::max_value;
         let mut can_trim = true;
         let mut first = true;
         for line in lines.iter() {
             for (j, c) in line.iter().enumerate() {
                 if j > i || !"* \t".contains_char(c) {
                     can_trim = false;
                     break;
                 }
                 if c == '*' {
                     if first {
                         i = j;
                         first = false;
                     } else if i != j {
                         can_trim = false;
                     }
                     break;
                 }
             }
             if i > line.len() {
                 can_trim = false;
             }
             if !can_trim {
                 break;
             }
         }

         if can_trim {
             do lines.map |line| {
                 line.slice(i + 1, line.len()).to_owned()
             }
         } else {
             lines
         }
     }

     // one-line comments lose their prefix
     static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
     for prefix in ONLINERS.iter() {
         if comment.starts_with(*prefix) {
             return comment.slice_from(prefix.len()).to_owned();
         }
     }

     if comment.starts_with("/*") {
         let lines = comment.slice(3u, comment.len() - 2u)
             .any_line_iter()
             .map(|s| s.to_owned())
             .collect::<~[~str]>();

         let lines = vertical_trim(lines);
         let lines = horizontal_trim(lines);

         return lines.connect("\n");
     }

     fail!("not a doc-comment: %s", comment);
 }

 fn read_to_eol(rdr: @mut StringReader) -> ~str {
     let mut val = ~"";
     while rdr.curr != '\n' && !is_eof(rdr) {
         val.push_char(rdr.curr);
         bump(rdr);
     }
     if rdr.curr == '\n' { bump(rdr); }
     return val;
 }

 fn read_one_line_comment(rdr: @mut StringReader) -> ~str {
     let val = read_to_eol(rdr);
     assert!((val[0] == '/' as u8 && val[1] == '/' as u8) ||
                  (val[0] == '#' as u8 && val[1] == '!' as u8));
     return val;
 }

 fn consume_non_eol_whitespace(rdr: @mut StringReader) {
     while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
         bump(rdr);
     }
 }

 fn push_blank_line_comment(rdr: @mut StringReader, comments: &mut ~[cmnt]) {
     debug!(">>> blank-line comment");
     let v: ~[~str] = ~[];
     comments.push(cmnt {style: blank_line, lines: v, pos: rdr.last_pos});
 }

 fn consume_whitespace_counting_blank_lines(rdr: @mut StringReader,
                                            comments: &mut ~[cmnt]) {
     while is_whitespace(rdr.curr) && !is_eof(rdr) {
         if rdr.col == CharPos(0u) && rdr.curr == '\n' {
             push_blank_line_comment(rdr, &mut *comments);
         }
         bump(rdr);
     }
 }


 fn read_shebang_comment(rdr: @mut StringReader, code_to_the_left: bool,
                                             comments: &mut ~[cmnt]) {
     debug!(">>> shebang comment");
     let p = rdr.last_pos;
     debug!("<<< shebang comment");
     comments.push(cmnt {
         style: if code_to_the_left { trailing } else { isolated },
         lines: ~[read_one_line_comment(rdr)],
         pos: p
     });
 }

 fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
                                           comments: &mut ~[cmnt]) {
     debug!(">>> line comments");
     let p = rdr.last_pos;
     let mut lines: ~[~str] = ~[];
     while rdr.curr == '/' && nextch(rdr) == '/' {
         let line = read_one_line_comment(rdr);
         debug!("%s", line);
         if is_doc_comment(line) { // doc-comments are not put in comments
             break;
         }
         lines.push(line);
         consume_non_eol_whitespace(rdr);
     }
     debug!("<<< line comments");
     if !lines.is_empty() {
         comments.push(cmnt {
             style: if code_to_the_left { trailing } else { isolated },
             lines: lines,
             pos: p
         });
     }
 }

 // Returns None if the first col chars of s contain a non-whitespace char.
 // Otherwise returns Some(k) where k is first char offset after that leading
 // whitespace.  Note k may be outside bounds of s.
 fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
     let len = s.len();
     let mut col = col.to_uint();
     let mut cursor: uint = 0;
     while col > 0 && cursor < len {
         let r: str::CharRange = s.char_range_at(cursor);
         if !r.ch.is_whitespace() {
             return None;
         }
         cursor = r.next;
         col -= 1;
     }
     return Some(cursor);
 }

 fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
                                         s: ~str, col: CharPos) {
     let len = s.len();
     let s1 = match all_whitespace(s, col) {
         Some(col) => {
             if col < len {
                 s.slice(col, len).to_owned()
             } else {  ~"" }
         }
         None => s,
     };
     debug!("pushing line: %s", s1);
     lines.push(s1);
 }

 fn read_block_comment(rdr: @mut StringReader,
                       code_to_the_left: bool,
                       comments: &mut ~[cmnt]) {
     debug!(">>> block comment");
     let p = rdr.last_pos;
     let mut lines: ~[~str] = ~[];
     let col: CharPos = rdr.col;
     bump(rdr);
     bump(rdr);

     let mut curr_line = ~"/*";

     // doc-comments are not really comments, they are attributes
     if rdr.curr == '*' || rdr.curr == '!' {
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
             curr_line.push_char(rdr.curr);
             bump(rdr);
         }
         if !is_eof(rdr) {
             curr_line.push_str("*/");
             bump(rdr);
             bump(rdr);
         }
         if !is_block_non_doc_comment(curr_line) { return; }
         assert!(!curr_line.contains_char('\n'));
         lines.push(curr_line);
     } else {
         let mut level: int = 1;
         while level > 0 {
             debug!("=== block comment level %d", level);
             if is_eof(rdr) {
                 (rdr as @mut reader).fatal(~"unterminated block comment");
             }
             if rdr.curr == '\n' {
                 trim_whitespace_prefix_and_push_line(&mut lines, curr_line,
                                                      col);
                 curr_line = ~"";
                 bump(rdr);
             } else {
                 curr_line.push_char(rdr.curr);
                 if rdr.curr == '/' && nextch(rdr) == '*' {
                     bump(rdr);
                     bump(rdr);
                     curr_line.push_char('*');
                     level += 1;
                 } else {
                     if rdr.curr == '*' && nextch(rdr) == '/' {
                         bump(rdr);
                         bump(rdr);
                         curr_line.push_char('/');
                         level -= 1;
                     } else { bump(rdr); }
                 }
             }
         }
         if curr_line.len() != 0 {
             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
         }
     }

     let mut style = if code_to_the_left { trailing } else { isolated };
     consume_non_eol_whitespace(rdr);
     if !is_eof(rdr) && rdr.curr != '\n' && lines.len() == 1u {
         style = mixed;
     }
     debug!("<<< block comment");
     comments.push(cmnt {style: style, lines: lines, pos: p});
 }

 fn peeking_at_comment(rdr: @mut StringReader) -> bool {
     return ((rdr.curr == '/' && nextch(rdr) == '/') ||
          (rdr.curr == '/' && nextch(rdr) == '*')) ||
          (rdr.curr == '#' && nextch(rdr) == '!');
 }

 fn consume_comment(rdr: @mut StringReader,
                    code_to_the_left: bool,
                    comments: &mut ~[cmnt]) {
     debug!(">>> consume comment");
     if rdr.curr == '/' && nextch(rdr) == '/' {
         read_line_comments(rdr, code_to_the_left, comments);
     } else if rdr.curr == '/' && nextch(rdr) == '*' {
         read_block_comment(rdr, code_to_the_left, comments);
     } else if rdr.curr == '#' && nextch(rdr) == '!' {
         read_shebang_comment(rdr, code_to_the_left, comments);
     } else { fail!(); }
     debug!("<<< consume comment");
 }

 #[deriving(Clone)]
 pub struct lit {
     lit: ~str,
     pos: BytePos
 }

 // it appears this function is called only from pprust... that's
 // probably not a good thing.
 pub fn gather_comments_and_literals(span_diagnostic:
                                     @mut diagnostic::span_handler,
                                     path: @str,
                                     srdr: @io::Reader)
                                  -> (~[cmnt], ~[lit]) {
     let src = str::from_utf8(srdr.read_whole_stream()).to_managed();
     let cm = CodeMap::new();
     let filemap = cm.new_filemap(path, src);
     let rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);

     let mut comments: ~[cmnt] = ~[];
     let mut literals: ~[lit] = ~[];
     let mut first_read: bool = true;
     while !is_eof(rdr) {
         loop {
             let mut code_to_the_left = !first_read;
             consume_non_eol_whitespace(rdr);
             if rdr.curr == '\n' {
                 code_to_the_left = false;
                 consume_whitespace_counting_blank_lines(rdr, &mut comments);
             }
             while peeking_at_comment(rdr) {
                 consume_comment(rdr, code_to_the_left, &mut comments);
                 consume_whitespace_counting_blank_lines(rdr, &mut comments);
             }
             break;
         }


         let bstart = rdr.last_pos;
         rdr.next_token();
         //discard, and look ahead; we're working with internal state
         let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
         if token::is_lit(&tok) {
             do with_str_from(rdr, bstart) |s| {
                 debug!("tok lit: %s", s);
                 literals.push(lit {lit: s.to_owned(), pos: sp.lo});
             }
         } else {
             debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
         }
         first_read = false;
     }

     (comments, literals)
 }

 #[cfg(test)]
 mod test {
     use super::*;

     #[test] fn test_block_doc_comment_1() {
         let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
         let stripped = strip_doc_comment_decoration(comment);
         assert_eq!(stripped, ~" Test \n*  Test\n   Test");
     }

     #[test] fn test_block_doc_comment_2() {
         let comment = "/**\n * Test\n *  Test\n*/";
         let stripped = strip_doc_comment_decoration(comment);
         assert_eq!(stripped, ~" Test\n  Test");
     }

     #[test] fn test_block_doc_comment_3() {
         let comment = "/**\n let a: *int;\n *a = 5;\n*/";
         let stripped = strip_doc_comment_decoration(comment);
         assert_eq!(stripped, ~" let a: *int;\n *a = 5;");
     }

     #[test] fn test_block_doc_comment_4() {
         let comment = "/*******************\n test\n *********************/";
         let stripped = strip_doc_comment_decoration(comment);
         assert_eq!(stripped, ~" test");
     }

     #[test] fn test_line_doc_comment() {
         let stripped = strip_doc_comment_decoration("/// test");
         assert_eq!(stripped, ~" test");
         let stripped = strip_doc_comment_decoration("///! test");
         assert_eq!(stripped, ~" test");
         let stripped = strip_doc_comment_decoration("// test");
         assert_eq!(stripped, ~" test");
         let stripped = strip_doc_comment_decoration("// test");
         assert_eq!(stripped, ~" test");
         let stripped = strip_doc_comment_decoration("///test");
         assert_eq!(stripped, ~"test");
         let stripped = strip_doc_comment_decoration("///!test");
         assert_eq!(stripped, ~"test");
         let stripped = strip_doc_comment_decoration("//test");
         assert_eq!(stripped, ~"test");
     }
 }
	// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
	// file at the top-level directory of this distribution and at
	// http://rust-lang.org/COPYRIGHT.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	use ast;
	use codemap::{BytePos, CharPos, CodeMap, Pos};
	use diagnostic;
	use parse::lexer::{is_whitespace, with_str_from, reader};
	use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
	use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
	use parse::lexer;
	use parse::token;
	use parse::token::{get_ident_interner};

	use std::io;
	use std::str;
	use std::uint;

	#[deriving(Clone, Eq)]
	pub enum cmnt_style {
	isolated, // No code on either side of each line of the comment
	trailing, // Code exists to the left of the comment
	mixed, // Code before /* foo */ and after the comment
	blank_line, // Just a manual blank line "\n\n", for layout
	}

	#[deriving(Clone)]
	pub struct cmnt {
	style: cmnt_style,
	lines: ~[~str],
	pos: BytePos
	}

	pub fn is_doc_comment(s: &str) -> bool {
	(s.starts_with("///") && !is_line_non_doc_comment(s)) \|\|
	s.starts_with("//!") \|\|
	(s.starts_with("/**") && !is_block_non_doc_comment(s)) \|\|
	s.starts_with("/*!")
	}

	pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
	assert!(is_doc_comment(comment));
	if comment.starts_with("//!") \|\| comment.starts_with("/*!") {
	ast::AttrInner
	} else {
	ast::AttrOuter
	}
	}

	pub fn strip_doc_comment_decoration(comment: &str) -> ~str {

	/// remove whitespace-only lines from the start/end of lines
	fn vertical_trim(lines: ~[~str]) -> ~[~str] {
	let mut i = 0u;
	let mut j = lines.len();
	// first line of all-stars should be omitted
	if lines.len() > 0 && lines[0].iter().all(\|c\| c == '*') {
	i += 1;
	}
	while i < j && lines[i].trim().is_empty() {
	i += 1;
	}
	// like the first, a last line of all stars should be omitted
	if j > i && lines[j - 1].iter().skip(1).all(\|c\| c == '*') {
	j -= 1;
	}
	while j > i && lines[j - 1].trim().is_empty() {
	j -= 1;
	}
	return lines.slice(i, j).to_owned();
	}

	/// remove a "[ \t]\" block from each line, if possible
	fn horizontal_trim(lines: ~[~str]) -> ~[~str] {
	let mut i = uint::max_value;
	let mut can_trim = true;
	let mut first = true;
	for line in lines.iter() {
	for (j, c) in line.iter().enumerate() {
	if j > i \|\| !"* \t".contains_char(c) {
	can_trim = false;
	break;
	}
	if c == '*' {
	if first {
	i = j;
	first = false;
	} else if i != j {
	can_trim = false;
	}
	break;
	}
	}
	if i > line.len() {
	can_trim = false;
	}
	if !can_trim {
	break;
	}
	}

	if can_trim {
	do lines.map \|line\| {
	line.slice(i + 1, line.len()).to_owned()
	}
	} else {
	lines
	}
	}

	// one-line comments lose their prefix
	static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
	for prefix in ONLINERS.iter() {
	if comment.starts_with(*prefix) {
	return comment.slice_from(prefix.len()).to_owned();
	}
	}

	if comment.starts_with("/*") {
	let lines = comment.slice(3u, comment.len() - 2u)
	.any_line_iter()
	.map(\|s\| s.to_owned())
	.collect::<~[~str]>();

	let lines = vertical_trim(lines);
	let lines = horizontal_trim(lines);

	return lines.connect("\n");
	}

	fail!("not a doc-comment: %s", comment);
	}

	fn read_to_eol(rdr: @mut StringReader) -> ~str {
	let mut val = ~"";
	while rdr.curr != '\n' && !is_eof(rdr) {
	val.push_char(rdr.curr);
	bump(rdr);
	}
	if rdr.curr == '\n' { bump(rdr); }
	return val;
	}

	fn read_one_line_comment(rdr: @mut StringReader) -> ~str {
	let val = read_to_eol(rdr);
	assert!((val[0] == '/' as u8 && val[1] == '/' as u8) \|\|
	(val[0] == '#' as u8 && val[1] == '!' as u8));
	return val;
	}

	fn consume_non_eol_whitespace(rdr: @mut StringReader) {
	while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
	bump(rdr);
	}
	}

	fn push_blank_line_comment(rdr: @mut StringReader, comments: &mut ~[cmnt]) {
	debug!(">>> blank-line comment");
	let v: ~[~str] = ~[];
	comments.push(cmnt {style: blank_line, lines: v, pos: rdr.last_pos});
	}

	fn consume_whitespace_counting_blank_lines(rdr: @mut StringReader,
	comments: &mut ~[cmnt]) {
	while is_whitespace(rdr.curr) && !is_eof(rdr) {
	if rdr.col == CharPos(0u) && rdr.curr == '\n' {
	push_blank_line_comment(rdr, &mut *comments);
	}
	bump(rdr);
	}
	}


	fn read_shebang_comment(rdr: @mut StringReader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> shebang comment");
	let p = rdr.last_pos;
	debug!("<<< shebang comment");
	comments.push(cmnt {
	style: if code_to_the_left { trailing } else { isolated },
	lines: ~[read_one_line_comment(rdr)],
	pos: p
	});
	}

	fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> line comments");
	let p = rdr.last_pos;
	let mut lines: ~[~str] = ~[];
	while rdr.curr == '/' && nextch(rdr) == '/' {
	let line = read_one_line_comment(rdr);
	debug!("%s", line);
	if is_doc_comment(line) { // doc-comments are not put in comments
	break;
	}
	lines.push(line);
	consume_non_eol_whitespace(rdr);
	}
	debug!("<<< line comments");
	if !lines.is_empty() {
	comments.push(cmnt {
	style: if code_to_the_left { trailing } else { isolated },
	lines: lines,
	pos: p
	});
	}
	}

	// Returns None if the first col chars of s contain a non-whitespace char.
	// Otherwise returns Some(k) where k is first char offset after that leading
	// whitespace. Note k may be outside bounds of s.
	fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
	let len = s.len();
	let mut col = col.to_uint();
	let mut cursor: uint = 0;
	while col > 0 && cursor < len {
	let r: str::CharRange = s.char_range_at(cursor);
	if !r.ch.is_whitespace() {
	return None;
	}
	cursor = r.next;
	col -= 1;
	}
	return Some(cursor);
	}

	fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
	s: ~str, col: CharPos) {
	let len = s.len();
	let s1 = match all_whitespace(s, col) {
	Some(col) => {
	if col < len {
	s.slice(col, len).to_owned()
	} else { ~"" }
	}
	None => s,
	};
	debug!("pushing line: %s", s1);
	lines.push(s1);
	}

	fn read_block_comment(rdr: @mut StringReader,
	code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> block comment");
	let p = rdr.last_pos;
	let mut lines: ~[~str] = ~[];
	let col: CharPos = rdr.col;
	bump(rdr);
	bump(rdr);

	let mut curr_line = ~"/*";

	// doc-comments are not really comments, they are attributes
	if rdr.curr == '*' \|\| rdr.curr == '!' {
	while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
	curr_line.push_char(rdr.curr);
	bump(rdr);
	}
	if !is_eof(rdr) {
	curr_line.push_str("*/");
	bump(rdr);
	bump(rdr);
	}
	if !is_block_non_doc_comment(curr_line) { return; }
	assert!(!curr_line.contains_char('\n'));
	lines.push(curr_line);
	} else {
	let mut level: int = 1;
	while level > 0 {
	debug!("=== block comment level %d", level);
	if is_eof(rdr) {
	(rdr as @mut reader).fatal(~"unterminated block comment");
	}
	if rdr.curr == '\n' {
	trim_whitespace_prefix_and_push_line(&mut lines, curr_line,
	col);
	curr_line = ~"";
	bump(rdr);
	} else {
	curr_line.push_char(rdr.curr);
	if rdr.curr == '/' && nextch(rdr) == '*' {
	bump(rdr);
	bump(rdr);
	curr_line.push_char('*');
	level += 1;
	} else {
	if rdr.curr == '*' && nextch(rdr) == '/' {
	bump(rdr);
	bump(rdr);
	curr_line.push_char('/');
	level -= 1;
	} else { bump(rdr); }
	}
	}
	}
	if curr_line.len() != 0 {
	trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
	}
	}

	let mut style = if code_to_the_left { trailing } else { isolated };
	consume_non_eol_whitespace(rdr);
	if !is_eof(rdr) && rdr.curr != '\n' && lines.len() == 1u {
	style = mixed;
	}
	debug!("<<< block comment");
	comments.push(cmnt {style: style, lines: lines, pos: p});
	}

	fn peeking_at_comment(rdr: @mut StringReader) -> bool {
	return ((rdr.curr == '/' && nextch(rdr) == '/') \|\|
	(rdr.curr == '/' && nextch(rdr) == '*')) \|\|
	(rdr.curr == '#' && nextch(rdr) == '!');
	}

	fn consume_comment(rdr: @mut StringReader,
	code_to_the_left: bool,
	comments: &mut ~[cmnt]) {
	debug!(">>> consume comment");
	if rdr.curr == '/' && nextch(rdr) == '/' {
	read_line_comments(rdr, code_to_the_left, comments);
	} else if rdr.curr == '/' && nextch(rdr) == '*' {
	read_block_comment(rdr, code_to_the_left, comments);
	} else if rdr.curr == '#' && nextch(rdr) == '!' {
	read_shebang_comment(rdr, code_to_the_left, comments);
	} else { fail!(); }
	debug!("<<< consume comment");
	}

	#[deriving(Clone)]
	pub struct lit {
	lit: ~str,
	pos: BytePos
	}

	// it appears this function is called only from pprust... that's
	// probably not a good thing.
	pub fn gather_comments_and_literals(span_diagnostic:
	@mut diagnostic::span_handler,
	path: @str,
	srdr: @io::Reader)
	-> (~[cmnt], ~[lit]) {
	let src = str::from_utf8(srdr.read_whole_stream()).to_managed();
	let cm = CodeMap::new();
	let filemap = cm.new_filemap(path, src);
	let rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);

	let mut comments: ~[cmnt] = ~[];
	let mut literals: ~[lit] = ~[];
	let mut first_read: bool = true;
	while !is_eof(rdr) {
	loop {
	let mut code_to_the_left = !first_read;
	consume_non_eol_whitespace(rdr);
	if rdr.curr == '\n' {
	code_to_the_left = false;
	consume_whitespace_counting_blank_lines(rdr, &mut comments);
	}
	while peeking_at_comment(rdr) {
	consume_comment(rdr, code_to_the_left, &mut comments);
	consume_whitespace_counting_blank_lines(rdr, &mut comments);
	}
	break;
	}


	let bstart = rdr.last_pos;
	rdr.next_token();
	//discard, and look ahead; we're working with internal state
	let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
	if token::is_lit(&tok) {
	do with_str_from(rdr, bstart) \|s\| {
	debug!("tok lit: %s", s);
	literals.push(lit {lit: s.to_owned(), pos: sp.lo});
	}
	} else {
	debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
	}
	first_read = false;
	}

	(comments, literals)
	}

	#[cfg(test)]
	mod test {
	use super::*;

	#[test] fn test_block_doc_comment_1() {
	let comment = "/*\n Test \n ** Test\n * Test\n*/";
	let stripped = strip_doc_comment_decoration(comment);
	assert_eq!(stripped, ~" Test \n* Test\n Test");
	}

	#[test] fn test_block_doc_comment_2() {
	let comment = "/*\n Test\n * Test\n*/";
	let stripped = strip_doc_comment_decoration(comment);
	assert_eq!(stripped, ~" Test\n Test");
	}

	#[test] fn test_block_doc_comment_3() {
	let comment = "/*\n let a: int;\n a = 5;\n/";
	let stripped = strip_doc_comment_decoration(comment);
	assert_eq!(stripped, ~" let a: int;\n a = 5;");
	}

	#[test] fn test_block_doc_comment_4() {
	let comment = "/*****************\n test\n *******************/";
	let stripped = strip_doc_comment_decoration(comment);
	assert_eq!(stripped, ~" test");
	}

	#[test] fn test_line_doc_comment() {
	let stripped = strip_doc_comment_decoration("/// test");
	assert_eq!(stripped, ~" test");
	let stripped = strip_doc_comment_decoration("///! test");
	assert_eq!(stripped, ~" test");
	let stripped = strip_doc_comment_decoration("// test");
	assert_eq!(stripped, ~" test");
	let stripped = strip_doc_comment_decoration("// test");
	assert_eq!(stripped, ~" test");
	let stripped = strip_doc_comment_decoration("///test");
	assert_eq!(stripped, ~"test");
	let stripped = strip_doc_comment_decoration("///!test");
	assert_eq!(stripped, ~"test");
	let stripped = strip_doc_comment_decoration("//test");
	assert_eq!(stripped, ~"test");
	}
	}