//! An iterator over the `char`s of a reader.
//!
//! A copy of the unstable `std::io::Read::chars` code from the standard library.
| |
| use std::error; |
| use std::fmt; |
| use std::io; |
| use std::io::Read; |
| use std::str; |
| |
| pub fn chars<R: Read>(read: R) -> Chars<R> where R: Sized { |
| Chars { inner: read } |
| } |
| |
// Lookup table mapping a lead byte to the length of the UTF-8 sequence it
// introduces; 0 marks bytes that cannot start a sequence.
// See https://tools.ietf.org/html/rfc3629
static UTF8_CHAR_WIDTH: [u8; 256] = [
    // 0x00..=0x7F: single-byte (ASCII) characters.
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    // 0x80..=0xBF: continuation bytes, never valid as a first byte.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    // 0xC0 and 0xC1 are rejected; 0xC2..=0xDF start two-byte sequences.
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    // 0xE0..=0xEF start three-byte sequences.
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    // 0xF0..=0xF4 start four-byte sequences; 0xF5..=0xFF are rejected.
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];

/// Returns the total number of bytes in the UTF-8 sequence introduced by the
/// first byte `b`, or 0 when `b` cannot begin a sequence.
#[inline]
fn utf8_char_width(b: u8) -> usize {
    let width = UTF8_CHAR_WIDTH[usize::from(b)];
    usize::from(width)
}
| |
/// Iterator over the `char`s decoded from the UTF-8 bytes of a reader.
///
/// Values are created with [`chars`]. `Debug` is derived so the iterator can
/// be embedded in types that themselves derive `Debug`.
#[derive(Debug)]
pub struct Chars<R> {
    /// The byte source the characters are decoded from.
    inner: R,
}
| |
/// Errors that can occur while decoding `char`s from a reader.
#[derive(Debug)]
pub enum CharsError {
    /// The bytes read do not form valid UTF-8. This is also reported when the
    /// input ends in the middle of a multi-byte sequence.
    NotUtf8,

    /// The underlying reader returned an I/O error.
    Other(io::Error),
}
| |
| impl<R: Read> Iterator for Chars<R> { |
| type Item = Result<char, CharsError>; |
| |
| fn next(&mut self) -> Option<Result<char, CharsError>> { |
| let mut buf = [0]; |
| let first_byte = match self.inner.read(&mut buf) { |
| Ok(0) => return None, |
| Ok(..) => buf[0], |
| Err(e) => return Some(Err(CharsError::Other(e))), |
| }; |
| let width = utf8_char_width(first_byte); |
| if width == 1 { return Some(Ok(first_byte as char)) } |
| if width == 0 { return Some(Err(CharsError::NotUtf8)) } |
| let mut buf = [first_byte, 0, 0, 0]; |
| { |
| let mut start = 1; |
| while start < width { |
| match self.inner.read(&mut buf[start..width]) { |
| Ok(0) => return Some(Err(CharsError::NotUtf8)), |
| Ok(n) => start += n, |
| Err(e) => return Some(Err(CharsError::Other(e))), |
| } |
| } |
| } |
| Some(match str::from_utf8(&buf[..width]).ok() { |
| Some(s) => Ok(s.chars().next().unwrap()), |
| None => Err(CharsError::NotUtf8), |
| }) |
| } |
| } |
| |
| impl error::Error for CharsError { |
| fn description(&self) -> &str { |
| match *self { |
| CharsError::NotUtf8 => "invalid utf8 encoding", |
| CharsError::Other(ref e) => error::Error::description(e), |
| } |
| } |
| fn cause(&self) -> Option<&error::Error> { |
| match *self { |
| CharsError::NotUtf8 => None, |
| CharsError::Other(ref e) => e.cause(), |
| } |
| } |
| } |
| |
| impl fmt::Display for CharsError { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| match *self { |
| CharsError::NotUtf8 => { |
| "byte stream did not contain valid utf8".fmt(f) |
| } |
| CharsError::Other(ref e) => e.fmt(f), |
| } |
| } |
| } |