blob: 7d0c46de3bb95a52a0dd20d24efcd3825257a836 [file] [log] [blame]
//! An iterator over the `char`s of a reader.
//!
//! A copy of the unstable code from the stdlib's std::io::Read::chars.
use std::error;
use std::fmt;
use std::io;
use std::io::Read;
use std::str;
pub fn chars<R: Read>(read: R) -> Chars<R> where R: Sized {
Chars { inner: read }
}
// https://tools.ietf.org/html/rfc3629
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
/// Given a first byte, determine how many bytes are in this UTF-8 character
#[inline]
fn utf8_char_width(b: u8) -> usize {
return UTF8_CHAR_WIDTH[b as usize] as usize;
}
pub struct Chars<R> {
inner: R,
}
#[derive(Debug)]
pub enum CharsError {
NotUtf8,
Other(io::Error),
}
impl<R: Read> Iterator for Chars<R> {
type Item = Result<char, CharsError>;
fn next(&mut self) -> Option<Result<char, CharsError>> {
let mut buf = [0];
let first_byte = match self.inner.read(&mut buf) {
Ok(0) => return None,
Ok(..) => buf[0],
Err(e) => return Some(Err(CharsError::Other(e))),
};
let width = utf8_char_width(first_byte);
if width == 1 { return Some(Ok(first_byte as char)) }
if width == 0 { return Some(Err(CharsError::NotUtf8)) }
let mut buf = [first_byte, 0, 0, 0];
{
let mut start = 1;
while start < width {
match self.inner.read(&mut buf[start..width]) {
Ok(0) => return Some(Err(CharsError::NotUtf8)),
Ok(n) => start += n,
Err(e) => return Some(Err(CharsError::Other(e))),
}
}
}
Some(match str::from_utf8(&buf[..width]).ok() {
Some(s) => Ok(s.chars().next().unwrap()),
None => Err(CharsError::NotUtf8),
})
}
}
impl error::Error for CharsError {
fn description(&self) -> &str {
match *self {
CharsError::NotUtf8 => "invalid utf8 encoding",
CharsError::Other(ref e) => error::Error::description(e),
}
}
fn cause(&self) -> Option<&error::Error> {
match *self {
CharsError::NotUtf8 => None,
CharsError::Other(ref e) => e.cause(),
}
}
}
impl fmt::Display for CharsError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
CharsError::NotUtf8 => {
"byte stream did not contain valid utf8".fmt(f)
}
CharsError::Other(ref e) => e.fmt(f),
}
}
}