Introduce utf8parse to decode Unix terminal input byte by byte
diff --git a/Cargo.toml b/Cargo.toml
index 253b543..47ad106 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@
[target.'cfg(unix)'.dependencies]
nix = "0.11"
+utf8parse = "0.1"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["consoleapi", "handleapi", "minwindef", "processenv", "winbase", "wincon", "winuser"] }
diff --git a/src/error.rs b/src/error.rs
index d9cfdb8..f7c1abd 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -20,7 +20,7 @@
Interrupted,
/// Chars Error
#[cfg(unix)]
- Char(str::Utf8Error),
+ Utf8Error,
/// Unix Error from syscall
#[cfg(unix)]
Errno(nix::Error),
@@ -37,7 +37,7 @@
ReadlineError::Eof => write!(f, "EOF"),
ReadlineError::Interrupted => write!(f, "Interrupted"),
#[cfg(unix)]
- ReadlineError::Char(ref err) => err.fmt(f),
+ ReadlineError::Utf8Error => write!(f, "invalid utf-8: corrupt contents"),
#[cfg(unix)]
ReadlineError::Errno(ref err) => err.fmt(f),
#[cfg(windows)]
@@ -55,7 +55,7 @@
ReadlineError::Eof => "EOF",
ReadlineError::Interrupted => "Interrupted",
#[cfg(unix)]
- ReadlineError::Char(ref err) => err.description(),
+ ReadlineError::Utf8Error => "invalid utf-8: corrupt contents",
#[cfg(unix)]
ReadlineError::Errno(ref err) => err.description(),
#[cfg(windows)]
@@ -79,13 +79,6 @@
}
}
-#[cfg(unix)]
-impl From<str::Utf8Error> for ReadlineError {
- fn from(err: str::Utf8Error) -> ReadlineError {
- ReadlineError::Char(err)
- }
-}
-
#[cfg(windows)]
impl From<char::DecodeUtf16Error> for ReadlineError {
fn from(err: char::DecodeUtf16Error) -> ReadlineError {
diff --git a/src/lib.rs b/src/lib.rs
index 2a741da..25a2ff4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -25,6 +25,8 @@
extern crate nix;
extern crate unicode_segmentation;
extern crate unicode_width;
+#[cfg(unix)]
+extern crate utf8parse;
#[cfg(windows)]
extern crate winapi;
diff --git a/src/tty/unix.rs b/src/tty/unix.rs
index 22dd675..e029bff 100644
--- a/src/tty/unix.rs
+++ b/src/tty/unix.rs
@@ -11,6 +11,7 @@
use nix::sys::termios;
use nix::sys::termios::SetArg;
use unicode_segmentation::UnicodeSegmentation;
+use utf8parse::{Parser, Receiver};
use super::{truncate, width, Position, RawMode, RawReader, Renderer, Term};
use config::{ColorMode, Config};
@@ -101,7 +102,14 @@
pub struct PosixRawReader {
stdin: StdinRaw,
timeout_ms: i32,
- buf: [u8; 4],
+ buf: [u8; 1], // holds the single byte most recently read from stdin
+ parser: Parser, // utf8parse parser, fed one byte per read in next_char
+ receiver: Utf8, // collects the parser's callbacks (decoded char / invalid flag)
+}
+
+struct Utf8 {
+ c: Option<char>, // most recently decoded char, until the reader takes it
+ valid: bool, // false after the parser reports an invalid sequence
}
impl PosixRawReader {
@@ -109,7 +117,12 @@
Ok(PosixRawReader {
stdin: StdinRaw {},
timeout_ms: config.keyseq_timeout(),
- buf: [0; 4],
+ buf: [0; 1],
+ parser: Parser::new(),
+ receiver: Utf8 {
+ c: None,
+ valid: true,
+ },
})
}
@@ -290,27 +303,6 @@
}
}
-// https://tools.ietf.org/html/rfc3629
-#[cfg_attr(rustfmt, rustfmt_skip)]
-static UTF8_CHAR_WIDTH: [u8; 256] = [
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
-0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
-4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
-];
-
impl RawReader for PosixRawReader {
fn next_key(&mut self, single_esc_abort: bool) -> Result<KeyPress> {
let c = try!(self.next_char());
@@ -340,26 +332,36 @@
}
fn next_char(&mut self) -> Result<char> {
- let n = try!(self.stdin.read(&mut self.buf[..1]));
- if n == 0 {
- return Err(error::ReadlineError::Eof);
- }
- let first = self.buf[0];
- if first >= 128 {
- let width = UTF8_CHAR_WIDTH[first as usize] as usize;
- if width == 0 {
- try!(std::str::from_utf8(&self.buf[..1]));
- unreachable!()
+ loop { // one byte per pass; a multi-byte char takes several passes
+ let n = try!(self.stdin.read(&mut self.buf));
+ if n == 0 { // zero bytes read: report end of input
+ return Err(error::ReadlineError::Eof);
}
- try!(self.stdin.read_exact(&mut self.buf[1..width]));
- let s = try!(std::str::from_utf8(&self.buf[..width]));
- Ok(s.chars().next().unwrap())
- } else {
- Ok(first as char)
+ self.parser.advance(&mut self.receiver, self.buf[0]);
+ // Re-arm `valid` while testing it so a bad byte fails only this call.
+ if !std::mem::replace(&mut self.receiver.valid, true) {
+ return Err(error::ReadlineError::Utf8Error);
+ } else if let Some(c) = self.receiver.c.take() {
+ return Ok(c);
+ }
}
}
}
+impl Receiver for Utf8 {
+ /// Parser callback for a completely decoded code point: stores it for
+ /// the reader to pick up and marks the input as valid.
+ fn codepoint(&mut self, ch: char) {
+ *self = Utf8 { c: Some(ch), valid: true };
+ }
+
+ /// Parser callback for malformed input: drops any pending char and
+ /// flags the input as invalid so the reader can report an error.
+ fn invalid_sequence(&mut self) {
+ *self = Utf8 { c: None, valid: false };
+ }
+}
+
/// Console output writer
pub struct PosixRenderer {
out: Stdout,