blob: 7272230fc1ec64df4f1c00dba11b9bc9472e2199 [file] [log] [blame]
//! Parser for implementing virtual terminal emulators
//!
//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
//! state machine]. The state machine doesn't assign meaning to the parsed data
//! and is thus not itself sufficient for writing a terminal emulator. Instead,
//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the book
//! keeping, and the [`Perform`] gets to simply handle actions.
//!
//! # Examples
//!
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to
//! pipe `vim` into it
//!
//! ```ignore
//! cargo build --release --example parselog
//! vim | target/release/examples/parselog
//! ```
//!
//! Just type `:q` to exit.
//!
//! # Differences from original state machine description
//!
//! * UTF-8 Support for Input
//! * OSC Strings can be terminated by 0x07
//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they
//! no longer work in all states.
//!
//! [`Parser`]: struct.Parser.html
//! [`Perform`]: trait.Perform.html
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
#![no_std]
extern crate utf8parse as utf8;
use core::mem;
mod table;
mod definitions;
use definitions::{Action, State, unpack};
use table::{EXIT_ACTIONS, ENTRY_ACTIONS, STATE_CHANGE};
impl State {
/// Get exit action for this state
#[inline(always)]
pub fn exit_action(&self) -> Action {
unsafe {
*EXIT_ACTIONS.get_unchecked(*self as usize)
}
}
/// Get entry action for this state
#[inline(always)]
pub fn entry_action(&self) -> Action {
unsafe {
*ENTRY_ACTIONS.get_unchecked(*self as usize)
}
}
}
const MAX_INTERMEDIATES: usize = 2;
const MAX_OSC_RAW: usize = 1024;
const MAX_PARAMS: usize = 16;
struct VtUtf8Receiver<'a, P: Perform + 'a>(&'a mut P, &'a mut State);
impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> {
fn codepoint(&mut self, c: char) {
self.0.print(c);
*self.1 = State::Ground;
}
fn invalid_sequence(&mut self) {
self.0.print('�');
*self.1 = State::Ground;
}
}
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// [`Perform`]: trait.Perform.html
pub struct Parser {
state: State,
intermediates: [u8; MAX_INTERMEDIATES],
intermediate_idx: usize,
params: [i64; MAX_PARAMS],
param: i64,
collecting_param: bool,
num_params: usize,
osc_raw: [u8; MAX_OSC_RAW],
osc_params: [(usize, usize); MAX_PARAMS],
osc_idx: usize,
osc_num_params: usize,
ignoring: bool,
utf8_parser: utf8::Parser,
}
impl Parser {
/// Create a new Parser
pub fn new() -> Parser {
Parser {
state: State::Ground,
intermediates: [0u8; MAX_INTERMEDIATES],
intermediate_idx: 0,
params: [0i64; MAX_PARAMS],
param: 0,
collecting_param: false,
num_params: 0,
osc_raw: [0; MAX_OSC_RAW],
osc_params: [(0, 0); MAX_PARAMS],
osc_idx: 0,
osc_num_params: 0,
ignoring: false,
utf8_parser: utf8::Parser::new(),
}
}
#[inline]
fn params(&self) -> &[i64] {
&self.params[..self.num_params]
}
#[inline]
fn intermediates(&self) -> &[u8] {
&self.intermediates[..self.intermediate_idx]
}
/// Advance the parser state
///
/// Requires a [`Perform`] in case `byte` triggers an action
///
/// [`Perform`]: trait.Perform.html
#[inline]
pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
// Utf8 characters are handled out-of-band.
if let State::Utf8 = self.state {
self.process_utf8(performer, byte);
return;
}
// Handle state changes in the anywhere state before evaluating changes
// for current state.
let mut change = STATE_CHANGE[State::Anywhere as usize][byte as usize];
if change == 0 {
change = STATE_CHANGE[self.state as usize][byte as usize];
}
// Unpack into a state and action
let (state, action) = unpack(change);
self.perform_state_change(performer, state, action, byte);
}
#[inline]
fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
where P: Perform
{
let mut receiver = VtUtf8Receiver(performer, &mut self.state);
let utf8_parser = &mut self.utf8_parser;
utf8_parser.advance(&mut receiver, byte);
}
#[inline]
fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8)
where P: Perform
{
macro_rules! maybe_action {
($action:expr, $arg:expr) => {
match $action {
Action::None => (),
action => {
self.perform_action(performer, action, $arg);
},
}
}
}
match state {
State::Anywhere => {
// Just run the action
self.perform_action(performer, action, byte);
},
state => {
// Exit action for previous state
let exit_action = self.state.exit_action();
maybe_action!(exit_action, 0);
// Transition action
maybe_action!(action, byte);
// Entry action for new state
maybe_action!(state.entry_action(), 0);
// Assume the new state
self.state = state;
}
}
}
/// Separate method for osc_dispatch that borrows self as read-only
///
/// The aliasing is needed here for multiple slices into self.osc_raw
#[inline]
fn osc_dispatch<P: Perform>(&self, performer: &mut P) {
let mut slices: [&[u8]; MAX_PARAMS] = unsafe { mem::uninitialized() };
for i in 0..self.osc_num_params {
let indices = self.osc_params[i];
slices[i] = &self.osc_raw[indices.0..indices.1];
}
performer.osc_dispatch(
&slices[..self.osc_num_params],
);
}
#[inline]
fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
match action {
Action::Print => performer.print(byte as char),
Action::Execute => performer.execute(byte),
Action::Hook => {
performer.hook(
self.params(),
self.intermediates(),
self.ignoring,
);
},
Action::Put => performer.put(byte),
Action::OscStart => {
self.osc_idx = 0;
self.osc_num_params = 0;
},
Action::OscPut => {
let idx = self.osc_idx;
if idx == self.osc_raw.len() {
return;
}
// Param separator
if byte == b';' {
let param_idx = self.osc_num_params;
match param_idx {
// Only process up to MAX_PARAMS
MAX_PARAMS => return,
// First param is special - 0 to current byte index
0 => {
self.osc_params[param_idx] = (0, idx);
},
// All other params depend on previous indexing
_ => {
let prev = self.osc_params[param_idx - 1];
let begin = prev.1;
self.osc_params[param_idx] = (begin, idx);
}
}
self.osc_num_params += 1;
} else {
self.osc_raw[idx] = byte;
self.osc_idx += 1;
}
},
Action::OscEnd => {
let param_idx = self.osc_num_params;
let idx = self.osc_idx;
match param_idx {
// Finish last parameter if not already maxed
MAX_PARAMS => (),
// First param is special - 0 to current byte index
0 => {
self.osc_params[param_idx] = (0, idx);
self.osc_num_params += 1;
},
// All other params depend on previous indexing
_ => {
let prev = self.osc_params[param_idx - 1];
let begin = prev.1;
self.osc_params[param_idx] = (begin, idx);
self.osc_num_params += 1;
}
}
self.osc_dispatch(performer);
},
Action::Unhook => performer.unhook(),
Action::CsiDispatch => {
if self.collecting_param {
let idx = self.num_params;
self.params[idx] = self.param;
self.num_params += 1;
}
performer.csi_dispatch(
self.params(),
self.intermediates(),
self.ignoring,
byte as char
);
self.num_params = 0;
self.param = 0;
self.collecting_param = false;
}
Action::EscDispatch => {
performer.esc_dispatch(
self.params(),
self.intermediates(),
self.ignoring,
byte
);
},
Action::Ignore | Action::None => (),
Action::Collect => {
if self.intermediate_idx == MAX_INTERMEDIATES {
self.ignoring = true;
} else {
self.intermediates[self.intermediate_idx] = byte;
self.intermediate_idx += 1;
}
},
Action::Param => {
if byte == b';' {
// Completed a param
let idx = self.num_params;
if idx == MAX_PARAMS - 1 {
return;
}
self.params[idx] = self.param;
self.param = 0;
self.num_params += 1;
self.collecting_param = false;
} else {
// Continue collecting bytes into param
self.param = self.param.saturating_mul(10);
self.param = self.param.saturating_add((byte - b'0') as i64);
self.collecting_param = true;
}
},
Action::Clear => {
self.intermediate_idx = 0;
self.num_params = 0;
self.ignoring = false;
},
Action::BeginUtf8 => {
self.process_utf8(performer, byte);
},
}
}
}
/// Performs actions requested by the Parser
///
/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
/// movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// http://vt100.net/emu/dec_ansi_parser. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
pub trait Perform {
/// Draw a character to the screen and update states
fn print(&mut self, char);
/// Execute a C0 or C1 control function
fn execute(&mut self, byte: u8);
/// Invoked when a final character arrives in first part of device control string
///
/// The control function should be determined from the private marker, final character, and
/// execute with a parameter list. A handler should be selected for remaining characters in the
/// string; the handler function should subsequently be called by `put` for every character in
/// the control string.
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn hook(&mut self, params: &[i64], intermediates: &[u8], ignore: bool);
/// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
/// will also be passed to the handler.
fn put(&mut self, byte: u8);
/// Called when a device control string is terminated
///
/// The previously selected handler should be notified that the DCS has
/// terminated.
fn unhook(&mut self);
/// Dispatch an operating system command
fn osc_dispatch(&mut self, params: &[&[u8]]);
/// A final character has arrived for a CSI sequence
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn csi_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, char);
/// The final character of an escape sequence has arrived.
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn esc_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, byte: u8);
}
#[cfg(test)]
#[macro_use]
extern crate std;
#[cfg(test)]
mod tests {
use std::vec::Vec;
use super::{Parser, Perform};
use core::i64;
static OSC_BYTES: &'static [u8] = &[0x1b, 0x5d, // Begin OSC
b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l',
b'm', b'-', b'd', b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o',
b'd', b'e', b'/', b'a', b'l', b'a', b'c', b'r', b'i', b't', b't', b'y',
0x07 // End OSC
];
#[derive(Default)]
struct OscDispatcher {
dispatched_osc: bool,
params: Vec<Vec<u8>>,
}
// All empty bodies except osc_dispatch
impl Perform for OscDispatcher {
fn print(&mut self, _: char) {}
fn execute(&mut self, _byte: u8) {}
fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
fn put(&mut self, _byte: u8) {}
fn unhook(&mut self) {}
fn osc_dispatch(&mut self, params: &[&[u8]]) {
// Set a flag so we know these assertions all run
self.dispatched_osc = true;
self.params = params.iter().map(|p| p.to_vec()).collect();
}
fn csi_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {}
fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}
#[derive(Default)]
struct CsiDispatcher {
dispatched_csi: bool,
params: Vec<Vec<i64>>,
}
impl Perform for CsiDispatcher {
fn print(&mut self, _: char) {}
fn execute(&mut self, _byte: u8) {}
fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {}
fn put(&mut self, _byte: u8) {}
fn unhook(&mut self) {}
fn osc_dispatch(&mut self, _params: &[&[u8]]) { }
fn csi_dispatch(&mut self, params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {
self.dispatched_csi = true;
self.params.push(params.to_vec());
}
fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}
#[test]
fn parse_osc() {
// Create dispatcher and check state
let mut dispatcher = OscDispatcher::default();
assert_eq!(dispatcher.dispatched_osc, false);
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in OSC_BYTES {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus osc_dispatch assertions ran.
assert!(dispatcher.dispatched_osc);
assert_eq!(dispatcher.params.len(), 2);
assert_eq!(dispatcher.params[0], &OSC_BYTES[2..3]);
assert_eq!(dispatcher.params[1], &OSC_BYTES[4..(OSC_BYTES.len() - 1)]);
}
#[test]
fn parse_empty_osc() {
// Create dispatcher and check state
let mut dispatcher = OscDispatcher::default();
assert_eq!(dispatcher.dispatched_osc, false);
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in &[0x1b, 0x5d, 0x07] {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus osc_dispatch assertions ran.
assert!(dispatcher.dispatched_osc);
}
#[test]
fn parse_osc_max_params() {
use MAX_PARAMS;
static INPUT: &'static [u8] = b"\x1b];;;;;;;;;;;;;;;;;\x1b";
// Create dispatcher and check state
let mut dispatcher = OscDispatcher::default();
assert_eq!(dispatcher.dispatched_osc, false);
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in INPUT {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus osc_dispatch assertions ran.
assert!(dispatcher.dispatched_osc);
assert_eq!(dispatcher.params.len(), MAX_PARAMS);
for param in dispatcher.params.iter() {
assert_eq!(param.len(), 0);
}
}
#[test]
fn parse_csi_max_params() {
use MAX_PARAMS;
static INPUT: &'static [u8] = b"\x1b[1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;p";
// Create dispatcher and check state
let mut dispatcher = CsiDispatcher::default();
assert!(!dispatcher.dispatched_csi);
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in INPUT {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus csi_dispatch assertions ran.
assert!(dispatcher.dispatched_csi);
assert_eq!(dispatcher.params.len(), 1);
assert_eq!(dispatcher.params[0].len(), MAX_PARAMS);
}
#[test]
fn parse_semi_set_underline() {
// Create dispatcher and check state
let mut dispatcher = CsiDispatcher::default();
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in b"\x1b[;4m" {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus osc_dispatch assertions ran.
assert_eq!(dispatcher.params[0], &[0, 4]);
}
#[test]
fn parse_long_csi_param() {
// The important part is the parameter, which is (i64::MAX + 1)
static INPUT: &'static [u8] = b"\x1b[9223372036854775808m";
let mut dispatcher = CsiDispatcher::default();
let mut parser = Parser::new();
for byte in INPUT {
parser.advance(&mut dispatcher, *byte);
}
assert_eq!(dispatcher.params[0], &[i64::MAX as i64]);
}
#[test]
fn parse_osc_with_utf8_arguments() {
static INPUT: &'static [u8] = &[
0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27,
0xc2, 0xaf, 0x5c, 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f,
0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, 0x20, 0x73, 0x6c, 0x65, 0x65,
0x70, 0x20, 0x31, 0x07
];
// Create dispatcher and check state
let mut dispatcher = OscDispatcher { params: vec![], dispatched_osc: false };
// Run parser using OSC_BYTES
let mut parser = Parser::new();
for byte in INPUT {
parser.advance(&mut dispatcher, *byte);
}
// Check that flag is set and thus osc_dispatch assertions ran.
assert_eq!(dispatcher.params[0], &[b'2']);
assert_eq!(dispatcher.params[1], &INPUT[5..(INPUT.len() - 1)]);
}
}