| //! UTF-8 Parse Transition Table |
| |
| /// Transition table for parsing UTF-8: eight rows of 256 `u8` entries, filled in by the |
| /// `utf8_state_table!` invocation below. Its entries follow the grammar given in RFC 3629, |
| /// section 4 (<https://tools.ietf.org/html/rfc3629#section-4>), reproduced here for reference. |
| /// |
| /// # UTF-8 Grammar |
| /// |
| /// ```text |
| /// UTF8-octets = *( UTF8-char ) |
| /// UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 |
| /// UTF8-1      = %x00-7F |
| /// UTF8-2      = %xC2-DF UTF8-tail |
| /// UTF8-3      = %xE0 %xA0-BF UTF8-tail / |
| ///               %xE1-EC 2( UTF8-tail ) / |
| ///               %xED %x80-9F UTF8-tail / |
| ///               %xEE-EF 2( UTF8-tail ) |
| /// UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / |
| ///               %xF1-F3 3( UTF8-tail ) / |
| ///               %xF4 %x80-8F 2( UTF8-tail ) |
| /// UTF8-tail   = %x80-BF |
| /// ``` |
| /// |
| /// Each entry maps a byte (or byte range) seen in the enclosing state to a `(state, action)` |
| /// pair. Not specifying an action in this table is equivalent to specifying |
| /// `Action::InvalidSequence`. Not specifying a state is equivalent to specifying `State::Ground`. |
| pub static TRANSITIONS: [[u8; 256]; 8] = utf8_state_table! { |
| State::Ground => { |
| 0x00...0x7f => (State::Ground, Action::EmitByte), |
| 0xc2...0xdf => (State::Tail1, Action::SetByte2Top), |
| 0xe0 => (State::U3_2_e0, Action::SetByte3Top), |
| 0xe1...0xec => (State::Tail2, Action::SetByte3Top), |
| 0xed => (State::U3_2_ed, Action::SetByte3Top), |
| 0xee...0xef => (State::Tail2, Action::SetByte3Top), |
| 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4), |
| 0xf1...0xf3 => (State::Tail3, Action::SetByte4), |
| 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4), |
| }, |
| State::U3_2_e0 => { |
| 0xa0...0xbf => (State::Tail1, Action::SetByte2), |
| }, |
| State::U3_2_ed => { |
| 0x80...0x9f => (State::Tail1, Action::SetByte2), |
| }, |
| State::Utf8_4_3_f0 => { |
| 0x90...0xbf => (State::Tail2, Action::SetByte3), |
| }, |
| State::Utf8_4_3_f4 => { |
| 0x80...0x8f => (State::Tail2, Action::SetByte3), |
| }, |
| State::Tail3 => { |
| 0x80...0xbf => (State::Tail2, Action::SetByte3), |
| }, |
| State::Tail2 => { |
| 0x80...0xbf => (State::Tail1, Action::SetByte2), |
| }, |
| State::Tail1 => { |
| 0x80...0xbf => (State::Ground, Action::SetByte1), |
| }, |
| }; |