third_party/rust_crates/tiny_mirrors/regex-automata/src/nfa/mod.rs - fuchsia - Git at Google

 use std::fmt;

 use classes::ByteClasses;
 pub use nfa::compiler::Builder;

 mod compiler;
 mod map;
 mod range_trie;

 /// The representation for an NFA state identifier.
 pub type StateID = usize;

 /// A final compiled NFA.
 ///
 /// The states of the NFA are indexed by state IDs, which are how transitions
 /// are expressed.
 #[derive(Clone)]
 pub struct NFA {
     /// Whether this NFA can only match at the beginning of input or not.
     ///
     /// When true, a match should only be reported if it begins at the 0th
     /// index of the haystack.
     anchored: bool,
     /// The starting state of this NFA.
     start: StateID,
     /// The state list. This list is guaranteed to be indexable by the starting
     /// state ID, and it is also guaranteed to contain exactly one `Match`
     /// state.
     states: Vec<State>,
     /// A mapping from any byte value to its corresponding equivalence class
     /// identifier. Two bytes in the same equivalence class cannot discriminate
     /// between a match or a non-match. This map can be used to shrink the
     /// total size of a DFA's transition table with a small match-time cost.
     ///
     /// Note that the NFA's transitions are *not* defined in terms of these
     /// equivalence classes. The NFA's transitions are defined on the original
     /// byte values. For the most part, this is because they wouldn't really
     /// help the NFA much since the NFA already uses a sparse representation
     /// to represent transitions. Byte classes are most effective in a dense
     /// representation.
     byte_classes: ByteClasses,
 }

 impl NFA {
     /// Returns an NFA that always matches at every position.
     pub fn always_match() -> NFA {
         NFA {
             anchored: false,
             start: 0,
             states: vec![State::Match],
             byte_classes: ByteClasses::empty(),
         }
     }

     /// Returns an NFA that never matches at any position.
     pub fn never_match() -> NFA {
         NFA {
             anchored: false,
             start: 0,
             states: vec![State::Fail],
             byte_classes: ByteClasses::empty(),
         }
     }

     /// Returns true if and only if this NFA is anchored.
     pub fn is_anchored(&self) -> bool {
         self.anchored
     }

     /// Return the number of states in this NFA.
     pub fn len(&self) -> usize {
         self.states.len()
     }

     /// Return the ID of the initial state of this NFA.
     pub fn start(&self) -> StateID {
         self.start
     }

     /// Return the NFA state corresponding to the given ID.
     pub fn state(&self, id: StateID) -> &State {
         &self.states[id]
     }

     /// Return the set of equivalence classes for this NFA. The slice returned
     /// always has length 256 and maps each possible byte value to its
     /// corresponding equivalence class ID (which is never more than 255).
     pub fn byte_classes(&self) -> &ByteClasses {
         &self.byte_classes
     }
 }

 impl fmt::Debug for NFA {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         for (i, state) in self.states.iter().enumerate() {
             let status = if i == self.start { '>' } else { ' ' };
             writeln!(f, "{}{:06}: {:?}", status, i, state)?;
         }
         Ok(())
     }
 }

 /// A state in a final compiled NFA.
 #[derive(Clone, Eq, PartialEq)]
 pub enum State {
     /// A state that transitions to `next` if and only if the current input
     /// byte is in the range `[start, end]` (inclusive).
     ///
     /// This is a special case of Sparse in that it encodes only one transition
     /// (and therefore avoids the allocation).
     Range { range: Transition },
     /// A state with possibly many transitions, represented in a sparse
     /// fashion. Transitions are ordered lexicographically by input range.
     /// As such, this may only be used when every transition has equal
     /// priority. (In practice, this is only used for encoding large UTF-8
     /// automata.)
     Sparse { ranges: Box<[Transition]> },
     /// An alternation such that there exists an epsilon transition to all
     /// states in `alternates`, where matches found via earlier transitions
     /// are preferred over later transitions.
     Union { alternates: Box<[StateID]> },
     /// A fail state. When encountered, the automaton is guaranteed to never
     /// reach a match state.
     Fail,
     /// A match state. There is exactly one such occurrence of this state in
     /// an NFA.
     Match,
 }

 /// A transition to another state, only if the given byte falls in the
 /// inclusive range specified.
 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
 pub struct Transition {
     pub start: u8,
     pub end: u8,
     pub next: StateID,
 }

 impl State {
     /// Returns true if and only if this state contains one or more epsilon
     /// transitions.
     pub fn is_epsilon(&self) -> bool {
         match *self {
             State::Range { .. }
             | State::Sparse { .. }
             | State::Fail
             | State::Match => false,
             State::Union { .. } => true,
         }
     }

     /// Remap the transitions in this state using the given map. Namely, the
     /// given map should be indexed according to the transitions currently
     /// in this state.
     ///
     /// This is used during the final phase of the NFA compiler, which turns
     /// its intermediate NFA into the final NFA.
     fn remap(&mut self, remap: &[StateID]) {
         match *self {
             State::Range { ref mut range } => range.next = remap[range.next],
             State::Sparse { ref mut ranges } => {
                 for r in ranges.iter_mut() {
                     r.next = remap[r.next];
                 }
             }
             State::Union { ref mut alternates } => {
                 for alt in alternates.iter_mut() {
                     *alt = remap[*alt];
                 }
             }
             State::Fail => {}
             State::Match => {}
         }
     }
 }

 impl fmt::Debug for State {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match *self {
             State::Range { ref range } => range.fmt(f),
             State::Sparse { ref ranges } => {
                 let rs = ranges
                     .iter()
                     .map(|t| format!("{:?}", t))
                     .collect::<Vec<String>>()
                     .join(", ");
                 write!(f, "sparse({})", rs)
             }
             State::Union { ref alternates } => {
                 let alts = alternates
                     .iter()
                     .map(|id| format!("{}", id))
                     .collect::<Vec<String>>()
                     .join(", ");
                 write!(f, "alt({})", alts)
             }
             State::Fail => write!(f, "FAIL"),
             State::Match => write!(f, "MATCH"),
         }
     }
 }

 impl fmt::Debug for Transition {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let Transition { start, end, next } = *self;
         if self.start == self.end {
             write!(f, "{} => {}", escape(start), next)
         } else {
             write!(f, "{}-{} => {}", escape(start), escape(end), next)
         }
     }
 }

 /// Return the given byte as its escaped string form.
 fn escape(b: u8) -> String {
     use std::ascii;

     String::from_utf8(ascii::escape_default(b).collect::<Vec<_>>()).unwrap()
 }

 #[cfg(test)]
 mod tests {
     use super::*;
     use dense;
     use dfa::DFA;

     #[test]
     fn always_match() {
         let nfa = NFA::always_match();
         let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap();

         assert_eq!(Some(0), dfa.find_at(b"", 0));
         assert_eq!(Some(0), dfa.find_at(b"a", 0));
         assert_eq!(Some(1), dfa.find_at(b"a", 1));
         assert_eq!(Some(0), dfa.find_at(b"ab", 0));
         assert_eq!(Some(1), dfa.find_at(b"ab", 1));
         assert_eq!(Some(2), dfa.find_at(b"ab", 2));
     }

     #[test]
     fn never_match() {
         let nfa = NFA::never_match();
         let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap();

         assert_eq!(None, dfa.find_at(b"", 0));
         assert_eq!(None, dfa.find_at(b"a", 0));
         assert_eq!(None, dfa.find_at(b"a", 1));
         assert_eq!(None, dfa.find_at(b"ab", 0));
         assert_eq!(None, dfa.find_at(b"ab", 1));
         assert_eq!(None, dfa.find_at(b"ab", 2));
     }
 }
	use std::fmt;

	use classes::ByteClasses;
	pub use nfa::compiler::Builder;

	mod compiler;
	mod map;
	mod range_trie;

	/// The representation for an NFA state identifier.
	pub type StateID = usize;

	/// A final compiled NFA.
	///
	/// The states of the NFA are indexed by state IDs, which are how transitions
	/// are expressed.
	#[derive(Clone)]
	pub struct NFA {
	/// Whether this NFA can only match at the beginning of input or not.
	///
	/// When true, a match should only be reported if it begins at the 0th
	/// index of the haystack.
	anchored: bool,
	/// The starting state of this NFA.
	start: StateID,
	/// The state list. This list is guaranteed to be indexable by the starting
	/// state ID, and it is also guaranteed to contain exactly one `Match`
	/// state.
	states: Vec<State>,
	/// A mapping from any byte value to its corresponding equivalence class
	/// identifier. Two bytes in the same equivalence class cannot discriminate
	/// between a match or a non-match. This map can be used to shrink the
	/// total size of a DFA's transition table with a small match-time cost.
	///
	/// Note that the NFA's transitions are not defined in terms of these
	/// equivalence classes. The NFA's transitions are defined on the original
	/// byte values. For the most part, this is because they wouldn't really
	/// help the NFA much since the NFA already uses a sparse representation
	/// to represent transitions. Byte classes are most effective in a dense
	/// representation.
	byte_classes: ByteClasses,
	}

	impl NFA {
	/// Returns an NFA that always matches at every position.
	pub fn always_match() -> NFA {
	NFA {
	anchored: false,
	start: 0,
	states: vec![State::Match],
	byte_classes: ByteClasses::empty(),
	}
	}

	/// Returns an NFA that never matches at any position.
	pub fn never_match() -> NFA {
	NFA {
	anchored: false,
	start: 0,
	states: vec![State::Fail],
	byte_classes: ByteClasses::empty(),
	}
	}

	/// Returns true if and only if this NFA is anchored.
	pub fn is_anchored(&self) -> bool {
	self.anchored
	}

	/// Return the number of states in this NFA.
	pub fn len(&self) -> usize {
	self.states.len()
	}

	/// Return the ID of the initial state of this NFA.
	pub fn start(&self) -> StateID {
	self.start
	}

	/// Return the NFA state corresponding to the given ID.
	pub fn state(&self, id: StateID) -> &State {
	&self.states[id]
	}

	/// Return the set of equivalence classes for this NFA. The slice returned
	/// always has length 256 and maps each possible byte value to its
	/// corresponding equivalence class ID (which is never more than 255).
	pub fn byte_classes(&self) -> &ByteClasses {
	&self.byte_classes
	}
	}

	impl fmt::Debug for NFA {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	for (i, state) in self.states.iter().enumerate() {
	let status = if i == self.start { '>' } else { ' ' };
	writeln!(f, "{}{:06}: {:?}", status, i, state)?;
	}
	Ok(())
	}
	}

	/// A state in a final compiled NFA.
	#[derive(Clone, Eq, PartialEq)]
	pub enum State {
	/// A state that transitions to `next` if and only if the current input
	/// byte is in the range `[start, end]` (inclusive).
	///
	/// This is a special case of Sparse in that it encodes only one transition
	/// (and therefore avoids the allocation).
	Range { range: Transition },
	/// A state with possibly many transitions, represented in a sparse
	/// fashion. Transitions are ordered lexicographically by input range.
	/// As such, this may only be used when every transition has equal
	/// priority. (In practice, this is only used for encoding large UTF-8
	/// automata.)
	Sparse { ranges: Box<[Transition]> },
	/// An alternation such that there exists an epsilon transition to all
	/// states in `alternates`, where matches found via earlier transitions
	/// are preferred over later transitions.
	Union { alternates: Box<[StateID]> },
	/// A fail state. When encountered, the automaton is guaranteed to never
	/// reach a match state.
	Fail,
	/// A match state. There is exactly one such occurrence of this state in
	/// an NFA.
	Match,
	}

	/// A transition to another state, only if the given byte falls in the
	/// inclusive range specified.
	#[derive(Clone, Copy, Eq, Hash, PartialEq)]
	pub struct Transition {
	pub start: u8,
	pub end: u8,
	pub next: StateID,
	}

	impl State {
	/// Returns true if and only if this state contains one or more epsilon
	/// transitions.
	pub fn is_epsilon(&self) -> bool {
	match *self {
	State::Range { .. }
	\| State::Sparse { .. }
	\| State::Fail
	\| State::Match => false,
	State::Union { .. } => true,
	}
	}

	/// Remap the transitions in this state using the given map. Namely, the
	/// given map should be indexed according to the transitions currently
	/// in this state.
	///
	/// This is used during the final phase of the NFA compiler, which turns
	/// its intermediate NFA into the final NFA.
	fn remap(&mut self, remap: &[StateID]) {
	match *self {
	State::Range { ref mut range } => range.next = remap[range.next],
	State::Sparse { ref mut ranges } => {
	for r in ranges.iter_mut() {
	r.next = remap[r.next];
	}
	}
	State::Union { ref mut alternates } => {
	for alt in alternates.iter_mut() {
	alt = remap[alt];
	}
	}
	State::Fail => {}
	State::Match => {}
	}
	}
	}

	impl fmt::Debug for State {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	match *self {
	State::Range { ref range } => range.fmt(f),
	State::Sparse { ref ranges } => {
	let rs = ranges
	.iter()
	.map(\|t\| format!("{:?}", t))
	.collect::<Vec<String>>()
	.join(", ");
	write!(f, "sparse({})", rs)
	}
	State::Union { ref alternates } => {
	let alts = alternates
	.iter()
	.map(\|id\| format!("{}", id))
	.collect::<Vec<String>>()
	.join(", ");
	write!(f, "alt({})", alts)
	}
	State::Fail => write!(f, "FAIL"),
	State::Match => write!(f, "MATCH"),
	}
	}
	}

	impl fmt::Debug for Transition {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	let Transition { start, end, next } = *self;
	if self.start == self.end {
	write!(f, "{} => {}", escape(start), next)
	} else {
	write!(f, "{}-{} => {}", escape(start), escape(end), next)
	}
	}
	}

	/// Return the given byte as its escaped string form.
	fn escape(b: u8) -> String {
	use std::ascii;

	String::from_utf8(ascii::escape_default(b).collect::<Vec<_>>()).unwrap()
	}

	#[cfg(test)]
	mod tests {
	use super::*;
	use dense;
	use dfa::DFA;

	#[test]
	fn always_match() {
	let nfa = NFA::always_match();
	let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap();

	assert_eq!(Some(0), dfa.find_at(b"", 0));
	assert_eq!(Some(0), dfa.find_at(b"a", 0));
	assert_eq!(Some(1), dfa.find_at(b"a", 1));
	assert_eq!(Some(0), dfa.find_at(b"ab", 0));
	assert_eq!(Some(1), dfa.find_at(b"ab", 1));
	assert_eq!(Some(2), dfa.find_at(b"ab", 2));
	}

	#[test]
	fn never_match() {
	let nfa = NFA::never_match();
	let dfa = dense::Builder::new().build_from_nfa::<usize>(&nfa).unwrap();

	assert_eq!(None, dfa.find_at(b"", 0));
	assert_eq!(None, dfa.find_at(b"a", 0));
	assert_eq!(None, dfa.find_at(b"a", 1));
	assert_eq!(None, dfa.find_at(b"ab", 0));
	assert_eq!(None, dfa.find_at(b"ab", 1));
	assert_eq!(None, dfa.find_at(b"ab", 2));
	}
	}