third_party/rust_crates/ask2patch/aho-corasick/src/automaton.rs - fuchsia - Git at Google

 use crate::ahocorasick::MatchKind;
 use crate::prefilter::{self, Candidate, Prefilter, PrefilterState};
 use crate::state_id::{dead_id, fail_id, StateID};
 use crate::Match;

 // NOTE: This trait essentially started as a copy of the same trait from from
 // regex-automata, with some wording changed since we use this trait for
 // NFAs in addition to DFAs in this crate. Additionally, we do not export
 // this trait. It's only used internally to reduce code duplication. The
 // regex-automata crate needs to expose it because its Regex type is generic
 // over implementations of this trait. In this crate, we encapsulate everything
 // behind the AhoCorasick type.
 //
 // This trait is a bit of a mess, but it's not quite clear how to fix it.
 // Basically, there are several competing concerns:
 //
 // * We need performance, so everything effectively needs to get monomorphized.
 // * There are several variations on searching Aho-Corasick automatons:
 //   overlapping, standard and leftmost. Overlapping and standard are somewhat
 //   combined together below, but there is no real way to combine standard with
 //   leftmost. Namely, leftmost requires continuing a search even after a match
 //   is found, in order to correctly disambiguate a match.
 // * On top of that, *sometimes* callers want to know which state the automaton
 //   is in after searching. This is principally useful for overlapping and
 //   stream searches. However, when callers don't care about this, we really
 //   do not want to be forced to compute it, since it sometimes requires extra
 //   work. Thus, there are effectively two copies of leftmost searching: one
 //   for tracking the state ID and one that doesn't. We should ideally do the
 //   same for standard searching, but my sanity stopped me.

 // SAFETY RATIONALE: Previously, the code below went to some length to remove
 // all bounds checks. This generally produced tighter assembly and lead to
 // 20-50% improvements in micro-benchmarks on corpora made up of random
 // characters. This somewhat makes sense, since the branch predictor is going
 // to be at its worse on random text.
 //
 // However, using the aho-corasick-debug tool and manually benchmarking
 // different inputs, the code *with* bounds checks actually wound up being
 // slightly faster:
 //
 //     $ cat input
 //     Sherlock Holmes
 //     John Watson
 //     Professor Moriarty
 //     Irene Adler
 //     Mary Watson
 //
 //     $ aho-corasick-debug-safe \
 //         input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa
 //     pattern read time: 32.824µs
 //     automaton build time: 444.687µs
 //     automaton heap usage: 72392 bytes
 //     match count: 639
 //     count time: 1.809961702s
 //
 //     $ aho-corasick-debug-master \
 //         input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa
 //     pattern read time: 31.425µs
 //     automaton build time: 317.434µs
 //     automaton heap usage: 72392 bytes
 //     match count: 639
 //     count time: 2.059157705s
 //
 // I was able to reproduce this result on two different machines (an i5 and
 // an i7). Therefore, we go the route of safe code for now.

 /// A trait describing the interface of an Aho-Corasick finite state machine.
 ///
 /// Every automaton has exactly one fail state, one dead state and exactly one
 /// start state. Generally, these correspond to the first, second and third
 /// states, respectively. The failure state is always treated as a sentinel.
 /// That is, no correct Aho-Corasick automaton will ever transition into the
 /// fail state. The dead state, however, can be transitioned into, but only
 /// when leftmost-first or leftmost-longest match semantics are enabled and
 /// only when at least one match has been observed.
 ///
 /// Every automaton also has one or more match states, such that
 /// `Automaton::is_match_state(id)` returns `true` if and only if `id`
 /// corresponds to a match state.
 pub trait Automaton {
     /// The representation used for state identifiers in this automaton.
     ///
     /// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`.
     type ID: StateID;

     /// The type of matching that should be done.
     fn match_kind(&self) -> &MatchKind;

     /// Returns true if and only if this automaton uses anchored searches.
     fn anchored(&self) -> bool;

     /// An optional prefilter for quickly skipping to the next candidate match.
     /// A prefilter must report at least every match, although it may report
     /// positions that do not correspond to a match. That is, it must not allow
     /// false negatives, but can allow false positives.
     ///
     /// Currently, a prefilter only runs when the automaton is in the start
     /// state. That is, the position reported by a prefilter should always
     /// correspond to the start of a potential match.
     fn prefilter(&self) -> Option<&dyn Prefilter>;

     /// Return the identifier of this automaton's start state.
     fn start_state(&self) -> Self::ID;

     /// Returns true if and only if the given state identifier refers to a
     /// valid state.
     fn is_valid(&self, id: Self::ID) -> bool;

     /// Returns true if and only if the given identifier corresponds to a match
     /// state.
     ///
     /// The state ID given must be valid, or else implementors may panic.
     fn is_match_state(&self, id: Self::ID) -> bool;

     /// Returns true if and only if the given identifier corresponds to a state
     /// that is either the dead state or a match state.
     ///
     /// Depending on the implementation of the automaton, this routine can
     /// be used to save a branch in the core matching loop. Nevertheless,
     /// `is_match_state(id) || id == dead_id()` is always a valid
     /// implementation. Indeed, this is the default implementation.
     ///
     /// The state ID given must be valid, or else implementors may panic.
     fn is_match_or_dead_state(&self, id: Self::ID) -> bool {
         id == dead_id() || self.is_match_state(id)
     }

     /// If the given state is a match state, return the match corresponding
     /// to the given match index. `end` must be the ending position of the
     /// detected match. If no match exists or if `match_index` exceeds the
     /// number of matches in this state, then `None` is returned.
     ///
     /// The state ID given must be valid, or else implementors may panic.
     ///
     /// If the given state ID is correct and if the `match_index` is less than
     /// the number of matches for that state, then this is guaranteed to return
     /// a match.
     fn get_match(
         &self,
         id: Self::ID,
         match_index: usize,
         end: usize,
     ) -> Option<Match>;

     /// Returns the number of matches for the given state. If the given state
     /// is not a match state, then this returns 0.
     ///
     /// The state ID given must be valid, or else implementors must panic.
     fn match_count(&self, id: Self::ID) -> usize;

     /// Given the current state that this automaton is in and the next input
     /// byte, this method returns the identifier of the next state. The
     /// identifier returned must always be valid and may never correspond to
     /// the fail state. The returned identifier may, however, point to the
     /// dead state.
     ///
     /// This is not safe so that implementors may look up the next state
     /// without memory safety checks such as bounds checks. As such, callers
     /// must ensure that the given identifier corresponds to a valid automaton
     /// state. Implementors must, in turn, ensure that this routine is safe for
     /// all valid state identifiers and for all possible `u8` values.
     fn next_state(&self, current: Self::ID, input: u8) -> Self::ID;

     /// Like next_state, but debug_asserts that the underlying
     /// implementation never returns a `fail_id()` for the next state.
     fn next_state_no_fail(&self, current: Self::ID, input: u8) -> Self::ID {
         let next = self.next_state(current, input);
         // We should never see a transition to the failure state.
         debug_assert!(
             next != fail_id(),
             "automaton should never return fail_id for next state"
         );
         next
     }

     /// Execute a search using standard match semantics.
     ///
     /// This can be used even when the automaton was constructed with leftmost
     /// match semantics when you want to find the earliest possible match. This
     /// can also be used as part of an overlapping search implementation.
     ///
     /// N.B. This does not report a match if `state_id` is given as a matching
     /// state. As such, this should not be used directly.
     #[inline(always)]
     fn standard_find_at(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         if let Some(pre) = self.prefilter() {
             self.standard_find_at_imp(
                 prestate,
                 Some(pre),
                 haystack,
                 at,
                 state_id,
             )
         } else {
             self.standard_find_at_imp(prestate, None, haystack, at, state_id)
         }
     }

     // It's important for this to always be inlined. Namely, its only caller
     // is standard_find_at, and the inlining should remove the case analysis
     // for prefilter scanning when there is no prefilter available.
     #[inline(always)]
     fn standard_find_at_imp(
         &self,
         prestate: &mut PrefilterState,
         prefilter: Option<&dyn Prefilter>,
         haystack: &[u8],
         mut at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         while at < haystack.len() {
             if let Some(pre) = prefilter {
                 if prestate.is_effective(at) && *state_id == self.start_state()
                 {
                     let c = prefilter::next(prestate, pre, haystack, at)
                         .into_option();
                     match c {
                         None => return None,
                         Some(i) => {
                             at = i;
                         }
                     }
                 }
             }
             // CORRECTNESS: next_state is correct for all possible u8 values,
             // so the only thing we're concerned about is the validity of
             // `state_id`. `state_id` either comes from the caller (in which
             // case, we assume it is correct), or it comes from the return
             // value of next_state, which is guaranteed to be correct.
             *state_id = self.next_state_no_fail(*state_id, haystack[at]);
             at += 1;
             // This routine always quits immediately after seeing a
             // match, and since dead states can only come after seeing
             // a match, seeing a dead state here is impossible. (Unless
             // we have an anchored automaton, in which case, dead states
             // are used to stop a search.)
             debug_assert!(
                 *state_id != dead_id() || self.anchored(),
                 "standard find should never see a dead state"
             );

             if self.is_match_or_dead_state(*state_id) {
                 return if *state_id == dead_id() {
                     None
                 } else {
                     self.get_match(*state_id, 0, at)
                 };
             }
         }
         None
     }

     /// Execute a search using leftmost (either first or longest) match
     /// semantics.
     ///
     /// The principle difference between searching with standard semantics and
     /// searching with leftmost semantics is that leftmost searching will
     /// continue searching even after a match has been found. Once a match
     /// is found, the search does not stop until either the haystack has been
     /// exhausted or a dead state is observed in the automaton. (Dead states
     /// only exist in automatons constructed with leftmost semantics.) That is,
     /// we rely on the construction of the automaton to tell us when to quit.
     #[inline(never)]
     fn leftmost_find_at(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         if let Some(pre) = self.prefilter() {
             self.leftmost_find_at_imp(
                 prestate,
                 Some(pre),
                 haystack,
                 at,
                 state_id,
             )
         } else {
             self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
         }
     }

     // It's important for this to always be inlined. Namely, its only caller
     // is leftmost_find_at, and the inlining should remove the case analysis
     // for prefilter scanning when there is no prefilter available.
     #[inline(always)]
     fn leftmost_find_at_imp(
         &self,
         prestate: &mut PrefilterState,
         prefilter: Option<&dyn Prefilter>,
         haystack: &[u8],
         mut at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         debug_assert!(self.match_kind().is_leftmost());
         if self.anchored() && at > 0 && *state_id == self.start_state() {
             return None;
         }
         let mut last_match = self.get_match(*state_id, 0, at);
         while at < haystack.len() {
             if let Some(pre) = prefilter {
                 if prestate.is_effective(at) && *state_id == self.start_state()
                 {
                     let c = prefilter::next(prestate, pre, haystack, at)
                         .into_option();
                     match c {
                         None => return None,
                         Some(i) => {
                             at = i;
                         }
                     }
                 }
             }
             // CORRECTNESS: next_state is correct for all possible u8 values,
             // so the only thing we're concerned about is the validity of
             // `state_id`. `state_id` either comes from the caller (in which
             // case, we assume it is correct), or it comes from the return
             // value of next_state, which is guaranteed to be correct.
             *state_id = self.next_state_no_fail(*state_id, haystack[at]);
             at += 1;
             if self.is_match_or_dead_state(*state_id) {
                 if *state_id == dead_id() {
                     // The only way to enter into a dead state is if a match
                     // has been found, so we assert as much. This is different
                     // from normal automata, where you might enter a dead state
                     // if you know a subsequent match will never be found
                     // (regardless of whether a match has already been found).
                     // For Aho-Corasick, it is built so that we can match at
                     // any position, so the possibility of a match always
                     // exists.
                     //
                     // (Unless we have an anchored automaton, in which case,
                     // dead states are used to stop a search.)
                     debug_assert!(
                         last_match.is_some() || self.anchored(),
                         "failure state should only be seen after match"
                     );
                     return last_match;
                 }
                 last_match = self.get_match(*state_id, 0, at);
             }
         }
         last_match
     }

     /// This is like leftmost_find_at, but does not need to track a caller
     /// provided state id. In other words, the only output of this routine is a
     /// match, if one exists.
     ///
     /// It is regrettable that we need to effectively copy a chunk of
     /// implementation twice, but when we don't need to track the state ID, we
     /// can allow the prefilter to report matches immediately without having
     /// to re-confirm them with the automaton. The re-confirmation step is
     /// necessary in leftmost_find_at because tracing through the automaton is
     /// the only way to correctly set the state ID. (Perhaps an alternative
     /// would be to keep a map from pattern ID to matching state ID, but that
     /// complicates the code and still doesn't permit us to defer to the
     /// prefilter entirely when possible.)
     ///
     /// I did try a few things to avoid the code duplication here, but nothing
     /// optimized as well as this approach. (In microbenchmarks, there was
     /// about a 25% difference.)
     #[inline(never)]
     fn leftmost_find_at_no_state(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
     ) -> Option<Match> {
         if let Some(pre) = self.prefilter() {
             self.leftmost_find_at_no_state_imp(
                 prestate,
                 Some(pre),
                 haystack,
                 at,
             )
         } else {
             self.leftmost_find_at_no_state_imp(prestate, None, haystack, at)
         }
     }

     // It's important for this to always be inlined. Namely, its only caller
     // is leftmost_find_at_no_state, and the inlining should remove the case
     // analysis for prefilter scanning when there is no prefilter available.
     #[inline(always)]
     fn leftmost_find_at_no_state_imp(
         &self,
         prestate: &mut PrefilterState,
         prefilter: Option<&dyn Prefilter>,
         haystack: &[u8],
         mut at: usize,
     ) -> Option<Match> {
         debug_assert!(self.match_kind().is_leftmost());
         if self.anchored() && at > 0 {
             return None;
         }
         // If our prefilter handles confirmation of matches 100% of the
         // time, and since we don't need to track state IDs, we can avoid
         // Aho-Corasick completely.
         if let Some(pre) = prefilter {
             // We should never have a prefilter during an anchored search.
             debug_assert!(!self.anchored());
             if !pre.reports_false_positives() {
                 return match pre.next_candidate(prestate, haystack, at) {
                     Candidate::None => None,
                     Candidate::Match(m) => Some(m),
                     Candidate::PossibleStartOfMatch(_) => unreachable!(),
                 };
             }
         }

         let mut state_id = self.start_state();
         let mut last_match = self.get_match(state_id, 0, at);
         while at < haystack.len() {
             if let Some(pre) = prefilter {
                 if prestate.is_effective(at) && state_id == self.start_state()
                 {
                     match prefilter::next(prestate, pre, haystack, at) {
                         Candidate::None => return None,
                         // Since we aren't tracking a state ID, we can
                         // quit early once we know we have a match.
                         Candidate::Match(m) => return Some(m),
                         Candidate::PossibleStartOfMatch(i) => {
                             at = i;
                         }
                     }
                 }
             }
             // CORRECTNESS: next_state is correct for all possible u8 values,
             // so the only thing we're concerned about is the validity of
             // `state_id`. `state_id` either comes from the caller (in which
             // case, we assume it is correct), or it comes from the return
             // value of next_state, which is guaranteed to be correct.
             state_id = self.next_state_no_fail(state_id, haystack[at]);
             at += 1;
             if self.is_match_or_dead_state(state_id) {
                 if state_id == dead_id() {
                     // The only way to enter into a dead state is if a
                     // match has been found, so we assert as much. This
                     // is different from normal automata, where you might
                     // enter a dead state if you know a subsequent match
                     // will never be found (regardless of whether a match
                     // has already been found). For Aho-Corasick, it is
                     // built so that we can match at any position, so the
                     // possibility of a match always exists.
                     //
                     // (Unless we have an anchored automaton, in which
                     // case, dead states are used to stop a search.)
                     debug_assert!(
                         last_match.is_some() || self.anchored(),
                         "failure state should only be seen after match"
                     );
                     return last_match;
                 }
                 last_match = self.get_match(state_id, 0, at);
             }
         }
         last_match
     }

     /// Execute an overlapping search.
     ///
     /// When executing an overlapping match, the previous state ID in addition
     /// to the previous match index should be given. If there are more matches
     /// at the given state, then the match is reported and the given index is
     /// incremented.
     #[inline(always)]
     fn overlapping_find_at(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
         state_id: &mut Self::ID,
         match_index: &mut usize,
     ) -> Option<Match> {
         if self.anchored() && at > 0 && *state_id == self.start_state() {
             return None;
         }

         let match_count = self.match_count(*state_id);
         if *match_index < match_count {
             // This is guaranteed to return a match since
             // match_index < match_count.
             let result = self.get_match(*state_id, *match_index, at);
             debug_assert!(result.is_some(), "must be a match");
             *match_index += 1;
             return result;
         }

         *match_index = 0;
         match self.standard_find_at(prestate, haystack, at, state_id) {
             None => None,
             Some(m) => {
                 *match_index = 1;
                 Some(m)
             }
         }
     }

     /// Return the earliest match found. This returns as soon as we know that
     /// we have a match. As such, this does not necessarily correspond to the
     /// leftmost starting match, but rather, the leftmost position at which a
     /// match ends.
     #[inline(always)]
     fn earliest_find_at(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         if *state_id == self.start_state() {
             if self.anchored() && at > 0 {
                 return None;
             }
             if let Some(m) = self.get_match(*state_id, 0, at) {
                 return Some(m);
             }
         }
         self.standard_find_at(prestate, haystack, at, state_id)
     }

     /// A convenience function for finding the next match according to the
     /// match semantics of this automaton. For standard match semantics, this
     /// finds the earliest match. Otherwise, the leftmost match is found.
     #[inline(always)]
     fn find_at(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
         state_id: &mut Self::ID,
     ) -> Option<Match> {
         match *self.match_kind() {
             MatchKind::Standard => {
                 self.earliest_find_at(prestate, haystack, at, state_id)
             }
             MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
                 self.leftmost_find_at(prestate, haystack, at, state_id)
             }
             MatchKind::__Nonexhaustive => unreachable!(),
         }
     }

     /// Like find_at, but does not track state identifiers. This permits some
     /// optimizations when a prefilter that confirms its own matches is
     /// present.
     #[inline(always)]
     fn find_at_no_state(
         &self,
         prestate: &mut PrefilterState,
         haystack: &[u8],
         at: usize,
     ) -> Option<Match> {
         match *self.match_kind() {
             MatchKind::Standard => {
                 let mut state = self.start_state();
                 self.earliest_find_at(prestate, haystack, at, &mut state)
             }
             MatchKind::LeftmostFirst | MatchKind::LeftmostLongest => {
                 self.leftmost_find_at_no_state(prestate, haystack, at)
             }
             MatchKind::__Nonexhaustive => unreachable!(),
         }
     }
 }
	use crate::ahocorasick::MatchKind;
	use crate::prefilter::{self, Candidate, Prefilter, PrefilterState};
	use crate::state_id::{dead_id, fail_id, StateID};
	use crate::Match;

	// NOTE: This trait essentially started as a copy of the same trait from from
	// regex-automata, with some wording changed since we use this trait for
	// NFAs in addition to DFAs in this crate. Additionally, we do not export
	// this trait. It's only used internally to reduce code duplication. The
	// regex-automata crate needs to expose it because its Regex type is generic
	// over implementations of this trait. In this crate, we encapsulate everything
	// behind the AhoCorasick type.
	//
	// This trait is a bit of a mess, but it's not quite clear how to fix it.
	// Basically, there are several competing concerns:
	//
	// * We need performance, so everything effectively needs to get monomorphized.
	// * There are several variations on searching Aho-Corasick automatons:
	// overlapping, standard and leftmost. Overlapping and standard are somewhat
	// combined together below, but there is no real way to combine standard with
	// leftmost. Namely, leftmost requires continuing a search even after a match
	// is found, in order to correctly disambiguate a match.
	// * On top of that, sometimes callers want to know which state the automaton
	// is in after searching. This is principally useful for overlapping and
	// stream searches. However, when callers don't care about this, we really
	// do not want to be forced to compute it, since it sometimes requires extra
	// work. Thus, there are effectively two copies of leftmost searching: one
	// for tracking the state ID and one that doesn't. We should ideally do the
	// same for standard searching, but my sanity stopped me.

	// SAFETY RATIONALE: Previously, the code below went to some length to remove
	// all bounds checks. This generally produced tighter assembly and lead to
	// 20-50% improvements in micro-benchmarks on corpora made up of random
	// characters. This somewhat makes sense, since the branch predictor is going
	// to be at its worse on random text.
	//
	// However, using the aho-corasick-debug tool and manually benchmarking
	// different inputs, the code with bounds checks actually wound up being
	// slightly faster:
	//
	// $ cat input
	// Sherlock Holmes
	// John Watson
	// Professor Moriarty
	// Irene Adler
	// Mary Watson
	//
	// $ aho-corasick-debug-safe \
	// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa
	// pattern read time: 32.824µs
	// automaton build time: 444.687µs
	// automaton heap usage: 72392 bytes
	// match count: 639
	// count time: 1.809961702s
	//
	// $ aho-corasick-debug-master \
	// input OpenSubtitles2018.raw.sample.en --kind leftmost-first --dfa
	// pattern read time: 31.425µs
	// automaton build time: 317.434µs
	// automaton heap usage: 72392 bytes
	// match count: 639
	// count time: 2.059157705s
	//
	// I was able to reproduce this result on two different machines (an i5 and
	// an i7). Therefore, we go the route of safe code for now.

	/// A trait describing the interface of an Aho-Corasick finite state machine.
	///
	/// Every automaton has exactly one fail state, one dead state and exactly one
	/// start state. Generally, these correspond to the first, second and third
	/// states, respectively. The failure state is always treated as a sentinel.
	/// That is, no correct Aho-Corasick automaton will ever transition into the
	/// fail state. The dead state, however, can be transitioned into, but only
	/// when leftmost-first or leftmost-longest match semantics are enabled and
	/// only when at least one match has been observed.
	///
	/// Every automaton also has one or more match states, such that
	/// `Automaton::is_match_state(id)` returns `true` if and only if `id`
	/// corresponds to a match state.
	pub trait Automaton {
	/// The representation used for state identifiers in this automaton.
	///
	/// Typically, this is one of `u8`, `u16`, `u32`, `u64` or `usize`.
	type ID: StateID;

	/// The type of matching that should be done.
	fn match_kind(&self) -> &MatchKind;

	/// Returns true if and only if this automaton uses anchored searches.
	fn anchored(&self) -> bool;

	/// An optional prefilter for quickly skipping to the next candidate match.
	/// A prefilter must report at least every match, although it may report
	/// positions that do not correspond to a match. That is, it must not allow
	/// false negatives, but can allow false positives.
	///
	/// Currently, a prefilter only runs when the automaton is in the start
	/// state. That is, the position reported by a prefilter should always
	/// correspond to the start of a potential match.
	fn prefilter(&self) -> Option<&dyn Prefilter>;

	/// Return the identifier of this automaton's start state.
	fn start_state(&self) -> Self::ID;

	/// Returns true if and only if the given state identifier refers to a
	/// valid state.
	fn is_valid(&self, id: Self::ID) -> bool;

	/// Returns true if and only if the given identifier corresponds to a match
	/// state.
	///
	/// The state ID given must be valid, or else implementors may panic.
	fn is_match_state(&self, id: Self::ID) -> bool;

	/// Returns true if and only if the given identifier corresponds to a state
	/// that is either the dead state or a match state.
	///
	/// Depending on the implementation of the automaton, this routine can
	/// be used to save a branch in the core matching loop. Nevertheless,
	/// `is_match_state(id) \|\| id == dead_id()` is always a valid
	/// implementation. Indeed, this is the default implementation.
	///
	/// The state ID given must be valid, or else implementors may panic.
	fn is_match_or_dead_state(&self, id: Self::ID) -> bool {
	id == dead_id() \|\| self.is_match_state(id)
	}

	/// If the given state is a match state, return the match corresponding
	/// to the given match index. `end` must be the ending position of the
	/// detected match. If no match exists or if `match_index` exceeds the
	/// number of matches in this state, then `None` is returned.
	///
	/// The state ID given must be valid, or else implementors may panic.
	///
	/// If the given state ID is correct and if the `match_index` is less than
	/// the number of matches for that state, then this is guaranteed to return
	/// a match.
	fn get_match(
	&self,
	id: Self::ID,
	match_index: usize,
	end: usize,
	) -> Option<Match>;

	/// Returns the number of matches for the given state. If the given state
	/// is not a match state, then this returns 0.
	///
	/// The state ID given must be valid, or else implementors must panic.
	fn match_count(&self, id: Self::ID) -> usize;

	/// Given the current state that this automaton is in and the next input
	/// byte, this method returns the identifier of the next state. The
	/// identifier returned must always be valid and may never correspond to
	/// the fail state. The returned identifier may, however, point to the
	/// dead state.
	///
	/// This is not safe so that implementors may look up the next state
	/// without memory safety checks such as bounds checks. As such, callers
	/// must ensure that the given identifier corresponds to a valid automaton
	/// state. Implementors must, in turn, ensure that this routine is safe for
	/// all valid state identifiers and for all possible `u8` values.
	fn next_state(&self, current: Self::ID, input: u8) -> Self::ID;

	/// Like next_state, but debug_asserts that the underlying
	/// implementation never returns a `fail_id()` for the next state.
	fn next_state_no_fail(&self, current: Self::ID, input: u8) -> Self::ID {
	let next = self.next_state(current, input);
	// We should never see a transition to the failure state.
	debug_assert!(
	next != fail_id(),
	"automaton should never return fail_id for next state"
	);
	next
	}

	/// Execute a search using standard match semantics.
	///
	/// This can be used even when the automaton was constructed with leftmost
	/// match semantics when you want to find the earliest possible match. This
	/// can also be used as part of an overlapping search implementation.
	///
	/// N.B. This does not report a match if `state_id` is given as a matching
	/// state. As such, this should not be used directly.
	#[inline(always)]
	fn standard_find_at(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	if let Some(pre) = self.prefilter() {
	self.standard_find_at_imp(
	prestate,
	Some(pre),
	haystack,
	at,
	state_id,
	)
	} else {
	self.standard_find_at_imp(prestate, None, haystack, at, state_id)
	}
	}

	// It's important for this to always be inlined. Namely, its only caller
	// is standard_find_at, and the inlining should remove the case analysis
	// for prefilter scanning when there is no prefilter available.
	#[inline(always)]
	fn standard_find_at_imp(
	&self,
	prestate: &mut PrefilterState,
	prefilter: Option<&dyn Prefilter>,
	haystack: &[u8],
	mut at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	while at < haystack.len() {
	if let Some(pre) = prefilter {
	if prestate.is_effective(at) && *state_id == self.start_state()
	{
	let c = prefilter::next(prestate, pre, haystack, at)
	.into_option();
	match c {
	None => return None,
	Some(i) => {
	at = i;
	}
	}
	}
	}
	// CORRECTNESS: next_state is correct for all possible u8 values,
	// so the only thing we're concerned about is the validity of
	// `state_id`. `state_id` either comes from the caller (in which
	// case, we assume it is correct), or it comes from the return
	// value of next_state, which is guaranteed to be correct.
	state_id = self.next_state_no_fail(state_id, haystack[at]);
	at += 1;
	// This routine always quits immediately after seeing a
	// match, and since dead states can only come after seeing
	// a match, seeing a dead state here is impossible. (Unless
	// we have an anchored automaton, in which case, dead states
	// are used to stop a search.)
	debug_assert!(
	*state_id != dead_id() \|\| self.anchored(),
	"standard find should never see a dead state"
	);

	if self.is_match_or_dead_state(*state_id) {
	return if *state_id == dead_id() {
	None
	} else {
	self.get_match(*state_id, 0, at)
	};
	}
	}
	None
	}

	/// Execute a search using leftmost (either first or longest) match
	/// semantics.
	///
	/// The principle difference between searching with standard semantics and
	/// searching with leftmost semantics is that leftmost searching will
	/// continue searching even after a match has been found. Once a match
	/// is found, the search does not stop until either the haystack has been
	/// exhausted or a dead state is observed in the automaton. (Dead states
	/// only exist in automatons constructed with leftmost semantics.) That is,
	/// we rely on the construction of the automaton to tell us when to quit.
	#[inline(never)]
	fn leftmost_find_at(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	if let Some(pre) = self.prefilter() {
	self.leftmost_find_at_imp(
	prestate,
	Some(pre),
	haystack,
	at,
	state_id,
	)
	} else {
	self.leftmost_find_at_imp(prestate, None, haystack, at, state_id)
	}
	}

	// It's important for this to always be inlined. Namely, its only caller
	// is leftmost_find_at, and the inlining should remove the case analysis
	// for prefilter scanning when there is no prefilter available.
	#[inline(always)]
	fn leftmost_find_at_imp(
	&self,
	prestate: &mut PrefilterState,
	prefilter: Option<&dyn Prefilter>,
	haystack: &[u8],
	mut at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	debug_assert!(self.match_kind().is_leftmost());
	if self.anchored() && at > 0 && *state_id == self.start_state() {
	return None;
	}
	let mut last_match = self.get_match(*state_id, 0, at);
	while at < haystack.len() {
	if let Some(pre) = prefilter {
	if prestate.is_effective(at) && *state_id == self.start_state()
	{
	let c = prefilter::next(prestate, pre, haystack, at)
	.into_option();
	match c {
	None => return None,
	Some(i) => {
	at = i;
	}
	}
	}
	}
	// CORRECTNESS: next_state is correct for all possible u8 values,
	// so the only thing we're concerned about is the validity of
	// `state_id`. `state_id` either comes from the caller (in which
	// case, we assume it is correct), or it comes from the return
	// value of next_state, which is guaranteed to be correct.
	state_id = self.next_state_no_fail(state_id, haystack[at]);
	at += 1;
	if self.is_match_or_dead_state(*state_id) {
	if *state_id == dead_id() {
	// The only way to enter into a dead state is if a match
	// has been found, so we assert as much. This is different
	// from normal automata, where you might enter a dead state
	// if you know a subsequent match will never be found
	// (regardless of whether a match has already been found).
	// For Aho-Corasick, it is built so that we can match at
	// any position, so the possibility of a match always
	// exists.
	//
	// (Unless we have an anchored automaton, in which case,
	// dead states are used to stop a search.)
	debug_assert!(
	last_match.is_some() \|\| self.anchored(),
	"failure state should only be seen after match"
	);
	return last_match;
	}
	last_match = self.get_match(*state_id, 0, at);
	}
	}
	last_match
	}

	/// This is like leftmost_find_at, but does not need to track a caller
	/// provided state id. In other words, the only output of this routine is a
	/// match, if one exists.
	///
	/// It is regrettable that we need to effectively copy a chunk of
	/// implementation twice, but when we don't need to track the state ID, we
	/// can allow the prefilter to report matches immediately without having
	/// to re-confirm them with the automaton. The re-confirmation step is
	/// necessary in leftmost_find_at because tracing through the automaton is
	/// the only way to correctly set the state ID. (Perhaps an alternative
	/// would be to keep a map from pattern ID to matching state ID, but that
	/// complicates the code and still doesn't permit us to defer to the
	/// prefilter entirely when possible.)
	///
	/// I did try a few things to avoid the code duplication here, but nothing
	/// optimized as well as this approach. (In microbenchmarks, there was
	/// about a 25% difference.)
	#[inline(never)]
	fn leftmost_find_at_no_state(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	) -> Option<Match> {
	if let Some(pre) = self.prefilter() {
	self.leftmost_find_at_no_state_imp(
	prestate,
	Some(pre),
	haystack,
	at,
	)
	} else {
	self.leftmost_find_at_no_state_imp(prestate, None, haystack, at)
	}
	}

	// It's important for this to always be inlined. Namely, its only caller
	// is leftmost_find_at_no_state, and the inlining should remove the case
	// analysis for prefilter scanning when there is no prefilter available.
	#[inline(always)]
	fn leftmost_find_at_no_state_imp(
	&self,
	prestate: &mut PrefilterState,
	prefilter: Option<&dyn Prefilter>,
	haystack: &[u8],
	mut at: usize,
	) -> Option<Match> {
	debug_assert!(self.match_kind().is_leftmost());
	if self.anchored() && at > 0 {
	return None;
	}
	// If our prefilter handles confirmation of matches 100% of the
	// time, and since we don't need to track state IDs, we can avoid
	// Aho-Corasick completely.
	if let Some(pre) = prefilter {
	// We should never have a prefilter during an anchored search.
	debug_assert!(!self.anchored());
	if !pre.reports_false_positives() {
	return match pre.next_candidate(prestate, haystack, at) {
	Candidate::None => None,
	Candidate::Match(m) => Some(m),
	Candidate::PossibleStartOfMatch(_) => unreachable!(),
	};
	}
	}

	let mut state_id = self.start_state();
	let mut last_match = self.get_match(state_id, 0, at);
	while at < haystack.len() {
	if let Some(pre) = prefilter {
	if prestate.is_effective(at) && state_id == self.start_state()
	{
	match prefilter::next(prestate, pre, haystack, at) {
	Candidate::None => return None,
	// Since we aren't tracking a state ID, we can
	// quit early once we know we have a match.
	Candidate::Match(m) => return Some(m),
	Candidate::PossibleStartOfMatch(i) => {
	at = i;
	}
	}
	}
	}
	// CORRECTNESS: next_state is correct for all possible u8 values,
	// so the only thing we're concerned about is the validity of
	// `state_id`. `state_id` either comes from the caller (in which
	// case, we assume it is correct), or it comes from the return
	// value of next_state, which is guaranteed to be correct.
	state_id = self.next_state_no_fail(state_id, haystack[at]);
	at += 1;
	if self.is_match_or_dead_state(state_id) {
	if state_id == dead_id() {
	// The only way to enter into a dead state is if a
	// match has been found, so we assert as much. This
	// is different from normal automata, where you might
	// enter a dead state if you know a subsequent match
	// will never be found (regardless of whether a match
	// has already been found). For Aho-Corasick, it is
	// built so that we can match at any position, so the
	// possibility of a match always exists.
	//
	// (Unless we have an anchored automaton, in which
	// case, dead states are used to stop a search.)
	debug_assert!(
	last_match.is_some() \|\| self.anchored(),
	"failure state should only be seen after match"
	);
	return last_match;
	}
	last_match = self.get_match(state_id, 0, at);
	}
	}
	last_match
	}

	/// Execute an overlapping search.
	///
	/// When executing an overlapping match, the previous state ID in addition
	/// to the previous match index should be given. If there are more matches
	/// at the given state, then the match is reported and the given index is
	/// incremented.
	#[inline(always)]
	fn overlapping_find_at(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	state_id: &mut Self::ID,
	match_index: &mut usize,
	) -> Option<Match> {
	if self.anchored() && at > 0 && *state_id == self.start_state() {
	return None;
	}

	let match_count = self.match_count(*state_id);
	if *match_index < match_count {
	// This is guaranteed to return a match since
	// match_index < match_count.
	let result = self.get_match(state_id, match_index, at);
	debug_assert!(result.is_some(), "must be a match");
	*match_index += 1;
	return result;
	}

	*match_index = 0;
	match self.standard_find_at(prestate, haystack, at, state_id) {
	None => None,
	Some(m) => {
	*match_index = 1;
	Some(m)
	}
	}
	}

	/// Return the earliest match found. This returns as soon as we know that
	/// we have a match. As such, this does not necessarily correspond to the
	/// leftmost starting match, but rather, the leftmost position at which a
	/// match ends.
	#[inline(always)]
	fn earliest_find_at(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	if *state_id == self.start_state() {
	if self.anchored() && at > 0 {
	return None;
	}
	if let Some(m) = self.get_match(*state_id, 0, at) {
	return Some(m);
	}
	}
	self.standard_find_at(prestate, haystack, at, state_id)
	}

	/// A convenience function for finding the next match according to the
	/// match semantics of this automaton. For standard match semantics, this
	/// finds the earliest match. Otherwise, the leftmost match is found.
	#[inline(always)]
	fn find_at(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	state_id: &mut Self::ID,
	) -> Option<Match> {
	match *self.match_kind() {
	MatchKind::Standard => {
	self.earliest_find_at(prestate, haystack, at, state_id)
	}
	MatchKind::LeftmostFirst \| MatchKind::LeftmostLongest => {
	self.leftmost_find_at(prestate, haystack, at, state_id)
	}
	MatchKind::__Nonexhaustive => unreachable!(),
	}
	}

	/// Like find_at, but does not track state identifiers. This permits some
	/// optimizations when a prefilter that confirms its own matches is
	/// present.
	#[inline(always)]
	fn find_at_no_state(
	&self,
	prestate: &mut PrefilterState,
	haystack: &[u8],
	at: usize,
	) -> Option<Match> {
	match *self.match_kind() {
	MatchKind::Standard => {
	let mut state = self.start_state();
	self.earliest_find_at(prestate, haystack, at, &mut state)
	}
	MatchKind::LeftmostFirst \| MatchKind::LeftmostLongest => {
	self.leftmost_find_at_no_state(prestate, haystack, at)
	}
	MatchKind::__Nonexhaustive => unreachable!(),
	}
	}
	}