| use std::str::pattern::*; |
| |
// Runs a scripted sequence of searcher calls and checks all results at once.
//
// Arguments, in order:
// - `$haystack`: the string that is searched.
// - `$needle`: the pattern; `into_searcher($haystack)` is called on it.
// - `$testname`: message handed to `assert_eq!`, shown when the check fails.
// - `[$func, ...]`: names of the searcher methods to call, in this order.
//   Every return value is converted with `Step::from`.
// - `$result`: array with the expected `Step` for each call.
//
// A trailing comma is accepted after the last method name and after
// `$result`, and the expansion is a single block so that its locals never
// leak into the calling scope.
macro_rules! search_asserts {
    (
        $haystack:expr,
        $needle:expr,
        $testname:expr,
        [$($func:ident),* $(,)?],
        $result:expr $(,)?
    ) => {{
        let mut searcher = $needle.into_searcher($haystack);
        // One entry per requested call, evaluated strictly left to right.
        let steps = [$( Step::from(searcher.$func()) ),*];
        assert_eq!(&steps[..], &$result, $testname);
    }};
}
| |
/// Single result type for every kind of searcher call, so that the outcomes of
/// `next()` and of `next_match()`/`next_reject()` can sit in the same array.
#[derive(PartialEq, Eq, Debug)]
enum Step {
    // The three range-carrying variants deliberately have names of equal
    // length; this keeps the expectation tables in the tests aligned.
    /// Converted from `SearchStep::Match(a, b)`.
    Matches(usize, usize),
    /// Converted from `SearchStep::Reject(a, b)`.
    Rejects(usize, usize),
    /// Converted from `Some((a, b))`.
    InRange(usize, usize),
    /// Converted from `SearchStep::Done` as well as from `None`.
    Done,
}
| |
| use self::Step::*; |
| |
| impl From<SearchStep> for Step { |
| fn from(x: SearchStep) -> Self { |
| match x { |
| SearchStep::Match(a, b) => Matches(a, b), |
| SearchStep::Reject(a, b) => Rejects(a, b), |
| SearchStep::Done => Done, |
| } |
| } |
| } |
| |
| impl From<Option<(usize, usize)>> for Step { |
| fn from(x: Option<(usize, usize)>) -> Self { |
| match x { |
| Some((a, b)) => InRange(a, b), |
| None => Done, |
| } |
| } |
| } |
| |
| // FIXME(Manishearth) these tests focus on single-character searching (CharSearcher) |
| // and on next()/next_match(), not next_reject(). This is because |
| // the memchr changes make next_match() for single chars complex, but next_reject() |
| // continues to use next() under the hood. We should add more test cases for all |
| // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc) |
| |
| #[test] |
| fn test_simple_iteration() { |
| search_asserts!( |
| "abcdeabcd", |
| 'a', |
| "forward iteration for ASCII string", |
| // a b c d e a b c d EOF |
| [next, next, next, next, next, next, next, next, next, next], |
| [ |
| Matches(0, 1), |
| Rejects(1, 2), |
| Rejects(2, 3), |
| Rejects(3, 4), |
| Rejects(4, 5), |
| Matches(5, 6), |
| Rejects(6, 7), |
| Rejects(7, 8), |
| Rejects(8, 9), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| "abcdeabcd", |
| 'a', |
| "reverse iteration for ASCII string", |
| // d c b a e d c b a EOF |
| [ |
| next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back, |
| next_back, next_back |
| ], |
| [ |
| Rejects(8, 9), |
| Rejects(7, 8), |
| Rejects(6, 7), |
| Matches(5, 6), |
| Rejects(4, 5), |
| Rejects(3, 4), |
| Rejects(2, 3), |
| Rejects(1, 2), |
| Matches(0, 1), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| "我爱我的猫", |
| '我', |
| "forward iteration for Chinese string", |
| // 我 愛 我 的 貓 EOF |
| [next, next, next, next, next, next], |
| [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done] |
| ); |
| |
| search_asserts!( |
| "我的猫说meow", |
| 'm', |
| "forward iteration for mixed string", |
| // 我 的 猫 说 m e o w EOF |
| [next, next, next, next, next, next, next, next, next], |
| [ |
| Rejects(0, 3), |
| Rejects(3, 6), |
| Rejects(6, 9), |
| Rejects(9, 12), |
| Matches(12, 13), |
| Rejects(13, 14), |
| Rejects(14, 15), |
| Rejects(15, 16), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| "我的猫说meow", |
| '猫', |
| "reverse iteration for mixed string", |
| // w o e m 说 猫 的 我 EOF |
| [ |
| next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back, |
| next_back |
| ], |
| [ |
| Rejects(15, 16), |
| Rejects(14, 15), |
| Rejects(13, 14), |
| Rejects(12, 13), |
| Rejects(9, 12), |
| Matches(6, 9), |
| Rejects(3, 6), |
| Rejects(0, 3), |
| Done |
| ] |
| ); |
| } |
| |
| #[test] |
| fn test_simple_search() { |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'a', |
| "next_match for ASCII string", |
| [next_match, next_match, next_match, next_match], |
| [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done] |
| ); |
| |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'a', |
| "next_match_back for ASCII string", |
| [next_match_back, next_match_back, next_match_back, next_match_back], |
| [InRange(10, 11), InRange(5, 6), InRange(0, 1), Done] |
| ); |
| |
| search_asserts!( |
| "abcdeab", |
| 'a', |
| "next_reject for ASCII string", |
| [next_reject, next_reject, next_match, next_reject, next_reject], |
| [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done] |
| ); |
| |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'a', |
| "next_reject_back for ASCII string", |
| [ |
| next_reject_back, |
| next_reject_back, |
| next_match_back, |
| next_reject_back, |
| next_reject_back, |
| next_reject_back |
| ], |
| [ |
| InRange(14, 15), |
| InRange(13, 14), |
| InRange(10, 11), |
| InRange(9, 10), |
| InRange(8, 9), |
| InRange(7, 8) |
| ] |
| ); |
| } |
| |
// Haystack made of characters whose UTF-8 encodings share the byte 0x81:
// - Á, 각, ก and 😁 all end in 0x81;
// - 🁀 and ᘀ do not end in 0x81 but contain the byte;
// - ꁁ has 0x81 as both its second and its third byte.
//
// The memchr-using implementations of next_match and next_match_back
// temporarily violate the property that the search is always on a unicode
// boundary, which is fine as long as this never reaches next() or next_back().
// For that reason the tests also check that next() is correct after each
// next_match().
const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
| |
| #[test] |
| fn test_stress_indices() { |
| // this isn't really a test, more of documentation on the indices of each character in the stresstest string |
| |
| search_asserts!( |
| STRESS, |
| 'x', |
| "Indices of characters in stress test", |
| [ |
| next, next, next, next, next, next, next, next, next, next, next, next, next, next, |
| next, next, next, next, next, next, next |
| ], |
| [ |
| Rejects(0, 2), // Á |
| Rejects(2, 3), // a |
| Rejects(3, 7), // 🁀 |
| Rejects(7, 8), // b |
| Rejects(8, 10), // Á |
| Rejects(10, 13), // ꁁ |
| Rejects(13, 14), // f |
| Rejects(14, 15), // g |
| Rejects(15, 19), // 😀 |
| Rejects(19, 22), // 각 |
| Rejects(22, 25), // ก |
| Rejects(25, 28), // ᘀ |
| Rejects(28, 31), // 각 |
| Rejects(31, 32), // a |
| Rejects(32, 34), // Á |
| Rejects(34, 37), // 각 |
| Rejects(37, 40), // ꁁ |
| Rejects(40, 43), // ก |
| Rejects(43, 47), // 😀 |
| Rejects(47, 48), // a |
| Done |
| ] |
| ); |
| } |
| |
| #[test] |
| fn test_forward_search_shared_bytes() { |
| search_asserts!( |
| STRESS, |
| 'Á', |
| "Forward search for two-byte Latin character", |
| [next_match, next_match, next_match, next_match], |
| [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'Á', |
| "Forward search for two-byte Latin character; check if next() still works", |
| [next_match, next, next_match, next, next_match, next, next_match], |
| [ |
| InRange(0, 2), |
| Rejects(2, 3), |
| InRange(8, 10), |
| Rejects(10, 13), |
| InRange(32, 34), |
| Rejects(34, 37), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '각', |
| "Forward search for three-byte Hangul character", |
| [next_match, next, next_match, next_match, next_match], |
| [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '각', |
| "Forward search for three-byte Hangul character; check if next() still works", |
| [next_match, next, next_match, next, next_match, next, next_match], |
| [ |
| InRange(19, 22), |
| Rejects(22, 25), |
| InRange(28, 31), |
| Rejects(31, 32), |
| InRange(34, 37), |
| Rejects(37, 40), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ก', |
| "Forward search for three-byte Thai character", |
| [next_match, next, next_match, next, next_match], |
| [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ก', |
| "Forward search for three-byte Thai character; check if next() still works", |
| [next_match, next, next_match, next, next_match], |
| [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '😁', |
| "Forward search for four-byte emoji", |
| [next_match, next, next_match, next, next_match], |
| [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '😁', |
| "Forward search for four-byte emoji; check if next() still works", |
| [next_match, next, next_match, next, next_match], |
| [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ꁁ', |
| "Forward search for three-byte Yi character with repeated bytes", |
| [next_match, next, next_match, next, next_match], |
| [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ꁁ', |
| "Forward search for three-byte Yi character with repeated bytes; check if next() still works", |
| [next_match, next, next_match, next, next_match], |
| [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] |
| ); |
| } |
| |
| #[test] |
| fn test_reverse_search_shared_bytes() { |
| search_asserts!( |
| STRESS, |
| 'Á', |
| "Reverse search for two-byte Latin character", |
| [next_match_back, next_match_back, next_match_back, next_match_back], |
| [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'Á', |
| "Reverse search for two-byte Latin character; check if next_back() still works", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back], |
| [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '각', |
| "Reverse search for three-byte Hangul character", |
| [next_match_back, next_back, next_match_back, next_match_back, next_match_back], |
| [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '각', |
| "Reverse search for three-byte Hangul character; check if next_back() still works", |
| [ |
| next_match_back, |
| next_back, |
| next_match_back, |
| next_back, |
| next_match_back, |
| next_back, |
| next_match_back |
| ], |
| [ |
| InRange(34, 37), |
| Rejects(32, 34), |
| InRange(28, 31), |
| Rejects(25, 28), |
| InRange(19, 22), |
| Rejects(15, 19), |
| Done |
| ] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ก', |
| "Reverse search for three-byte Thai character", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ก', |
| "Reverse search for three-byte Thai character; check if next_back() still works", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '😁', |
| "Reverse search for four-byte emoji", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| '😁', |
| "Reverse search for four-byte emoji; check if next_back() still works", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ꁁ', |
| "Reverse search for three-byte Yi character with repeated bytes", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] |
| ); |
| |
| search_asserts!( |
| STRESS, |
| 'ꁁ', |
| "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works", |
| [next_match_back, next_back, next_match_back, next_back, next_match_back], |
| [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] |
| ); |
| } |
| |
| #[test] |
| fn double_ended_regression_test() { |
| // https://github.com/rust-lang/rust/issues/47175 |
| // Ensures that double ended searching comes to a convergence |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'a', |
| "alternating double ended search", |
| [next_match, next_match_back, next_match, next_match_back], |
| [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] |
| ); |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'a', |
| "triple double ended search for a", |
| [next_match, next_match_back, next_match_back, next_match_back], |
| [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] |
| ); |
| search_asserts!( |
| "abcdeabcdeabcde", |
| 'd', |
| "triple double ended search for d", |
| [next_match, next_match_back, next_match_back, next_match_back], |
| [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done] |
| ); |
| search_asserts!( |
| STRESS, |
| 'Á', |
| "Double ended search for two-byte Latin character", |
| [next_match, next_match_back, next_match, next_match_back], |
| [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done] |
| ); |
| search_asserts!( |
| STRESS, |
| '각', |
| "Reverse double ended search for three-byte Hangul character", |
| [next_match_back, next_back, next_match, next, next_match_back, next_match], |
| [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done] |
| ); |
| search_asserts!( |
| STRESS, |
| 'ก', |
| "Double ended search for three-byte Thai character", |
| [next_match, next_back, next, next_match_back, next_match], |
| [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done] |
| ); |
| search_asserts!( |
| STRESS, |
| '😁', |
| "Double ended search for four-byte emoji", |
| [next_match_back, next, next_match, next_back, next_match], |
| [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done] |
| ); |
| search_asserts!( |
| STRESS, |
| 'ꁁ', |
| "Double ended search for three-byte Yi character with repeated bytes", |
| [next_match, next, next_match_back, next_back, next_match], |
| [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done] |
| ); |
| } |