Unrolled build for #157599 Rollup merge of #157599 - lnicola:sync-from-ra, r=lnicola `rust-analyzer` subtree update Subtree update of `rust-analyzer` to https://github.com/rust-lang/rust-analyzer/commit/57116fa264fd567f5b1f206290169e196d4447bf. Created using https://github.com/rust-lang/josh-sync. r? @ghost
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 77e0093..c014bc2 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs
@@ -1496,7 +1496,13 @@ fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::O let start = if long_period { self.crit_pos } else { cmp::max(self.crit_pos, self.memory) }; for i in start..needle.len() { - if needle[i] != haystack[self.position + i] { + // SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)` + // check returned `Some`, so `self.position + needle_last < haystack.len()`. + // Since `i < needle.len()` implies `i <= needle_last`, we have + // `self.position + i < haystack.len()`. + // Every path that mutates `self.position` below either returns or re-enters `'search`, + // which re-runs the check before reaching the loop again. + if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } { self.position += i - self.crit_pos + 1; if !long_period { self.memory = 0; @@ -1508,7 +1514,13 @@ fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::O // See if the left part of the needle matches let start = if long_period { 0 } else { self.memory }; for i in (start..self.crit_pos).rev() { - if needle[i] != haystack[self.position + i] { + // SAFETY: on every iteration of `'search`, the `haystack.get(self.position + needle_last)` + // check returned `Some`, so `self.position + needle_last < haystack.len()`. + // Since `i < self.crit_pos <= needle.len()`, we have `i <= needle_last`, and thus + // `self.position + i <= self.position + needle_last < haystack.len()`. + // Every path that mutates `self.position` below either returns or re-enters `'search`, + // which re-runs the check before reaching the loop again. + if needle[i] != unsafe { *haystack.get_unchecked(self.position + i) } { self.position += self.period; if !long_period { self.memory = needle.len() - self.period; @@ -1583,7 +1595,14 @@ fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> cmp::min(self.crit_pos_back, self.memory_back) }; for i in (0..crit).rev() { - if needle[i] != haystack[self.end - needle.len() + i] { + // SAFETY: On every iteration of `'search`, `haystack.get(self.end.wrapping_sub(needle.len()))` + // returned `Some`, so `self.end >= needle.len()` and `self.end - needle.len() < haystack.len()`. + // Since `self.end <= haystack.len()` and `i < needle.len()`, we have + // `self.end - needle.len() + i < self.end <= haystack.len()`, so + // `haystack.get_unchecked(self.end - needle.len() + i)` is safe. + // - The path that mutates `self.end` either re-enters `'search`, which re-runs the checks + // before reaching this loop again, or returns on match, so the invariant holds. + if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } { self.end -= self.crit_pos_back - i; if !long_period { self.memory_back = needle.len(); @@ -1595,7 +1614,12 @@ fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> // See if the right part of the needle matches let needle_end = if long_period { needle.len() } else { self.memory_back }; for i in self.crit_pos_back..needle_end { - if needle[i] != haystack[self.end - needle.len() + i] { + // SAFETY: The same `self.end - needle.len() + i < haystack.len()` argument as the + // left-part loop applies: the `haystack.get(self.end.wrapping_sub(needle.len()))` + // check at the top of `'search` established the bound for this iteration, and + // every mutation of `self.end` is followed by `continue 'search` (which re-runs + // the check) or a `return` (which exits before any further unsafe access). + if needle[i] != unsafe { *haystack.get_unchecked(self.end - needle.len() + i) } { self.end -= self.period; if !long_period { self.memory_back = self.period;
diff --git a/library/coretests/benches/pattern.rs b/library/coretests/benches/pattern.rs index b0f8b39..15dfda9 100644 --- a/library/coretests/benches/pattern.rs +++ b/library/coretests/benches/pattern.rs
@@ -39,3 +39,51 @@ fn ends_with_str(b: &mut Bencher) { } }) } + +fn make_haystack() -> String { + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem \ + sit amet dolor ultricies condimentum. Praesent iaculis purus elit, ac malesuada \ + quam malesuada in. Duis sed orci eros. Suspendisse sit amet magna mollis, mollis \ + nunc luctus, imperdiet mi. Integer fringilla non sem ut lacinia. Fusce varius \ + tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec tempus vel, \ + gravida nec quam. In est dui, tincidunt sed tempus interdum, adipiscing laoreet \ + ante. Etiam tempor, tellus quis sagittis interdum, nulla purus mattis sem, quis \ + auctor erat odio ac tellus. In nec nunc sit amet diam volutpat molestie at sed \ + ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan lorem ac dignissim \ + placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat." + .repeat(50) +} + +#[bench] +fn find_str(b: &mut Bencher) { + let s = make_haystack(); + let haystack = black_box(s.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.find("the english language"))) +} + +#[bench] +fn rfind_str(b: &mut Bencher) { + let s = make_haystack(); + let haystack = black_box(s.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.rfind("the english language"))) +} + +#[bench] +fn find_str_worst_case(b: &mut Bencher) { + let near_miss = "the english languagX"; + let haystack_str = near_miss.repeat(2000); + let haystack = black_box(haystack_str.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.find("the english language"))) +} + +#[bench] +fn rfind_str_worst_case(b: &mut Bencher) { + let near_miss = "the english languagX"; + let haystack_str = near_miss.repeat(2000); + let haystack = black_box(haystack_str.as_str()); + b.bytes = haystack.len() as u64; + b.iter(|| black_box(haystack.rfind("the english language"))) +}
diff --git a/typos.toml b/typos.toml index 8e14ce5..f680f5b 100644 --- a/typos.toml +++ b/typos.toml
@@ -13,6 +13,7 @@ # generated lorem ipsum texts "library/alloctests/benches/str.rs", "library/alloctests/tests/str.rs", + "library/coretests/benches/pattern.rs", ] [default.extend-words]