|  | use std::char; | 
|  | use std::cmp; | 
|  | use std::fmt::Debug; | 
|  | use std::slice; | 
|  | use std::u8; | 
|  |  | 
|  | use unicode; | 
|  |  | 
|  | // This module contains an *internal* implementation of interval sets. | 
|  | // | 
// The primary invariant that interval sets guard is canonical ordering. That
// is, every interval set contains an ordered sequence of intervals where
// no two intervals are overlapping or adjacent. While this invariant is
// occasionally broken within the implementation, it should be impossible for
// callers to observe a violation of it.
|  | // | 
|  | // Since case folding (as implemented below) breaks that invariant, we roll | 
|  | // that into this API even though it is a little out of place in an otherwise | 
|  | // generic interval set. (Hence the reason why the `unicode` module is imported | 
|  | // here.) | 
|  | // | 
|  | // Some of the implementation complexity here is a result of me wanting to | 
|  | // preserve the sequential representation without using additional memory. | 
|  | // In many cases, we do use linear extra memory, but it is at most 2x and it | 
|  | // is amortized. If we relaxed the memory requirements, this implementation | 
|  | // could become much simpler. The extra memory is honestly probably OK, but | 
|  | // character classes (especially of the Unicode variety) can become quite | 
|  | // large, and it would be nice to keep regex compilation snappy even in debug | 
|  | // builds. (In the past, I have been careless with this area of code and it has | 
|  | // caused slow regex compilations in debug mode, so this isn't entirely | 
|  | // unwarranted.) | 
|  | // | 
|  | // Tests on this are relegated to the public API of HIR in src/hir.rs. | 
|  |  | 
/// A set of intervals of type `I`, backed by a single vector.
///
/// The methods on this type keep `ranges` in canonical form: sorted, with no
/// two intervals overlapping or adjacent.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IntervalSet<I> {
    /// The intervals that make up this set.
    ranges: Vec<I>,
}
|  |  | 
|  | impl<I: Interval> IntervalSet<I> { | 
|  | /// Create a new set from a sequence of intervals. Each interval is | 
|  | /// specified as a pair of bounds, where both bounds are inclusive. | 
|  | /// | 
|  | /// The given ranges do not need to be in any specific order, and ranges | 
|  | /// may overlap. | 
|  | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 
|  | let mut set = IntervalSet { ranges: intervals.into_iter().collect() }; | 
|  | set.canonicalize(); | 
|  | set | 
|  | } | 
|  |  | 
|  | /// Add a new interval to this set. | 
|  | pub fn push(&mut self, interval: I) { | 
|  | // TODO: This could be faster. e.g., Push the interval such that | 
|  | // it preserves canonicalization. | 
|  | self.ranges.push(interval); | 
|  | self.canonicalize(); | 
|  | } | 
|  |  | 
|  | /// Return an iterator over all intervals in this set. | 
|  | /// | 
|  | /// The iterator yields intervals in ascending order. | 
|  | pub fn iter(&self) -> IntervalSetIter<I> { | 
|  | IntervalSetIter(self.ranges.iter()) | 
|  | } | 
|  |  | 
|  | /// Return an immutable slice of intervals in this set. | 
|  | /// | 
|  | /// The sequence returned is in canonical ordering. | 
|  | pub fn intervals(&self) -> &[I] { | 
|  | &self.ranges | 
|  | } | 
|  |  | 
|  | /// Expand this interval set such that it contains all case folded | 
|  | /// characters. For example, if this class consists of the range `a-z`, | 
|  | /// then applying case folding will result in the class containing both the | 
|  | /// ranges `a-z` and `A-Z`. | 
|  | /// | 
|  | /// This returns an error if the necessary case mapping data is not | 
|  | /// available. | 
|  | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { | 
|  | let len = self.ranges.len(); | 
|  | for i in 0..len { | 
|  | let range = self.ranges[i]; | 
|  | if let Err(err) = range.case_fold_simple(&mut self.ranges) { | 
|  | self.canonicalize(); | 
|  | return Err(err); | 
|  | } | 
|  | } | 
|  | self.canonicalize(); | 
|  | Ok(()) | 
|  | } | 
|  |  | 
|  | /// Union this set with the given set, in place. | 
|  | pub fn union(&mut self, other: &IntervalSet<I>) { | 
|  | // This could almost certainly be done more efficiently. | 
|  | self.ranges.extend(&other.ranges); | 
|  | self.canonicalize(); | 
|  | } | 
|  |  | 
|  | /// Intersect this set with the given set, in place. | 
|  | pub fn intersect(&mut self, other: &IntervalSet<I>) { | 
|  | if self.ranges.is_empty() { | 
|  | return; | 
|  | } | 
|  | if other.ranges.is_empty() { | 
|  | self.ranges.clear(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // There should be a way to do this in-place with constant memory, | 
|  | // but I couldn't figure out a simple way to do it. So just append | 
|  | // the intersection to the end of this range, and then drain it before | 
|  | // we're done. | 
|  | let drain_end = self.ranges.len(); | 
|  |  | 
|  | let mut ita = (0..drain_end).into_iter(); | 
|  | let mut itb = (0..other.ranges.len()).into_iter(); | 
|  | let mut a = ita.next().unwrap(); | 
|  | let mut b = itb.next().unwrap(); | 
|  | loop { | 
|  | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { | 
|  | self.ranges.push(ab); | 
|  | } | 
|  | let (it, aorb) = | 
|  | if self.ranges[a].upper() < other.ranges[b].upper() { | 
|  | (&mut ita, &mut a) | 
|  | } else { | 
|  | (&mut itb, &mut b) | 
|  | }; | 
|  | match it.next() { | 
|  | Some(v) => *aorb = v, | 
|  | None => break, | 
|  | } | 
|  | } | 
|  | self.ranges.drain(..drain_end); | 
|  | } | 
|  |  | 
|  | /// Subtract the given set from this set, in place. | 
|  | pub fn difference(&mut self, other: &IntervalSet<I>) { | 
|  | if self.ranges.is_empty() || other.ranges.is_empty() { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // This algorithm is (to me) surprisingly complex. A search of the | 
|  | // interwebs indicate that this is a potentially interesting problem. | 
|  | // Folks seem to suggest interval or segment trees, but I'd like to | 
|  | // avoid the overhead (both runtime and conceptual) of that. | 
|  | // | 
|  | // The following is basically my Shitty First Draft. Therefore, in | 
|  | // order to grok it, you probably need to read each line carefully. | 
|  | // Simplifications are most welcome! | 
|  | // | 
|  | // Remember, we can assume the canonical format invariant here, which | 
|  | // says that all ranges are sorted, not overlapping and not adjacent in | 
|  | // each class. | 
|  | let drain_end = self.ranges.len(); | 
|  | let (mut a, mut b) = (0, 0); | 
|  | 'LOOP: while a < drain_end && b < other.ranges.len() { | 
|  | // Basically, the easy cases are when neither range overlaps with | 
|  | // each other. If the `b` range is less than our current `a` | 
|  | // range, then we can skip it and move on. | 
|  | if other.ranges[b].upper() < self.ranges[a].lower() { | 
|  | b += 1; | 
|  | continue; | 
|  | } | 
|  | // ... similarly for the `a` range. If it's less than the smallest | 
|  | // `b` range, then we can add it as-is. | 
|  | if self.ranges[a].upper() < other.ranges[b].lower() { | 
|  | let range = self.ranges[a]; | 
|  | self.ranges.push(range); | 
|  | a += 1; | 
|  | continue; | 
|  | } | 
|  | // Otherwise, we have overlapping ranges. | 
|  | assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); | 
|  |  | 
|  | // This part is tricky and was non-obvious to me without looking | 
|  | // at explicit examples (see the tests). The trickiness stems from | 
|  | // two things: 1) subtracting a range from another range could | 
|  | // yield two ranges and 2) after subtracting a range, it's possible | 
|  | // that future ranges can have an impact. The loop below advances | 
|  | // the `b` ranges until they can't possible impact the current | 
|  | // range. | 
|  | // | 
|  | // For example, if our `a` range is `a-t` and our next three `b` | 
|  | // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply | 
|  | // subtraction three times before moving on to the next `a` range. | 
|  | let mut range = self.ranges[a]; | 
|  | while b < other.ranges.len() | 
|  | && !range.is_intersection_empty(&other.ranges[b]) | 
|  | { | 
|  | let old_range = range; | 
|  | range = match range.difference(&other.ranges[b]) { | 
|  | (None, None) => { | 
|  | // We lost the entire range, so move on to the next | 
|  | // without adding this one. | 
|  | a += 1; | 
|  | continue 'LOOP; | 
|  | } | 
|  | (Some(range1), None) | (None, Some(range1)) => range1, | 
|  | (Some(range1), Some(range2)) => { | 
|  | self.ranges.push(range1); | 
|  | range2 | 
|  | } | 
|  | }; | 
|  | // It's possible that the `b` range has more to contribute | 
|  | // here. In particular, if it is greater than the original | 
|  | // range, then it might impact the next `a` range *and* it | 
|  | // has impacted the current `a` range as much as possible, | 
|  | // so we can quit. We don't bump `b` so that the next `a` | 
|  | // range can apply it. | 
|  | if other.ranges[b].upper() > old_range.upper() { | 
|  | break; | 
|  | } | 
|  | // Otherwise, the next `b` range might apply to the current | 
|  | // `a` range. | 
|  | b += 1; | 
|  | } | 
|  | self.ranges.push(range); | 
|  | a += 1; | 
|  | } | 
|  | while a < drain_end { | 
|  | let range = self.ranges[a]; | 
|  | self.ranges.push(range); | 
|  | a += 1; | 
|  | } | 
|  | self.ranges.drain(..drain_end); | 
|  | } | 
|  |  | 
|  | /// Compute the symmetric difference of the two sets, in place. | 
|  | /// | 
|  | /// This computes the symmetric difference of two interval sets. This | 
|  | /// removes all elements in this set that are also in the given set, | 
|  | /// but also adds all elements from the given set that aren't in this | 
|  | /// set. That is, the set will contain all elements in either set, | 
|  | /// but will not contain any elements that are in both sets. | 
|  | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { | 
|  | // TODO(burntsushi): Fix this so that it amortizes allocation. | 
|  | let mut intersection = self.clone(); | 
|  | intersection.intersect(other); | 
|  | self.union(other); | 
|  | self.difference(&intersection); | 
|  | } | 
|  |  | 
|  | /// Negate this interval set. | 
|  | /// | 
|  | /// For all `x` where `x` is any element, if `x` was in this set, then it | 
|  | /// will not be in this set after negation. | 
|  | pub fn negate(&mut self) { | 
|  | if self.ranges.is_empty() { | 
|  | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); | 
|  | self.ranges.push(I::create(min, max)); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // There should be a way to do this in-place with constant memory, | 
|  | // but I couldn't figure out a simple way to do it. So just append | 
|  | // the negation to the end of this range, and then drain it before | 
|  | // we're done. | 
|  | let drain_end = self.ranges.len(); | 
|  |  | 
|  | // We do checked arithmetic below because of the canonical ordering | 
|  | // invariant. | 
|  | if self.ranges[0].lower() > I::Bound::min_value() { | 
|  | let upper = self.ranges[0].lower().decrement(); | 
|  | self.ranges.push(I::create(I::Bound::min_value(), upper)); | 
|  | } | 
|  | for i in 1..drain_end { | 
|  | let lower = self.ranges[i - 1].upper().increment(); | 
|  | let upper = self.ranges[i].lower().decrement(); | 
|  | self.ranges.push(I::create(lower, upper)); | 
|  | } | 
|  | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { | 
|  | let lower = self.ranges[drain_end - 1].upper().increment(); | 
|  | self.ranges.push(I::create(lower, I::Bound::max_value())); | 
|  | } | 
|  | self.ranges.drain(..drain_end); | 
|  | } | 
|  |  | 
|  | /// Converts this set into a canonical ordering. | 
|  | fn canonicalize(&mut self) { | 
|  | if self.is_canonical() { | 
|  | return; | 
|  | } | 
|  | self.ranges.sort(); | 
|  | assert!(!self.ranges.is_empty()); | 
|  |  | 
|  | // Is there a way to do this in-place with constant memory? I couldn't | 
|  | // figure out a way to do it. So just append the canonicalization to | 
|  | // the end of this range, and then drain it before we're done. | 
|  | let drain_end = self.ranges.len(); | 
|  | for oldi in 0..drain_end { | 
|  | // If we've added at least one new range, then check if we can | 
|  | // merge this range in the previously added range. | 
|  | if self.ranges.len() > drain_end { | 
|  | let (last, rest) = self.ranges.split_last_mut().unwrap(); | 
|  | if let Some(union) = last.union(&rest[oldi]) { | 
|  | *last = union; | 
|  | continue; | 
|  | } | 
|  | } | 
|  | let range = self.ranges[oldi]; | 
|  | self.ranges.push(range); | 
|  | } | 
|  | self.ranges.drain(..drain_end); | 
|  | } | 
|  |  | 
|  | /// Returns true if and only if this class is in a canonical ordering. | 
|  | fn is_canonical(&self) -> bool { | 
|  | for pair in self.ranges.windows(2) { | 
|  | if pair[0] >= pair[1] { | 
|  | return false; | 
|  | } | 
|  | if pair[0].is_contiguous(&pair[1]) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  | true | 
|  | } | 
|  | } | 
|  |  | 
/// An iterator over the intervals of an `IntervalSet`.
///
/// This is a thin wrapper around the slice iterator of the set's ranges.
#[derive(Debug)]
pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>)
where
    I: 'a;
|  |  | 
|  | impl<'a, I> Iterator for IntervalSetIter<'a, I> { | 
|  | type Item = &'a I; | 
|  |  | 
|  | fn next(&mut self) -> Option<&'a I> { | 
|  | self.0.next() | 
|  | } | 
|  | } | 
|  |  | 
|  | pub trait Interval: | 
|  | Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord | 
|  | { | 
|  | type Bound: Bound; | 
|  |  | 
|  | fn lower(&self) -> Self::Bound; | 
|  | fn upper(&self) -> Self::Bound; | 
|  | fn set_lower(&mut self, bound: Self::Bound); | 
|  | fn set_upper(&mut self, bound: Self::Bound); | 
|  | fn case_fold_simple( | 
|  | &self, | 
|  | intervals: &mut Vec<Self>, | 
|  | ) -> Result<(), unicode::CaseFoldError>; | 
|  |  | 
|  | /// Create a new interval. | 
|  | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { | 
|  | let mut int = Self::default(); | 
|  | if lower <= upper { | 
|  | int.set_lower(lower); | 
|  | int.set_upper(upper); | 
|  | } else { | 
|  | int.set_lower(upper); | 
|  | int.set_upper(lower); | 
|  | } | 
|  | int | 
|  | } | 
|  |  | 
|  | /// Union the given overlapping range into this range. | 
|  | /// | 
|  | /// If the two ranges aren't contiguous, then this returns `None`. | 
|  | fn union(&self, other: &Self) -> Option<Self> { | 
|  | if !self.is_contiguous(other) { | 
|  | return None; | 
|  | } | 
|  | let lower = cmp::min(self.lower(), other.lower()); | 
|  | let upper = cmp::max(self.upper(), other.upper()); | 
|  | Some(Self::create(lower, upper)) | 
|  | } | 
|  |  | 
|  | /// Intersect this range with the given range and return the result. | 
|  | /// | 
|  | /// If the intersection is empty, then this returns `None`. | 
|  | fn intersect(&self, other: &Self) -> Option<Self> { | 
|  | let lower = cmp::max(self.lower(), other.lower()); | 
|  | let upper = cmp::min(self.upper(), other.upper()); | 
|  | if lower <= upper { | 
|  | Some(Self::create(lower, upper)) | 
|  | } else { | 
|  | None | 
|  | } | 
|  | } | 
|  |  | 
|  | /// Subtract the given range from this range and return the resulting | 
|  | /// ranges. | 
|  | /// | 
|  | /// If subtraction would result in an empty range, then no ranges are | 
|  | /// returned. | 
|  | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { | 
|  | if self.is_subset(other) { | 
|  | return (None, None); | 
|  | } | 
|  | if self.is_intersection_empty(other) { | 
|  | return (Some(self.clone()), None); | 
|  | } | 
|  | let add_lower = other.lower() > self.lower(); | 
|  | let add_upper = other.upper() < self.upper(); | 
|  | // We know this because !self.is_subset(other) and the ranges have | 
|  | // a non-empty intersection. | 
|  | assert!(add_lower || add_upper); | 
|  | let mut ret = (None, None); | 
|  | if add_lower { | 
|  | let upper = other.lower().decrement(); | 
|  | ret.0 = Some(Self::create(self.lower(), upper)); | 
|  | } | 
|  | if add_upper { | 
|  | let lower = other.upper().increment(); | 
|  | let range = Self::create(lower, self.upper()); | 
|  | if ret.0.is_none() { | 
|  | ret.0 = Some(range); | 
|  | } else { | 
|  | ret.1 = Some(range); | 
|  | } | 
|  | } | 
|  | ret | 
|  | } | 
|  |  | 
|  | /// Compute the symmetric difference the given range from this range. This | 
|  | /// returns the union of the two ranges minus its intersection. | 
|  | fn symmetric_difference( | 
|  | &self, | 
|  | other: &Self, | 
|  | ) -> (Option<Self>, Option<Self>) { | 
|  | let union = match self.union(other) { | 
|  | None => return (Some(self.clone()), Some(other.clone())), | 
|  | Some(union) => union, | 
|  | }; | 
|  | let intersection = match self.intersect(other) { | 
|  | None => return (Some(self.clone()), Some(other.clone())), | 
|  | Some(intersection) => intersection, | 
|  | }; | 
|  | union.difference(&intersection) | 
|  | } | 
|  |  | 
|  | /// Returns true if and only if the two ranges are contiguous. Two ranges | 
|  | /// are contiguous if and only if the ranges are either overlapping or | 
|  | /// adjacent. | 
|  | fn is_contiguous(&self, other: &Self) -> bool { | 
|  | let lower1 = self.lower().as_u32(); | 
|  | let upper1 = self.upper().as_u32(); | 
|  | let lower2 = other.lower().as_u32(); | 
|  | let upper2 = other.upper().as_u32(); | 
|  | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) | 
|  | } | 
|  |  | 
|  | /// Returns true if and only if the intersection of this range and the | 
|  | /// other range is empty. | 
|  | fn is_intersection_empty(&self, other: &Self) -> bool { | 
|  | let (lower1, upper1) = (self.lower(), self.upper()); | 
|  | let (lower2, upper2) = (other.lower(), other.upper()); | 
|  | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) | 
|  | } | 
|  |  | 
|  | /// Returns true if and only if this range is a subset of the other range. | 
|  | fn is_subset(&self, other: &Self) -> bool { | 
|  | let (lower1, upper1) = (self.lower(), self.upper()); | 
|  | let (lower2, upper2) = (other.lower(), other.upper()); | 
|  | (lower2 <= lower1 && lower1 <= upper2) | 
|  | && (lower2 <= upper1 && upper1 <= upper2) | 
|  | } | 
|  | } | 
|  |  | 
/// An end point of an interval: a copyable, totally ordered value with a
/// smallest value, a largest value and successor/predecessor steps.
pub trait Bound:
    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
{
    /// The smallest value of this bound type.
    fn min_value() -> Self;
    /// The largest value of this bound type.
    fn max_value() -> Self;
    /// This bound as a `u32`.
    fn as_u32(self) -> u32;
    /// The value immediately after this one. The `u8` and `char`
    /// implementations in this file panic when there is no such value.
    fn increment(self) -> Self;
    /// The value immediately before this one. The `u8` and `char`
    /// implementations in this file panic when there is no such value.
    fn decrement(self) -> Self;
}
|  |  | 
|  | impl Bound for u8 { | 
|  | fn min_value() -> Self { | 
|  | u8::MIN | 
|  | } | 
|  | fn max_value() -> Self { | 
|  | u8::MAX | 
|  | } | 
|  | fn as_u32(self) -> u32 { | 
|  | self as u32 | 
|  | } | 
|  | fn increment(self) -> Self { | 
|  | self.checked_add(1).unwrap() | 
|  | } | 
|  | fn decrement(self) -> Self { | 
|  | self.checked_sub(1).unwrap() | 
|  | } | 
|  | } | 
|  |  | 
|  | impl Bound for char { | 
|  | fn min_value() -> Self { | 
|  | '\x00' | 
|  | } | 
|  | fn max_value() -> Self { | 
|  | '\u{10FFFF}' | 
|  | } | 
|  | fn as_u32(self) -> u32 { | 
|  | self as u32 | 
|  | } | 
|  |  | 
|  | fn increment(self) -> Self { | 
|  | match self { | 
|  | '\u{D7FF}' => '\u{E000}', | 
|  | c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(), | 
|  | } | 
|  | } | 
|  |  | 
|  | fn decrement(self) -> Self { | 
|  | match self { | 
|  | '\u{E000}' => '\u{D7FF}', | 
|  | c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(), | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Tests for interval sets are written in src/hir.rs against the public API. |