src/fonts/src/font_service/font_info/char_set.rs - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 use {
     failure::{format_err, Error},
     std::cmp::Ordering,
 };

 type BitmapElement = u64;
 const BITMAP_ELEMENT_SIZE: usize = 64;
 const MAX_RANGE_GAP: u32 = 2048;

 /// Represents an ordered set of code points that begin at [CharSetRange.start]. The largest
 /// allowed discontinuity between two consecutive code points in the set is [MAX_RANGE_GAP].
 #[derive(Debug, Clone)]
 struct CharSetRange {
     start: u32,
     bitmap: Vec<BitmapElement>,
 }

 impl CharSetRange {
     fn new() -> CharSetRange {
         CharSetRange { start: 0, bitmap: vec![] }
     }

     fn start(&self) -> u32 {
         self.start
     }

     fn end(&self) -> u32 {
         self.start + (self.bitmap.len() * BITMAP_ELEMENT_SIZE) as u32
     }

     fn is_empty(&self) -> bool {
         self.bitmap.is_empty()
     }

     fn add(&mut self, val: u32) {
         assert!(val >= self.start);

         if self.bitmap.is_empty() {
             self.start = val;
         }

         let pos = (val - self.start) as usize;
         let element_pos = pos / BITMAP_ELEMENT_SIZE;

         if element_pos >= self.bitmap.len() {
             self.bitmap.resize(element_pos + 1, 0);
         }

         self.bitmap[element_pos] |= 1 << (pos % BITMAP_ELEMENT_SIZE);
     }

     fn contains(&self, c: u32) -> bool {
         if c < self.start || c >= self.end() {
             false
         } else {
             let index = c as usize - self.start as usize;
             (self.bitmap[index / 64] & (1 << (index % 64))) > 0
         }
     }
 }

 /// Represents an ordered set of code points.
 ///
 /// TODO(kpozin): Evaluate replacing with `MultiCharRange`, which might be more space-efficient for
 /// large sets with few discontinuities.
 #[derive(Debug, Clone)]
 pub struct CharSet {
     ranges: Vec<CharSetRange>,
 }

 impl CharSet {
     pub fn new(mut code_points: Vec<u32>) -> CharSet {
         code_points.sort_unstable();

         let mut ranges = vec![];
         let mut range = CharSetRange::new();
         for c in code_points {
             if c != 0 && !range.is_empty() && c >= range.end() + MAX_RANGE_GAP {
                 ranges.push(range);
                 range = CharSetRange::new();
             }
             range.add(c);
         }
         if !range.is_empty() {
             ranges.push(range)
         }
         CharSet { ranges }
     }

     pub fn from_string(s: String) -> Result<CharSet, Error> {
         let mut code_points: Vec<u32> = vec![];
         let mut prev: u32 = 0;
         for range in s.split(',').filter(|x| !x.is_empty()) {
             let mut split = range.split('+');

             let offset: u32 = match split.next() {
                 Some(off) => off
                     .parse()
                     .or_else(|_| Err(format_err!("Failed to parse {:?} as u32.", off)))?,
                 None => return Err(format_err!("Failed to parse {:?}: not a valid range.", range)),
             };

             let length: u32 = match split.next() {
                 Some(len) => len
                     .parse()
                     .or_else(|_| Err(format_err!("Failed to parse {:?} as u32.", len)))?,
                 None => 0, // We can treat "0,2,..." as "0+0,2+0,..."
             };

             if split.next().is_some() {
                 return Err(format_err!("Failed to parse {:?}: not a valid range.", range));
             }

             let begin = prev + offset;
             prev = begin + length;
             code_points.extend(begin..=prev);
         }
         Ok(CharSet::new(code_points))
     }

     pub fn contains(&self, c: u32) -> bool {
         match self.ranges.binary_search_by(|r| {
             if r.end() < c {
                 Ordering::Less
             } else if r.start() > c {
                 Ordering::Greater
             } else {
                 Ordering::Equal
             }
         }) {
             Ok(r) => self.ranges[r].contains(c),
             Err(_) => false,
         }
     }

     pub fn is_empty(&self) -> bool {
         self.ranges.is_empty()
     }
 }

 impl Default for CharSet {
     fn default() -> Self {
         CharSet::new(vec![])
     }
 }

 #[cfg(test)]
 mod tests {
     use super::*;

     #[test]
     fn test_charset() {
         let charset = CharSet::new(vec![1, 2, 3, 10, 500, 5000, 5001, 10000]);
         assert!(!charset.contains(0));
         assert!(charset.contains(1));
         assert!(charset.contains(2));
         assert!(charset.contains(3));
         assert!(!charset.contains(4));
         assert!(!charset.contains(9));
         assert!(charset.contains(10));
         assert!(charset.contains(500));
         assert!(!charset.contains(501));
         assert!(charset.contains(5000));
         assert!(charset.contains(5001));
         assert!(!charset.contains(5002));
         assert!(!charset.contains(9999));
         assert!(charset.contains(10000));
         assert!(!charset.contains(10001));
     }

     #[test]
     fn test_charset_from_string() -> Result<(), Error> {
         let s = "0,2,11,19+94".to_string();
         let charset = CharSet::from_string(s)?;
         assert!([0, 2, 13, 32, 54, 126].into_iter().all(|c| charset.contains(*c)));
         assert!([1, 11, 19, 127, 10000].into_iter().all(|c| !charset.contains(*c)));
         Ok(())
     }

     #[test]
     fn test_charset_from_string_not_a_number() {
         for s in &["0,p,11", "q+1", "3+r"] {
             assert!(CharSet::from_string(s.to_string()).is_err())
         }
     }
 }
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	use {
	failure::{format_err, Error},
	std::cmp::Ordering,
	};

	type BitmapElement = u64;
	const BITMAP_ELEMENT_SIZE: usize = 64;
	const MAX_RANGE_GAP: u32 = 2048;

	/// Represents an ordered set of code points that begin at [CharSetRange.start]. The largest
	/// allowed discontinuity between two consecutive code points in the set is [MAX_RANGE_GAP].
	#[derive(Debug, Clone)]
	struct CharSetRange {
	start: u32,
	bitmap: Vec<BitmapElement>,
	}

	impl CharSetRange {
	fn new() -> CharSetRange {
	CharSetRange { start: 0, bitmap: vec![] }
	}

	fn start(&self) -> u32 {
	self.start
	}

	fn end(&self) -> u32 {
	self.start + (self.bitmap.len() * BITMAP_ELEMENT_SIZE) as u32
	}

	fn is_empty(&self) -> bool {
	self.bitmap.is_empty()
	}

	fn add(&mut self, val: u32) {
	assert!(val >= self.start);

	if self.bitmap.is_empty() {
	self.start = val;
	}

	let pos = (val - self.start) as usize;
	let element_pos = pos / BITMAP_ELEMENT_SIZE;

	if element_pos >= self.bitmap.len() {
	self.bitmap.resize(element_pos + 1, 0);
	}

	self.bitmap[element_pos] \|= 1 << (pos % BITMAP_ELEMENT_SIZE);
	}

	fn contains(&self, c: u32) -> bool {
	if c < self.start \|\| c >= self.end() {
	false
	} else {
	let index = c as usize - self.start as usize;
	(self.bitmap[index / 64] & (1 << (index % 64))) > 0
	}
	}
	}

	/// Represents an ordered set of code points.
	///
	/// TODO(kpozin): Evaluate replacing with `MultiCharRange`, which might be more space-efficient for
	/// large sets with few discontinuities.
	#[derive(Debug, Clone)]
	pub struct CharSet {
	ranges: Vec<CharSetRange>,
	}

	impl CharSet {
	pub fn new(mut code_points: Vec<u32>) -> CharSet {
	code_points.sort_unstable();

	let mut ranges = vec![];
	let mut range = CharSetRange::new();
	for c in code_points {
	if c != 0 && !range.is_empty() && c >= range.end() + MAX_RANGE_GAP {
	ranges.push(range);
	range = CharSetRange::new();
	}
	range.add(c);
	}
	if !range.is_empty() {
	ranges.push(range)
	}
	CharSet { ranges }
	}

	pub fn from_string(s: String) -> Result<CharSet, Error> {
	let mut code_points: Vec<u32> = vec![];
	let mut prev: u32 = 0;
	for range in s.split(',').filter(\|x\| !x.is_empty()) {
	let mut split = range.split('+');

	let offset: u32 = match split.next() {
	Some(off) => off
	.parse()
	.or_else(\|_\| Err(format_err!("Failed to parse {:?} as u32.", off)))?,
	None => return Err(format_err!("Failed to parse {:?}: not a valid range.", range)),
	};

	let length: u32 = match split.next() {
	Some(len) => len
	.parse()
	.or_else(\|_\| Err(format_err!("Failed to parse {:?} as u32.", len)))?,
	None => 0, // We can treat "0,2,..." as "0+0,2+0,..."
	};

	if split.next().is_some() {
	return Err(format_err!("Failed to parse {:?}: not a valid range.", range));
	}

	let begin = prev + offset;
	prev = begin + length;
	code_points.extend(begin..=prev);
	}
	Ok(CharSet::new(code_points))
	}

	pub fn contains(&self, c: u32) -> bool {
	match self.ranges.binary_search_by(\|r\| {
	if r.end() < c {
	Ordering::Less
	} else if r.start() > c {
	Ordering::Greater
	} else {
	Ordering::Equal
	}
	}) {
	Ok(r) => self.ranges[r].contains(c),
	Err(_) => false,
	}
	}

	pub fn is_empty(&self) -> bool {
	self.ranges.is_empty()
	}
	}

	impl Default for CharSet {
	fn default() -> Self {
	CharSet::new(vec![])
	}
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	#[test]
	fn test_charset() {
	let charset = CharSet::new(vec![1, 2, 3, 10, 500, 5000, 5001, 10000]);
	assert!(!charset.contains(0));
	assert!(charset.contains(1));
	assert!(charset.contains(2));
	assert!(charset.contains(3));
	assert!(!charset.contains(4));
	assert!(!charset.contains(9));
	assert!(charset.contains(10));
	assert!(charset.contains(500));
	assert!(!charset.contains(501));
	assert!(charset.contains(5000));
	assert!(charset.contains(5001));
	assert!(!charset.contains(5002));
	assert!(!charset.contains(9999));
	assert!(charset.contains(10000));
	assert!(!charset.contains(10001));
	}

	#[test]
	fn test_charset_from_string() -> Result<(), Error> {
	let s = "0,2,11,19+94".to_string();
	let charset = CharSet::from_string(s)?;
	assert!([0, 2, 13, 32, 54, 126].into_iter().all(\|c\| charset.contains(*c)));
	assert!([1, 11, 19, 127, 10000].into_iter().all(\|c\| !charset.contains(*c)));
	Ok(())
	}

	#[test]
	fn test_charset_from_string_not_a_number() {
	for s in &["0,p,11", "q+1", "3+r"] {
	assert!(CharSet::from_string(s.to_string()).is_err())
	}
	}
	}