third_party/rust_crates/vendor/deflate/src/chained_hash_table.rs - fuchsia - Git at Google

 //use deflate_state::DebugCounter;
 use std::{mem, ptr};

 pub const WINDOW_SIZE: usize = 32768;
 pub const WINDOW_MASK: usize = WINDOW_SIZE - 1;
 #[cfg(test)]
 pub const HASH_BYTES: usize = 3;
 const HASH_SHIFT: u16 = 5;
 const HASH_MASK: u16 = WINDOW_MASK as u16;

 /// Helper struct to let us allocate both head and prev in the same block.
 struct Tables {
     /// Starts of hash chains (in prev)
     pub head: [u16; WINDOW_SIZE],
     /// Link to previous occurence of this hash value
     pub prev: [u16; WINDOW_SIZE],
 }

 impl Default for Tables {
     #[inline]
     fn default() -> Tables {
         // # Unsafe
         // This struct is not public and is only used in this module, and
         // the values are immediately filled in after this struct is
         // created.
         unsafe {
             Tables {
                 head: mem::uninitialized(),
                 prev: mem::uninitialized(),
             }
         }
     }
 }

 impl Tables {
     fn fill_prev(&mut self) {
         assert_eq!(self.head.len(), self.prev.len());
         // # Unsafe
         //
         // The arrays are created with the same length statically, so this should be safe.
         // We use this rather than copy_from_slice as prev starts out unitialized.
         unsafe {
             ptr::copy_nonoverlapping(self.head.as_ptr(), self.prev.as_mut_ptr(), self.prev.len())
         }
     }
 }

 /// Create and box the hash chains.
 fn create_tables() -> Box<Tables> {
     // Using default here is a trick to get around the lack of box syntax on stable rust.
     //
     // Box::new([0u16,n]) ends up creating an temporary array on the stack which is not optimised
     // but using default, which simply calls `box value` internally allows us to get around this.
     //
     // We could use vec instead, but using a boxed array helps the compiler optimise
     // away bounds checks as `n & WINDOW_MASK < WINDOW_SIZE` will always be true.
     let mut t: Box<Tables> = Box::default();

     for (n, b) in t.head.iter_mut().enumerate() {
         // # Unsafe
         //
         // Using ptr::write here since the values are uninitialised.
         // u16 is a primitive and doesn't implement drop, so this would be safe either way.
         unsafe {
             ptr::write(b, n as u16);
         }
     }

     t.fill_prev();

     t
 }

 /// Returns a new hash value based on the previous value and the next byte
 #[inline]
 pub fn update_hash(current_hash: u16, to_insert: u8) -> u16 {
     update_hash_conf(current_hash, to_insert, HASH_SHIFT, HASH_MASK)
 }

 #[inline]
 fn update_hash_conf(current_hash: u16, to_insert: u8, shift: u16, mask: u16) -> u16 {
     ((current_hash << shift) ^ (to_insert as u16)) & mask
 }

 #[inline]
 fn reset_array(arr: &mut [u16; WINDOW_SIZE]) {
     for (n, b) in arr.iter_mut().enumerate() {
         *b = n as u16;
     }
 }

 pub struct ChainedHashTable {
     // Current running hash value of the last 3 bytes
     current_hash: u16,
     // Hash chains.
     c: Box<Tables>,
     // Used for testing
     // count: DebugCounter,
 }

 impl ChainedHashTable {
     pub fn new() -> ChainedHashTable {
         ChainedHashTable {
             current_hash: 0,
             c: create_tables(),
             //count: DebugCounter::default(),
         }
     }

     #[cfg(test)]
     pub fn from_starting_values(v1: u8, v2: u8) -> ChainedHashTable {
         let mut t = ChainedHashTable::new();
         t.current_hash = update_hash(t.current_hash, v1);
         t.current_hash = update_hash(t.current_hash, v2);
         t
     }

     /// Resets the hash value and hash chains
     pub fn reset(&mut self) {
         self.current_hash = 0;
         reset_array(&mut self.c.head);
         {
             let h = self.c.head;
             let mut c = self.c.prev;
             c[..].copy_from_slice(&h[..]);
         }
         /*if cfg!(debug_assertions) {
             self.count.reset();
         }*/
     }

     pub fn add_initial_hash_values(&mut self, v1: u8, v2: u8) {
         self.current_hash = update_hash(self.current_hash, v1);
         self.current_hash = update_hash(self.current_hash, v2);
     }

     /// Insert a byte into the hash table
     #[inline]
     pub fn add_hash_value(&mut self, position: usize, value: u8) {
         // Check that all bytes are input in order and at the correct positions.
         // Disabled for now as it breaks when sync flushing.
         /*debug_assert_eq!(
             position & WINDOW_MASK,
             self.count.get() as usize & WINDOW_MASK
         );*/
         debug_assert!(
             position < WINDOW_SIZE * 2,
             "Position is larger than 2 * window size! {}",
             position
         );
         // Storing the hash in a temporary variable here makes the compiler avoid the
         // bounds checks in this function.
         let new_hash = update_hash(self.current_hash, value);

         self.add_with_hash(position, new_hash);

         // Update the stored hash value with the new hash.
         self.current_hash = new_hash;
     }

     /// Directly set the current hash value
     #[inline]
     pub fn set_hash(&mut self, hash: u16) {
         self.current_hash = hash;
     }

     /// Update the tables directly, providing the hash.
     #[inline]
     pub fn add_with_hash(&mut self, position: usize, hash: u16) {
         /*if cfg!(debug_assertions) {
             self.count.add(1);
         }*/

         self.c.prev[position & WINDOW_MASK] = self.c.head[hash as usize];

         // Ignoring any bits over 16 here is deliberate, as we only concern ourselves about
         // where in the buffer (which is 64k bytes) we are referring to.
         self.c.head[hash as usize] = position as u16;
     }

     // Get the head of the hash chain for the current hash value
     #[cfg(test)]
     #[inline]
     pub fn current_head(&self) -> u16 {
         self.c.head[self.current_hash as usize]
     }

     #[inline]
     pub fn current_hash(&self) -> u16 {
         self.current_hash
     }

     #[inline]
     pub fn get_prev(&self, bytes: usize) -> u16 {
         self.c.prev[bytes & WINDOW_MASK]
     }

     #[cfg(test)]
     #[inline]
     pub fn farthest_next(&self, match_pos: usize, match_len: usize) -> usize {
         let to_check = match_len.saturating_sub(2);

         let mut n = 0;
         let mut smallest_prev =
             self.get_prev(match_pos);
         let mut smallest_pos = 0;
         while n < to_check {
             let prev =
                 self.get_prev(match_pos + n);
             if prev < smallest_prev {
                 smallest_prev = prev;
                 smallest_pos = n;
             }
             n += 1;
         }
         smallest_pos
     }

     #[inline]
     fn slide_value(b: u16, pos: u16, bytes: u16) -> u16 {
         if b >= bytes {
             b - bytes
         } else {
             pos
         }
     }

     #[inline]
     fn slide_table(table: &mut [u16; WINDOW_SIZE], bytes: u16) {
         for (n, b) in table.iter_mut().enumerate() {
             *b = ChainedHashTable::slide_value(*b, n as u16, bytes);
         }
     }

     pub fn slide(&mut self, bytes: usize) {
         /*if cfg!(debug_assertions) && bytes != WINDOW_SIZE {
             // This should only happen in tests in this file.
             self.count.reset();
         }*/
         ChainedHashTable::slide_table(&mut self.c.head, bytes as u16);
         ChainedHashTable::slide_table(&mut self.c.prev, bytes as u16);
     }
 }

 #[cfg(test)]
 pub fn filled_hash_table(data: &[u8]) -> ChainedHashTable {
     assert!(data.len() <= (WINDOW_SIZE * 2) + 2);
     let mut hash_table = ChainedHashTable::from_starting_values(data[0], data[1]);
     for (n, b) in data[2..].iter().enumerate() {
         hash_table.add_hash_value(n, *b);
     }
     hash_table
 }

 #[cfg(test)]
 mod test {
     use super::{filled_hash_table, ChainedHashTable};

     #[test]
     fn chained_hash() {
         use std::str;

         let test_string = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
                            eiusmod tempor. rum. incididunt ut labore et dolore magna aliqua. Ut \
                            enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi \
                            ut aliquip ex ea commodo consequat. rum. Duis aute irure dolor in \
                            reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
                            pariatur. Excepteur sint occaecat cupidatat non proident, sunt in \
                            culpa qui officia deserunt mollit anim id est laborum.";

         let test_data = test_string.as_bytes();

         let current_bytes = &test_data[test_data.len() - super::HASH_BYTES..test_data.len()];

         let num_iters = test_string
             .matches(str::from_utf8(current_bytes).unwrap())
             .count();

         let hash_table = filled_hash_table(test_data);

         // Test that the positions in the chain are valid
         let mut prev_value = hash_table.get_prev(hash_table.current_head() as usize) as usize;
         let mut count = 0;
         let mut current = hash_table.current_head() as usize;
         while current != prev_value {
             count += 1;
             current = prev_value;
             prev_value = hash_table.get_prev(prev_value) as usize;
         }
         // There should be at least as many occurences of the hash of the checked bytes as the
         // numbers of occurences of the checked bytes themselves. As the hashes are not large enough
         // to store 8 * 3 = 24 bits, there could be more with different input data.
         assert!(count >= num_iters);
     }

     #[test]
     fn table_unique() {
         let mut test_data = Vec::new();
         test_data.extend(0u8..255);
         test_data.extend(255u8..0);
         let hash_table = filled_hash_table(&test_data);
         let prev_pos = hash_table.get_prev(hash_table.current_head() as usize);
         // Since all sequences in the input are unique, there shouldn't be any previous values.
         assert_eq!(prev_pos, hash_table.current_hash());
     }

     #[test]
     fn table_slide() {
         use std::fs::File;
         use std::io::Read;

         let window_size = super::WINDOW_SIZE;
         let window_size16 = super::WINDOW_SIZE as u16;

         let mut input = Vec::new();

         let mut f = File::open("tests/pg11.txt").unwrap();

         f.read_to_end(&mut input).unwrap();

         let mut hash_table = filled_hash_table(&input[..window_size + 2]);

         for (n, b) in input[2..window_size + 2].iter().enumerate() {
             hash_table.add_hash_value(n + window_size, *b);
         }

         hash_table.slide(window_size);

         {
             let max_head = hash_table.c.head.iter().max().unwrap();
             // After sliding there should be no hashes referring to values
             // higher than the window size
             assert!(*max_head < window_size16);
             assert!(*max_head > 0);
             let pos = hash_table.get_prev(hash_table.current_head() as usize);
             // There should be a previous occurence since we inserted the data 3 times
             assert!(pos < window_size16);
             assert!(pos > 0);
         }

         for (n, b) in input[2..(window_size / 2)].iter().enumerate() {
             hash_table.add_hash_value(n + window_size, *b);
         }

         // There should hashes referring to values in the upper part of the input window
         // at this point
         let max_prev = hash_table.c.prev.iter().max().unwrap();
         assert!(*max_prev > window_size16);

         let mut pos = hash_table.current_head();
         // There should be a previous occurence since we inserted the data 3 times
         assert!(pos > window_size16);
         let end_byte = input[(window_size / 2) - 1 - 2];
         let mut iterations = 0;
         while pos > window_size16 && iterations < 5000 {
             assert_eq!(input[pos as usize & window_size - 1], end_byte);

             pos = hash_table.get_prev(pos as usize);
             iterations += 1;
         }
     }

     #[test]
     /// Ensure that the initial hash values are correct.
     fn initial_chains() {
         let t = ChainedHashTable::new();
         for (n, &b) in t.c.head.iter().enumerate() {
             assert_eq!(n, b as usize);
         }
         for (n, &b) in t.c.prev.iter().enumerate() {
             assert_eq!(n, b as usize);
         }
     }
 }
	//use deflate_state::DebugCounter;
	use std::{mem, ptr};

	pub const WINDOW_SIZE: usize = 32768;
	pub const WINDOW_MASK: usize = WINDOW_SIZE - 1;
	#[cfg(test)]
	pub const HASH_BYTES: usize = 3;
	const HASH_SHIFT: u16 = 5;
	const HASH_MASK: u16 = WINDOW_MASK as u16;

	/// Helper struct to let us allocate both head and prev in the same block.
	struct Tables {
	/// Starts of hash chains (in prev)
	pub head: [u16; WINDOW_SIZE],
	/// Link to previous occurence of this hash value
	pub prev: [u16; WINDOW_SIZE],
	}

	impl Default for Tables {
	#[inline]
	fn default() -> Tables {
	// # Unsafe
	// This struct is not public and is only used in this module, and
	// the values are immediately filled in after this struct is
	// created.
	unsafe {
	Tables {
	head: mem::uninitialized(),
	prev: mem::uninitialized(),
	}
	}
	}
	}

	impl Tables {
	fn fill_prev(&mut self) {
	assert_eq!(self.head.len(), self.prev.len());
	// # Unsafe
	//
	// The arrays are created with the same length statically, so this should be safe.
	// We use this rather than copy_from_slice as prev starts out unitialized.
	unsafe {
	ptr::copy_nonoverlapping(self.head.as_ptr(), self.prev.as_mut_ptr(), self.prev.len())
	}
	}
	}

	/// Create and box the hash chains.
	fn create_tables() -> Box<Tables> {
	// Using default here is a trick to get around the lack of box syntax on stable rust.
	//
	// Box::new([0u16,n]) ends up creating an temporary array on the stack which is not optimised
	// but using default, which simply calls `box value` internally allows us to get around this.
	//
	// We could use vec instead, but using a boxed array helps the compiler optimise
	// away bounds checks as `n & WINDOW_MASK < WINDOW_SIZE` will always be true.
	let mut t: Box<Tables> = Box::default();

	for (n, b) in t.head.iter_mut().enumerate() {
	// # Unsafe
	//
	// Using ptr::write here since the values are uninitialised.
	// u16 is a primitive and doesn't implement drop, so this would be safe either way.
	unsafe {
	ptr::write(b, n as u16);
	}
	}

	t.fill_prev();

	t
	}

	/// Returns a new hash value based on the previous value and the next byte
	#[inline]
	pub fn update_hash(current_hash: u16, to_insert: u8) -> u16 {
	update_hash_conf(current_hash, to_insert, HASH_SHIFT, HASH_MASK)
	}

	#[inline]
	fn update_hash_conf(current_hash: u16, to_insert: u8, shift: u16, mask: u16) -> u16 {
	((current_hash << shift) ^ (to_insert as u16)) & mask
	}

	#[inline]
	fn reset_array(arr: &mut [u16; WINDOW_SIZE]) {
	for (n, b) in arr.iter_mut().enumerate() {
	*b = n as u16;
	}
	}

	pub struct ChainedHashTable {
	// Current running hash value of the last 3 bytes
	current_hash: u16,
	// Hash chains.
	c: Box<Tables>,
	// Used for testing
	// count: DebugCounter,
	}

	impl ChainedHashTable {
	pub fn new() -> ChainedHashTable {
	ChainedHashTable {
	current_hash: 0,
	c: create_tables(),
	//count: DebugCounter::default(),
	}
	}

	#[cfg(test)]
	pub fn from_starting_values(v1: u8, v2: u8) -> ChainedHashTable {
	let mut t = ChainedHashTable::new();
	t.current_hash = update_hash(t.current_hash, v1);
	t.current_hash = update_hash(t.current_hash, v2);
	t
	}

	/// Resets the hash value and hash chains
	pub fn reset(&mut self) {
	self.current_hash = 0;
	reset_array(&mut self.c.head);
	{
	let h = self.c.head;
	let mut c = self.c.prev;
	c[..].copy_from_slice(&h[..]);
	}
	/*if cfg!(debug_assertions) {
	self.count.reset();
	}*/
	}

	pub fn add_initial_hash_values(&mut self, v1: u8, v2: u8) {
	self.current_hash = update_hash(self.current_hash, v1);
	self.current_hash = update_hash(self.current_hash, v2);
	}

	/// Insert a byte into the hash table
	#[inline]
	pub fn add_hash_value(&mut self, position: usize, value: u8) {
	// Check that all bytes are input in order and at the correct positions.
	// Disabled for now as it breaks when sync flushing.
	/*debug_assert_eq!(
	position & WINDOW_MASK,
	self.count.get() as usize & WINDOW_MASK
	);*/
	debug_assert!(
	position < WINDOW_SIZE * 2,
	"Position is larger than 2 * window size! {}",
	position
	);
	// Storing the hash in a temporary variable here makes the compiler avoid the
	// bounds checks in this function.
	let new_hash = update_hash(self.current_hash, value);

	self.add_with_hash(position, new_hash);

	// Update the stored hash value with the new hash.
	self.current_hash = new_hash;
	}

	/// Directly set the current hash value
	#[inline]
	pub fn set_hash(&mut self, hash: u16) {
	self.current_hash = hash;
	}

	/// Update the tables directly, providing the hash.
	#[inline]
	pub fn add_with_hash(&mut self, position: usize, hash: u16) {
	/*if cfg!(debug_assertions) {
	self.count.add(1);
	}*/

	self.c.prev[position & WINDOW_MASK] = self.c.head[hash as usize];

	// Ignoring any bits over 16 here is deliberate, as we only concern ourselves about
	// where in the buffer (which is 64k bytes) we are referring to.
	self.c.head[hash as usize] = position as u16;
	}

	// Get the head of the hash chain for the current hash value
	#[cfg(test)]
	#[inline]
	pub fn current_head(&self) -> u16 {
	self.c.head[self.current_hash as usize]
	}

	#[inline]
	pub fn current_hash(&self) -> u16 {
	self.current_hash
	}

	#[inline]
	pub fn get_prev(&self, bytes: usize) -> u16 {
	self.c.prev[bytes & WINDOW_MASK]
	}

	#[cfg(test)]
	#[inline]
	pub fn farthest_next(&self, match_pos: usize, match_len: usize) -> usize {
	let to_check = match_len.saturating_sub(2);

	let mut n = 0;
	let mut smallest_prev =
	self.get_prev(match_pos);
	let mut smallest_pos = 0;
	while n < to_check {
	let prev =
	self.get_prev(match_pos + n);
	if prev < smallest_prev {
	smallest_prev = prev;
	smallest_pos = n;
	}
	n += 1;
	}
	smallest_pos
	}

	#[inline]
	fn slide_value(b: u16, pos: u16, bytes: u16) -> u16 {
	if b >= bytes {
	b - bytes
	} else {
	pos
	}
	}

	#[inline]
	fn slide_table(table: &mut [u16; WINDOW_SIZE], bytes: u16) {
	for (n, b) in table.iter_mut().enumerate() {
	b = ChainedHashTable::slide_value(b, n as u16, bytes);
	}
	}

	pub fn slide(&mut self, bytes: usize) {
	/*if cfg!(debug_assertions) && bytes != WINDOW_SIZE {
	// This should only happen in tests in this file.
	self.count.reset();
	}*/
	ChainedHashTable::slide_table(&mut self.c.head, bytes as u16);
	ChainedHashTable::slide_table(&mut self.c.prev, bytes as u16);
	}
	}

	#[cfg(test)]
	pub fn filled_hash_table(data: &[u8]) -> ChainedHashTable {
	assert!(data.len() <= (WINDOW_SIZE * 2) + 2);
	let mut hash_table = ChainedHashTable::from_starting_values(data[0], data[1]);
	for (n, b) in data[2..].iter().enumerate() {
	hash_table.add_hash_value(n, *b);
	}
	hash_table
	}

	#[cfg(test)]
	mod test {
	use super::{filled_hash_table, ChainedHashTable};

	#[test]
	fn chained_hash() {
	use std::str;

	let test_string = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
	eiusmod tempor. rum. incididunt ut labore et dolore magna aliqua. Ut \
	enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi \
	ut aliquip ex ea commodo consequat. rum. Duis aute irure dolor in \
	reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
	pariatur. Excepteur sint occaecat cupidatat non proident, sunt in \
	culpa qui officia deserunt mollit anim id est laborum.";

	let test_data = test_string.as_bytes();

	let current_bytes = &test_data[test_data.len() - super::HASH_BYTES..test_data.len()];

	let num_iters = test_string
	.matches(str::from_utf8(current_bytes).unwrap())
	.count();

	let hash_table = filled_hash_table(test_data);

	// Test that the positions in the chain are valid
	let mut prev_value = hash_table.get_prev(hash_table.current_head() as usize) as usize;
	let mut count = 0;
	let mut current = hash_table.current_head() as usize;
	while current != prev_value {
	count += 1;
	current = prev_value;
	prev_value = hash_table.get_prev(prev_value) as usize;
	}
	// There should be at least as many occurences of the hash of the checked bytes as the
	// numbers of occurences of the checked bytes themselves. As the hashes are not large enough
	// to store 8 * 3 = 24 bits, there could be more with different input data.
	assert!(count >= num_iters);
	}

	#[test]
	fn table_unique() {
	let mut test_data = Vec::new();
	test_data.extend(0u8..255);
	test_data.extend(255u8..0);
	let hash_table = filled_hash_table(&test_data);
	let prev_pos = hash_table.get_prev(hash_table.current_head() as usize);
	// Since all sequences in the input are unique, there shouldn't be any previous values.
	assert_eq!(prev_pos, hash_table.current_hash());
	}

	#[test]
	fn table_slide() {
	use std::fs::File;
	use std::io::Read;

	let window_size = super::WINDOW_SIZE;
	let window_size16 = super::WINDOW_SIZE as u16;

	let mut input = Vec::new();

	let mut f = File::open("tests/pg11.txt").unwrap();

	f.read_to_end(&mut input).unwrap();

	let mut hash_table = filled_hash_table(&input[..window_size + 2]);

	for (n, b) in input[2..window_size + 2].iter().enumerate() {
	hash_table.add_hash_value(n + window_size, *b);
	}

	hash_table.slide(window_size);

	{
	let max_head = hash_table.c.head.iter().max().unwrap();
	// After sliding there should be no hashes referring to values
	// higher than the window size
	assert!(*max_head < window_size16);
	assert!(*max_head > 0);
	let pos = hash_table.get_prev(hash_table.current_head() as usize);
	// There should be a previous occurence since we inserted the data 3 times
	assert!(pos < window_size16);
	assert!(pos > 0);
	}

	for (n, b) in input[2..(window_size / 2)].iter().enumerate() {
	hash_table.add_hash_value(n + window_size, *b);
	}

	// There should hashes referring to values in the upper part of the input window
	// at this point
	let max_prev = hash_table.c.prev.iter().max().unwrap();
	assert!(*max_prev > window_size16);

	let mut pos = hash_table.current_head();
	// There should be a previous occurence since we inserted the data 3 times
	assert!(pos > window_size16);
	let end_byte = input[(window_size / 2) - 1 - 2];
	let mut iterations = 0;
	while pos > window_size16 && iterations < 5000 {
	assert_eq!(input[pos as usize & window_size - 1], end_byte);

	pos = hash_table.get_prev(pos as usize);
	iterations += 1;
	}
	}

	#[test]
	/// Ensure that the initial hash values are correct.
	fn initial_chains() {
	let t = ChainedHashTable::new();
	for (n, &b) in t.c.head.iter().enumerate() {
	assert_eq!(n, b as usize);
	}
	for (n, &b) in t.c.prev.iter().enumerate() {
	assert_eq!(n, b as usize);
	}
	}
	}