| use std::u16; |
| |
| use lzvalue::LZValue; |
| use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION, |
| get_distance_code, get_length_code}; |
| |
| /// The type used for representing how many times a literal, length or distance code has been ouput |
| /// to the current buffer. |
| /// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using |
| /// 16-bit values. |
| pub type FrequencyType = u16; |
| |
| /// The maximum number of literals/lengths in the buffer, which in practice also means the maximum |
| /// number of literals/lengths output before a new block is started. |
| /// This should not be larger than the maximum value `FrequencyType` can represent to prevent |
| /// overflowing (which would degrade, or in the worst case break compression). |
| pub const MAX_BUFFER_LENGTH: usize = 1024 * 31; |
| |
| #[derive(Debug, PartialEq)] |
| pub enum BufferStatus { |
| NotFull, |
| Full, |
| } |
| |
| /// Struct that buffers lz77 data and keeps track of the usage of different codes |
| pub struct DynamicWriter { |
| buffer: Vec<LZValue>, |
| // The two last length codes are not actually used, but only participates in code construction |
| // Therefore, we ignore them to get the correct number of lengths |
| frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS], |
| distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES], |
| } |
| |
| impl DynamicWriter { |
| #[inline] |
| pub fn check_buffer_length(&self) -> BufferStatus { |
| if self.buffer.len() >= MAX_BUFFER_LENGTH { |
| BufferStatus::Full |
| } else { |
| BufferStatus::NotFull |
| } |
| } |
| |
| #[inline] |
| pub fn write_literal(&mut self, literal: u8) -> BufferStatus { |
| debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH); |
| self.buffer.push(LZValue::literal(literal)); |
| self.frequencies[usize::from(literal)] += 1; |
| self.check_buffer_length() |
| } |
| |
| #[inline] |
| pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus { |
| self.buffer.push(LZValue::length_distance(length, distance)); |
| let l_code_num = get_length_code(length); |
| // As we limit the buffer to 2^16 values, this should be safe from overflowing. |
| if cfg!(debug_assertions) { |
| self.frequencies[l_code_num] += 1; |
| } else { |
| // #Safety |
| // None of the values in the table of length code numbers will give a value |
| // that is out of bounds. |
| // There is a test to ensure that these functions can not produce too large values. |
| unsafe { |
| *self.frequencies.get_unchecked_mut(l_code_num) += 1; |
| } |
| } |
| let d_code_num = get_distance_code(distance); |
| // The compiler seems to be able to evade the bounds check here somehow. |
| self.distance_frequencies[usize::from(d_code_num)] += 1; |
| self.check_buffer_length() |
| } |
| |
| pub fn buffer_length(&self) -> usize { |
| self.buffer.len() |
| } |
| |
| pub fn get_buffer(&self) -> &[LZValue] { |
| &self.buffer |
| } |
| |
| pub fn new() -> DynamicWriter { |
| let mut w = DynamicWriter { |
| buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), |
| frequencies: [0; NUM_LITERALS_AND_LENGTHS], |
| distance_frequencies: [0; NUM_DISTANCE_CODES], |
| }; |
| // This will always be 1, |
| // since there will always only be one end of block marker in each block |
| w.frequencies[END_OF_BLOCK_POSITION] = 1; |
| w |
| } |
| |
| /// Special output function used with RLE compression |
| /// that avoids bothering to lookup a distance code. |
| #[inline] |
| pub fn write_length_rle(&mut self, length: u16) -> BufferStatus { |
| self.buffer.push(LZValue::length_distance(length, 1)); |
| let l_code_num = get_length_code(length); |
| // As we limit the buffer to 2^16 values, this should be safe from overflowing. |
| if cfg!(debug_assertions) { |
| self.frequencies[l_code_num] += 1; |
| } else { |
| // #Safety |
| // None of the values in the table of length code numbers will give a value |
| // that is out of bounds. |
| // There is a test to ensure that these functions won't produce too large values. |
| unsafe { |
| *self.frequencies.get_unchecked_mut(l_code_num) += 1; |
| } |
| } |
| self.distance_frequencies[0] += 1; |
| self.check_buffer_length() |
| } |
| |
| pub fn get_frequencies(&self) -> (&[u16], &[u16]) { |
| (&self.frequencies, &self.distance_frequencies) |
| } |
| |
| pub fn clear_frequencies(&mut self) { |
| self.frequencies = [0; NUM_LITERALS_AND_LENGTHS]; |
| self.distance_frequencies = [0; NUM_DISTANCE_CODES]; |
| self.frequencies[END_OF_BLOCK_POSITION] = 1; |
| } |
| |
| pub fn clear_data(&mut self) { |
| self.buffer.clear() |
| } |
| |
| pub fn clear(&mut self) { |
| self.clear_frequencies(); |
| self.clear_data(); |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use super::*; |
| use huffman_table::{get_length_code, get_distance_code}; |
| #[test] |
| /// Ensure that these function won't produce values that would overflow the output_writer |
| /// tables since we use some unsafe indexing. |
| fn array_bounds() { |
| let w = DynamicWriter::new(); |
| |
| for i in 0..u16::max_value() { |
| get_length_code(i) < w.frequencies.len(); |
| } |
| |
| for i in 0..u16::max_value() { |
| get_distance_code(i) < w.distance_frequencies.len() as u8; |
| } |
| } |
| } |