|  | //! An implementation an encoder using [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) | 
|  | //! compression algorightm in pure rust. | 
|  | //! | 
|  | //! This library provides functions to compress data using the DEFLATE algorithm, | 
|  | //! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or | 
|  | //! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats. | 
|  | //! The current implementation is still a bit lacking speed-wise compared to C-libraries | 
|  | //! like zlib and miniz. | 
|  | //! | 
|  | //! The deflate algorithm is an older compression algorithm that is still widely used today, | 
|  | //! by e.g html headers, the `.png` inage format, the unix `gzip` program and commonly in `.zip` | 
|  | //! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing | 
|  | //! some extra metadata and a checksum to validate the integrity of the raw data. | 
|  | //! | 
|  | //! The deflate algorithm does not perform as well as newer algorhitms used in file formats such as | 
|  | //! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where | 
|  | //! the `DEFLATE` format (with or without wrappers) is not required. | 
|  | //! | 
|  | //! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default, | 
|  | //! but can be enabled with the `gzip` feature. | 
|  | //! | 
|  | //! As this library is still in development, the compression output may change slightly | 
|  | //! between versions. | 
|  | //! | 
|  | //! | 
|  | //! # Examples: | 
|  | //! ## Simple compression function: | 
|  | //! ``` rust | 
|  | //! use deflate::deflate_bytes; | 
|  | //! | 
|  | //! let data = b"Some data"; | 
|  | //! let compressed = deflate_bytes(data); | 
|  | //! # let _ = compressed; | 
|  | //! ``` | 
|  | //! | 
|  | //! ## Using a writer: | 
|  | //! ``` rust | 
|  | //! use std::io::Write; | 
|  | //! | 
|  | //! use deflate::Compression; | 
|  | //! use deflate::write::ZlibEncoder; | 
|  | //! | 
|  | //! let data = b"This is some test data"; | 
|  | //! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); | 
|  | //! encoder.write_all(data).expect("Write error!"); | 
|  | //! let compressed_data = encoder.finish().expect("Failed to finish compression!"); | 
|  | //! # let _ = compressed_data; | 
|  | //! ``` | 
|  |  | 
|  | #![cfg_attr(all(feature = "benchmarks", test), feature(test))] | 
|  |  | 
|  | #[cfg(all(test, feature = "benchmarks"))] | 
|  | extern crate test as test_std; | 
|  |  | 
|  | #[cfg(test)] | 
|  | extern crate flate2; | 
|  | // #[cfg(test)] | 
|  | // extern crate inflate; | 
|  |  | 
|  | extern crate adler32; | 
|  | extern crate byteorder; | 
|  | #[cfg(feature = "gzip")] | 
|  | extern crate gzip_header; | 
|  |  | 
|  | mod compression_options; | 
|  | mod huffman_table; | 
|  | mod lz77; | 
|  | mod lzvalue; | 
|  | mod chained_hash_table; | 
|  | mod length_encode; | 
|  | mod output_writer; | 
|  | mod stored_block; | 
|  | mod huffman_lengths; | 
|  | mod zlib; | 
|  | mod checksum; | 
|  | mod bit_reverse; | 
|  | mod bitstream; | 
|  | mod encoder_state; | 
|  | mod matching; | 
|  | mod input_buffer; | 
|  | mod deflate_state; | 
|  | mod compress; | 
|  | mod rle; | 
|  | mod writer; | 
|  | #[cfg(test)] | 
|  | mod test_utils; | 
|  |  | 
|  | use std::io::Write; | 
|  | use std::io; | 
|  |  | 
|  | use byteorder::BigEndian; | 
|  | #[cfg(feature = "gzip")] | 
|  | use gzip_header::GzBuilder; | 
|  | #[cfg(feature = "gzip")] | 
|  | use gzip_header::Crc; | 
|  | #[cfg(feature = "gzip")] | 
|  | use byteorder::LittleEndian; | 
|  |  | 
|  | use checksum::RollingChecksum; | 
|  | use deflate_state::DeflateState; | 
|  |  | 
|  | pub use compression_options::{CompressionOptions, SpecialOptions, Compression}; | 
|  | use compress::Flush; | 
|  | pub use lz77::MatchingType; | 
|  |  | 
|  | use writer::compress_until_done; | 
|  |  | 
|  | /// Encoders implementing a `Write` interface. | 
|  | pub mod write { | 
|  | pub use writer::{DeflateEncoder, ZlibEncoder}; | 
|  | #[cfg(feature = "gzip")] | 
|  | pub use writer::gzip::GzEncoder; | 
|  | } | 
|  |  | 
|  |  | 
|  | fn compress_data_dynamic<RC: RollingChecksum, W: Write>( | 
|  | input: &[u8], | 
|  | writer: &mut W, | 
|  | mut checksum: RC, | 
|  | compression_options: CompressionOptions, | 
|  | ) -> io::Result<()> { | 
|  | checksum.update_from_slice(input); | 
|  | // We use a box here to avoid putting the buffers on the stack | 
|  | // It's done here rather than in the structs themselves for now to | 
|  | // keep the data close in memory. | 
|  | let mut deflate_state = Box::new(DeflateState::new(compression_options, writer)); | 
|  | compress_until_done(input, &mut deflate_state, Flush::Finish) | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression. | 
|  | /// | 
|  | /// Returns a `Vec<u8>` of the compressed data. | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// use deflate::{deflate_bytes_conf, Compression}; | 
|  | /// | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes_conf(data, Compression::Best); | 
|  | /// # let _ = compressed_data; | 
|  | /// ``` | 
|  | pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> { | 
|  | let mut writer = Vec::with_capacity(input.len() / 3); | 
|  | compress_data_dynamic( | 
|  | input, | 
|  | &mut writer, | 
|  | checksum::NoChecksum::new(), | 
|  | options.into(), | 
|  | ).expect("Write error!"); | 
|  | writer | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression using the default compression | 
|  | /// level. | 
|  | /// | 
|  | /// Returns a `Vec<u8>` of the compressed data. | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// use deflate::deflate_bytes; | 
|  | /// | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes(data); | 
|  | /// # let _ = compressed_data; | 
|  | /// ``` | 
|  | pub fn deflate_bytes(input: &[u8]) -> Vec<u8> { | 
|  | deflate_bytes_conf(input, Compression::Default) | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer. | 
|  | /// | 
|  | /// Returns a `Vec<u8>` of the compressed data. | 
|  | /// | 
|  | /// Zlib dictionaries are not yet suppored. | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// use deflate::{deflate_bytes_zlib_conf, Compression}; | 
|  | /// | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best); | 
|  | /// # let _ = compressed_data; | 
|  | /// ``` | 
|  | pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> { | 
|  | use byteorder::WriteBytesExt; | 
|  | let mut writer = Vec::with_capacity(input.len() / 3); | 
|  | // Write header | 
|  | zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default) | 
|  | .expect("Write error when writing zlib header!"); | 
|  |  | 
|  | let mut checksum = checksum::Adler32Checksum::new(); | 
|  | compress_data_dynamic(input, &mut writer, &mut checksum, options.into()) | 
|  | .expect("Write error when writing compressed data!"); | 
|  |  | 
|  | let hash = checksum.current_hash(); | 
|  |  | 
|  | writer | 
|  | .write_u32::<BigEndian>(hash) | 
|  | .expect("Write error when writing checksum!"); | 
|  | writer | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer, | 
|  | /// using the default compression level. | 
|  | /// | 
|  | /// Returns a Vec<u8> of the compressed data. | 
|  | /// | 
|  | /// Zlib dictionaries are not yet suppored. | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// use deflate::deflate_bytes_zlib; | 
|  | /// | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes_zlib(data); | 
|  | /// # let _ = compressed_data; | 
|  | /// ``` | 
|  | pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> { | 
|  | deflate_bytes_zlib_conf(input, Compression::Default) | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer | 
|  | /// using the given gzip header and compression options. | 
|  | /// | 
|  | /// Returns a `Vec<u8>` of the compressed data. | 
|  | /// | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// extern crate gzip_header; | 
|  | /// extern crate deflate; | 
|  | /// | 
|  | /// # fn main() { | 
|  | /// use deflate::{deflate_bytes_gzip_conf, Compression}; | 
|  | /// use gzip_header::GzBuilder; | 
|  | /// | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new()); | 
|  | /// # let _ = compressed_data; | 
|  | /// # } | 
|  | /// ``` | 
|  | #[cfg(feature = "gzip")] | 
|  | pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>( | 
|  | input: &[u8], | 
|  | options: O, | 
|  | gzip_header: GzBuilder, | 
|  | ) -> Vec<u8> { | 
|  | use byteorder::WriteBytesExt; | 
|  | let mut writer = Vec::with_capacity(input.len() / 3); | 
|  |  | 
|  | // Write header | 
|  | writer | 
|  | .write_all(&gzip_header.into_header()) | 
|  | .expect("Write error when writing header!"); | 
|  | let mut checksum = checksum::NoChecksum::new(); | 
|  | compress_data_dynamic(input, &mut writer, &mut checksum, options.into()) | 
|  | .expect("Write error when writing compressed data!"); | 
|  |  | 
|  | let mut crc = Crc::new(); | 
|  | crc.update(input); | 
|  |  | 
|  | writer | 
|  | .write_u32::<LittleEndian>(crc.sum()) | 
|  | .expect("Write error when writing checksum!"); | 
|  | writer | 
|  | .write_u32::<LittleEndian>(crc.amt_as_u32()) | 
|  | .expect("Write error when writing amt!"); | 
|  | writer | 
|  | } | 
|  |  | 
|  | /// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer, | 
|  | /// using the default compression level, and a gzip header with default values. | 
|  | /// | 
|  | /// Returns a `Vec<u8>` of the compressed data. | 
|  | /// | 
|  | /// | 
|  | /// # Examples | 
|  | /// | 
|  | /// ``` | 
|  | /// use deflate::deflate_bytes_gzip; | 
|  | /// let data = b"This is some test data"; | 
|  | /// let compressed_data = deflate_bytes_gzip(data); | 
|  | /// # let _ = compressed_data; | 
|  | /// ``` | 
|  | #[cfg(feature = "gzip")] | 
|  | pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> { | 
|  | deflate_bytes_gzip_conf(input, Compression::Default, GzBuilder::new()) | 
|  | } | 
|  |  | 
|  | #[cfg(test)] | 
|  | mod test { | 
|  | use super::*; | 
|  | use std::io::Write; | 
|  |  | 
|  | use test_utils::{get_test_data, decompress_to_end, decompress_zlib}; | 
|  | #[cfg(feature = "gzip")] | 
|  | use test_utils::decompress_gzip; | 
|  |  | 
|  | type CO = CompressionOptions; | 
|  |  | 
|  | /// Write data to the writer in chunks of chunk_size. | 
|  | fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) { | 
|  | for chunk in data.chunks(chunk_size) { | 
|  | writer.write_all(&chunk).unwrap(); | 
|  | } | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn dynamic_string_mem() { | 
|  | let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes(); | 
|  | let compressed = deflate_bytes(&test_data); | 
|  |  | 
|  | assert!(compressed.len() < test_data.len()); | 
|  |  | 
|  | let result = decompress_to_end(&compressed); | 
|  | assert_eq!(test_data, result); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn dynamic_string_file() { | 
|  | let input = get_test_data(); | 
|  | let compressed = deflate_bytes(&input); | 
|  |  | 
|  | let result = decompress_to_end(&compressed); | 
|  | for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() { | 
|  | if a != b { | 
|  | println!("First difference at {}, input: {}, output: {}", n, a, b); | 
|  | println!( | 
|  | "input: {:?}, output: {:?}", | 
|  | &input[n - 3..n + 3], | 
|  | &result[n - 3..n + 3] | 
|  | ); | 
|  | break; | 
|  | } | 
|  | } | 
|  | // Not using assert_eq here deliberately to avoid massive amounts of output spam | 
|  | assert!(input == result); | 
|  | // Check that we actually managed to compress the input | 
|  | assert!(compressed.len() < input.len()); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn file_rle() { | 
|  | let input = get_test_data(); | 
|  | let compressed = deflate_bytes_conf(&input, CO::rle()); | 
|  |  | 
|  | let result = decompress_to_end(&compressed); | 
|  | assert!(input == result); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn file_zlib() { | 
|  | let test_data = get_test_data(); | 
|  |  | 
|  | let compressed = deflate_bytes_zlib(&test_data); | 
|  | // { | 
|  | //     use std::fs::File; | 
|  | //     use std::io::Write; | 
|  | //     let mut f = File::create("out.zlib").unwrap(); | 
|  | //     f.write_all(&compressed).unwrap(); | 
|  | // } | 
|  |  | 
|  | println!("file_zlib compressed(default) length: {}", compressed.len()); | 
|  |  | 
|  | let result = decompress_zlib(&compressed); | 
|  |  | 
|  | assert!(&test_data == &result); | 
|  | assert!(compressed.len() < test_data.len()); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn zlib_short() { | 
|  | let test_data = [10, 10, 10, 10, 10, 55]; | 
|  | roundtrip_zlib(&test_data, CO::default()); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn zlib_last_block() { | 
|  | let mut test_data = vec![22; 32768]; | 
|  | test_data.extend(&[5, 2, 55, 11, 12]); | 
|  | roundtrip_zlib(&test_data, CO::default()); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn deflate_short() { | 
|  | let test_data = [10, 10, 10, 10, 10, 55]; | 
|  | let compressed = deflate_bytes(&test_data); | 
|  |  | 
|  | let result = decompress_to_end(&compressed); | 
|  | assert_eq!(&test_data, result.as_slice()); | 
|  | // If block type and compression is selected correctly, this should only take 5 bytes. | 
|  | assert_eq!(compressed.len(), 5); | 
|  | } | 
|  |  | 
|  | #[cfg(feature = "gzip")] | 
|  | #[test] | 
|  | fn gzip() { | 
|  | let data = get_test_data(); | 
|  | let comment = b"Test"; | 
|  | let compressed = deflate_bytes_gzip_conf( | 
|  | &data, | 
|  | Compression::Default, | 
|  | GzBuilder::new().comment(&comment[..]), | 
|  | ); | 
|  | let (dec, decompressed) = decompress_gzip(&compressed); | 
|  | assert_eq!(dec.header().comment().unwrap(), comment); | 
|  | assert!(data == decompressed); | 
|  | } | 
|  |  | 
|  | fn chunk_test(chunk_size: usize, level: CompressionOptions) { | 
|  | let mut compressed = Vec::with_capacity(32000); | 
|  | let data = get_test_data(); | 
|  | { | 
|  | let mut compressor = write::ZlibEncoder::new(&mut compressed, level); | 
|  | chunked_write(&mut compressor, &data, chunk_size); | 
|  | compressor.finish().unwrap(); | 
|  | } | 
|  | let compressed2 = deflate_bytes_zlib_conf(&data, level); | 
|  | let res = decompress_zlib(&compressed); | 
|  | assert!(res == data); | 
|  | assert_eq!(compressed.len(), compressed2.len()); | 
|  | assert!(compressed == compressed2); | 
|  | } | 
|  |  | 
|  | fn writer_chunks_level(level: CompressionOptions) { | 
|  | use input_buffer::BUFFER_SIZE; | 
|  | let ct = |n| chunk_test(n, level); | 
|  | ct(1); | 
|  | ct(50); | 
|  | ct(400); | 
|  | ct(32768); | 
|  | ct(BUFFER_SIZE); | 
|  | ct(50000); | 
|  | ct((32768 * 2) + 258); | 
|  | } | 
|  |  | 
|  | #[ignore] | 
|  | #[test] | 
|  | /// Test the writer by inputing data in one chunk at the time. | 
|  | fn zlib_writer_chunks() { | 
|  | writer_chunks_level(CompressionOptions::default()); | 
|  | writer_chunks_level(CompressionOptions::fast()); | 
|  | writer_chunks_level(CompressionOptions::rle()); | 
|  | } | 
|  |  | 
|  | /// Check that the frequency values don't overflow. | 
|  | #[test] | 
|  | fn frequency_overflow() { | 
|  | let _ = deflate_bytes_conf( | 
|  | &vec![5; 100000], | 
|  | compression_options::CompressionOptions::default(), | 
|  | ); | 
|  | } | 
|  |  | 
|  | fn roundtrip_zlib(data: &[u8], level: CompressionOptions) { | 
|  | let compressed = deflate_bytes_zlib_conf(data, level); | 
|  | let res = decompress_zlib(&compressed); | 
|  | if data.len() <= 32 { | 
|  | assert_eq!(res, data, "Failed with level: {:?}", level); | 
|  | } else { | 
|  | assert!(res == data, "Failed with level: {:?}", level); | 
|  | } | 
|  | } | 
|  |  | 
|  | fn check_zero(level: CompressionOptions) { | 
|  | roundtrip_zlib(&[], level); | 
|  | } | 
|  |  | 
|  | /// Compress with an empty slice. | 
|  | #[test] | 
|  | fn empty_input() { | 
|  | check_zero(CompressionOptions::default()); | 
|  | check_zero(CompressionOptions::fast()); | 
|  | check_zero(CompressionOptions::rle()); | 
|  | } | 
|  |  | 
|  | #[test] | 
|  | fn one_and_two_values() { | 
|  | let one = &[1][..]; | 
|  | roundtrip_zlib(one, CO::rle()); | 
|  | roundtrip_zlib(one, CO::fast()); | 
|  | roundtrip_zlib(one, CO::default()); | 
|  | let two = &[5, 6, 7, 8][..]; | 
|  | roundtrip_zlib(two, CO::rle()); | 
|  | roundtrip_zlib(two, CO::fast()); | 
|  | roundtrip_zlib(two, CO::default()); | 
|  | } | 
|  |  | 
|  |  | 
|  | } |