487 lines
14 KiB
Rust
487 lines
14 KiB
Rust
//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
|
|
//!
|
|
//! This library provides functions to compress data using the DEFLATE algorithm,
|
|
//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
|
|
//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
|
|
//! The current implementation is still a bit lacking speed-wise compared to C-libraries
|
|
//! like zlib and miniz.
|
|
//!
|
|
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! by e.g. HTTP content encoding, the `.png` image format, the Unix `gzip` program and commonly
//! in `.zip` files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data,
//! containing some extra metadata and a checksum to validate the integrity of the raw data.
|
|
//!
|
|
//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
|
|
//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
|
|
//! the `DEFLATE` format (with or without wrappers) is not required.
|
|
//!
|
|
//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
|
|
//! but can be enabled with the `gzip` feature.
|
|
//!
|
|
//! As this library is still in development, the compression output may change slightly
|
|
//! between versions.
|
|
//!
|
|
//!
|
|
//! # Examples:
|
|
//! ## Simple compression function:
|
|
//! ``` rust
|
|
//! use deflate::deflate_bytes;
|
|
//!
|
|
//! let data = b"Some data";
|
|
//! let compressed = deflate_bytes(data);
|
|
//! # let _ = compressed;
|
|
//! ```
|
|
//!
|
|
//! ## Using a writer:
|
|
//! ``` rust
|
|
//! use std::io::Write;
|
|
//!
|
|
//! use deflate::Compression;
|
|
//! use deflate::write::ZlibEncoder;
|
|
//!
|
|
//! let data = b"This is some test data";
|
|
//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
|
|
//! encoder.write_all(data).expect("Write error!");
|
|
//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
|
|
//! # let _ = compressed_data;
|
|
//! ```
|
|
|
|
#![forbid(unsafe_code)]
|
|
#![cfg_attr(all(feature = "benchmarks", test), feature(test))]
|
|
|
|
#[cfg(all(test, feature = "benchmarks"))]
|
|
extern crate test as test_std;
|
|
|
|
#[cfg(test)]
|
|
extern crate miniz_oxide;
|
|
|
|
extern crate adler32;
|
|
#[cfg(feature = "gzip")]
|
|
extern crate gzip_header;
|
|
|
|
mod bit_reverse;
|
|
mod bitstream;
|
|
mod chained_hash_table;
|
|
mod checksum;
|
|
mod compress;
|
|
mod compression_options;
|
|
mod deflate_state;
|
|
mod encoder_state;
|
|
mod huffman_lengths;
|
|
mod huffman_table;
|
|
mod input_buffer;
|
|
mod length_encode;
|
|
mod lz77;
|
|
mod lzvalue;
|
|
mod matching;
|
|
mod output_writer;
|
|
mod rle;
|
|
mod stored_block;
|
|
#[cfg(test)]
|
|
mod test_utils;
|
|
mod writer;
|
|
mod zlib;
|
|
|
|
use std::io;
|
|
use std::io::Write;
|
|
|
|
#[cfg(feature = "gzip")]
|
|
use gzip_header::Crc;
|
|
#[cfg(feature = "gzip")]
|
|
use gzip_header::GzBuilder;
|
|
|
|
use crate::checksum::RollingChecksum;
|
|
use crate::deflate_state::DeflateState;
|
|
|
|
use crate::compress::Flush;
|
|
pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
|
|
pub use lz77::MatchingType;
|
|
|
|
use crate::writer::compress_until_done;
|
|
|
|
/// Encoders implementing a `Write` interface.
|
|
pub mod write {
|
|
#[cfg(feature = "gzip")]
|
|
pub use crate::writer::gzip::GzEncoder;
|
|
pub use crate::writer::{DeflateEncoder, ZlibEncoder};
|
|
}
|
|
|
|
fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
|
|
input: &[u8],
|
|
writer: &mut W,
|
|
mut checksum: RC,
|
|
compression_options: CompressionOptions,
|
|
) -> io::Result<()> {
|
|
checksum.update_from_slice(input);
|
|
// We use a box here to avoid putting the buffers on the stack
|
|
// It's done here rather than in the structs themselves for now to
|
|
// keep the data close in memory.
|
|
let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
|
|
compress_until_done(input, &mut deflate_state, Flush::Finish)
|
|
}
|
|
|
|
/// Compress the given slice of bytes with DEFLATE compression.
|
|
///
|
|
/// Returns a `Vec<u8>` of the compressed data.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use deflate::{deflate_bytes_conf, Compression};
|
|
///
|
|
/// let data = b"This is some test data";
|
|
/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
|
|
/// # let _ = compressed_data;
|
|
/// ```
|
|
pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
|
|
let mut writer = Vec::with_capacity(input.len() / 3);
|
|
compress_data_dynamic(
|
|
input,
|
|
&mut writer,
|
|
checksum::NoChecksum::new(),
|
|
options.into(),
|
|
)
|
|
.expect("Write error!");
|
|
writer
|
|
}
|
|
|
|
/// Compress the given slice of bytes with DEFLATE compression using the default compression
|
|
/// level.
|
|
///
|
|
/// Returns a `Vec<u8>` of the compressed data.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use deflate::deflate_bytes;
|
|
///
|
|
/// let data = b"This is some test data";
|
|
/// let compressed_data = deflate_bytes(data);
|
|
/// # let _ = compressed_data;
|
|
/// ```
|
|
pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
|
|
deflate_bytes_conf(input, Compression::Default)
|
|
}
|
|
|
|
/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
|
|
///
|
|
/// Returns a `Vec<u8>` of the compressed data.
|
|
///
|
|
/// Zlib dictionaries are not yet suppored.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use deflate::{deflate_bytes_zlib_conf, Compression};
|
|
///
|
|
/// let data = b"This is some test data";
|
|
/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
|
|
/// # let _ = compressed_data;
|
|
/// ```
|
|
pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
|
|
let mut writer = Vec::with_capacity(input.len() / 3);
|
|
// Write header
|
|
zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
|
|
.expect("Write error when writing zlib header!");
|
|
|
|
let mut checksum = checksum::Adler32Checksum::new();
|
|
compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
|
|
.expect("Write error when writing compressed data!");
|
|
|
|
let hash = checksum.current_hash();
|
|
|
|
writer
|
|
.write_all(&hash.to_be_bytes())
|
|
.expect("Write error when writing checksum!");
|
|
writer
|
|
}
|
|
|
|
/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
|
|
/// using the default compression level.
|
|
///
|
|
/// Returns a Vec<u8> of the compressed data.
|
|
///
|
|
/// Zlib dictionaries are not yet suppored.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use deflate::deflate_bytes_zlib;
|
|
///
|
|
/// let data = b"This is some test data";
|
|
/// let compressed_data = deflate_bytes_zlib(data);
|
|
/// # let _ = compressed_data;
|
|
/// ```
|
|
pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
|
|
deflate_bytes_zlib_conf(input, Compression::Default)
|
|
}
|
|
|
|
/// Compress a slice of bytes with DEFLATE compression, wrapped in a gzip header and trailer,
/// using the supplied gzip header builder and compression options.
///
/// The header bytes are produced by `gzip_header.into_header()`. The trailer consists of the
/// `Crc` sum of the input followed by the value of `Crc::amt_as_u32`, both written in
/// little-endian byte order.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Panics
///
/// Panics if writing the header, the compressed data or the trailer returns an error.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    let mut output = Vec::with_capacity(input.len() / 3);

    // Header.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Compressed body. The CRC for the trailer is computed separately below, so no checksum
    // is tracked during compression.
    let mut no_checksum = checksum::NoChecksum::new();
    let body_result = compress_data_dynamic(input, &mut output, &mut no_checksum, options.into());
    body_result.expect("Write error when writing compressed data!");

    // Trailer: CRC sum followed by the input amount, both little-endian.
    let mut crc = Crc::new();
    crc.update(input);
    let crc_sum = crc.sum().to_le_bytes();
    let input_amount = crc.amt_as_u32().to_le_bytes();

    output
        .write_all(&crc_sum)
        .expect("Write error when writing checksum!");
    output
        .write_all(&input_amount)
        .expect("Write error when writing amt!");
    output
}
|
|
|
|
/// Compress a slice of bytes with DEFLATE compression at the default compression level,
/// wrapped in a gzip header and trailer, with the header built from `GzBuilder::new()`.
///
/// This is a shorthand for calling `deflate_bytes_gzip_conf` with `Compression::Default` and
/// a new `GzBuilder`.
///
/// Returns a `Vec<u8>` holding the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let level = Compression::Default;
    let header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, level, header)
}
|
|
|
|
/// Round-trip tests for the one-shot compression functions and the zlib writer.
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;
    use test_utils::{decompress_to_end, decompress_zlib, get_test_data};

    type CO = CompressionOptions;

    /// Write data to the writer in chunks of chunk_size.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                // Clamp the context window to the bounds of each buffer. Without this, a
                // mismatch within 3 bytes of either end would panic right here (index
                // underflow or out-of-range slice) and hide the actual failure below.
                let start = n.saturating_sub(3);
                let input_end = (n + 3).min(input.len());
                let result_end = (n + 3).min(result.len());
                println!(
                    "input: {:?}, output: {:?}",
                    &input[start..input_end],
                    &result[start..result_end]
                );
                break;
            }
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress the test data through the zlib writer in chunks of `chunk_size`, and check
    /// that the result decompresses correctly and is identical to the one-shot output.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` with a range of chunk sizes for the given compression options.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    /// Test the writer by inputting data one chunk at a time.
    #[ignore]
    #[test]
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    /// Compress `data` with a zlib wrapper and check that decompressing gives `data` back.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        // Only print the full contents on failure when the data is short.
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
|