// blob: caccb13a9d74bfa8e6a262729bcbbf83e522a22b [file] [log] [blame]
use super::{add_padding, encode_to_slice, Config, LineEnding, LineWrap};
use super::line_wrap::line_wrap;
use std::cmp;
/// The output mechanism for ChunkedEncoder's encoded bytes.
///
/// `ChunkedEncoder::encode` calls `write_encoded_bytes` once for every chunk it encodes and stops
/// at the first error the sink returns.
pub trait Sink {
/// The error type returned when the sink fails to accept a chunk.
type Error;
/// Handle a chunk of encoded base64 data (as UTF-8 bytes).
///
/// When line wrapping or padding is configured on the encoder, `encoded` already contains the
/// line endings and padding belonging to this chunk.
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
/// Errors that can occur when constructing a `ChunkedEncoder`.
#[derive(Debug, PartialEq)]
pub enum ChunkedEncoderError {
    /// If wrapping is configured, the line length must be a multiple of 4, and must not be absurdly
    /// large (see BUF_SIZE).
    InvalidLineLength,
}

impl ChunkedEncoderError {
    /// Human-readable description shared by the `Display` and `Error` implementations.
    fn message(&self) -> &'static str {
        match *self {
            ChunkedEncoderError::InvalidLineLength => "Invalid line length",
        }
    }
}

impl ::std::fmt::Display for ChunkedEncoderError {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        f.write_str(self.message())
    }
}

impl ::std::error::Error for ChunkedEncoderError {
    // `description` is deprecated on newer toolchains but was a required method on older ones;
    // providing it keeps the impl valid on both.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        self.message()
    }
}
/// Size in bytes of the stack buffer that `ChunkedEncoder::encode` encodes each chunk into. It is
/// also the buffer length `ChunkedEncoder::new` uses to derive the per-chunk input size, so a
/// wrapped line plus its line ending must fit within it.
const BUF_SIZE: usize = 1024;
/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
///
/// Input is encoded through a fixed-size stack buffer (`BUF_SIZE` bytes) and every encoded chunk
/// is handed to a `Sink`.
pub struct ChunkedEncoder {
// Encoding settings: character set, padding and line wrapping.
config: Config,
// Number of input bytes encoded per chunk. Computed once in `new` by `max_input_length` so that
// an encoded chunk, including line endings and padding, fits in the encode buffer.
max_input_chunk_len: usize,
}
impl ChunkedEncoder {
    /// Creates an encoder for `config`.
    ///
    /// # Errors
    ///
    /// Returns `ChunkedEncoderError::InvalidLineLength` if line wrapping is configured with a line
    /// length that is not a multiple of 4 or that does not fit in the internal encode buffer.
    pub fn new(config: Config) -> Result<ChunkedEncoder, ChunkedEncoderError> {
        let max_input_chunk_len = max_input_length(BUF_SIZE, &config)?;

        Ok(ChunkedEncoder {
            config,
            max_input_chunk_len,
        })
    }

    /// Encodes `bytes` one chunk at a time, passing each encoded chunk to `sink`.
    ///
    /// Padding (if configured) is appended only after the final chunk, and line endings (if
    /// configured) are inserted so that the concatenated output is wrapped consistently across
    /// chunk boundaries. Nothing is written to the sink for empty input.
    ///
    /// # Errors
    ///
    /// Stops and returns the sink's error as soon as `write_encoded_bytes` fails.
    pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
        let mut buf = [0u8; BUF_SIZE];
        let table = self.config.char_set.encode_table();
        let total_len = bytes.len();
        let mut offset = 0;

        while offset < total_len {
            // A full-size chunk, unless only a shorter tail remains.
            let chunk_len = cmp::min(self.max_input_chunk_len, total_len - offset);
            let chunk = &bytes[offset..offset + chunk_len];
            let mut encoded_len = encode_to_slice(chunk, &mut buf, table);

            offset += chunk_len;
            let is_final_chunk = offset >= total_len;

            if is_final_chunk && self.config.pad {
                // max_input_length reserved buffer space for the padding characters.
                encoded_len += add_padding(total_len, &mut buf[encoded_len..]);
            }

            let wrap_len = match self.config.line_wrap {
                LineWrap::NoWrap => 0,
                LineWrap::Wrap(line_len, line_ending) => {
                    let inner_endings_len = line_wrap(&mut buf, encoded_len, line_len, line_ending);

                    if is_final_chunk {
                        inner_endings_len
                    } else {
                        assert_eq!(chunk_len, self.max_input_chunk_len);

                        // line_wrap() leaves the last line unterminated. This chunk is followed by
                        // more output and consists solely of complete lines, so its last line needs
                        // an ending too; it goes directly after the wrapped contents.
                        let terminator: &[u8] = match line_ending {
                            LineEnding::LF => b"\n",
                            LineEnding::CRLF => b"\r\n",
                        };
                        let start = encoded_len + inner_endings_len;
                        buf[start..start + terminator.len()].copy_from_slice(terminator);

                        inner_endings_len + terminator.len()
                    }
                }
            };

            sink.write_encoded_bytes(&buf[..encoded_len + wrap_len])?;
        }

        Ok(())
    }
}
/// Computes how many input bytes can be encoded per chunk into a buffer of `encoded_buf_len` bytes.
///
/// The result is always a multiple of 3, so no partial-group encoder state has to be carried from
/// one chunk to the next.
///
/// * With padding enabled, two bytes of the buffer are held back so the final chunk can always be
///   padded in place.
/// * With line wrapping enabled, the result is the input for the largest whole number of lines
///   that fit in the buffer when *every* line is followed by its line ending. The very last line
///   of the overall output gets no ending, but every chunk before the final one does end with one,
///   so space is budgeted conservatively as if all of them did.
///
/// # Errors
///
/// Returns `ChunkedEncoderError::InvalidLineLength` if the configured line length is not a
/// multiple of 4 (which would require carrying encoder state between chunks), or if not even one
/// line plus its ending fits in the buffer.
///
/// # Panics
///
/// Panics if padding is enabled and the buffer is smaller than two bytes, or if the length
/// arithmetic overflows `usize`.
fn max_input_length(encoded_buf_len: usize, config: &Config) -> Result<usize, ChunkedEncoderError> {
    // Hold back room for up to two padding characters when padding is on.
    let padding_reserve = if config.pad { 2 } else { 0 };
    let usable_len = encoded_buf_len
        .checked_sub(padding_reserve)
        .expect("Don't use a tiny buffer");

    let (line_len, line_ending) = match config.line_wrap {
        // No wrapping: plain 3-bytes-in, 4-bytes-out expansion over the usable space.
        LineWrap::NoWrap => return Ok(usable_len / 4 * 3),
        LineWrap::Wrap(line_len, line_ending) => (line_len, line_ending),
    };

    // Only line lengths that are multiples of 4 map onto whole 3-byte input groups, which avoids
    // shuffling partial groups around in the encode buffer. line_len is never 0.
    if line_len % 4 != 0 {
        return Err(ChunkedEncoderError::InvalidLineLength);
    }

    let line_with_ending_len = line_len
        .checked_add(line_ending.len())
        .expect("Encoded line length with ending exceeds usize");

    // Whole lines (each with its ending) that fit in the usable part of the buffer.
    let lines_per_buffer = usable_len / line_with_ending_len;
    if lines_per_buffer == 0 {
        // Not even a single line plus ending fits.
        return Err(ChunkedEncoderError::InvalidLineLength);
    }

    let input_len = (line_len / 4 * 3)
        .checked_mul(lines_per_buffer)
        .expect("Max input size exceeds usize");
    assert!(input_len % 3 == 0 && input_len > 1);

    Ok(input_len)
}
#[cfg(test)]
pub mod tests {
    extern crate rand;

    use super::super::*;
    use super::super::tests::random_config;
    use super::*;

    use self::rand::Rng;
    use self::rand::distributions::{IndependentSample, Range};

    #[test]
    fn chunked_encode_empty() {
        assert_eq!("", chunked_encode_str(&[], STANDARD));
    }

    #[test]
    fn chunked_encode_intermediate_fast_loop() {
        // > 8 bytes input, will enter the pretty fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4",
            chunked_encode_str(b"foobarbazqux", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_fast_loop() {
        // > 32 bytes input, will enter the uber fast loop
        assert_eq!(
            "Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==",
            chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", STANDARD)
        );
    }

    #[test]
    fn chunked_encode_slow_loop_only() {
        // < 8 bytes input, slow loop only
        assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", STANDARD));
    }

    #[test]
    fn chunked_encode_line_wrap_padding() {
        // 13 bytes of input wrapped at 4 encoded chars per line; the trailing partial group is
        // padded and the last line gets no line ending.
        let config = config_wrap(true, 4, LineEnding::LF);
        assert_eq!(
            "Zm9v\nYmFy\nZm9v\nYmFy\nZg==",
            chunked_encode_str(b"foobarfoobarf", config)
        );
    }

    #[test]
    fn chunked_encode_longer_than_one_buffer_adds_final_line_wrap_lf() {
        // longest line len possible
        let config = config_wrap(false, 1020, LineEnding::LF);
        let input = vec![0xFF; 768];
        let encoded = chunked_encode_str(&input, config);
        let encoded_bytes = encoded.as_bytes();

        // got a line wrap
        assert_eq!(1024 + 1, encoded_bytes.len());

        for &b in encoded_bytes[0..1020].iter() {
            assert_eq!(b'/', b);
        }

        assert_eq!(b'\n', encoded_bytes[1020]);

        for &b in encoded_bytes[1021..].iter() {
            assert_eq!(b'/', b);
        }
    }

    #[test]
    fn chunked_encode_longer_than_one_buffer_adds_final_line_wrap_crlf() {
        // longest line len possible
        let config = config_wrap(false, 1020, LineEnding::CRLF);
        let input = vec![0xFF; 768];
        let encoded = chunked_encode_str(&input, config);
        let encoded_bytes = encoded.as_bytes();

        // got a line wrap
        assert_eq!(1024 + 2, encoded_bytes.len());

        for &b in encoded_bytes[0..1020].iter() {
            assert_eq!(b'/', b);
        }

        assert_eq!(b'\r', encoded_bytes[1020]);
        assert_eq!(b'\n', encoded_bytes[1021]);

        for &b in encoded_bytes[1022..].iter() {
            assert_eq!(b'/', b);
        }
    }

    #[test]
    fn chunked_encode_matches_normal_encode_random_string_sink() {
        let helper = StringSinkTestHelper;
        chunked_encode_matches_normal_encode_random(&helper);
    }

    #[test]
    fn max_input_length_no_wrap_no_pad() {
        let config = config_no_wrap(false);
        assert_eq!(768, max_input_length(1024, &config).unwrap());
    }

    #[test]
    fn max_input_length_no_wrap_with_pad_decrements_one_triple() {
        let config = config_no_wrap(true);
        assert_eq!(765, max_input_length(1024, &config).unwrap());
    }

    #[test]
    fn max_input_length_no_wrap_with_pad_one_byte_short() {
        let config = config_no_wrap(true);
        assert_eq!(765, max_input_length(1025, &config).unwrap());
    }

    #[test]
    fn max_input_length_no_wrap_with_pad_fits_exactly() {
        let config = config_no_wrap(true);
        assert_eq!(768, max_input_length(1026, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_fits_exactly_no_pad() {
        // 10 * (72 + 1) = 730. 54 input bytes = 72 encoded bytes, + 1 for LF.
        let config = config_wrap(false, 72, LineEnding::LF);
        assert_eq!(540, max_input_length(730, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_fits_one_spare_byte_no_pad() {
        // 10 * (72 + 1) = 730. 54 input bytes = 72 encoded bytes, + 1 for LF.
        let config = config_wrap(false, 72, LineEnding::LF);
        assert_eq!(540, max_input_length(731, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_size_one_byte_short_of_another_line_no_pad() {
        // 10 * (72 + 1) = 730. 54 input bytes = 72 encoded bytes, + 1 for LF.
        // 73 * 11 = 803
        let config = config_wrap(false, 72, LineEnding::LF);
        assert_eq!(540, max_input_length(802, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_size_another_line_no_pad() {
        // 10 * (72 + 1) = 730. 54 input bytes = 72 encoded bytes, + 1 for LF.
        // 73 * 11 = 803
        let config = config_wrap(false, 72, LineEnding::LF);
        assert_eq!(594, max_input_length(803, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_one_byte_short_with_pad() {
        // one fewer input line
        let config = config_wrap(true, 72, LineEnding::LF);
        assert_eq!(486, max_input_length(731, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_lf_fits_exactly_with_pad() {
        // 10 * (72 + 1) = 730. 54 input bytes = 72 encoded bytes, + 1 for LF.
        let config = config_wrap(true, 72, LineEnding::LF);
        assert_eq!(540, max_input_length(732, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_line_len_wont_fit_one_line_lf() {
        // 300 bytes is 400 encoded, + 1 for LF
        let config = config_wrap(false, 400, LineEnding::LF);
        assert_eq!(
            ChunkedEncoderError::InvalidLineLength,
            max_input_length(400, &config).unwrap_err()
        );
    }

    #[test]
    fn max_input_length_wrap_line_len_just_fits_one_line_lf() {
        // 300 bytes is 400 encoded, + 1 for LF
        let config = config_wrap(false, 400, LineEnding::LF);
        assert_eq!(300, max_input_length(401, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_crlf_fits_exactly_no_pad() {
        // 10 * (72 + 2) = 740. 54 input bytes = 72 encoded bytes, + 2 for CRLF.
        let config = config_wrap(false, 72, LineEnding::CRLF);
        assert_eq!(540, max_input_length(740, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_crlf_fits_one_spare_byte_no_pad() {
        // 10 * (72 + 2) = 740. 54 input bytes = 72 encoded bytes, + 2 for CRLF.
        let config = config_wrap(false, 72, LineEnding::CRLF);
        assert_eq!(540, max_input_length(741, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_crlf_size_one_byte_short_of_another_line_no_pad() {
        // 10 * (72 + 2) = 740. 54 input bytes = 72 encoded bytes, + 2 for CRLF.
        // 74 * 11 = 814
        let config = config_wrap(false, 72, LineEnding::CRLF);
        assert_eq!(540, max_input_length(813, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_with_crlf_size_another_line_no_pad() {
        // 10 * (72 + 2) = 740. 54 input bytes = 72 encoded bytes, + 2 for CRLF.
        // 74 * 11 = 814
        let config = config_wrap(false, 72, LineEnding::CRLF);
        assert_eq!(594, max_input_length(814, &config).unwrap());
    }

    #[test]
    fn max_input_length_wrap_line_len_not_multiple_of_4_rejected() {
        let config = config_wrap(false, 41, LineEnding::LF);
        assert_eq!(
            ChunkedEncoderError::InvalidLineLength,
            max_input_length(400, &config).unwrap_err()
        );
    }

    /// Encodes many random inputs under random configs through `sink_test_helper` and checks that
    /// the chunked output is identical to what the non-chunked `encode_config_buf` produces.
    pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
        let mut input_buf: Vec<u8> = Vec::new();
        let mut output_buf = String::new();
        let mut rng = rand::weak_rng();
        let line_len_range = Range::new(1, 1020);
        let input_len_range = Range::new(1, 10_000);

        for _ in 0..5_000 {
            input_buf.clear();
            output_buf.clear();

            let buf_len = input_len_range.ind_sample(&mut rng);
            for _ in 0..buf_len {
                input_buf.push(rng.gen());
            }

            let config = random_config_for_chunked_encoder(&mut rng, &line_len_range);

            let chunk_encoded_string = sink_test_helper.encode_to_string(config, &input_buf);
            encode_config_buf(&input_buf, config, &mut output_buf);

            assert_eq!(
                output_buf,
                chunk_encoded_string,
                "input len={}, config: pad={}, wrap={:?}",
                buf_len,
                config.pad,
                config.line_wrap
            );
        }
    }

    // Encodes `bytes` with a fresh ChunkedEncoder for `config`, collecting the output in a String.
    fn chunked_encode_str(bytes: &[u8], config: Config) -> String {
        let mut sink = StringSink::new();
        let encoder = ChunkedEncoder::new(config).unwrap();
        encoder.encode(bytes, &mut sink).unwrap();

        sink.string
    }

    // Draws random configs until one is usable by the chunked encoder.
    fn random_config_for_chunked_encoder<R: Rng>(
        rng: &mut R,
        line_len_range: &Range<usize>,
    ) -> Config {
        loop {
            let config = random_config(rng, line_len_range);

            // only use a config with line_len that is divisible by 4
            match config.line_wrap {
                LineWrap::NoWrap => return config,
                LineWrap::Wrap(line_len, _) if line_len % 4 == 0 => return config,
                LineWrap::Wrap(..) => {}
            }
        }
    }

    fn config_no_wrap(pad: bool) -> Config {
        Config::new(CharacterSet::Standard, pad, false, LineWrap::NoWrap)
    }

    fn config_wrap(pad: bool, line_len: usize, line_ending: LineEnding) -> Config {
        Config::new(
            CharacterSet::Standard,
            pad,
            false,
            LineWrap::Wrap(line_len, line_ending),
        )
    }

    /// An abstraction around sinks so that tests can easily be applied to any sink
    /// implementation.
    pub trait SinkTestHelper {
        /// Encodes `bytes` with a chunked encoder built from `config` and returns everything the
        /// sink received as one string.
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String;
    }

    // A really simple sink that just appends to a string for testing
    struct StringSink {
        string: String,
    }

    impl StringSink {
        fn new() -> StringSink {
            StringSink {
                string: String::new(),
            }
        }
    }

    impl Sink for StringSink {
        type Error = ();

        fn write_encoded_bytes(&mut self, s: &[u8]) -> Result<(), Self::Error> {
            self.string.push_str(str::from_utf8(s).unwrap());

            Ok(())
        }
    }

    struct StringSinkTestHelper;

    impl SinkTestHelper for StringSinkTestHelper {
        fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String {
            chunked_encode_str(bytes, config)
        }
    }
}