blob: 663e2d3d601f67a36469693366a675d46e456ae9 [file] [log] [blame]
/*-
* Copyright (c) 2014 Michihiro NAKAJIMA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "archive_platform.h"
__FBSDID("$FreeBSD$");
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#include <stdio.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_LZ4_H
#include <lz4.h>
#endif
#include "archive.h"
#include "archive_endian.h"
#include "archive_private.h"
#include "archive_read_private.h"
#include "archive_xxhash.h"
#define LZ4_MAGICNUMBER 0x184d2204
#define LZ4_SKIPPABLED 0x184d2a50
#define LZ4_LEGACY 0x184c2102
#if defined(HAVE_LIBLZ4)
struct private_data {
enum { SELECT_STREAM,
READ_DEFAULT_STREAM,
READ_DEFAULT_BLOCK,
READ_LEGACY_STREAM,
READ_LEGACY_BLOCK,
} stage;
struct {
unsigned block_independence:1;
unsigned block_checksum:3;
unsigned stream_size:1;
unsigned stream_checksum:1;
unsigned preset_dictionary:1;
int block_maximum_size;
} flags;
int64_t stream_size;
uint32_t dict_id;
char *out_block;
size_t out_block_size;
/* Bytes read but not yet consumed via __archive_read_consume() */
size_t unconsumed;
size_t decoded_size;
void *xxh32_state;
char valid; /* True = decompressor is initialized */
char eof; /* True = found end of compressed data. */
};
#define LEGACY_BLOCK_SIZE (8 * 1024 * 1024)
/* Lz4 filter */
static ssize_t lz4_filter_read(struct archive_read_filter *, const void **);
static int lz4_filter_close(struct archive_read_filter *);
#endif
/*
* Note that we can detect lz4 archives even if we can't decompress
* them. (In fact, we like detecting them because we can give better
* error messages.) So the bid framework here gets compiled even
* if liblz4 is unavailable.
*/
static int lz4_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
static int lz4_reader_init(struct archive_read_filter *);
static int lz4_reader_free(struct archive_read_filter_bidder *);
#if defined(HAVE_LIBLZ4)
static ssize_t lz4_filter_read_default_stream(struct archive_read_filter *,
const void **);
static ssize_t lz4_filter_read_legacy_stream(struct archive_read_filter *,
const void **);
#endif
int
archive_read_support_filter_lz4(struct archive *_a)
{
struct archive_read *a = (struct archive_read *)_a;
struct archive_read_filter_bidder *reader;
archive_check_magic(_a, ARCHIVE_READ_MAGIC,
ARCHIVE_STATE_NEW, "archive_read_support_filter_lz4");
if (__archive_read_get_bidder(a, &reader) != ARCHIVE_OK)
return (ARCHIVE_FATAL);
reader->data = NULL;
reader->name = "lz4";
reader->bid = lz4_reader_bid;
reader->init = lz4_reader_init;
reader->options = NULL;
reader->free = lz4_reader_free;
#if defined(HAVE_LIBLZ4)
return (ARCHIVE_OK);
#else
archive_set_error(_a, ARCHIVE_ERRNO_MISC,
"Using external lz4 program");
return (ARCHIVE_WARN);
#endif
}
static int
lz4_reader_free(struct archive_read_filter_bidder *self){
(void)self; /* UNUSED */
return (ARCHIVE_OK);
}
/*
* Test whether we can handle this data.
*
* This logic returns zero if any part of the signature fails. It
* also tries to Do The Right Thing if a very short buffer prevents us
* from verifying as much as we would like.
*/
static int
lz4_reader_bid(struct archive_read_filter_bidder *self,
struct archive_read_filter *filter)
{
const unsigned char *buffer;
ssize_t avail;
int bits_checked;
uint32_t number;
(void)self; /* UNUSED */
/* Minimal lz4 archive is 11 bytes. */
buffer = __archive_read_filter_ahead(filter, 11, &avail);
if (buffer == NULL)
return (0);
/* First four bytes must be LZ4 magic numbers. */
bits_checked = 0;
if ((number = archive_le32dec(buffer)) == LZ4_MAGICNUMBER) {
unsigned char flag, BD;
bits_checked += 32;
/* Next follows a stream descriptor. */
/* Descriptor Flags. */
flag = buffer[4];
/* A version number must be "01". */
if (((flag & 0xc0) >> 6) != 1)
return (0);
/* A reserved bit must be "0". */
if (flag & 2)
return (0);
bits_checked += 8;
BD = buffer[5];
/* A block maximum size should be more than 3. */
if (((BD & 0x70) >> 4) < 4)
return (0);
/* Reserved bits must be "0". */
if (BD & ~0x70)
return (0);
bits_checked += 8;
} else if (number == LZ4_LEGACY) {
bits_checked += 32;
}
return (bits_checked);
}
#if !defined(HAVE_LIBLZ4)
/*
* If we don't have the library on this system, we can't actually do the
* decompression. We can, however, still detect compressed archives
* and emit a useful message.
*/
static int
lz4_reader_init(struct archive_read_filter *self)
{
int r;
r = __archive_read_program(self, "lz4 -d -q");
/* Note: We set the format here even if __archive_read_program()
* above fails. We do, after all, know what the format is
* even if we weren't able to read it. */
self->code = ARCHIVE_FILTER_LZ4;
self->name = "lz4";
return (r);
}
#else
/*
* Setup the callbacks.
*/
static int
lz4_reader_init(struct archive_read_filter *self)
{
struct private_data *state;
self->code = ARCHIVE_FILTER_LZ4;
self->name = "lz4";
state = (struct private_data *)calloc(sizeof(*state), 1);
if (state == NULL) {
archive_set_error(&self->archive->archive, ENOMEM,
"Can't allocate data for lz4 decompression");
return (ARCHIVE_FATAL);
}
self->data = state;
state->stage = SELECT_STREAM;
self->read = lz4_filter_read;
self->skip = NULL; /* not supported */
self->close = lz4_filter_close;
return (ARCHIVE_OK);
}
static int
lz4_allocate_out_block(struct archive_read_filter *self)
{
struct private_data *state = (struct private_data *)self->data;
size_t out_block_size = state->flags.block_maximum_size;
void *out_block;
if (!state->flags.block_independence)
out_block_size += 64 * 1024;
if (state->out_block_size < out_block_size) {
free(state->out_block);
out_block = (unsigned char *)malloc(out_block_size);
state->out_block_size = out_block_size;
if (out_block == NULL) {
archive_set_error(&self->archive->archive, ENOMEM,
"Can't allocate data for lz4 decompression");
return (ARCHIVE_FATAL);
}
state->out_block = out_block;
}
if (!state->flags.block_independence)
memset(state->out_block, 0, 64 * 1024);
return (ARCHIVE_OK);
}
static int
lz4_allocate_out_block_for_legacy(struct archive_read_filter *self)
{
struct private_data *state = (struct private_data *)self->data;
size_t out_block_size = LEGACY_BLOCK_SIZE;
void *out_block;
if (state->out_block_size < out_block_size) {
free(state->out_block);
out_block = (unsigned char *)malloc(out_block_size);
state->out_block_size = out_block_size;
if (out_block == NULL) {
archive_set_error(&self->archive->archive, ENOMEM,
"Can't allocate data for lz4 decompression");
return (ARCHIVE_FATAL);
}
state->out_block = out_block;
}
return (ARCHIVE_OK);
}
/*
* Return the next block of decompressed data.
*/
static ssize_t
lz4_filter_read(struct archive_read_filter *self, const void **p)
{
struct private_data *state = (struct private_data *)self->data;
ssize_t ret;
if (state->eof) {
*p = NULL;
return (0);
}
__archive_read_filter_consume(self->upstream, state->unconsumed);
state->unconsumed = 0;
switch (state->stage) {
case SELECT_STREAM:
break;
case READ_DEFAULT_STREAM:
case READ_LEGACY_STREAM:
/* Reading a lz4 stream already failed. */
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC, "Invalid sequence.");
return (ARCHIVE_FATAL);
case READ_DEFAULT_BLOCK:
ret = lz4_filter_read_default_stream(self, p);
if (ret != 0 || state->stage != SELECT_STREAM)
return ret;
break;
case READ_LEGACY_BLOCK:
ret = lz4_filter_read_legacy_stream(self, p);
if (ret != 0 || state->stage != SELECT_STREAM)
return ret;
break;
default:
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC, "Program error.");
return (ARCHIVE_FATAL);
break;
}
while (state->stage == SELECT_STREAM) {
const char *read_buf;
/* Read a magic number. */
read_buf = __archive_read_filter_ahead(self->upstream, 4,
NULL);
if (read_buf == NULL) {
state->eof = 1;
*p = NULL;
return (0);
}
uint32_t number = archive_le32dec(read_buf);
__archive_read_filter_consume(self->upstream, 4);
if (number == LZ4_MAGICNUMBER)
return lz4_filter_read_default_stream(self, p);
else if (number == LZ4_LEGACY)
return lz4_filter_read_legacy_stream(self, p);
else if ((number & ~0xF) == LZ4_SKIPPABLED) {
read_buf = __archive_read_filter_ahead(
self->upstream, 4, NULL);
if (read_buf == NULL) {
archive_set_error(
&self->archive->archive,
ARCHIVE_ERRNO_MISC,
"Malformed lz4 data");
return (ARCHIVE_FATAL);
}
uint32_t skip_bytes = archive_le32dec(read_buf);
__archive_read_filter_consume(self->upstream,
4 + skip_bytes);
} else {
/* Ignore following unrecognized data. */
state->eof = 1;
*p = NULL;
return (0);
}
}
state->eof = 1;
*p = NULL;
return (0);
}
static int
lz4_filter_read_descriptor(struct archive_read_filter *self)
{
struct private_data *state = (struct private_data *)self->data;
const char *read_buf;
ssize_t bytes_remaining;
ssize_t descriptor_bytes;
unsigned char flag, bd;
unsigned int chsum, chsum_verifier;
/* Make sure we have 2 bytes for flags. */
read_buf = __archive_read_filter_ahead(self->upstream, 2,
&bytes_remaining);
if (read_buf == NULL) {
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC,
"truncated lz4 input");
return (ARCHIVE_FATAL);
}
/*
Parse flags.
*/
flag = (unsigned char)read_buf[0];
/* Verify version number. */
if ((flag & 0xc0) != 1<<6)
goto malformed_error;
/* A reserved bit must be zero. */
if (flag & 0x02)
goto malformed_error;
state->flags.block_independence = (flag & 0x20) != 0;
state->flags.block_checksum = (flag & 0x10)?4:0;
state->flags.stream_size = (flag & 0x08) != 0;
state->flags.stream_checksum = (flag & 0x04) != 0;
state->flags.preset_dictionary = (flag & 0x01) != 0;
/* BD */
bd = (unsigned char)read_buf[1];
/* Reserved bits must be zero. */
if (bd & 0x8f)
goto malformed_error;
/* Get a maximum block size. */
switch (read_buf[1] >> 4) {
case 4: /* 64 KB */
state->flags.block_maximum_size = 64 * 1024;
break;
case 5: /* 256 KB */
state->flags.block_maximum_size = 256 * 1024;
break;
case 6: /* 1 MB */
state->flags.block_maximum_size = 1024 * 1024;
break;
case 7: /* 4 MB */
state->flags.block_maximum_size = 4 * 1024 * 1024;
break;
default:
goto malformed_error;
}
/* Read the whole descriptor in a stream block. */
descriptor_bytes = 3;
if (state->flags.stream_size)
descriptor_bytes += 8;
if (state->flags.preset_dictionary)
descriptor_bytes += 4;
if (bytes_remaining < descriptor_bytes) {
read_buf = __archive_read_filter_ahead(self->upstream,
descriptor_bytes, &bytes_remaining);
if (read_buf == NULL) {
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC,
"truncated lz4 input");
return (ARCHIVE_FATAL);
}
}
/* Check if a descriptor is corrupted */
chsum = __archive_xxhash.XXH32(read_buf, (int)descriptor_bytes -1, 0);
chsum = (chsum >> 8) & 0xff;
chsum_verifier = read_buf[descriptor_bytes-1] & 0xff;
if (chsum != chsum_verifier)
goto malformed_error;
__archive_read_filter_consume(self->upstream, descriptor_bytes);
/* Make sure we have an enough buffer for uncompressed data. */
if (lz4_allocate_out_block(self) != ARCHIVE_OK)
return (ARCHIVE_FATAL);
if (state->flags.stream_checksum)
state->xxh32_state = __archive_xxhash.XXH32_init(0);
state->decoded_size = 0;
/* Success */
return (ARCHIVE_OK);
malformed_error:
archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
"malformed lz4 data");
return (ARCHIVE_FATAL);
}
static ssize_t
lz4_filter_read_data_block(struct archive_read_filter *self, const void **p)
{
struct private_data *state = (struct private_data *)self->data;
ssize_t compressed_size;
const char *read_buf;
ssize_t bytes_remaining;
int checksum_size;
ssize_t uncompressed_size;
size_t prefix64k;
*p = NULL;
/* Make sure we have 4 bytes for a block size. */
read_buf = __archive_read_filter_ahead(self->upstream, 4,
&bytes_remaining);
if (read_buf == NULL)
goto truncated_error;
compressed_size = archive_le32dec(read_buf);
if ((compressed_size & ~(1 << 31)) > state->flags.block_maximum_size)
goto malformed_error;
/* A compressed size == 0 means the end of stream blocks. */
if (compressed_size == 0) {
__archive_read_filter_consume(self->upstream, 4);
return 0;
}
checksum_size = state->flags.block_checksum;
/* Check if the block is uncompressed. */
if (compressed_size & (1 << 31)) {
compressed_size &= ~(1 << 31);
uncompressed_size = compressed_size;
} else
uncompressed_size = 0;/* Unknown yet. */
/*
Unfortunately, lz4 decompression API requires a whole block
for its decompression speed, so we read a whole block and allocate
a huge buffer used for decoded data.
*/
read_buf = __archive_read_filter_ahead(self->upstream,
4 + compressed_size + checksum_size, &bytes_remaining);
if (read_buf == NULL)
goto truncated_error;
/* Optional process, checking a block sum. */
if (checksum_size) {
unsigned int chsum = __archive_xxhash.XXH32(
read_buf + 4, (int)compressed_size, 0);
unsigned int chsum_block =
archive_le32dec(read_buf + 4 + compressed_size);
if (chsum != chsum_block)
goto malformed_error;
}
/* If the block is uncompressed, there is nothing to do. */
if (uncompressed_size) {
/* Prepare a prefix 64k block for next block. */
if (!state->flags.block_independence) {
prefix64k = 64 * 1024;
if (uncompressed_size < (ssize_t)prefix64k) {
memcpy(state->out_block
+ prefix64k - uncompressed_size,
read_buf + 4,
uncompressed_size);
memset(state->out_block, 0,
prefix64k - uncompressed_size);
} else {
memcpy(state->out_block,
read_buf + 4
+ uncompressed_size - prefix64k,
prefix64k);
}
state->decoded_size = 0;
}
state->unconsumed = 4 + uncompressed_size + checksum_size;
*p = read_buf + 4;
return uncompressed_size;
}
/*
Decompress a block data.
*/
if (state->flags.block_independence) {
prefix64k = 0;
uncompressed_size = LZ4_decompress_safe(read_buf + 4,
state->out_block, (int)compressed_size,
state->flags.block_maximum_size);
} else {
prefix64k = 64 * 1024;
if (state->decoded_size) {
if (state->decoded_size < prefix64k) {
memmove(state->out_block
+ prefix64k - state->decoded_size,
state->out_block + prefix64k,
state->decoded_size);
memset(state->out_block, 0,
prefix64k - state->decoded_size);
} else {
memmove(state->out_block,
state->out_block + state->decoded_size,
prefix64k);
}
}
#if LZ4_VERSION_MAJOR >= 1 && LZ4_VERSION_MINOR >= 7
uncompressed_size = LZ4_decompress_safe_usingDict(
read_buf + 4,
state->out_block + prefix64k, (int)compressed_size,
state->flags.block_maximum_size,
state->out_block,
prefix64k);
#else
uncompressed_size = LZ4_decompress_safe_withPrefix64k(
read_buf + 4,
state->out_block + prefix64k, (int)compressed_size,
state->flags.block_maximum_size);
#endif
}
/* Check if an error occurred in the decompression process. */
if (uncompressed_size < 0) {
archive_set_error(&(self->archive->archive),
ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
return (ARCHIVE_FATAL);
}
state->unconsumed = 4 + compressed_size + checksum_size;
*p = state->out_block + prefix64k;
state->decoded_size = uncompressed_size;
return uncompressed_size;
malformed_error:
archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
"malformed lz4 data");
return (ARCHIVE_FATAL);
truncated_error:
archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
"truncated lz4 input");
return (ARCHIVE_FATAL);
}
static ssize_t
lz4_filter_read_default_stream(struct archive_read_filter *self, const void **p)
{
struct private_data *state = (struct private_data *)self->data;
const char *read_buf;
ssize_t bytes_remaining;
ssize_t ret;
if (state->stage == SELECT_STREAM) {
state->stage = READ_DEFAULT_STREAM;
/* First, read a descriptor. */
if((ret = lz4_filter_read_descriptor(self)) != ARCHIVE_OK)
return (ret);
state->stage = READ_DEFAULT_BLOCK;
}
/* Decompress a block. */
ret = lz4_filter_read_data_block(self, p);
/* If the end of block is detected, change the filter status
to read next stream. */
if (ret == 0 && *p == NULL)
state->stage = SELECT_STREAM;
/* Optional process, checking a stream sum. */
if (state->flags.stream_checksum) {
if (state->stage == SELECT_STREAM) {
unsigned int checksum;
unsigned int checksum_stream;
read_buf = __archive_read_filter_ahead(self->upstream,
4, &bytes_remaining);
if (read_buf == NULL) {
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC, "truncated lz4 input");
return (ARCHIVE_FATAL);
}
checksum = archive_le32dec(read_buf);
__archive_read_filter_consume(self->upstream, 4);
checksum_stream = __archive_xxhash.XXH32_digest(
state->xxh32_state);
state->xxh32_state = NULL;
if (checksum != checksum_stream) {
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC,
"lz4 stream cheksum error");
return (ARCHIVE_FATAL);
}
} else if (ret > 0)
__archive_xxhash.XXH32_update(state->xxh32_state,
*p, (int)ret);
}
return (ret);
}
static ssize_t
lz4_filter_read_legacy_stream(struct archive_read_filter *self, const void **p)
{
struct private_data *state = (struct private_data *)self->data;
int compressed;
const char *read_buf;
ssize_t ret;
*p = NULL;
ret = lz4_allocate_out_block_for_legacy(self);
if (ret != ARCHIVE_OK)
return ret;
/* Make sure we have 4 bytes for a block size. */
read_buf = __archive_read_filter_ahead(self->upstream, 4, NULL);
if (read_buf == NULL) {
if (state->stage == SELECT_STREAM) {
state->stage = READ_LEGACY_STREAM;
archive_set_error(&self->archive->archive,
ARCHIVE_ERRNO_MISC,
"truncated lz4 input");
return (ARCHIVE_FATAL);
}
state->stage = SELECT_STREAM;
return 0;
}
state->stage = READ_LEGACY_BLOCK;
compressed = archive_le32dec(read_buf);
if (compressed > LZ4_COMPRESSBOUND(LEGACY_BLOCK_SIZE)) {
state->stage = SELECT_STREAM;
return 0;
}
/* Make sure we have a whole block. */
read_buf = __archive_read_filter_ahead(self->upstream,
4 + compressed, NULL);
if (read_buf == NULL) {
archive_set_error(&(self->archive->archive),
ARCHIVE_ERRNO_MISC, "truncated lz4 input");
return (ARCHIVE_FATAL);
}
ret = LZ4_decompress_safe(read_buf + 4, state->out_block,
compressed, (int)state->out_block_size);
if (ret < 0) {
archive_set_error(&(self->archive->archive),
ARCHIVE_ERRNO_MISC, "lz4 decompression failed");
return (ARCHIVE_FATAL);
}
*p = state->out_block;
state->unconsumed = 4 + compressed;
return ret;
}
/*
* Clean up the decompressor.
*/
static int
lz4_filter_close(struct archive_read_filter *self)
{
struct private_data *state;
int ret = ARCHIVE_OK;
state = (struct private_data *)self->data;
free(state->xxh32_state);
free(state->out_block);
free(state);
return (ret);
}
#endif /* HAVE_LIBLZ4 */