blob: ee91d194ada6dce7e8bc561d78fe6cc5e18996f7 [file] [edit]
/* inflate_p.h -- Private inline functions and macros shared with more than one inflate method
*
*/
#ifndef INFLATE_P_H
#define INFLATE_P_H
#include <stdlib.h>
#include "zendian.h"
#include "zmemory.h"
#include "crc32_braid_tbl.h"
/* Architecture-specific hooks.
 *
 * With S390_DFLTCC_INFLATE defined, the window is padded/aligned to 4096 bytes and none of the
 * INFLATE_* hook macros are defined in this header -- presumably arch/s390/dfltcc_inflate.h
 * supplies them (that header is not visible from here; confirm there).
 *
 * Otherwise the window is padded/aligned to 64 bytes and every hook below gets a default:
 * statement-like hooks expand to an empty do/while, INFLATE_ADJUST_WINDOW_SIZE(n) yields n
 * unchanged, and the INFLATE_NEED_* predicates are the constant 1.
 */
#ifdef S390_DFLTCC_INFLATE
# include "arch/s390/dfltcc_inflate.h"
/* DFLTCC instructions require window to be page-aligned */
# define PAD_WINDOW PAD_4096
# define WINDOW_PAD_SIZE 4096
# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096
#else
/* Generic build: pad and align the window to 64 bytes. */
# define PAD_WINDOW PAD_64
# define WINDOW_PAD_SIZE 64
# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64
/* Adjust the window size for the arch-specific inflate code. The default leaves n unchanged. */
# define INFLATE_ADJUST_WINDOW_SIZE(n) (n)
/* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */
# define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */
# define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
/* Invoked at the beginning of each block. Useful for plugging arch-specific inflation code. */
# define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0)
/* Returns whether zlib-ng should compute a checksum. Set to 0 if arch-specific inflation code already does that. */
# define INFLATE_NEED_CHECKSUM(strm) 1
/* Returns whether zlib-ng should update a window. Set to 0 if arch-specific inflation code already does that. */
# define INFLATE_NEED_UPDATEWINDOW(strm) 1
/* Invoked at the beginning of inflateMark(). Useful for updating arch-specific pointers and offsets. */
# define INFLATE_MARK_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflateSyncPoint(). Useful for performing arch-specific state checks. */
# define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0)
/* Invoked at the beginning of inflateSetDictionary(). Useful for checking arch-specific window data. */
# define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
/* Invoked at the beginning of inflateGetDictionary(). Useful for adjusting arch-specific window data. */
# define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
#endif
/*
* Macros shared by inflate() and inflateBack()
*/
/* Check macros for the header CRC. Only defined when GUNZIP is set.
 *
 * CRC_DO1_B(c, b) folds one byte into the running value c: the low 8 bits of (c ^ b) index
 * crc_table (not defined in this header; presumably it comes from crc32_braid_tbl.h) and the
 * result is XORed with c >> 8. c must be a modifiable lvalue; it is evaluated three times and
 * is not parenthesized, so pass a plain variable.
 *
 * CRC2(check, word) and CRC4(check, word) update check with the low 2 or 4 bytes of word,
 * least-significant byte first. The running value is complemented on entry and again before
 * it is stored back into check.
 *
 * Both expansions declare a local variable named crc, so neither argument may refer to a
 * caller variable of that name.
 */
#ifdef GUNZIP
# define CRC_DO1_B(c, b) c = crc_table[(c ^ (b)) & 0xff] ^ (c >> 8)
# define CRC2(check, word) \
do { \
uint32_t crc = ~(uint32_t)(check); \
CRC_DO1_B(crc, (word) ); \
CRC_DO1_B(crc, (word) >> 8); \
(check) = ~crc; \
} while (0)
# define CRC4(check, word) \
do { \
uint32_t crc = ~(uint32_t)(check); \
CRC_DO1_B(crc, (word) ); \
CRC_DO1_B(crc, (word) >> 8); \
CRC_DO1_B(crc, (word) >> 16); \
CRC_DO1_B(crc, (word) >> 24); \
(check) = ~crc; \
} while (0)
#endif
/* Type of the `bits` counter, i.e. the number of bits currently held in the bit accumulator.
 * x86 builds use a single byte for it as a compiler-optimization aid; every other target
 * uses a plain unsigned int. */
#if defined(ARCH_X86)
typedef uint8_t bits_t;
#else
typedef unsigned int bits_t;
#endif
/* Cache the stream and decoder state in locals for speed.
 * The expanding scope must provide the strm and state pointers plus assignable
 * next/have (input), put/left (output) and hold/bits (bit accumulator) variables. */
#define LOAD() \
    do { \
        next = strm->next_in; \
        have = strm->avail_in; \
        put = strm->next_out; \
        left = strm->avail_out; \
        bits = (bits_t)state->bits; \
        hold = state->hold; \
    } while (0)
/* Write the cached locals back into the stream and decoder state (inverse of LOAD()).
 * Uses the same caller-provided names: strm, state, next/have, put/left, hold/bits. */
#define RESTORE() \
    do { \
        strm->next_in = (z_const unsigned char *)next; \
        strm->avail_in = have; \
        strm->next_out = put; \
        strm->avail_out = left; \
        state->bits = bits; \
        state->hold = hold; \
    } while (0)
/* Refill to have at least 56 bits in the bit accumulator.
 *
 * Operates on the caller's `hold`, `bits` and `in` variables. The order of the three
 * statements matters because `bits` is read before it is updated:
 *   1. OR the 64 bits loaded at `in`, shifted left by `bits`, into hold.
 *   2. Advance `in` by (63 ^ bits) >> 3 bytes; for bits < 64 that is (63 - bits) / 8, the
 *      number of whole bytes that fit above the bits already held.
 *   3. Set the bits of value 56 in the count; for bits < 64 this equals adding 8 for every
 *      byte consumed in step 2 and leaves the low three bits unchanged.
 * load_64_bits() is documented as loading 64 bits from `in`, so the caller presumably has to
 * guarantee that 8 bytes are readable there -- confirm at the call sites.
 */
#define REFILL() do { \
hold |= load_64_bits(in, bits); \
in += (63 ^ bits) >> 3; \
bits |= 56; \
} while (0)
/* Empty the input bit accumulator: no bits held, no pending value.
 * Operates on the caller's `hold` and `bits` variables. */
#define INITBITS() \
    do { \
        bits = 0; \
        hold = 0; \
    } while (0)
/* Ensure that there are at least n bits in the bit accumulator. If there is
   not enough available input to do that, then return from inflate()/inflateBack().

   n is evaluated once into the local `u`, so n must not refer to a caller variable named u.
   PULLBYTE() is not defined in this header; the including file must provide it, and it is
   presumably what performs the early return when input runs out.
   The comparison casts u to bits_t, so on builds where bits_t is uint8_t the requested count
   is truncated to 8 bits. */
#define NEEDBITS(n) \
do { \
unsigned u = (unsigned)(n); \
while (bits < (bits_t)u) \
PULLBYTE(); \
} while (0)
/* Yield the n least-significant bits of the caller's `hold` accumulator without consuming
 * them. The mask is built in unsigned int arithmetic; n is expected to be below 16. */
#define BITS(n) \
    ((((unsigned)1 << (unsigned)(n)) - 1) & hold)
/* Remove n bits from the bit accumulator: shift the caller's `hold` right by n and
 * subtract n from `bits`.
 * n is evaluated once into the local `u`, so n must not refer to a caller variable named u.
 * No check is made that at least n bits are held; callers are presumably expected to have
 * ensured that first (e.g. with NEEDBITS). */
#define DROPBITS(n) \
do { \
unsigned u = (unsigned)(n); \
hold >>= u; \
bits -= (bits_t)u; \
} while (0)
/* Discard the 0..7 bits that sit below the next byte boundary: shift them out of the
 * caller's `hold` and round `bits` down to a multiple of eight. */
#define BYTEBITS() \
    do { \
        hold >>= (bits & 7); \
        bits &= ~7; \
    } while (0)
/* Set mode=BAD and prepare error message.
 *
 * Operates on the caller's `state` and `strm` pointers: state->mode becomes BAD and
 * strm->msg is pointed at errmsg (cast to char *; the string is not copied).
 * errmsg is parenthesized so that the cast applies to the whole argument expression,
 * e.g. a conditional selecting between two messages, rather than to its first operand only. */
#define SET_BAD(errmsg) \
    do { \
        state->mode = BAD; \
        strm->msg = (char *)(errmsg); \
    } while (0)
/* Huffman code table entry format for length/distance codes (op & 16 set):
 *   bits = code_bits + extra_bits (combined for single-shift decode)
 *   op   = 16 | code_bits
 *   val  = base value
 *
 * For literals (op == 0): bits = code_bits, val = literal byte
 *
 * In the macros below every argument is parenthesized, so `here` may be any expression
 * that yields an entry (for example *ptr or table[i]) and `old` / `op` may be compound
 * expressions. `here` is still evaluated more than once; avoid side effects in it.
 */
/* Extract code size from a Huffman table entry: the low four bits of op for length/distance
 * entries, the bits field otherwise. */
#define CODE_BITS(here) \
    ((unsigned)(((here).op & 16) ? ((here).op & 15) : (here).bits))
/* Extract extra bits count from a length/distance code entry (0 for any other entry). */
#define CODE_EXTRA(here) \
    ((unsigned)(((here).op & 16) ? ((here).bits - ((here).op & 15)) : 0))
/* Extract extra bits value from saved bit accumulator: keep the low here.bits bits of old,
 * then shift out the low (op & MAX_BITS) bits. MAX_BITS is defined outside this header. */
#define EXTRA_BITS(old, here, op) \
    (((old) & (((uint64_t)1 << (here).bits) - 1)) >> ((op) & MAX_BITS))
/* Combined op field: when `extra` has bit 16 set (a length/distance entry) the result is
 * code_bits with bit 16 set; any other `extra` value is passed through unchanged.
 * The result is narrowed to unsigned char. */
#define COMBINE_OP(extra, code_bits) \
    ((unsigned char)((((extra) & 16) != 0) ? ((code_bits) | 16) : (extra)))
/* Combined bits field: code_bits plus the extra-bit count held in the low nibble of `extra`,
 * narrowed to unsigned char. */
#define COMBINE_BITS(code_bits, extra) \
    ((unsigned char)(((extra) & 15) + (code_bits)))
/* Trace macros for debugging.
 *
 * Each one forwards a parenthesized (stream, format, args...) list to Tracevv(), which is
 * defined outside this header. Arguments are parenthesized so that compound expressions
 * keep their meaning inside the range test and the argument list.
 */
/* Print a literal as a character when it is in the range 0x20..0x7e, as hex otherwise.
 * val is evaluated three times; do not pass an expression with side effects. */
#define TRACE_LITERAL(val) \
    Tracevv((stderr, (val) >= 0x20 && (val) < 0x7f ? \
            "inflate: literal '%c'\n" : \
            "inflate: literal 0x%02x\n", (val)))
/* Print a match length. */
#define TRACE_LENGTH(len) \
    Tracevv((stderr, "inflate: length %u\n", (len)))
/* Print a match distance. */
#define TRACE_DISTANCE(dist) \
    Tracevv((stderr, "inflate: distance %u\n", (dist)))
/* Print the end-of-block marker. */
#define TRACE_END_OF_BLOCK() \
    Tracevv((stderr, "inflate: end of block\n"))
/* Thresholds for the fast inflate path. The names suggest a minimum amount of available
 * input (HAVE), a minimum amount of output space left (LEFT) and a minimum safety margin
 * (SAFE); the code that consumes them is not in this header -- confirm at the call sites.
 * NOTE(review): the first comment speaks of "bits" while the name suggests a count compared
 * against available input -- confirm the unit. */
#define INFLATE_FAST_MIN_HAVE 15 /* max input bits per length/distance pair */
#define INFLATE_FAST_MIN_LEFT 260 /* max output per token (258) + 2 */
#define INFLATE_FAST_MIN_SAFE 3 /* max unchecked literal writes per iteration */
/* Load 64 bits from IN, convert them with Z_U64_FROM_LE and return the value shifted left
 * by BITS, so the result can be ORed into an accumulator that already holds BITS bits.
 * BITS must be smaller than 64 for the shift to be defined. */
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
    const uint64_t raw = zng_memread_8(in);
    const uint64_t value = Z_U64_FROM_LE(raw);

    return value << bits;
}
/* Behave like chunkcopy, but avoid writing beyond the legal output.
 *
 * Copies up to len bytes from `from` to `out`. len is first clamped to (safe - out), so
 * `safe` is an exclusive upper bound for writes and must not lie below `out`.
 * The two regions may overlap:
 *   - when `from` is ahead of `out`, the result matches a plain forward copy;
 *   - when `from` trails `out` by d bytes, the d-byte pattern is replicated, exactly as a
 *     byte-by-byte forward copy would do.
 * Returns a pointer one past the last byte written (out + clamped len).
 */
static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe) {
    /* Never write at or past `safe`. */
    size_t safelen = (size_t)(safe - out);
    if (len > safelen) {
        len = safelen;
    }

    /* `from` and `out` may belong to unrelated objects, and relational comparison of such
     * pointers is undefined in C, so the overlap test is done on integer addresses. Using
     * differences instead of `out + len` also avoids forming an address past the buffers. */
    uintptr_t out_addr = (uintptr_t)out;
    uintptr_t from_addr = (uintptr_t)from;
    int olap_src = from_addr >= out_addr && from_addr - out_addr < len;
    int olap_dst = out_addr >= from_addr && out_addr - from_addr < len;

    /* For all cases without overlap, memcpy is ideal */
    if (!(olap_src || olap_dst)) {
        memcpy(out, from, len);
        return out + len;
    }

    /* Complete overlap: Source == destination */
    if (out == from) {
        return out + len;
    }

    /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior,
     * we copy in runs no longer than the distance between the two pointers: within such a run the source and
     * destination of every memcpy are guaranteed not to overlap. The distance effectively becomes a safe look-behind
     * or look-ahead window. */
    size_t non_olap_size = (size_t)(from_addr > out_addr ? from_addr - out_addr : out_addr - from_addr);

    /* So this doesn't give us a worst case scenario of function calls in a loop,
     * we break each run down into copy blocks of fixed lengths.
     *
     * TODO: The memcpy calls aren't inlined on architectures with strict memory alignment
     */
    while (len) {
        size_t tocopy = non_olap_size < len ? non_olap_size : len;
        len -= tocopy;

        while (tocopy >= 16) {
            memcpy(out, from, 16);
            out += 16;
            from += 16;
            tocopy -= 16;
        }

        if (tocopy >= 8) {
            memcpy(out, from, 8);
            out += 8;
            from += 8;
            tocopy -= 8;
        }

        if (tocopy >= 4) {
            memcpy(out, from, 4);
            out += 4;
            from += 4;
            tocopy -= 4;
        }

        while (tocopy--) {
            *out++ = *from++;
        }
    }

    return out;
}
#endif