Merge pull request #6 from deviance/provide-pkg-config-data
Provide pkg-config data
diff --git a/README b/README
index 3bc8888..c60dab9 100644
--- a/README
+++ b/README
@@ -29,7 +29,7 @@
Performance
===========
-
+
Snappy is intended to be fast. On a single core of a Core i7 processor
in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
about 500 MB/sec or more. (These numbers are for the slowest inputs in our
@@ -67,7 +67,7 @@
Note that Snappy, both the implementation and the main interface,
is written in C++. However, several third-party bindings to other languages
-are available; see the Google Code page at http://code.google.com/p/snappy/
+are available; see the home page at http://google.github.io/snappy/
for more information. Also, if you want to use Snappy from C code, you can
use the included C bindings in snappy-c.h.
@@ -102,12 +102,12 @@
Google Test library installed, unit test behavior (especially failures) will be
significantly more user-friendly. You can find Google Test at
- http://code.google.com/p/googletest/
+ http://github.com/google/googletest
You probably also want the gflags library for handling of command-line flags;
you can find it at
- http://code.google.com/p/google-gflags/
+ http://gflags.github.io/gflags/
In addition to the unit tests, snappy contains microbenchmarks used to
tune compression and decompression performance. These are automatically run
@@ -129,7 +129,11 @@
Contact
=======
-Snappy is distributed through Google Code. For the latest version, a bug tracker,
+Snappy is distributed through GitHub. For the latest version, a bug tracker,
and other information, see
- http://code.google.com/p/snappy/
+ http://google.github.io/snappy/
+
+or the repository at
+
+ https://github.com/google/snappy
diff --git a/autogen.sh b/autogen.sh
index 9d0ebe9..9cb502e 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -2,6 +2,11 @@
rm -rf autom4te.cache
aclocal -I m4
autoheader
-libtoolize --copy
+if glibtoolize --version >/dev/null 2>/dev/null; then
+ LIBTOOLIZE=${LIBTOOLIZE:-glibtoolize}
+else
+ LIBTOOLIZE=${LIBTOOLIZE:-libtoolize}
+fi
+$LIBTOOLIZE --copy
automake --add-missing --copy
autoconf
diff --git a/configure.ac b/configure.ac
index e5b97c1..c460138 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,6 +9,7 @@
AC_INIT([snappy], [snappy_major.snappy_minor.snappy_patchlevel])
AC_CONFIG_MACRO_DIR([m4])
+AC_CONFIG_AUX_DIR([.])
# These are flags passed to automake (though they look like gcc flags!)
AM_INIT_AUTOMAKE([-Wall])
diff --git a/snappy-internal.h b/snappy-internal.h
index 0653dc6..c4d1f6d 100644
--- a/snappy-internal.h
+++ b/snappy-internal.h
@@ -144,6 +144,68 @@
}
#endif
+// Lookup tables for decompression code. Give --snappy_dump_decompression_table
+// to the unit test to recompute char_table.
+
+enum {  // Tag kind; OR'd into the low two bits of an opcode byte.
+ LITERAL = 0,  // Upper 6 bits: len-1, or 60..63 for 1..4 extra length bytes
+ COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
+ COPY_2_BYTE_OFFSET = 2,  // Upper 6 bits: len-1; offset in the next two bytes
+ COPY_4_BYTE_OFFSET = 3  // Upper 6 bits: len-1; offset in the next four bytes
+};
+static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
+
+// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+static const uint32 wordmask[] = {
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+};
+
+// Data stored per entry in lookup table:
+// Range Bits-used Description
+// ------------------------------------
+// 1..64 0..7 Literal/copy length encoded in opcode byte
+// 0..7 8..10 Copy offset encoded in opcode byte / 256
+// 0..4 11..13 Extra bytes after opcode
+//
+// We use eight bits for the length even though 7 would have sufficed
+// because of efficiency reasons:
+// (1) Extracting a byte is faster than a bit-field
+// (2) It properly aligns copy offset so we do not need a <<8
+static const uint16 char_table[256] = {
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+};
+
} // end namespace internal
} // end namespace snappy
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
index ddca1a8..1954c63 100644
--- a/snappy-stubs-internal.h
+++ b/snappy-stubs-internal.h
@@ -116,6 +116,15 @@
// sub-architectures.
//
// This is a mess, but there's not much we can do about it.
+//
+// To further complicate matters, only LDR instructions (single reads) are
+// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
+// explicitly tell the compiler that these accesses can be unaligned, it can and
+// will combine accesses. On armcc, this is signalled by accessing
+// through the type (uint32 __packed *), but GCC has no such attribute
+// (it ignores __attribute__((packed)) on individual variables). However,
+// we can tell it that a _struct_ is unaligned, which has the same effect,
+// so we do that.
#elif defined(__arm__) && \
!defined(__ARM_ARCH_4__) && \
@@ -131,11 +140,39 @@
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
-#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#if __GNUC__
+#define ATTRIBUTE_PACKED __attribute__((__packed__))
+#else
+#define ATTRIBUTE_PACKED
+#endif
-#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
-#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+namespace base {
+namespace internal {
+
+struct Unaligned16Struct {  // Packed (when ATTRIBUTE_PACKED is non-empty) so accesses to `value` may be unaligned.
+ uint16 value;  // The field read/written by UNALIGNED_LOAD16 / UNALIGNED_STORE16.
+ uint8 dummy; // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+struct Unaligned32Struct {  // Packed (when ATTRIBUTE_PACKED is non-empty) so accesses to `value` may be unaligned.
+ uint32 value;  // The field read/written by UNALIGNED_LOAD32 / UNALIGNED_STORE32.
+ uint8 dummy; // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+} // namespace internal
+} // namespace base
+
+#define UNALIGNED_LOAD16(_p) \
+ ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value)
+#define UNALIGNED_LOAD32(_p) \
+ ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value)
+
+#define UNALIGNED_STORE16(_p, _val) \
+ ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \
+ (_val))
+#define UNALIGNED_STORE32(_p, _val) \
+ ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \
+ (_val))
// TODO(user): NEON supports unaligned 64-bit loads and stores.
// See if that would be more efficient on platforms supporting it,
diff --git a/snappy-stubs-public.h.in b/snappy-stubs-public.h.in
index ebe676c..96989ac 100644
--- a/snappy-stubs-public.h.in
+++ b/snappy-stubs-public.h.in
@@ -80,9 +80,11 @@
typedef std::string string;
+#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&)
+#endif
#if !@ac_cv_have_sys_uio_h@
// Windows does not have an iovec type, yet the concept is universally useful.
diff --git a/snappy-test.h b/snappy-test.h
index dbc55b9..5fb09c7 100644
--- a/snappy-test.h
+++ b/snappy-test.h
@@ -196,6 +196,7 @@
void Test_Snappy_FourByteOffset();
void Test_SnappyCorruption_TruncatedVarint();
void Test_SnappyCorruption_UnterminatedVarint();
+void Test_SnappyCorruption_OverflowingVarint();
void Test_Snappy_ReadPastEndOfBuffer();
void Test_Snappy_FindMatchLength();
void Test_Snappy_FindMatchLengthRandom();
@@ -500,6 +501,7 @@
snappy::Test_Snappy_FourByteOffset();
snappy::Test_SnappyCorruption_TruncatedVarint();
snappy::Test_SnappyCorruption_UnterminatedVarint();
+ snappy::Test_SnappyCorruption_OverflowingVarint();
snappy::Test_Snappy_ReadPastEndOfBuffer();
snappy::Test_Snappy_FindMatchLength();
snappy::Test_Snappy_FindMatchLengthRandom();
diff --git a/snappy.cc b/snappy.cc
index b6ca7ec..8a3668c 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -39,6 +39,14 @@
namespace snappy {
+using internal::COPY_1_BYTE_OFFSET;
+using internal::COPY_2_BYTE_OFFSET;
+using internal::COPY_4_BYTE_OFFSET;
+using internal::LITERAL;
+using internal::char_table;
+using internal::kMaximumTagLength;
+using internal::wordmask;
+
// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
@@ -76,14 +84,6 @@
return 32 + source_len + source_len/6;
}
-enum {
- LITERAL = 0,
- COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
- COPY_2_BYTE_OFFSET = 2,
- COPY_4_BYTE_OFFSET = 3
-};
-static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
-
// Copy "len" bytes from "src" to "op", one byte at a time. Used for
// handling COPY operations where the input and output regions may
// overlap. For example, suppose:
@@ -364,9 +364,9 @@
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
- // scanned, look at every third byte, etc.. When a match is found,
- // immediately go back to looking at every byte. This is a small loss
- // (~5% performance, ~0.1% density) for compressible data due to more
+ // scanned (or skipped), look at every third byte, etc. When a match is
+ // found, immediately go back to looking at every byte. This is a small
+ // loss (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
@@ -382,7 +382,8 @@
ip = next_ip;
uint32 hash = next_hash;
assert(hash == Hash(ip, shift));
- uint32 bytes_between_hash_lookups = skip++ >> 5;
+ uint32 bytes_between_hash_lookups = skip >> 5;
+ skip += bytes_between_hash_lookups;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
@@ -493,162 +494,6 @@
// bool TryFastAppend(const char* ip, size_t available, size_t length);
// };
-// -----------------------------------------------------------------------
-// Lookup table for decompression code. Generated by ComputeTable() below.
-// -----------------------------------------------------------------------
-
-// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
-static const uint32 wordmask[] = {
- 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
-};
-
-// Data stored per entry in lookup table:
-// Range Bits-used Description
-// ------------------------------------
-// 1..64 0..7 Literal/copy length encoded in opcode byte
-// 0..7 8..10 Copy offset encoded in opcode byte / 256
-// 0..4 11..13 Extra bytes after opcode
-//
-// We use eight bits for the length even though 7 would have sufficed
-// because of efficiency reasons:
-// (1) Extracting a byte is faster than a bit-field
-// (2) It properly aligns copy offset so we do not need a <<8
-static const uint16 char_table[256] = {
- 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
- 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
- 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
- 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
- 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
- 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
- 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
- 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
- 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
- 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
- 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
- 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
- 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
- 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
- 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
- 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
- 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
- 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
- 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
- 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
- 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
- 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
- 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
- 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
- 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
- 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
- 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
- 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
- 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
- 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
- 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
- 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
-};
-
-// In debug mode, allow optional computation of the table at startup.
-// Also, check that the decompression table is correct.
-#ifndef NDEBUG
-DEFINE_bool(snappy_dump_decompression_table, false,
- "If true, we print the decompression table at startup.");
-
-static uint16 MakeEntry(unsigned int extra,
- unsigned int len,
- unsigned int copy_offset) {
- // Check that all of the fields fit within the allocated space
- assert(extra == (extra & 0x7)); // At most 3 bits
- assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
- assert(len == (len & 0x7f)); // At most 7 bits
- return len | (copy_offset << 8) | (extra << 11);
-}
-
-static void ComputeTable() {
- uint16 dst[256];
-
- // Place invalid entries in all places to detect missing initialization
- int assigned = 0;
- for (int i = 0; i < 256; i++) {
- dst[i] = 0xffff;
- }
-
- // Small LITERAL entries. We store (len-1) in the top 6 bits.
- for (unsigned int len = 1; len <= 60; len++) {
- dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
- assigned++;
- }
-
- // Large LITERAL entries. We use 60..63 in the high 6 bits to
- // encode the number of bytes of length info that follow the opcode.
- for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
- // We set the length field in the lookup table to 1 because extra
- // bytes encode len-1.
- dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
- assigned++;
- }
-
- // COPY_1_BYTE_OFFSET.
- //
- // The tag byte in the compressed data stores len-4 in 3 bits, and
- // offset/256 in 5 bits. offset%256 is stored in the next byte.
- //
- // This format is used for length in range [4..11] and offset in
- // range [0..2047]
- for (unsigned int len = 4; len < 12; len++) {
- for (unsigned int offset = 0; offset < 2048; offset += 256) {
- dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
- MakeEntry(1, len, offset>>8);
- assigned++;
- }
- }
-
- // COPY_2_BYTE_OFFSET.
- // Tag contains len-1 in top 6 bits, and offset in next two bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
- assigned++;
- }
-
- // COPY_4_BYTE_OFFSET.
- // Tag contents len-1 in top 6 bits, and offset in next four bytes.
- for (unsigned int len = 1; len <= 64; len++) {
- dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
- assigned++;
- }
-
- // Check that each entry was initialized exactly once.
- if (assigned != 256) {
- fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
- abort();
- }
- for (int i = 0; i < 256; i++) {
- if (dst[i] == 0xffff) {
- fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
- abort();
- }
- }
-
- if (FLAGS_snappy_dump_decompression_table) {
- printf("static const uint16 char_table[256] = {\n ");
- for (int i = 0; i < 256; i++) {
- printf("0x%04x%s",
- dst[i],
- ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
- }
- printf("};\n");
- }
-
- // Check that computed table matched recorded table
- for (int i = 0; i < 256; i++) {
- if (dst[i] != char_table[i]) {
- fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
- i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
- abort();
- }
- }
-}
-#endif /* !NDEBUG */
// Helper class for decompression
class SnappyDecompressor {
@@ -701,7 +546,9 @@
if (n == 0) return false;
const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
reader_->Skip(1);
- *result |= static_cast<uint32>(c & 0x7f) << shift;
+ uint32 val = c & 0x7f;
+ if (((val << shift) >> shift) != val) return false;
+ *result |= val << shift;
if (c < 128) {
break;
}
@@ -966,7 +813,7 @@
const size_t output_iov_count_;
// We are currently writing into output_iov_[curr_iov_index_].
- int curr_iov_index_;
+ size_t curr_iov_index_;
// Bytes written to output_iov_[curr_iov_index_] so far.
size_t curr_iov_written_;
@@ -977,7 +824,7 @@
// Maximum number of bytes that will be decompressed into output_iov_.
size_t output_limit_;
- inline char* GetIOVecPointer(int index, size_t offset) {
+ inline char* GetIOVecPointer(size_t index, size_t offset) {
return reinterpret_cast<char*>(output_iov_[index].iov_base) +
offset;
}
@@ -1058,7 +905,7 @@
}
// Locate the iovec from which we need to start the copy.
- int from_iov_index = curr_iov_index_;
+ size_t from_iov_index = curr_iov_index_;
size_t from_iov_offset = curr_iov_written_;
while (offset > 0) {
if (from_iov_offset >= offset) {
@@ -1067,8 +914,8 @@
}
offset -= from_iov_offset;
+ assert(from_iov_index > 0);
--from_iov_index;
- assert(from_iov_index >= 0);
from_iov_offset = output_iov_[from_iov_index].iov_len;
}
diff --git a/snappy_unittest.cc b/snappy_unittest.cc
index 4a80f2a..65ac16a 100644
--- a/snappy_unittest.cc
+++ b/snappy_unittest.cc
@@ -64,6 +64,9 @@
DEFINE_bool(write_uncompressed, false,
"Write uncompressed versions of each file to <file>.uncomp");
+DEFINE_bool(snappy_dump_decompression_table, false,
+ "If true, we print the decompression table during tests.");
+
namespace snappy {
@@ -1004,6 +1007,20 @@
&uncompressed));
}
+TEST(SnappyCorruption, OverflowingVarint) {  // Input is only a 5-byte varint; every entry point must reject it.
+ string compressed, uncompressed;
+ size_t ulength;
+ compressed.push_back('\xfb');  // Bytes >= 0x80: the varint continues.
+ compressed.push_back('\xff');
+ compressed.push_back('\xff');
+ compressed.push_back('\xff');
+ compressed.push_back('\x7f');  // < 0x80 ends the varint; presumably its 7 bits land past bit 31 -- confirm against the reader's shift step.
+ CHECK(!CheckUncompressedLength(compressed, &ulength));  // Length query must fail...
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));  // ...as must validation...
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),  // ...and full decompression.
+ &uncompressed));
+}
+
TEST(Snappy, ReadPastEndOfBuffer) {
// Check that we do not read past end of input
@@ -1160,6 +1177,100 @@
}
}
+static uint16 MakeEntry(unsigned int extra,  // Extra bytes after the opcode; packed at bits 11..13
+ unsigned int len,  // Literal/copy length; packed into the low byte
+ unsigned int copy_offset) {  // Copy offset / 256; packed at bits 8..10
+ // Check that all of the fields fit within the allocated space
+ assert(extra == (extra & 0x7)); // At most 3 bits
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
+ assert(len == (len & 0x7f)); // At most 7 bits
+ return len | (copy_offset << 8) | (extra << 11);  // One lookup-table entry in char_table's layout
+}
+
+// Recompute the decompression table from the tag layout, check it against the
+// recorded char_table, and optionally print out the computed one.
+TEST(Snappy, VerifyCharTable) {
+ using snappy::internal::LITERAL;
+ using snappy::internal::COPY_1_BYTE_OFFSET;
+ using snappy::internal::COPY_2_BYTE_OFFSET;
+ using snappy::internal::COPY_4_BYTE_OFFSET;
+ using snappy::internal::char_table;
+ using snappy::internal::wordmask;  // NOTE(review): not referenced in this test body.
+
+ uint16 dst[256];
+
+ // Fill every slot with the sentinel 0xffff to detect missing initialization
+ int assigned = 0;  // Counts table stores performed below
+ for (int i = 0; i < 256; i++) {
+ dst[i] = 0xffff;
+ }
+
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
+ for (unsigned int len = 1; len <= 60; len++) {
+ dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
+ assigned++;
+ }
+
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
+ // encode the number of bytes of length info that follow the opcode.
+ for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
+ // We set the length field in the lookup table to 1 because extra
+ // bytes encode len-1.
+ dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+ assigned++;
+ }
+
+ // COPY_1_BYTE_OFFSET.
+ //
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
+ // offset/256 in 3 bits. offset%256 is stored in the next byte.
+ //
+ // This format is used for length in range [4..11] and offset in
+ // range [0..2047]
+ for (unsigned int len = 4; len < 12; len++) {
+ for (unsigned int offset = 0; offset < 2048; offset += 256) {
+ dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
+ MakeEntry(1, len, offset>>8);
+ assigned++;
+ }
+ }
+
+ // COPY_2_BYTE_OFFSET.
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
+ for (unsigned int len = 1; len <= 64; len++) {
+ dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
+ assigned++;
+ }
+
+ // COPY_4_BYTE_OFFSET.
+ // Tag contains len-1 in top 6 bits, and offset in next four bytes.
+ for (unsigned int len = 1; len <= 64; len++) {
+ dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
+ assigned++;
+ }
+
+ // 256 stores with no sentinel left over: each entry was set exactly once.
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
+ for (int i = 0; i < 256; i++) {
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
+ }
+
+ if (FLAGS_snappy_dump_decompression_table) {
+ printf("static const uint16 char_table[256] = {\n ");
+ for (int i = 0; i < 256; i++) {
+ printf("0x%04x%s",
+ dst[i],
+ ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
+ }
+ printf("};\n");
+ }
+
+ // Check that the computed table matches the recorded table.
+ for (int i = 0; i < 256; i++) {
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
+ }
+}
+
static void CompressFile(const char* fname) {
string fullinput;
CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));