Merge pull request #13 from huachaohuang/patch-1

Allow compiling in nested packages.
diff --git a/README b/README
index 3bc8888..c60dab9 100644
--- a/README
+++ b/README
@@ -29,7 +29,7 @@
 
 Performance
 ===========
- 
+
 Snappy is intended to be fast. On a single core of a Core i7 processor
 in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
 about 500 MB/sec or more. (These numbers are for the slowest inputs in our
@@ -67,7 +67,7 @@
 
 Note that Snappy, both the implementation and the main interface,
 is written in C++. However, several third-party bindings to other languages
-are available; see the Google Code page at http://code.google.com/p/snappy/
+are available; see the home page at http://google.github.io/snappy/
 for more information. Also, if you want to use Snappy from C code, you can
 use the included C bindings in snappy-c.h.
 
@@ -102,12 +102,12 @@
 Google Test library installed, unit test behavior (especially failures) will be
 significantly more user-friendly. You can find Google Test at
 
-  http://code.google.com/p/googletest/
+  http://github.com/google/googletest
 
 You probably also want the gflags library for handling of command-line flags;
 you can find it at
 
-  http://code.google.com/p/google-gflags/
+  http://gflags.github.io/gflags/
 
 In addition to the unit tests, snappy contains microbenchmarks used to
 tune compression and decompression performance. These are automatically run
@@ -129,7 +129,11 @@
 Contact
 =======
 
-Snappy is distributed through Google Code. For the latest version, a bug tracker,
+Snappy is distributed through GitHub. For the latest version, a bug tracker,
 and other information, see
 
-  http://code.google.com/p/snappy/
+  http://google.github.io/snappy/
+
+or the repository at
+
+  https://github.com/google/snappy
diff --git a/autogen.sh b/autogen.sh
index 9d0ebe9..9cb502e 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -2,6 +2,11 @@
 rm -rf autom4te.cache
 aclocal -I m4
 autoheader
-libtoolize --copy
+if glibtoolize --version >/dev/null 2>/dev/null; then
+  LIBTOOLIZE=${LIBTOOLIZE:-glibtoolize}
+else
+  LIBTOOLIZE=${LIBTOOLIZE:-libtoolize}
+fi
+$LIBTOOLIZE --copy
 automake --add-missing --copy
 autoconf
diff --git a/snappy-stubs-internal.h b/snappy-stubs-internal.h
index ddca1a8..1954c63 100644
--- a/snappy-stubs-internal.h
+++ b/snappy-stubs-internal.h
@@ -116,6 +116,15 @@
 // sub-architectures.
 //
 // This is a mess, but there's not much we can do about it.
+//
+// To further complicate matters, only LDR instructions (single reads) are
+// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
+// explicitly tell the compiler that these accesses can be unaligned, it can and
+// will combine accesses. On armcc, this is signaled by accessing through the
+// type (uint32 __packed *), but GCC has no such attribute
+// (it ignores __attribute__((packed)) on individual variables). However,
+// we can tell it that a _struct_ is unaligned, which has the same effect,
+// so we do that.
 
 #elif defined(__arm__) && \
       !defined(__ARM_ARCH_4__) && \
@@ -131,11 +140,39 @@
       !defined(__ARM_ARCH_6ZK__) && \
       !defined(__ARM_ARCH_6T2__)
 
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
-#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#if __GNUC__
+#define ATTRIBUTE_PACKED __attribute__((__packed__))
+#else
+#define ATTRIBUTE_PACKED
+#endif
 
-#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
-#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+namespace base {
+namespace internal {
+
+struct Unaligned16Struct {
+  uint16 value;
+  uint8 dummy;  // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+struct Unaligned32Struct {
+  uint32 value;
+  uint8 dummy;  // To make the size non-power-of-two.
+} ATTRIBUTE_PACKED;
+
+}  // namespace internal
+}  // namespace base
+
+#define UNALIGNED_LOAD16(_p) \
+    ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value)
+#define UNALIGNED_LOAD32(_p) \
+    ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value)
+
+#define UNALIGNED_STORE16(_p, _val) \
+    ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \
+         (_val))
+#define UNALIGNED_STORE32(_p, _val) \
+    ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \
+         (_val))
 
 // TODO(user): NEON supports unaligned 64-bit loads and stores.
 // See if that would be more efficient on platforms supporting it,
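
A minimal standalone sketch of the packed-struct technique above, assuming
GCC/Clang semantics; the struct name, helper, and test buffer here are
illustrative, not part of snappy:

    #include <stdint.h>
    #include <stdio.h>

    // Packing the struct tells the compiler its members may sit at any byte
    // address, so on ARM it emits only single byte-safe LDR loads for
    // 'value', never combined LDRD/LDM accesses.
    struct Unaligned32 {
      uint32_t value;
      uint8_t dummy;  // Keeps sizeof() non-power-of-two, as in the patch.
    } __attribute__((__packed__));

    static uint32_t LoadU32(const void* p) {
      return reinterpret_cast<const Unaligned32*>(p)->value;
    }

    int main() {
      unsigned char buf[8] = {0x78, 0x56, 0x34, 0x12, 0xaa, 0, 0, 0};
      // An unaligned 32-bit read starting at offset 1.
      printf("%08x\n", (unsigned)LoadU32(buf + 1));  // aa123456, little-endian
      return 0;
    }
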
diff --git a/snappy-test.h b/snappy-test.h
index dbc55b9..5fb09c7 100644
--- a/snappy-test.h
+++ b/snappy-test.h
@@ -196,6 +196,7 @@
 void Test_Snappy_FourByteOffset();
 void Test_SnappyCorruption_TruncatedVarint();
 void Test_SnappyCorruption_UnterminatedVarint();
+void Test_SnappyCorruption_OverflowingVarint();
 void Test_Snappy_ReadPastEndOfBuffer();
 void Test_Snappy_FindMatchLength();
 void Test_Snappy_FindMatchLengthRandom();
@@ -500,6 +501,7 @@
   snappy::Test_Snappy_FourByteOffset();
   snappy::Test_SnappyCorruption_TruncatedVarint();
   snappy::Test_SnappyCorruption_UnterminatedVarint();
+  snappy::Test_SnappyCorruption_OverflowingVarint();
   snappy::Test_Snappy_ReadPastEndOfBuffer();
   snappy::Test_Snappy_FindMatchLength();
   snappy::Test_Snappy_FindMatchLengthRandom();
diff --git a/snappy.cc b/snappy.cc
index d81e6b3..8a3668c 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -364,9 +364,9 @@
       //
       // Heuristic match skipping: If 32 bytes are scanned with no matches
       // found, start looking only at every other byte. If 32 more bytes are
-      // scanned, look at every third byte, etc.. When a match is found,
-      // immediately go back to looking at every byte. This is a small loss
-      // (~5% performance, ~0.1% density) for compressible data due to more
+      // scanned (or skipped), look at every third byte, etc. When a match is
+      // found, immediately go back to looking at every byte. This is a small
+      // loss (~5% performance, ~0.1% density) for compressible data due to more
       // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
       // win since the compressor quickly "realizes" the data is incompressible
       // and doesn't bother looking for matches everywhere.
@@ -382,7 +382,8 @@
         ip = next_ip;
         uint32 hash = next_hash;
         assert(hash == Hash(ip, shift));
-        uint32 bytes_between_hash_lookups = skip++ >> 5;
+        uint32 bytes_between_hash_lookups = skip >> 5;
+        skip += bytes_between_hash_lookups;
         next_ip = ip + bytes_between_hash_lookups;
         if (PREDICT_FALSE(next_ip > ip_limit)) {
           goto emit_remainder;
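
A toy comparison of the two skipping schemes swapped above (a standalone
sketch, not snappy code; the initial value of 32 is assumed to match the
skip initialization in the surrounding snappy.cc loop):

    #include <stdint.h>
    #include <stdio.h>

    int main() {
      // Old scheme: step = skip++ >> 5, so skip grows by one per probe.
      // New scheme: step = skip >> 5; skip += step, so skip grows with the
      // bytes covered (scanned or skipped) and the step ramps up faster on
      // incompressible input.
      uint32_t skip_old = 32, pos_old = 0;
      uint32_t skip_new = 32, pos_new = 0;
      for (int probe = 1; probe <= 64; ++probe) {
        uint32_t step_old = skip_old++ >> 5;
        uint32_t step_new = skip_new >> 5;
        skip_new += step_new;
        pos_old += step_old;
        pos_new += step_new;
        if (probe % 16 == 0) {
          printf("after %2d probes: old at byte %3u, new at byte %3u\n",
                 probe, pos_old, pos_new);
        }
      }
      return 0;
    }
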
@@ -545,7 +546,9 @@
       if (n == 0) return false;
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
       reader_->Skip(1);
-      *result |= static_cast<uint32>(c & 0x7f) << shift;
+      uint32 val = c & 0x7f;
+      if (((val << shift) >> shift) != val) return false;
+      *result |= val << shift;
       if (c < 128) {
         break;
       }
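
Read in isolation, the added guard amounts to the following decode loop (a
minimal sketch with assumed names, mirroring the shift-back overflow check
rather than copying snappy's reader):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    // Decodes a base-128 varint from p[0..n) into *result. Returns false on
    // truncation or if the encoded value does not fit in 32 bits.
    static bool DecodeVarint32(const unsigned char* p, size_t n,
                               uint32_t* result) {
      *result = 0;
      for (uint32_t shift = 0, i = 0; shift <= 28 && i < n; shift += 7, ++i) {
        uint32_t val = p[i] & 0x7f;
        // If shifting val up and back down changes it, high bits fell off
        // the top of the 32-bit result: reject instead of silently wrapping.
        if (((val << shift) >> shift) != val) return false;
        *result |= val << shift;
        if (p[i] < 128) return true;  // high bit clear: last byte
      }
      return false;  // truncated input, or a varint longer than five bytes
    }

    int main() {
      const unsigned char ok[] = {0xac, 0x02};  // 300 encoded in two bytes
      uint32_t v = 0;
      printf("%d %u\n", DecodeVarint32(ok, sizeof(ok), &v), v);  // 1 300
      return 0;
    }
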
diff --git a/snappy_unittest.cc b/snappy_unittest.cc
index 3081662..65ac16a 100644
--- a/snappy_unittest.cc
+++ b/snappy_unittest.cc
@@ -1007,6 +1007,20 @@
                             &uncompressed));
 }
 
+TEST(SnappyCorruption, OverflowingVarint) {
+  string compressed, uncompressed;
+  size_t ulength;
+  compressed.push_back('\xfb');
+  compressed.push_back('\xff');
+  compressed.push_back('\xff');
+  compressed.push_back('\xff');
+  compressed.push_back('\x7f');
+  CHECK(!CheckUncompressedLength(compressed, &ulength));
+  CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+  CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
+                            &uncompressed));
+}
+
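
Why these five bytes trip the new check: each byte contributes its low seven
bits, and the final group lands at shift 28, where only four bits of a uint32
remain (the arithmetic below follows the decode loop above; it is not part of
the patch):

    0xfb -> val 0x7b, shift  0  (high bit set, continue)
    0xff -> val 0x7f, shift  7  (continue)
    0xff -> val 0x7f, shift 14  (continue)
    0xff -> val 0x7f, shift 21  (continue)
    0x7f -> val 0x7f, shift 28: (0x7f << 28) >> 28 = 0x0f != 0x7f -> reject

Before the fix, the high bits of the last group simply wrapped away and the
decoder reported a bogus uncompressed length; now all three checked calls
reject the buffer.
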
 TEST(Snappy, ReadPastEndOfBuffer) {
   // Check that we do not read past end of input