Merge pull request #531 from lz4/dev Preparing v1.8.2

commit: b3692db46d2b23a7c0af2d5e69988c94f126e10a [log] [tgz]
author: Yann Collet <Cyan4973@users.noreply.github.com> Mon May 07 14:38:45 2018 -0700
committer: GitHub <noreply@github.com> Mon May 07 14:38:45 2018 -0700
tree: d9aa14d72978e78a1ca2f1aee99511ce2d04293c
parent: dfed9fa1d77f0434306d377c4da1f7191d3ba08a [diff]
parent: bf6fd938e522150e2a30bee978102769a51f4b3e [diff]
diff --git a/.gitignore b/.gitignore
index 117b02d..829270b 100644
--- a/.gitignore
+++ b/.gitignore

@@ -29,3 +29,7 @@
 # Mac
 .DS_Store
 *.dSYM
+
+# Windows / Msys
+nul
+ld.exe*

diff --git a/.travis.yml b/.travis.yml
index 3353dee..0a876f9 100644
--- a/.travis.yml
+++ b/.travis.yml

@@ -14,12 +14,12 @@
       env: Ubu=12.04cont Cmd='make -C tests test-lz4 test-lz4c test-fullbench' COMPILER=cc
 
     - os: linux
-      sudo: false
-      env: Ubu=12.04cont Cmd='make -C tests test-frametest test-fuzzer' COMPILER=cc
+      sudo: required
+      env: Ubu=12.04cont Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest test-fuzzer' COMPILER=cc
 
     - os: linux
       sudo: false
-      env: Ubu=12.04cont Cmd="make gpptest && make clean examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
+      env: Ubu=12.04cont Cmd="make gpptest && make clean && make examples && make clean cmake && make clean travis-install && make clean clangtest" COMPILER=cc
 
 
     # 14.04 LTS Server Edition 64 bit
@@ -59,7 +59,7 @@
             - libc6-dev-i386
             - gcc-multilib
 
-    - env: Ubu=14.04 Cmd='make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
+    - env: Ubu=14.04 Cmd='sudo sysctl -w vm.mmap_min_addr="4096" && make -C tests test-frametest32 test-fuzzer32' COMPILER=cc
       dist: trusty
       sudo: required
       addons:
@@ -145,7 +145,15 @@
             - gcc-multilib
             - gcc-4.4
 
+    # tag-specific test
+    - if: tag =~ ^v[0-9]\.[0-9]
+      os: linux
+      sudo: false
+      env: Cmd="make -C tests checkTag && tests/checkTag $TRAVIS_BRANCH " COMPILER=cc
+
+
 script:
+  - uname -a
   - echo Cmd=$Cmd
   - $COMPILER -v
   - sh -c "$Cmd"

diff --git a/Makefile b/Makefile
index 0aa1b9d..86613fd 100644
--- a/Makefile
+++ b/Makefile

@@ -1,10 +1,8 @@
 # ################################################################
 # LZ4 - Makefile
-# Copyright (C) Yann Collet 2011-2016
+# Copyright (C) Yann Collet 2011-present
 # All rights reserved.
 #
-# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
-#
 # BSD license
 # Redistribution and use in source and binary forms, with or without modification,
 # are permitted provided that the following conditions are met:
@@ -58,6 +56,7 @@
 allmost: lib lz4 examples
 
 .PHONY: lib lib-release liblz4.a
+lib: liblz4.a
 lib lib-release liblz4.a:
 	@$(MAKE) -C $(LZ4DIR) $@
 
@@ -69,8 +68,8 @@
 	@cp $(PRGDIR)/lz4$(EXT) .
 
 .PHONY: examples
-examples: lib lz4
-	$(MAKE) -C $(EXDIR) test
+examples: liblz4.a
+	$(MAKE) -C $(EXDIR) all
 
 .PHONY: manuals
 manuals:
@@ -122,9 +121,14 @@
 list:
 	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
 
+.PHONY: check
+check:
+	$(MAKE) -C $(TESTDIR) test-lz4-essentials
+
 .PHONY: test
 test:
 	$(MAKE) -C $(TESTDIR) $@
+	$(MAKE) -C $(EXDIR) $@
 
 clangtest: clean
 	clang -v
@@ -139,10 +143,10 @@
 	@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
 
 usan: clean
-	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+	CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
 usan32: clean
-	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T1mn" NB_LOOPS=-i1
+	CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
 
 staticAnalyze: clean
 	CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
@@ -159,20 +163,17 @@
 versionsTest: clean
 	$(MAKE) -C $(TESTDIR) $@
 
-gpptest: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-
-gpptest32: clean
-	g++ -v
-	CC=g++ $(MAKE) -C $(LZ4DIR)  all    CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(PRGDIR)  native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
-	CC=g++ $(MAKE) -C $(TESTDIR) native CFLAGS="-m32 -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+gpptest gpptest32: CC = "$(CXX) -Wno-deprecated"
+gpptest gpptest32: CFLAGS = -O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
+gpptest32: CFLAGS += -m32
+gpptest gpptest32: clean
+	$(CXX) -v
+	CC=$(CC) $(MAKE) -C $(LZ4DIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(PRGDIR)  all CFLAGS="$(CFLAGS)"
+	CC=$(CC) $(MAKE) -C $(TESTDIR) all CFLAGS="$(CFLAGS)"
 
 c_standards: clean
-	# note : lz4 is not C90 compatible, because it requires long long support
+	CFLAGS="-std=c90   -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=gnu90 -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=c99   -Werror" $(MAKE) clean allmost
 	CFLAGS="-std=gnu99 -Werror" $(MAKE) clean allmost

diff --git a/NEWS b/NEWS
index f9c503f..0139e61 100644
--- a/NEWS
+++ b/NEWS

@@ -1,3 +1,15 @@
+v1.8.2
+perf: *much* faster dictionary compression on small files, by @felixhandte
+perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv)
+perf: slightly faster HC compression and decompression speed
+perf: very small compression ratio improvement
+fix : compression compatible with low memory addresses (< 0xFFFF)
+fix : decompression segfault when provided with NULL input, by @terrelln
+cli : new command --favor-decSpeed
+cli : benchmark mode more accurate for small inputs
+fullbench : can bench _destSize() variants, by @felixhandte
+doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv)
+
 v1.8.1
 perf : faster and stronger ultra modes (levels 10+)
 perf : slightly faster compression and decompression speed

diff --git a/README.md b/README.md
index 596fdac..406792a 100644
--- a/README.md
+++ b/README.md

@@ -43,33 +43,32 @@
 -------------------------
 
 The benchmark uses [lzbench], from @inikep
-compiled with GCC v6.2.0 on Linux 64-bits.
-The reference system uses a Core i7-3930K CPU @ 4.5GHz.
+compiled with GCC v7.3.0 on Linux 64-bits (Debian 4.15.17-1).
+The reference system uses a Core i7-6700K CPU @ 4.0GHz.
 Benchmark evaluates the compression of reference [Silesia Corpus]
 in single-thread mode.
 
 [lzbench]: https://github.com/inikep/lzbench
 [Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
 
-|  Compressor            | Ratio   | Compression | Decompression |
-|  ----------            | -----   | ----------- | ------------- |
-|  memcpy                |  1.000  | 7300 MB/s   |   7300 MB/s   |
-|**LZ4 fast 8  (v1.7.3)**|  1.799  |**911 MB/s** | **3360 MB/s** |
-|**LZ4 default (v1.7.3)**|**2.101**|**625 MB/s** | **3220 MB/s** |
-|  LZO 2.09              |  2.108  |  620 MB/s   |    845 MB/s   |
-|  QuickLZ 1.5.0         |  2.238  |  510 MB/s   |    600 MB/s   |
-|  Snappy 1.1.3          |  2.091  |  450 MB/s   |   1550 MB/s   |
-|  LZF v3.6              |  2.073  |  365 MB/s   |    820 MB/s   |
-|  [Zstandard] 1.1.1 -1  |  2.876  |  330 MB/s   |    930 MB/s   |
-|  [Zstandard] 1.1.1 -3  |  3.164  |  200 MB/s   |    810 MB/s   |
-| [zlib] deflate 1.2.8 -1|  2.730  |  100 MB/s   |    370 MB/s   |
-|**LZ4 HC -9 (v1.7.3)**  |**2.720**|   34 MB/s   | **3240 MB/s** |
-| [zlib] deflate 1.2.8 -6|  3.099  |   33 MB/s   |    390 MB/s   |
+|  Compressor             | Ratio   | Compression | Decompression |
+|  ----------             | -----   | ----------- | ------------- |
+|  memcpy                 |  1.000  |13100 MB/s   |  13100 MB/s   |
+|**LZ4 default (v1.8.2)** |**2.101**|**730 MB/s** | **3900 MB/s** |
+|  LZO 2.09               |  2.108  |  630 MB/s   |    800 MB/s   |
+|  QuickLZ 1.5.0          |  2.238  |  530 MB/s   |    720 MB/s   |
+|  Snappy 1.1.4           |  2.091  |  525 MB/s   |   1750 MB/s   |
+|  [Zstandard] 1.3.4 -1   |  2.877  |  470 MB/s   |   1380 MB/s   |
+|  LZF v3.6               |  2.073  |  380 MB/s   |    840 MB/s   |
+| [zlib] deflate 1.2.11 -1|  2.730  |  100 MB/s   |    380 MB/s   |
+|**LZ4 HC -9 (v1.8.2)**   |**2.721**|   40 MB/s   | **3920 MB/s** |
+| [zlib] deflate 1.2.11 -6|  3.099  |   34 MB/s   |    410 MB/s   |
 
 [zlib]: http://www.zlib.net/
 [Zstandard]: http://www.zstd.net/
 
-LZ4 is also compatible and well optimized for x32 mode, for which it provides an additional +10% speed performance.
+LZ4 is also compatible and well optimized for x32 mode,
+for which it provides some additional speed performance.
 
 
 Installation

diff --git a/circle.yml b/circle.yml
index 9a0d1ec..fa37590 100644
--- a/circle.yml
+++ b/circle.yml

@@ -11,10 +11,10 @@
     - clang -v; make clangtest && make clean
     - g++ -v; make gpptest     && make clean
     - gcc -v; make c_standards && make clean
-    - gcc-5 -v; make -C tests test-lz4 CC=gcc-5 MOREFLAGS=-Werror && make clean
-    - gcc-5 -v; make -C tests test-lz4c32 CC=gcc-5 MOREFLAGS="-I/usr/include/x86_64-linux-gnu -Werror" && make clean
-    - gcc-6 -v; make c_standards CC=gcc-6 && make clean
-    - gcc-6 -v; make -C tests test-lz4 CC=gcc-6 MOREFLAGS=-Werror && make clean
+    - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -Werror" make check && make clean
+    - gcc-5 -v; CC=gcc-5 CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
+    - gcc-6 -v; CC=gcc-6 make c_standards && make clean
+    - gcc-6 -v; CC=gcc-6 MOREFLAGS="-O2 -Werror" make check  && make clean
 # Shorter tests
     - make cmake               && make clean
     - make -C tests test-lz4
@@ -22,11 +22,11 @@
     - make -C tests test-frametest
     - make -C tests test-fullbench
     - make -C tests test-fuzzer && make clean
-    - make -C lib all && make clean
-    - pyenv global 3.4.4; CFLAGS=-I/usr/include/x86_64-linux-gnu make versionsTest && make clean
+    - make -C lib all          && make clean
+    - pyenv global 3.4.4; make versionsTest MOREFLAGS=-I/usr/include/x86_64-linux-gnu && make clean
     - make travis-install      && make clean
   # Longer tests
-    - gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
+    - gcc -v; CFLAGS="-O2 -m32 -Werror" CPPFLAGS=-I/usr/include/x86_64-linux-gnu make check && make clean
     - make usan                && make clean
     - clang -v; make staticAnalyze && make clean
   # Valgrind tests

diff --git a/contrib/gen_manual/Makefile b/contrib/gen_manual/Makefile
index 9fbe858..95abe2e 100644
--- a/contrib/gen_manual/Makefile
+++ b/contrib/gen_manual/Makefile

@@ -30,10 +30,10 @@
 # ################################################################
 
 
-CFLAGS ?= -O3
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
-CFLAGS += $(MOREFLAGS)
-FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+CXXFLAGS ?= -O3
+CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
+CXXFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
 
 LZ4API = ../../lib/lz4.h
 LZ4MANUAL = ../../doc/lz4_manual.html

diff --git a/doc/images/usingCDict_1_8_2.png b/doc/images/usingCDict_1_8_2.png
new file mode 100644
index 0000000..9434198
--- /dev/null
+++ b/doc/images/usingCDict_1_8_2.png
Binary files differ

diff --git a/doc/lz4_Block_format.md b/doc/lz4_Block_format.md
index 4e39b41..5438730 100644
--- a/doc/lz4_Block_format.md
+++ b/doc/lz4_Block_format.md

@@ -1,6 +1,6 @@
 LZ4 Block Format Description
 ============================
-Last revised: 2015-05-07.
+Last revised: 2018-04-25.
 Author : Yann Collet
 
 
@@ -29,8 +29,8 @@
 A sequence is a suite of literals (not-compressed bytes),
 followed by a match copy.
 
-Each sequence starts with a token.
-The token is a one byte value, separated into two 4-bits fields.
+Each sequence starts with a `token`.
+The `token` is a one byte value, separated into two 4-bits fields.
 Therefore each field ranges from 0 to 15.
 
 
@@ -42,46 +42,46 @@
 Each additional byte then represent a value from 0 to 255,
 which is added to the previous value to produce a total length.
 When the byte value is 255, another byte is output.
-There can be any number of bytes following the token. There is no "size limit".
+There can be any number of bytes following `token`. There is no "size limit".
 (Side note : this is why a not-compressible input block is expanded by 0.4%).
 
-Example 1 : A length of 48 will be represented as :
+Example 1 : A literal length of 48 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 33 : (=48-15) remaining length to reach 48
 
-Example 2 : A length of 280 will be represented as :
+Example 2 : A literal length of 280 will be represented as :
 
   - 15  : value for the 4-bits High field
   - 255 : following byte is maxed, since 280-15 >= 255
   - 10  : (=280 - 15 - 255) ) remaining length to reach 280
 
-Example 3 : A length of 15 will be represented as :
+Example 3 : A literal length of 15 will be represented as :
 
   - 15 : value for the 4-bits High field
   - 0  : (=15-15) yes, the zero must be output
 
-Following the token and optional length bytes, are the literals themselves.
+Following `token` and optional length bytes, are the literals themselves.
 They are exactly as numerous as previously decoded (length of literals).
 It's possible that there are zero literal.
 
 
 Following the literals is the match copy operation.
 
-It starts by the offset.
+It starts by the `offset`.
 This is a 2 bytes value, in little endian format
 (the 1st byte is the "low" byte, the 2nd one is the "high" byte).
 
-The offset represents the position of the match to be copied from.
+The `offset` represents the position of the match to be copied from.
 1 means "current position - 1 byte".
-The maximum offset value is 65535, 65536 cannot be coded.
+The maximum `offset` value is 65535, 65536 cannot be coded.
 Note that 0 is an invalid value, not used.
 
-Then we need to extract the match length.
+Then we need to extract the `matchlength`.
 For this, we use the second token field, the low 4-bits.
 Value, obviously, ranges from 0 to 15.
 However here, 0 means that the copy operation will be minimal.
-The minimum length of a match, called minmatch, is 4.
+The minimum length of a match, called `minmatch`, is 4.
 As a consequence, a 0 value means 4 bytes, and a value of 15 means 19+ bytes.
 Similar to literal length, on reaching the highest possible value (15),
 we output additional bytes, one at a time, with values ranging from 0 to 255.
@@ -90,18 +90,18 @@
 There is no limit to the number of optional bytes that can be output this way.
 (This points towards a maximum achievable compression ratio of about 250).
 
-Decoding the matchlength reaches the end of current sequence.
+Decoding the `matchlength` reaches the end of current sequence.
 Next byte will be the start of another sequence.
 But before moving to next sequence,
 it's time to use the decoded match position and length.
-The decoder copies matchlength bytes from match position to current position.
+The decoder copies `matchlength` bytes from match position to current position.
 
-In some cases, matchlength is larger than offset.
-Therefore, match pos + match length > current pos,
+In some cases, `matchlength` is larger than `offset`.
+Therefore, `match_pos + matchlength > current_pos`,
 which means that later bytes to copy are not yet decoded.
 This is called an "overlap match", and must be handled with special care.
-The most common case is an offset of 1,
-meaning the last byte is repeated matchlength times.
+A common case is an offset of 1,
+meaning the last byte is repeated `matchlength` times.
 
 
 Parsing restrictions
@@ -109,15 +109,28 @@
 There are specific parsing rules to respect in order to remain compatible
 with assumptions made by the decoder :
 
-1. The last 5 bytes are always literals
+1. The last 5 bytes are always literals.  In other words, the last five bytes
+   from the uncompressed input (or all bytes, if the input has less than five
+   bytes) must be encoded as literals on behalf of the last sequence.
+   The last sequence is incomplete, and stops right after the literals.
 2. The last match must start at least 12 bytes before end of block.
-   Consequently, a block with less than 13 bytes cannot be compressed.
+   The last match is part of the penultimate sequence,
+   since the last sequence stops right after literals.
+   Note that, as a consequence, blocks < 13 bytes cannot be compressed.
 
 These rules are in place to ensure that the decoder
-will never read beyond the input buffer, nor write beyond the output buffer.
+can speculatively execute copy instructions
+without ever reading nor writing beyond provided I/O buffers.
 
-Note that the last sequence is also incomplete,
-and stops right after literals.
+1. To copy literals from a non-last sequence, an 8-byte copy instruction
+   can always be safely issued (without reading past the input),
+   because literals are followed by a 2-byte offset,
+   and last sequence is at least 1+5 bytes long.
+2. Similarly, a match operation can speculatively copy up to 12 bytes
+   while remaining within output buffer boundaries.
+
+Empty inputs can be represented with a zero byte,
+interpreted as a token without literals and without a match.
 
 
 Additional notes

diff --git a/doc/lz4_Frame_format.md b/doc/lz4_Frame_format.md
index 77454b2..0c98df1 100644
--- a/doc/lz4_Frame_format.md
+++ b/doc/lz4_Frame_format.md

@@ -16,7 +16,7 @@
 
 ### Version
 
-1.6.0 (08/08/2017)
+1.6.1 (30/01/2018)
 
 
 Introduction
@@ -72,12 +72,15 @@
 
 __Frame Descriptor__
 
-3 to 15 Bytes, to be detailed in the next part.
-Most important part of the spec.
+3 to 15 Bytes, to be detailed in its own paragraph,
+as it is the most important part of the spec.
+
+The combined __Magic Number__ and __Frame Descriptor__ fields are sometimes
+called ___LZ4 Frame Header___. Its size varies between 7 and 19 bytes.
 
 __Data Blocks__
 
-To be detailed later on.
+To be detailed in its own paragraph.
 That’s where compressed data is stored.
 
 __EndMark__
@@ -98,6 +101,9 @@
 and also that the encoding/decoding process itself generated no distortion.
 Its usage is recommended.
 
+The combined __EndMark__ and __Content Checksum__ fields might sometimes be
+referred to as ___LZ4 Frame Footer___. Its size varies between 4 and 8 bytes.
+
 __Frame Concatenation__
 
 In some circumstances, it may be preferable to append multiple frames,
@@ -380,6 +386,8 @@
 Version changes
 ---------------
 
+1.6.1 : introduced terms "LZ4 Frame Header" and "LZ4 Frame Footer"
+
 1.6.0 : restored Dictionary ID field in Frame header
 
 1.5.1 : changed document format to MarkDown

diff --git a/doc/lz4_manual.html b/doc/lz4_manual.html
index 6b7935d..e5044fe 100644
--- a/doc/lz4_manual.html
+++ b/doc/lz4_manual.html

@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.8.1 Manual</title>
+<title>1.8.2 Manual</title>
 </head>
 <body>
-<h1>1.8.1 Manual</h1>
+<h1>1.8.2 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -15,8 +15,9 @@
 <li><a href="#Chapter5">Advanced Functions</a></li>
 <li><a href="#Chapter6">Streaming Compression Functions</a></li>
 <li><a href="#Chapter7">Streaming Decompression Functions</a></li>
-<li><a href="#Chapter8">Private definitions</a></li>
-<li><a href="#Chapter9">Obsolete Functions</a></li>
+<li><a href="#Chapter8">Unstable declarations</a></li>
+<li><a href="#Chapter9">Private definitions</a></li>
+<li><a href="#Chapter10">Obsolete Functions</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -42,9 +43,9 @@
 
 <a name="Chapter2"></a><h2>Version</h2><pre></pre>
 
-<pre><b>int LZ4_versionNumber (void);  </b>/**< library version number; to be used when checking dll version */<b>
+<pre><b>int LZ4_versionNumber (void);  </b>/**< library version number; useful to check dll version */<b>
 </b></pre><BR>
-<pre><b>const char* LZ4_versionString (void);   </b>/**< library version string; to be used when checking dll version */<b>
+<pre><b>const char* LZ4_versionString (void);   </b>/**< library version string; unseful to check dll version */<b>
 </b></pre><BR>
 <a name="Chapter3"></a><h2>Tuning parameter</h2><pre></pre>
 
@@ -53,7 +54,7 @@
 #endif
 </b><p> Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
  Increasing memory usage improves compression ratio
- Reduced memory usage can improve speed, due to cache effect
+ Reduced memory usage may improve speed, thanks to cache effect
  Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  
 </p></pre><BR>
@@ -65,12 +66,12 @@
     into already allocated 'dst' buffer of size 'dstCapacity'.
     Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
     It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'src' into a limited 'dst' budget,
+    If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dst' content is not valid.
-    This function never writes outside 'dst' buffer, nor read outside 'source' buffer.
-        srcSize : supported max value is LZ4_MAX_INPUT_VALUE
-        dstCapacity : full or partial size of buffer 'dst' (which must be already allocated)
+    Note : as a consequence, 'dst' content is not valid.
+    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+        srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+        dstCapacity : size of buffer 'dst' (which must be already allocated)
         return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
                   or 0 if compression fails 
 </p></pre><BR>
@@ -81,8 +82,7 @@
     return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
              If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
+             This function is protected against malicious data packets.
 </p></pre><BR>
 
 <a name="Chapter5"></a><h2>Advanced Functions</h2><pre></pre>
@@ -91,18 +91,18 @@
 </b><p>    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
     This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
         inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
         return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+              or 0, if input size is incorrect (too large or negative)
 </p></pre><BR>
 
 <pre><b>int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-</b><p>    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+</b><p>    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
 </p></pre><BR>
 
 <pre><b>int LZ4_sizeofState(void);
@@ -125,26 +125,30 @@
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
-</b><p>    originalSize : is the original uncompressed size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             Destination buffer must be already allocated. Its size must be >= 'originalSize' bytes.
-    note : This function respects memory boundaries for *properly formed* compressed data.
-           It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionally modified data stream (malicious input).
-           Use this function in trusted environment only (data to decode comes from a trusted source).
+</b><p>This function is a bit faster than LZ4_decompress_safe(),
+but it may misbehave on malformed input because it doesn't perform full validation of compressed data.
+    originalSize : is the uncompressed size to regenerate
+                   Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes.
+    return : number of bytes read from source buffer (== compressed size).
+             If the source stream is detected malformed, the function stops decoding and return a negative result.
+    note : This function is only usable if the originalSize of uncompressed data is known in advance.
+           The caller should also check that all the compressed input has been consumed properly,
+           i.e. that the return value matches the size of the buffer with compressed input.
+           The function never writes past the output buffer.  However, since it doesn't know its 'src' size,
+           it may read past the intended input.  Also, because match offsets are not validated during decoding,
+           reads from 'src' may underflow.  Use this function in trusted environment **only**.
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 </b><p>    This function decompress a compressed block of size 'srcSize' at position 'src'
     into destination buffer 'dst' of size 'dstCapacity'.
     The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that.
-    However, it's not accurate, and may write more than 'targetOutputSize' (but <= dstCapacity).
+    However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity).
    @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity)
-       Note : this number can be < 'targetOutputSize' should the compressed block contain less data.
-             Always control how many bytes were decoded.
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets.
+        Note : this number can also be < targetOutputSize, if compressed block contains less data.
+            Therefore, always control how many bytes were decoded.
+            If source stream is detected malformed, function returns a negative result.
+            This function is protected against malicious data packets.
 </p></pre><BR>
 
 <a name="Chapter6"></a><h2>Streaming Compression Functions</h2><pre></pre>
@@ -171,25 +175,29 @@
 </p></pre><BR>
 
 <pre><b>int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
-</b><p>  Compress content into 'src' using data from previously compressed blocks, improving compression ratio.
+</b><p>  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
   'dst' buffer must be already allocated.
   If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 
-  Important : Up to 64KB of previously compressed data is assumed to remain present and unmodified in memory !
-  Special 1 : If input buffer is a double-buffer, it can have any size, including < 64 KB.
-  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+  Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory!
+
+  Special 1 : When input is a double-buffer, they can have any size, including < 64 KB.
+              Make sure that buffers are separated by at least one byte.
+              This way, each block only depends on previous block.
+  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
 
  @return : size of compressed block
-           or 0 if there is an error (typically, compressed data cannot fit into 'dst')
+           or 0 if there is an error (typically, cannot fit into 'dst').
   After an error, the stream status is invalid, it can only be reset or freed.
  
 </p></pre><BR>
 
-<pre><b>int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
-</b><p>  If previously compressed data block is not guaranteed to remain available at its current memory location,
+<pre><b>int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+</b><p>  If last 64KB data cannot be guaranteed to remain available at its current memory location,
   save it into a safer place (char* safeBuffer).
-  Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
-  @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
  
 </p></pre><BR>
 
@@ -198,37 +206,59 @@
 
 <pre><b>LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-</b><p>  creation / destruction of streaming decompression tracking structure.
-  A tracking structure can be re-used multiple times sequentially. 
+</b><p>  creation / destruction of streaming decompression tracking context.
+  A tracking context can be re-used multiple times.
+ 
 </p></pre><BR>
 
 <pre><b>int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
-</b><p>  An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+</b><p>  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
   Use this function to start decompression of a new stream of blocks.
-  A dictionary can optionnally be set. Use NULL or size 0 for a simple reset order.
+  A dictionary can optionnally be set. Use NULL or size 0 for a reset order.
+  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
  @return : 1 if OK, 0 if error
  
 </p></pre><BR>
 
+<pre><b>int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  </b>/* for static allocation; mbs presumed valid */<b>
+</b><p>  Note : in a ring buffer scenario (optional),
+  blocks are presumed decompressed next to each other
+  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+  at which stage it resumes from beginning of ring buffer.
+  When setting such a ring buffer for streaming decompression,
+  provides the minimum size of this ring buffer
+  to be compatible with any source respecting maxBlockSize condition.
+ @return : minimum ring buffer size,
+           or 0 if there is an error (invalid maxBlockSize).
+ 
+</p></pre><BR>
+
 <pre><b>int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
 </b><p>  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
   A block is an unsplittable entity, it must be presented entirely to a decompression function.
-  Decompression functions only accept one block at a time.
-  Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB).
+  Decompression functions only accepts one block at a time.
+  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+  If less than 64KB of data has been decoded, all the data must be present.
 
-  Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
-  - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
-    In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
-  - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
-    maxBlockSize is implementation dependent. It's the maximum size of any single block.
+  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+    In which case, encoding and decoding buffers do not need to be synchronized.
+    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+  - Synchronized mode :
+    Decompression buffer size is _exactly_ the same as compression buffer size,
+    and follows exactly same update rule (block boundaries at same positions),
+    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
     In which case, encoding and decoding buffers do not need to be synchronized,
     and encoding ring buffer can have any size, including small ones ( < 64 KB).
-  - _At least_ 64 KB + 8 bytes + maxBlockSize.
-    In which case, encoding and decoding buffers do not need to be synchronized,
-    and encoding ring buffer can have any size, including larger than decoding buffer.
-  Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
-  and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+
+  Whenever these conditions are not possible,
+  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 </p></pre><BR>
 
 <pre><b>int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
@@ -236,25 +266,98 @@
 </b><p>  These decoding functions work the same as
   a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
   They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
  
 </p></pre><BR>
 
-<a name="Chapter8"></a><h2>Private definitions</h2><pre>
+<a name="Chapter8"></a><h2>Unstable declarations</h2><pre>
+ Declarations in this section should be considered unstable.
+ Use at your own peril, etc., etc.
+ They may be removed in the future.
+ Their signatures may change.
+<BR></pre>
+
+<pre><b>void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+</b><p>  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
+  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
+
+  Note:
+  Using this in advance of a non- streaming-compression function is redundant,
+  and potentially bad for performance, since they all perform their own custom
+  reset internally.
+
+  Differences from LZ4_resetStream():
+  When an LZ4_stream_t is known to be in a internally coherent state,
+  it can often be prepared for a new compression with almost no work, only
+  sometimes falling back to the full, expensive reset that is always required
+  when the stream is in an indeterminate state (i.e., the reset performed by
+  LZ4_resetStream()).
+
+  LZ4_streams are guaranteed to be in a valid state when:
+  - returned from LZ4_createStream()
+  - reset by LZ4_resetStream()
+  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
+  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
+  - the stream was in a valid state and was then used in any compression call
+    that returned success
+  - the stream was in an indeterminate state and was used in a compression
+    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
+    that returned success
+
+  When a stream isn't known to be in a valid state, it is not safe to pass to
+  any fastReset or streaming function. It must first be cleansed by the full
+  LZ4_resetStream().
+ 
+</p></pre><BR>
+
+<pre><b>int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+</b><p>  A variant of LZ4_compress_fast_extState().
+
+  Using this variant avoids an expensive initialization step. It is only safe
+  to call if the state buffer is known to be correctly initialized already
+  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
+  initialized"). From a high level, the difference is that this function
+  initializes the provided state with a call to something like
+  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
+  call to LZ4_resetStream().
+ 
+</p></pre><BR>
+
+<pre><b>void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
+</b><p>  This is an experimental API that allows for the efficient use of a
+  static dictionary many times.
+
+  Rather than re-loading the dictionary buffer into a working context before
+  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+  in which the working stream references the dictionary stream in-place.
+
+  Several assumptions are made about the state of the dictionary stream.
+  Currently, only streams which have been prepared by LZ4_loadDict() should
+  be expected to work.
+
+  Alternatively, the provided dictionary stream pointer may be NULL, in which
+  case any existing dictionary stream is unset.
+
+  If a dictionary is provided, it replaces any pre-existing stream history.
+  The dictionary contents are the only history that can be referenced and
+  logically immediately precede the data compressed in the first subsequent
+  compression call.
+
+  The dictionary will only remain attached to the working stream through the
+  first compression call, at the end of which it is cleared. The dictionary
+  stream (and source buffer) must remain in-place / accessible / unchanged
+  through the completion of the first compression call on the stream.
+ 
+</p></pre><BR>
+
+<a name="Chapter9"></a><h2>Private definitions</h2><pre>
  Do not use these definitions.
  They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
  Using these definitions will expose code to API and/or ABI break in future versions of the library.
 <BR></pre>
 
 <pre><b>typedef struct {
-    uint32_t hashTable[LZ4_HASH_SIZE_U32];
-    uint32_t currentOffset;
-    uint32_t initCheck;
-    const uint8_t* dictionary;
-    uint8_t* bufferStart;   </b>/* obsolete, used for slideInputBuffer */<b>
-    uint32_t dictSize;
-} LZ4_stream_t_internal;
-</b></pre><BR>
-<pre><b>typedef struct {
     const uint8_t* externalDict;
     size_t extDictSize;
     const uint8_t* prefixEnd;
@@ -262,15 +365,6 @@
 } LZ4_streamDecode_t_internal;
 </b></pre><BR>
 <pre><b>typedef struct {
-    unsigned int hashTable[LZ4_HASH_SIZE_U32];
-    unsigned int currentOffset;
-    unsigned int initCheck;
-    const unsigned char* dictionary;
-    unsigned char* bufferStart;   </b>/* obsolete, used for slideInputBuffer */<b>
-    unsigned int dictSize;
-} LZ4_stream_t_internal;
-</b></pre><BR>
-<pre><b>typedef struct {
     const unsigned char* externalDict;
     size_t extDictSize;
     const unsigned char* prefixEnd;
@@ -305,17 +399,15 @@
  
 </p></pre><BR>
 
-<a name="Chapter9"></a><h2>Obsolete Functions</h2><pre></pre>
+<a name="Chapter10"></a><h2>Obsolete Functions</h2><pre></pre>
 
 <pre><b>#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #  define LZ4_DEPRECATED(message)   </b>/* disable deprecation warnings */<b>
 #else
 #  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if defined(__clang__) </b>/* clang doesn't handle mixed C++11 and CNU attributes */<b>
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif defined (__cplusplus) && (__cplusplus >= 201402) </b>/* C++14 or greater */<b>
+#  if defined (__cplusplus) && (__cplusplus >= 201402) </b>/* C++14 or greater */<b>
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif (LZ4_GCC_VERSION >= 405)
+#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
 #  elif (LZ4_GCC_VERSION >= 301)
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated))

diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html
index 590c632..53ea7eb 100644
--- a/doc/lz4frame_manual.html
+++ b/doc/lz4frame_manual.html

@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>1.8.1 Manual</title>
+<title>1.8.2 Manual</title>
 </head>
 <body>
-<h1>1.8.1 Manual</h1>
+<h1>1.8.2 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -18,6 +18,7 @@
 <li><a href="#Chapter8">Compression</a></li>
 <li><a href="#Chapter9">Decompression functions</a></li>
 <li><a href="#Chapter10">Streaming decompression functions</a></li>
+<li><a href="#Chapter11">Bulk processing dictionary API</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -30,9 +31,9 @@
 
 <a name="Chapter3"></a><h2>Error management</h2><pre></pre>
 
-<pre><b>unsigned    LZ4F_isError(LZ4F_errorCode_t code);   </b>/**< tells if a `LZ4F_errorCode_t` function result is an error code */<b>
+<pre><b>unsigned    LZ4F_isError(LZ4F_errorCode_t code);   </b>/**< tells when a function result is an error code */<b>
 </b></pre><BR>
-<pre><b>const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   </b>/**< return error code string; useful for debugging */<b>
+<pre><b>const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   </b>/**< return error code string; for debugging */<b>
 </b></pre><BR>
 <a name="Chapter4"></a><h2>Frame compression types</h2><pre></pre>
 
@@ -74,13 +75,13 @@
 } LZ4F_frameType_t;
 </b></pre><BR>
 <pre><b>typedef struct {
-  LZ4F_blockSizeID_t     blockSizeID;          </b>/* max64KB, max256KB, max1MB, max4MB ; 0 == default */<b>
-  LZ4F_blockMode_t       blockMode;            </b>/* LZ4F_blockLinked, LZ4F_blockIndependent ; 0 == default */<b>
-  LZ4F_contentChecksum_t contentChecksumFlag;  </b>/* if enabled, frame is terminated with a 32-bits checksum of decompressed data ; 0 == disabled (default)  */<b>
-  LZ4F_frameType_t       frameType;            </b>/* read-only field : LZ4F_frame or LZ4F_skippableFrame */<b>
-  unsigned long long     contentSize;          </b>/* Size of uncompressed content ; 0 == unknown */<b>
-  unsigned               dictID;               </b>/* Dictionary ID, sent by the compressor to help decoder select the correct dictionary; 0 == no dictID provided */<b>
-  LZ4F_blockChecksum_t   blockChecksumFlag;    </b>/* if enabled, each block is followed by a checksum of block's compressed data ; 0 == disabled (default)  */<b>
+  LZ4F_blockSizeID_t     blockSizeID;         </b>/* max64KB, max256KB, max1MB, max4MB; 0 == default */<b>
+  LZ4F_blockMode_t       blockMode;           </b>/* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */<b>
+  LZ4F_contentChecksum_t contentChecksumFlag; </b>/* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */<b>
+  LZ4F_frameType_t       frameType;           </b>/* read-only field : LZ4F_frame or LZ4F_skippableFrame */<b>
+  unsigned long long     contentSize;         </b>/* Size of uncompressed content ; 0 == unknown */<b>
+  unsigned               dictID;              </b>/* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */<b>
+  LZ4F_blockChecksum_t   blockChecksumFlag;   </b>/* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */<b>
 } LZ4F_frameInfo_t;
 </b><p>  makes it possible to set or read frame parameters.
   It's not required to set all fields, as long as the structure was initially memset() to zero.
@@ -89,20 +90,23 @@
 
 <pre><b>typedef struct {
   LZ4F_frameInfo_t frameInfo;
-  int      compressionLevel;       </b>/* 0 == default (fast mode); values above LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values below 0 trigger "fast acceleration", proportional to value */<b>
-  unsigned autoFlush;              </b>/* 1 == always flush, to reduce usage of internal buffers */<b>
-  unsigned reserved[4];            </b>/* must be zero for forward compatibility */<b>
+  int      compressionLevel;    </b>/* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */<b>
+  unsigned autoFlush;           </b>/* 1: always flush, to reduce usage of internal buffers */<b>
+  unsigned favorDecSpeed;       </b>/* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */  /* >= v1.8.2 */<b>
+  unsigned reserved[3];         </b>/* must be zero for forward compatibility */<b>
 } LZ4F_preferences_t;
 </b><p>  makes it possible to supply detailed compression parameters to the stream interface.
-  It's not required to set all fields, as long as the structure was initially memset() to zero.
+  Structure is presumed initially memset() to zero, representing default settings.
   All reserved fields must be set to zero. 
 </p></pre><BR>
 
 <a name="Chapter5"></a><h2>Simple compression function</h2><pre></pre>
 
 <pre><b>size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
-</b><p>  Returns the maximum possible size of a frame compressed with LZ4F_compressFrame() given srcSize content and preferences.
-  Note : this result is only usable with LZ4F_compressFrame(), not with multi-segments compression.
+</b><p>  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+  Note : this result is only usable with LZ4F_compressFrame().
+         It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
  
 </p></pre><BR>
 
@@ -151,41 +155,54 @@
 </p></pre><BR>
 
 <pre><b>size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr);
-</b><p> Provides dstCapacity given a srcSize to guarantee operation success in worst case situations.
- prefsPtr is optional : you can provide NULL as argument, preferences will be set to cover worst case scenario.
- Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
- When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
+</b><p>  Provides minimum dstCapacity required to guarantee compression success
+  given a srcSize and preferences, covering worst case scenario.
+  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd(),
+  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
+  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
+  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
+  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
  
 </p></pre><BR>
 
-<pre><b>size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
-</b><p> LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
- An important rule is that dstCapacity MUST be large enough to ensure operation success even in worst case situations.
- This value is provided by LZ4F_compressBound().
- If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
- LZ4F_compressUpdate() doesn't guarantee error recovery. When an error occurs, compression context must be freed or resized.
+<pre><b>size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+                                       void* dstBuffer, size_t dstCapacity,
+                                 const void* srcBuffer, size_t srcSize,
+                                 const LZ4F_compressOptions_t* cOptPtr);
+</b><p>  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+  Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+  This value is provided by LZ4F_compressBound().
+  If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
+  LZ4F_compressUpdate() doesn't guarantee error recovery.
+  When an error occurs, compression context must be freed or resized.
  `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
  @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
            or an error code if it fails (which can be tested using LZ4F_isError())
  
 </p></pre><BR>
 
-<pre><b>size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
-</b><p> When data must be generated and sent immediately, without waiting for a block to be completely filled,
- it's possible to call LZ4_flush(). It will immediately compress any data buffered within cctx.
+<pre><b>size_t LZ4F_flush(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* cOptPtr);
+</b><p>  When data must be generated and sent immediately, without waiting for a block to be completely filled,
+  it's possible to call LZ4_flush(). It will immediately compress any data buffered within cctx.
  `dstCapacity` must be large enough to ensure the operation will be successful.
  `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
- @return : number of bytes written into dstBuffer (it can be zero, which means there was no data stored within cctx)
+ @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
            or an error code if it fails (which can be tested using LZ4F_isError())
  
 </p></pre><BR>
 
-<pre><b>size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
+<pre><b>size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+                                    void* dstBuffer, size_t dstCapacity,
+                              const LZ4F_compressOptions_t* cOptPtr);
 </b><p>  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
   It will flush whatever data remained within `cctx` (like LZ4_flush())
   and properly finalize the frame, with an endMark and a checksum.
  `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
- @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
            or an error code if it fails (which can be tested using LZ4F_isError())
   A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  
@@ -194,19 +211,19 @@
 <a name="Chapter9"></a><h2>Decompression functions</h2><pre></pre>
 
 <pre><b>typedef struct {
-  unsigned stableDst;    </b>/* pledge that at least 64KB+64Bytes of previously decompressed data remain unmodifed where it was decoded. This optimization skips storage operations in tmp buffers */<b>
+  unsigned stableDst;    </b>/* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */<b>
   unsigned reserved[3];  </b>/* must be set to zero for forward compatibility */<b>
 } LZ4F_decompressOptions_t;
 </b></pre><BR>
 <pre><b>LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
 LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx);
-</b><p> Create an LZ4F_dctx object, to track all decompression operations.
- The version provided MUST be LZ4F_VERSION.
- The function provides a pointer to an allocated and initialized LZ4F_dctx object.
- The result is an errorCode, which can be tested using LZ4F_isError().
- dctx memory can be released using LZ4F_freeDecompressionContext();
- The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
- That is, it should be == 0 if decompression has been completed fully and correctly.
+</b><p>  Create an LZ4F_dctx object, to track all decompression operations.
+  The version provided MUST be LZ4F_VERSION.
+  The function provides a pointer to an allocated and initialized LZ4F_dctx object.
+  The result is an errorCode, which can be tested using LZ4F_isError().
+  dctx memory can be released using LZ4F_freeDecompressionContext();
+  Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
+  That is, it should be == 0 if decompression has been completed fully and correctly.
  
 </p></pre><BR>
 
@@ -245,8 +262,8 @@
   The function will read up to *srcSizePtr bytes from srcBuffer,
   and decompress data into dstBuffer, of capacity *dstSizePtr.
 
-  The number of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
-  The number of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+  The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
 
   The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
   Unconsumed source data must be presented again in subsequent invocations.
@@ -278,5 +295,58 @@
   and start a new one using same context resources. 
 </p></pre><BR>
 
+<pre><b>typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+</b></pre><BR>
+<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>
+
+<pre><b>LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+</b><p>  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
+</b><p>  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+  cctx must point to a context created by LZ4F_createCompressionContext().
+  If cdict==NULL, compress without a dictionary.
+  dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+  If this condition is not respected, function will fail (@return an errorCode).
+  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ @return : number of bytes written into dstBuffer.
+           or an error code if it fails (can be tested using LZ4F_isError()) 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
+</b><p>  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ `prefsPtr` is optional : you may provide NULL as argument,
+  however, it's the only way to provide dictID in the frame header.
+ @return : number of bytes written into dstBuffer for the header,
+           or an error code (which can be tested using LZ4F_isError()) 
+</p></pre><BR>
+
+<pre><b>LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctxPtr,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+</b><p>  Same as LZ4F_decompress(), using a predefined dictionary.
+  Dictionary is used "in place", without any preprocessing.
+  It must remain accessible throughout the entire frame decoding. 
+</p></pre><BR>
+
 </html>
 </body>

diff --git a/examples/Makefile b/examples/Makefile
index 9321c24..103e7ec 100644
--- a/examples/Makefile
+++ b/examples/Makefile

@@ -52,34 +52,40 @@
 all: printVersion doubleBuffer dictionaryRandomAccess ringBuffer ringBufferHC \
      lineCompress frameCompress simpleBuffer
 
-printVersion: $(LZ4DIR)/lz4.c printVersion.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+$(LZ4DIR)/liblz4.a: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.h $(LZ4DIR)/lz4hc.h $(LZ4DIR)/lz4frame.h $(LZ4DIR)/lz4frame_static.h
+	$(MAKE) -C $(LZ4DIR) liblz4.a
 
-doubleBuffer: $(LZ4DIR)/lz4.c blockStreaming_doubleBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+printVersion: printVersion.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-dictionaryRandomAccess: $(LZ4DIR)/lz4.c dictionaryRandomAccess.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+doubleBuffer: blockStreaming_doubleBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-ringBuffer  : $(LZ4DIR)/lz4.c blockStreaming_ringBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+dictionaryRandomAccess: dictionaryRandomAccess.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-ringBufferHC: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c HCStreaming_ringBuffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+ringBuffer  : blockStreaming_ringBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-lineCompress: $(LZ4DIR)/lz4.c blockStreaming_lineByLine.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+ringBufferHC: HCStreaming_ringBuffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-frameCompress: frameCompress.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT) $(LZ4DIR)/liblz4.a
+lineCompress: blockStreaming_lineByLine.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-compressFunctions: $(LZ4DIR)/lz4.c compress_functions.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT) -lrt
+frameCompress: frameCompress.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-simpleBuffer: $(LZ4DIR)/lz4.c simple_buffer.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+compressFunctions: compress_functions.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT) -lrt
 
-test : all
+simpleBuffer: simple_buffer.c $(LZ4DIR)/liblz4.a
+	$(CC) $(FLAGS) $^ -o $@$(EXT)
+
+$(LZ4) :
+	$(MAKE) -C ../programs lz4
+
+test : all $(LZ4)
 	@echo "\n=== Print Version ==="
 	./printVersion$(EXT)
 	@echo "\n=== Simple compression example ==="

diff --git a/examples/frameCompress.c b/examples/frameCompress.c
index d66a8dc..9bfea48 100644
--- a/examples/frameCompress.c
+++ b/examples/frameCompress.c

@@ -1,122 +1,156 @@
-// LZ4frame API example : compress a file
-// Based on sample code from Zbigniew Jędrzejewski-Szmek
+/* LZ4frame API example : compress a file
+ * Modified from an example code by Zbigniew Jędrzejewski-Szmek
+ *
+ * This example streams an input file into an output file
+ * using a bounded memory budget.
+ * Input is read in chunks of IN_CHUNK_SIZE */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <assert.h>
 
 #include <lz4frame.h>
 
-#define BUF_SIZE 16*1024
-#define LZ4_HEADER_SIZE 19
-#define LZ4_FOOTER_SIZE 4
 
-static const LZ4F_preferences_t lz4_preferences = {
+#define IN_CHUNK_SIZE  (16*1024)
+
+static const LZ4F_preferences_t kPrefs = {
     { LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame,
-      0 /* content size unknown */, 0 /* no dictID */ , LZ4F_noBlockChecksum },
-    0,   /* compression level */
+      0 /* unknown content size */, 0 /* no dictID */ , LZ4F_noBlockChecksum },
+    0,   /* compression level; 0 == default */
     0,   /* autoflush */
-    { 0, 0, 0, 0 },  /* reserved, must be set to 0 */
+    0,   /* favor decompression speed */
+    { 0, 0, 0 },  /* reserved, must be set to 0 */
 };
 
-static size_t compress_file(FILE *in, FILE *out, size_t *size_in, size_t *size_out) {
-    size_t r=1;  /* function result; 1 == error, default (early exit) */
-    LZ4F_compressionContext_t ctx;
-    char *src, *buf = NULL;
-    size_t size, count_in = 0, count_out, offset = 0, frame_size;
 
-    if (LZ4F_isError( LZ4F_createCompressionContext(&ctx, LZ4F_VERSION) )) {
-        printf("Failed to create context: error %zu\n", r);
-        return 1;
+/* safe_fwrite() :
+ * performs fwrite(), ensure operation success, or immediately exit() */
+static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f)
+{
+    size_t const writtenSize = fwrite(buf, eltSize, nbElt, f);
+    size_t const expectedSize = eltSize * nbElt;   /* note : should check for overflow */
+    if (writtenSize < expectedSize) {
+        if (ferror(f))  /* note : ferror() must follow fwrite */
+            printf("Write failed\n");
+        else
+            printf("Short write\n");
+        exit(1);
     }
+}
 
-    src = malloc(BUF_SIZE);
-    if (!src) {
-        printf("Not enough memory\n");
-        goto cleanup;
-    }
 
-    frame_size = LZ4F_compressBound(BUF_SIZE, &lz4_preferences);
-    size = frame_size + LZ4_HEADER_SIZE + LZ4_FOOTER_SIZE;
-    buf = malloc(size);
-    if (!buf) {
-        printf("Not enough memory\n");
-        goto cleanup;
-    }
+/* ================================================= */
+/*     Streaming Compression example               */
+/* ================================================= */
 
-    {   size_t const headerSize = LZ4F_compressBegin(ctx, buf, size, &lz4_preferences);
+typedef struct {
+    int error;
+    unsigned long long size_in;
+    unsigned long long size_out;
+} compressResult_t;
+
+static compressResult_t
+compress_file_internal(FILE* f_in, FILE* f_out,
+                    LZ4F_compressionContext_t ctx,
+                    void* inBuff, size_t inChunkSize,
+                    void* outBuff, size_t outCapacity)
+{
+    compressResult_t result = { 1, 0, 0 };  /* result for an error */
+    unsigned long long count_in = 0, count_out;
+
+    assert(f_in != NULL); assert(f_out != NULL);
+    assert(ctx != NULL);
+    assert(outCapacity >= LZ4F_HEADER_SIZE_MAX);
+    assert(outCapacity >= LZ4F_compressBound(inChunkSize, &kPrefs));
+
+    /* write frame header */
+    {   size_t const headerSize = LZ4F_compressBegin(ctx, outBuff, outCapacity, &kPrefs);
         if (LZ4F_isError(headerSize)) {
             printf("Failed to start compression: error %zu\n", headerSize);
-            goto cleanup;
+            return result;
         }
-        offset = count_out = headerSize;
-        printf("Buffer size is %zu bytes, header size %zu bytes\n", size, headerSize);
+        count_out = headerSize;
+        printf("Buffer size is %zu bytes, header size %zu bytes\n", outCapacity, headerSize);
+        safe_fwrite(outBuff, 1, headerSize, f_out);
     }
 
-
+    /* stream file */
     for (;;) {
-        size_t const readSize = fread(src, 1, BUF_SIZE, in);
-        if (readSize == 0)
-            break;
+        size_t const readSize = fread(inBuff, 1, IN_CHUNK_SIZE, f_in);
+        if (readSize == 0) break; /* nothing left to read from input file */
         count_in += readSize;
 
-        {   size_t const compressedSize = LZ4F_compressUpdate(ctx, buf + offset, size - offset, src, readSize, NULL);
-            if (LZ4F_isError(compressedSize)) {
-                printf("Compression failed: error %zu\n", compressedSize);
-                goto cleanup;
-            }
-            offset += compressedSize;
-            count_out += compressedSize;
-        }
-
-        if (size - offset < frame_size + LZ4_FOOTER_SIZE) {
-            size_t writtenSize;
-            printf("Writing %zu bytes\n", offset);
-
-            writtenSize = fwrite(buf, 1, offset, out);
-            if (writtenSize < offset) {
-                if (ferror(out))  /* note : ferror() must follow fwrite */
-                    printf("Write failed\n");
-                else
-                    printf("Short write\n");
-                goto cleanup;
-            }
-
-            offset = 0;
-        }
-    }
-
-    {   size_t const compressedSize = LZ4F_compressEnd(ctx, buf + offset, size - offset, NULL);
+        size_t const compressedSize = LZ4F_compressUpdate(ctx,
+                                                outBuff, outCapacity,
+                                                inBuff, readSize,
+                                                NULL);
         if (LZ4F_isError(compressedSize)) {
-            printf("Failed to end compression: error %zu\n", compressedSize);
-            goto cleanup;
+            printf("Compression failed: error %zu\n", compressedSize);
+            return result;
         }
-        offset += compressedSize;
+
+        printf("Writing %zu bytes\n", compressedSize);
+        safe_fwrite(outBuff, 1, compressedSize, f_out);
         count_out += compressedSize;
     }
 
-    printf("Writing %zu bytes\n", offset);
-    {   size_t const writtenSize = fwrite(buf, 1, offset, out);
-        if (writtenSize < offset) {
-            if (ferror(out))
-                printf("Write failed\n");
-            else
-                printf("Short write\n");
-            goto cleanup;
-    }   }
+    /* flush whatever remains within internal buffers */
+    {   size_t const compressedSize = LZ4F_compressEnd(ctx,
+                                                outBuff, outCapacity,
+                                                NULL);
+        if (LZ4F_isError(compressedSize)) {
+            printf("Failed to end compression: error %zu\n", compressedSize);
+            return result;
+        }
 
-    *size_in = count_in;
-    *size_out = count_out;
-    r = 0;   /* success */
+        printf("Writing %zu bytes\n", compressedSize);
+        safe_fwrite(outBuff, 1, compressedSize, f_out);
+        count_out += compressedSize;
+    }
 
- cleanup:
+    result.size_in = count_in;
+    result.size_out = count_out;
+    result.error = 0;
+    return result;
+}
+
+static compressResult_t
+compress_file(FILE* f_in, FILE* f_out)
+{
+    assert(f_in != NULL);
+    assert(f_out != NULL);
+
+    /* ressource allocation */
+    LZ4F_compressionContext_t ctx;
+    size_t const ctxCreation = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+    void* const src = malloc(IN_CHUNK_SIZE);
+    size_t const outbufCapacity = LZ4F_compressBound(IN_CHUNK_SIZE, &kPrefs);   /* large enough for any input <= IN_CHUNK_SIZE */
+    void* const outbuff = malloc(outbufCapacity);
+
+    compressResult_t result = { 1, 0, 0 };  /* == error (default) */
+    if (!LZ4F_isError(ctxCreation) && src && outbuff) {
+        result = compress_file_internal(f_in, f_out,
+                                        ctx,
+                                        src, IN_CHUNK_SIZE,
+                                        outbuff, outbufCapacity);
+    } else {
+        printf("error : ressource allocation failed \n");
+    }
+
     LZ4F_freeCompressionContext(ctx);   /* supports free on NULL */
     free(src);
-    free(buf);
-    return r;
+    free(outbuff);
+    return result;
 }
 
+
+/* ================================================= */
+/*     Streaming decompression example               */
+/* ================================================= */
+
 static size_t get_block_size(const LZ4F_frameInfo_t* info) {
     switch (info->blockSizeID) {
         case LZ4F_default:
@@ -125,125 +159,164 @@
         case LZ4F_max1MB:   return 1 << 20;
         case LZ4F_max4MB:   return 1 << 22;
         default:
-            printf("Impossible unless more block sizes are allowed\n");
+            printf("Impossible with expected frame specification (<=v1.6.1)\n");
             exit(1);
     }
 }
 
-static size_t decompress_file(FILE* in, FILE* out) {
-    void* const src = malloc(BUF_SIZE);
-    void* dst = NULL;
-    size_t dstCapacity = 0;
-    LZ4F_dctx* dctx = NULL;
+/* @return : 1==error, 0==success */
+static int
+decompress_file_internal(FILE* f_in, FILE* f_out,
+                        LZ4F_dctx* dctx,
+                        void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed,
+                        void* dst, size_t dstCapacity)
+{
+    int firstChunk = 1;
     size_t ret = 1;
 
-    /* Initialization */
-    if (!src) { perror("decompress_file(src)"); goto cleanup; }
-    {   size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, 100);
-        if (LZ4F_isError(dctxStatus)) {
-            printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus));
-            goto cleanup;
-    }   }
+    assert(f_in != NULL); assert(f_out != NULL);
+    assert(dctx != NULL);
+    assert(src != NULL); assert(srcCapacity > 0); assert(filled <= srcCapacity); assert(alreadyConsumed <= filled);
+    assert(dst != NULL); assert(dstCapacity > 0);
 
     /* Decompression */
     while (ret != 0) {
         /* Load more input */
-        size_t srcSize = fread(src, 1, BUF_SIZE, in);
-        const void* srcPtr = src;
-        const void* const srcEnd = srcPtr + srcSize;
-        if (srcSize == 0 || ferror(in)) {
+        size_t readSize = firstChunk ? filled : fread(src, 1, srcCapacity, f_in); firstChunk=0;
+        const void* srcPtr = src + alreadyConsumed; alreadyConsumed=0;
+        const void* const srcEnd = srcPtr + readSize;
+        if (readSize == 0 || ferror(f_in)) {
             printf("Decompress: not enough input or error reading file\n");
-            goto cleanup;
+            return 1;
         }
-        /* Allocate destination buffer if it isn't already */
-        if (!dst) {
-            LZ4F_frameInfo_t info;
-            ret = LZ4F_getFrameInfo(dctx, &info, src, &srcSize);
-            if (LZ4F_isError(ret)) {
-                printf("LZ4F_getFrameInfo error: %s\n", LZ4F_getErrorName(ret));
-                goto cleanup;
-            }
-            /* Allocating enough space for an entire block isn't necessary for
-             * correctness, but it allows some memcpy's to be elided.
-             */
-            dstCapacity = get_block_size(&info);
-            dst = malloc(dstCapacity);
-            if (!dst) { perror("decompress_file(dst)"); goto cleanup; }
-            srcPtr += srcSize;
-            srcSize = srcEnd - srcPtr;
-        }
+
         /* Decompress:
-         * Continue while there is more input to read and the frame isn't over.
-         * If srcPtr == srcEnd then we know that there is no more output left in the
-         * internal buffer left to flush.
+         * Continue while there is more input to read (srcPtr != srcEnd)
+         * and the frame isn't over (ret != 0)
          */
         while (srcPtr != srcEnd && ret != 0) {
-            /* INVARIANT: Any data left in dst has already been written */
+            /* Any data within dst has been flushed at this stage */
             size_t dstSize = dstCapacity;
+            size_t srcSize = srcEnd - srcPtr;
             ret = LZ4F_decompress(dctx, dst, &dstSize, srcPtr, &srcSize, /* LZ4F_decompressOptions_t */ NULL);
             if (LZ4F_isError(ret)) {
                 printf("Decompression error: %s\n", LZ4F_getErrorName(ret));
-                goto cleanup;
+                return 1;
             }
             /* Flush output */
-            if (dstSize != 0){
-                size_t written = fwrite(dst, 1, dstSize, out);
-                printf("Writing %zu bytes\n", dstSize);
-                if (written != dstSize) {
-                    printf("Decompress: Failed to write to file\n");
-                    goto cleanup;
-                }
-            }
+            if (dstSize != 0) safe_fwrite(dst, 1, dstSize, f_out);
             /* Update input */
             srcPtr += srcSize;
-            srcSize = srcEnd - srcPtr;
         }
     }
-    /* Check that there isn't trailing input data after the frame.
-     * It is valid to have multiple frames in the same file, but this example
-     * doesn't support it.
-     */
-    ret = fread(src, 1, 1, in);
-    if (ret != 0 || !feof(in)) {
-        printf("Decompress: Trailing data left in file after frame\n");
-        goto cleanup;
-    }
 
-cleanup:
-    free(src);
-    free(dst);
-    return LZ4F_freeDecompressionContext(dctx);   /* note : free works on NULL */
+    /* Check that there isn't trailing input data after the frame.
+     * It is valid to have multiple frames in the same file,
+     * but this example only supports one frame.
+     */
+    {   size_t const readSize = fread(src, 1, 1, f_in);
+        if (readSize != 0 || !feof(f_in)) {
+            printf("Decompress: Trailing data left in file after frame\n");
+            return 1;
+    }   }
+
+    return 0;
 }
 
-int compare(FILE* fp0, FILE* fp1)
+
+/* @return : 1==error, 0==completed */
+static int
+decompress_file_allocDst(FILE* f_in, FILE* f_out,
+                        LZ4F_dctx* dctx,
+                        void* src, size_t srcCapacity)
+{
+    assert(f_in != NULL); assert(f_out != NULL);
+    assert(dctx != NULL);
+    assert(src != NULL);
+    assert(srcCapacity >= LZ4F_HEADER_SIZE_MAX);  /* ensure LZ4F_getFrameInfo() can read enough data */
+
+    /* Read Frame header */
+    size_t const readSize = fread(src, 1, srcCapacity, f_in);
+    if (readSize == 0 || ferror(f_in)) {
+        printf("Decompress: not enough input or error reading file\n");
+        return 1;
+    }
+
+    LZ4F_frameInfo_t info;
+    size_t consumedSize = readSize;
+    {   size_t const fires = LZ4F_getFrameInfo(dctx, &info, src, &consumedSize);
+        if (LZ4F_isError(fires)) {
+            printf("LZ4F_getFrameInfo error: %s\n", LZ4F_getErrorName(fires));
+            return 1;
+    }   }
+
+    /* Allocating enough space for an entire block isn't necessary for
+     * correctness, but it allows some memcpy's to be elided.
+     */
+    size_t const dstCapacity = get_block_size(&info);
+    void* const dst = malloc(dstCapacity);
+    if (!dst) { perror("decompress_file(dst)"); return 1; }
+
+    int const decompressionResult = decompress_file_internal(
+                        f_in, f_out,
+                        dctx,
+                        src, srcCapacity, readSize, consumedSize,
+                        dst, dstCapacity);
+
+    free(dst);
+    return decompressionResult;
+}
+
+
+/* @result : 1==error, 0==success */
+static int decompress_file(FILE* f_in, FILE* f_out)
+{
+    assert(f_in != NULL); assert(f_out != NULL);
+
+    /* Ressource allocation */
+    void* const src = malloc(IN_CHUNK_SIZE);
+    if (!src) { perror("decompress_file(src)"); return 1; }
+
+    LZ4F_dctx* dctx;
+    {   size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, 100);
+        if (LZ4F_isError(dctxStatus)) {
+            printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus));
+    }   }
+
+    int const result = !dctx ? 1 /* error */ :
+                       decompress_file_allocDst(f_in, f_out, dctx, src, IN_CHUNK_SIZE);
+
+    free(src);
+    LZ4F_freeDecompressionContext(dctx);   /* note : free works on NULL */
+    return result;
+}
+
+
+int compareFiles(FILE* fp0, FILE* fp1)
 {
     int result = 0;
 
-    while(0 == result) {
+    while (result==0) {
         char b0[1024];
         char b1[1024];
-        const size_t r0 = fread(b0, 1, sizeof(b0), fp0);
-        const size_t r1 = fread(b1, 1, sizeof(b1), fp1);
+        size_t const r0 = fread(b0, 1, sizeof(b0), fp0);
+        size_t const r1 = fread(b1, 1, sizeof(b1), fp1);
 
-        result = (int) r0 - (int) r1;
-
-        if (0 == r0 || 0 == r1) {
-            break;
-        }
-        if (0 == result) {
-            result = memcmp(b0, b1, r0);
-        }
+        result = (r0 != r1);
+        if (!r0 || !r1) break;
+        if (!result) result = memcmp(b0, b1, r0);
     }
 
     return result;
 }
 
+
 int main(int argc, const char **argv) {
     char inpFilename[256] = { 0 };
     char lz4Filename[256] = { 0 };
     char decFilename[256] = { 0 };
 
-    if(argc < 2) {
+    if (argc < 2) {
         printf("Please specify input filename\n");
         return 0;
     }
@@ -259,40 +332,39 @@
     /* compress */
     {   FILE* const inpFp = fopen(inpFilename, "rb");
         FILE* const outFp = fopen(lz4Filename, "wb");
-        size_t sizeIn = 0;
-        size_t sizeOut = 0;
-        size_t ret;
 
         printf("compress : %s -> %s\n", inpFilename, lz4Filename);
-        ret = compress_file(inpFp, outFp, &sizeIn, &sizeOut);
-        if (ret) {
-            printf("compress : failed with code %zu\n", ret);
-            return (int)ret;
-        }
-        printf("%s: %zu → %zu bytes, %.1f%%\n",
-            inpFilename, sizeIn, sizeOut,
-            (double)sizeOut / sizeIn * 100);
-        printf("compress : done\n");
+        compressResult_t const ret = compress_file(inpFp, outFp);
 
         fclose(outFp);
         fclose(inpFp);
+
+        if (ret.error) {
+            printf("compress : failed with code %i\n", ret.error);
+            return ret.error;
+        }
+        printf("%s: %zu → %zu bytes, %.1f%%\n",
+            inpFilename,
+            (size_t)ret.size_in, (size_t)ret.size_out,  /* might overflow is size_t is 32 bits and size_{in,out} > 4 GB */
+            (double)ret.size_out / ret.size_in * 100);
+        printf("compress : done\n");
     }
 
     /* decompress */
     {   FILE* const inpFp = fopen(lz4Filename, "rb");
         FILE* const outFp = fopen(decFilename, "wb");
-        size_t ret;
 
         printf("decompress : %s -> %s\n", lz4Filename, decFilename);
-        ret = decompress_file(inpFp, outFp);
-        if (ret) {
-            printf("decompress : failed with code %zu\n", ret);
-            return (int)ret;
-        }
-        printf("decompress : done\n");
+        int const ret = decompress_file(inpFp, outFp);
 
         fclose(outFp);
         fclose(inpFp);
+
+        if (ret) {
+            printf("decompress : failed with code %i\n", ret);
+            return ret;
+        }
+        printf("decompress : done\n");
     }
 
     /* verify */
@@ -300,15 +372,16 @@
         FILE* const decFp = fopen(decFilename, "rb");
 
         printf("verify : %s <-> %s\n", inpFilename, decFilename);
-        const int cmp = compare(inpFp, decFp);
-        if(0 == cmp) {
-            printf("verify : OK\n");
-        } else {
-            printf("verify : NG\n");
-        }
+        int const cmp = compareFiles(inpFp, decFp);
 
         fclose(decFp);
         fclose(inpFp);
+
+        if (cmp) {
+            printf("corruption detected : decompressed file differs from original\n");
+            return cmp;
+        }
+        printf("verify : OK\n");
     }
 
     return 0;

diff --git a/lib/Makefile b/lib/Makefile
index dd33f50..abb6c07 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -42,6 +42,7 @@
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER  := $(shell echo $(LIBVER_SCRIPT))
 
+BUILD_SHARED:=yes
 BUILD_STATIC:=yes
 
 CPPFLAGS+= -DXXH_NAMESPACE=LZ4_
@@ -61,7 +62,7 @@
 	SHARED_EXT = dylib
 	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
 	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
-	SONAME_FLAGS = -install_name $(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+	SONAME_FLAGS = -install_name $(libdir)/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
 else
 	SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR)
 	SHARED_EXT = so
@@ -84,30 +85,38 @@
 all32: CFLAGS+=-m32
 all32: all
 
+ifeq ($(V), 1)
+Q =
+else
+Q = @
+endif
+
 liblz4.a: $(SRCFILES)
 ifeq ($(BUILD_STATIC),yes)  # can be disabled on command line
 	@echo compiling static library
-	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
-	@$(AR) rcs $@ *.o
+	$(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
+	$(Q)$(AR) rcs $@ *.o
 endif
 
 $(LIBLZ4): $(SRCFILES)
+ifeq ($(BUILD_SHARED),yes)  # can be disabled on command line
 	@echo compiling dynamic library $(LIBVER)
 ifneq (,$(filter Windows%,$(OS)))
-	@$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll\$@.dll
+	$(Q)$(CC) $(FLAGS) -DLZ4_DLL_EXPORT=1 -shared $^ -o dll\$@.dll
 	dlltool -D dll\liblz4.dll -d dll\liblz4.def -l dll\liblz4.lib
 else
-	@$(CC) $(FLAGS) -shared $^ -fPIC -fvisibility=hidden $(SONAME_FLAGS) -o $@
+	$(Q)$(CC) $(FLAGS) -shared $^ -fPIC -fvisibility=hidden $(SONAME_FLAGS) -o $@
 	@echo creating versioned links
-	@ln -sf $@ liblz4.$(SHARED_EXT_MAJOR)
-	@ln -sf $@ liblz4.$(SHARED_EXT)
+	$(Q)ln -sf $@ liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)ln -sf $@ liblz4.$(SHARED_EXT)
+endif
 endif
 
 liblz4: $(LIBLZ4)
 
 clean:
-	@$(RM) core *.o liblz4.pc dll/liblz4.dll dll/liblz4.lib
-	@$(RM) *.a *.$(SHARED_EXT) *.$(SHARED_EXT_MAJOR) *.$(SHARED_EXT_VER)
+	$(Q)$(RM) core *.o liblz4.pc dll/liblz4.dll dll/liblz4.lib
+	$(Q)$(RM) *.a *.$(SHARED_EXT) *.$(SHARED_EXT_MAJOR) *.$(SHARED_EXT_VER)
 	@echo Cleaning library completed
 
 
@@ -116,23 +125,29 @@
 #-----------------------------------------------------------------------------
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
 
+.PHONY: listL120
+listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
+	find . -type f -name '*.c' -o -name '*.h' | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' $$filename; done
+
 DESTDIR     ?=
 # directory variables : GNU conventions prefer lowercase
 # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
-# support both lower and uppercase (BSD), use uppercase in script
-prefix      ?= /usr/local
-PREFIX      ?= $(prefix)
-exec_prefix ?= $(PREFIX)
-libdir      ?= $(exec_prefix)/lib
-LIBDIR      ?= $(libdir)
-includedir  ?= $(PREFIX)/include
-INCLUDEDIR  ?= $(includedir)
+# support both lower and uppercase (BSD), use lower in script
+PREFIX      ?= /usr/local
+prefix      ?= $(PREFIX)
+EXEC_PREFIX ?= $(prefix)
+exec_prefix ?= $(EXEC_PREFIX)
+LIBDIR      ?= $(exec_prefix)/lib
+libdir      ?= $(LIBDIR)
+INCLUDEDIR  ?= $(prefix)/include
+includedir  ?= $(INCLUDEDIR)
 
 ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly))
-PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
+PKGCONFIGDIR ?= $(prefix)/libdata/pkgconfig
 else
-PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
+PKGCONFIGDIR ?= $(libdir)/pkgconfig
 endif
+pkgconfigdir ?= $(PKGCONFIGDIR)
 
 ifneq (,$(filter $(shell uname),SunOS))
 INSTALL ?= ginstall
@@ -145,38 +160,41 @@
 
 liblz4.pc: liblz4.pc.in Makefile
 	@echo creating pkgconfig
-	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-         -e 's|@LIBDIR@|$(LIBDIR)|' \
-         -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+	$(Q)sed -e 's|@PREFIX@|$(prefix)|' \
+         -e 's|@LIBDIR@|$(libdir)|' \
+         -e 's|@INCLUDEDIR@|$(includedir)|' \
          -e 's|@VERSION@|$(LIBVER)|' \
           $< >$@
 
 install: lib liblz4.pc
-	@$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ $(DESTDIR)$(INCLUDEDIR)/ $(DESTDIR)$(LIBDIR)/
-	@$(INSTALL_DATA) liblz4.pc $(DESTDIR)$(PKGCONFIGDIR)/
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(pkgconfigdir)/ $(DESTDIR)$(includedir)/ $(DESTDIR)$(libdir)/
+	$(Q)$(INSTALL_DATA) liblz4.pc $(DESTDIR)$(pkgconfigdir)/
 	@echo Installing libraries
 ifeq ($(BUILD_STATIC),yes)
-	@$(INSTALL_DATA) liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a
-	@$(INSTALL_DATA) lz4frame_static.h $(DESTDIR)$(INCLUDEDIR)/lz4frame_static.h
+	$(Q)$(INSTALL_DATA) liblz4.a $(DESTDIR)$(libdir)/liblz4.a
+	$(Q)$(INSTALL_DATA) lz4frame_static.h $(DESTDIR)$(includedir)/lz4frame_static.h
 endif
-	@$(INSTALL_PROGRAM) liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)
-	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
-	@ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
-	@echo Installing headers in $(INCLUDEDIR)
-	@$(INSTALL_DATA) lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h
-	@$(INSTALL_DATA) lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
-	@$(INSTALL_DATA) lz4frame.h $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+ifeq ($(BUILD_SHARED),yes)
+	$(Q)$(INSTALL_PROGRAM) liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)
+	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)ln -sf liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
+endif
+	@echo Installing headers in $(includedir)
+	$(Q)$(INSTALL_DATA) lz4.h $(DESTDIR)$(includedir)/lz4.h
+	$(Q)$(INSTALL_DATA) lz4hc.h $(DESTDIR)$(includedir)/lz4hc.h
+	$(Q)$(INSTALL_DATA) lz4frame.h $(DESTDIR)$(includedir)/lz4frame.h
 	@echo lz4 libraries installed
 
 uninstall:
-	@$(RM) $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
-	@$(RM) $(DESTDIR)$(LIBDIR)/liblz4.a
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4.h
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
-	@$(RM) $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	$(Q)$(RM) $(DESTDIR)$(pkgconfigdir)/liblz4.pc
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_MAJOR)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.$(SHARED_EXT_VER)
+	$(Q)$(RM) $(DESTDIR)$(libdir)/liblz4.a
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4hc.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4frame.h
+	$(Q)$(RM) $(DESTDIR)$(includedir)/lz4frame_static.h
 	@echo lz4 libraries successfully uninstalled
 
 endif

diff --git a/lib/README.md b/lib/README.md
index fc5d4e9..7082fe3 100644
--- a/lib/README.md
+++ b/lib/README.md

@@ -15,7 +15,7 @@
 
 For more compression ratio at the cost of compression speed,
 the High Compression variant called **lz4hc** is available.
-Add files **`lz4hc.c`**, **`lz4hc.h`** and **`lz4opt.h`**.
+Add files **`lz4hc.c`** and **`lz4hc.h`**.
 The variant still depends on regular `lib/lz4.*` source files.
 
 

diff --git a/lib/lz4.c b/lib/lz4.c
index 213b085..e51a3e0 100644
--- a/lib/lz4.c
+++ b/lib/lz4.c

@@ -69,9 +69,11 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+#  if defined(__GNUC__) && \
+  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define LZ4_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
 #    define LZ4_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -80,7 +82,7 @@
  * LZ4_FORCE_SW_BITCOUNT
  * Define this parameter if your target system or compiler does not support hardware bit count
  */
-#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
 #  define LZ4_FORCE_SW_BITCOUNT
 #endif
 
@@ -89,6 +91,8 @@
 /*-************************************
 *  Dependency
 **************************************/
+#define LZ4_STATIC_LINKING_ONLY
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
 #include "lz4.h"
 /* see also "memory routines" below */
 
@@ -146,18 +150,23 @@
 #  define expect(expr,value)    (expr)
 #endif
 
+#ifndef likely
 #define likely(expr)     expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
 #define unlikely(expr)   expect((expr) != 0, 0)
+#endif
 
 
 /*-************************************
 *  Memory routines
 **************************************/
 #include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOCATOR(n,s) calloc(n,s)
-#define FREEMEM        free
+#define ALLOC(s)          malloc(s)
+#define ALLOC_AND_ZERO(s) calloc(1,s)
+#define FREEMEM(p)        free(p)
 #include <string.h>   /* memset, memcpy */
-#define MEM_INIT       memset
+#define MEM_INIT(p,v,s)   memset((p),(v),(s))
 
 
 /*-************************************
@@ -270,11 +279,6 @@
     }
 }
 
-static void LZ4_copy8(void* dst, const void* src)
-{
-    memcpy(dst,src,8);
-}
-
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
 LZ4_FORCE_O2_INLINE_GCC_PPC64LE
 void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
@@ -283,7 +287,7 @@
     const BYTE* s = (const BYTE*)srcPtr;
     BYTE* const e = (BYTE*)dstEnd;
 
-    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+    do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
 
@@ -321,7 +325,7 @@
 #  endif
 #endif
 
-#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
 
 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
 #  include <stdio.h>
@@ -450,10 +454,33 @@
 /*-************************************
 *  Local Structures and types
 **************************************/
-typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
-typedef enum { byPtr, byU32, byU16 } tableType_t;
+typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive;
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
 
-typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ *                   blob being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
+ *                   content is in a separate context, pointed to by
+ *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ *                   entries in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
@@ -497,10 +524,25 @@
     return LZ4_hash4(LZ4_read32(p), tableType);
 }
 
-static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
+static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
 {
     switch (tableType)
     {
+    default: /* fallthrough */
+    case clearedTable: /* fallthrough */
+    case byPtr: { /* illegal! */ assert(0); return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
+    }
+}
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+                                  void* tableBase, tableType_t const tableType,
+                            const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case clearedTable: { /* illegal! */ assert(0); return; }
     case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
     case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
@@ -513,19 +555,81 @@
     LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
 }
 
-static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+/* LZ4_getIndexOnHash() :
+ * Index of match position registered in hash table.
+ * hash position must be calculated by using base+index, or dictBase+index.
+ * Assumption 1 : only valid if tableType == byU32 or byU16.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
 {
-    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
-    if (tableType == byU32) { const U32* const hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
-    { const U16* const hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+    if (tableType == byU32) {
+        const U32* const hashTable = (const U32*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+        return hashTable[h];
+    }
+    if (tableType == byU16) {
+        const U16* const hashTable = (const U16*) tableBase;
+        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+        return hashTable[h];
+    }
+    assert(0); return 0;  /* forbidden case */
 }
 
-LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
+    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
+                                             const void* tableBase, tableType_t tableType,
+                                             const BYTE* srcBase)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
 }
 
+LZ4_FORCE_INLINE void LZ4_prepareTable(
+        LZ4_stream_t_internal* const cctx,
+        const int inputSize,
+        const tableType_t tableType) {
+    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
+     * therefore safe to use no matter what mode we're in. Otherwise, we figure
+     * out if it's safe to leave as is or whether it needs to be reset.
+     */
+    if (cctx->tableType != clearedTable) {
+        if (cctx->tableType != tableType
+          || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
+          || (tableType == byU32 && cctx->currentOffset > 1 GB)
+          || tableType == byPtr
+          || inputSize >= 4 KB)
+        {
+            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+            cctx->currentOffset = 0;
+            cctx->tableType = clearedTable;
+        } else {
+            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+        }
+    }
+
+    /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
+     * than compressing without a gap. However, compressing with
+     * currentOffset == 0 is faster still, so we preserve that case.
+     */
+    if (cctx->currentOffset != 0 && tableType == byU32) {
+        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+        cctx->currentOffset += 64 KB;
+    }
+
+    /* Finally, clear history */
+    cctx->dictCtx = NULL;
+    cctx->dictionary = NULL;
+    cctx->dictSize = 0;
+}
 
 /** LZ4_compress_generic() :
     inlined, to ensure branches are decided at compilation time */
@@ -534,50 +638,70 @@
                  const char* const source,
                  char* const dest,
                  const int inputSize,
+                 int *inputConsumed, /* only written when outputLimited == fillOutput */
                  const int maxOutputSize,
                  const limitedOutput_directive outputLimited,
                  const tableType_t tableType,
-                 const dict_directive dict,
+                 const dict_directive dictDirective,
                  const dictIssue_directive dictIssue,
                  const U32 acceleration)
 {
     const BYTE* ip = (const BYTE*) source;
-    const BYTE* base;
+
+    U32 const startIndex = cctx->currentOffset;
+    const BYTE* base = (const BYTE*) source - startIndex;
     const BYTE* lowLimit;
-    const BYTE* const lowRefLimit = ip - cctx->dictSize;
-    const BYTE* const dictionary = cctx->dictionary;
-    const BYTE* const dictEnd = dictionary + cctx->dictSize;
-    const ptrdiff_t dictDelta = dictEnd - (const BYTE*)source;
+
+    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+    const BYTE* const dictionary =
+        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+    const U32 dictSize =
+        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
+
+    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
+    const BYTE* const dictEnd = dictionary + dictSize;
     const BYTE* anchor = (const BYTE*) source;
     const BYTE* const iend = ip + inputSize;
-    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
     const BYTE* const matchlimit = iend - LASTLITERALS;
 
+    /* the dictCtx currentOffset is indexed on the start of the dictionary,
+     * while a dictionary in the current context precedes the currentOffset */
+    const BYTE* dictBase = dictDirective == usingDictCtx ?
+        dictionary + dictSize - dictCtx->currentOffset :
+        dictionary + dictSize - startIndex;
+
     BYTE* op = (BYTE*) dest;
     BYTE* const olimit = op + maxOutputSize;
 
+    U32 offset = 0;
     U32 forwardH;
 
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
     /* Init conditions */
+    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
     if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
-    switch(dict)
-    {
-    case noDict:
-    default:
-        base = (const BYTE*)source;
-        lowLimit = (const BYTE*)source;
-        break;
-    case withPrefix64k:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source - cctx->dictSize;
-        break;
-    case usingExtDict:
-        base = (const BYTE*)source - cctx->currentOffset;
-        lowLimit = (const BYTE*)source;
-        break;
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
+    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
+    assert(acceleration >= 1);
+
+    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+    /* Update context state */
+    if (dictDirective == usingDictCtx) {
+        /* Subsequent linked blocks can't use the dictionary. */
+        /* Instead, they use the block we just compressed. */
+        cctx->dictCtx = NULL;
+        cctx->dictSize = (U32)inputSize;
+    } else {
+        cctx->dictSize += (U32)inputSize;
     }
-    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
-    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+    cctx->currentOffset += (U32)inputSize;
+    cctx->tableType = tableType;
+
+    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
 
     /* First Byte */
     LZ4_putPosition(ip, cctx->hashTable, tableType, base);
@@ -585,12 +709,12 @@
 
     /* Main Loop */
     for ( ; ; ) {
-        ptrdiff_t refDelta = 0;
         const BYTE* match;
         BYTE* token;
 
         /* Find a match */
-        {   const BYTE* forwardIp = ip;
+        if (tableType == byPtr) {
+            const BYTE* forwardIp = ip;
             unsigned step = 1;
             unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
@@ -599,34 +723,89 @@
                 forwardIp += step;
                 step = (searchMatchNb++ >> LZ4_skipTrigger);
 
-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
 
                 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
-                if (dict==usingExtDict) {
-                    if (match < (const BYTE*)source) {
-                        refDelta = dictDelta;
-                        lowLimit = dictionary;
-                    } else {
-                        refDelta = 0;
-                        lowLimit = (const BYTE*)source;
-                }   }
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
 
-            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
-                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+            } while ( (match+MAX_DISTANCE < ip)
+                   || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+        } else {   /* byU32, byU16 */
+
+            const BYTE* forwardIp = ip;
+            unsigned step = 1;
+            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
+            do {
+                U32 const h = forwardH;
+                U32 const current = (U32)(forwardIp - base);
+                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+                assert(matchIndex <= current);
+                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+                ip = forwardIp;
+                forwardIp += step;
+                step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+                assert(ip < mflimitPlusOne);
+
+                if (dictDirective == usingDictCtx) {
+                    if (matchIndex < startIndex) {
+                        /* there was no match, try the dictionary */
+                        assert(tableType == byU32);
+                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                        match = dictBase + matchIndex;
+                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else if (dictDirective==usingExtDict) {
+                    if (matchIndex < startIndex) {
+                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
+                        assert(startIndex - matchIndex >= MINMATCH);
+                        match = dictBase + matchIndex;
+                        lowLimit = dictionary;
+                    } else {
+                        match = base + matchIndex;
+                        lowLimit = (const BYTE*)source;
+                    }
+                } else {   /* single continuous memory segment */
+                    match = base + matchIndex;
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
+                assert(matchIndex < current);
+                if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;  /* too far */
+                if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */
+
+                if (LZ4_read32(match) == LZ4_read32(ip)) {
+                    if (maybe_extMem) offset = current - matchIndex;
+                    break;   /* match found */
+                }
+
+            } while(1);
         }
 
         /* Catch up */
-        while (((ip>anchor) & (match+refDelta > lowLimit)) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
 
         /* Encode Literals */
         {   unsigned const litLength = (unsigned)(ip - anchor);
             token = op++;
-            if ((outputLimited) &&  /* Check output buffer overflow */
+            if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
                 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
                 return 0;
+            if ((outputLimited == fillOutput) &&
+                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+                op--;
+                goto _last_literals;
+            }
             if (litLength >= RUN_MASK) {
                 int len = (int)litLength-RUN_MASK;
                 *token = (RUN_MASK<<ML_BITS);
@@ -638,35 +817,70 @@
             /* Copy Literals */
             LZ4_wildCopy(op, anchor, op+litLength);
             op+=litLength;
+            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
         }
 
 _next_match:
+        /* at this stage, the following variables must be correctly set :
+         * - ip : at start of LZ operation
+         * - match : at start of previous pattern occurence; can be within current prefix, or within extDict
+         * - offset : if maybe_ext_memSegment==1 (constant)
+         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+         */
+
+        if ((outputLimited == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+            /* the match was too close to the end, rewind and go to last literals */
+            op = token;
+            goto _last_literals;
+        }
+
         /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+        if (maybe_extMem) {   /* static test */
+            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+            assert(offset <= MAX_DISTANCE && offset > 0);
+            LZ4_writeLE16(op, (U16)offset); op+=2;
+        } else  {
+            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
+            assert(ip-match <= MAX_DISTANCE);
+            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+        }
 
         /* Encode MatchLength */
         {   unsigned matchCode;
 
-            if ((dict==usingExtDict) && (lowLimit==dictionary)) {
-                const BYTE* limit;
-                match += refDelta;
-                limit = ip + (dictEnd-match);
+            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+              && (lowLimit==dictionary) /* match within extDict */ ) {
+                const BYTE* limit = ip + (dictEnd-match);
+                assert(dictEnd > match);
                 if (limit > matchlimit) limit = matchlimit;
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                 ip += MINMATCH + matchCode;
                 if (ip==limit) {
-                    unsigned const more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                     matchCode += more;
                     ip += more;
                 }
+                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
             } else {
                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                 ip += MINMATCH + matchCode;
+                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
             }
 
-            if ( outputLimited &&    /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) )
-                return 0;
+            if ((outputLimited) &&    /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
+                if (outputLimited == limitedOutput)
+                  return 0;
+                if (outputLimited == fillOutput) {
+                    /* Match description too long : reduce it */
+                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                    ip -= matchCode - newMatchCode;
+                    matchCode = newMatchCode;
+                }
+            }
             if (matchCode >= ML_MASK) {
                 *token += ML_MASK;
                 matchCode -= ML_MASK;
@@ -685,37 +899,80 @@
         anchor = ip;
 
         /* Test end of chunk */
-        if (ip > mflimit) break;
+        if (ip >= mflimitPlusOne) break;
 
         /* Fill table */
         LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
 
         /* Test next position */
-        match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
-        if (dict==usingExtDict) {
-            if (match < (const BYTE*)source) {
-                refDelta = dictDelta;
-                lowLimit = dictionary;
-            } else {
-                refDelta = 0;
-                lowLimit = (const BYTE*)source;
-        }   }
-        LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
-            && (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
-        { token=op++; *token=0; goto _next_match; }
+        if (tableType == byPtr) {
+
+            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+            if ( (match+MAX_DISTANCE >= ip)
+              && (LZ4_read32(match) == LZ4_read32(ip)) )
+            { token=op++; *token=0; goto _next_match; }
+
+        } else {   /* byU32, byU16 */
+
+            U32 const h = LZ4_hashPosition(ip, tableType);
+            U32 const current = (U32)(ip-base);
+            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if (dictDirective == usingDictCtx) {
+                if (matchIndex < startIndex) {
+                    /* there was no match, try the dictionary */
+                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                    matchIndex += dictDelta;
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;  /* required for match length counter */
+                }
+            } else if (dictDirective==usingExtDict) {
+                if (matchIndex < startIndex) {
+                    match = dictBase + matchIndex;
+                    lowLimit = dictionary;   /* required for match length counter */
+                } else {
+                    match = base + matchIndex;
+                    lowLimit = (const BYTE*)source;   /* required for match length counter */
+                }
+            } else {   /* single memory segment */
+                match = base + matchIndex;
+            }
+            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+            assert(matchIndex < current);
+            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+              && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
+              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+                token=op++;
+                *token=0;
+                if (maybe_extMem) offset = current - matchIndex;
+                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+                goto _next_match;
+            }
+        }
 
         /* Prepare next loop */
         forwardH = LZ4_hashPosition(++ip, tableType);
+
     }
 
 _last_literals:
     /* Encode Last Literals */
-    {   size_t const lastRun = (size_t)(iend - anchor);
+    {   size_t lastRun = (size_t)(iend - anchor);
         if ( (outputLimited) &&  /* Check output buffer overflow */
-            ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) )
-            return 0;
+            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+            if (outputLimited == fillOutput) {
+                /* adapt lastRun to fill 'dst' */
+                lastRun  = (olimit-op) - 1;
+                lastRun -= (lastRun+240)/255;
+            }
+            if (outputLimited == limitedOutput)
+                return 0;
+        }
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
             *op++ = RUN_MASK << ML_BITS;
@@ -725,44 +982,97 @@
             *op++ = (BYTE)(lastRun<<ML_BITS);
         }
         memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;
         op += lastRun;
     }
 
-    /* End */
-    return (int) (((char*)op)-dest);
+    if (outputLimited == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
+    return (int)(((char*)op) - dest);
 }
 
 
 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
     LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
     LZ4_resetStream((LZ4_stream_t*)state);
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {
+        if (inputSize < LZ4_64Klimit) {;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
+ * "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
-    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize,             0,    notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+    if (dstCapacity >= LZ4_compressBound(srcSize)) {
+        if (srcSize < LZ4_64Klimit) {
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
     } else {
-        if (inputSize < LZ4_64Klimit)
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput,                        byU16, noDict, noDictIssue, acceleration);
-        else
-            return LZ4_compress_generic(ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+        if (srcSize < LZ4_64Klimit) {
+            const tableType_t tableType = byU16;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            if (ctx->currentOffset) {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
+            } else {
+                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+            }
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            LZ4_prepareTable(ctx, srcSize, tableType);
+            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
     }
 }
 
 
 int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
+    int result;
 #if (LZ4_HEAPMODE)
-    void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctxPtr == NULL) return 0;
 #else
     LZ4_stream_t ctx;
-    void* const ctxPtr = &ctx;
+    LZ4_stream_t* const ctxPtr = &ctx;
 #endif
-
-    int const result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
 
 #if (LZ4_HEAPMODE)
     FREEMEM(ctxPtr);
@@ -785,172 +1095,15 @@
     LZ4_resetStream(&ctx);
 
     if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16,                        noDict, noDictIssue, acceleration);
     else
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
+        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
 }
 
 
-/*-******************************
-*  *_destSize() variant
-********************************/
-
-static int LZ4_compress_destSize_generic(
-                       LZ4_stream_t_internal* const ctx,
-                 const char* const src,
-                       char* const dst,
-                       int*  const srcSizePtr,
-                 const int targetDstSize,
-                 const tableType_t tableType)
-{
-    const BYTE* ip = (const BYTE*) src;
-    const BYTE* base = (const BYTE*) src;
-    const BYTE* lowLimit = (const BYTE*) src;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + *srcSizePtr;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = iend - LASTLITERALS;
-
-    BYTE* op = (BYTE*) dst;
-    BYTE* const oend = op + targetDstSize;
-    BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
-    BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
-    BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
-
-    U32 forwardH;
-
-
-    /* Init conditions */
-    if (targetDstSize < 1) return 0;                                     /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;            /* Unsupported input size, too large (or negative) */
-    if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
-    if (*srcSizePtr<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
-
-    /* First Byte */
-    *srcSizePtr = 0;
-    LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-    ip++; forwardH = LZ4_hashPosition(ip, tableType);
-
-    /* Main Loop */
-    for ( ; ; ) {
-        const BYTE* match;
-        BYTE* token;
-
-        /* Find a match */
-        {   const BYTE* forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = 1 << LZ4_skipTrigger;
-
-            do {
-                U32 h = forwardH;
-                ip = forwardIp;
-                forwardIp += step;
-                step = (searchMatchNb++ >> LZ4_skipTrigger);
-
-                if (unlikely(forwardIp > mflimit)) goto _last_literals;
-
-                match = LZ4_getPositionOnHash(h, ctx->hashTable, tableType, base);
-                forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, ctx->hashTable, tableType, base);
-
-            } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
-                || (LZ4_read32(match) != LZ4_read32(ip)) );
-        }
-
-        /* Catch up */
-        while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
-
-        /* Encode Literal length */
-        {   unsigned litLength = (unsigned)(ip - anchor);
-            token = op++;
-            if (op + ((litLength+240)/255) + litLength > oMaxLit) {
-                /* Not enough space for a last match */
-                op--;
-                goto _last_literals;
-            }
-            if (litLength>=RUN_MASK) {
-                unsigned len = litLength - RUN_MASK;
-                *token=(RUN_MASK<<ML_BITS);
-                for(; len >= 255 ; len-=255) *op++ = 255;
-                *op++ = (BYTE)len;
-            }
-            else *token = (BYTE)(litLength<<ML_BITS);
-
-            /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op+litLength);
-            op += litLength;
-        }
-
-_next_match:
-        /* Encode Offset */
-        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
-
-        /* Encode MatchLength */
-        {   size_t matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
-
-            if (op + ((matchLength+240)/255) > oMaxMatch) {
-                /* Match description too long : reduce it */
-                matchLength = (15-1) + (oMaxMatch-op) * 255;
-            }
-            ip += MINMATCH + matchLength;
-
-            if (matchLength>=ML_MASK) {
-                *token += ML_MASK;
-                matchLength -= ML_MASK;
-                while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
-                *op++ = (BYTE)matchLength;
-            }
-            else *token += (BYTE)(matchLength);
-        }
-
-        anchor = ip;
-
-        /* Test end of block */
-        if (ip > mflimit) break;
-        if (op > oMaxSeq) break;
-
-        /* Fill table */
-        LZ4_putPosition(ip-2, ctx->hashTable, tableType, base);
-
-        /* Test next position */
-        match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
-        LZ4_putPosition(ip, ctx->hashTable, tableType, base);
-        if ( (match+MAX_DISTANCE>=ip)
-            && (LZ4_read32(match)==LZ4_read32(ip)) )
-        { token=op++; *token=0; goto _next_match; }
-
-        /* Prepare next loop */
-        forwardH = LZ4_hashPosition(++ip, tableType);
-    }
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t lastRunSize = (size_t)(iend - anchor);
-        if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) {
-            /* adapt lastRunSize to fill 'dst' */
-            lastRunSize  = (oend-op) - 1;
-            lastRunSize -= (lastRunSize+240)/255;
-        }
-        ip = anchor + lastRunSize;
-
-        if (lastRunSize >= RUN_MASK) {
-            size_t accumulator = lastRunSize - RUN_MASK;
-            *op++ = RUN_MASK << ML_BITS;
-            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
-            *op++ = (BYTE) accumulator;
-        } else {
-            *op++ = (BYTE)(lastRunSize<<ML_BITS);
-        }
-        memcpy(op, anchor, lastRunSize);
-        op += lastRunSize;
-    }
-
-    /* End */
-    *srcSizePtr = (int) (((const char*)ip)-src);
-    return (int) (((char*)op)-dst);
-}
-
-
+/* Note!: This function leaves the stream in an unclean/broken state!
+ * It is not safe to subsequently use the same state with a _fastReset() or
+ * _continue() call without resetting it. */
 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
     LZ4_resetStream(state);
@@ -958,18 +1111,20 @@
     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
     } else {
-        if (*srcSizePtr < LZ4_64Klimit)
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, byU16);
-        else
-            return LZ4_compress_destSize_generic(&state->internal_donotuse, src, dst, srcSizePtr, targetDstSize, sizeof(void*)==8 ? byU32 : byPtr);
-    }
+        if (*srcSizePtr < LZ4_64Klimit) {
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+        } else {
+            tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
+    }   }
 }
 
 
 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
 {
 #if (LZ4_HEAPMODE)
-    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOCATOR(1, sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
+    if (ctx == NULL) return 0;
 #else
     LZ4_stream_t ctxBody;
     LZ4_stream_t* ctx = &ctxBody;
@@ -991,21 +1146,28 @@
 
 LZ4_stream_t* LZ4_createStream(void)
 {
-    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
+    if (lz4s == NULL) return NULL;
     LZ4_resetStream(lz4s);
     return lz4s;
 }
 
 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
 {
-    DEBUGLOG(4, "LZ4_resetStream");
+    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
     MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
 }
 
+void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
+    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
+}
+
 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
 {
     if (!LZ4_stream) return 0;   /* support free on NULL */
+    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
     FREEMEM(LZ4_stream);
     return (0);
 }
@@ -1015,43 +1177,70 @@
 int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
 {
     LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+    const tableType_t tableType = byU32;
     const BYTE* p = (const BYTE*)dictionary;
     const BYTE* const dictEnd = p + dictSize;
     const BYTE* base;
 
-    if ((dict->initCheck) || (dict->currentOffset > 1 GB))  /* Uninitialized structure, or reuse overflow */
-        LZ4_resetStream(LZ4_dict);
+    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+    /* It's necessary to reset the context,
+     * and not just continue it with prepareTable()
+     * to avoid any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
+    LZ4_resetStream(LZ4_dict);
+
+    /* We always increment the offset by 64 KB, since, if the dict is longer,
+     * we truncate it to the last 64k, and if it's shorter, we still want to
+     * advance by a whole window length so we can provide the guarantee that
+     * there are only valid offsets in the window, which allows an optimization
+     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    base = dictEnd - 64 KB - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += 64 KB;
+    dict->tableType = tableType;
 
     if (dictSize < (int)HASH_UNIT) {
-        dict->dictionary = NULL;
-        dict->dictSize = 0;
         return 0;
     }
 
-    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
-    dict->currentOffset += 64 KB;
-    base = p - dict->currentOffset;
-    dict->dictionary = p;
-    dict->dictSize = (U32)(dictEnd - p);
-    dict->currentOffset += dict->dictSize;
-
     while (p <= dictEnd-HASH_UNIT) {
-        LZ4_putPosition(p, dict->hashTable, byU32, base);
+        LZ4_putPosition(p, dict->hashTable, tableType, base);
         p+=3;
     }
 
     return dict->dictSize;
 }
 
+void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
+    if (dictionary_stream != NULL) {
+        /* If the current offset is zero, we will never look in the
+         * external dictionary context, since there is no value a table
+         * entry can take that indicate a miss. In that case, we need
+         * to bump the offset to something non-zero.
+         */
+        if (working_stream->internal_donotuse.currentOffset == 0) {
+            working_stream->internal_donotuse.currentOffset = 64 KB;
+        }
+        working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
+    } else {
+        working_stream->internal_donotuse.dictCtx = NULL;
+    }
+}
 
-static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
 {
-    if ((LZ4_dict->currentOffset > 0x80000000) ||
-        ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {   /* address space overflow */
+    if (LZ4_dict->currentOffset + nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
         /* rescale hash table */
         U32 const delta = LZ4_dict->currentOffset - 64 KB;
         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
         int i;
+        DEBUGLOG(4, "LZ4_renormDictT");
         for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
             if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
             else LZ4_dict->hashTable[i] -= delta;
@@ -1065,15 +1254,25 @@
 
 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
 {
+    const tableType_t tableType = byU32;
     LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;
 
-    const BYTE* smallest = (const BYTE*) source;
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+
     if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
-    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
-    LZ4_renormDictT(streamPtr, smallest);
+    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
 
+    /* invalidate tiny dictionaries */
+    if ( (streamPtr->dictSize-1 < 4)   /* intentional underflow */
+      && (dictEnd != (const BYTE*)source) ) {
+        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+        streamPtr->dictSize = 0;
+        streamPtr->dictionary = (const BYTE*)source;
+        dictEnd = (const BYTE*)source;
+    }
+
     /* Check overlapping input/dictionary space */
     {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
         if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
@@ -1086,46 +1285,61 @@
 
     /* prefix mode : source data follows dictionary */
     if (dictEnd == (const BYTE*)source) {
-        int result;
         if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
         else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
-        streamPtr->dictSize += (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
-        return result;
+            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
     }
 
     /* external dictionary mode */
     {   int result;
-        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
-        else
-            result = LZ4_compress_generic(streamPtr, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
+        if (streamPtr->dictCtx) {
+            /* We depend here on the fact that dictCtx'es (produced by
+             * LZ4_loadDict) guarantee that their tables contain no references
+             * to offsets between dictCtx->currentOffset - 64 KB and
+             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+             * to use noDictIssue even when the dict isn't a full 64 KB.
+             */
+            if (inputSize > 4 KB) {
+                /* For compressing large blobs, it is faster to pay the setup
+                 * cost to copy the dictionary's tables into the active context,
+                 * so that the compression loop is only looking into one table.
+                 */
+                memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+            }
+        } else {
+            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+            } else {
+                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+            }
+        }
         streamPtr->dictionary = (const BYTE*)source;
         streamPtr->dictSize = (U32)inputSize;
-        streamPtr->currentOffset += (U32)inputSize;
         return result;
     }
 }
 
 
-/* Hidden debug function, to force external dictionary mode */
-int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+/* Hidden debug function, to force-test external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
 {
     LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
     int result;
-    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
 
-    const BYTE* smallest = dictEnd;
-    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
-    LZ4_renormDictT(streamPtr, smallest);
+    LZ4_renormDictT(streamPtr, srcSize);
 
-    result = LZ4_compress_generic(streamPtr, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+    } else {
+        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+    }
 
     streamPtr->dictionary = (const BYTE*)source;
-    streamPtr->dictSize = (U32)inputSize;
-    streamPtr->currentOffset += (U32)inputSize;
+    streamPtr->dictSize = (U32)srcSize;
 
     return result;
 }
@@ -1196,41 +1410,72 @@
     const int safeDecode = (endOnInput==endOnInputSize);
     const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
 
+    /* Set up the "end" pointers for the shortcut. */
+    const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+    const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i)", srcSize);
 
     /* Special cases */
     if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                      /* targetOutputSize too high => just decode everything */
     if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
     if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+    if ((endOnInput) && unlikely(srcSize==0)) return -1;
 
     /* Main Loop : decode sequences */
     while (1) {
-        size_t length;
         const BYTE* match;
         size_t offset;
 
         unsigned const token = *ip++;
+        size_t length = token >> ML_BITS; /* literal length */
 
-        /* shortcut for common case :
-         * in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
-         * this shortcut was tested on x86 and x64, where it improves decoding speed.
-         * it has not yet been benchmarked on ARM, Power, mips, etc. */
-        if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
-          & (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
-          & ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
-            size_t const ll = token >> ML_BITS;
-            size_t const off = LZ4_readLE16(ip+ll);
-            const BYTE* const matchPtr = op + ll - off;  /* pointer underflow risk ? */
-            if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
-                size_t const ml = (token & ML_MASK) + MINMATCH;
-                memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
-                memcpy(op, matchPtr, 18); op += ml;
+        assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+        /* A two-stage shortcut for the most common case:
+         * 1) If the literal length is 0..14, and there is enough space,
+         * enter the shortcut and copy 16 bytes on behalf of the literals
+         * (in the fast mode, only 8 bytes can be safely copied this way).
+         * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+         * manner; but we ensure that there's enough space in the output for
+         * those 18 bytes earlier, upon entering the shortcut (in other words,
+         * there is a combined check for both stages).
+         */
+        if ( (endOnInput ? length != RUN_MASK : length <= 8)
+            /* strictly "less than" on input, to re-enter the loop with at least one byte */
+          && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+            /* Copy the literals */
+            memcpy(op, ip, endOnInput ? 16 : 8);
+            op += length; ip += length;
+
+            /* The second stage: prepare for match copying, decode full info.
+             * If it doesn't work out, the info won't be wasted. */
+            length = token & ML_MASK; /* match length */
+            offset = LZ4_readLE16(ip); ip += 2;
+            match = op - offset;
+
+            /* Do not deal with overlapping matches. */
+            if ( (length != ML_MASK)
+              && (offset >= 8)
+              && (dict==withPrefix64k || match >= lowPrefix) ) {
+                /* Copy the match. */
+                memcpy(op + 0, match + 0, 8);
+                memcpy(op + 8, match + 8, 8);
+                memcpy(op +16, match +16, 2);
+                op += length + MINMATCH;
+                /* Both stages worked, load the next token. */
                 continue;
             }
+
+            /* The second stage didn't work out, but the info is ready.
+             * Propel it right to the point of match copying. */
+            goto _copy_match;
         }
 
         /* decode literal length */
-        if ((length=(token>>ML_BITS)) == RUN_MASK) {
+        if (length == RUN_MASK) {
             unsigned s;
+            if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error;   /* overflow detection */
             do {
                 s = *ip++;
                 length += s;
@@ -1262,11 +1507,14 @@
         /* get offset */
         offset = LZ4_readLE16(ip); ip+=2;
         match = op - offset;
-        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
-        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
 
         /* get matchlength */
         length = token & ML_MASK;
+
+_copy_match:
+        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
+        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */
+
         if (length == ML_MASK) {
             unsigned s;
             do {
@@ -1313,7 +1561,7 @@
             match += inc32table[offset];
             memcpy(op+4, match, 4);
             match -= dec64table[offset];
-        } else { LZ4_copy8(op, match); match+=8; }
+        } else { memcpy(op, match, 8); match+=8; }
         op += 8;
 
         if (unlikely(cpy>oend-12)) {
@@ -1326,7 +1574,7 @@
             }
             while (op<cpy) *op++ = *match++;
         } else {
-            LZ4_copy8(op, match);
+            memcpy(op, match, 8);
             if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
         }
         op = cpy;   /* correction */
@@ -1344,30 +1592,105 @@
 }
 
 
+/*===== Instantiate the API decoding functions. =====*/
+
 LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, full, 0, noDict,
+                                  (BYTE*)dest, NULL, 0);
 }
 
 LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
 {
-    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+                                  endOnInputSize, partial, targetOutputSize,
+                                  noDict, (BYTE*)dest, NULL, 0);
 }
 
 LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, full, 0, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);
 }
 
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, full, 0, withPrefix64k,
+                                  (BYTE*)dest - 64 KB, NULL, 0);
+}
+
+/* Another obsolete API function, paired with the previous one. */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    /* LZ4_decompress_fast doesn't validate match offsets,
+     * and thus serves well with any prefixed dictionary. */
+    return LZ4_decompress_fast(source, dest, originalSize);
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                               size_t prefixSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, full, 0, noDict,
+                                  (BYTE*)dest-prefixSize, NULL, 0);
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */
+#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict
+int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, full, 0, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_O2_GCC_PPC64LE
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+                                       const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, full, 0, usingExtDict,
+                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  endOnInputSize, full, 0, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+                                   size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize,
+                                  endOnOutputSize, full, 0, usingExtDict,
+                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}
 
 /*===== streaming decompression functions =====*/
 
 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
 {
-    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t));
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
     return lz4s;
 }
 
@@ -1378,12 +1701,11 @@
     return 0;
 }
 
-/*!
- * LZ4_setStreamDecode() :
- * Use this function to instruct where to find the dictionary.
- * This function is not necessary if previous data is still available where it was decoded.
- * Loading a size of 0 is allowed (same effect as no dictionary).
- * Return : 1 if OK, 0 if error
+/*! LZ4_setStreamDecode() :
+ *  Use this function to instruct where to find the dictionary.
+ *  This function is not necessary if previous data is still available where it was decoded.
+ *  Loading a size of 0 is allowed (same effect as no dictionary).
+ * @return : 1 if OK, 0 if error
  */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
 {
@@ -1395,6 +1717,25 @@
     return 1;
 }
 
+/*! LZ4_decoderRingBufferSize() :
+ *  when setting a ring buffer for streaming decompression (optional scenario),
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ *  Note : in a ring buffer scenario,
+ *  blocks are presumed decompressed next to each other.
+ *  When not enough space remains for next block (remainingSize < maxBlockSize),
+ *  decoding resumes from beginning of ring buffer.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+    if (maxBlockSize < 0) return 0;
+    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
+    if (maxBlockSize < 16) maxBlockSize = 16;
+    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
 /*
 *_continue() :
     These decoding functions allow decompression of multiple blocks in "streaming" mode.
@@ -1408,19 +1749,32 @@
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
     int result;
 
-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        /* The first call, no dictionary yet. */
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd = (BYTE*)dest + result;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        /* They're rolling the current segment. */
+        if (lz4sd->prefixSize >= 64 KB - 1)
+            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        else if (lz4sd->extDictSize == 0)
+            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+                                                         lz4sd->prefixSize);
+        else
+            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize += result;
         lz4sd->prefixEnd  += result;
     } else {
+        /* The buffer wraps around, or they're switching to another buffer. */
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                        endOnInputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize = result;
         lz4sd->prefixEnd  = (BYTE*)dest + result;
@@ -1435,19 +1789,26 @@
     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
     int result;
 
-    if (lz4sd->prefixEnd == (BYTE*)dest) {
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+    if (lz4sd->prefixSize == 0) {
+        assert(lz4sd->extDictSize == 0);
+        result = LZ4_decompress_fast(source, dest, originalSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+            result = LZ4_decompress_fast(source, dest, originalSize);
+        else
+            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize += originalSize;
         lz4sd->prefixEnd  += originalSize;
     } else {
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_generic(source, dest, 0, originalSize,
-                                        endOnOutputSize, full, 0,
-                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+                                             lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0) return result;
         lz4sd->prefixSize = originalSize;
         lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
@@ -1464,36 +1825,23 @@
     the dictionary must be explicitly provided within parameters
 */
 
-LZ4_FORCE_O2_GCC_PPC64LE
-LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
-{
-    if (dictSize==0)
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
-    if (dictStart+dictSize == dest) {
-        if (dictSize >= (int)(64 KB - 1))
-            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
-        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
-    }
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
-}
-
-LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+    if (dictSize==0)
+        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+    if (dictStart+dictSize == dest) {
+        if (dictSize >= 64 KB - 1)
+            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
+    }
+    return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
 {
-    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
-}
-
-/* debug function */
-LZ4_FORCE_O2_GCC_PPC64LE
-int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+    if (dictSize==0 || dictStart+dictSize == dest)
+        return LZ4_decompress_fast(source, dest, originalSize);
+    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
 }
 
 
@@ -1501,64 +1849,67 @@
 *  Obsolete Functions
 ***************************************************/
 /* obsolete compression functions */
-int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); }
-int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); }
-int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
-int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); }
-int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); }
-int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); }
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}
 
 /*
-These function names are deprecated and should no longer be used.
+These decompression functions are deprecated and should no longer be used.
 They are only provided here for compatibility with older user programs.
 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
 */
-int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
-
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+    return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}
 
 /* Obsolete Streaming functions */
 
 int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
 
-static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base)
-{
-    MEM_INIT(lz4ds, 0, sizeof(LZ4_stream_t));
-    lz4ds->internal_donotuse.bufferStart = base;
-}
-
 int LZ4_resetStreamState(void* state, char* inputBuffer)
 {
-    if ((((uptrval)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
-    LZ4_init((LZ4_stream_t*)state, (BYTE*)inputBuffer);
+    (void)inputBuffer;
+    LZ4_resetStream((LZ4_stream_t*)state);
     return 0;
 }
 
 void* LZ4_create (char* inputBuffer)
 {
-    LZ4_stream_t* lz4ds = (LZ4_stream_t*)ALLOCATOR(8, sizeof(LZ4_stream_t));
-    LZ4_init (lz4ds, (BYTE*)inputBuffer);
-    return lz4ds;
+    (void)inputBuffer;
+    return LZ4_createStream();
 }
 
-char* LZ4_slideInputBuffer (void* LZ4_Data)
+char* LZ4_slideInputBuffer (void* state)
 {
-    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)LZ4_Data)->internal_donotuse;
-    int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB);
-    return (char*)(ctx->bufferStart + dictSize);
-}
-
-/* Obsolete streaming decompression functions */
-
-int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
-{
-    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
-}
-
-int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
-{
-    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+    /* avoid const char * -> char * conversion warning */
+    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
 }
 
 #endif   /* LZ4_COMMONDEFS_ONLY */

diff --git a/lib/lz4.h b/lib/lz4.h
index a06b8a4..7d13122 100644
--- a/lib/lz4.h
+++ b/lib/lz4.h

@@ -93,7 +93,7 @@
 /*------   Version   ------*/
 #define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
 #define LZ4_VERSION_MINOR    8    /* for new (non-breaking) interface capabilities */
-#define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 
@@ -102,8 +102,8 @@
 #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
 #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
 
-LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; to be used when checking dll version */
-LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; to be used when checking dll version */
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; unseful to check dll version */
 
 
 /*-************************************
@@ -113,7 +113,7 @@
  * LZ4_MEMORY_USAGE :
  * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
  * Increasing memory usage improves compression ratio
- * Reduced memory usage can improve speed, due to cache effect
+ * Reduced memory usage may improve speed, thanks to cache effect
  * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
  */
 #ifndef LZ4_MEMORY_USAGE
@@ -128,12 +128,12 @@
     into already allocated 'dst' buffer of size 'dstCapacity'.
     Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
     It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'src' into a limited 'dst' budget,
+    If the function cannot compress 'src' into a more limited 'dst' budget,
     compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dst' content is not valid.
-    This function never writes outside 'dst' buffer, nor read outside 'source' buffer.
-        srcSize : supported max value is LZ4_MAX_INPUT_VALUE
-        dstCapacity : full or partial size of buffer 'dst' (which must be already allocated)
+    Note : as a consequence, 'dst' content is not valid.
+    Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+        srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+        dstCapacity : size of buffer 'dst' (which must be already allocated)
         return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
                   or 0 if compression fails */
 LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
@@ -144,8 +144,7 @@
     return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
              If destination buffer is not large enough, decoding will stop and output an error code (negative value).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
+             This function is protected against malicious data packets.
 */
 LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
 
@@ -161,20 +160,20 @@
     Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
     This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
         inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
         return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+              or 0, if input size is incorrect (too large or negative)
 */
 LZ4LIB_API int LZ4_compressBound(int inputSize);
 
 /*!
 LZ4_compress_fast() :
-    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
     The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
     It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
     An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
 */
 LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
@@ -205,15 +204,19 @@
 
 
 /*!
-LZ4_decompress_fast() : (unsafe!!)
-    originalSize : is the original uncompressed size
-    return : the number of bytes read from the source buffer (in other words, the compressed size)
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             Destination buffer must be already allocated. Its size must be >= 'originalSize' bytes.
-    note : This function respects memory boundaries for *properly formed* compressed data.
-           It is a bit faster than LZ4_decompress_safe().
-           However, it does not provide any protection against intentionally modified data stream (malicious input).
-           Use this function in trusted environment only (data to decode comes from a trusted source).
+LZ4_decompress_fast() : **unsafe!**
+This function is a bit faster than LZ4_decompress_safe(),
+but it may misbehave on malformed input because it doesn't perform full validation of compressed data.
+    originalSize : is the uncompressed size to regenerate
+                   Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes.
+    return : number of bytes read from source buffer (== compressed size).
+             If the source stream is detected malformed, the function stops decoding and return a negative result.
+    note : This function is only usable if the originalSize of uncompressed data is known in advance.
+           The caller should also check that all the compressed input has been consumed properly,
+           i.e. that the return value matches the size of the buffer with compressed input.
+           The function never writes past the output buffer.  However, since it doesn't know its 'src' size,
+           it may read past the intended input.  Also, because match offsets are not validated during decoding,
+           reads from 'src' may underflow.  Use this function in trusted environment **only**.
 */
 LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
 
@@ -222,12 +225,12 @@
     This function decompress a compressed block of size 'srcSize' at position 'src'
     into destination buffer 'dst' of size 'dstCapacity'.
     The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that.
-    However, it's not accurate, and may write more than 'targetOutputSize' (but <= dstCapacity).
+    However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity).
    @return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity)
-       Note : this number can be < 'targetOutputSize' should the compressed block contain less data.
-             Always control how many bytes were decoded.
-             If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets.
+        Note : this number can also be < targetOutputSize, if compressed block contains less data.
+            Therefore, always control how many bytes were decoded.
+            If source stream is detected malformed, function returns a negative result.
+            This function is protected against malicious data packets.
 */
 LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
 
@@ -235,7 +238,7 @@
 /*-*********************************************
 *  Streaming Compression Functions
 ***********************************************/
-typedef union LZ4_stream_u LZ4_stream_t;   /* incomplete type (defined later) */
+typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
 
 /*! LZ4_createStream() and LZ4_freeStream() :
  *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
@@ -259,67 +262,93 @@
 LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
 
 /*! LZ4_compress_fast_continue() :
- *  Compress content into 'src' using data from previously compressed blocks, improving compression ratio.
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
  *  'dst' buffer must be already allocated.
  *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
  *
- *  Important : Up to 64KB of previously compressed data is assumed to remain present and unmodified in memory !
- *  Special 1 : If input buffer is a double-buffer, it can have any size, including < 64 KB.
- *  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *  Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory!
+ *
+ *  Special 1 : When input is a double-buffer, they can have any size, including < 64 KB.
+ *              Make sure that buffers are separated by at least one byte.
+ *              This way, each block only depends on previous block.
+ *  Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
  *
  * @return : size of compressed block
- *           or 0 if there is an error (typically, compressed data cannot fit into 'dst')
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
  *  After an error, the stream status is invalid, it can only be reset or freed.
  */
 LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
 
 /*! LZ4_saveDict() :
- *  If previously compressed data block is not guaranteed to remain available at its current memory location,
+ *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
  *  save it into a safer place (char* safeBuffer).
- *  Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
- *  @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
  */
-LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
 
 
 /*-**********************************************
 *  Streaming Decompression Functions
 *  Bufferless synchronous API
 ************************************************/
-typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* incomplete type (defined later) */
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;   /* tracking context */
 
 /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
- *  creation / destruction of streaming decompression tracking structure.
- *  A tracking structure can be re-used multiple times sequentially. */
+ *  creation / destruction of streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
 LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
 LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 
 /*! LZ4_setStreamDecode() :
- *  An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
  *  Use this function to start decompression of a new stream of blocks.
- *  A dictionary can optionnally be set. Use NULL or size 0 for a simple reset order.
+ *  A dictionary can optionnally be set. Use NULL or size 0 for a reset order.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
  * @return : 1 if OK, 0 if error
  */
 LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 
+/*! LZ4_decoderRingBufferSize() : v1.8.2
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ *  at which stage it resumes from beginning of ring buffer.
+ *  When setting such a ring buffer for streaming decompression,
+ *  provides the minimum size of this ring buffer
+ *  to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs))  /* for static allocation; mbs presumed valid */
+
 /*! LZ4_decompress_*_continue() :
  *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
  *  A block is an unsplittable entity, it must be presented entirely to a decompression function.
- *  Decompression functions only accept one block at a time.
- *  Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB).
+ *  Decompression functions only accepts one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
  *
- *  Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
- *  - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
- *    In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
- *  - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
- *    maxBlockSize is implementation dependent. It's the maximum size of any single block.
+ *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly same update rule (block boundaries at same positions),
+ *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
  *    In which case, encoding and decoding buffers do not need to be synchronized,
  *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
- *  - _At least_ 64 KB + 8 bytes + maxBlockSize.
- *    In which case, encoding and decoding buffers do not need to be synchronized,
- *    and encoding ring buffer can have any size, including larger than decoding buffer.
- *  Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
- *  and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
+ *
+ *  Whenever these conditions are not possible,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
 LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
 LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
@@ -329,6 +358,7 @@
  *  These decoding functions work the same as
  *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
  *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
  */
 LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
 LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
@@ -337,6 +367,94 @@
 /*^**********************************************
  * !!!!!!   STATIC LINKING ONLY   !!!!!!
  ***********************************************/
+
+/*-************************************
+ *  Unstable declarations
+ **************************************
+ * Declarations in this section should be considered unstable.
+ * Use at your own peril, etc., etc.
+ * They may be removed in the future.
+ * Their signatures may change.
+ **************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+/*! LZ4_resetStream_fast() :
+ *  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
+ *  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
+ *
+ *  Note:
+ *  Using this in advance of a non- streaming-compression function is redundant,
+ *  and potentially bad for performance, since they all perform their own custom
+ *  reset internally.
+ *
+ *  Differences from LZ4_resetStream():
+ *  When an LZ4_stream_t is known to be in a internally coherent state,
+ *  it can often be prepared for a new compression with almost no work, only
+ *  sometimes falling back to the full, expensive reset that is always required
+ *  when the stream is in an indeterminate state (i.e., the reset performed by
+ *  LZ4_resetStream()).
+ *
+ *  LZ4_streams are guaranteed to be in a valid state when:
+ *  - returned from LZ4_createStream()
+ *  - reset by LZ4_resetStream()
+ *  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
+ *  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
+ *  - the stream was in a valid state and was then used in any compression call
+ *    that returned success
+ *  - the stream was in an indeterminate state and was used in a compression
+ *    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
+ *    that returned success
+ *
+ *  When a stream isn't known to be in a valid state, it is not safe to pass to
+ *  any fastReset or streaming function. It must first be cleansed by the full
+ *  LZ4_resetStream().
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ *  A variant of LZ4_compress_fast_extState().
+ *
+ *  Using this variant avoids an expensive initialization step. It is only safe
+ *  to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
+ *  initialized"). From a high level, the difference is that this function
+ *  initializes the provided state with a call to something like
+ *  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
+ *  call to LZ4_resetStream().
+ */
+LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ *  This is an experimental API that allows for the efficient use of a
+ *  static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDict() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionary stream pointer may be NULL, in which
+ *  case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
+
+#endif
+
 /*-************************************
  *  Private definitions
  **************************************
@@ -351,14 +469,16 @@
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     uint32_t hashTable[LZ4_HASH_SIZE_U32];
     uint32_t currentOffset;
-    uint32_t initCheck;
+    uint16_t initCheck;
+    uint16_t tableType;
     const uint8_t* dictionary;
-    uint8_t* bufferStart;   /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     uint32_t dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const uint8_t* externalDict;
@@ -369,14 +489,16 @@
 
 #else
 
-typedef struct {
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
     unsigned int hashTable[LZ4_HASH_SIZE_U32];
     unsigned int currentOffset;
-    unsigned int initCheck;
+    unsigned short initCheck;
+    unsigned short tableType;
     const unsigned char* dictionary;
-    unsigned char* bufferStart;   /* obsolete, used for slideInputBuffer */
+    const LZ4_stream_t_internal* dictCtx;
     unsigned int dictSize;
-} LZ4_stream_t_internal;
+};
 
 typedef struct {
     const unsigned char* externalDict;
@@ -433,11 +555,9 @@
 #  define LZ4_DEPRECATED(message)   /* disable deprecation warnings */
 #else
 #  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if defined(__clang__) /* clang doesn't handle mixed C++11 and CNU attributes */
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#  elif (LZ4_GCC_VERSION >= 405)
+#  elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
 #  elif (LZ4_GCC_VERSION >= 301)
 #    define LZ4_DEPRECATED(message) __attribute__((deprecated))
@@ -450,26 +570,34 @@
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
 /* Obsolete compression functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress               (const char* source, char* dest, int sourceSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress               (const char* source, char* dest, int sourceSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
 
 /* Obsolete decompression functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast() instead") int LZ4_uncompress (const char* source, char* dest, int outputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe() instead") int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
 
-/* Obsolete streaming functions; use new streaming interface whenever possible */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API     char* LZ4_slideInputBuffer (void* state);
 
 /* Obsolete streaming decoding functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
 
 #endif /* LZ4_H_2983827168210 */
 

diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index 0b26f75..e1d0b1d 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c

@@ -47,10 +47,24 @@
 
 
 /*-************************************
+*  Tuning parameters
+**************************************/
+/*
+ * LZ4F_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4F_HEAPMODE
+#  define LZ4F_HEAPMODE 0
+#endif
+
+
+/*-************************************
 *  Memory routines
 **************************************/
 #include <stdlib.h>   /* malloc, calloc, free */
-#define ALLOCATOR(s)   calloc(1,s)
+#define ALLOC(s)   malloc(s)
+#define ALLOC_AND_ZERO(s)   calloc(1,s)
 #define FREEMEM        free
 #include <string.h>   /* memset, memcpy, memmove */
 #define MEM_INIT       memset
@@ -59,7 +73,9 @@
 /*-************************************
 *  Includes
 **************************************/
-#include "lz4frame_static.h"
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#define LZ4_STATIC_LINKING_ONLY
 #include "lz4.h"
 #define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
@@ -80,6 +96,19 @@
 
 #define LZ4F_STATIC_ASSERT(c)    { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG)
+#  include <stdio.h>
+static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                                  \
+                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");           \
+                    fprintf(stderr, __VA_ARGS__);             \
+                    fprintf(stderr, " \n");                   \
+            }   }
+#else
+#  define DEBUGLOG(l, ...)      {}    /* disabled */
+#endif
+
 
 /*-************************************
 *  Basic Types
@@ -188,7 +217,8 @@
     U64    totalInSize;
     XXH32_state_t xxh;
     void*  lz4CtxPtr;
-    U32    lz4CtxLevel;   /* 0: unallocated;  1: LZ4_stream_t;  3: LZ4_streamHC_t */
+    U16    lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
+    U16    lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */
 } LZ4F_cctx_t;
 
 
@@ -279,7 +309,7 @@
                                           size_t alreadyBuffered)
 {
     LZ4F_preferences_t prefsNull;
-    memset(&prefsNull, 0, sizeof(prefsNull));
+    MEM_INIT(&prefsNull, 0, sizeof(prefsNull));
     prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;   /* worst case */
     {   const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
         U32 const flush = prefsPtr->autoFlush | (srcSize==0);
@@ -308,7 +338,7 @@
     size_t const headerSize = maxFHSize;      /* max header size, including optional fields */
 
     if (preferencesPtr!=NULL) prefs = *preferencesPtr;
-    else memset(&prefs, 0, sizeof(prefs));
+    else MEM_INIT(&prefs, 0, sizeof(prefs));
     prefs.autoFlush = 1;
 
     return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0);;
@@ -324,27 +354,22 @@
  * @return : number of bytes written into dstBuffer,
  *           or an error code if it fails (can be tested using LZ4F_isError())
  */
-size_t LZ4F_compressFrame_usingCDict(void* dstBuffer, size_t dstCapacity,
+size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx,
+                                     void* dstBuffer, size_t dstCapacity,
                                const void* srcBuffer, size_t srcSize,
                                const LZ4F_CDict* cdict,
                                const LZ4F_preferences_t* preferencesPtr)
 {
-    LZ4F_cctx_t cctxI;
-    LZ4_stream_t lz4ctx;   /* pretty large on stack */
     LZ4F_preferences_t prefs;
     LZ4F_compressOptions_t options;
     BYTE* const dstStart = (BYTE*) dstBuffer;
     BYTE* dstPtr = dstStart;
     BYTE* const dstEnd = dstStart + dstCapacity;
 
-    memset(&cctxI, 0, sizeof(cctxI));
-    cctxI.version = LZ4F_VERSION;
-    cctxI.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
-
     if (preferencesPtr!=NULL)
         prefs = *preferencesPtr;
     else
-        memset(&prefs, 0, sizeof(prefs));
+        MEM_INIT(&prefs, 0, sizeof(prefs));
     if (prefs.frameInfo.contentSize != 0)
         prefs.frameInfo.contentSize = (U64)srcSize;   /* auto-correct content size if selected (!=0) */
 
@@ -353,32 +378,24 @@
     if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
         prefs.frameInfo.blockMode = LZ4F_blockIndependent;   /* only one block => no need for inter-block link */
 
-    if (prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
-        cctxI.lz4CtxPtr = &lz4ctx;
-        cctxI.lz4CtxLevel = 1;
-    }  /* fast compression context pre-created on stack */
-
-    memset(&options, 0, sizeof(options));
+    MEM_INIT(&options, 0, sizeof(options));
     options.stableSrc = 1;
 
     if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs))  /* condition to guarantee success */
         return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
 
-    { size_t const headerSize = LZ4F_compressBegin_usingCDict(&cctxI, dstBuffer, dstCapacity, cdict, &prefs);  /* write header */
+    { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs);  /* write header */
       if (LZ4F_isError(headerSize)) return headerSize;
       dstPtr += headerSize;   /* header size */ }
 
-    { size_t const cSize = LZ4F_compressUpdate(&cctxI, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
+    { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, dstEnd-dstPtr, srcBuffer, srcSize, &options);
       if (LZ4F_isError(cSize)) return cSize;
       dstPtr += cSize; }
 
-    { size_t const tailSize = LZ4F_compressEnd(&cctxI, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+    { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
       if (LZ4F_isError(tailSize)) return tailSize;
       dstPtr += tailSize; }
 
-    if (prefs.compressionLevel >= LZ4HC_CLEVEL_MIN)  /* Ctx allocation only for lz4hc */
-        FREEMEM(cctxI.lz4CtxPtr);
-
     return (dstPtr - dstStart);
 }
 
@@ -394,9 +411,44 @@
                     const void* srcBuffer, size_t srcSize,
                     const LZ4F_preferences_t* preferencesPtr)
 {
-    return LZ4F_compressFrame_usingCDict(dstBuffer, dstCapacity,
-                                         srcBuffer, srcSize,
-                                         NULL, preferencesPtr);
+    size_t result;
+#if (LZ4F_HEAPMODE)
+    LZ4F_cctx_t *cctxPtr;
+    result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION);
+    if (LZ4F_isError(result)) return result;
+#else
+    LZ4F_cctx_t cctx;
+    LZ4_stream_t lz4ctx;
+    LZ4F_cctx_t *cctxPtr = &cctx;
+
+    DEBUGLOG(4, "LZ4F_compressFrame");
+    MEM_INIT(&cctx, 0, sizeof(cctx));
+    cctx.version = LZ4F_VERSION;
+    cctx.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */
+    if (preferencesPtr == NULL ||
+        preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN)
+    {
+        LZ4_resetStream(&lz4ctx);
+        cctxPtr->lz4CtxPtr = &lz4ctx;
+        cctxPtr->lz4CtxAlloc = 1;
+        cctxPtr->lz4CtxState = 1;
+    }
+#endif
+
+    result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity,
+                                           srcBuffer, srcSize,
+                                           NULL, preferencesPtr);
+
+#if (LZ4F_HEAPMODE)
+    LZ4F_freeCompressionContext(cctxPtr);
+#else
+    if (preferencesPtr != NULL &&
+        preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN)
+    {
+        FREEMEM(cctxPtr->lz4CtxPtr);
+    }
+#endif
+    return result;
 }
 
 
@@ -419,13 +471,14 @@
 LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize)
 {
     const char* dictStart = (const char*)dictBuffer;
-    LZ4F_CDict* cdict = (LZ4F_CDict*) malloc(sizeof(*cdict));
+    LZ4F_CDict* cdict = (LZ4F_CDict*) ALLOC(sizeof(*cdict));
+    DEBUGLOG(4, "LZ4F_createCDict");
     if (!cdict) return NULL;
     if (dictSize > 64 KB) {
         dictStart += dictSize - 64 KB;
         dictSize = 64 KB;
     }
-    cdict->dictContent = ALLOCATOR(dictSize);
+    cdict->dictContent = ALLOC(dictSize);
     cdict->fastCtx = LZ4_createStream();
     cdict->HCCtx = LZ4_createStreamHC();
     if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {
@@ -433,9 +486,8 @@
         return NULL;
     }
     memcpy(cdict->dictContent, dictStart, dictSize);
-    LZ4_resetStream(cdict->fastCtx);
     LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);
-    LZ4_resetStreamHC(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
+    LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
     LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
     return cdict;
 }
@@ -464,7 +516,7 @@
  */
 LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
 {
-    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOCATOR(sizeof(LZ4F_cctx_t));
+    LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t));
     if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed);
 
     cctxPtr->version = version;
@@ -490,6 +542,36 @@
 }
 
 
+/**
+ * This function prepares the internal LZ4(HC) stream for a new compression,
+ * resetting the context and attaching the dictionary, if there is one.
+ *
+ * It needs to be called at the beginning of each independent compression
+ * stream (i.e., at the beginning of a frame in blockLinked mode, or at the
+ * beginning of each block in blockIndependent mode).
+ */
+static void LZ4F_initStream(void* ctx,
+                            const LZ4F_CDict* cdict,
+                            int level,
+                            LZ4F_blockMode_t blockMode) {
+    if (level < LZ4HC_CLEVEL_MIN) {
+        if (cdict != NULL || blockMode == LZ4F_blockLinked) {
+            /* In these cases, we will call LZ4_compress_fast_continue(),
+             * which needs an already reset context. Otherwise, we'll call a
+             * one-shot API. The non-continued APIs internally perform their own
+             * resets at the beginning of their calls, where they know what
+             * tableType they need the context to be in. So in that case this
+             * would be misguided / wasted work. */
+            LZ4_resetStream_fast((LZ4_stream_t*)ctx);
+        }
+        LZ4_attach_dictionary((LZ4_stream_t *)ctx, cdict ? cdict->fastCtx : NULL);
+    } else {
+        LZ4_resetStreamHC_fast((LZ4_streamHC_t*)ctx, level);
+        LZ4_attach_HC_dictionary((LZ4_streamHC_t *)ctx, cdict ? cdict->HCCtx : NULL);
+    }
+}
+
+
 /*! LZ4F_compressBegin_usingCDict() :
  *  init streaming compression and writes frame header into dstBuffer.
  *  dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes.
@@ -507,21 +589,33 @@
     BYTE* headerStart;
 
     if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
-    memset(&prefNull, 0, sizeof(prefNull));
+    MEM_INIT(&prefNull, 0, sizeof(prefNull));
     if (preferencesPtr == NULL) preferencesPtr = &prefNull;
     cctxPtr->prefs = *preferencesPtr;
 
     /* Ctx Management */
-    {   U32 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;  /* 0:nothing ; 1:LZ4 table ; 2:HC tables */
-        if (cctxPtr->lz4CtxLevel < ctxTypeID) {
+    {   U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 1 : 2;
+        if (cctxPtr->lz4CtxAlloc < ctxTypeID) {
             FREEMEM(cctxPtr->lz4CtxPtr);
-            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
                 cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
-            else
+            } else {
                 cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
+            }
             if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed);
-            cctxPtr->lz4CtxLevel = ctxTypeID;
-    }   }
+            cctxPtr->lz4CtxAlloc = ctxTypeID;
+            cctxPtr->lz4CtxState = ctxTypeID;
+        } else if (cctxPtr->lz4CtxState != ctxTypeID) {
+            /* otherwise, a sufficient buffer is allocated, but we need to
+             * reset it to the correct context type */
+            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
+                LZ4_resetStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr);
+            } else {
+                LZ4_resetStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+            }
+            cctxPtr->lz4CtxState = ctxTypeID;
+        }
+    }
 
     /* Buffer Management */
     if (cctxPtr->prefs.frameInfo.blockSizeID == 0)
@@ -535,7 +629,7 @@
         if (cctxPtr->maxBufferSize < requiredBuffSize) {
             cctxPtr->maxBufferSize = 0;
             FREEMEM(cctxPtr->tmpBuff);
-            cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize);
+            cctxPtr->tmpBuff = (BYTE*)ALLOC_AND_ZERO(requiredBuffSize);
             if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed);
             cctxPtr->maxBufferSize = requiredBuffSize;
     }   }
@@ -547,19 +641,10 @@
     cctxPtr->cdict = cdict;
     if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) {
         /* frame init only for blockLinked : blockIndependent will be init at each block */
-        if (cdict) {
-            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) {
-                memcpy(cctxPtr->lz4CtxPtr, cdict->fastCtx, sizeof(*cdict->fastCtx));
-            } else {
-                memcpy(cctxPtr->lz4CtxPtr, cdict->HCCtx, sizeof(*cdict->HCCtx));
-                LZ4_setCompressionLevel((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
-            }
-        } else {
-            if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN)
-                LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr));
-            else
-                LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel);
-        }
+        LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked);
+    }
+    if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) {
+          LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed);
     }
 
     /* Magic Number */
@@ -654,11 +739,12 @@
 static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
 {
     int const acceleration = (level < -1) ? -level : 1;
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
     if (cdict) {
-        memcpy(ctx, cdict->fastCtx, sizeof(*cdict->fastCtx));
         return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration);
+    } else {
+        return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, acceleration);
     }
-    return LZ4_compress_fast_extState(ctx, src, dst, srcSize, dstCapacity, acceleration);
 }
 
 static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
@@ -670,12 +756,11 @@
 
 static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
 {
+    LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent);
     if (cdict) {
-        memcpy(ctx, cdict->HCCtx, sizeof(*cdict->HCCtx));
-        LZ4_setCompressionLevel((LZ4_streamHC_t*)ctx, level);
         return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity);
     }
-    return LZ4_compress_HC_extStateHC(ctx, src, dst, srcSize, dstCapacity, level);
+    return LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level);
 }
 
 static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict)
@@ -724,10 +809,12 @@
     LZ4F_lastBlockStatus lastBlockCompressed = notDone;
     compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
 
+    DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize);
 
     if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC);
-    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
-    memset(&cOptionsNull, 0, sizeof(cOptionsNull));
+    if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize))
+        return err0r(LZ4F_ERROR_dstMaxSize_tooSmall);
+    MEM_INIT(&cOptionsNull, 0, sizeof(cOptionsNull));
     if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
 
     /* complete tmp buffer */
@@ -931,7 +1018,7 @@
  */
 LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
 {
-    LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOCATOR(sizeof(LZ4F_dctx));
+    LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx));
     if (dctx==NULL) return err0r(LZ4F_ERROR_GENERIC);
 
     dctx->version = versionNumber;
@@ -1003,7 +1090,7 @@
 
     /* need to decode header to get frameInfo */
     if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete);   /* minimal frame header size */
-    memset(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
+    MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo));
 
     /* special case : skippable frames */
     if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) {
@@ -1136,24 +1223,31 @@
 
 /* LZ4F_updateDict() :
  * only used for LZ4F_blockLinked mode */
-static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+static void LZ4F_updateDict(LZ4F_dctx* dctx,
+                      const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart,
+                      unsigned withinTmp)
 {
     if (dctx->dictSize==0)
         dctx->dict = (const BYTE*)dstPtr;   /* priority to dictionary continuity */
 
-    if (dctx->dict + dctx->dictSize == dstPtr) {  /* dictionary continuity */
+    if (dctx->dict + dctx->dictSize == dstPtr) {  /* dictionary continuity, directly within dstBuffer */
         dctx->dictSize += dstSize;
         return;
     }
 
-    if (dstPtr - dstPtr0 + dstSize >= 64 KB) {  /* dstBuffer large enough to become dictionary */
-        dctx->dict = (const BYTE*)dstPtr0;
-        dctx->dictSize = dstPtr - dstPtr0 + dstSize;
+    if (dstPtr - dstBufferStart + dstSize >= 64 KB) {  /* history in dstBuffer becomes large enough to become dictionary */
+        dctx->dict = (const BYTE*)dstBufferStart;
+        dctx->dictSize = dstPtr - dstBufferStart + dstSize;
         return;
     }
 
-    if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {
-        /* assumption : dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart */
+    assert(dstSize < 64 KB);   /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */
+
+    /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */
+
+    if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) {   /* continue history within tmpOutBuffer */
+        /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */
+        assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart);
         dctx->dictSize += dstSize;
         return;
     }
@@ -1174,7 +1268,7 @@
 
     if (dctx->dict == dctx->tmpOutBuffer) {    /* copy dst into tmp to complete dict */
         if (dctx->dictSize + dstSize > dctx->maxBufferSize) {  /* tmp buffer not large enough */
-            size_t const preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+            size_t const preserveSize = 64 KB - dstSize;
             memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
             dctx->dictSize = preserveSize;
         }
@@ -1184,7 +1278,7 @@
     }
 
     /* join dict & dest into tmp */
-    {   size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+    {   size_t preserveSize = 64 KB - dstSize;
         if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize;
         memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize);
         memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize);
@@ -1230,7 +1324,7 @@
     size_t nextSrcSizeHint = 1;
 
 
-    memset(&optionsNull, 0, sizeof(optionsNull));
+    MEM_INIT(&optionsNull, 0, sizeof(optionsNull));
     if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
     *srcSizePtr = 0;
     *dstSizePtr = 0;
@@ -1251,7 +1345,7 @@
             }
             dctx->tmpInSize = 0;
             if (srcEnd-srcPtr == 0) return minFHSize;   /* 0-size input */
-            dctx->tmpInTarget = minFHSize;   /* minimum to attempt decode */
+            dctx->tmpInTarget = minFHSize;   /* minimum size to decode header */
             dctx->dStage = dstage_storeFrameHeader;
             /* fall-through */
 
@@ -1279,11 +1373,11 @@
                 if (bufferNeeded > dctx->maxBufferSize) {   /* tmp buffers too small */
                     dctx->maxBufferSize = 0;   /* ensure allocation will be re-attempted on next entry*/
                     FREEMEM(dctx->tmpIn);
-                    dctx->tmpIn = (BYTE*)ALLOCATOR(dctx->maxBlockSize + 4 /* block checksum */);
+                    dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + 4 /* block checksum */);
                     if (dctx->tmpIn == NULL)
                         return err0r(LZ4F_ERROR_allocation_failed);
                     FREEMEM(dctx->tmpOutBuffer);
-                    dctx->tmpOutBuffer= (BYTE*)ALLOCATOR(bufferNeeded);
+                    dctx->tmpOutBuffer= (BYTE*)ALLOC(bufferNeeded);
                     if (dctx->tmpOutBuffer== NULL)
                         return err0r(LZ4F_ERROR_allocation_failed);
                     dctx->maxBufferSize = bufferNeeded;
@@ -1408,8 +1502,7 @@
                     U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
                     if (readCRC != calcCRC)
                         return err0r(LZ4F_ERROR_blockChecksum_invalid);
-                }
-            }
+            }   }
             dctx->dStage = dstage_getBlockHeader;  /* new block */
             break;
 
@@ -1450,11 +1543,19 @@
             }   }
 
             if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
+                const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
                 /* enough capacity in `dst` to decompress directly there */
-                int const decodedSize = LZ4_decompress_safe_usingDict(
+                decodedSize = LZ4_decompress_safe_usingDict(
                         (const char*)selectedIn, (char*)dstPtr,
                         (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
-                        (const char*)dctx->dict, (int)dctx->dictSize);
+                        dict, (int)dictSize);
                 if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC);   /* decompression failed */
                 if (dctx->frameInfo.contentChecksumFlag)
                     XXH32_update(&(dctx->xxh), dstPtr, decodedSize);
@@ -1482,14 +1583,21 @@
                 } else {  /* dict not within tmp */
                     size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB);
                     dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace;
-                }
-            }
+            }   }
 
             /* Decode block */
-            {   int const decodedSize = LZ4_decompress_safe_usingDict(
+            {   const char* dict = (const char*)dctx->dict;
+                size_t dictSize = dctx->dictSize;
+                int decodedSize;
+                if (dict && dictSize > 1 GB) {
+                    /* the dictSize param is an int, avoid truncation / sign issues */
+                    dict += dictSize - 64 KB;
+                    dictSize = 64 KB;
+                }
+                decodedSize = LZ4_decompress_safe_usingDict(
                         (const char*)selectedIn, (char*)dctx->tmpOut,
                         (int)dctx->tmpInTarget, (int)dctx->maxBlockSize,
-                        (const char*)dctx->dict, (int)dctx->dictSize);
+                        dict, (int)dictSize);
                 if (decodedSize < 0)  /* decompression failed */
                     return err0r(LZ4F_ERROR_decompressionFailed);
                 if (dctx->frameInfo.contentChecksumFlag)
@@ -1507,8 +1615,8 @@
                 memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy);
 
                 /* dictionary management */
-                if (dctx->frameInfo.blockMode==LZ4F_blockLinked)
-                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1);
+                if (dctx->frameInfo.blockMode == LZ4F_blockLinked)
+                    LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/);
 
                 dctx->tmpOutStart += sizeToCopy;
                 dstPtr += sizeToCopy;
@@ -1517,8 +1625,9 @@
                     dctx->dStage = dstage_getBlockHeader;  /* get next block */
                     break;
                 }
+                /* could not flush everything : stop there, just request a block header */
+                doAnotherStage = 0;
                 nextSrcSizeHint = BHSize;
-                doAnotherStage = 0;   /* still some data to flush */
                 break;
             }
 
@@ -1555,7 +1664,7 @@
                 selectedIn = dctx->tmpIn;
             }   /* if (dctx->dStage == dstage_storeSuffix) */
 
-        /* case dstage_checkSuffix: */   /* no direct call, avoid scan-build warning */
+        /* case dstage_checkSuffix: */   /* no direct entry, avoid initialization risks */
             {   U32 const readCRC = LZ4F_readLE32(selectedIn);
                 U32 const resultCRC = XXH32_digest(&(dctx->xxh));
                 if (readCRC != resultCRC)
@@ -1579,8 +1688,7 @@
 
             if (dctx->dStage == dstage_storeSFrameSize)
         case dstage_storeSFrameSize:
-            {
-                size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
+            {   size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize,
                                              (size_t)(srcEnd - srcPtr) );
                 memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy);
                 srcPtr += sizeToCopy;
@@ -1594,7 +1702,7 @@
                 selectedIn = dctx->header + 4;
             }   /* if (dctx->dStage == dstage_storeSFrameSize) */
 
-        /* case dstage_decodeSFrameSize: */   /* no direct access */
+        /* case dstage_decodeSFrameSize: */   /* no direct entry */
             {   size_t const SFrameSize = LZ4F_readLE32(selectedIn);
                 dctx->frameInfo.contentSize = SFrameSize;
                 dctx->tmpInTarget = SFrameSize;
@@ -1613,7 +1721,7 @@
                 LZ4F_resetDecompressionContext(dctx);
                 break;
             }
-        }
+        }   /* switch (dctx->dStage) */
     }   /* while (doAnotherStage) */
 
     /* preserve history within tmp whenever necessary */

diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index eb55e45..fb434ff 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h

@@ -93,8 +93,8 @@
  **************************************/
 typedef size_t LZ4F_errorCode_t;
 
-LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);   /**< tells if a `LZ4F_errorCode_t` function result is an error code */
-LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /**< return error code string; useful for debugging */
+LZ4FLIB_API unsigned    LZ4F_isError(LZ4F_errorCode_t code);   /**< tells when a function result is an error code */
+LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /**< return error code string; for debugging */
 
 
 /*-************************************
@@ -162,24 +162,25 @@
  *  It's not required to set all fields, as long as the structure was initially memset() to zero.
  *  For all fields, 0 sets it to default value */
 typedef struct {
-  LZ4F_blockSizeID_t     blockSizeID;          /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
-  LZ4F_blockMode_t       blockMode;            /* LZ4F_blockLinked, LZ4F_blockIndependent ; 0 == default */
-  LZ4F_contentChecksum_t contentChecksumFlag;  /* if enabled, frame is terminated with a 32-bits checksum of decompressed data ; 0 == disabled (default)  */
-  LZ4F_frameType_t       frameType;            /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
-  unsigned long long     contentSize;          /* Size of uncompressed content ; 0 == unknown */
-  unsigned               dictID;               /* Dictionary ID, sent by the compressor to help decoder select the correct dictionary; 0 == no dictID provided */
-  LZ4F_blockChecksum_t   blockChecksumFlag;    /* if enabled, each block is followed by a checksum of block's compressed data ; 0 == disabled (default)  */
+  LZ4F_blockSizeID_t     blockSizeID;         /* max64KB, max256KB, max1MB, max4MB; 0 == default */
+  LZ4F_blockMode_t       blockMode;           /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */
+  LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */
+  LZ4F_frameType_t       frameType;           /* read-only field : LZ4F_frame or LZ4F_skippableFrame */
+  unsigned long long     contentSize;         /* Size of uncompressed content ; 0 == unknown */
+  unsigned               dictID;              /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */
+  LZ4F_blockChecksum_t   blockChecksumFlag;   /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
 } LZ4F_frameInfo_t;
 
 /*! LZ4F_preferences_t :
  *  makes it possible to supply detailed compression parameters to the stream interface.
- *  It's not required to set all fields, as long as the structure was initially memset() to zero.
+ *  Structure is presumed initially memset() to zero, representing default settings.
  *  All reserved fields must be set to zero. */
 typedef struct {
   LZ4F_frameInfo_t frameInfo;
-  int      compressionLevel;       /* 0 == default (fast mode); values above LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values below 0 trigger "fast acceleration", proportional to value */
-  unsigned autoFlush;              /* 1 == always flush, to reduce usage of internal buffers */
-  unsigned reserved[4];            /* must be zero for forward compatibility */
+  int      compressionLevel;    /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */
+  unsigned autoFlush;           /* 1: always flush, to reduce usage of internal buffers */
+  unsigned favorDecSpeed;       /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4LZ4HC_CLEVEL_OPT_MIN) */  /* >= v1.8.2 */
+  unsigned reserved[3];         /* must be zero for forward compatibility */
 } LZ4F_preferences_t;
 
 LZ4FLIB_API int LZ4F_compressionLevel_max(void);
@@ -189,8 +190,10 @@
 *  Simple compression function
 ***********************************/
 /*! LZ4F_compressFrameBound() :
- *  Returns the maximum possible size of a frame compressed with LZ4F_compressFrame() given srcSize content and preferences.
- *  Note : this result is only usable with LZ4F_compressFrame(), not with multi-segments compression.
+ *  Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences.
+ * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences.
+ *  Note : this result is only usable with LZ4F_compressFrame().
+ *         It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed.
  */
 LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
 
@@ -235,7 +238,7 @@
 
 /*----    Compression    ----*/
 
-#define LZ4F_HEADER_SIZE_MAX 19
+#define LZ4F_HEADER_SIZE_MAX 19   /* LZ4 Frame header size can vary from 7 to 19 bytes */
 /*! LZ4F_compressBegin() :
  *  will write the frame header into dstBuffer.
  *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
@@ -248,8 +251,13 @@
                                       const LZ4F_preferences_t* prefsPtr);
 
 /*! LZ4F_compressBound() :
- *  Provides minimum dstCapacity for a given srcSize to guarantee operation success in worst case situations.
+ *  Provides minimum dstCapacity required to guarantee compression success
+ *  given a srcSize and preferences, covering worst case scenario.
  *  prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario.
+ *  Estimation is valid for either LZ4F_compressUpdate(), LZ4F_flush() or LZ4F_compressEnd(),
+ *  Estimation includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes.
+ *  It also includes frame footer (ending + checksum), which would have to be generated by LZ4F_compressEnd().
+ *  Estimation doesn't include frame header, as it was already generated by LZ4F_compressBegin().
  *  Result is always the same for a srcSize and prefsPtr, so it can be trusted to size reusable buffers.
  *  When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations.
  */
@@ -257,36 +265,44 @@
 
 /*! LZ4F_compressUpdate() :
  *  LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
- *  An important rule is that dstCapacity MUST be large enough to ensure operation success even in worst case situations.
+ *  Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations.
  *  This value is provided by LZ4F_compressBound().
  *  If this condition is not respected, LZ4F_compress() will fail (result is an errorCode).
- *  LZ4F_compressUpdate() doesn't guarantee error recovery. When an error occurs, compression context must be freed or resized.
+ *  LZ4F_compressUpdate() doesn't guarantee error recovery.
+ *  When an error occurs, compression context must be freed or resized.
  * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default.
  * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered).
  *           or an error code if it fails (which can be tested using LZ4F_isError())
  */
-LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr);
+LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx,
+                                       void* dstBuffer, size_t dstCapacity,
+                                 const void* srcBuffer, size_t srcSize,
+                                 const LZ4F_compressOptions_t* cOptPtr);
 
 /*! LZ4F_flush() :
  *  When data must be generated and sent immediately, without waiting for a block to be completely filled,
  *  it's possible to call LZ4_flush(). It will immediately compress any data buffered within cctx.
  * `dstCapacity` must be large enough to ensure the operation will be successful.
  * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default.
- * @return : number of bytes written into dstBuffer (it can be zero, which means there was no data stored within cctx)
+ * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx)
  *           or an error code if it fails (which can be tested using LZ4F_isError())
  */
-LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
+LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx,
+                              void* dstBuffer, size_t dstCapacity,
+                        const LZ4F_compressOptions_t* cOptPtr);
 
 /*! LZ4F_compressEnd() :
  *  To properly finish an LZ4 frame, invoke LZ4F_compressEnd().
  *  It will flush whatever data remained within `cctx` (like LZ4_flush())
  *  and properly finalize the frame, with an endMark and a checksum.
  * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default.
- * @return : number of bytes written into dstBuffer (necessarily >= 4 (endMark), or 8 if optional frame checksum is enabled)
+ * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark),
  *           or an error code if it fails (which can be tested using LZ4F_isError())
  *  A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task.
  */
-LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr);
+LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx,
+                                    void* dstBuffer, size_t dstCapacity,
+                              const LZ4F_compressOptions_t* cOptPtr);
 
 
 /*-*********************************
@@ -296,7 +312,7 @@
 typedef LZ4F_dctx* LZ4F_decompressionContext_t;   /* compatibility with previous API versions */
 
 typedef struct {
-  unsigned stableDst;    /* pledge that at least 64KB+64Bytes of previously decompressed data remain unmodifed where it was decoded. This optimization skips storage operations in tmp buffers */
+  unsigned stableDst;    /* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */
   unsigned reserved[3];  /* must be set to zero for forward compatibility */
 } LZ4F_decompressOptions_t;
 
@@ -309,7 +325,7 @@
  *  The function provides a pointer to an allocated and initialized LZ4F_dctx object.
  *  The result is an errorCode, which can be tested using LZ4F_isError().
  *  dctx memory can be released using LZ4F_freeDecompressionContext();
- *  The result of LZ4F_freeDecompressionContext() is indicative of the current state of decompressionContext when being released.
+ *  Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released.
  *  That is, it should be == 0 if decompression has been completed fully and correctly.
  */
 LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version);
@@ -350,8 +366,8 @@
  *  The function will read up to *srcSizePtr bytes from srcBuffer,
  *  and decompress data into dstBuffer, of capacity *dstSizePtr.
  *
- *  The number of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
- *  The number of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
+ *  The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
+ *  The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
  *
  *  The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
  *  Unconsumed source data must be presented again in subsequent invocations.
@@ -394,3 +410,123 @@
 #endif
 
 #endif  /* LZ4F_H_09782039843 */
+
+#if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843)
+#define LZ4F_H_STATIC_09782039843
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* These declarations are not stable and may change in the future. They are
+ * therefore only safe to depend on when the caller is statically linked
+ * against the library. To access their declarations, define
+ * LZ4F_STATIC_LINKING_ONLY.
+ *
+ * There is a further protection mechanism where these symbols aren't published
+ * into shared/dynamic libraries. You can override this behavior and force
+ * them to be published by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. Use at
+ * your own risk.
+ */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+#define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+#define LZ4FLIB_STATIC_API
+#endif
+
+
+/* ---   Error List   --- */
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) \
+        ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) \
+        ITEM(ERROR_blockMode_invalid) \
+        ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_headerVersion_wrong) \
+        ITEM(ERROR_blockChecksum_invalid) \
+        ITEM(ERROR_reservedFlag_set) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) \
+        ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_frameHeader_incomplete) \
+        ITEM(ERROR_frameType_unknown) \
+        ITEM(ERROR_frameSize_wrong) \
+        ITEM(ERROR_srcPtr_wrong) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_headerChecksum_invalid) \
+        ITEM(ERROR_contentChecksum_invalid) \
+        ITEM(ERROR_frameDecoding_alreadyStarted) \
+        ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
+
+/* enum list is exposed, to handle specific errors */
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
+
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+
+
+
+/**********************************
+ *  Bulk processing dictionary API
+ *********************************/
+typedef struct LZ4F_CDict_s LZ4F_CDict;
+
+/*! LZ4_createCDict() :
+ *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
+ *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
+ *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+
+
+/*! LZ4_compressFrame_usingCDict() :
+ *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
+ *  cctx must point to a context created by LZ4F_createCompressionContext().
+ *  If cdict==NULL, compress without a dictionary.
+ *  dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
+ *  If this condition is not respected, function will fail (@return an errorCode).
+ *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
+ *  but it's not recommended, as it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer.
+ *           or an error code if it fails (can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
+
+
+/*! LZ4F_compressBegin_usingCDict() :
+ *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
+ *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
+ * `prefsPtr` is optional : you may provide NULL as argument,
+ *  however, it's the only way to provide dictID in the frame header.
+ * @return : number of bytes written into dstBuffer for the header,
+ *           or an error code (which can be tested using LZ4F_isError()) */
+LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
+
+
+/*! LZ4F_decompress_usingDict() :
+ *  Same as LZ4F_decompress(), using a predefined dictionary.
+ *  Dictionary is used "in place", without any preprocessing.
+ *  It must remain accessible throughout the entire frame decoding. */
+LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctxPtr,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */

diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h
index a59b94b..925a2c5 100644
--- a/lib/lz4frame_static.h
+++ b/lib/lz4frame_static.h

@@ -36,119 +36,12 @@
 #ifndef LZ4FRAME_STATIC_H_0398209384
 #define LZ4FRAME_STATIC_H_0398209384
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* lz4frame_static.h should be used solely in the context of static linking.
- * It contains definitions which are not stable and may change in the future.
- * Never use it in the context of DLL linking.
- *
- * Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
- * your own risk.
+/* The declarations that formerly were made here have been merged into
+ * lz4frame.h, protected by the LZ4F_STATIC_LINKING_ONLY macro. Going forward,
+ * it is recommended to simply include that header directly.
  */
-#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
-#define LZ4FLIB_STATIC_API LZ4FLIB_API
-#else
-#define LZ4FLIB_STATIC_API
-#endif
 
-
-/* ---   Dependency   --- */
+#define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
 
-
-/* ---   Error List   --- */
-#define LZ4F_LIST_ERRORS(ITEM) \
-        ITEM(OK_NoError) \
-        ITEM(ERROR_GENERIC) \
-        ITEM(ERROR_maxBlockSize_invalid) \
-        ITEM(ERROR_blockMode_invalid) \
-        ITEM(ERROR_contentChecksumFlag_invalid) \
-        ITEM(ERROR_compressionLevel_invalid) \
-        ITEM(ERROR_headerVersion_wrong) \
-        ITEM(ERROR_blockChecksum_invalid) \
-        ITEM(ERROR_reservedFlag_set) \
-        ITEM(ERROR_allocation_failed) \
-        ITEM(ERROR_srcSize_tooLarge) \
-        ITEM(ERROR_dstMaxSize_tooSmall) \
-        ITEM(ERROR_frameHeader_incomplete) \
-        ITEM(ERROR_frameType_unknown) \
-        ITEM(ERROR_frameSize_wrong) \
-        ITEM(ERROR_srcPtr_wrong) \
-        ITEM(ERROR_decompressionFailed) \
-        ITEM(ERROR_headerChecksum_invalid) \
-        ITEM(ERROR_contentChecksum_invalid) \
-        ITEM(ERROR_frameDecoding_alreadyStarted) \
-        ITEM(ERROR_maxCode)
-
-#define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM,
-
-/* enum list is exposed, to handle specific errors */
-typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
-
-LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
-
-
-
-/**********************************
- *  Bulk processing dictionary API
- *********************************/
-typedef struct LZ4F_CDict_s LZ4F_CDict;
-
-/*! LZ4_createCDict() :
- *  When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
- *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
- *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
-LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
-LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
-
-
-/*! LZ4_compressFrame_usingCDict() :
- *  Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary.
- *  If cdict==NULL, compress without a dictionary.
- *  dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr).
- *  If this condition is not respected, function will fail (@return an errorCode).
- *  The LZ4F_preferences_t structure is optional : you may provide NULL as argument,
- *  but it's not recommended, as it's the only way to provide dictID in the frame header.
- * @return : number of bytes written into dstBuffer.
- *           or an error code if it fails (can be tested using LZ4F_isError()) */
-LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
-    void* dst, size_t dstCapacity,
-    const void* src, size_t srcSize,
-    const LZ4F_CDict* cdict,
-    const LZ4F_preferences_t* preferencesPtr);
-
-
-/*! LZ4F_compressBegin_usingCDict() :
- *  Inits streaming dictionary compression, and writes the frame header into dstBuffer.
- *  dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
- * `prefsPtr` is optional : you may provide NULL as argument,
- *  however, it's the only way to provide dictID in the frame header.
- * @return : number of bytes written into dstBuffer for the header,
- *           or an error code (which can be tested using LZ4F_isError()) */
-LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
-    LZ4F_cctx* cctx,
-    void* dstBuffer, size_t dstCapacity,
-    const LZ4F_CDict* cdict,
-    const LZ4F_preferences_t* prefsPtr);
-
-
-/*! LZ4F_decompress_usingDict() :
- *  Same as LZ4F_decompress(), using a predefined dictionary.
- *  Dictionary is used "in place", without any preprocessing.
- *  It must remain accessible throughout the entire frame decoding. */
-LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
-    LZ4F_dctx* dctxPtr,
-    void* dstBuffer, size_t* dstSizePtr,
-    const void* srcBuffer, size_t* srcSizePtr,
-    const void* dict, size_t dictSize,
-    const LZ4F_decompressOptions_t* decompressOptionsPtr);
-
-
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* LZ4FRAME_STATIC_H_0398209384 */

diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index f2c2566..8108ea0 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c

@@ -67,6 +67,7 @@
 
 /*===   Constants   ===*/
 #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM   (1<<12)
 
 
 /*===   Macros   ===*/
@@ -78,21 +79,33 @@
 
 static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
 
+/*===   Enums   ===*/
+typedef enum { noDictCtx, usingDictCtx } dictCtx_directive;
 
 
 /**************************************
 *  HC Compression
 **************************************/
-static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
-    hc4->nextToUpdate = 64 KB;
-    hc4->base = start - 64 KB;
+}
+
+static void LZ4HC_init (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+    uptrval startingOffset = hc4->end - hc4->base;
+    if (startingOffset > 1 GB) {
+        LZ4HC_clearTables(hc4);
+        startingOffset = 0;
+    }
+    startingOffset += 64 KB;
+    hc4->nextToUpdate = (U32) startingOffset;
+    hc4->base = start - startingOffset;
     hc4->end = start;
-    hc4->dictBase = start - 64 KB;
-    hc4->dictLimit = 64 KB;
-    hc4->lowLimit = 64 KB;
+    hc4->dictBase = start - startingOffset;
+    hc4->dictLimit = (U32) startingOffset;
+    hc4->lowLimit = (U32) startingOffset;
 }
 
 
@@ -123,17 +136,21 @@
 int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
                     const BYTE* const iMin, const BYTE* const mMin)
 {
-    int back=0;
-    while ( (ip+back > iMin)
-         && (match+back > mMin)
-         && (ip[back-1] == match[back-1]))
+    int back = 0;
+    int const min = (int)MAX(iMin - ip, mMin - match);
+    assert(min <= 0);
+    assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+    assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+    while ( (back > min)
+         && (ip[back-1] == match[back-1]) )
             back--;
     return back;
 }
 
 /* LZ4HC_countPattern() :
  * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
-static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
 {
     const BYTE* const iStart = ip;
     reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
@@ -165,7 +182,8 @@
 /* LZ4HC_reverseCountPattern() :
  * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
  * read using natural platform endianess */
-static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
 {
     const BYTE* const iStart = ip;
 
@@ -182,8 +200,10 @@
 }
 
 typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
 
-LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
     LZ4HC_CCtx_internal* hc4,
     const BYTE* const ip,
     const BYTE* const iLowLimit,
@@ -192,19 +212,26 @@
     const BYTE** matchpos,
     const BYTE** startpos,
     const int maxNbAttempts,
-    const int patternAnalysis)
+    const int patternAnalysis,
+    const int chainSwap,
+    const dictCtx_directive dict,
+    const HCfavor_e favorDecSpeed)
 {
     U16* const chainTable = hc4->chainTable;
     U32* const HashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
     const BYTE* const base = hc4->base;
     const U32 dictLimit = hc4->dictLimit;
     const BYTE* const lowPrefixPtr = base + dictLimit;
-    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - MAX_DISTANCE;
+    const U32 ipIndex = (U32)(ip - base);
+    const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - MAX_DISTANCE;
     const BYTE* const dictBase = hc4->dictBase;
-    int const delta = (int)(ip-iLowLimit);
+    int const lookBackLength = (int)(ip-iLowLimit);
     int nbAttempts = maxNbAttempts;
+    int matchChainPos = 0;
     U32 const pattern = LZ4_read32(ip);
     U32 matchIndex;
+    U32 dictMatchIndex;
     repeat_state_e repeat = rep_untested;
     size_t srcPatternLength = 0;
 
@@ -212,86 +239,141 @@
     /* First Match */
     LZ4HC_Insert(hc4, ip);
     matchIndex = HashTable[LZ4HC_hashPtr(ip)];
-    DEBUGLOG(7, "First match at index %u / %u (lowLimit)",
-                matchIndex, lowLimit);
+    DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+                matchIndex, lowestMatchIndex);
 
-    while ((matchIndex>=lowLimit) && (nbAttempts)) {
-        DEBUGLOG(7, "remaining attempts : %i", nbAttempts);
+    while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
+        int matchLength=0;
         nbAttempts--;
-        if (matchIndex >= dictLimit) {
+        assert(matchIndex < ipIndex);
+        if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+            /* do nothing */
+        } else if (matchIndex >= dictLimit) {   /* within current Prefix */
             const BYTE* const matchPtr = base + matchIndex;
-            if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) {
+            assert(matchPtr >= lowPrefixPtr);
+            assert(matchPtr < ip);
+            assert(longest >= 1);
+            if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
                 if (LZ4_read32(matchPtr) == pattern) {
-                    int mlt = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
-    #if 0
-                    /* more generic but unfortunately slower on clang */
-                    int const back = LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr);
-    #else
-                    int back = 0;
-                    while ( (ip+back > iLowLimit)
-                         && (matchPtr+back > lowPrefixPtr)
-                         && (ip[back-1] == matchPtr[back-1])) {
-                            back--;
-                    }
-    #endif
-                    mlt -= back;
-
-                    if (mlt > longest) {
-                        longest = mlt;
-                        *matchpos = matchPtr+back;
-                        *startpos = ip+back;
-                }   }
-            }
-        } else {   /* matchIndex < dictLimit */
+                    int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+                    matchLength = MINMATCH + LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+                    matchLength -= back;
+                    if (matchLength > longest) {
+                        longest = matchLength;
+                        *matchpos = matchPtr + back;
+                        *startpos = ip + back;
+            }   }   }
+        } else {   /* lowestMatchIndex <= matchIndex < dictLimit */
             const BYTE* const matchPtr = dictBase + matchIndex;
             if (LZ4_read32(matchPtr) == pattern) {
-                int mlt;
+                const BYTE* const dictStart = dictBase + hc4->lowLimit;
                 int back = 0;
                 const BYTE* vLimit = ip + (dictLimit - matchIndex);
                 if (vLimit > iHighLimit) vLimit = iHighLimit;
-                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
-                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
-                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
-                while ( (ip+back > iLowLimit)
-                     && (matchIndex+back > lowLimit)
-                     && (ip[back-1] == matchPtr[back-1]))
-                        back--;
-                mlt -= back;
-                if (mlt > longest) {
-                    longest = mlt;
-                    *matchpos = base + matchIndex + back;
+                matchLength = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
+                    matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+                matchLength -= back;
+                if (matchLength > longest) {
+                    longest = matchLength;
+                    *matchpos = base + matchIndex + back;   /* virtual pos, relative to ip, to retrieve offset */
                     *startpos = ip + back;
         }   }   }
 
-        {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
-            matchIndex -= nextOffset;
-            if (patternAnalysis && nextOffset==1) {
+        if (chainSwap && matchLength==longest) {    /* better match => select a better chain */
+            assert(lookBackLength==0);   /* search forward only */
+            if (matchIndex + longest <= ipIndex) {
+                U32 distanceToNextMatch = 1;
+                int pos;
+                for (pos = 0; pos <= longest - MINMATCH; pos++) {
+                    U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + pos);
+                    if (candidateDist > distanceToNextMatch) {
+                        distanceToNextMatch = candidateDist;
+                        matchChainPos = pos;
+                }   }
+                if (distanceToNextMatch > 1) {
+                    if (distanceToNextMatch > matchIndex) break;   /* avoid overflow */
+                    matchIndex -= distanceToNextMatch;
+                    continue;
+        }   }   }
+
+        {   U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+            if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+                U32 const matchCandidateIdx = matchIndex-1;
                 /* may be a repeated pattern */
                 if (repeat == rep_untested) {
                     if ( ((pattern & 0xFFFF) == (pattern >> 16))
                       &  ((pattern & 0xFF)   == (pattern >> 24)) ) {
                         repeat = rep_confirmed;
-                        srcPatternLength = LZ4HC_countPattern(ip+4, iHighLimit, pattern) + 4;
+                        srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
                     } else {
                         repeat = rep_not;
                 }   }
                 if ( (repeat == rep_confirmed)
-                  && (matchIndex >= dictLimit) ) {   /* same segment only */
-                    const BYTE* const matchPtr = base + matchIndex;
+                  && (matchCandidateIdx >= dictLimit) ) {   /* same segment only */
+                    const BYTE* const matchPtr = base + matchCandidateIdx;
                     if (LZ4_read32(matchPtr) == pattern) {  /* good candidate */
                         size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
-                        const BYTE* const maxLowPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
-                        size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, maxLowPtr, pattern);
+                        const BYTE* const lowestMatchPtr = (lowPrefixPtr + MAX_DISTANCE >= ip) ? lowPrefixPtr : ip - MAX_DISTANCE;
+                        size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
                         size_t const currentSegmentLength = backLength + forwardPatternLength;
 
                         if ( (currentSegmentLength >= srcPatternLength)   /* current pattern segment large enough to contain full srcPatternLength */
                           && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
-                            matchIndex += (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
+                            matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength;  /* best position, full pattern, might be followed by more match */
                         } else {
-                            matchIndex -= (U32)backLength;   /* let's go to farthest segment position, will find a match of length currentSegmentLength + maybe some back */
-                        }
-        }   }   }   }
-    }  /* while ((matchIndex>=lowLimit) && (nbAttempts)) */
+                            matchIndex = matchCandidateIdx - (U32)backLength;   /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+                            if (lookBackLength==0) {  /* no back possible */
+                                size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+                                if ((size_t)longest < maxML) {
+                                    assert(maxML < 2 GB);
+                                    longest = (int)maxML;
+                                    *matchpos = base + matchIndex;   /* virtual pos, relative to ip, to retrieve offset */
+                                    *startpos = ip;
+                                }
+                                {   U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+                                    if (distToNextPattern > matchIndex) break;  /* avoid overflow */
+                                    matchIndex -= distToNextPattern;
+                        }   }   }
+                        continue;
+                }   }
+        }   }   /* PA optimization */
+
+        /* follow current chain */
+        matchIndex -= DELTANEXTU16(chainTable, matchIndex+matchChainPos);
+
+    }  /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
+
+    if (dict == usingDictCtx && nbAttempts && ipIndex - lowestMatchIndex < MAX_DISTANCE) {
+        size_t const dictEndOffset = dictCtx->end - dictCtx->base;
+        assert(dictEndOffset <= 1 GB);
+        dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+        matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+        while (ipIndex - matchIndex <= MAX_DISTANCE && nbAttempts--) {
+            const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
+
+            if (LZ4_read32(matchPtr) == pattern) {
+                int mlt;
+                int back = 0;
+                const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+                back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+                mlt -= back;
+                if (mlt > longest) {
+                    longest = mlt;
+                    *matchpos = base + matchIndex + back;
+                    *startpos = ip + back;
+                }
+            }
+
+            {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
+                dictMatchIndex -= nextOffset;
+                matchIndex -= nextOffset;
+            }
+        }
+    }
 
     return longest;
 }
@@ -301,13 +383,14 @@
                                  const BYTE* const ip, const BYTE* const iLimit,
                                  const BYTE** matchpos,
                                  const int maxNbAttempts,
-                                 const int patternAnalysis)
+                                 const int patternAnalysis,
+                                 const dictCtx_directive dict)
 {
     const BYTE* uselessPtr = ip;
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
      * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
 }
 
 
@@ -333,7 +416,7 @@
     size_t length;
     BYTE* const token = (*op)++;
 
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
     static const BYTE* start = NULL;
     static U32 totalCost = 0;
     U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
@@ -342,8 +425,8 @@
     U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
     U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
     if (start==NULL) start = *anchor;  /* only works for single segment */
-    //g_debuglog_enable = (pos >= 2228) & (pos <= 2262);
-    DEBUGLOG(2, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
+    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+    DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
                 pos,
                 (U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
                 cost, totalCost);
@@ -390,22 +473,19 @@
     return 0;
 }
 
-/* btopt */
-#include "lz4opt.h"
-
-
-static int LZ4HC_compress_hashChain (
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
     LZ4HC_CCtx_internal* const ctx,
     const char* const source,
     char* const dest,
     int* srcSizePtr,
     int const maxOutputSize,
     unsigned maxNbAttempts,
-    limitedOutput_directive limit
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
     )
 {
     const int inputSize = *srcSizePtr;
-    const int patternAnalysis = (maxNbAttempts > 64);   /* levels 8+ */
+    const int patternAnalysis = (maxNbAttempts > 128);   /* levels 9+ */
 
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
@@ -417,14 +497,14 @@
     BYTE* op = (BYTE*) dest;
     BYTE* oend = op + maxOutputSize;
 
-    int   ml, ml2, ml3, ml0;
+    int   ml0, ml, ml2, ml3;
+    const BYTE* start0;
+    const BYTE* ref0;
     const BYTE* ref = NULL;
     const BYTE* start2 = NULL;
     const BYTE* ref2 = NULL;
     const BYTE* start3 = NULL;
     const BYTE* ref3 = NULL;
-    const BYTE* start0;
-    const BYTE* ref0;
 
     /* init */
     *srcSizePtr = 0;
@@ -432,36 +512,32 @@
     if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     /* Main Loop */
-    while (ip < mflimit) {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
+    while (ip <= mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
         if (ml<MINMATCH) { ip++; continue; }
 
         /* saved, in case we would skip too much */
-        start0 = ip;
-        ref0 = ref;
-        ml0 = ml;
+        start0 = ip; ref0 = ref; ml0 = ml;
 
 _Search2:
-        if (ip+ml < mflimit)
+        if (ip+ml <= mflimit) {
             ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
                             ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
-                            maxNbAttempts, patternAnalysis);
-        else
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
             ml2 = ml;
+        }
 
-        if (ml2 == ml) { /* No better match */
+        if (ml2 == ml) { /* No better match => encode ML1 */
             optr = op;
             if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
             continue;
         }
 
-        if (start0 < ip) {
-            if (start2 < ip + ml0) {  /* empirical */
-                ip = start0;
-                ref = ref0;
-                ml = ml0;
-            }
-        }
+        if (start0 < ip) {   /* first match was skipped at least once */
+            if (start2 < ip + ml0) {  /* squeezing ML1 between ML0(original ML1) and ML2 */
+                ip = start0; ref = ref0; ml = ml0;  /* restore initial ML1 */
+        }   }
 
         /* Here, start0==ip */
         if ((start2 - ip) < 3) {  /* First Match too small : removed */
@@ -489,14 +565,15 @@
         }
         /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
-        if (start2 + ml2 < mflimit)
+        if (start2 + ml2 <= mflimit) {
             ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
                             start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
-                            maxNbAttempts, patternAnalysis);
-        else
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
             ml3 = ml2;
+        }
 
-        if (ml3 == ml2) {  /* No better match : 2 sequences to encode */
+        if (ml3 == ml2) {  /* No better match => encode ML1 and ML2 */
             /* ip & ref are known; Now for ml */
             if (start2 < ip+ml)  ml = (int)(start2 - ip);
             /* Now, encode 2 sequences */
@@ -541,11 +618,12 @@
         }
 
         /*
-        * OK, now we have 3 ascending matches; let's write at least the first one
-        * ip & ref are known; Now for ml
+        * OK, now we have 3 ascending matches;
+        * let's write the first one ML1.
+        * ip & ref are known; Now decide ml.
         */
         if (start2 < ip+ml) {
-            if ((start2 - ip) < (int)ML_MASK) {
+            if ((start2 - ip) < OPTIMAL_ML) {
                 int correction;
                 if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                 if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
@@ -562,14 +640,13 @@
         optr = op;
         if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) goto _dest_overflow;
 
-        ip = start2;
-        ref = ref2;
-        ml = ml2;
+        /* ML2 becomes ML1 */
+        ip = start2; ref = ref2; ml = ml2;
 
-        start2 = start3;
-        ref2 = ref3;
-        ml2 = ml3;
+        /* ML3 becomes ML2 */
+        start2 = start3; ref2 = ref3; ml2 = ml3;
 
+        /* let's find a new ML3 */
         goto _Search3;
     }
 
@@ -613,6 +690,112 @@
 }
 
 
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+    const char* const source, char* dst,
+    int* srcSizePtr, int dstCapacity,
+    int const nbSearches, size_t sufficient_len,
+    const limitedOutput_directive limit, int const fullUpdate,
+    const dictCtx_directive dict,
+    HCfavor_e favorDecSpeed);
+
+
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+    typedef struct {
+        lz4hc_strat_e strat;
+        U32 nbSearches;
+        U32 targetLength;
+    } cParams_t;
+    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+        { lz4hc,     2, 16 },  /* 0, unused */
+        { lz4hc,     2, 16 },  /* 1, unused */
+        { lz4hc,     2, 16 },  /* 2, unused */
+        { lz4hc,     4, 16 },  /* 3 */
+        { lz4hc,     8, 16 },  /* 4 */
+        { lz4hc,    16, 16 },  /* 5 */
+        { lz4hc,    32, 16 },  /* 6 */
+        { lz4hc,    64, 16 },  /* 7 */
+        { lz4hc,   128, 16 },  /* 8 */
+        { lz4hc,   256, 16 },  /* 9 */
+        { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+        { lz4opt,  512,128 },  /*11 */
+        { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+    };
+
+    DEBUGLOG(4, "LZ4HC_compress_generic(%p, %p, %d)", ctx, src, *srcSizePtr);
+
+    if (limit == limitedDestSize && dstCapacity < 1) return 0;         /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size (too large or negative) */
+
+    ctx->end += *srcSizePtr;
+    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
+    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+    {   cParams_t const cParam = clTable[cLevel];
+        HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+        if (cParam.strat == lz4hc)
+            return LZ4HC_compress_hashChain(ctx,
+                                src, dst, srcSizePtr, dstCapacity,
+                                cParam.nbSearches, limit, dict);
+        assert(cParam.strat == lz4opt);
+        return LZ4HC_compress_optimal(ctx,
+                            src, dst, srcSizePtr, dstCapacity,
+                            cParam.nbSearches, cParam.targetLength, limit,
+                            cLevel == LZ4HC_CLEVEL_MAX,   /* ultra mode */
+                            dict, favor);
+    }
+}
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
+
+static int LZ4HC_compress_generic_noDictCtx (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    limitedOutput_directive limit
+    )
+{
+    assert(ctx->dictCtx == NULL);
+    return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
+}
+
+static int LZ4HC_compress_generic_dictCtx (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const src,
+    char* const dst,
+    int* const srcSizePtr,
+    int const dstCapacity,
+    int cLevel,
+    limitedOutput_directive limit
+    )
+{
+    const size_t position = ctx->end - ctx->base - ctx->lowLimit;
+    assert(ctx->dictCtx != NULL);
+    if (position >= 64 KB) {
+        ctx->dictCtx = NULL;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else if (position == 0 && *srcSizePtr > 4 KB) {
+        memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
+        LZ4HC_setExternalDict(ctx, (const BYTE *)src);
+        ctx->compressionLevel = (short)cLevel;
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else {
+        return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtx);
+    }
+}
+
 static int LZ4HC_compress_generic (
     LZ4HC_CCtx_internal* const ctx,
     const char* const src,
@@ -623,56 +806,21 @@
     limitedOutput_directive limit
     )
 {
-    typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
-    typedef struct {
-        lz4hc_strat_e strat;
-        U32 nbSearches;
-        U32 targetLength;
-    } cParams_t;
-    static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
-        { lz4hc,    2, 16 },  /* 0, unused */
-        { lz4hc,    2, 16 },  /* 1, unused */
-        { lz4hc,    2, 16 },  /* 2, unused */
-        { lz4hc,    4, 16 },  /* 3 */
-        { lz4hc,    8, 16 },  /* 4 */
-        { lz4hc,   16, 16 },  /* 5 */
-        { lz4hc,   32, 16 },  /* 6 */
-        { lz4hc,   64, 16 },  /* 7 */
-        { lz4hc,  128, 16 },  /* 8 */
-        { lz4hc,  256, 16 },  /* 9 */
-        { lz4opt,  96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
-        { lz4opt, 512,128 },  /*11 */
-        { lz4opt,8192, LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
-    };
-
-    if (limit == limitedDestSize && dstCapacity < 1) return 0;         /* Impossible to store anything */
-    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size (too large or negative) */
-
-    ctx->end += *srcSizePtr;
-    if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
-    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
-    assert(cLevel >= 0);
-    assert(cLevel <= LZ4HC_CLEVEL_MAX);
-    {   cParams_t const cParam = clTable[cLevel];
-        if (cParam.strat == lz4hc)
-            return LZ4HC_compress_hashChain(ctx,
-                                src, dst, srcSizePtr, dstCapacity,
-                                cParam.nbSearches, limit);
-        assert(cParam.strat == lz4opt);
-        return LZ4HC_compress_optimal(ctx,
-                            src, dst, srcSizePtr, dstCapacity,
-                            cParam.nbSearches, cParam.targetLength, limit,
-                            cLevel == LZ4HC_CLEVEL_MAX);  /* ultra mode */
+    if (ctx->dictCtx == NULL) {
+        return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+    } else {
+        return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
     }
 }
 
 
 int LZ4_sizeofStateHC(void) { return sizeof(LZ4_streamHC_t); }
 
-int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
     LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
     if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
     LZ4HC_init (ctx, (const BYTE*)src);
     if (dstCapacity < LZ4_compressBound(srcSize))
         return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
@@ -680,10 +828,17 @@
         return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, noLimit);
 }
 
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4_resetStreamHC ((LZ4_streamHC_t*)state, compressionLevel);
+    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
 int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
 {
 #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
-    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
 #else
     LZ4_streamHC_t state;
     LZ4_streamHC_t* const statePtr = &state;
@@ -700,6 +855,7 @@
 int LZ4_compress_HC_destSize(void* LZ4HC_Data, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
 {
     LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
+    LZ4_resetStreamHC((LZ4_streamHC_t*)LZ4HC_Data, cLevel);
     LZ4HC_init(ctx, (const BYTE*) source);
     return LZ4HC_compress_generic(ctx, source, dest, sourceSizePtr, targetDestSize, cLevel, limitedDestSize);
 }
@@ -710,8 +866,15 @@
 *  Streaming Functions
 **************************************/
 /* allocation */
-LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
-int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {
+LZ4_streamHC_t* LZ4_createStreamHC(void) {
+    LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+    if (LZ4_streamHCPtr==NULL) return NULL;
+    LZ4_resetStreamHC(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
+    return LZ4_streamHCPtr;
+}
+
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) {
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
     if (!LZ4_streamHCPtr) return 0;  /* support free on NULL */
     free(LZ4_streamHCPtr);
     return 0;
@@ -722,36 +885,61 @@
 void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
     LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= sizeof(size_t) * LZ4_STREAMHCSIZE_SIZET);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    DEBUGLOG(4, "LZ4_resetStreamHC(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
     LZ4_streamHCPtr->internal_donotuse.base = NULL;
+    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
+    LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+    LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;
+    LZ4_streamHCPtr->internal_donotuse.base = NULL;
+    LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
     LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
 }
 
 void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
 {
-    if (compressionLevel < 1) compressionLevel = 1;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
     if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
-    LZ4_streamHCPtr->internal_donotuse.compressionLevel = compressionLevel;
+    LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel;
+}
+
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
+{
+    LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0);
 }
 
 int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
     if (dictSize > 64 KB) {
         dictionary += dictSize - 64 KB;
         dictSize = 64 KB;
     }
+    LZ4_resetStreamHC(LZ4_streamHCPtr, ctxPtr->compressionLevel);
     LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
     ctxPtr->end = (const BYTE*)dictionary + dictSize;
     if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
     return dictSize;
 }
 
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
+    working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL;
+}
 
 /* compression */
 
 static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
 {
-    if (ctxPtr->end >= ctxPtr->base + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+    if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
 
     /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
     ctxPtr->lowLimit  = ctxPtr->dictLimit;
@@ -768,6 +956,7 @@
                                             limitedOutput_directive limit)
 {
     LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+    DEBUGLOG(4, "LZ4_compressHC_continue_generic(%p, %p, %d)", LZ4_streamHCPtr, src, *srcSizePtr);
     /* auto-init if forgotten */
     if (ctxPtr->base == NULL) LZ4HC_init (ctxPtr, (const BYTE*) src);
 
@@ -816,6 +1005,7 @@
 {
     LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
     int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
     if (dictSize > 64 KB) dictSize = 64 KB;
     if (dictSize < 4) dictSize = 0;
     if (dictSize > prefixSize) dictSize = prefixSize;
@@ -855,17 +1045,17 @@
 {
     LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
     if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4_resetStreamHC((LZ4_streamHC_t*)state, ((LZ4_streamHC_t*)state)->internal_donotuse.compressionLevel);
     LZ4HC_init(ctx, (const BYTE*)inputBuffer);
-    ctx->inputBuffer = (BYTE*)inputBuffer;
     return 0;
 }
 
-void* LZ4_createHC (char* inputBuffer)
+void* LZ4_createHC (const char* inputBuffer)
 {
-    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOCATOR(1, sizeof(LZ4_streamHC_t));
+    LZ4_streamHC_t* hc4 = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
     if (hc4 == NULL) return NULL;   /* not enough memory */
+    LZ4_resetStreamHC(hc4, 0 /* compressionLevel */);
     LZ4HC_init (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
-    hc4->internal_donotuse.inputBuffer = (BYTE*)inputBuffer;
     return hc4;
 }
 
@@ -887,7 +1077,333 @@
 
 char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
 {
-    LZ4HC_CCtx_internal* const hc4 = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
-    int const dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
-    return (char*)(hc4->inputBuffer + dictSize);
+    LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data;
+    const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit;
+    LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
+    /* avoid const char * -> char * conversion warning :( */
+    return (char *)(uptrval)bufferStart;
 }
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels 10-12)
+ * ===============================================*/
+typedef struct {
+    int price;
+    int off;
+    int mlen;
+    int litlen;
+} LZ4HC_optimal_t;
+
+/* price in bytes */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+    int price = litlen;
+    if (litlen >= (int)RUN_MASK)
+        price += 1 + (litlen-RUN_MASK)/255;
+    return price;
+}
+
+
+/* requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+    int price = 1 + 2 ; /* token + 16-bit offset */
+
+    price += LZ4HC_literalsPrice(litlen);
+
+    if (mlen >= (int)(ML_MASK+MINMATCH))
+        price += 1 + (mlen-(ML_MASK+MINMATCH))/255;
+
+    return price;
+}
+
+
+typedef struct {
+    int off;
+    int len;
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+                      const BYTE* ip, const BYTE* const iHighLimit,
+                      int minLen, int nbSearches,
+                      const dictCtx_directive dict,
+                      const HCfavor_e favorDecSpeed)
+{
+    LZ4HC_match_t match = { 0 , 0 };
+    const BYTE* matchPtr = NULL;
+    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+     * but this won't be the case here, as we define iLowLimit==ip,
+     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+    int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+    if (matchLength <= minLen) return match;
+    if (favorDecSpeed) {
+        if ((matchLength>18) & (matchLength<=36)) matchLength=18;   /* favor shortcut */
+    }
+    match.len = matchLength;
+    match.off = (int)(ip-matchPtr);
+    return match;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
+                                    const char* const source,
+                                    char* dst,
+                                    int* srcSizePtr,
+                                    int dstCapacity,
+                                    int const nbSearches,
+                                    size_t sufficient_len,
+                                    const limitedOutput_directive limit,
+                                    int const fullUpdate,
+                                    const dictCtx_directive dict,
+                                    const HCfavor_e favorDecSpeed)
+{
+#define TRAILING_LITERALS 3
+    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* ~64 KB, which is a bit large for stack... */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + *srcSizePtr;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+    BYTE* op = (BYTE*) dst;
+    BYTE* opSaved = (BYTE*) dst;
+    BYTE* oend = op + dstCapacity;
+
+    /* init */
+    DEBUGLOG(5, "LZ4HC_compress_optimal");
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
+    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+    /* Main Loop */
+    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
+    while (ip <= mflimit) {
+         int const llen = (int)(ip - anchor);
+         int best_mlen, best_off;
+         int cur, last_match_pos = 0;
+
+         LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+         if (firstMatch.len==0) { ip++; continue; }
+
+         if ((size_t)firstMatch.len > sufficient_len) {
+             /* good enough solution : immediate encoding */
+             int const firstML = firstMatch.len;
+             const BYTE* const matchPos = ip - firstMatch.off;
+             opSaved = op;
+             if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
+                 goto _dest_overflow;
+             continue;
+         }
+
+         /* set prices for first positions (literals) */
+         {   int rPos;
+             for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+                 int const cost = LZ4HC_literalsPrice(llen + rPos);
+                 opt[rPos].mlen = 1;
+                 opt[rPos].off = 0;
+                 opt[rPos].litlen = llen + rPos;
+                 opt[rPos].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             rPos, cost, opt[rPos].litlen);
+         }   }
+         /* set prices using initial match */
+         {   int mlen = MINMATCH;
+             int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
+             int const offset = firstMatch.off;
+             assert(matchML < LZ4_OPT_NUM);
+             for ( ; mlen <= matchML ; mlen++) {
+                 int const cost = LZ4HC_sequencePrice(llen, mlen);
+                 opt[mlen].mlen = mlen;
+                 opt[mlen].off = offset;
+                 opt[mlen].litlen = llen;
+                 opt[mlen].price = cost;
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+                             mlen, cost, mlen);
+         }   }
+         last_match_pos = firstMatch.len;
+         {   int addLit;
+             for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                 opt[last_match_pos+addLit].mlen = 1; /* literal */
+                 opt[last_match_pos+addLit].off = 0;
+                 opt[last_match_pos+addLit].litlen = addLit;
+                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+         }   }
+
+         /* check further positions */
+         for (cur = 1; cur < last_match_pos; cur++) {
+             const BYTE* const curPtr = ip + cur;
+             LZ4HC_match_t newMatch;
+
+             if (curPtr > mflimit) break;
+             DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+                     cur, opt[cur].price, opt[cur+1].price, cur+1);
+             if (fullUpdate) {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if ( (opt[cur+1].price <= opt[cur].price)
+                   /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+                   && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+                     continue;
+             } else {
+                 /* not useful to search here if next position has same (or lower) cost */
+                 if (opt[cur+1].price <= opt[cur].price) continue;
+             }
+
+             DEBUGLOG(7, "search at rPos:%u", cur);
+             if (fullUpdate)
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+             else
+                 /* only test matches of minimum length; slightly faster, but misses a few bytes */
+                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+             if (!newMatch.len) continue;
+
+             if ( ((size_t)newMatch.len > sufficient_len)
+               || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+                 /* immediate encoding */
+                 best_mlen = newMatch.len;
+                 best_off = newMatch.off;
+                 last_match_pos = cur + 1;
+                 goto encode;
+             }
+
+             /* before match : set price with literals at beginning */
+             {   int const baseLitlen = opt[cur].litlen;
+                 int litlen;
+                 for (litlen = 1; litlen < MINMATCH; litlen++) {
+                     int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+                     int const pos = cur + litlen;
+                     if (price < opt[pos].price) {
+                         opt[pos].mlen = 1; /* literal */
+                         opt[pos].off = 0;
+                         opt[pos].litlen = baseLitlen+litlen;
+                         opt[pos].price = price;
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+                                     pos, price, opt[pos].litlen);
+             }   }   }
+
+             /* set prices using match at position = cur */
+             {   int const matchML = newMatch.len;
+                 int ml = MINMATCH;
+
+                 assert(cur + newMatch.len < LZ4_OPT_NUM);
+                 for ( ; ml <= matchML ; ml++) {
+                     int const pos = cur + ml;
+                     int const offset = newMatch.off;
+                     int price;
+                     int ll;
+                     DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+                                 pos, last_match_pos);
+                     if (opt[cur].mlen == 1) {
+                         ll = opt[cur].litlen;
+                         price = ((cur > ll) ? opt[cur - ll].price : 0)
+                               + LZ4HC_sequencePrice(ll, ml);
+                     } else {
+                         ll = 0;
+                         price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+                     }
+
+                    assert((U32)favorDecSpeed <= 1);
+                     if (pos > last_match_pos+TRAILING_LITERALS
+                      || price <= opt[pos].price - (int)favorDecSpeed) {
+                         DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+                                     pos, price, ml);
+                         assert(pos < LZ4_OPT_NUM);
+                         if ( (ml == matchML)  /* last pos of last match */
+                           && (last_match_pos < pos) )
+                             last_match_pos = pos;
+                         opt[pos].mlen = ml;
+                         opt[pos].off = offset;
+                         opt[pos].litlen = ll;
+                         opt[pos].price = price;
+             }   }   }
+             /* complete following positions with literals */
+             {   int addLit;
+                 for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+                     opt[last_match_pos+addLit].mlen = 1; /* literal */
+                     opt[last_match_pos+addLit].off = 0;
+                     opt[last_match_pos+addLit].litlen = addLit;
+                     opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+                     DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+             }   }
+         }  /* for (cur = 1; cur <= last_match_pos; cur++) */
+
+         best_mlen = opt[last_match_pos].mlen;
+         best_off = opt[last_match_pos].off;
+         cur = last_match_pos - best_mlen;
+
+ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+         assert(cur < LZ4_OPT_NUM);
+         assert(last_match_pos >= 1);  /* == 1 when only one candidate */
+         DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+         {   int candidate_pos = cur;
+             int selected_matchLength = best_mlen;
+             int selected_offset = best_off;
+             while (1) {  /* from end to beginning */
+                 int const next_matchLength = opt[candidate_pos].mlen;  /* can be 1, means literal */
+                 int const next_offset = opt[candidate_pos].off;
+                 DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+                 opt[candidate_pos].mlen = selected_matchLength;
+                 opt[candidate_pos].off = selected_offset;
+                 selected_matchLength = next_matchLength;
+                 selected_offset = next_offset;
+                 if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+                 assert(next_matchLength > 0);  /* can be 1, means literal */
+                 candidate_pos -= next_matchLength;
+         }   }
+
+         /* encode all recorded sequences in order */
+         {   int rPos = 0;  /* relative position (to ip) */
+             while (rPos < last_match_pos) {
+                 int const ml = opt[rPos].mlen;
+                 int const offset = opt[rPos].off;
+                 if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
+                 rPos += ml;
+                 assert(ml >= MINMATCH);
+                 assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                 opSaved = op;
+                 if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
+                     goto _dest_overflow;
+         }   }
+     }  /* while (ip <= mflimit) */
+
+ _last_literals:
+     /* Encode Last Literals */
+     {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+         size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+         size_t const totalSize = 1 + litLength + lastRunSize;
+         if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+         if (limit && (op + totalSize > oend)) {
+             if (limit == limitedOutput) return 0;  /* Check output limit */
+             /* adapt lastRunSize to fill 'dst' */
+             lastRunSize  = (size_t)(oend - op) - 1;
+             litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+             lastRunSize -= litLength;
+         }
+         ip = anchor + lastRunSize;
+
+         if (lastRunSize >= RUN_MASK) {
+             size_t accumulator = lastRunSize - RUN_MASK;
+             *op++ = (RUN_MASK << ML_BITS);
+             for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+             *op++ = (BYTE) accumulator;
+         } else {
+             *op++ = (BYTE)(lastRunSize << ML_BITS);
+         }
+         memcpy(op, anchor, lastRunSize);
+         op += lastRunSize;
+     }
+
+     /* End */
+     *srcSizePtr = (int) (((const char*)ip) - source);
+     return (int) ((char*)op-dst);
+
+ _dest_overflow:
+     if (limit == limitedDestSize) {
+         op = opSaved;  /* restore correct out pointer */
+         goto _last_literals;
+     }
+     return 0;
+ }

diff --git a/lib/lz4hc.h b/lib/lz4hc.h
index d41bf42..bb5e073 100644
--- a/lib/lz4hc.h
+++ b/lib/lz4hc.h

@@ -141,35 +141,39 @@
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
 
-typedef struct
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
 {
     uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
     uint16_t   chainTable[LZ4HC_MAXD];
     const uint8_t* end;         /* next block here to continue on current prefix */
     const uint8_t* base;        /* All index relative to this position */
     const uint8_t* dictBase;    /* alternate base for extDict */
-    uint8_t* inputBuffer;       /* deprecated */
     uint32_t   dictLimit;       /* below that point, need extDict */
     uint32_t   lowLimit;        /* below that point, no more dict */
     uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
-    int        compressionLevel;
-} LZ4HC_CCtx_internal;
+    short      compressionLevel;
+    short      favorDecSpeed;
+    const LZ4HC_CCtx_internal* dictCtx;
+};
 
 #else
 
-typedef struct
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
 {
     unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
     unsigned short chainTable[LZ4HC_MAXD];
     const unsigned char* end;        /* next block here to continue on current prefix */
     const unsigned char* base;       /* All index relative to this position */
     const unsigned char* dictBase;   /* alternate base for extDict */
-    unsigned char* inputBuffer;      /* deprecated */
     unsigned int   dictLimit;        /* below that point, need extDict */
     unsigned int   lowLimit;         /* below that point, no more dict */
     unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
-    int            compressionLevel;
-} LZ4HC_CCtx_internal;
+    short          compressionLevel;
+    short          favorDecSpeed;
+    const LZ4HC_CCtx_internal* dictCtx;
+};
 
 #endif
 
@@ -195,25 +199,32 @@
 /* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
 
 /* deprecated compression functions */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC               (const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC               (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
 
-/* Deprecated Streaming functions using older model; should no longer be used */
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
-LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
 
 
 #if defined (__cplusplus)
@@ -244,9 +255,9 @@
  * `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`
  */
 int LZ4_compress_HC_destSize(void* LZ4HC_Data,
-                            const char* src, char* dst,
-                            int* srcSizePtr, int targetDstSize,
-                            int compressionLevel);
+                             const char* src, char* dst,
+                             int* srcSizePtr, int targetDstSize,
+                             int compressionLevel);
 
 /*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
  *  Similar as LZ4_compress_HC_continue(),
@@ -266,7 +277,71 @@
  */
 void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
 
+/*! LZ4_favorDecompressionSpeed() : v1.8.2 (experimental)
+ *  Parser will select decisions favoring decompression over compression ratio.
+ *  Only work at highest compression settings (level >= LZ4HC_CLEVEL_OPT_MIN)
+ */
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
 
+/*! LZ4_resetStreamHC_fast() :
+ *  When an LZ4_streamHC_t is known to be in a internally coherent state,
+ *  it can often be prepared for a new compression with almost no work, only
+ *  sometimes falling back to the full, expensive reset that is always required
+ *  when the stream is in an indeterminate state (i.e., the reset performed by
+ *  LZ4_resetStreamHC()).
+ *
+ *  LZ4_streamHCs are guaranteed to be in a valid state when:
+ *  - returned from LZ4_createStreamHC()
+ *  - reset by LZ4_resetStreamHC()
+ *  - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ *  - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ *  - the stream was in a valid state and was then used in any compression call
+ *    that returned success
+ *  - the stream was in an indeterminate state and was used in a compression
+ *    call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ *    returned success
+ */
+void LZ4_resetStreamHC_fast(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ *  A variant of LZ4_compress_HC_extStateHC().
+ *
+ *  Using this variant avoids an expensive initialization step. It is only safe
+ *  to call if the state buffer is known to be correctly initialized already
+ *  (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ *  "correctly initialized"). From a high level, the difference is that this
+ *  function initializes the provided state with a call to
+ *  LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ *  call to LZ4_resetStreamHC().
+ */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ *  This is an experimental API that allows for the efficient use of a
+ *  static dictionary many times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ *  working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references the dictionary stream in-place.
+ *
+ *  Several assumptions are made about the state of the dictionary stream.
+ *  Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ *  be expected to work.
+ *
+ *  Alternatively, the provided dictionary stream pointer may be NULL, in which
+ *  case any existing dictionary stream is unset.
+ *
+ *  A dictionary should only be attached to a stream without any history (i.e.,
+ *  a stream that has just been reset).
+ *
+ *  The dictionary will remain attached to the working stream only for the
+ *  current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ *  dictionary context association from the working stream. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the lifetime of the stream session.
+ */
+LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream);
 
 #endif   /* LZ4_HC_SLO_098092834 */
 #endif   /* LZ4_HC_STATIC_LINKING_ONLY */

diff --git a/lib/lz4opt.h b/lib/lz4opt.h
deleted file mode 100644
index 5a8438c..0000000
--- a/lib/lz4opt.h
+++ /dev/null

@@ -1,356 +0,0 @@
-/*
-    lz4opt.h - Optimal Mode of LZ4
-    Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
-    Note : this file is intended to be included within lz4hc.c
-
-    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are
-    met:
-
-    * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following disclaimer
-    in the documentation and/or other materials provided with the
-    distribution.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    You can contact the author at :
-       - LZ4 source repository : https://github.com/lz4/lz4
-       - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-#define LZ4_OPT_NUM   (1<<12)
-
-typedef struct {
-    int price;
-    int off;
-    int mlen;
-    int litlen;
-} LZ4HC_optimal_t;
-
-
-/* price in bytes */
-LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
-{
-    int price = litlen;
-    if (litlen >= (int)RUN_MASK)
-        price += 1 + (litlen-RUN_MASK)/255;
-    return price;
-}
-
-
-/* requires mlen >= MINMATCH */
-LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
-{
-    int price = 1 + 2 ; /* token + 16-bit offset */
-
-    price += LZ4HC_literalsPrice(litlen);
-
-    if (mlen >= (int)(ML_MASK+MINMATCH))
-        price += 1 + (mlen-(ML_MASK+MINMATCH))/255;
-
-    return price;
-}
-
-
-/*-*************************************
-*  Match finder
-***************************************/
-typedef struct {
-    int off;
-    int len;
-} LZ4HC_match_t;
-
-LZ4_FORCE_INLINE
-LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
-                        const BYTE* ip, const BYTE* const iHighLimit,
-                        int minLen, int nbSearches)
-{
-    LZ4HC_match_t match = { 0 , 0 };
-    const BYTE* matchPtr = NULL;
-    /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
-     * but this won't be the case here, as we define iLowLimit==ip,
-     * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
-                                ip, ip, iHighLimit, minLen, &matchPtr, &ip,
-                                nbSearches, 1 /* patternAnalysis */);
-    if (matchLength <= minLen) return match;
-    match.len = matchLength;
-    match.off = (int)(ip-matchPtr);
-    return match;
-}
-
-
-static int LZ4HC_compress_optimal (
-    LZ4HC_CCtx_internal* ctx,
-    const char* const source,
-    char* dst,
-    int* srcSizePtr,
-    int dstCapacity,
-    int const nbSearches,
-    size_t sufficient_len,
-    limitedOutput_directive limit,
-    int const fullUpdate
-    )
-{
-#define TRAILING_LITERALS 3
-    LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS];   /* this uses a bit too much stack memory to my taste ... */
-
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + *srcSizePtr;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = iend - LASTLITERALS;
-    BYTE* op = (BYTE*) dst;
-    BYTE* opSaved = (BYTE*) dst;
-    BYTE* oend = op + dstCapacity;
-
-    /* init */
-    DEBUGLOG(5, "LZ4HC_compress_optimal");
-    *srcSizePtr = 0;
-    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
-    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
-
-    /* Main Loop */
-    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
-    while (ip < mflimit) {
-        int const llen = (int)(ip - anchor);
-        int best_mlen, best_off;
-        int cur, last_match_pos = 0;
-
-        LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches);
-        if (firstMatch.len==0) { ip++; continue; }
-
-        if ((size_t)firstMatch.len > sufficient_len) {
-            /* good enough solution : immediate encoding */
-            int const firstML = firstMatch.len;
-            const BYTE* const matchPos = ip - firstMatch.off;
-            opSaved = op;
-            if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
-                goto _dest_overflow;
-            continue;
-        }
-
-        /* set prices for first positions (literals) */
-        {   int rPos;
-            for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
-                int const cost = LZ4HC_literalsPrice(llen + rPos);
-                opt[rPos].mlen = 1;
-                opt[rPos].off = 0;
-                opt[rPos].litlen = llen + rPos;
-                opt[rPos].price = cost;
-                DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
-                            rPos, cost, opt[rPos].litlen);
-        }   }
-        /* set prices using initial match */
-        {   int mlen = MINMATCH;
-            int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
-            int const offset = firstMatch.off;
-            assert(matchML < LZ4_OPT_NUM);
-            for ( ; mlen <= matchML ; mlen++) {
-                int const cost = LZ4HC_sequencePrice(llen, mlen);
-                opt[mlen].mlen = mlen;
-                opt[mlen].off = offset;
-                opt[mlen].litlen = llen;
-                opt[mlen].price = cost;
-                DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
-                            mlen, cost, mlen);
-        }   }
-        last_match_pos = firstMatch.len;
-        {   int addLit;
-            for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
-                opt[last_match_pos+addLit].mlen = 1; /* literal */
-                opt[last_match_pos+addLit].off = 0;
-                opt[last_match_pos+addLit].litlen = addLit;
-                opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
-                DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
-                            last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
-        }   }
-
-        /* check further positions */
-        for (cur = 1; cur < last_match_pos; cur++) {
-            const BYTE* const curPtr = ip + cur;
-            LZ4HC_match_t newMatch;
-
-            if (curPtr >= mflimit) break;
-            DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
-                    cur, opt[cur].price, opt[cur+1].price, cur+1);
-            if (fullUpdate) {
-                /* not useful to search here if next position has same (or lower) cost */
-                if ( (opt[cur+1].price <= opt[cur].price)
-                  /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
-                  && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
-                    continue;
-            } else {
-                /* not useful to search here if next position has same (or lower) cost */
-                if (opt[cur+1].price <= opt[cur].price) continue;
-            }
-
-            DEBUGLOG(7, "search at rPos:%u", cur);
-            if (fullUpdate)
-                newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches);
-            else
-                /* only test matches of minimum length; slightly faster, but misses a few bytes */
-                newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches);
-            if (!newMatch.len) continue;
-
-            if ( ((size_t)newMatch.len > sufficient_len)
-              || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
-                /* immediate encoding */
-                best_mlen = newMatch.len;
-                best_off = newMatch.off;
-                last_match_pos = cur + 1;
-                goto encode;
-            }
-
-            /* before match : set price with literals at beginning */
-            {   int const baseLitlen = opt[cur].litlen;
-                int litlen;
-                for (litlen = 1; litlen < MINMATCH; litlen++) {
-                    int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
-                    int const pos = cur + litlen;
-                    if (price < opt[pos].price) {
-                        opt[pos].mlen = 1; /* literal */
-                        opt[pos].off = 0;
-                        opt[pos].litlen = baseLitlen+litlen;
-                        opt[pos].price = price;
-                        DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
-                                    pos, price, opt[pos].litlen);
-            }   }   }
-
-            /* set prices using match at position = cur */
-            {   int const matchML = newMatch.len;
-                int ml = MINMATCH;
-
-                assert(cur + newMatch.len < LZ4_OPT_NUM);
-                for ( ; ml <= matchML ; ml++) {
-                    int const pos = cur + ml;
-                    int const offset = newMatch.off;
-                    int price;
-                    int ll;
-                    DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
-                                pos, last_match_pos);
-                    if (opt[cur].mlen == 1) {
-                        ll = opt[cur].litlen;
-                        price = ((cur > ll) ? opt[cur - ll].price : 0)
-                              + LZ4HC_sequencePrice(ll, ml);
-                    } else {
-                        ll = 0;
-                        price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
-                    }
-
-                    if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) {
-                        DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
-                                    pos, price, ml);
-                        assert(pos < LZ4_OPT_NUM);
-                        if ( (ml == matchML)  /* last pos of last match */
-                          && (last_match_pos < pos) )
-                            last_match_pos = pos;
-                        opt[pos].mlen = ml;
-                        opt[pos].off = offset;
-                        opt[pos].litlen = ll;
-                        opt[pos].price = price;
-            }   }   }
-            /* complete following positions with literals */
-            {   int addLit;
-                for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
-                    opt[last_match_pos+addLit].mlen = 1; /* literal */
-                    opt[last_match_pos+addLit].off = 0;
-                    opt[last_match_pos+addLit].litlen = addLit;
-                    opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
-                    DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
-            }   }
-        }  /* for (cur = 1; cur <= last_match_pos; cur++) */
-
-        best_mlen = opt[last_match_pos].mlen;
-        best_off = opt[last_match_pos].off;
-        cur = last_match_pos - best_mlen;
-
-encode: /* cur, last_match_pos, best_mlen, best_off must be set */
-        assert(cur < LZ4_OPT_NUM);
-        assert(last_match_pos >= 1);  /* == 1 when only one candidate */
-        DEBUGLOG(6, "reverse traversal, looking for shortest path")
-        DEBUGLOG(6, "last_match_pos = %i", last_match_pos);
-        {   int candidate_pos = cur;
-            int selected_matchLength = best_mlen;
-            int selected_offset = best_off;
-            while (1) {  /* from end to beginning */
-                int const next_matchLength = opt[candidate_pos].mlen;  /* can be 1, means literal */
-                int const next_offset = opt[candidate_pos].off;
-                DEBUGLOG(6, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
-                opt[candidate_pos].mlen = selected_matchLength;
-                opt[candidate_pos].off = selected_offset;
-                selected_matchLength = next_matchLength;
-                selected_offset = next_offset;
-                if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
-                assert(next_matchLength > 0);  /* can be 1, means literal */
-                candidate_pos -= next_matchLength;
-        }   }
-
-        /* encode all recorded sequences in order */
-        {   int rPos = 0;  /* relative position (to ip) */
-            while (rPos < last_match_pos) {
-                int const ml = opt[rPos].mlen;
-                int const offset = opt[rPos].off;
-                if (ml == 1) { ip++; rPos++; continue; }  /* literal; note: can end up with several literals, in which case, skip them */
-                rPos += ml;
-                assert(ml >= MINMATCH);
-                assert((offset >= 1) && (offset <= MAX_DISTANCE));
-                opSaved = op;
-                if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
-                    goto _dest_overflow;
-        }   }
-    }  /* while (ip < mflimit) */
-
-_last_literals:
-    /* Encode Last Literals */
-    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
-        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-        size_t const totalSize = 1 + litLength + lastRunSize;
-        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
-        if (limit && (op + totalSize > oend)) {
-            if (limit == limitedOutput) return 0;  /* Check output limit */
-            /* adapt lastRunSize to fill 'dst' */
-            lastRunSize  = (size_t)(oend - op) - 1;
-            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
-            lastRunSize -= litLength;
-        }
-        ip = anchor + lastRunSize;
-
-        if (lastRunSize >= RUN_MASK) {
-            size_t accumulator = lastRunSize - RUN_MASK;
-            *op++ = (RUN_MASK << ML_BITS);
-            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
-            *op++ = (BYTE) accumulator;
-        } else {
-            *op++ = (BYTE)(lastRunSize << ML_BITS);
-        }
-        memcpy(op, anchor, lastRunSize);
-        op += lastRunSize;
-    }
-
-    /* End */
-    *srcSizePtr = (int) (((const char*)ip) - source);
-    return (int) ((char*)op-dst);
-
-_dest_overflow:
-    if (limit == limitedDestSize) {
-        op = opSaved;  /* restore correct out pointer */
-        goto _last_literals;
-    }
-    return 0;
-}

diff --git a/lib/xxhash.c b/lib/xxhash.c
index bcf1f1d..3fc97fd 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c

@@ -52,7 +52,7 @@
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif

diff --git a/programs/Makefile b/programs/Makefile
index a51bd4b..72bdcaa 100644
--- a/programs/Makefile
+++ b/programs/Makefile

@@ -120,21 +120,19 @@
 DESTDIR     ?=
 # directory variables : GNU conventions prefer lowercase
 # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
-# support both lower and uppercase (BSD), use uppercase in script
-prefix      ?= /usr/local
-PREFIX      ?= $(prefix)
-exec_prefix ?= $(PREFIX)
-bindir      ?= $(exec_prefix)/bin
-BINDIR      ?= $(bindir)
-datarootdir ?= $(PREFIX)/share
-mandir      ?= $(datarootdir)/man
-man1dir     ?= $(mandir)/man1
-
-ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS))
-MANDIR  ?= $(PREFIX)/man/man1
-else
-MANDIR  ?= $(man1dir)
-endif
+# support both lower and uppercase (BSD), use lowercase in script
+PREFIX      ?= /usr/local
+prefix      ?= $(PREFIX)
+EXEC_PREFIX ?= $(prefix)
+exec_prefix ?= $(EXEC_PREFIX)
+BINDIR      ?= $(exec_prefix)/bin
+bindir      ?= $(BINDIR)
+DATAROOTDIR ?= $(prefix)/share
+datarootdir ?= $(DATAROOTDIR)
+MANDIR      ?= $(datarootdir)/man
+mandir      ?= $(MANDIR)
+MAN1DIR     ?= $(mandir)/man1
+man1dir     ?= $(MAN1DIR)
 
 ifneq (,$(filter $(shell uname),SunOS))
 INSTALL ?= ginstall
@@ -148,27 +146,27 @@
 
 install: lz4
 	@echo Installing binaries
-	@$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/
-	@$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(BINDIR)/lz4
-	@ln -sf lz4 $(DESTDIR)$(BINDIR)/lz4c
-	@ln -sf lz4 $(DESTDIR)$(BINDIR)/lz4cat
-	@ln -sf lz4 $(DESTDIR)$(BINDIR)/unlz4
+	@$(INSTALL) -d -m 755 $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/
+	@$(INSTALL_PROGRAM) lz4 $(DESTDIR)$(bindir)/lz4
+	@ln -sf lz4 $(DESTDIR)$(bindir)/lz4c
+	@ln -sf lz4 $(DESTDIR)$(bindir)/lz4cat
+	@ln -sf lz4 $(DESTDIR)$(bindir)/unlz4
 	@echo Installing man pages
-	@$(INSTALL_DATA) lz4.1 $(DESTDIR)$(MANDIR)/lz4.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4c.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/lz4cat.1
-	@ln -sf lz4.1 $(DESTDIR)$(MANDIR)/unlz4.1
+	@$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4c.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/lz4cat.1
+	@ln -sf lz4.1 $(DESTDIR)$(man1dir)/unlz4.1
 	@echo lz4 installation completed
 
 uninstall:
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4cat
-	@$(RM) $(DESTDIR)$(BINDIR)/unlz4
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4
-	@$(RM) $(DESTDIR)$(BINDIR)/lz4c
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4.1
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4c.1
-	@$(RM) $(DESTDIR)$(MANDIR)/lz4cat.1
-	@$(RM) $(DESTDIR)$(MANDIR)/unlz4.1
+	@$(RM) $(DESTDIR)$(bindir)/lz4cat
+	@$(RM) $(DESTDIR)$(bindir)/unlz4
+	@$(RM) $(DESTDIR)$(bindir)/lz4
+	@$(RM) $(DESTDIR)$(bindir)/lz4c
+	@$(RM) $(DESTDIR)$(man1dir)/lz4.1
+	@$(RM) $(DESTDIR)$(man1dir)/lz4c.1
+	@$(RM) $(DESTDIR)$(man1dir)/lz4cat.1
+	@$(RM) $(DESTDIR)$(man1dir)/unlz4.1
 	@echo lz4 programs successfully uninstalled
 
 endif

diff --git a/programs/bench.c b/programs/bench.c
index fac2a87..770191c 100644
--- a/programs/bench.c
+++ b/programs/bench.c

@@ -41,6 +41,7 @@
 #include <string.h>      /* memset */
 #include <stdio.h>       /* fprintf, fopen, ftello */
 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h>      /* assert */
 
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
@@ -66,6 +67,7 @@
 
 #define NBSECONDS             3
 #define TIMELOOP_MICROSEC     1*1000000ULL /* 1 second */
+#define TIMELOOP_NANOSEC      1*1000000000ULL /* 1 second */
 #define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
 #define COOLPERIOD_SEC        10
 #define DECOMP_MULT           1 /* test decompression DECOMP_MULT times longer than compression */
@@ -117,21 +119,21 @@
 static U32 g_nbSeconds = NBSECONDS;
 static size_t g_blockSize = 0;
 int g_additionalParam = 0;
+int g_benchSeparately = 0;
 
 void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 
 void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
 
-void BMK_SetNbSeconds(unsigned nbSeconds)
+void BMK_setNbSeconds(unsigned nbSeconds)
 {
     g_nbSeconds = nbSeconds;
     DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
 }
 
-void BMK_SetBlockSize(size_t blockSize)
-{
-    g_blockSize = blockSize;
-}
+void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; }
+
+void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); }
 
 
 /* ********************************************************
@@ -218,7 +220,9 @@
     {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
         U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
         UTIL_time_t coolTime;
-        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100;
+        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 100;
+        U32 nbCompressionLoops = (U32)((5 MB) / (srcSize+1)) + 1;  /* conservative initial compression speed estimate */
+        U32 nbDecodeLoops = (U32)((200 MB) / (srcSize+1)) + 1;  /* conservative initial decode speed estimate */
         U64 totalCTime=0, totalDTime=0;
         U32 cCompleted=0, dCompleted=0;
 #       define NB_MARKS 4
@@ -230,9 +234,6 @@
         coolTime = UTIL_getTime();
         DISPLAYLEVEL(2, "\r%79s\r", "");
         while (!cCompleted || !dCompleted) {
-            UTIL_time_t clockStart;
-            U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
-
             /* overheat protection */
             if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                 DISPLAYLEVEL(2, "\rcooling down ...    \r");
@@ -246,21 +247,27 @@
 
             UTIL_sleepMilli(1);  /* give processor time to other processes */
             UTIL_waitForNextTick();
-            clockStart = UTIL_getTime();
 
             if (!cCompleted) {   /* still some time to do compression tests */
-                U32 nbLoops = 0;
-                do {
+                UTIL_time_t const clockStart = UTIL_getTime();
+                U32 nbLoops;
+                for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                         size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
                         if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed");
                         blockTable[blockNb].cSize = rSize;
+                }   }
+                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
+                    if (clockSpan > 0) {
+                        if (clockSpan < fastestC * nbCompressionLoops)
+                            fastestC = clockSpan / nbCompressionLoops;
+                        assert(fastestC > 0);
+                        nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;  /* aim for ~1sec */
+                    } else {
+                        assert(nbCompressionLoops < 40000000);   /* avoid overflow */
+                        nbCompressionLoops *= 100;
                     }
-                    nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart);
-                    if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;
                     totalCTime += clockSpan;
                     cCompleted = totalCTime>maxTime;
             }   }
@@ -272,44 +279,48 @@
             markNb = (markNb+1) % NB_MARKS;
             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
                     marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
-                    (double)srcSize / fastestC );
+                    ((double)srcSize / fastestC) * 1000 );
 
             (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
 #if 1
             /* Decompression */
             if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
-            UTIL_sleepMilli(1); /* give processor time to other processes */
+            UTIL_sleepMilli(5); /* give processor time to other processes */
             UTIL_waitForNextTick();
-            clockStart = UTIL_getTime();
 
             if (!dCompleted) {
-                U32 nbLoops = 0;
-                do {
+                UTIL_time_t const clockStart = UTIL_getTime();
+                U32 nbLoops;
+                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                         size_t const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
                         if (LZ4_isError(regenSize)) {
-                            DISPLAY("LZ4_decompress_safe() failed on block %u  \n", blockNb);
-                            clockLoop = 0;   /* force immediate test end */
+                            DISPLAY("LZ4_decompress_safe() failed on block %u \n", blockNb);
                             break;
                         }
-
                         blockTable[blockNb].resSize = regenSize;
+                }   }
+                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
+                    if (clockSpan > 0) {
+                        if (clockSpan < fastestD * nbDecodeLoops)
+                            fastestD = clockSpan / nbDecodeLoops;
+                        assert(fastestD > 0);
+                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;  /* aim for ~1sec */
+                    } else {
+                        assert(nbDecodeLoops < 40000000);   /* avoid overflow */
+                        nbDecodeLoops *= 100;
                     }
-                    nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart) < DECOMP_MULT*clockLoop);
-                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart);
-                    if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
                     totalDTime += clockSpan;
-                    dCompleted = totalDTime>(DECOMP_MULT*maxTime);
+                    dCompleted = totalDTime > (DECOMP_MULT*maxTime);
             }   }
 
             markNb = (markNb+1) % NB_MARKS;
             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
                     marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
-                    (double)srcSize / fastestC,
-                    (double)srcSize / fastestD );
+                    ((double)srcSize / fastestC) * 1000,
+                    ((double)srcSize / fastestD) * 1000);
 
             /* CRC Checking */
             {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
@@ -339,8 +350,8 @@
         }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
 
         if (g_displayLevel == 1) {
-            double cSpeed = (double)srcSize / fastestC;
-            double dSpeed = (double)srcSize / fastestD;
+            double const cSpeed = ((double)srcSize / fastestC) * 1000;
+            double const dSpeed = ((double)srcSize / fastestD) * 1000;
             if (g_additionalParam)
                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
             else
@@ -504,6 +515,22 @@
 }
 
 
+int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
+                   int cLevel, int cLevelLast)
+{
+    unsigned fileNb;
+    if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
+    if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
+    if (cLevelLast < cLevel) cLevelLast = cLevel;
+    if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
+
+    for (fileNb=0; fileNb<nbFiles; fileNb++)
+        BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast);
+
+    return 0;
+}
+
+
 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
                    int cLevel, int cLevelLast)
 {
@@ -516,7 +543,11 @@
 
     if (nbFiles == 0)
         BMK_syntheticTest(cLevel, cLevelLast, compressibility);
-    else
-        BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
+    else {
+        if (g_benchSeparately)
+            BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast);
+        else
+            BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
+    }
     return 0;
 }

diff --git a/programs/bench.h b/programs/bench.h
index 15def93..bb67bee 100644
--- a/programs/bench.h
+++ b/programs/bench.h

@@ -29,9 +29,10 @@
                    int cLevel, int cLevelLast);
 
 /* Set Parameters */
-void BMK_SetNbSeconds(unsigned nbLoops);
-void BMK_SetBlockSize(size_t blockSize);
+void BMK_setNbSeconds(unsigned nbLoops);
+void BMK_setBlockSize(size_t blockSize);
 void BMK_setAdditionalParam(int additionalParam);
 void BMK_setNotificationLevel(unsigned level);
+void BMK_setBenchSeparately(int separate);
 
 #endif   /* BENCH_H_125623623633 */

diff --git a/programs/lz4.1.md b/programs/lz4.1.md
index b7b8570..a5168e9 100644
--- a/programs/lz4.1.md
+++ b/programs/lz4.1.md

@@ -117,9 +117,9 @@
 ### Operation modifiers
 
 * `-#`:
-  Compression level, with # being any value from 1 to 16.
+  Compression level, with # being any value from 1 to 12.
   Higher values trade compression speed for compression ratio.
-  Values above 16 are considered the same as 16.
+  Values above 12 are considered the same as 12.
   Recommended values are 1 for fast compression (default),
   and 9 for high compression.
   Speed/compression trade-off will vary depending on data to compress.
@@ -206,7 +206,7 @@
   Benchmark multiple compression levels, from b# to e# (included)
 
 * `-i#`:
-  Minimum evaluation in seconds \[1-9\] (default : 3)
+  Minimum evaluation time in seconds \[1-9\] (default : 3)
 
 
 BUGS

diff --git a/programs/lz4cli.c b/programs/lz4cli.c
index 362ba49..ba519b4 100644
--- a/programs/lz4cli.c
+++ b/programs/lz4cli.c

@@ -140,6 +140,7 @@
     DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n");
     DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n");
     DISPLAY( "--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)\n");
+    DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n");
     DISPLAY( "Benchmark arguments : \n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
     DISPLAY( " -e#    : test all compression levels from -bX to # (default : 1)\n");
@@ -355,6 +356,7 @@
                 if (!strcmp(argument,  "--no-content-size")) { LZ4IO_setContentSize(0); continue; }
                 if (!strcmp(argument,  "--sparse")) { LZ4IO_setSparseFile(2); continue; }
                 if (!strcmp(argument,  "--no-sparse")) { LZ4IO_setSparseFile(0); continue; }
+                if (!strcmp(argument,  "--favor-decSpeed")) { LZ4IO_favorDecSpeed(1); continue; }
                 if (!strcmp(argument,  "--verbose")) { displayLevel++; continue; }
                 if (!strcmp(argument,  "--quiet")) { if (displayLevel) displayLevel--; continue; }
                 if (!strcmp(argument,  "--version")) { DISPLAY(WELCOME_MESSAGE); return 0; }
@@ -458,11 +460,11 @@
                                 if (B < 4) badusage(exeName);
                                 if (B <= 7) {
                                     blockSize = LZ4IO_setBlockSizeID(B);
-                                    BMK_SetBlockSize(blockSize);
+                                    BMK_setBlockSize(blockSize);
                                     DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
                                 } else {
                                     if (B < 32) badusage(exeName);
-                                    BMK_SetBlockSize(B);
+                                    BMK_setBlockSize(B);
                                     if (B >= 1024) {
                                         DISPLAYLEVEL(2, "bench: using blocks of size %u KB \n", (U32)(B>>10));
                                     } else {
@@ -480,6 +482,10 @@
                 case 'b': mode = om_bench; multiple_inputs=1;
                     break;
 
+                    /* hidden command : benchmark files, but do not fuse result */
+                case 'S': BMK_setBenchSeparately(1);
+                    break;
+
 #ifdef UTIL_HAS_CREATEFILELIST
                     /* recursive */
                 case 'r': recursive=1;
@@ -496,7 +502,7 @@
                         iters = readU32FromChar(&argument);
                         argument--;
                         BMK_setNotificationLevel(displayLevel);
-                        BMK_SetNbSeconds(iters);   /* notification if displayLevel >= 3 */
+                        BMK_setNbSeconds(iters);   /* notification if displayLevel >= 3 */
                     }
                     break;
 

diff --git a/programs/lz4io.c b/programs/lz4io.c
index 927928a..b52c1f3 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c

@@ -56,8 +56,8 @@
 #include "lz4io.h"
 #include "lz4.h"       /* still required for legacy format */
 #include "lz4hc.h"     /* still required for legacy format */
+#define LZ4F_STATIC_LINKING_ONLY
 #include "lz4frame.h"
-#include "lz4frame_static.h"
 
 
 /*****************************
@@ -94,9 +94,12 @@
 static int g_displayLevel = 0;   /* 0 : no display  ; 1: errors  ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if (((clock_t)(g_time - clock()) > refreshRate) || (g_displayLevel>=4)) \
-            { g_time = clock(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+            if ( ((clock() - g_time) > refreshRate)    \
+              || (g_displayLevel>=4) ) {               \
+                g_time = clock();                      \
+                DISPLAY(__VA_ARGS__);                  \
+                if (g_displayLevel>=4) fflush(stderr); \
+        }   }
 static const clock_t refreshRate = CLOCKS_PER_SEC / 6;
 static clock_t g_time = 0;
 
@@ -113,6 +116,7 @@
 static int g_sparseFileSupport = 1;
 static int g_contentSizeFlag = 0;
 static int g_useDictionary = 0;
+static unsigned g_favorDecSpeed = 0;
 static const char* g_dictionaryFilename = NULL;
 
 
@@ -218,6 +222,12 @@
     return g_contentSizeFlag;
 }
 
+/* Default setting : 0 (disabled) */
+void LZ4IO_favorDecSpeed(int favor)
+{
+    g_favorDecSpeed = (favor!=0);
+}
+
 static U32 g_removeSrcFile = 0;
 void LZ4IO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); }
 
@@ -545,6 +555,7 @@
     prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)g_blockSizeId;
     prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)g_blockChecksum;
     prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)g_streamChecksum;
+    prefs.favorDecSpeed = g_favorDecSpeed;
     if (g_contentSizeFlag) {
       U64 const fileSize = UTIL_getFileSize(srcFileName);
       prefs.frameInfo.contentSize = fileSize;   /* == 0 if input == stdin */
@@ -560,7 +571,7 @@
     /* single-block file */
     if (readSize < blockSize) {
         /* Compress in single pass */
-        size_t cSize = LZ4F_compressFrame_usingCDict(dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
+        size_t cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
         if (LZ4F_isError(cSize)) EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize));
         compressedfilesize = cSize;
         DISPLAYUPDATE(2, "\rRead : %u MB   ==> %.2f%%   ",
@@ -951,7 +962,7 @@
         total += readBytes;
         storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, storedSkips);
     }
-    if (ferror(finput)) EXM_THROW(51, "Read Error")
+    if (ferror(finput)) EXM_THROW(51, "Read Error");
 
     LZ4IO_fwriteSparseEnd(foutput, storedSkips);
     return total;

diff --git a/programs/lz4io.h b/programs/lz4io.h
index b21b8b6..22c5e3e 100644
--- a/programs/lz4io.h
+++ b/programs/lz4io.h

@@ -94,10 +94,15 @@
 /* Default setting : 0 (disabled) */
 int LZ4IO_setSparseFile(int enable);
 
-/* Default setting : 0 (disabled) */
+/* Default setting : 0 == no content size present in frame header */
 int LZ4IO_setContentSize(int enable);
 
+/* Default setting : 0 == src file preserved */
 void LZ4IO_setRemoveSrcFile(unsigned flag);
 
+/* Default setting : 0 == favor compression ratio
+ * Note : 1 only works for high compression levels (10+) */
+void LZ4IO_favorDecSpeed(int favor);
+
 
 #endif  /* LZ4IO_H_237902873 */

diff --git a/programs/util.h b/programs/util.h
index fc7f63e..d74db0d 100644
--- a/programs/util.h
+++ b/programs/util.h

@@ -30,8 +30,9 @@
 *  Dependencies
 ******************************************/
 #include "platform.h"     /* PLATFORM_POSIX_VERSION */
-#include <stdlib.h>       /* malloc */
 #include <stddef.h>       /* size_t, ptrdiff_t */
+#include <stdlib.h>       /* malloc */
+#include <string.h>       /* strlen, strncpy */
 #include <stdio.h>        /* fprintf */
 #include <sys/types.h>    /* stat, utime */
 #include <sys/stat.h>     /* stat */
@@ -141,6 +142,7 @@
 *  Time functions
 ******************************************/
 #if defined(_WIN32)   /* Windows */
+
     typedef LARGE_INTEGER UTIL_time_t;
     UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
     UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
@@ -165,7 +167,9 @@
         }
         return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
     }
+
 #elif defined(__APPLE__) && defined(__MACH__)
+
     #include <mach/mach_time.h>
     typedef U64 UTIL_time_t;
     UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
@@ -177,7 +181,7 @@
             mach_timebase_info(&rate);
             init = 1;
         }
-        return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
+        return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom)) / 1000ULL;
     }
     UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
     {
@@ -189,7 +193,9 @@
         }
         return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
     }
-#elif (PLATFORM_POSIX_VERSION >= 200112L)
+
+#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) )
+
     #include <time.h>
     typedef struct timespec UTIL_time_t;
     UTIL_STATIC UTIL_time_t UTIL_getTime(void)
@@ -227,7 +233,9 @@
         nano += diff.tv_nsec;
         return nano;
     }
+
 #else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
+
     typedef clock_t UTIL_time_t;
     UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
     UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
@@ -242,6 +250,12 @@
     return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
+/* returns time span in nanoseconds */
+UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart)
+{
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeNano(clockStart, clockEnd);
+}
 
 UTIL_STATIC void UTIL_waitForNextTick(void)
 {

diff --git a/tests/.gitignore b/tests/.gitignore
index 4c0f311..36dff42 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore

@@ -8,6 +8,7 @@
 fuzzer
 fuzzer32
 fasttest
+checkTag
 
 # test artefacts
 tmp*

diff --git a/tests/Makefile b/tests/Makefile
index 819ba43..d238561 100644
--- a/tests/Makefile
+++ b/tests/Makefile

@@ -1,8 +1,6 @@
 # ##########################################################################
 # LZ4 programs - Makefile
-# Copyright (C) Yann Collet 2011-2017
-#
-# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
+# Copyright (C) Yann Collet 2011-present
 #
 # GPL v2 License
 #
@@ -35,13 +33,14 @@
 TESTDIR := versionsTest
 PYTHON  ?= python3
 
-DEBUGFLAGS = -g -DLZ4_DEBUG=1
+DEBUGLEVEL?= 1
+DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL)
 CFLAGS  ?= -O3 # can select custom optimization flags. For example : CFLAGS=-O2 make
 CFLAGS  += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \
            -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \
            -Wpointer-arith -Wstrict-aliasing=1
 CFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS)
-CPPFLAGS:= -I$(LZ4DIR) -I$(PRGDIR) -DXXH_NAMESPACE=LZ4_
+CPPFLAGS+= -I$(LZ4DIR) -I$(PRGDIR) -DXXH_NAMESPACE=LZ4_
 FLAGS    = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
 
 
@@ -53,12 +52,12 @@
 EXT  =
 VOID = /dev/null
 endif
-LZ4     := $(PRGDIR)/lz4$(EXT)
+LZ4 := $(PRGDIR)/lz4$(EXT)
 
 
 # Default test parameters
 TEST_FILES   := COPYING
-FUZZER_TIME  := -T3mn
+FUZZER_TIME  := -T90s
 NB_LOOPS     ?= -i1
 
 
@@ -72,6 +71,9 @@
 lz4:
 	$(MAKE) -C $(PRGDIR) $@ CFLAGS="$(CFLAGS)"
 
+lib liblz4.pc:
+	$(MAKE) -C $(LZ4DIR) $@ CFLAGS="$(CFLAGS)"
+
 lz4c unlz4 lz4cat: lz4
 	ln -sf $(LZ4) $(PRGDIR)/$@
 
@@ -81,7 +83,8 @@
 %.o : $(LZ4DIR)/%.c $(LZ4DIR)/%.h
 	$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@
 
-fullbench  : lz4.o lz4hc.o lz4frame.o xxhash.o fullbench.c
+fullbench : DEBUGLEVEL=0
+fullbench : lz4.o lz4hc.o lz4frame.o xxhash.o fullbench.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 $(LZ4DIR)/liblz4.a:
@@ -111,7 +114,7 @@
         fullbench$(EXT) fullbench32$(EXT) \
         fuzzer$(EXT) fuzzer32$(EXT) \
         frametest$(EXT) frametest32$(EXT) \
-        fasttest$(EXT) datagen$(EXT)
+        fasttest$(EXT) datagen$(EXT) checkTag$(EXT)
 	@rm -fR $(TESTDIR)
 	@echo Cleaning completed
 
@@ -119,6 +122,9 @@
 versionsTest:
 	$(PYTHON) test-lz4-versions.py
 
+checkTag: checkTag.c $(LZ4DIR)/lz4.h
+	$(CC) $(FLAGS) $< -o $@$(EXT)
+
 
 #-----------------------------------------------------------------------------
 # validated only for Linux, OSX, BSD, Hurd and Solaris targets
@@ -138,11 +144,14 @@
 DD:=dd
 
 
-test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer
+test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install
 
 test32: CFLAGS+=-m32
 test32: test
 
+test-install: lz4 lib liblz4.pc
+	lz4_root=.. ./test_install.sh
+
 test-lz4-sparse: lz4 datagen
 	@echo "\n ---- test sparse file support ----"
 	./datagen -g5M  -P100 > tmplsdg5M
@@ -181,12 +190,6 @@
 	$(LZ4) -v tmplc1 | $(LZ4) -t
 	$(LZ4) -v --content-size tmplc1 | $(LZ4) -d > tmplc2
 	$(DIFF) -s tmplc1 tmplc2
-	# test large size [2-4] GB
-	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmplc1
-	@ls -ls tmplc1
-	@./datagen -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmplc2
-	@ls -ls tmplc2
-	$(DIFF) -s tmplc1 tmplc2
 	@$(RM) tmplc*
 
 test-lz4-frame-concatenation: lz4 datagen
@@ -290,6 +293,13 @@
 	@echo "\n ---- test huge files compression/decompression ----"
 	./datagen -g6GB   | $(LZ4) -vB5D  | $(LZ4) -qt
 	./datagen -g6GB   | $(LZ4) -v5BD  | $(LZ4) -qt
+	# test large file size [2-4] GB
+	@./datagen -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1
+	@ls -ls tmphf1
+	@./datagen -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmphf2
+	@ls -ls tmphf2
+	$(DIFF) -s tmphf1 tmphf2
+	@$(RM) tmphf*
 
 test-lz4-testmode: lz4 datagen
 	@echo "\n ---- bench mode ----"
@@ -323,9 +333,13 @@
 	./datagen -g16M -P90  | $(LZ4) -11B5    | $(LZ4) -t
 	./datagen -g32M -P10  | $(LZ4) -11B5D   | $(LZ4) -t
 
-test-lz4: lz4 datagen test-lz4-basic test-lz4-opt-parser test-lz4-multiple \
-          test-lz4-sparse test-lz4-frame-concatenation test-lz4-testmode \
-          test-lz4-contentSize test-lz4-hugefile test-lz4-dict
+test-lz4-essentials : lz4 datagen test-lz4-basic test-lz4-multiple \
+                      test-lz4-frame-concatenation test-lz4-testmode \
+                      test-lz4-contentSize test-lz4-dict
+	@$(RM) tmp*
+
+test-lz4: lz4 datagen test-lz4-essentials test-lz4-opt-parser \
+          test-lz4-sparse test-lz4-hugefile test-lz4-dict
 	@$(RM) tmp*
 
 test-lz4c: lz4c datagen
@@ -387,13 +401,12 @@
 	./datagen -g16KB -s2 > ftmdg16K2
 	./datagen -g16KB -s3 > ftmdg16K3
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --force --multiple ftmdg16K ftmdg16K2 ftmdg16K3
-	./datagen -g16MB > ftmdg16M
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f ftmdg16M ftmdg16K2
+	./datagen -g7MB > ftmdg7M
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f ftmdg7M ftmdg16K2
 	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -t ftmdg16K2
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 ftmdg16M
-	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 ftmdg16M ftmdg16K2
-	./datagen -g256MB > ftmdg256M
-	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -B4D -f -vq ftmdg256M $(VOID)
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 ftmdg7M
+	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 ftmdg7M ftmdg16K2
+	valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -B4D -f -vq ftmdg7M $(VOID)
 	$(RM) ftm*
 	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i64 -t1
 	valgrind --leak-check=yes --error-exitcode=1 ./frametest -i256

diff --git a/tests/checkTag.c b/tests/checkTag.c
new file mode 100644
index 0000000..4a33415
--- /dev/null
+++ b/tests/checkTag.c

@@ -0,0 +1,79 @@
+/*
+    checkTag.c - Version validation tool for LZ4
+    Copyright (C) Yann Collet 2018 - present
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repo : https://github.com/lz4/lz4
+*/
+
+/* checkTag command :
+ * $ ./checkTag tag
+ * checkTag validates tags of following format : v[0-9].[0-9].[0-9]{any}
+ * The tag is then compared to LZ4 version number.
+ * They are compatible if first 3 digits are identical.
+ * Anything beyond that is free, and doesn't impact validation.
+ * Example : tag v1.8.1.2 is compatible with version 1.8.1
+ * When tag and version are not compatible, program exits with error code 1.
+ * When they are compatible, it exists with a code 0.
+ * checkTag is intended to be used in automated testing environment.
+ */
+
+#include <stdio.h>   /* printf */
+#include <string.h>  /* strlen, strncmp */
+#include "lz4.h"     /* LZ4_VERSION_STRING */
+
+
+/*  validate() :
+ * @return 1 if tag is compatible, 0 if not.
+ */
+static int validate(const char* const tag)
+{
+    size_t const tagLength = strlen(tag);
+    size_t const verLength = strlen(LZ4_VERSION_STRING);
+
+    if (tagLength < 2) return 0;
+    if (tag[0] != 'v') return 0;
+    if (tagLength <= verLength) return 0;
+
+    if (strncmp(LZ4_VERSION_STRING, tag+1, verLength)) return 0;
+
+    return 1;
+}
+
+int main(int argc, const char** argv)
+{
+    const char* const exeName = argv[0];
+    const char* const tag = argv[1];
+    if (argc!=2) {
+        printf("incorrect usage : %s tag \n", exeName);
+        return 2;
+    }
+
+    printf("Version : %s \n", LZ4_VERSION_STRING);
+    printf("Tag     : %s \n", tag);
+
+    if (validate(tag)) {
+        printf("OK : tag is compatible with lz4 version \n");
+        return 0;
+    }
+
+    printf("!! error : tag and versions are not compatible !! \n");
+    return 1;
+}

diff --git a/tests/frametest.c b/tests/frametest.c
index 88d0afd..4efeb6f 100644
--- a/tests/frametest.c
+++ b/tests/frametest.c

@@ -41,7 +41,11 @@
 #include <string.h>     /* strcmp */
 #include <time.h>       /* clock_t, clock(), CLOCKS_PER_SEC */
 #include <assert.h>
-#include "lz4frame_static.h"
+#include "lz4frame.h"   /* include multiple times to test correctness/safety */
+#include "lz4frame.h"
+#define LZ4F_STATIC_LINKING_ONLY
+#include "lz4frame.h"
+#include "lz4frame.h"
 #include "lz4.h"        /* LZ4_VERSION_STRING */
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"     /* XXH64 */
@@ -164,7 +168,7 @@
 /*-*******************************************************
 *  Tests
 *********************************************************/
-#define CHECK_V(v,f) v = f; if (LZ4F_isError(v)) goto _output_error
+#define CHECK_V(v,f) v = f; if (LZ4F_isError(v)) { fprintf(stderr, "%s\n", LZ4F_getErrorName(v)); goto _output_error; }
 #define CHECK(f)   { LZ4F_errorCode_t const CHECK_V(err_ , f); }
 
 int basicTests(U32 seed, double compressibility)
@@ -509,23 +513,26 @@
         CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL;
     }
 
-
     /* Dictionary compression test */
     {   size_t const dictSize = 63 KB;
         size_t const dstCapacity = LZ4F_compressFrameBound(dictSize, NULL);
         size_t cSizeNoDict, cSizeWithDict;
         LZ4F_CDict* const cdict = LZ4F_createCDict(CNBuffer, dictSize);
         if (cdict == NULL) goto _output_error;
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
+        
         DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with NULL dict : ");
         CHECK_V(cSizeNoDict,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
                                               CNBuffer, dictSize,
                                               NULL, NULL) );
         DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeNoDict);
 
+        CHECK( LZ4F_freeCompressionContext(cctx) );
+        CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) );
         DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict : ");
         CHECK_V(cSizeWithDict,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
                                               CNBuffer, dictSize,
                                               cdict, NULL) );
         DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
@@ -557,7 +564,7 @@
             memset(&cParams, 0, sizeof(cParams));
             cParams.compressionLevel = -3;
             CHECK_V(cSizeLevelMax,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
                                               CNBuffer, dictSize,
                                               cdict, &cParams) );
             DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax);
@@ -569,7 +576,7 @@
             memset(&cParams, 0, sizeof(cParams));
             cParams.compressionLevel = LZ4F_compressionLevel_max();
             CHECK_V(cSizeLevelMax,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, dstCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity,
                                               CNBuffer, dictSize,
                                               cdict, &cParams) );
             DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax);
@@ -584,7 +591,7 @@
             cParams.frameInfo.blockMode = LZ4F_blockLinked;
             cParams.frameInfo.blockSizeID = LZ4F_max64KB;
             CHECK_V(cSizeContiguous,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, outCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity,
                                               CNBuffer, inSize,
                                               cdict, &cParams) );
             DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
@@ -620,7 +627,7 @@
             cParams.frameInfo.blockMode = LZ4F_blockIndependent;
             cParams.frameInfo.blockSizeID = LZ4F_max64KB;
             CHECK_V(cSizeIndep,
-                LZ4F_compressFrame_usingCDict(compressedBuffer, outCapacity,
+                LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity,
                                               CNBuffer, inSize,
                                               cdict, &cParams) );
             DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n",
@@ -647,6 +654,7 @@
         }
 
         LZ4F_freeCDict(cdict);
+        CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL;
     }
 
 
@@ -730,15 +738,18 @@
 
 static void locateBuffDiff(const void* buff1, const void* buff2, size_t size, unsigned nonContiguous)
 {
-    int p=0;
+    size_t p=0;
     const BYTE* b1=(const BYTE*)buff1;
     const BYTE* b2=(const BYTE*)buff2;
+    DISPLAY("locateBuffDiff: looking for error position \n");
     if (nonContiguous) {
-        DISPLAY("Non-contiguous output test (%i bytes)\n", (int)size);
+        DISPLAY("mode %u: non-contiguous output (%zu bytes), cannot search \n", nonContiguous, size);
         return;
     }
-    while (b1[p]==b2[p]) p++;
-    DISPLAY("Error at pos %i/%i : %02X != %02X \n", p, (int)size, b1[p], b2[p]);
+    while (p < size && b1[p]==b2[p]) p++;
+    if (p != size) {
+        DISPLAY("Error at pos %i/%i : %02X != %02X \n", (int)p, (int)size, b1[p], b2[p]);
+    }
 }
 
 
@@ -829,6 +840,8 @@
                 size_t const iSize = MIN(sampleMax, (size_t)(iend-ip));
                 size_t const oSize = LZ4F_compressBound(iSize, prefsPtr);
                 cOptions.stableSrc = ((FUZ_rand(&randState) & 3) == 1);
+                DISPLAYLEVEL(6, "Sending %zi bytes to compress (stableSrc:%u) \n",
+                                iSize, cOptions.stableSrc);
 
                 result = LZ4F_compressUpdate(cCtx, op, oSize, ip, iSize, &cOptions);
                 CHECK(LZ4F_isError(result), "Compression failed (error %i : %s)", (int)result, LZ4F_getErrorName(result));
@@ -873,7 +886,8 @@
                 dOptions.stableDst = FUZ_rand(&randState) & 1;
                 if (nonContiguousDst==2) dOptions.stableDst = 0;   /* overwrite mode */
                 result = LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, &dOptions);
-                if (LZ4F_getErrorCode(result) == LZ4F_ERROR_contentChecksum_invalid) locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
+                if (LZ4F_getErrorCode(result) == LZ4F_ERROR_contentChecksum_invalid)
+                    locateBuffDiff(srcStart, decodedBuffer, srcSize, nonContiguousDst);
                 CHECK(LZ4F_isError(result), "Decompression failed (error %i:%s)", (int)result, LZ4F_getErrorName(result));
                 XXH64_update(&xxh64, op, (U32)oSize);
                 totalOut += oSize;

diff --git a/tests/fullbench.c b/tests/fullbench.c
index f489392..c06e230 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c

@@ -184,6 +184,11 @@
     return LZ4_compress_default(in, out, inSize, LZ4_compressBound(inSize)-1);
 }
 
+static int local_LZ4_compress_destSize(const char* in, char* out, int inSize)
+{
+    return LZ4_compress_destSize(in, out, &inSize, LZ4_compressBound(inSize)-1);
+}
+
 static int local_LZ4_compress_fast0(const char* in, char* out, int inSize)
 {
     return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 0);
@@ -262,13 +267,20 @@
     return outSize;
 }
 
-static int local_LZ4_decompress_fast_usingDict(const char* in, char* out, int inSize, int outSize)
+static int local_LZ4_decompress_fast_usingDict_prefix(const char* in, char* out, int inSize, int outSize)
 {
     (void)inSize;
     LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65536);
     return outSize;
 }
 
+static int local_LZ4_decompress_fast_usingExtDict(const char* in, char* out, int inSize, int outSize)
+{
+    (void)inSize;
+    LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65535);
+    return outSize;
+}
+
 static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize)
 {
     (void)inSize;
@@ -277,7 +289,7 @@
 }
 
 #ifndef LZ4_DLL_IMPORT
-extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const char* dict, int dictSize);
+extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
 
 static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize)
 {
@@ -422,12 +434,13 @@
             case 0 : DISPLAY("Compression functions : \n"); continue;
             case 1 : compressionFunction = local_LZ4_compress_default_large; compressorName = "LZ4_compress_default"; break;
             case 2 : compressionFunction = local_LZ4_compress_default_small; compressorName = "LZ4_compress_default(small dst)"; break;
-            case 3 : compressionFunction = local_LZ4_compress_fast0; compressorName = "LZ4_compress_fast(0)"; break;
-            case 4 : compressionFunction = local_LZ4_compress_fast1; compressorName = "LZ4_compress_fast(1)"; break;
-            case 5 : compressionFunction = local_LZ4_compress_fast2; compressorName = "LZ4_compress_fast(2)"; break;
-            case 6 : compressionFunction = local_LZ4_compress_fast17; compressorName = "LZ4_compress_fast(17)"; break;
-            case 7 : compressionFunction = local_LZ4_compress_fast_extState0; compressorName = "LZ4_compress_fast_extState(0)"; break;
-            case 8 : compressionFunction = local_LZ4_compress_fast_continue0; initFunction = local_LZ4_createStream; compressorName = "LZ4_compress_fast_continue(0)"; break;
+            case 3 : compressionFunction = local_LZ4_compress_destSize; compressorName = "LZ4_compress_destSize"; break;
+            case 4 : compressionFunction = local_LZ4_compress_fast0; compressorName = "LZ4_compress_fast(0)"; break;
+            case 5 : compressionFunction = local_LZ4_compress_fast1; compressorName = "LZ4_compress_fast(1)"; break;
+            case 6 : compressionFunction = local_LZ4_compress_fast2; compressorName = "LZ4_compress_fast(2)"; break;
+            case 7 : compressionFunction = local_LZ4_compress_fast17; compressorName = "LZ4_compress_fast(17)"; break;
+            case 8 : compressionFunction = local_LZ4_compress_fast_extState0; compressorName = "LZ4_compress_fast_extState(0)"; break;
+            case 9 : compressionFunction = local_LZ4_compress_fast_continue0; initFunction = local_LZ4_createStream; compressorName = "LZ4_compress_fast_continue(0)"; break;
 
             case 10: compressionFunction = local_LZ4_compress_HC; compressorName = "LZ4_compress_HC"; break;
             case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break;
@@ -454,7 +467,7 @@
                 double averageTime;
                 clock_t clockTime;
 
-                PROGRESS("%1i- %-28.28s :%9i ->\r", loopNb, compressorName, (int)benchedSize);
+                PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, compressorName, (int)benchedSize);
                 { size_t i; for (i=0; i<benchedSize; i++) compressed_buff[i]=(char)i; }     /* warming up memory */
 
                 nb_loops = 0;
@@ -465,7 +478,8 @@
                     if (initFunction!=NULL) initFunction();
                     for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
                         chunkP[chunkNb].compressedSize = compressionFunction(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize);
-                        if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
+                        if (chunkP[chunkNb].compressedSize==0)
+                            DISPLAY("ERROR ! %s() = 0 !! \n", compressorName), exit(1);
                     }
                     nb_loops++;
                 }
@@ -476,13 +490,13 @@
                 if (averageTime < bestTime) bestTime = averageTime;
                 cSize=0; for (chunkNb=0; chunkNb<nbChunks; chunkNb++) cSize += chunkP[chunkNb].compressedSize;
                 ratio = (double)cSize/(double)benchedSize*100.;
-                PROGRESS("%1i- %-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+                PROGRESS("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
             }
 
             if (ratio<100.)
-                DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
+                DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000);
             else
-                DISPLAY("%2i-%-28.28s :%9i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000);
+                DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000);
         }
 
         /* Prepare layout for decompression */
@@ -503,11 +517,12 @@
         }
         for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
             chunkP[chunkNb].compressedSize = LZ4_compress_default(chunkP[chunkNb].origBuffer, chunkP[chunkNb].compressedBuffer, chunkP[chunkNb].origSize, maxCompressedChunkSize);
-            if (chunkP[chunkNb].compressedSize==0) DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
+            if (chunkP[chunkNb].compressedSize==0)
+                DISPLAY("ERROR ! %s() = 0 !! \n", "LZ4_compress"), exit(1);
         }
 
         /* Decompression Algorithms */
-        for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && (g_decompressionTest); dAlgNb++) {
+        for (dAlgNb=0; (dAlgNb <= NB_DECOMPRESSION_ALGORITHMS) && g_decompressionTest; dAlgNb++) {
             const char* dName;
             int (*decompressionFunction)(const char*, char*, int, int);
             double bestTime = 100000000.;
@@ -518,7 +533,8 @@
             {
             case 0: DISPLAY("Decompression functions : \n"); continue;
             case 1: decompressionFunction = local_LZ4_decompress_fast; dName = "LZ4_decompress_fast"; break;
-            case 3: decompressionFunction = local_LZ4_decompress_fast_usingDict; dName = "LZ4_decompress_fast_usingDict"; break;
+            case 2: decompressionFunction = local_LZ4_decompress_fast_usingDict_prefix; dName = "LZ4_decompress_fast_usingDict(prefix)"; break;
+            case 3: decompressionFunction = local_LZ4_decompress_fast_usingExtDict; dName = "LZ4_decompress_fast_using(Ext)Dict"; break;
             case 4: decompressionFunction = LZ4_decompress_safe; dName = "LZ4_decompress_safe"; break;
             case 6: decompressionFunction = local_LZ4_decompress_safe_usingDict; dName = "LZ4_decompress_safe_usingDict"; break;
             case 7: decompressionFunction = local_LZ4_decompress_safe_partial; dName = "LZ4_decompress_safe_partial"; break;
@@ -549,7 +565,7 @@
                 clock_t clockTime;
                 U32 crcDecoded;
 
-                PROGRESS("%1i- %-29.29s :%10i ->\r", loopNb, dName, (int)benchedSize);
+                PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, dName, (int)benchedSize);
 
                 nb_loops = 0;
                 clockTime = clock();
@@ -568,14 +584,14 @@
                 averageTime = (double)clockTime / nb_loops / CLOCKS_PER_SEC;
                 if (averageTime < bestTime) bestTime = averageTime;
 
-                PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+                PROGRESS("%2i-%-34.34s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
 
                 /* CRC Checking */
                 crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
                 if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
             }
 
-            DISPLAY("%2i-%-29.29s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
+            DISPLAY("%2i-%-34.34s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000);
         }
       }
       free(orig_buff);

diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index c134fe3..5dd75b3 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c

@@ -34,15 +34,21 @@
 
 #define LZ4_DISABLE_DEPRECATE_WARNINGS
 
+
 /*-************************************
 *  Dependencies
 **************************************/
+#ifdef __unix__   /* must be included before platform.h for MAP_ANONYMOUS */
+#  include <sys/mman.h>   /* mmap */
+#endif
 #include "platform.h"   /* _CRT_SECURE_NO_WARNINGS */
 #include "util.h"       /* U32 */
 #include <stdlib.h>
 #include <stdio.h>      /* fgets, sscanf */
 #include <string.h>     /* strcmp */
 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h>
+#define LZ4_STATIC_LINKING_ONLY
 #define LZ4_HC_STATIC_LINKING_ONLY
 #include "lz4hc.h"
 #define XXH_STATIC_LINKING_ONLY
@@ -240,6 +246,41 @@
 }
 
 
+#ifdef __unix__   /* is expected to be triggered on linux+gcc */
+
+static void* FUZ_createLowAddr(size_t size)
+{
+    void* const lowBuff = mmap((void*)(0x1000), size,
+                    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                    -1, 0);
+    DISPLAYLEVEL(2, "generating low buffer at address %p \n", lowBuff);
+    return lowBuff;
+}
+
+static void FUZ_freeLowAddr(void* buffer, size_t size)
+{
+    if (munmap(buffer, size)) {
+        perror("fuzzer: freeing low address buffer");
+        abort();
+    }
+}
+
+#else
+
+static void* FUZ_createLowAddr(size_t size)
+{
+    return malloc(size);
+}
+
+static void FUZ_freeLowAddr(void* buffer, size_t size)
+{
+    (void)size;
+    free(buffer);
+}
+
+#endif
+
+
 /*! FUZ_findDiff() :
 *   find the first different byte between buff1 and buff2.
 *   presumes buff1 != buff2.
@@ -252,7 +293,7 @@
     const BYTE* const b2 = (const BYTE*)buff2;
     size_t u = 0;
     while (b1[u]==b2[u]) u++;
-    DISPLAY("Wrong Byte at position %u \n", (unsigned)u);
+    DISPLAY("\nWrong Byte at position %u \n", (unsigned)u);
 }
 
 
@@ -266,6 +307,8 @@
     size_t const compressedBufferSize = LZ4_compressBound(FUZ_MAX_BLOCK_SIZE);
     char* const compressedBuffer = (char*)malloc(compressedBufferSize);
     char* const decodedBuffer = (char*)malloc(FUZ_MAX_DICT_SIZE + FUZ_MAX_BLOCK_SIZE);
+    size_t const labSize = 96 KB;
+    void* const lowAddrBuffer = FUZ_createLowAddr(labSize);
     void* const stateLZ4   = malloc(LZ4_sizeofState());
     void* const stateLZ4HC = malloc(LZ4_sizeofStateHC());
     LZ4_stream_t LZ4dict;
@@ -278,7 +321,14 @@
 
 #   define FUZ_CHECKTEST(cond, ...) if (cond) { printf("Test %u : ", testNb); printf(__VA_ARGS__); \
                                                 printf(" (seed %u, cycle %u) \n", seed, cycleNb); goto _output_error; }
-#   define FUZ_DISPLAYTEST          { testNb++; g_displayLevel>=4 ? printf("%2u\b\b", testNb), fflush(stdout) : 0; }
+#   define FUZ_DISPLAYTEST(...) {                 \
+                testNb++;                         \
+                if (g_displayLevel>=4) {          \
+                    printf("\r%4u - %2u ", cycleNb, testNb);  \
+                    printf(" " __VA_ARGS__);      \
+                    printf("   ");                \
+                    fflush(stdout);               \
+            }   }
 
 
     /* init */
@@ -302,24 +352,28 @@
         U32 testNb = 0;
         U32 randState = FUZ_rand(&coreRandState) ^ PRIME3;
         int const blockSize  = (FUZ_rand(&randState) % (FUZ_MAX_BLOCK_SIZE-1)) + 1;
-        int const blockStart = FUZ_rand(&randState) % (COMPRESSIBLE_NOISE_LENGTH - blockSize);
+        int const blockStart = (FUZ_rand(&randState) % (COMPRESSIBLE_NOISE_LENGTH - blockSize - 1)) + 1;
         int const dictSizeRand = FUZ_rand(&randState) % FUZ_MAX_DICT_SIZE;
-        int const dictSize = MIN(dictSizeRand, blockStart);
+        int const dictSize = MIN(dictSizeRand, blockStart - 1);
         int const compressionLevel = FUZ_rand(&randState) % (LZ4HC_CLEVEL_MAX+1);
-        char* const block = ((char*)CNBuffer) + blockStart;
+        const char* block = ((char*)CNBuffer) + blockStart;
         const char* dict = block - dictSize;
         int compressedSize, HCcompressedSize;
         int blockContinueCompressedSize;
         U32 const crcOrig = XXH32(block, blockSize, 0);
-        U32 crcCheck;
         int ret;
 
         FUZ_displayUpdate(cycleNb);
 
         /* Compression tests */
+        if ( ((FUZ_rand(&randState) & 63) == 2)
+          && ((size_t)blockSize < labSize) ) {
+            memcpy(lowAddrBuffer, block, blockSize);
+            block = (const char*)lowAddrBuffer;
+        }
 
         /* Test compression destSize */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_destSize()");
         {   int srcSize = blockSize;
             int const targetSize = srcSize * ((FUZ_rand(&randState) & 127)+1) >> 7;
             char endCheck = FUZ_rand(&randState) & 255;
@@ -334,7 +388,7 @@
                 U32 const crcBase = XXH32(block, srcSize, 0);
                 char const canary = FUZ_rand(&randState) & 255;
                 FUZ_CHECKTEST((ret==0), "LZ4_compress_destSize() compression failed");
-                FUZ_DISPLAYTEST;
+                FUZ_DISPLAYTEST();
                 compressedSize = ret;
                 decodedBuffer[srcSize] = canary;
                 ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, srcSize);
@@ -350,7 +404,7 @@
         }   }
 
         /* Test compression HC destSize */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_destSize()");
         {   int srcSize = blockSize;
             int const targetSize = srcSize * ((FUZ_rand(&randState) & 127)+1) >> 7;
             char const endCheck = FUZ_rand(&randState) & 255;
@@ -364,14 +418,12 @@
             FUZ_CHECKTEST(ret > targetSize, "LZ4_compress_HC_destSize() result larger than dst buffer !");
             FUZ_CHECKTEST(compressedBuffer[targetSize] != endCheck, "LZ4_compress_HC_destSize() overwrite dst buffer !");
             FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compress_HC_destSize() fed more than src buffer !");
-            DISPLAYLEVEL(5, "LZ4_compress_HC_destSize(%i): destSize : %7i/%7i; content%7i/%7i ",
-                            compressionLevel, ret, targetSize, srcSize, blockSize);
             if (targetSize>0) {
                 /* check correctness */
                 U32 const crcBase = XXH32(block, srcSize, 0);
                 char const canary = FUZ_rand(&randState) & 255;
                 FUZ_CHECKTEST((ret==0), "LZ4_compress_HC_destSize() compression failed");
-                FUZ_DISPLAYTEST;
+                FUZ_DISPLAYTEST();
                 compressedSize = ret;
                 decodedBuffer[srcSize] = canary;
                 ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, srcSize);
@@ -387,61 +439,98 @@
         }   }
 
         /* Test compression HC */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC()");
         ret = LZ4_compress_HC(block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_HC() failed");
         HCcompressedSize = ret;
 
         /* Test compression HC using external state */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC()");
         ret = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
-        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed");
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed")
+
+        /* Test compression HC using fast reset external state */
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC_fastReset()");
+        ret = LZ4_compress_HC_extStateHC_fastReset(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel);
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC_fastReset() failed");
 
         /* Test compression using external state */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_extState()");
         ret = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState() failed");
 
+        /* Test compression using fast reset external state*/
+        FUZ_DISPLAYTEST();
+        ret = LZ4_compress_fast_extState_fastReset(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8);
+        FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState_fastReset() failed");
+
         /* Test compression */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default()");
         ret = LZ4_compress_default(block, compressedBuffer, blockSize, (int)compressedBufferSize);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_default() failed");
         compressedSize = ret;
 
         /* Decompression tests */
 
-        /* Test decoding with output size being exactly what's necessary => must work */
-        FUZ_DISPLAYTEST;
+        /* Test decoding with output size exactly correct => must work */
+        FUZ_DISPLAYTEST("LZ4_decompress_fast() with exact output buffer");
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_fast failed despite correct space");
         FUZ_CHECKTEST(ret!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data");
+        }
 
         /* Test decoding with one byte missing => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("LZ4_decompress_fast() with output buffer 1-byte too short");
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize-1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast overrun specified output buffer");
 
         /* Test decoding with one byte too much => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_fast(compressedBuffer, decodedBuffer, blockSize+1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large");
 
+        /* Test decoding with empty input */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with empty input");
+        LZ4_decompress_safe(NULL, decodedBuffer, 0, blockSize);
+
+        /* Test decoding with a one byte input */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with one byte input");
+        {   char const tmp = 0xFF;
+            LZ4_decompress_safe(&tmp, decodedBuffer, 1, blockSize);
+        }
+
+        /* Test decoding shortcut edge case */
+        FUZ_DISPLAYTEST("LZ4_decompress_safe() with shortcut edge case");
+        {   char tmp[17];
+            /* 14 bytes of literals, followed by a 14 byte match.
+             * Should not read beyond the end of the buffer.
+             * See https://github.com/lz4/lz4/issues/508. */
+            *tmp = 0xEE;
+            memset(tmp + 1, 0, 14);
+            tmp[15] = 14;
+            tmp[16] = 0;
+            ret = LZ4_decompress_safe(tmp, decodedBuffer, sizeof(tmp), blockSize);
+            FUZ_CHECKTEST(ret >= 0, "LZ4_decompress_safe() should fail");
+        }
+
+
         /* Test decoding with output size exactly what's necessary => must work */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe failed despite sufficient space");
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        }
 
         // Test decoding with more than enough output size => must work
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         decodedBuffer[blockSize+1] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize+1);
@@ -449,18 +538,19 @@
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe did not regenerate original data");
         //FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe wrote more than (unknown) target size");   // well, is that an issue ?
         FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data");
+        }
 
         // Test decoding with output size being one byte too short => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-1);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size");
 
         // Test decoding with output size being 10 bytes too short => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         if (blockSize>10) {
             decodedBuffer[blockSize-10] = 0;
             ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize, blockSize-10);
@@ -469,51 +559,51 @@
         }
 
         // Test decoding with input size being one byte too short => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, ret=%i, compressedSize=%i)", blockSize, ret, compressedSize);
 
         // Test decoding with input size being one byte too large => must fail
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being too large");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size");
 
         // Test partial decoding with target output size being max/2 => must work
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize/2, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
 
         // Test partial decoding with target output size being just below max => must work
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         ret = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, compressedSize, blockSize-3, blockSize);
         FUZ_CHECKTEST(ret<0, "LZ4_decompress_safe_partial failed despite sufficient space");
 
         /* Test Compression with limited output size */
 
         /* Test compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer just the right size");
         ret = LZ4_compress_default(block, compressedBuffer, blockSize, compressedSize);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_default() failed despite sufficient space");
 
         /* Test compression with output size being exactly what's necessary and external state (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_extState() with output buffer just the right size");
         ret = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, compressedSize, 1);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState() failed despite sufficient space");
 
         /* Test HC compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer just the right size");
         ret = LZ4_compress_HC(block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_HC() failed despite sufficient space");
 
         /* Test HC compression with output size being exactly what's necessary (should work) */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC() with output buffer just the right size");
         ret = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel);
         FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed despite sufficient space");
 
         /* Test compression with missing bytes into output buffer => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer a bit too short");
         {   int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1;
             if (missingBytes >= compressedSize) missingBytes = compressedSize-1;
             missingBytes += !missingBytes;   /* avoid special case missingBytes==0 */
@@ -524,7 +614,7 @@
         }
 
         /* Test HC compression with missing bytes into output buffer => must fail */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer a bit too short");
         {   int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1;
             if (missingBytes >= HCcompressedSize) missingBytes = HCcompressedSize-1;
             missingBytes += !missingBytes;   /* avoid special case missingBytes==0 */
@@ -540,7 +630,7 @@
         /*-******************/
 
         /* Compress using dictionary */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary of size %i", dictSize);
         {   LZ4_stream_t LZ4_stream;
             LZ4_resetStream(&LZ4_stream);
             LZ4_compress_fast_continue (&LZ4_stream, dict, compressedBuffer, dictSize, (int)compressedBufferSize, 1);   /* Just to fill hash tables */
@@ -549,75 +639,169 @@
         }
 
         /* Decompress with dictionary as prefix */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as prefix");
         memcpy(decodedBuffer, dict, dictSize);
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer+dictSize, blockSize, decodedBuffer, dictSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
-        crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
-        if (crcCheck!=crcOrig) {
-            int i=0;
-            while (block[i]==decodedBuffer[i]) i++;
-            printf("Wrong Byte at position %i/%i\n", i, blockSize);
-
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
         }
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_safe_usingDict()");
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer+dictSize, blockContinueCompressedSize, blockSize, decodedBuffer, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
-        crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer+dictSize, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
 
         /* Compress using External dictionary */
-        FUZ_DISPLAYTEST;
-        dict -= (FUZ_rand(&randState) & 0xF) + 1;   /* Separation, so it is an ExtDict */
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue(), with non-contiguous dictionary");
+        dict -= (FUZ_rand(&randState) & 0xF) + 1;   /* create space, so now dictionary is an ExtDict */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
         FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary but with an output buffer too short by one byte");
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         ret = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
         FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using ExtDict should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary loaded with LZ4_loadDict()");
+        DISPLAYLEVEL(5, " compress %i bytes from buffer(%p) into dst(%p) using dict(%p) of size %i \n", blockSize, block, decodedBuffer, dict, dictSize);
         LZ4_loadDict(&LZ4dict, dict, dictSize);
         ret = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
         FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue should work : enough size available within output buffer");
 
         /* Decompress with dictionary as external */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as extDict");
+        DISPLAYLEVEL(5, " decoding %i bytes from buffer(%p) using dict(%p) of size %i \n", blockSize, decodedBuffer, dict, dictSize);
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        }
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize-1] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize);
         FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)");
         FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
+        {   U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
+            if ((U32)blockSize > missingBytes) {
+                decodedBuffer[blockSize-missingBytes] = 0;
+                ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize);
+                FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%u byte)", missingBytes);
+                FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%u byte) (blockSize=%i)", missingBytes, blockSize);
+        }   }
+
+        /* Compress using external dictionary stream */
+        {
+            LZ4_stream_t LZ4_stream;
+            int expectedSize;
+            U32 expectedCrc;
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_loadDict()");
+            LZ4_loadDict(&LZ4dict, dict, dictSize);
+            expectedSize = LZ4_compress_fast_continue(&LZ4dict, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
+            FUZ_CHECKTEST(expectedSize<=0, "LZ4_compress_fast_continue reference compression for extDictCtx should have succeeded");
+            expectedCrc = XXH32(compressedBuffer, expectedSize, 0);
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary()");
+            LZ4_loadDict(&LZ4dict, dict, dictSize);
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1);
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed");
+
+            /* In the future, it might be desirable to let extDictCtx mode's
+             * output diverge from the output generated by regular extDict mode.
+             * Until that time, this comparison serves as a good regression
+             * test.
+             */
+            FUZ_CHECKTEST(blockContinueCompressedSize != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output (%d expected vs %d actual)", expectedSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(XXH32(compressedBuffer, blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+
+            FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short");
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1);
+            FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize);
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStream(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer");
+            FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
+            FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStream_fast(&LZ4_stream);
+            LZ4_attach_dictionary(&LZ4_stream, &LZ4dict);
+            ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer");
+            FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output");
+            FUZ_CHECKTEST(XXH32(compressedBuffer, ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output");
+        }
+
+        /* Decompress with dictionary as external */
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize] = 0;
+        ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize);
+        FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize);
+        }
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize] = 0;
+        ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
+        FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+        FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize-1] = 0;
+        ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize);
+        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)");
+        FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size");
+
+        FUZ_DISPLAYTEST();
+        decodedBuffer[blockSize-1] = 0;
+        ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize);
+        FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)");
+        FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+
+        FUZ_DISPLAYTEST();
         {   U32 const missingBytes = (FUZ_rand(&randState) & 0xF) + 2;
             if ((U32)blockSize > missingBytes) {
                 decodedBuffer[blockSize-missingBytes] = 0;
@@ -627,7 +811,7 @@
         }   }
 
         /* Compress HC using External dictionary */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         dict -= (FUZ_rand(&randState) & 7);    /* even bigger separation */
         if (dict < (char*)CNBuffer) dict = (char*)CNBuffer;
         LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
@@ -636,32 +820,75 @@
         blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
         FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue failed");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
         FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDict should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
         ret = LZ4_compress_HC_continue(&LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
         FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue size is different (%i != %i)", ret, blockContinueCompressedSize);
         FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue should work : enough size available within output buffer");
 
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         decodedBuffer[blockSize] = 0;
         ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
         FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
         FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
-        crcCheck = XXH32(decodedBuffer, blockSize, 0);
-        if (crcCheck!=crcOrig)
-            FUZ_findDiff(block, decodedBuffer);
-        FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+            if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+            FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+        }
+
+        /* Compress HC using external dictionary stream */
+        FUZ_DISPLAYTEST();
+        {
+            LZ4_streamHC_t LZ4_streamHC;
+
+            LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
+            LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            blockContinueCompressedSize = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, (int)compressedBufferSize);
+            FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue with ExtDictCtx failed");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1);
+            FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDictCtx should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize);
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx should work : enough size available within output buffer");
+
+            FUZ_DISPLAYTEST();
+            LZ4_resetStreamHC_fast (&LZ4_streamHC, compressionLevel);
+            LZ4_attach_HC_dictionary(&LZ4_streamHC, &LZ4dictHC);
+            ret = LZ4_compress_HC_continue(&LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)", ret, blockContinueCompressedSize);
+            FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx and fast reset should work : enough size available within output buffer");
+
+            FUZ_DISPLAYTEST();
+            decodedBuffer[blockSize] = 0;
+            ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize);
+            FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
+            FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size");
+            {   U32 const crcCheck = XXH32(decodedBuffer, blockSize, 0);
+                if (crcCheck!=crcOrig) FUZ_findDiff(block, decodedBuffer);
+                FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data");
+            }
+        }
 
         /* Compress HC continue destSize */
-        FUZ_DISPLAYTEST;
+        FUZ_DISPLAYTEST();
         {   int const availableSpace = (FUZ_rand(&randState) % blockSize) + 5;
             int consumedSize = blockSize;
-            FUZ_DISPLAYTEST;
+            FUZ_DISPLAYTEST();
             LZ4_resetStreamHC (&LZ4dictHC, compressionLevel);
             LZ4_loadDictHC(&LZ4dictHC, dict, dictSize);
             blockContinueCompressedSize = LZ4_compress_HC_continue_destSize(&LZ4dictHC, block, compressedBuffer, &consumedSize, availableSpace);
@@ -670,15 +897,14 @@
             FUZ_CHECKTEST(blockContinueCompressedSize > availableSpace, "LZ4_compress_HC_continue_destSize write overflow");
             FUZ_CHECKTEST(consumedSize > blockSize, "LZ4_compress_HC_continue_destSize read overflow");
 
-            FUZ_DISPLAYTEST;
+            FUZ_DISPLAYTEST();
             decodedBuffer[consumedSize] = 0;
             ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, consumedSize, dict, dictSize);
             FUZ_CHECKTEST(ret!=consumedSize, "LZ4_decompress_safe_usingDict did not regenerate original data");
             FUZ_CHECKTEST(decodedBuffer[consumedSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size")
             {   U32 const crcSrc = XXH32(block, consumedSize, 0);
                 U32 const crcDst = XXH32(decodedBuffer, consumedSize, 0);
-                if (crcSrc!=crcDst)
-                    FUZ_findDiff(block, decodedBuffer);
+                if (crcSrc!=crcDst) FUZ_findDiff(block, decodedBuffer);
                 FUZ_CHECKTEST(crcSrc!=crcDst, "LZ4_decompress_safe_usingDict corrupted decoded data");
             }
         }
@@ -705,6 +931,7 @@
         free(CNBuffer);
         free(compressedBuffer);
         free(decodedBuffer);
+        FUZ_freeLowAddr(lowAddrBuffer, labSize);
         free(stateLZ4);
         free(stateLZ4HC);
         return result;
@@ -727,6 +954,7 @@
     const unsigned cycleNb= 0;
     char testInput[testInputSize];
     char testCompressed[testCompressedSize];
+    size_t const testVerifySize = testInputSize;
     char testVerify[testInputSize];
     char ringBuffer[ringBufferSize];
     U32 randState = 1;
@@ -761,8 +989,8 @@
 
         /* ring buffer test */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
             const U32 maxMessageSizeLog = 10;
             const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
             U32 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
@@ -770,26 +998,36 @@
             U32 rNext = 0;
             U32 dNext = 0;
             const U32 dBufferSize = ringBufferSize + maxMessageSizeMask;
+            int compressedSize;
 
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStream(&streamingState);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
             while (iNext + messageSize < testCompressedSize) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
                 memcpy (ringBuffer + rNext, testInput + iNext, messageSize);
-                result = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize, 1);
-                FUZ_CHECKTEST(result==0, "LZ4_compress_fast_continue() compression failed");
+                compressedSize = LZ4_compress_fast_continue(&streamingState, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize, 1);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_fast_continue() compression failed");
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed");
+                result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize);
+                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNew);
-                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+                XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
+
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
+
+                XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewFast);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
 
                 /* prepare next message */
                 iNext += messageSize;
@@ -911,8 +1149,8 @@
 
         /* ring buffer test */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
             const U32 maxMessageSizeLog = 10;
             const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
             U32 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
@@ -920,26 +1158,36 @@
             U32 rNext = 0;
             U32 dNext = 0;
             const U32 dBufferSize = ringBufferSize + maxMessageSizeMask;
+            int compressedSize;
 
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStreamHC(&sHC, compressionLevel);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
             while (iNext + messageSize < testCompressedSize) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
                 memcpy (ringBuffer + rNext, testInput + iNext, messageSize);
-                result = LZ4_compress_HC_continue(&sHC, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+                compressedSize = LZ4_compress_HC_continue(&sHC, ringBuffer + rNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe() test failed");
+                result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, testVerify + dNext, compressedSize, messageSize);
+                FUZ_CHECKTEST(result!=(int)messageSize, "ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                { U64 const crcNew = XXH64_digest(&xxhNew);
-                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+                XXH64_update(&xxhNewSafe, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
+
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, testVerify + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "ringBuffer : LZ4_decompress_fast_continue() test failed");
+
+                XXH64_update(&xxhNewFast, testVerify + dNext, messageSize);
+                { U64 const crcNew = XXH64_digest(&xxhNewFast);
+                  FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
 
                 /* prepare next message */
                 iNext += messageSize;
@@ -951,70 +1199,105 @@
             }
         }
 
-        /* small decoder-side ring buffer test */
+        /* Ring buffer test : Non synchronized decoder */
+        /* This test uses minimum amount of memory required to setup a decoding ring buffer
+         * while being unsynchronized with encoder
+         * (no assumption done on how the data is encoded, it just follows LZ4 format specification).
+         * This size is documented in lz4.h, and is LZ4_decoderRingBufferSize(maxBlockSize).
+         */
         {   XXH64_state_t xxhOrig;
-            XXH64_state_t xxhNew;
-            LZ4_streamDecode_t decodeState;
-            const U32 maxMessageSizeLog = 12;
-            const U32 maxMessageSizeMask = (1<<maxMessageSizeLog) - 1;
-            U32 messageSize;
+            XXH64_state_t xxhNewSafe, xxhNewFast;
+            LZ4_streamDecode_t decodeStateSafe, decodeStateFast;
+            const int maxMessageSizeLog = 12;
+            const int maxMessageSize = 1 << maxMessageSizeLog;
+            const int maxMessageSizeMask = maxMessageSize - 1;
+            int messageSize;
             U32 totalMessageSize = 0;
-            U32 iNext = 0;
-            U32 dNext = 0;
-            const U32 dBufferSize = 64 KB;
+            const int dBufferSize = LZ4_decoderRingBufferSize(maxMessageSize);
+            char* const ringBufferSafe = testVerify;
+            char* const ringBufferFast = testVerify + dBufferSize + 1;   /* used by LZ4_decompress_fast_continue */
+            int iNext = 0;
+            int dNext = 0;
+            int compressedSize;
 
+            assert((size_t)(dBufferSize + 1 + dBufferSize) < testVerifySize);   /* space used by ringBufferSafe and ringBufferFast */
             XXH64_reset(&xxhOrig, 0);
-            XXH64_reset(&xxhNew, 0);
+            XXH64_reset(&xxhNewSafe, 0);
+            XXH64_reset(&xxhNewFast, 0);
             LZ4_resetStreamHC(&sHC, compressionLevel);
-            LZ4_setStreamDecode(&decodeState, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateSafe, NULL, 0);
+            LZ4_setStreamDecode(&decodeStateFast, NULL, 0);
 
-#define BSIZE1 65537
-#define BSIZE2 16435
+#define BSIZE1 (dBufferSize - (maxMessageSize-1))
 
             /* first block */
-            messageSize = BSIZE1;
+            messageSize = BSIZE1;   /* note : we cheat a bit here, in theory no message should be > maxMessageSize. We just want to fill the decoding ring buffer once. */
             XXH64_update(&xxhOrig, testInput + iNext, messageSize);
             crcOrig = XXH64_digest(&xxhOrig);
 
-            result = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-            FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+            compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+            FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
 
-            result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-            FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe() test failed");
+            result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, ringBufferSafe + dNext, compressedSize, messageSize);
+            FUZ_CHECKTEST(result!=messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed");
 
-            XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-            { U64 const crcNew = XXH64_digest(&xxhNew);
-              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); }
+            XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize);
+            { U64 const crcNew = XXH64_digest(&xxhNewSafe);
+              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); }
 
-            /* prepare next message */
+            result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize);
+            FUZ_CHECKTEST(result!=compressedSize, "64K D.ringBuffer : LZ4_decompress_fast_continue() test failed");
+
+            XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize);
+            { U64 const crcNew = XXH64_digest(&xxhNewFast);
+              FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); }
+
+            /* prepare second message */
             dNext += messageSize;
             totalMessageSize += messageSize;
-            messageSize = BSIZE2;
-            iNext = 132000;
-            memcpy(testInput + iNext, testInput + 8, messageSize);
-            if (dNext > dBufferSize) dNext = 0;
+            messageSize = maxMessageSize;
+            iNext = BSIZE1+1;
+            assert(BSIZE1 >= 65535);
+            memcpy(testInput + iNext, testInput + (BSIZE1-65535), messageSize);  /* will generate a match at max distance == 65535 */
+            FUZ_CHECKTEST(dNext+messageSize <= dBufferSize, "Ring buffer test : second message should require restarting from beginning");
+            dNext = 0;
 
             while (totalMessageSize < 9 MB) {
                 XXH64_update(&xxhOrig, testInput + iNext, messageSize);
                 crcOrig = XXH64_digest(&xxhOrig);
 
-                result = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
-                FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed");
+                compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize);
+                FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed");
+                DISPLAYLEVEL(5, "compressed %i bytes to %i bytes \n", messageSize, compressedSize);
 
-                result = LZ4_decompress_safe_continue(&decodeState, testCompressed, testVerify + dNext, result, messageSize);
-                FUZ_CHECKTEST(result!=(int)messageSize, "64K D.ringBuffer : LZ4_decompress_safe() test failed");
-
-                XXH64_update(&xxhNew, testVerify + dNext, messageSize);
-                {   U64 const crcNew = XXH64_digest(&xxhNew);
-                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, testVerify + dNext);
-                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption during small decoder-side ring buffer test");
+                /* test LZ4_decompress_safe_continue */
+                assert(dNext < dBufferSize);
+                assert(dBufferSize - dNext >= maxMessageSize);
+                result = LZ4_decompress_safe_continue(&decodeStateSafe,
+                                                      testCompressed, ringBufferSafe + dNext,
+                                                      compressedSize, dBufferSize - dNext);   /* works without knowing messageSize, under assumption that messageSize <= maxMessageSize */
+                FUZ_CHECKTEST(result!=messageSize, "D.ringBuffer : LZ4_decompress_safe_continue() test failed");
+                XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, messageSize);
+                {   U64 const crcNew = XXH64_digest(&xxhNewSafe);
+                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferSafe + dNext);
+                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption during D.ringBuffer test");
                 }
+
+                /* test LZ4_decompress_fast_continue in its own buffer ringBufferFast */
+                result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize);
+                FUZ_CHECKTEST(result!=compressedSize, "D.ringBuffer : LZ4_decompress_fast_continue() test failed");
+                XXH64_update(&xxhNewFast, ringBufferFast + dNext, messageSize);
+                {   U64 const crcNew = XXH64_digest(&xxhNewFast);
+                    if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferFast + dNext);
+                    FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption during D.ringBuffer test");
+                }
+
                 /* prepare next message */
                 dNext += messageSize;
                 totalMessageSize += messageSize;
                 messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1;
                 iNext = (FUZ_rand(&randState) & 65535);
-                if (dNext > dBufferSize) dNext = 0;
+                if (dNext + maxMessageSize > dBufferSize) dNext = 0;
             }
         }
     }
@@ -1074,13 +1357,13 @@
                     return FUZ_usage(programName);
 
                 case 'v':   /* verbose mode */
-                    argument++;
                     g_displayLevel++;
+                    argument++;
                     break;
 
                 case 'p':   /* pause at the end */
-                    argument++;
                     use_pause=1;
+                    argument++;
                     break;
 
                 case 'i':

diff --git a/tests/test_install.sh b/tests/test_install.sh
new file mode 100755
index 0000000..f9de402
--- /dev/null
+++ b/tests/test_install.sh

@@ -0,0 +1,20 @@
+#/usr/bin/env sh
+set -e
+
+make="make -C $lz4_root"
+for cmd in install uninstall; do
+  for upper in DUMMY PREFIX EXEC_PREFIX LIBDIR INCLUDEDIR PKGCONFIGDIR BINDIR MANDIR MAN1DIR ; do
+    lower=$(echo $upper | tr '[:upper:]' '[:lower:]')
+    tmp_lower="$(pwd)/tmp-lower-$lower/"
+    tmp_upper="$(pwd)/tmp-upper-$lower/"
+    echo $make $cmd DESTDIR="$tmp_upper" $upper="test"
+    $make $cmd DESTDIR="$tmp_upper" $upper="test" >/dev/null
+    echo $make $cmd DESTDIR="$tmp_lower" $lower="test"
+    $make $cmd DESTDIR="$tmp_lower" $lower="test" >/dev/null
+    command diff -r "$tmp_lower" "$tmp_upper" && echo "SAME!" || false
+    if [ "x$cmd" = "xuninstall" ]; then
+      test -z "$(find "$tmp_lower" -type f)" && echo "EMPTY!" || false
+      rm -rf "$tmp_upper" "$tmp_lower"
+    fi
+  done
+done

diff --git a/visual/.gitignore b/visual/.gitignore
index dea92fc..276f8f5 100644
--- a/visual/.gitignore
+++ b/visual/.gitignore

@@ -6,5 +6,5 @@
 *.sdf
 *.suo
 *.user
-
+ver*/
 VS2010/bin/

diff --git a/visual/VS2017/datagen/datagen.vcxproj b/visual/VS2017/datagen/datagen.vcxproj
new file mode 100644
index 0000000..30e159e
--- /dev/null
+++ b/visual/VS2017/datagen/datagen.vcxproj

@@ -0,0 +1,173 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{D745AE2F-596A-403A-9B91-81A8C6779243}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>datagen</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\programs;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\tests\datagencli.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\programs\datagen.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/frametest/frametest.vcxproj b/visual/VS2017/frametest/frametest.vcxproj
new file mode 100644
index 0000000..a3a403d
--- /dev/null
+++ b/visual/VS2017/frametest/frametest.vcxproj

@@ -0,0 +1,180 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>frametest</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\frametest.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj b/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj
new file mode 100644
index 0000000..d54a8d7
--- /dev/null
+++ b/visual/VS2017/fullbench-dll/fullbench-dll.vcxproj

@@ -0,0 +1,184 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{13992FD2-077E-4954-B065-A428198201A9}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fullbench-dll</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LZ4_DLL_IMPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(SolutionDir)bin\$(Platform)_$(Configuration);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>liblz4.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\fullbench.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/fullbench/fullbench.vcxproj b/visual/VS2017/fullbench/fullbench.vcxproj
new file mode 100644
index 0000000..54c9743
--- /dev/null
+++ b/visual/VS2017/fullbench/fullbench.vcxproj

@@ -0,0 +1,180 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fullbench</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\fullbench.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/fuzzer/fuzzer.vcxproj b/visual/VS2017/fuzzer/fuzzer.vcxproj
new file mode 100644
index 0000000..aa6fe42
--- /dev/null
+++ b/visual/VS2017/fuzzer/fuzzer.vcxproj

@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{18B9F1A7-9C66-4352-898B-30804DADE0FD}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>fuzzer</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+    <ClCompile Include="..\..\..\tests\fuzzer.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.rc b/visual/VS2017/liblz4-dll/liblz4-dll.rc
new file mode 100644
index 0000000..b1871fe
--- /dev/null
+++ b/visual/VS2017/liblz4-dll/liblz4-dll.rc

@@ -0,0 +1,51 @@
+// Microsoft Visual C++ generated resource script.
+//
+
+#include "lz4.h" /* LZ4_VERSION_STRING */
+#define APSTUDIO_READONLY_SYMBOLS
+#include "verrsrc.h"
+#undef APSTUDIO_READONLY_SYMBOLS
+
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+LANGUAGE 9, 1
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO     VERSIONINFO
+  FILEVERSION       LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+  PRODUCTVERSION    LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+ FILEFLAGS VS_FF_DEBUG
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS VOS_NT_WINDOWS32
+ FILETYPE VFT_DLL
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904B0"
+        BEGIN
+            VALUE "CompanyName", "Yann Collet"
+            VALUE "FileDescription", "Extremely fast compression"
+            VALUE "FileVersion", LZ4_VERSION_STRING
+            VALUE "InternalName", "lz4.dll"
+            VALUE "LegalCopyright", "Copyright (C) 2013-2016, Yann Collet"
+            VALUE "OriginalFilename", "lz4.dll"
+            VALUE "ProductName", "LZ4"
+            VALUE "ProductVersion", LZ4_VERSION_STRING
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x0409, 1200
+    END
+END
+
+#endif

diff --git a/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj b/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj
new file mode 100644
index 0000000..8e7ee3b
--- /dev/null
+++ b/visual/VS2017/liblz4-dll/liblz4-dll.vcxproj

@@ -0,0 +1,183 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9800039D-4AAA-43A4-BB78-FEF6F4836927}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>liblz4-dll</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+    <ProjectName>liblz4-dll</ProjectName>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+  </ItemGroup>
+  <ItemGroup>
+    <ResourceCompile Include="liblz4-dll.rc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/liblz4/liblz4.vcxproj b/visual/VS2017/liblz4/liblz4.vcxproj
new file mode 100644
index 0000000..948f7db
--- /dev/null
+++ b/visual/VS2017/liblz4/liblz4.vcxproj

@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>liblz4</RootNamespace>
+    <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization Condition="'$(EnableWholeProgramOptimization)'=='true'">true</WholeProgramOptimization>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <TargetName>liblz4_static</TargetName>
+    <IncludePath>$(IncludePath);$(UniversalCRT_IncludePath);$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
+    <RunCodeAnalysis>true</RunCodeAnalysis>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level4</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>true</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>false</EnablePREfast>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level4</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;LZ4_DLL_EXPORT=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <TreatWarningAsError>false</TreatWarningAsError>
+      <EnablePREfast>true</EnablePREfast>
+      <AdditionalOptions>/analyze:stacksize295252 %(AdditionalOptions)</AdditionalOptions>
+      <RuntimeLibrary Condition="'$(UseStaticCRT)'=='true'">MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\..\lib\lz4.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame.h" />
+    <ClInclude Include="..\..\..\lib\lz4frame_static.h" />
+    <ClInclude Include="..\..\..\lib\lz4hc.h" />
+    <ClInclude Include="..\..\..\lib\xxhash.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\..\lib\lz4.c" />
+    <ClCompile Include="..\..\..\lib\lz4frame.c" />
+    <ClCompile Include="..\..\..\lib\lz4hc.c" />
+    <ClCompile Include="..\..\..\lib\xxhash.c" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file

diff --git a/visual/VS2017/lz4.sln b/visual/VS2017/lz4.sln
new file mode 100644
index 0000000..78f223b
--- /dev/null
+++ b/visual/VS2017/lz4.sln

@@ -0,0 +1,98 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Express 2012 for Windows Desktop
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lz4", "lz4\lz4.vcxproj", "{E30329AC-0057-4FE0-8FDA-7F650D398C4C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4-dll", "liblz4-dll\liblz4-dll.vcxproj", "{9800039D-4AAA-43A4-BB78-FEF6F4836927}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "liblz4", "liblz4\liblz4.vcxproj", "{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcxproj", "{18B9F1A7-9C66-4352-898B-30804DADE0FD}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcxproj", "{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "frametest", "frametest\frametest.vcxproj", "{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "datagen", "datagen\datagen.vcxproj", "{D745AE2F-596A-403A-9B91-81A8C6779243}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll\fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927} = {9800039D-4AAA-43A4-BB78-FEF6F4836927}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.ActiveCfg = Debug|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|Win32.Build.0 = Debug|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.ActiveCfg = Debug|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Debug|x64.Build.0 = Debug|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.ActiveCfg = Release|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|Win32.Build.0 = Release|Win32
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.ActiveCfg = Release|x64
+		{E30329AC-0057-4FE0-8FDA-7F650D398C4C}.Release|x64.Build.0 = Release|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.ActiveCfg = Debug|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|Win32.Build.0 = Debug|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|x64.ActiveCfg = Debug|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Debug|x64.Build.0 = Debug|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|Win32.ActiveCfg = Release|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|Win32.Build.0 = Release|Win32
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|x64.ActiveCfg = Release|x64
+		{9800039D-4AAA-43A4-BB78-FEF6F4836927}.Release|x64.Build.0 = Release|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|Win32.ActiveCfg = Debug|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|Win32.Build.0 = Debug|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|x64.ActiveCfg = Debug|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Debug|x64.Build.0 = Debug|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|Win32.ActiveCfg = Release|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|Win32.Build.0 = Release|Win32
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|x64.ActiveCfg = Release|x64
+		{9092C5CC-3E71-41B3-BF68-4A7BDD8A5476}.Release|x64.Build.0 = Release|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|Win32.ActiveCfg = Debug|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|Win32.Build.0 = Debug|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|x64.ActiveCfg = Debug|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Debug|x64.Build.0 = Debug|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|Win32.ActiveCfg = Release|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|Win32.Build.0 = Release|Win32
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|x64.ActiveCfg = Release|x64
+		{18B9F1A7-9C66-4352-898B-30804DADE0FD}.Release|x64.Build.0 = Release|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|Win32.ActiveCfg = Debug|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|Win32.Build.0 = Debug|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|x64.ActiveCfg = Debug|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Debug|x64.Build.0 = Debug|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|Win32.ActiveCfg = Release|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|Win32.Build.0 = Release|Win32
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|x64.ActiveCfg = Release|x64
+		{6A4DF4EF-C77F-43C6-8901-DDCD20879E4E}.Release|x64.Build.0 = Release|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|Win32.ActiveCfg = Debug|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|Win32.Build.0 = Debug|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|x64.ActiveCfg = Debug|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Debug|x64.Build.0 = Debug|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|Win32.ActiveCfg = Release|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|Win32.Build.0 = Release|Win32
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|x64.ActiveCfg = Release|x64
+		{39AD6ECC-8BAD-4368-95E4-A1AA2F077BB7}.Release|x64.Build.0 = Release|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|Win32.ActiveCfg = Debug|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|Win32.Build.0 = Debug|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|x64.ActiveCfg = Debug|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Debug|x64.Build.0 = Debug|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|Win32.ActiveCfg = Release|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|Win32.Build.0 = Release|Win32
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|x64.ActiveCfg = Release|x64
+		{D745AE2F-596A-403A-9B91-81A8C6779243}.Release|x64.Build.0 = Release|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64
+		{13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
commit	b3692db46d2b23a7c0af2d5e69988c94f126e10a	[log] [tgz]
author	Yann Collet <Cyan4973@users.noreply.github.com>	Mon May 07 14:38:45 2018 -0700
committer	GitHub <noreply@github.com>	Mon May 07 14:38:45 2018 -0700
tree	d9aa14d72978e78a1ca2f1aee99511ce2d04293c
parent	dfed9fa1d77f0434306d377c4da1f7191d3ba08a [diff]
parent	bf6fd938e522150e2a30bee978102769a51f4b3e [diff]