v1.4.1: Merge pull request #1691 from facebook/dev

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 42e4042..ca153d8 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,33 +1,14 @@
 version: 2
 
-references:
-  # Install the dependencies required for tests.
-  # Add the step "- *install-dependencies" to the beginning of your job to run
-  # this command.
-  install-dependencies: &install-dependencies
-    run:
-      name: Install dependencies
-      # TODO: We can split these dependencies up by job to reduce installation
-      # time.
-      command: |
-        sudo dpkg --add-architecture i386
-        sudo apt-get -y -qq update
-        sudo apt-get -y install \
-            gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \
-            libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
-            libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \
-            libcurl4-openssl-dev
-
 jobs:
   # the first half of the jobs are in this test
   short-tests-0:
     # TODO: Create a small custom docker image with all the dependencies we need
     #       preinstalled to reduce installation time.
     docker:
-      - image: circleci/buildpack-deps:bionic
+      - image: fbopensource/zstd-circleci-primary:0.0.1
     steps:
       - checkout
-      - *install-dependencies
       - run:
           name: Test
           command: |
@@ -41,10 +22,9 @@
   # the second half of the jobs are in this test
   short-tests-1:
     docker:
-      - image: circleci/buildpack-deps:bionic
+      - image: fbopensource/zstd-circleci-primary:0.0.1
     steps:
       - checkout
-      - *install-dependencies
       - run:
           name: Test
           command: |
@@ -61,12 +41,11 @@
   # tagged release.
   publish-github-release:
     docker:
-      - image: circleci/buildpack-deps:bionic
+      - image: fbopensource/zstd-circleci-primary:0.0.1
     environment:
       CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
     steps:
       - checkout
-      - *install-dependencies
       - run:
           name: Publish
           command: |
@@ -86,12 +65,11 @@
   # This step should only be run in a cron job
   regression-test:
     docker:
-      - image: circleci/buildpack-deps:bionic
+      - image: fbopensource/zstd-circleci-primary:0.0.1
     environment:
       CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
     steps:
       - checkout
-      - *install-dependencies
       # Restore the cached resources.
       - restore_cache:
           # We try our best to bust the cache when the data changes by hashing
diff --git a/.circleci/images/primary/Dockerfile b/.circleci/images/primary/Dockerfile
new file mode 100644
index 0000000..dd80041
--- /dev/null
+++ b/.circleci/images/primary/Dockerfile
@@ -0,0 +1,9 @@
+FROM circleci/buildpack-deps:bionic
+
+RUN sudo dpkg --add-architecture i386
+RUN sudo apt-get -y -qq update
+RUN sudo apt-get -y install \
+    gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \
+    libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
+    libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \
+    libcurl4-openssl-dev
diff --git a/.gitignore b/.gitignore
index d28a512..4c29705 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@
 *.dylib
 
 # Executables
-zstd
+/zstd
 zstdmt
 *.exe
 *.out
@@ -22,7 +22,7 @@
 
 # Test artefacts
 tmp*
-dictionary*
+dictionary.
 NUL
 
 # Build artefacts
@@ -41,3 +41,4 @@
 .DS_Store
 googletest/
 *.d
+*.vscode
diff --git a/.travis.yml b/.travis.yml
index ad2b429..a2c1ae1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -153,6 +153,12 @@
         - make clean
         - make travis-install    # just ensures `make install` works
 
+    - name: c99 compatibility
+      if: branch = master
+      script:
+        - make c99build
+        - make -C tests test-zstd
+
     - name: PPC64
       if: branch = master
       script:
@@ -184,20 +190,30 @@
       compiler: clang
       install:
         - sudo apt-get install -qq liblz4-dev valgrind tree
-        - travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip'
-            && unzip ~/ninja.zip -d ~/.local/bin
-        - travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/get-pip.py'
-            && python3 ~/get-pip.py --user
-            && pip3 install --user meson
+        - |
+          travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' &&
+          unzip ~/ninja.zip -d ~/.local/bin
+        - |
+          travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/get-pip.py' &&
+          python3 ~/get-pip.py --user &&
+          pip3 install --user meson
       script:
-        - meson --buildtype=debug
-                -Db_lundef=false
-                -Dauto_features=enabled
-                -Dbuild_{programs,tests,contrib}=true
-                -Ddefault_library=both
-                build/meson builddir
-        - cd builddir
+        - |
+          meson setup \
+            --buildtype=debugoptimized \
+            -Db_lundef=false \
+            -Dauto_features=enabled \
+            -Dbin_programs=true \
+            -Dbin_tests=true \
+            -Dbin_contrib=true \
+            -Ddefault_library=both \
+             build/meson builddir
+        - pushd builddir
+        - ninja
+        - meson test --verbose --no-rebuild
         - DESTDIR=./staging ninja install
         - tree ./staging
+      after_failure:
+        - cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt
   allow_failures:
     - env: ALLOW_FAILURES=true
diff --git a/CHANGELOG b/CHANGELOG
index bf04a8a..a3a1f61 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,33 @@
+v1.4.1
+bug: Fix data corruption in niche use cases by @terrelln (#1659)
+bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)
+bug: Fix out of bounds read by @terrelln (#1590)
+perf: Improve decode speed by ~7% by @mgrice (#1668)
+perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681)
+perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658)
+perf: Improve compression ratio for small windowLog by @cyan4973 (#1624)
+perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635)
+api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656)
+cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640)
+cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631)
+cli: Restrict read permissions on destination files by @chungy (#1644)
+cli: zstdgrep: handle -f flag by @felixhandte (#1618)
+cli: zstdcat: follow symlinks by @vejnar (#1604)
+doc: Remove extra size limit on compressed blocks by @felixhandte (#1689)
+doc: Fix typo by @yk-tanigawa (#1633)
+doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629)
+build: CMake: support building with LZ4 by @leeyoung624 (#1626)
+build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647)
+build: CMake: respect existing uninstall target by @j301scott (#1619)
+build: Make: skip multithread tests when built without support by @michaelforney (#1620)
+build: Make: Fix examples/ test target by @sjnam (#1603)
+build: Meson: rename options out of deprecated namespace by @lzutao (#1665)
+build: Meson: fix build by @lzutao (#1602)
+build: Visual Studio: don't export symbols in static lib by @scharan (#1650)
+build: Visual Studio: fix linking by @absotively (#1639)
+build: Fix MinGW-W64 build by @myzhang1029 (#1600)
+misc: Expand decodecorpus coverage by @ephiepark (#1664)
+
 v1.4.0
 perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
 api: Move the advanced API, including all functions in the staging section, to the stable section
diff --git a/build/.gitignore b/build/.gitignore
index 1ceb70e..5a18b30 100644
--- a/build/.gitignore
+++ b/build/.gitignore
@@ -29,3 +29,5 @@
 CTestTestfile.cmake
 build
 lib
+!cmake/lib
+!meson/lib
diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
index 2c4f126..bcde584 100644
--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
@@ -209,7 +209,6 @@
       <EnablePREfast>false</EnablePREfast>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <AssemblerOutput>All</AssemblerOutput>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -234,7 +233,6 @@
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
       <WholeProgramOptimization>true</WholeProgramOptimization>
       <OmitFramePointers>true</OmitFramePointers>
-      <AssemblerOutput>All</AssemblerOutput>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj
index 27a4889..6b985ba 100644
--- a/build/VS2010/libzstd/libzstd.vcxproj
+++ b/build/VS2010/libzstd/libzstd.vcxproj
@@ -90,31 +90,26 @@
     <ProjectGuid>{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}</ProjectGuid>
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>libzstd</RootNamespace>
+    <TargetName>libzstd_static</TargetName>
+    <CharacterSet>MultiByte</CharacterSet>
+    <ConfigurationType>StaticLibrary</ConfigurationType>
     <OutDir>$(SolutionDir)bin\$(Platform)_$(Configuration)\</OutDir>
     <IntDir>$(SolutionDir)bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\</IntDir>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
-    <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
@@ -134,35 +129,36 @@
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <TargetName>libzstd_static</TargetName>
     <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
-    <TargetName>libzstd_static</TargetName>
     <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <TargetName>libzstd_static</TargetName>
     <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
-    <TargetName>libzstd_static</TargetName>
     <IncludePath>$(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath);</IncludePath>
     <RunCodeAnalysis>false</RunCodeAnalysis>
   </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <ProgramDataBaseFileName>$(OutDir)$(TargetName).pdb</ProgramDataBaseFileName>
+    </ClCompile>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level4</WarningLevel>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -182,7 +178,7 @@
       </PrecompiledHeader>
       <WarningLevel>Level4</WarningLevel>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>true</TreatWarningAsError>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -202,11 +198,10 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <EnablePREfast>false</EnablePREfast>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-      <AssemblerOutput>All</AssemblerOutput>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
@@ -224,14 +219,13 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <TreatWarningAsError>false</TreatWarningAsError>
       <EnablePREfast>false</EnablePREfast>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
       <WholeProgramOptimization>true</WholeProgramOptimization>
       <OmitFramePointers>true</OmitFramePointers>
-      <AssemblerOutput>All</AssemblerOutput>
     </ClCompile>
     <Link>
       <SubSystem>Console</SubSystem>
diff --git a/build/cmake/CMakeModules/FindLibLZ4.cmake b/build/cmake/CMakeModules/FindLibLZ4.cmake
new file mode 100644
index 0000000..d0fac06
--- /dev/null
+++ b/build/cmake/CMakeModules/FindLibLZ4.cmake
@@ -0,0 +1,49 @@
+# Find LibLZ4
+#
+# Find LibLZ4 headers and library
+#
+#   Result Variables
+#
+#   LIBLZ4_FOUND             - True if lz4 is found
+#   LIBLZ4_INCLUDE_DIRS      - lz4 headers directories
+#   LIBLZ4_LIBRARIES         - lz4 libraries
+#   LIBLZ4_VERSION_MAJOR     - The major version of lz4
+#   LIBLZ4_VERSION_MINOR     - The minor version of lz4
+#   LIBLZ4_VERSION_RELEASE   - The release version of lz4
+#   LIBLZ4_VERSION_STRING    - version number string (e.g. 1.8.3)
+#
+#   Hints
+#
+#   Set ``LZ4_ROOT_DIR`` to the directory of lz4.h and lz4 library
+
+set(_LIBLZ4_ROOT_HINTS
+    ENV LZ4_ROOT_DIR)
+
+find_path(  LIBLZ4_INCLUDE_DIR lz4.h
+            HINTS ${_LIBLZ4_ROOT_HINTS})
+find_library(   LIBLZ4_LIBRARY NAMES lz4 liblz4 liblz4_static
+                HINTS ${_LIBLZ4_ROOT_HINTS})
+
+if(LIBLZ4_INCLUDE_DIR)
+    file(STRINGS "${LIBLZ4_INCLUDE_DIR}/lz4.h" LIBLZ4_HEADER_CONTENT REGEX "#define LZ4_VERSION_[A-Z]+ +[0-9]+")
+
+    string(REGEX REPLACE ".*#define LZ4_VERSION_MAJOR +([0-9]+).*" "\\1" LIBLZ4_VERSION_MAJOR "${LIBLZ4_HEADER_CONTENT}")
+    string(REGEX REPLACE ".*#define LZ4_VERSION_MINOR +([0-9]+).*" "\\1" LIBLZ4_VERSION_MINOR "${LIBLZ4_HEADER_CONTENT}")
+    string(REGEX REPLACE ".*#define LZ4_VERSION_RELEASE +([0-9]+).*" "\\1" LIBLZ4_VERSION_RELEASE "${LIBLZ4_HEADER_CONTENT}")
+
+    set(LIBLZ4_VERSION_STRING "${LIBLZ4_VERSION_MAJOR}.${LIBLZ4_VERSION_MINOR}.${LIBLZ4_VERSION_RELEASE}")
+    unset(LIBLZ4_HEADER_CONTENT)
+endif()
+
+include(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibLZ4    REQUIRED_VARS   LIBLZ4_INCLUDE_DIR
+                                                            LIBLZ4_LIBRARY
+                                            VERSION_VAR     LIBLZ4_VERSION_STRING
+                                            FAIL_MESSAGE    "Could NOT find LZ4, try to set the paths to lz4.h and lz4 library in environment variable LZ4_ROOT_DIR")
+
+if (LIBLZ4_FOUND)
+    set(LIBLZ4_LIBRARIES ${LIBLZ4_LIBRARY})
+    set(LIBLZ4_INCLUDE_DIRS ${LIBLZ4_INCLUDE_DIR})
+endif ()
+
+mark_as_advanced( LIBLZ4_INCLUDE_DIR LIBLZ4_LIBRARY )
diff --git a/build/cmake/README.md b/build/cmake/README.md
index 681b14c..854389a 100644
--- a/build/cmake/README.md
+++ b/build/cmake/README.md
@@ -5,6 +5,45 @@
 conventions of using lower-case for commands, and upper-case for
 variables.
 
+# How to build
+
+As cmake doesn't support a command like `cmake clean`, it's recommended to perform an "out of source build".
+To do this, you can create a new directory and build in it:
+```sh
+cd build/cmake
+mkdir builddir
+cd builddir
+cmake ..
+make
+```
+Then you can clean all cmake caches by simply deleting the new directory:
+```sh
+rm -rf build/cmake/builddir
+```
+
+And of course, you can directly build in build/cmake:
+```sh
+cd build/cmake
+cmake .
+make
+```
+
+To show cmake build options, you can:
+```sh
+cd build/cmake/builddir
+cmake -LH ..
+```
+
+Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this:
+```sh
+cd build/cmake/builddir
+cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON ..
+make
+```
+
+## referring
+[Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output)
+
 # CMake Style Recommendations
 
 ## Indent all code correctly, i.e. the body of
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 508bee3..e415c15 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -194,10 +194,12 @@
 endif ()
 
 # uninstall target
-configure_file(
-        "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
-        "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
-        IMMEDIATE @ONLY)
+if (NOT TARGET uninstall)
+    configure_file(
+            "${CMAKE_CURRENT_SOURCE_DIR}/cmake_uninstall.cmake.in"
+            "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
+            IMMEDIATE @ONLY)
 
-add_custom_target(uninstall
-        COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
+    add_custom_target(uninstall
+            COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
+endif ()
diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt
index f6f7a36..50408bd 100644
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@@ -38,6 +38,8 @@
     add_custom_target(unzstd ALL ${CMAKE_COMMAND} -E create_symlink zstd unzstd DEPENDS zstd COMMENT "Creating unzstd symlink")
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdcat DESTINATION "bin")
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/unzstd DESTINATION "bin")
+    install(PROGRAMS ${PROGRAMS_DIR}/zstdgrep DESTINATION "bin")
+    install(PROGRAMS ${PROGRAMS_DIR}/zstdless DESTINATION "bin")
 
     add_custom_target(zstd.1 ALL
         ${CMAKE_COMMAND} -E copy ${PROGRAMS_DIR}/zstd.1 .
@@ -83,7 +85,9 @@
 
 option(ZSTD_ZLIB_SUPPORT "ZLIB SUPPORT" OFF)
 option(ZSTD_LZMA_SUPPORT "LZMA SUPPORT" OFF)
+option(ZSTD_LZ4_SUPPORT "LZ4 SUPPORT" OFF)
 
+# Add gzip support
 if (ZSTD_ZLIB_SUPPORT)
     find_package(ZLIB REQUIRED)
 
@@ -96,6 +100,7 @@
     endif ()
 endif ()
 
+# Add lzma support
 if (ZSTD_LZMA_SUPPORT)
     find_package(LibLZMA REQUIRED)
 
@@ -107,3 +112,16 @@
         message(SEND_ERROR "lzma library is missing")
     endif ()
 endif ()
+
+# Add lz4 support
+if (ZSTD_LZ4_SUPPORT)
+    find_package(LibLZ4 REQUIRED)
+
+    if (LIBLZ4_FOUND)
+        include_directories(${LIBLZ4_INCLUDE_DIRS})
+        target_link_libraries(zstd ${LIBLZ4_LIBRARIES})
+        set_property(TARGET zstd APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_LZ4COMPRESS;ZSTD_LZ4DECOMPRESS")
+    else ()
+        message(SEND_ERROR "lz4 library is missing")
+    endif ()
+endif ()
diff --git a/build/meson/README.md b/build/meson/README.md
index d79ed49..d393a06 100644
--- a/build/meson/README.md
+++ b/build/meson/README.md
@@ -17,7 +17,7 @@
 `cd` to this meson directory (`build/meson`)
 
 ```sh
-meson --buildtype=release -Dbuild_{programs,contrib}=true builddir
+meson setup -Dbin_programs=true -Dbin_contrib=true builddir
 cd builddir
 ninja             # to build
 ninja install     # to install
diff --git a/build/meson/contrib/gen_html/meson.build b/build/meson/contrib/gen_html/meson.build
index cabff20..3f30253 100644
--- a/build/meson/contrib/gen_html/meson.build
+++ b/build/meson/contrib/gen_html/meson.build
@@ -17,6 +17,7 @@
 gen_html = executable('gen_html',
   join_paths(zstd_rootdir, 'contrib/gen_html/gen_html.cpp'),
   include_directories: gen_html_includes,
+  native: true,
   install: false)
 
 # Update zstd manual
diff --git a/build/meson/meson.build b/build/meson/meson.build
index 6543158..121811e 100644
--- a/build/meson/meson.build
+++ b/build/meson/meson.build
@@ -12,11 +12,11 @@
   ['c', 'cpp'],
   license: ['BSD', 'GPLv2'],
   default_options : [
-    'c_std=c99',
+    'c_std=gnu99',
     'cpp_std=c++11',
     'buildtype=release'
   ],
-  version: '1.3.8',
+  version: 'DUMMY',
   meson_version: '>=0.47.0')
 
 cc = meson.get_compiler('c')
@@ -43,13 +43,10 @@
 GetZstdLibraryVersion_py = files('GetZstdLibraryVersion.py')
 r = run_command(python3, GetZstdLibraryVersion_py, zstd_h_file)
 if r.returncode() == 0
-  output = r.stdout().strip()
-  if output.version_compare('>@0@'.format(zstd_version))
-    zstd_version = output
-    message('Project version is now: @0@'.format(zstd_version))
-  endif
+  zstd_version = r.stdout().strip()
+  message('Project version is now: @0@'.format(zstd_version))
 else
-  message('Cannot find project version in @0@'.format(zstd_h_file))
+  error('Cannot find project version in @0@'.format(zstd_h_file))
 endif
 
 zstd_libversion = zstd_version
@@ -78,9 +75,9 @@
 use_backtrace = get_option('backtrace')
 use_static_runtime = get_option('static_runtime')
 
-build_programs = get_option('build_programs')
-build_contrib = get_option('build_contrib')
-build_tests = get_option('build_tests')
+bin_programs = get_option('bin_programs')
+bin_contrib = get_option('bin_contrib')
+bin_tests = get_option('bin_tests')
 
 feature_multi_thread = get_option('multi_thread')
 feature_zlib = get_option('zlib')
@@ -91,7 +88,7 @@
 # Dependencies
 # =============================================================================
 
-libm_dep = cc.find_library('m', required: build_tests)
+libm_dep = cc.find_library('m', required: bin_tests)
 thread_dep = dependency('threads', required: feature_multi_thread)
 use_multi_thread = thread_dep.found()
 # Arguments in dependency should be equivalent to those passed to pkg-config
@@ -136,14 +133,14 @@
 
 subdir('lib')
 
-if build_programs
+if bin_programs
   subdir('programs')
 endif
 
-if build_tests
+if bin_tests
   subdir('tests')
 endif
 
-if build_contrib
+if bin_contrib
   subdir('contrib')
 endif
diff --git a/build/meson/meson_options.txt b/build/meson/meson_options.txt
index 349d915..90a81c5 100644
--- a/build/meson/meson_options.txt
+++ b/build/meson/meson_options.txt
@@ -19,11 +19,11 @@
 option('static_runtime', type: 'boolean', value: false,
   description: 'Link to static run-time libraries on MSVC')
 
-option('build_programs', type: 'boolean', value: true,
+option('bin_programs', type: 'boolean', value: true,
   description: 'Enable programs build')
-option('build_tests', type: 'boolean', value: false,
+option('bin_tests', type: 'boolean', value: false,
   description: 'Enable tests build')
-option('build_contrib', type: 'boolean', value: false,
+option('bin_contrib', type: 'boolean', value: false,
   description: 'Enable contrib build')
 
 option('multi_thread', type: 'feature', value: 'enabled',
diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build
index fd813ea..64eba60 100644
--- a/build/meson/tests/meson.build
+++ b/build/meson/tests/meson.build
@@ -112,13 +112,15 @@
   dependencies: [ libzstd_dep ],
   install: false)
 
-legacy_sources = [join_paths(zstd_rootdir, 'tests/legacy.c')]
-legacy = executable('legacy',
-  legacy_sources,
-  # Use -Dlegacy_level build option to control it
-  #c_args: '-DZSTD_LEGACY_SUPPORT=4',
-  dependencies: [ libzstd_dep ],
-  install: false)
+if 0 < legacy_level and legacy_level <= 4
+  legacy_sources = [join_paths(zstd_rootdir, 'tests/legacy.c')]
+  legacy = executable('legacy',
+    legacy_sources,
+    # Use -Dlegacy_level build option to control it
+    #c_args: '-DZSTD_LEGACY_SUPPORT=4',
+    dependencies: [ libzstd_dep ],
+    install: false)
+endif
 
 decodecorpus_sources = [join_paths(zstd_rootdir, 'programs/util.c'),
   join_paths(zstd_rootdir, 'programs/timefn.c'),
@@ -177,7 +179,8 @@
     args: ZSTDRTTEST,
     env: ['ZSTD=' + zstd.full_path()],
     depends: [datagen],
-    timeout: 600) # Timeout should work on HDD drive
+    workdir: meson.current_build_dir(),
+    timeout: 2800) # Timeout should work on HDD drive
 endif
 
 test('test-fullbench-1',
@@ -195,7 +198,7 @@
   test('test-fuzzer',
     fuzzer,
     args: ['-v', FUZZERTEST] + FUZZER_FLAGS,
-    timeout: 240)
+    timeout: 480)
 endif
 
 test('test-zbuff',
@@ -205,7 +208,7 @@
 test('test-zstream-1',
   zstreamtest,
   args: ['-v', ZSTREAM_TESTTIME] + FUZZER_FLAGS,
-  timeout: 120)
+  timeout: 240)
 test('test-zstream-2',
   zstreamtest,
   args: ['-mt', '-t1', ZSTREAM_TESTTIME] + FUZZER_FLAGS,
@@ -217,7 +220,9 @@
 test('test-longmatch', longmatch, timeout: 36)
 test('test-invalidDictionaries', invalidDictionaries) # should be fast
 test('test-symbols', symbols) # should be fast
-test('test-legacy', legacy) # should be fast
+if 0 < legacy_level and legacy_level <= 4
+  test('test-legacy', legacy) # should be fast
+endif
 test('test-decodecorpus',
   decodecorpus,
   args: ['-t', DECODECORPUS_TESTTIME],
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index ed758cf..7d02426 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@
 
 ### Version
 
-0.3.1 (25/10/18)
+0.3.2 (17/07/19)
 
 
 Introduction
@@ -390,9 +390,7 @@
 -  Window_Size
 -  128 KB
 
-A `Compressed_Block` has the extra restriction that `Block_Size` is always
-strictly less than the decompressed size.
-If this condition cannot be respected,
+If this condition cannot be respected when generating a `Compressed_Block`,
 the block must be sent uncompressed instead (`Raw_Block`).
 
 
@@ -1655,6 +1653,7 @@
 
 Version changes
 ---------------
+- 0.3.2 : remove additional block size restriction on compressed blocks
 - 0.3.1 : minor clarification regarding offset history update rules
 - 0.3.0 : minor edits to match RFC8478
 - 0.2.9 : clarifications for huffman weights direct representation, by Ulrich Kunitz
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index f1628b5..f281120 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,46 +1,36 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.0 Manual</title>
+<title>zstd 1.4.1 Manual</title>
 </head>
 <body>
-<h1>zstd 1.4.0 Manual</h1>
+<h1>zstd 1.4.1 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
 <li><a href="#Chapter1">Introduction</a></li>
 <li><a href="#Chapter2">Version</a></li>
-<li><a href="#Chapter3">Default constant</a></li>
-<li><a href="#Chapter4">Constants</a></li>
-<li><a href="#Chapter5">Simple API</a></li>
-<li><a href="#Chapter6">Explicit context</a></li>
-<li><a href="#Chapter7">Advanced compression API</a></li>
-<li><a href="#Chapter8">Advanced decompression API</a></li>
-<li><a href="#Chapter9">Streaming</a></li>
-<li><a href="#Chapter10">Streaming compression - HowTo</a></li>
-<li><a href="#Chapter11">This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</a></li>
-<li><a href="#Chapter12">Equivalent to:</a></li>
-<li><a href="#Chapter13">Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</a></li>
-<li><a href="#Chapter14">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</a></li>
-<li><a href="#Chapter15">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</a></li>
-<li><a href="#Chapter16">Streaming decompression - HowTo</a></li>
-<li><a href="#Chapter17">Simple dictionary API</a></li>
-<li><a href="#Chapter18">Bulk processing dictionary API</a></li>
-<li><a href="#Chapter19">Dictionary helper functions</a></li>
-<li><a href="#Chapter20">Advanced dictionary and prefix API</a></li>
-<li><a href="#Chapter21">ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
-<li><a href="#Chapter22">experimental API (static linking only)</a></li>
-<li><a href="#Chapter23">Frame size functions</a></li>
-<li><a href="#Chapter24">ZSTD_decompressBound() :</a></li>
-<li><a href="#Chapter25">Memory management</a></li>
-<li><a href="#Chapter26">Advanced compression functions</a></li>
-<li><a href="#Chapter27">Advanced decompression functions</a></li>
-<li><a href="#Chapter28">Advanced streaming functions</a></li>
-<li><a href="#Chapter29">Buffer-less and synchronous inner streaming functions</a></li>
-<li><a href="#Chapter30">Buffer-less streaming compression (synchronous mode)</a></li>
-<li><a href="#Chapter31">Buffer-less streaming decompression (synchronous mode)</a></li>
-<li><a href="#Chapter32">ZSTD_getFrameHeader() :</a></li>
-<li><a href="#Chapter33">Block level API</a></li>
+<li><a href="#Chapter3">Simple API</a></li>
+<li><a href="#Chapter4">Explicit context</a></li>
+<li><a href="#Chapter5">Advanced compression API</a></li>
+<li><a href="#Chapter6">Advanced decompression API</a></li>
+<li><a href="#Chapter7">Streaming</a></li>
+<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
+<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
+<li><a href="#Chapter10">Simple dictionary API</a></li>
+<li><a href="#Chapter11">Bulk processing dictionary API</a></li>
+<li><a href="#Chapter12">Dictionary helper functions</a></li>
+<li><a href="#Chapter13">Advanced dictionary and prefix API</a></li>
+<li><a href="#Chapter14">experimental API (static linking only)</a></li>
+<li><a href="#Chapter15">Frame size functions</a></li>
+<li><a href="#Chapter16">Memory management</a></li>
+<li><a href="#Chapter17">Advanced compression functions</a></li>
+<li><a href="#Chapter18">Advanced decompression functions</a></li>
+<li><a href="#Chapter19">Advanced streaming functions</a></li>
+<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter23">Block level API</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -78,11 +68,7 @@
 
 <pre><b>unsigned ZSTD_versionNumber(void);   </b>/**< to check runtime library version */<b>
 </b></pre><BR>
-<a name="Chapter3"></a><h2>Default constant</h2><pre></pre>
-
-<a name="Chapter4"></a><h2>Constants</h2><pre></pre>
-
-<a name="Chapter5"></a><h2>Simple API</h2><pre></pre>
+<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
 
 <pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
                 const void* src, size_t srcSize,
@@ -152,12 +138,17 @@
 int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed */<b>
 int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
 </pre></b><BR>
-<a name="Chapter6"></a><h2>Explicit context</h2><pre></pre>
+<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
 
 <h3>Compression context</h3><pre>  When compressing many times,
-  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+  it is recommended to allocate a context just once,
+  and re-use it for each successive compression operation.
   This will make workload friendlier for system's memory.
-  Use one context per thread for parallel execution in multi-threaded environments. 
+  Note : re-using context is just a speed / resource optimization.
+         It doesn't change the compression ratio, which remains identical.
+  Note 2 : In multi-threaded environments,
+         use one different context per thread for parallel execution.
+ 
 </pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
 size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -189,7 +180,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter7"></a><h2>Advanced compression API</h2><pre></pre>
+<a name="Chapter5"></a><h2>Advanced compression API</h2><pre></pre>
 
 <pre><b>typedef enum { ZSTD_fast=1,
                ZSTD_dfast=2,
@@ -332,6 +323,7 @@
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -341,6 +333,7 @@
      ZSTD_c_experimentalParam3=1000,
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
 } ZSTD_cParameter;
 </b></pre><BR>
 <pre><b>typedef struct {
@@ -424,7 +417,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter8"></a><h2>Advanced decompression API</h2><pre></pre>
+<a name="Chapter6"></a><h2>Advanced decompression API</h2><pre></pre>
 
 <pre><b>typedef enum {
 
@@ -472,7 +465,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter9"></a><h2>Streaming</h2><pre></pre>
+<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
 
 <pre><b>typedef struct ZSTD_inBuffer_s {
   const void* src;    </b>/**< start of input buffer */<b>
@@ -486,7 +479,7 @@
   size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
 } ZSTD_outBuffer;
 </b></pre><BR>
-<a name="Chapter10"></a><h2>Streaming compression - HowTo</h2><pre>
+<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
   A ZSTD_CStream object is required to track streaming operation.
   Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
   ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
@@ -592,31 +585,28 @@
 
 <pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
 </b></pre><BR>
-<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
+<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */<b>
 </b></pre><BR>
-<a name="Chapter11"></a><h2>This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</h2><pre> ZSTD_compressStream2(). It is redundent, but is still fully supported.
- Advanced parameters and dictionary compression can only be used through the
- new API.
-<BR></pre>
-
-<a name="Chapter12"></a><h2>Equivalent to:</h2><pre>
+<pre><b>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+</b>/*!<b>
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */<b>
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+</b>/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */<b>
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+</b><p>
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
  
-<BR></pre>
+</p></pre><BR>
 
-<a name="Chapter13"></a><h2>Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</h2><pre> NOTE: The return value is different. ZSTD_compressStream() returns a hint for
- the next read size (if non-zero and not an error). ZSTD_compressStream2()
- returns the number of bytes left to flush (if non-zero and not an error).
- 
-<BR></pre>
-
-<a name="Chapter14"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</h2><pre></pre>
-
-<a name="Chapter15"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</h2><pre></pre>
-
-<a name="Chapter16"></a><h2>Streaming decompression - HowTo</h2><pre>
+<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
   A ZSTD_DStream object is required to track streaming operations.
   Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
   ZSTD_DStream objects can be re-used multiple times.
@@ -647,14 +637,12 @@
 <h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
 size_t ZSTD_freeDStream(ZSTD_DStream* zds);
 </pre></b><BR>
-<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds);
-size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-</pre></b><BR>
+<h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 <pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
 </b></pre><BR>
 <pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
 </b></pre><BR>
-<a name="Chapter17"></a><h2>Simple dictionary API</h2><pre></pre>
+<a name="Chapter10"></a><h2>Simple dictionary API</h2><pre></pre>
 
 <pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
                                void* dst, size_t dstCapacity,
@@ -680,7 +668,7 @@
   Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
 </p></pre><BR>
 
-<a name="Chapter18"></a><h2>Bulk processing dictionary API</h2><pre></pre>
+<a name="Chapter11"></a><h2>Bulk processing dictionary API</h2><pre></pre>
 
 <pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                              int compressionLevel);
@@ -723,7 +711,7 @@
   Recommended when same dictionary is used multiple times. 
 </p></pre><BR>
 
-<a name="Chapter19"></a><h2>Dictionary helper functions</h2><pre></pre>
+<a name="Chapter12"></a><h2>Dictionary helper functions</h2><pre></pre>
 
 <pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 </b><p>  Provides the dictID stored within dictionary.
@@ -749,7 +737,7 @@
   When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
 </p></pre><BR>
 
-<a name="Chapter20"></a><h2>Advanced dictionary and prefix API</h2><pre>
+<a name="Chapter13"></a><h2>Advanced dictionary and prefix API</h2><pre>
  This API allows dictionaries to be used with ZSTD_compress2(),
  ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
  only reset with the context is reset with ZSTD_reset_parameters or
@@ -867,15 +855,7 @@
   Note that object memory usage can evolve (increase or decrease) over time. 
 </p></pre><BR>
 
-<a name="Chapter21"></a><h2>ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre>
- The definitions in the following section are considered experimental.
- They are provided for advanced scenarios.
- They should never be used with a dynamic library, as prototypes may change in the future.
- Use them only in association with static linking.
- 
-<BR></pre>
-
-<a name="Chapter22"></a><h2>experimental API (static linking only)</h2><pre>
+<a name="Chapter14"></a><h2>experimental API (static linking only)</h2><pre>
  The following symbols and constants
  are not planned to join "stable API" status in the near future.
  They can still change in future versions.
@@ -973,7 +953,7 @@
   ZSTD_lcm_uncompressed = 2,  </b>/**< Always emit uncompressed literals. */<b>
 } ZSTD_literalCompressionMode_e;
 </b></pre><BR>
-<a name="Chapter23"></a><h2>Frame size functions</h2><pre></pre>
+<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
 
 <pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
 </b><p>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
@@ -998,7 +978,8 @@
             however it does mean that all frame data must be present and valid. 
 </p></pre><BR>
 
-<a name="Chapter24"></a><h2>ZSTD_decompressBound() :</h2><pre>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+<pre><b>unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+</b><p>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
   `srcSize` must be the _exact_ size of this series
        (i.e. there should be a frame boundary at `src + srcSize`)
   @return : - upper-bound for the decompressed size of all data in all successive frames
@@ -1010,7 +991,7 @@
   note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
               upper-bound = # blocks * min(128 KB, Window_Size)
  
-<BR></pre>
+</p></pre><BR>
 
 <pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 </b><p>  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
@@ -1018,7 +999,7 @@
            or an error code (if srcSize is too small) 
 </p></pre><BR>
 
-<a name="Chapter25"></a><h2>Memory management</h2><pre></pre>
+<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
 
 <pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -1098,7 +1079,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter26"></a><h2>Advanced compression functions</h2><pre></pre>
+<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>
 
 <pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 </b><p>  Create a digested dictionary for compression
@@ -1243,7 +1224,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter27"></a><h2>Advanced decompression functions</h2><pre></pre>
+<a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre>
 
 <pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
 </b><p>  Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -1305,7 +1286,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter28"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
+<a name="Chapter19"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
   Once Advanced API reaches "stable" status,
   redundant functions will be deprecated, and then at some point removed.
 <BR></pre>
@@ -1407,18 +1388,41 @@
  
 </p></pre><BR>
 
-<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); </b>/**< note: no dictionary will be used if dict == NULL or dictSize < 8 */<b>
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
-size_t ZSTD_resetDStream(ZSTD_DStream* zds);  </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
+<h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></b>/**<b>
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+ *
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+</b>/**<b>
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
+ *
+ * note : ddict is referenced, it must outlive decompression session
+ */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+</b>/**<b>
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *
+ * re-use decompression parameters from previous init; saves dictionary loading
+ */
+size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 </pre></b><BR>
-<a name="Chapter29"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
   This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
   But it's also a complex one, with several restrictions, documented below.
   Prefer normal streaming API for an easier experience.
  
 <BR></pre>
 
-<a name="Chapter30"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -1454,7 +1458,7 @@
 size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </pre></b><BR>
-<a name="Chapter31"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
@@ -1536,23 +1540,21 @@
     unsigned checksumFlag;
 } ZSTD_frameHeader;
 </pre></b><BR>
-<a name="Chapter32"></a><h2>ZSTD_getFrameHeader() :</h2><pre>  decode Frame Header, or requires larger `srcSize`.
+<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
+</b>/*! ZSTD_getFrameHeader_advanced() :<b>
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
+</b><p>  decode Frame Header, or requires larger `srcSize`.
  @return : 0, `zfhPtr` is correctly filled,
           >0, `srcSize` is too small, value is wanted `srcSize` amount,
            or an error code, which can be tested using ZSTD_isError() 
-<BR></pre>
-
-<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
-</b><p>  same as ZSTD_getFrameHeader(),
-  with added capability to select a format (like ZSTD_f_zstd1_magicless) 
 </p></pre><BR>
 
 <pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
 </b></pre><BR>
-<a name="Chapter33"></a><h2>Block level API</h2><pre></pre>
+<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
 
 <pre><b></b><p>    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
diff --git a/examples/Makefile b/examples/Makefile
index cd995f2..65ea8ab 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -77,7 +77,6 @@
 	@echo -- Edge cases detection
 	! ./streaming_decompression tmp    # invalid input, must fail
 	! ./simple_decompression tmp       # invalid input, must fail
-	! ./simple_decompression tmp.zst   # unknown input size, must fail
 	touch tmpNull                      # create 0-size file
 	./simple_compression tmpNull
 	./simple_decompression tmpNull.zst # 0-size frame : must work
diff --git a/lib/Makefile b/lib/Makefile
index 404f5b6..87a396c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -17,6 +17,7 @@
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER := $(shell echo $(LIBVER_SCRIPT))
 VERSION?= $(LIBVER)
+CCVER := $(shell $(CC) --version)
 
 CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
 ifeq ($(OS),Windows_NT)   # MinGW assumed
@@ -45,6 +46,10 @@
 ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
 ZSTD_FILES := $(ZSTDCOMMON_FILES)
 
+ifeq ($(findstring GCC,$(CCVER)),GCC)
+decompress/zstd_decompress_block.o :	CFLAGS+=-fno-tree-vectorize
+endif
+
 ZSTD_LEGACY_SUPPORT ?= 5
 ZSTD_LIB_COMPRESSION ?= 1
 ZSTD_LIB_DECOMPRESSION ?= 1
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index 0836e3e..87bf51a 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -127,6 +127,13 @@
     }                                     \
 }
 
+/* vectorization */
+#if !defined(__clang__) && defined(__GNUC__)
+#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#else
+#  define DONT_VECTORIZE
+#endif
+
 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
 #  include <intrin.h>                    /* For Visual 2005 */
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 31f756a..81b16ea 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -34,7 +34,6 @@
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
 
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -193,19 +192,72 @@
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 8
+#define VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
  *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
 {
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;
-    do
-        COPY8(op, ip)
-    while (op < oend);
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
+      do
+          COPY8(op, ip)
+      while (op < oend);
+    }
+    else {
+      if ((length & 8) == 0)
+        COPY8(op, ip);
+      do {
+        COPY16(op, ip);
+      }
+      while (op < oend);
+    }
+}
+
+/*! ZSTD_wildcopy_16min() :
+ *  same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(length >= 8);
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
+      do
+          COPY8(op, ip)
+      while (op < oend);
+    }
+    else {
+      if ((length & 8) == 0)
+        COPY8(op, ip);
+      do {
+        COPY16(op, ip);
+      }
+      while (op < oend);
+    }
 }
 
 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 2e163c8..1476512 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -385,6 +385,11 @@
         bounds.upperBound = ZSTD_lcm_uncompressed;
         return bounds;
 
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
     default:
         {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
             return boundError;
@@ -452,6 +457,7 @@
     case ZSTD_c_ldmHashRateLog:
     case ZSTD_c_forceAttachDict:
     case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
     default:
         return 0;
     }
@@ -497,6 +503,7 @@
     case ZSTD_c_ldmHashLog:
     case ZSTD_c_ldmMinMatch:
     case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
         break;
 
     default: RETURN_ERROR(parameter_unsupported);
@@ -671,6 +678,12 @@
         CCtxParams->ldmParams.hashRateLog = value;
         return CCtxParams->ldmParams.hashRateLog;
 
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -692,13 +705,13 @@
         *value = CCtxParams->compressionLevel;
         break;
     case ZSTD_c_windowLog :
-        *value = CCtxParams->cParams.windowLog;
+        *value = (int)CCtxParams->cParams.windowLog;
         break;
     case ZSTD_c_hashLog :
-        *value = CCtxParams->cParams.hashLog;
+        *value = (int)CCtxParams->cParams.hashLog;
         break;
     case ZSTD_c_chainLog :
-        *value = CCtxParams->cParams.chainLog;
+        *value = (int)CCtxParams->cParams.chainLog;
         break;
     case ZSTD_c_searchLog :
         *value = CCtxParams->cParams.searchLog;
@@ -773,6 +786,9 @@
     case ZSTD_c_ldmHashRateLog :
         *value = CCtxParams->ldmParams.hashRateLog;
         break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -930,12 +946,12 @@
     @return : 0, or an error code if one value is beyond authorized range */
 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 {
-    BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
-    BOUNDCHECK(ZSTD_c_chainLog,  cParams.chainLog);
-    BOUNDCHECK(ZSTD_c_hashLog,   cParams.hashLog);
-    BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
-    BOUNDCHECK(ZSTD_c_minMatch,  cParams.minMatch);
-    BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
     BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
     return 0;
 }
@@ -951,7 +967,7 @@
         if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
         else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
     }
-#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
     CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
@@ -1282,15 +1298,14 @@
 }
 
 /*! ZSTD_invalidateMatchState()
- * Invalidate all the matches in the match finder tables.
- * Requires nextSrc and base to be set (can be NULL).
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
  */
 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
 {
     ZSTD_window_clear(&ms->window);
 
     ms->nextToUpdate = ms->window.dictLimit;
-    ms->nextToUpdate3 = ms->window.dictLimit;
     ms->loadedDictEnd = 0;
     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
     ms->dictMatchState = NULL;
@@ -1327,15 +1342,17 @@
 
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
 
+typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
+
 static void*
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       void* ptr,
                 const ZSTD_compressionParameters* cParams,
-                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+                      ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
 {
     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
     size_t const hSize = ((size_t)1) << cParams->hashLog;
-    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = ((size_t)1) << hashLog3;
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
@@ -1349,7 +1366,7 @@
     ZSTD_invalidateMatchState(ms);
 
     /* opt parser space */
-    if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
         DEBUGLOG(4, "reserving optimal parser space");
         ms->opt.litFreq = (unsigned*)ptr;
         ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
@@ -1377,6 +1394,19 @@
     return ptr;
 }
 
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                          * during at least this number of times,
@@ -1388,7 +1418,7 @@
     note : `params` are assumed fully validated at this stage */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                       ZSTD_CCtx_params params,
-                                      U64 pledgedSrcSize,
+                                      U64 const pledgedSrcSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
@@ -1400,13 +1430,21 @@
         if (ZSTD_equivalentParams(zc->appliedParams, params,
                                   zc->inBuffSize,
                                   zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
-                                  zbuff, pledgedSrcSize)) {
-            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
-                        zc->appliedParams.cParams.windowLog, zc->blockSize);
+                                  zbuff, pledgedSrcSize) ) {
+            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
             zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
-            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
+            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
+                DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
+                            zc->appliedParams.cParams.windowLog, zc->blockSize);
+                if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+                    /* prefer a reset, faster than a rescale */
+                    ZSTD_reset_matchState(&zc->blockState.matchState,
+                                           zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                          &params.cParams,
+                                           crp, ZSTD_resetTarget_CCtx);
+                }
                 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
-    }   }
+    }   }   }
     DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
 
     if (params.ldmParams.enableLdm) {
@@ -1449,7 +1487,7 @@
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
             if (workSpaceTooSmall || workSpaceWasteful) {
-                DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
+                DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
                             zc->workSpaceSize >> 10,
                             neededSpace >> 10);
 
@@ -1491,7 +1529,10 @@
 
         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
 
-        ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
+        ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
+                                     zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                    &params.cParams,
+                                     crp, ZSTD_resetTarget_CCtx);
 
         /* ldm hash table */
         /* initialize bucketOffsets table later for pointer alignment */
@@ -1509,8 +1550,6 @@
         }
         assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
 
-        ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
-
         /* sequences storage */
         zc->seqStore.maxNbSeq = maxNbSeq;
         zc->seqStore.sequencesStart = (seqDef*)ptr;
@@ -1587,15 +1626,14 @@
                                  * handled in _enforceMaxDist */
 }
 
-static size_t ZSTD_resetCCtx_byAttachingCDict(
-    ZSTD_CCtx* cctx,
-    const ZSTD_CDict* cdict,
-    ZSTD_CCtx_params params,
-    U64 pledgedSrcSize,
-    ZSTD_buffered_policy_e zbuff)
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
 {
-    {
-        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Resize working context table params for input only, since the dict
@@ -1607,8 +1645,7 @@
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
     }
 
-    {
-        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                   - cdict->matchState.window.base);
         const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
         if (cdictLen == 0) {
@@ -1625,9 +1662,9 @@
                     cctx->blockState.matchState.window.base + cdictEnd;
                 ZSTD_window_clear(&cctx->blockState.matchState.window);
             }
+            /* loadedDictEnd is expressed within the referential of the active context */
             cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
-        }
-    }
+    }   }
 
     cctx->dictID = cdict->dictID;
 
@@ -1681,7 +1718,6 @@
         ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
 
@@ -1761,7 +1797,6 @@
         ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
     dstCCtx->dictID = srcCCtx->dictID;
@@ -1831,16 +1866,15 @@
 
 /*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
 {
-    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
-        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
         else
             ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
@@ -2524,6 +2558,7 @@
         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    assert(op <= oend);
     if (nbSeq==0) {
         /* Copy the old tables over as if we repeated them */
         memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
@@ -2532,6 +2567,7 @@
 
     /* seqHead : flags for FSE encoding type */
     seqHead = op++;
+    assert(op <= oend);
 
     /* convert length/distances into codes */
     ZSTD_seqToCodes(seqStorePtr);
@@ -2555,6 +2591,7 @@
             if (LLtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
     /* build CTable for Offsets */
     {   unsigned max = MaxOff;
@@ -2577,6 +2614,7 @@
             if (Offtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
     /* build CTable for MatchLengths */
     {   unsigned max = MaxML;
@@ -2597,6 +2635,7 @@
             if (MLtype == set_compressed)
                 lastNCount = op;
             op += countSize;
+            assert(op <= oend);
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
@@ -2610,6 +2649,7 @@
                                         longOffsets, bmi2);
         FORWARD_IF_ERROR(bitstreamSize);
         op += bitstreamSize;
+        assert(op <= oend);
         /* zstd versions <= 1.3.4 mistakenly report corruption when
          * FSE_readNCount() receives a buffer < 4 bytes.
          * Fixed by https://github.com/facebook/zstd/pull/1146.
@@ -2721,30 +2761,24 @@
     ssPtr->longLengthID = 0;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize)
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;   /* outcome of ZSTD_buildSeqStore() : sequences were built, or input was too small to even attempt compression */
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    size_t cSize;
-    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-
     /* Assert that we have correctly flushed the ctx params into the ms's copy */
     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
-
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
         ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
-        cSize = 0;
-        goto out;  /* don't even attempt compression below a certain srcSize */
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
     }
     ZSTD_resetSeqStore(&(zc->seqStore));
     /* required for optimal parser to read stats from dictionary */
     ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
     /* tell the optimal parser how we expect to compress literals */
     ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
-
     /* a gap between an attached dict and the current window is not safe,
      * they must remain adjacent,
      * and when that stops being the case, the dict must be unset */
@@ -2798,6 +2832,21 @@
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
             ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
     }   }
+    return ZSTDbss_compress;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize)
+{
+    size_t cSize;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss);
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
 
     /* encode sequences and literals */
     cSize = ZSTD_compressSequences(&zc->seqStore,
@@ -2826,6 +2875,25 @@
 }
 
 
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
+{   /* Rescales the indexes of `ms` when the window reports that input up to `iend` needs overflow correction; does nothing otherwise. */
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;   /* window size */
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);   /* value by which indexes get reduced below */
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_reduceIndex(ms, params, correction);   /* rescale table indexes by `correction` */
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;   /* clamp at 0 instead of wrapping around */
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction : clear both the dictionary end index and the dictionary match state */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
+
 /*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
@@ -2844,7 +2912,7 @@
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
-    assert(cctx->appliedParams.cParams.windowLog <= 31);
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
 
     DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
     if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2859,19 +2927,10 @@
                         "not enough space to store compressed block");
         if (remaining < blockSize) blockSize = remaining;
 
-        if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
-            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
-            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-            ZSTD_reduceIndex(cctx, correction);
-            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-            else ms->nextToUpdate -= correction;
-            ms->loadedDictEnd = 0;
-            ms->dictMatchState = NULL;
-        }
-        ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowLimit */
         if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
 
         {   size_t cSize = ZSTD_compressBlock_internal(cctx,
@@ -2899,7 +2958,7 @@
     }   }
 
     if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 
@@ -2991,6 +3050,7 @@
         fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                        cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
         FORWARD_IF_ERROR(fhSize);
+        assert(fhSize <= dstCapacity);
         dstCapacity -= fhSize;
         dst = (char*)dst + fhSize;
         cctx->stage = ZSTDcs_ongoing;
@@ -3007,18 +3067,7 @@
 
     if (!frame) {
         /* overflow check and correction for block mode */
-        if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
-            U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-            U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
-            ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-            ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-            ZSTD_reduceIndex(cctx, correction);
-            if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-            else ms->nextToUpdate -= correction;
-            ms->loadedDictEnd = 0;
-            ms->dictMatchState = NULL;
-        }
+        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
     }
 
     DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
@@ -3074,7 +3123,7 @@
                                          const void* src, size_t srcSize,
                                          ZSTD_dictTableLoadMethod_e dtlm)
 {
-    const BYTE* const ip = (const BYTE*) src;
+    const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
 
     ZSTD_window_update(&ms->window, src, srcSize);
@@ -3085,32 +3134,42 @@
 
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    switch(params->cParams.strategy)
-    {
-    case ZSTD_fast:
-        ZSTD_fillHashTable(ms, iend, dtlm);
-        break;
-    case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
-        break;
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = iend - ip;
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
 
-    case ZSTD_greedy:
-    case ZSTD_lazy:
-    case ZSTD_lazy2:
-        if (srcSize >= HASH_READ_SIZE)
-            ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
-        break;
+        ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);
 
-    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-    case ZSTD_btopt:
-    case ZSTD_btultra:
-    case ZSTD_btultra2:
-        if (srcSize >= HASH_READ_SIZE)
-            ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
-        break;
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
 
-    default:
-        assert(0);  /* not possible : not a valid strategy id */
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3297,12 +3356,11 @@
 
     FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                      ZSTDcrp_continue, zbuff) );
-    {
-        size_t const dictID = ZSTD_compress_insertDictionary(
+    {   size_t const dictID = ZSTD_compress_insertDictionary(
                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
         FORWARD_IF_ERROR(dictID);
-        assert(dictID <= (size_t)(U32)-1);
+        assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
     }
     return 0;
@@ -3555,10 +3613,10 @@
 
     /* Reset the state to no dictionary */
     ZSTD_reset_compressedBlockState(&cdict->cBlockState);
-    {   void* const end = ZSTD_reset_matchState(
-                &cdict->matchState,
-                (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
-                &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
+    {   void* const end = ZSTD_reset_matchState(&cdict->matchState,
+                            (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
+                            &cParams,
+                             ZSTDcrp_continue, ZSTD_resetTarget_CDict);
         assert(end == (char*)cdict->workspace + cdict->workspaceSize);
         (void)end;
     }
@@ -4068,7 +4126,7 @@
         case zcss_flush:
             DEBUGLOG(5, "flush stage");
             {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                size_t const flushed = ZSTD_limitCopy(op, oend-op,
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                             zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                             (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
@@ -4262,7 +4320,7 @@
     if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
     /* single thread mode : attempt to calculate remaining to flush more precisely */
     {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
-        size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
         size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
         DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
         return toFlush;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index cc3cbb9..5495899 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -33,13 +33,13 @@
 ***************************************/
 #define kSearchStrength      8
 #define HASH_READ_SIZE       8
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                        It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                        It's not a big deal though : candidate will just be sorted again.
                                        Additionally, candidate position 1 will be lost.
                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
-                                       Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
 
 
 /*-*************************************
@@ -128,21 +128,20 @@
     BYTE const* base;       /* All regular indexes relative to this position */
     BYTE const* dictBase;   /* extDict indexes relative to this position */
     U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more data */
+    U32 lowLimit;           /* below that point, no more valid data */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
-    U32 loadedDictEnd;      /* index of end of dictionary */
+    U32 loadedDictEnd;      /* index of end of dictionary, expressed within the context's referential. When the dictionary's referential is copied into the active context (i.e. dictionary not attached), the referential starts from zero, so this is effectively the same value as dictSize */
     U32 nextToUpdate;       /* index from which to continue table update */
-    U32 nextToUpdate3;      /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table : larger == faster, more memory */
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
     optState_t opt;         /* optimal parser state */
-    const ZSTD_matchState_t * dictMatchState;
+    const ZSTD_matchState_t* dictMatchState;   /* set to NULL when the dictionary gets invalidated */
     ZSTD_compressionParameters cParams;
 };
 
@@ -195,6 +194,9 @@
     int compressionLevel;
     int forceWindow;           /* force back-references to respect limit of
                                 * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
 
     ZSTD_dictAttachPref_e attachDictPref;
     ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -324,7 +326,7 @@
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
     seqStorePtr->lit += litLength;
 
     /* literal Length */
@@ -564,6 +566,9 @@
 /*-*************************************
 *  Round buffer management
 ***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
 /* Max current allowed */
 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
 /* Maximum chunk size before overflow correction needs to be called again */
@@ -675,31 +680,49 @@
  * Updates lowLimit so that:
  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
  *
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
  *
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
  *
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
  */
 MEM_STATIC void
 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                           void const* srcEnd,
-                           U32 maxDist,
-                           U32* loadedDictEndPtr,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
                      const ZSTD_matchState_t** dictMatchStatePtr)
 {
-    U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
-    U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
-                (unsigned)blockEndIdx, (unsigned)maxDist);
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
     if (blockEndIdx > maxDist + loadedDictEnd) {
         U32 const newLowLimit = blockEndIdx - maxDist;
         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +731,31 @@
                         (unsigned)window->dictLimit, (unsigned)window->lowLimit);
             window->dictLimit = window->lowLimit;
         }
-        if (loadedDictEndPtr)
-            *loadedDictEndPtr = 0;
-        if (dictMatchStatePtr)
-            *dictMatchStatePtr = NULL;
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(), but never moves the window limits :
+ * it only invalidates the dictionary (*loadedDictEndPtr = 0, *dictMatchStatePtr = NULL)
+ * once blockEnd lies more than maxDist past the end of a loaded dictionary. */
+MEM_STATIC void
+ZSTD_checkDictValidity(ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,   /* may be NULL : handled as loadedDictEnd == 0 */
+                       const ZSTD_matchState_t** dictMatchStatePtr)   /* may be NULL : then left untouched */
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);   /* index of blockEnd, relative to window->base */
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {   /* loadedDictEnd == 0 : nothing to invalidate */
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
     }
 }
 
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index 47faf6d..5957255 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -43,8 +43,7 @@
             /* Only load extra positions for ZSTD_dtlm_full */
             if (dtlm == ZSTD_dtlm_fast)
                 break;
-        }
-    }
+    }   }
 }
 
 
@@ -63,7 +62,10 @@
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
-    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32 lowestValid = ms->window.dictLimit;
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
     const BYTE* const prefixLowest = base + prefixLowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -95,8 +97,15 @@
                                      dictCParams->chainLog : hBitsS;
     const U32 dictAndPrefixLength  = (U32)(ip - prefixLowest + dictEnd - dictStart);
 
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
     assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
 
+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(lowestValid + maxDistance >= endIndex);
+    }
+
     /* init */
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
@@ -138,7 +147,7 @@
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
             goto _match_stored;
         }
 
@@ -147,7 +156,7 @@
           && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
             goto _match_stored;
         }
 
@@ -170,8 +179,7 @@
                 offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
                 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                 goto _match_found;
-            }
-        }
+        }   }
 
         if (matchIndexS > prefixLowestIndex) {
             /* check prefix short match */
@@ -186,16 +194,14 @@
 
             if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
-            }
-        }
+        }   }
 
         ip += ((ip-anchor) >> kSearchStrength) + 1;
         continue;
 
 _search_next_long:
 
-        {
-            size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
             size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
             U32 const matchIndexL3 = hashLong[hl3];
             const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +227,7 @@
                     offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
                     while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                     goto _match_found;
-                }
-            }
-        }
+        }   }   }
 
         /* if no long +1 match, explore the short match we found */
         if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +246,7 @@
         offset_2 = offset_1;
         offset_1 = offset;
 
-        ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
 _match_stored:
         /* match found */
@@ -250,11 +254,14 @@
         anchor = ip;
 
         if (ip <= ilimit) {
-            /* Fill Table */
-            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
-                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;  /* here because current+2 could be > iend-8 */
-            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
-                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
 
             /* check immediate repcode */
             if (dictMode == ZSTD_dictMatchState) {
@@ -278,8 +285,7 @@
                         continue;
                     }
                     break;
-                }
-            }
+            }   }
 
             if (dictMode == ZSTD_noDict) {
                 while ( (ip <= ilimit)
@@ -294,14 +300,15 @@
                     ip += rLength;
                     anchor = ip;
                     continue;   /* faster when present ... (?) */
-    }   }   }   }
+        }   }   }
+    }   /* while (ip < ilimit) */
 
     /* save reps for next block */
     rep[0] = offset_1 ? offset_1 : offsetSaved;
     rep[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 
@@ -360,10 +367,15 @@
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    const U32   prefixStartIndex = ms->window.dictLimit;
     const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   maxDistance = 1U << cParams->windowLog;
+    const U32   lowestValid = ms->window.lowLimit;
+    const U32   lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
     const BYTE* const prefixStart = base + prefixStartIndex;
-    const U32   dictStartIndex = ms->window.lowLimit;
     const BYTE* const dictBase = ms->window.dictBase;
     const BYTE* const dictStart = dictBase + dictStartIndex;
     const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +383,10 @@
 
     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
 
+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
         const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +412,7 @@
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +423,7 @@
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +448,27 @@
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
                 continue;
         }   }
 
-        /* found a match : store it */
+        /* move to next sequence start */
         ip += mLength;
         anchor = ip;
 
         if (ip <= ilimit) {
-            /* Fill Table */
-            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
-            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
-            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
-            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
             /* check immediate repcode */
             while (ip <= ilimit) {
                 U32 const current2 = (U32)(ip-base);
@@ -475,7 +495,7 @@
     rep[1] = offset_2;
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index ed997b4..a05b8a4 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -13,7 +13,8 @@
 
 
 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
@@ -41,6 +42,7 @@
     }   }   }   }
 }
 
+
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -58,7 +60,10 @@
     const BYTE* ip0 = istart;
     const BYTE* ip1;
     const BYTE* anchor = istart;
-    const U32   prefixStartIndex = ms->window.dictLimit;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   maxDistance = 1U << cParams->windowLog;
+    const U32   validStartIndex = ms->window.dictLimit;
+    const U32   prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
     const BYTE* const prefixStart = base + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -165,7 +170,7 @@
     rep[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 
@@ -222,8 +227,15 @@
     const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
     const U32 dictHLog             = dictCParams->hashLog;
 
-    /* otherwise, we would get index underflow when translating a dict index
-     * into a local index */
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size. So we just check it. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+
+    /* ensure there will be no underflow
+     * when translating a dict index into a local index */
     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
 
     /* init */
@@ -251,7 +263,7 @@
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
         } else if ( (matchIndex <= prefixStartIndex) ) {
             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
             U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -271,7 +283,7 @@
                 } /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
             }
         } else if (MEM_read32(match) != MEM_read32(ip)) {
             /* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +298,7 @@
                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
         }
 
         /* match found */
@@ -327,7 +339,7 @@
     rep[1] = offset_2 ? offset_2 : offsetSaved;
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 size_t ZSTD_compressBlock_fast_dictMatchState(
@@ -366,15 +378,24 @@
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
-    const U32   dictStartIndex = ms->window.lowLimit;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   maxDistance = 1U << cParams->windowLog;
+    const U32   validLow = ms->window.lowLimit;
+    const U32   lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
+    const U32   dictStartIndex = lowLimit;
     const BYTE* const dictStart = dictBase + dictStartIndex;
-    const U32   prefixStartIndex = ms->window.dictLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
     const BYTE* const prefixStart = base + prefixStartIndex;
     const BYTE* const dictEnd = dictBase + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
     U32 offset_1=rep[0], offset_2=rep[1];
 
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
         const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -394,7 +415,7 @@
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
         } else {
             if ( (matchIndex < dictStartIndex) ||
                  (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -410,7 +431,7 @@
                 offset = current - matchIndex;
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
         }   }
 
         /* found a match : store it */
@@ -445,7 +466,7 @@
     rep[1] = offset_2;
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 53f998a..94d906c 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -83,7 +83,10 @@
     U32* largerPtr  = smallerPtr + 1;
     U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
     U32 dummy32;   /* to be nullified at the end */
-    U32 const windowLow = ms->window.lowLimit;
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+
 
     DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                 current, dictLimit, windowLow);
@@ -239,7 +242,9 @@
 
     const BYTE* const base = ms->window.base;
     U32    const current = (U32)(ip-base);
-    U32    const windowLow = ms->window.lowLimit;
+    U32    const maxDistance = 1U << cParams->windowLog;
+    U32    const windowValid = ms->window.lowLimit;
+    U32    const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
 
     U32*   const bt = ms->chainTable;
     U32    const btLog  = cParams->chainLog - 1;
@@ -490,8 +495,10 @@
     const U32 dictLimit = ms->window.dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
-    const U32 lowLimit = ms->window.lowLimit;
     const U32 current = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowValid = ms->window.lowLimit;
+    const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
     const U32 minChain = current > chainSize ? current - chainSize : 0;
     U32 nbAttempts = 1U << cParams->searchLog;
     size_t ml=4-1;
@@ -653,7 +660,6 @@
 
     /* init */
     ip += (dictAndPrefixLength == 0);
-    ms->nextToUpdate3 = ms->nextToUpdate;
     if (dictMode == ZSTD_noDict) {
         U32 const maxRep = (U32)(ip - prefixLowest);
         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@@ -933,7 +939,6 @@
     U32 offset_1 = rep[0], offset_2 = rep[1];
 
     /* init */
-    ms->nextToUpdate3 = ms->nextToUpdate;
     ip += (ip == prefixStart);
 
     /* Match Loop */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 784d20f..3dcf86e 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -447,7 +447,7 @@
         if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
-                &ldmState->window, /* cycleLog */ 0, maxDist, src);
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
             ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
         }
         /* 2. We enforce the maximum offset allowed.
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index efb69d3..e32e542 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -255,13 +255,13 @@
  * to provide a cost which is directly comparable to a match ending at same position */
 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
+    if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
 
     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-        int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
-                               + WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
-                               - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+        int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+                               + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
+                               - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
 #if 1
         return contribution;
 #else
@@ -278,7 +278,7 @@
                                      const optState_t* const optPtr,
                                      int optLevel)
 {
-    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+    int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
                            + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
     return contribution;
 }
@@ -372,13 +372,15 @@
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
 {
     U32* const hashTable3 = ms->hashTable3;
     U32 const hashLog3 = ms->hashLog3;
     const BYTE* const base = ms->window.base;
-    U32 idx = ms->nextToUpdate3;
-    U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
     assert(hashLog3 > 0);
 
@@ -387,6 +389,7 @@
         idx++;
     }
 
+    *nextToUpdate3 = target;
     return hashTable3[hash3];
 }
 
@@ -503,9 +506,11 @@
     }   }
 
     *smallerPtr = *largerPtr = 0;
-    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-    assert(matchEndIdx > current + 8);
-    return matchEndIdx - (current + 8);
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > current + 8);
+        return MAX(positions, matchEndIdx - (current + 8));
+    }
 }
 
 FORCE_INLINE_TEMPLATE
@@ -520,8 +525,13 @@
     DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                 idx, target, dictMode);
 
-    while(idx < target)
-        idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+    while(idx < target) {
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        assert(idx < (U32)(idx + forward));
+        idx += forward;
+    }
+    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
     ms->nextToUpdate = target;
 }
 
@@ -531,16 +541,18 @@
 
 FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
                     ZSTD_matchState_t* ms,
+                    U32* nextToUpdate3,
                     const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                    U32 rep[ZSTD_REP_NUM],
+                    const U32 rep[ZSTD_REP_NUM],
                     U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-                    ZSTD_match_t* matches,
                     const U32 lengthToBeat,
                     U32 const mls /* template */)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    U32 const maxDistance = 1U << cParams->windowLog;
     const BYTE* const base = ms->window.base;
     U32 const current = (U32)(ip-base);
     U32 const hashLog = cParams->hashLog;
@@ -556,8 +568,9 @@
     U32 const dictLimit = ms->window.dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
-    U32 const btLow = btMask >= current ? 0 : current - btMask;
-    U32 const windowLow = ms->window.lowLimit;
+    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
     U32 const matchLow = windowLow ? windowLow : 1;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -627,7 +640,7 @@
 
     /* HC3 match finder */
     if ((mls == 3) /*static*/ && (bestLength < mls)) {
-        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
         if ((matchIndex3 >= matchLow)
           & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
             size_t mlen;
@@ -653,9 +666,7 @@
                      (ip+mlen == iLimit) ) {  /* best possible length */
                     ms->nextToUpdate = current+1;  /* skip insertion */
                     return 1;
-                }
-            }
-        }
+        }   }   }
         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
     }
 
@@ -760,10 +771,13 @@
 
 
 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                         ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
                         const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                        U32 rep[ZSTD_REP_NUM], U32 const ll0,
-                        ZSTD_match_t* matches, U32 const lengthToBeat)
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const matchLengthSearch = cParams->minMatch;
@@ -772,12 +786,12 @@
     ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
     switch(matchLengthSearch)
     {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
+    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
     default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
-    case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
+    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
     case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
+    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
     }
 }
 
@@ -853,6 +867,7 @@
 
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -862,7 +877,6 @@
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
     assert(optLevel <= 2);
-    ms->nextToUpdate3 = ms->nextToUpdate;
     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
     ip += (ip==prefixStart);
 
@@ -873,7 +887,7 @@
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
+            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -970,7 +984,7 @@
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                 U32 const previousPrice = opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
+                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
                 if (!nbMatches) {
                     DEBUGLOG(7, "rPos:%u : no match found", cur);
@@ -1094,7 +1108,7 @@
     }   /* while (ip < ilimit) */
 
     /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }
 
 
@@ -1158,7 +1172,6 @@
     ms->window.dictLimit += (U32)srcSize;
     ms->window.lowLimit = ms->window.dictLimit;
     ms->nextToUpdate = ms->window.dictLimit;
-    ms->nextToUpdate3 = ms->window.dictLimit;
 
     /* re-inforce weight of collected statistics */
     ZSTD_upscaleStats(&ms->opt);
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index 38fbb90..9e537b8 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1129,9 +1129,14 @@
             size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
             size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
             assert(flushed <= produced);
+            assert(jobPtr->consumed <= jobPtr->src.size);
             toFlush = produced - flushed;
-            if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
-                /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
+            /* if toFlush==0, nothing is available to flush.
+             * However, jobID is expected to still be active:
+             * if jobID was already completed and fully flushed,
+             * ZSTDMT_flushProduced() should have already moved onto next job.
+             * Therefore, some input has not yet been consumed. */
+            if (toFlush==0) {
                 assert(jobPtr->consumed < jobPtr->src.size);
             }
         }
@@ -1148,12 +1153,16 @@
 
 static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
 {
-    if (params.ldmParams.enableLdm)
+    unsigned jobLog;
+    if (params.ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead. */
-        return MAX(21, params.cParams.chainLog + 4);
-    return MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(21, params.cParams.chainLog + 4);
+    } else {
+        jobLog = MAX(20, params.cParams.windowLog + 2);
+    }
+    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
 
 static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@@ -1197,7 +1206,7 @@
         ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                 - overlapRLog;
     }
-    assert(0 <= ovLog && ovLog <= 30);
+    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
     DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
     DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
@@ -1391,7 +1400,7 @@
         FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
 
     if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
-    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
@@ -1432,6 +1441,8 @@
     if (mtctx->targetSectionSize == 0) {
         mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
     }
+    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
         U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index 12e6bcb..12a5260 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -50,6 +50,7 @@
 #ifndef ZSTDMT_JOBSIZE_MIN
 #  define ZSTDMT_JOBSIZE_MIN (1 MB)
 #endif
+#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
 
 
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 675596f..e42872a 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -360,8 +360,11 @@
     sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
     RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
                     frameParameter_unsupported);
-
-    return skippableHeaderSize + sizeU32;
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
+        return skippableSize;
+    }
 }
 
 /** ZSTD_findDecompressedSize() :
@@ -378,11 +381,10 @@
 
         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
             size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-            if (ZSTD_isError(skippableSize))
-                return skippableSize;
-            if (srcSize < skippableSize) {
+            if (ZSTD_isError(skippableSize)) {
                 return ZSTD_CONTENTSIZE_ERROR;
             }
+            assert(skippableSize <= srcSize);
 
             src = (const BYTE *)src + skippableSize;
             srcSize -= skippableSize;
@@ -467,6 +469,8 @@
     if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
         && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
         frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
         return frameSizeInfo;
     } else {
         const BYTE* ip = (const BYTE*)src;
@@ -529,7 +533,6 @@
     return frameSizeInfo.compressedSize;
 }
 
-
 /** ZSTD_decompressBound() :
  *  compatible with legacy mode
  *  `src` must point to the start of a ZSTD frame or a skippeable frame
@@ -546,6 +549,7 @@
         unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
         if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
             return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
         src = (const BYTE*)src + compressedSize;
         srcSize -= compressedSize;
         bound += decompressedBound;
@@ -738,9 +742,8 @@
                         (unsigned)magicNumber, ZSTD_MAGICNUMBER);
             if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
                 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-                if (ZSTD_isError(skippableSize))
-                    return skippableSize;
-                RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
+                FORWARD_IF_ERROR(skippableSize);
+                assert(skippableSize <= srcSize);
 
                 src = (const BYTE *)src + skippableSize;
                 srcSize -= skippableSize;
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index a2a7eed..24f4859 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -505,7 +505,7 @@
     *nbSeqPtr = nbSeq;
 
     /* FSE table descriptors */
-    RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -637,9 +637,10 @@
     if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
 
     /* copy Literals */
-    ZSTD_copy8(op, *litPtr);
     if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    else
+        ZSTD_copy8(op, *litPtr);
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
@@ -686,13 +687,13 @@
 
     if (oMatchEnd > oend-(16-MINMATCH)) {
         if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op);
+            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
             match += oend_w - op;
             op = oend_w;
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -717,9 +718,11 @@
     if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
 
     /* copy Literals */
-    ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
     if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    else
+        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
+
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
@@ -766,13 +769,13 @@
 
     if (oMatchEnd > oend-(16-MINMATCH)) {
         if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op);
+            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
             match += oend_w - op;
             op = oend_w;
         }
         while (op < oMatchEnd) *op++ = *match++;
     } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
     }
     return sequenceLength;
 }
@@ -889,6 +892,7 @@
 }
 
 FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
@@ -918,6 +922,11 @@
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
 
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
             nbSeq--;
             {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@@ -930,6 +939,7 @@
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
         RETURN_ERROR_IF(nbSeq, corruption_detected);
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
@@ -1131,6 +1141,7 @@
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index 21464ad..6219967 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -526,10 +526,10 @@
  * Prepare a context for dictionary building.
  * The context is only dependent on the parameter `d` and can used multiple
  * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
  * The context must be destroyed with `COVER_ctx_destroy()`.
  */
-static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                           const size_t *samplesSizes, unsigned nbSamples,
                           unsigned d, double splitPoint) {
   const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@
       totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
     DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Check if there are at least 5 training samples */
   if (nbTrainSamples < 5) {
     DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Check if there's testing sample */
   if (nbTestSamples < 1) {
     DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
   }
   /* Zero the context */
   memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@
   if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
     DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
     COVER_ctx_destroy(ctx);
-    return 0;
+    return ERROR(memory_allocation);
   }
   ctx->freqs = NULL;
   ctx->d = d;
@@ -624,7 +624,7 @@
                 (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
   ctx->freqs = ctx->suffix;
   ctx->suffix = NULL;
-  return 1;
+  return 0;
 }
 
 void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -729,11 +729,11 @@
   /* Checks */
   if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
     DISPLAYLEVEL(1, "Cover parameters incorrect\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (nbSamples == 0) {
     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
   }
   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@
     return ERROR(dstSize_tooSmall);
   }
   /* Initialize context and activeDmers */
-  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
-                      parameters.d, parameters.splitPoint)) {
-    return ERROR(GENERIC);
+  {
+    size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d, parameters.splitPoint);
+    if (ZSTD_isError(initVal)) {
+      return initVal;
+    }
   }
   COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);
-    return ERROR(GENERIC);
+    return ERROR(memory_allocation);
   }
 
   DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@
         cctx, dst, dstCapacity, samples + offsets[i],
         samplesSizes[i], cdict);
     if (ZSTD_isError(size)) {
-      totalCompressedSize = ERROR(GENERIC);
+      totalCompressedSize = size;
       goto _compressCleanup;
     }
     totalCompressedSize += size;
@@ -886,9 +889,11 @@
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  * If this dictionary is the best so far save it and its parameters.
  */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                              ZDICT_cover_params_t parameters, void *dict,
-                              size_t dictSize) {
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                              COVER_dictSelection_t selection) {
+  void* dict = selection.dictContent;
+  size_t compressedSize = selection.totalCompressedSize;
+  size_t dictSize = selection.dictSize;
   if (!best) {
     return;
   }
@@ -914,6 +919,9 @@
         }
       }
       /* Save the dictionary, parameters, and size */
+      if (!dict) {
+        return;
+      }
       memcpy(best->dict, dict, dictSize);
       best->dictSize = dictSize;
       best->parameters = parameters;
@@ -926,6 +934,111 @@
   }
 }
 
+COVER_dictSelection_t COVER_dictSelectionError(size_t error) {  /* builds a selection that carries `error` and no dictionary */
+    COVER_dictSelection_t selection = { NULL, 0, error };  /* field order: dictContent, dictSize, totalCompressedSize */
+    return selection;
+}
+
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {  /* non-zero when the selection is an error result */
+  return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);  /* error code recorded, or no dictionary buffer */
+}
+
+void COVER_dictSelectionFree(COVER_dictSelection_t selection){  /* releases the dictionary buffer held by the selection */
+  free(selection.dictContent);  /* free(NULL) is a no-op, so a selection with NULL dictContent is safe to pass */
+}
+
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
+  /* Finalizes the dictionary, optionally picks a smaller one (params.shrinkDict); result buffer is malloc'ed, or an error selection. */
+  size_t largestDict = 0;
+  size_t largestCompressed = 0;
+  BYTE* customDictContentEnd = customDictContent + dictContentSize;
+
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;  /* percent -> multiplier, e.g. 5 -> 1.05 */
+  /* Allocation failure: report a real error code, not the requested size. */
+  if (!largestDictbuffer || !candidateDictBuffer) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(ERROR(memory_allocation));
+  }
+
+  /* Initial dictionary size and compressed size */
+  memcpy(largestDictbuffer, customDictContent, dictContentSize);
+  dictContentSize = ZDICT_finalizeDictionary(
+    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+    samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+  if (ZDICT_isError(dictContentSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(dictContentSize);
+  }
+
+  totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                       samplesBuffer, offsets,
+                                                       nbCheckSamples, nbSamples,
+                                                       largestDictbuffer, dictContentSize);
+
+  if (ZSTD_isError(totalCompressedSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(totalCompressedSize);
+  }
+  /* Shrinking disabled: the full-size dictionary is the result. */
+  if (params.shrinkDict == 0) {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+
+  largestDict = dictContentSize;
+  largestCompressed = totalCompressedSize;
+  dictContentSize = ZDICT_DICTSIZE_MIN;
+
+  /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
+  while (dictContentSize < largestDict) {
+    memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
+    dictContentSize = ZDICT_finalizeDictionary(
+      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+      samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);  /* candidate content = last dictContentSize bytes of the input */
+
+    if (ZDICT_isError(dictContentSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(dictContentSize);
+
+    }
+
+    totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                         samplesBuffer, offsets,
+                                                         nbCheckSamples, nbSamples,
+                                                         candidateDictBuffer, dictContentSize);
+
+    if (ZSTD_isError(totalCompressedSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(totalCompressedSize);
+    }
+    /* Accept the first (smallest) candidate whose compressed size is within tolerance of the largest dictionary's. */
+    if (totalCompressedSize <= largestCompressed * regressionTolerance) {
+      COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
+      free(largestDictbuffer);
+      return selection;
+    }
+    dictContentSize *= 2;
+  }
+  dictContentSize = largestDict;  /* no smaller candidate qualified: fall back to the largest dictionary */
+  totalCompressedSize = largestCompressed;
+  {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+}
+
 /**
  * Parameters for COVER_tryParameters().
  */
@@ -951,6 +1064,7 @@
   /* Allocate space for hash table, dict, and freqs */
   COVER_map_t activeDmers;
   BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
   U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1080,21 @@
   {
     const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                               dictBufferCapacity, parameters);
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
-        parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+        totalCompressedSize);
+
+    if (COVER_dictSelectionIsError(selection)) {
+      DISPLAYLEVEL(1, "Failed to select dictionary\n");
       goto _cleanup;
     }
   }
-  /* Check total compressed size */
-  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                       ctx->samples, ctx->offsets,
-                                                       ctx->nbTrainSamples, ctx->nbSamples,
-                                                       dict, dictBufferCapacity);
-
 _cleanup:
-  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                    dictBufferCapacity);
+  free(dict);
+  COVER_best_finish(data->best, parameters, selection);
   free(data);
   COVER_map_destroy(&activeDmers);
-  if (dict) {
-    free(dict);
-  }
+  COVER_dictSelectionFree(selection);
   if (freqs) {
     free(freqs);
   }
@@ -1010,6 +1116,7 @@
   const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
   const unsigned kIterations =
       (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  const unsigned shrinkDict = 0;
   /* Local variables */
   const int displayLevel = parameters->zParams.notificationLevel;
   unsigned iteration = 1;
@@ -1022,15 +1129,15 @@
   /* Checks */
   if (splitPoint <= 0 || splitPoint > 1) {
     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (kMinK < kMaxD || kMaxK < kMinK) {
     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
   }
   if (nbSamples == 0) {
     DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
   }
   if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
     DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1161,14 @@
     /* Initialize the context for this value of d */
     COVER_ctx_t ctx;
     LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
-      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-      COVER_best_destroy(&best);
-      POOL_free(pool);
-      return ERROR(GENERIC);
+    {
+      const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
+      if (ZSTD_isError(initVal)) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+        COVER_best_destroy(&best);
+        POOL_free(pool);
+        return initVal;
+      }
     }
     if (!warned) {
       COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1185,7 @@
         COVER_best_destroy(&best);
         COVER_ctx_destroy(&ctx);
         POOL_free(pool);
-        return ERROR(GENERIC);
+        return ERROR(memory_allocation);
       }
       data->ctx = &ctx;
       data->best = &best;
@@ -1085,6 +1195,7 @@
       data->parameters.d = d;
       data->parameters.splitPoint = splitPoint;
       data->parameters.steps = kSteps;
+      data->parameters.shrinkDict = shrinkDict;
       data->parameters.zParams.notificationLevel = g_displayLevel;
       /* Check the parameters */
       if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index efb4680..d9e0636 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -47,6 +47,15 @@
 } COVER_epoch_info_t;
 
 /**
+ * Struct used for the dictionary selection function.
+ */
+typedef struct COVER_dictSelection {
+  BYTE* dictContent;  /* selected dictionary buffer; NULL in an error result */
+  size_t dictSize;  /* size of dictContent in bytes */
+  size_t totalCompressedSize;  /* total compressed size measured with this dictionary, or a ZSTD error code */
+} COVER_dictSelection_t;
+
+/**
  * Computes the number of epochs and the size of each epoch.
  * We will make sure that each epoch gets at least 10 * k bytes.
  *
@@ -107,6 +116,32 @@
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  * If this dictionary is the best so far save it and its parameters.
  */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                       ZDICT_cover_params_t parameters, void *dict,
-                       size_t dictSize);
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                       COVER_dictSelection_t selection);
+/**
+ * Checks the result of COVER_selectDict(). Returns non-zero if
+ * totalCompressedSize is a ZSTD error code or dictContent is NULL.
+ */
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
+
+/**
+ * Builds an error result for COVER_selectDict(): dictContent is NULL,
+ * dictSize is 0, and totalCompressedSize is set to `error`.
+ */
+COVER_dictSelection_t COVER_dictSelectionError(size_t error);
+
+/**
+ * Always call after selectDict is called to free up used memory from
+ * newly created dictionary.
+ */
+void COVER_dictSelectionFree(COVER_dictSelection_t selection);
+
+/**
+ * Called to finalize the dictionary and select one based on whether or not
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
+ * smallest dictionary within a specified regression of the compressed size
+ * from the largest dictionary.
+ */
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+                       size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+                       size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 5b6b941..941bb5a 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -287,10 +287,10 @@
  * Prepare a context for dictionary building.
  * The context is only dependent on the parameter `d` and can used multiple
  * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
  */
-static int
+static size_t
 FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
                    const void* samplesBuffer,
                    const size_t* samplesSizes, unsigned nbSamples,
@@ -310,19 +310,19 @@
         totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
         DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                     (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
-        return 0;
+        return ERROR(srcSize_wrong);
     }
 
     /* Check if there are at least 5 training samples */
     if (nbTrainSamples < 5) {
         DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
     }
 
     /* Check if there's testing sample */
     if (nbTestSamples < 1) {
         DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
     }
 
     /* Zero the context */
@@ -347,7 +347,7 @@
     if (ctx->offsets == NULL) {
         DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
         FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
     }
 
     /* Fill offsets from the samplesSizes */
@@ -364,13 +364,13 @@
     if (ctx->freqs == NULL) {
         DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
         FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
     }
 
     DISPLAYLEVEL(2, "Computing frequencies\n");
     FASTCOVER_computeFrequency(ctx->freqs, ctx);
 
-    return 1;
+    return 0;
 }
 
 
@@ -435,7 +435,6 @@
   return tail;
 }
 
-
 /**
  * Parameters for FASTCOVER_tryParameters().
  */
@@ -464,6 +463,7 @@
   U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
   /* Allocate space for hash table, dict, and freqs */
   BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
   U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
   if (!segmentFreqs || !dict || !freqs) {
     DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -473,27 +473,24 @@
   memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
   /* Build the dictionary */
   { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
-                                                  parameters, segmentFreqs);
+                                                    parameters, segmentFreqs);
+
     const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+         ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+         totalCompressedSize);
+
+    if (COVER_dictSelectionIsError(selection)) {
+      DISPLAYLEVEL(1, "Failed to select dictionary\n");
       goto _cleanup;
     }
   }
-  /* Check total compressed size */
-  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                       ctx->samples, ctx->offsets,
-                                                       ctx->nbTrainSamples, ctx->nbSamples,
-                                                       dict, dictBufferCapacity);
 _cleanup:
-  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                    dictBufferCapacity);
+  free(dict);
+  COVER_best_finish(data->best, parameters, selection);
   free(data);
   free(segmentFreqs);
-  free(dict);
+  COVER_dictSelectionFree(selection);
   free(freqs);
 }
 
@@ -508,6 +505,7 @@
     coverParams->nbThreads = fastCoverParams.nbThreads;
     coverParams->splitPoint = fastCoverParams.splitPoint;
     coverParams->zParams = fastCoverParams.zParams;
+    coverParams->shrinkDict = fastCoverParams.shrinkDict;
 }
 
 
@@ -524,6 +522,7 @@
     fastCoverParams->f = f;
     fastCoverParams->accel = accel;
     fastCoverParams->zParams = coverParams.zParams;
+    fastCoverParams->shrinkDict = coverParams.shrinkDict;
 }
 
 
@@ -550,11 +549,11 @@
     if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
                                    parameters.accel)) {
       DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
     }
     if (nbSamples == 0) {
       DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
-      return ERROR(GENERIC);
+      return ERROR(srcSize_wrong);
     }
     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
       DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -564,11 +563,14 @@
     /* Assign corresponding FASTCOVER_accel_t to accelParams*/
     accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
     /* Initialize context */
-    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+    {
+      size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                             coverParams.d, parameters.splitPoint, parameters.f,
-                            accelParams)) {
-      DISPLAYLEVEL(1, "Failed to initialize context\n");
-      return ERROR(GENERIC);
+                            accelParams);
+      if (ZSTD_isError(initVal)) {
+        DISPLAYLEVEL(1, "Failed to initialize context\n");
+        return initVal;
+      }
     }
     COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
     /* Build the dictionary */
@@ -616,6 +618,7 @@
         (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
     const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
     const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
+    const unsigned shrinkDict = 0;
     /* Local variables */
     const int displayLevel = parameters->zParams.notificationLevel;
     unsigned iteration = 1;
@@ -627,19 +630,19 @@
     /* Checks */
     if (splitPoint <= 0 || splitPoint > 1) {
       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
     }
     if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
     }
     if (kMinK < kMaxD || kMaxK < kMinK) {
       LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
     }
     if (nbSamples == 0) {
       LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
-      return ERROR(GENERIC);
+      return ERROR(srcSize_wrong);
     }
     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
       LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -666,11 +669,14 @@
       /* Initialize the context for this value of d */
       FASTCOVER_ctx_t ctx;
       LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-      if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
-        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-        COVER_best_destroy(&best);
-        POOL_free(pool);
-        return ERROR(GENERIC);
+      {
+        size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
+        if (ZSTD_isError(initVal)) {
+          LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+          COVER_best_destroy(&best);
+          POOL_free(pool);
+          return initVal;
+        }
       }
       if (!warned) {
         COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -687,7 +693,7 @@
           COVER_best_destroy(&best);
           FASTCOVER_ctx_destroy(&ctx);
           POOL_free(pool);
-          return ERROR(GENERIC);
+          return ERROR(memory_allocation);
         }
         data->ctx = &ctx;
         data->best = &best;
@@ -697,6 +703,7 @@
         data->parameters.d = d;
         data->parameters.splitPoint = splitPoint;
         data->parameters.steps = kSteps;
+        data->parameters.shrinkDict = shrinkDict;
         data->parameters.zParams.notificationLevel = g_displayLevel;
         /* Check the parameters */
         if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index c753da0..ee21ee1 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -741,7 +741,7 @@
     /* analyze, build stats, starting with literals */
     {   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
         if (HUF_isError(maxNbBits)) {
-            eSize = ERROR(GENERIC);
+            eSize = maxNbBits;
             DISPLAYLEVEL(1, " HUF_buildCTable error \n");
             goto _cleanup;
         }
@@ -764,7 +764,7 @@
     total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
     errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
         goto _cleanup;
     }
@@ -773,7 +773,7 @@
     total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
     errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
         goto _cleanup;
     }
@@ -782,7 +782,7 @@
     total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
     errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
     if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
         DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
         goto _cleanup;
     }
@@ -791,7 +791,7 @@
     /* write result to buffer */
     {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
         if (HUF_isError(hhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = hhSize;
             DISPLAYLEVEL(1, "HUF_writeCTable error \n");
             goto _cleanup;
         }
@@ -802,7 +802,7 @@
 
     {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
         if (FSE_isError(ohSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = ohSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
             goto _cleanup;
         }
@@ -813,7 +813,7 @@
 
     {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
         if (FSE_isError(mhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = mhSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
             goto _cleanup;
         }
@@ -824,7 +824,7 @@
 
     {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
         if (FSE_isError(lhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = lhSize;
             DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
             goto _cleanup;
         }
@@ -834,7 +834,7 @@
     }
 
     if (maxDstSize<12) {
-        eSize = ERROR(GENERIC);
+        eSize = ERROR(dstSize_tooSmall);
         DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
         goto _cleanup;
     }
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index e229731..37978ec 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -94,6 +94,8 @@
     unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
     unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size, starting from the minimum size, and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
+    unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking: a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the dictionary of maximum size. */
     ZDICT_params_t zParams;
 } ZDICT_cover_params_t;
 
@@ -105,6 +107,9 @@
     unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
     double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
     unsigned accel;              /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size, starting from the minimum size, and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
+    unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking: a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the dictionary of maximum size. */
+
     ZDICT_params_t zParams;
 } ZDICT_fastCover_params_t;
 
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index e5b383e..0dbd3c7 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -238,6 +238,10 @@
             frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
             break;
     }
+    if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
+        frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
+        frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    }
     return frameSizeInfo;
 }
 
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index cad2b99..ae8cba2 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1073,99 +1073,102 @@
     const void* cSrc, size_t cSrcSize,
     const U16* DTable)
 {
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const omax = op + maxDstSize;
-    BYTE* const olimit = omax-15;
-
-    const void* ptr = DTable;
-    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-    U32 reloadStatus;
-
-    /* Init */
-
-    const U16* jumpTable = (const U16*)cSrc;
-    const size_t length1 = FSE_readLE16(jumpTable);
-    const size_t length2 = FSE_readLE16(jumpTable+1);
-    const size_t length3 = FSE_readLE16(jumpTable+2);
-    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
-    const char* const start1 = (const char*)(cSrc) + 6;
-    const char* const start2 = start1 + length1;
-    const char* const start3 = start2 + length2;
-    const char* const start4 = start3 + length3;
-    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
-
-    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
-
-    errorCode = FSE_initDStream(&bitD1, start1, length1);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD2, start2, length2);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD3, start3, length3);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD4, start4, length4);
-    if (FSE_isError(errorCode)) return errorCode;
-
-    reloadStatus=FSE_reloadDStream(&bitD2);
-
-    /* 16 symbols per loop */
-    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
-        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
     {
-#define HUF_DECODE_SYMBOL_0(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* op = ostart;
+        BYTE* const omax = op + maxDstSize;
+        BYTE* const olimit = omax-15;
 
-#define HUF_DECODE_SYMBOL_1(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+        const void* ptr = DTable;
+        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+        U32 reloadStatus;
 
-#define HUF_DECODE_SYMBOL_2(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+        /* Init */
 
-        HUF_DECODE_SYMBOL_1( 0, bitD1);
-        HUF_DECODE_SYMBOL_1( 1, bitD2);
-        HUF_DECODE_SYMBOL_1( 2, bitD3);
-        HUF_DECODE_SYMBOL_1( 3, bitD4);
-        HUF_DECODE_SYMBOL_2( 4, bitD1);
-        HUF_DECODE_SYMBOL_2( 5, bitD2);
-        HUF_DECODE_SYMBOL_2( 6, bitD3);
-        HUF_DECODE_SYMBOL_2( 7, bitD4);
-        HUF_DECODE_SYMBOL_1( 8, bitD1);
-        HUF_DECODE_SYMBOL_1( 9, bitD2);
-        HUF_DECODE_SYMBOL_1(10, bitD3);
-        HUF_DECODE_SYMBOL_1(11, bitD4);
-        HUF_DECODE_SYMBOL_0(12, bitD1);
-        HUF_DECODE_SYMBOL_0(13, bitD2);
-        HUF_DECODE_SYMBOL_0(14, bitD3);
-        HUF_DECODE_SYMBOL_0(15, bitD4);
-    }
+        const U16* jumpTable = (const U16*)cSrc;
+        const size_t length1 = FSE_readLE16(jumpTable);
+        const size_t length2 = FSE_readLE16(jumpTable+1);
+        const size_t length3 = FSE_readLE16(jumpTable+2);
+        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
+        const char* const start1 = (const char*)(cSrc) + 6;
+        const char* const start2 = start1 + length1;
+        const char* const start3 = start2 + length2;
+        const char* const start4 = start3 + length3;
+        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
 
-    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
-        return (size_t)-FSE_ERROR_corruptionDetected;
+        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
 
-    /* tail */
-    {
-        // bitTail = bitD1;   // *much* slower : -20% !??!
-        FSE_DStream_t bitTail;
-        bitTail.ptr = bitD1.ptr;
-        bitTail.bitsConsumed = bitD1.bitsConsumed;
-        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
-        bitTail.start = start1;
-        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+        errorCode = FSE_initDStream(&bitD1, start1, length1);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD2, start2, length2);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD3, start3, length3);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD4, start4, length4);
+        if (FSE_isError(errorCode)) return errorCode;
+
+        reloadStatus=FSE_reloadDStream(&bitD2);
+
+        /* 16 symbols per loop */
+        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
+            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
         {
-            HUF_DECODE_SYMBOL_0(0, bitTail);
+    #define HUF_DECODE_SYMBOL_0(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+    #define HUF_DECODE_SYMBOL_1(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+    #define HUF_DECODE_SYMBOL_2(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+            HUF_DECODE_SYMBOL_1( 0, bitD1);
+            HUF_DECODE_SYMBOL_1( 1, bitD2);
+            HUF_DECODE_SYMBOL_1( 2, bitD3);
+            HUF_DECODE_SYMBOL_1( 3, bitD4);
+            HUF_DECODE_SYMBOL_2( 4, bitD1);
+            HUF_DECODE_SYMBOL_2( 5, bitD2);
+            HUF_DECODE_SYMBOL_2( 6, bitD3);
+            HUF_DECODE_SYMBOL_2( 7, bitD4);
+            HUF_DECODE_SYMBOL_1( 8, bitD1);
+            HUF_DECODE_SYMBOL_1( 9, bitD2);
+            HUF_DECODE_SYMBOL_1(10, bitD3);
+            HUF_DECODE_SYMBOL_1(11, bitD4);
+            HUF_DECODE_SYMBOL_0(12, bitD1);
+            HUF_DECODE_SYMBOL_0(13, bitD2);
+            HUF_DECODE_SYMBOL_0(14, bitD3);
+            HUF_DECODE_SYMBOL_0(15, bitD4);
         }
 
-        if (FSE_endOfDStream(&bitTail))
-            return op-ostart;
+        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+            return (size_t)-FSE_ERROR_corruptionDetected;
+
+        /* tail */
+        {
+            // bitTail = bitD1;   // *much* slower : -20% !??!
+            FSE_DStream_t bitTail;
+            bitTail.ptr = bitD1.ptr;
+            bitTail.bitsConsumed = bitD1.bitsConsumed;
+            bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+            bitTail.start = start1;
+            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+            {
+                HUF_DECODE_SYMBOL_0(0, bitTail);
+            }
+
+            if (FSE_endOfDStream(&bitTail))
+                return op-ostart;
+        }
+
+        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+        return (size_t)-FSE_ERROR_corruptionDetected;
     }
-
-    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
-
-    return (size_t)-FSE_ERROR_corruptionDetected;
 }
 
 
@@ -1355,8 +1358,6 @@
 
 static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
 
-static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
-
 static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 
 static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@@ -1381,16 +1382,9 @@
     }
 }
 
-
-static U32 ZSTD_readLE32(const void* memPtr)
+static U32 ZSTD_readLE24(const void* memPtr)   /* returns ZSTD_readLE16(memPtr) plus the byte at memPtr[2] shifted left by 16, i.e. a 3-byte little-endian value; replaces the former ZSTD_readLE32(...) & 0xFFFFFF idiom */
 {
-    if (ZSTD_isLittleEndian())
-        return ZSTD_read32(memPtr);
-    else
-    {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
-    }
+    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
 }
 
 static U32 ZSTD_readBE32(const void* memPtr)
@@ -1704,13 +1698,13 @@
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
         else
         {
             if (dumps<=(de-3))
             {
-                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                litLength = ZSTD_readLE24(dumps);
                 dumps += 3;
             }
         }
@@ -1732,13 +1726,13 @@
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
         else
         {
             if (dumps<=(de-3))
             {
-                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                matchLength = ZSTD_readLE24(dumps);
                 dumps += 3;
             }
         }
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 561bc41..793df60 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -217,6 +217,11 @@
     }
 }
 
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)   /* returns MEM_readLE16(memPtr) plus the byte at memPtr[2] shifted left by 16, i.e. a 3-byte little-endian value */
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -3043,11 +3048,11 @@
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
         if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -3073,11 +3078,11 @@
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
         if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index a1bf0fa..7a0e7c9 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -219,6 +219,11 @@
     }
 }
 
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)   /* returns MEM_readLE16(memPtr) plus the byte at memPtr[2] shifted left by 16, i.e. a 3-byte little-endian value */
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -2684,11 +2689,11 @@
     seqState->prevOffset = seq->offset;
     if (litLength == MaxLL)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
         if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -2714,11 +2719,11 @@
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML)
     {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
         {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
         if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 4342330..645a6e3 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -189,6 +189,11 @@
     }
 }
 
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)   /* returns MEM_readLE16(memPtr) plus the byte at memPtr[2] shifted left by 16, i.e. a 3-byte little-endian value */
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -2808,13 +2813,12 @@
     litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
     prevOffset = litLength ? seq->offset : seqState->prevOffset;
     if (litLength == MaxLL) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) litLength += add;
-        else {
-            litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de) {
+            litLength = MEM_readLE24(dumps);
             dumps += 3;
         }
-        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
         if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
 
@@ -2837,13 +2841,12 @@
     /* MatchLength */
     matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
         if (add < 255) matchLength += add;
-        else {
-            matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de){
+            matchLength = MEM_readLE24(dumps);
             dumps += 3;
         }
-        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
         if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
     matchLength += MINMATCH;
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index caaf15f..a7ea606 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -218,6 +218,11 @@
     }
 }
 
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)   /* returns MEM_readLE16(memPtr) plus the byte at memPtr[2] shifted left by 16, i.e. a 3-byte little-endian value */
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
     if (MEM_isLittleEndian())
@@ -1998,91 +2003,92 @@
     const void* cSrc, size_t cSrcSize,
     const U16* DTable)
 {
-    const BYTE* const istart = (const BYTE*) cSrc;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-    const void* const dtPtr = DTable;
-    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-
-    /* Init */
-    BITv05_DStream_t bitD1;
-    BITv05_DStream_t bitD2;
-    BITv05_DStream_t bitD3;
-    BITv05_DStream_t bitD4;
-    const size_t length1 = MEM_readLE16(istart);
-    const size_t length2 = MEM_readLE16(istart+2);
-    const size_t length3 = MEM_readLE16(istart+4);
-    size_t length4;
-    const BYTE* const istart1 = istart + 6;  /* jumpTable */
-    const BYTE* const istart2 = istart1 + length1;
-    const BYTE* const istart3 = istart2 + length2;
-    const BYTE* const istart4 = istart3 + length3;
-    const size_t segmentSize = (dstSize+3) / 4;
-    BYTE* const opStart2 = ostart + segmentSize;
-    BYTE* const opStart3 = opStart2 + segmentSize;
-    BYTE* const opStart4 = opStart3 + segmentSize;
-    BYTE* op1 = ostart;
-    BYTE* op2 = opStart2;
-    BYTE* op3 = opStart3;
-    BYTE* op4 = opStart4;
-    U32 endSignal;
-
     /* Check */
     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
 
-    length4 = cSrcSize - (length1 + length2 + length3 + 6);
-    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
-    if (HUFv05_isError(errorCode)) return errorCode;
+        /* Init */
+        BITv05_DStream_t bitD1;
+        BITv05_DStream_t bitD2;
+        BITv05_DStream_t bitD3;
+        BITv05_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
 
-    /* 16-32 symbols per loop (4-8 symbols per stream) */
-    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
-    for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
-        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
         endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
     }
-
-    /* check corruption */
-    if (op1 > opStart2) return ERROR(corruption_detected);
-    if (op2 > opStart3) return ERROR(corruption_detected);
-    if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 supposed already verified within main loop */
-
-    /* finish bitStreams one by one */
-    HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-    HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-    HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-    HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-    /* check */
-    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
-    if (!endSignal) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
 }
 
 
@@ -3150,14 +3156,13 @@
     litLength = FSEv05_peakSymbol(&(seqState->stateLL));
     prevOffset = litLength ? seq->offset : seqState->prevOffset;
     if (litLength == MaxLL) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;   /* never read the extra-length byte once dumps has reached de */
         if (add < 255) litLength += add;
-        else {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
+        else if (dumps + 2 <= de) {   /* the short form consumes only 2 bytes (see dumps += 2 below), so 2 readable bytes are enough to enter */
+            litLength = MEM_readLE16(dumps) + ((dumps + 2 < de) ? ((U32)dumps[2] << 16) : 0);   /* third byte is read only when it lies inside [dumps, de) */
             if (litLength&1) litLength>>=1, dumps += 3;
             else litLength = (U16)(litLength)>>1, dumps += 2;
         }
-        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
         if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
 
@@ -3184,14 +3189,13 @@
     /* MatchLength */
     matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
     if (matchLength == MaxML) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;   /* never read the extra-length byte once dumps has reached de */
         if (add < 255) matchLength += add;
-        else {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+        else if (dumps + 2 <= de) {   /* the short form consumes only 2 bytes (see dumps += 2 below), so 2 readable bytes are enough to enter */
+            matchLength = MEM_readLE16(dumps) + ((dumps + 2 < de) ? ((U32)dumps[2] << 16) : 0);   /* third byte is read only when it lies inside [dumps, de) */
             if (matchLength&1) matchLength>>=1, dumps += 3;
             else matchLength = (U16)(matchLength)>>1, dumps += 2;
         }
-        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
         if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
     }
     matchLength += MINMATCH;
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index a695cbb..f907a3a 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -3242,14 +3242,12 @@
     }
 
     /* FSE table descriptors */
+    if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
     {   U32 const LLtype  = *ip >> 6;
         U32 const Offtype = (*ip >> 4) & 3;
         U32 const MLtype  = (*ip >> 2) & 3;
         ip++;
 
-        /* check */
-        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
         /* Build DTables */
         {   size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
             if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);
@@ -3672,7 +3670,7 @@
     blockProperties_t blockProperties = { bt_compressed, 0 };
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
+    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
         if (ZSTDv06_isError(frameHeaderSize)) {
             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
             return;
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 6b94889..a83ddc9 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -3470,14 +3470,12 @@
     }
 
     /* FSE table descriptors */
+    if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
     {   U32 const LLtype  = *ip >> 6;
         U32 const OFtype = (*ip >> 4) & 3;
         U32 const MLtype  = (*ip >> 2) & 3;
         ip++;
 
-        /* check */
-        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
         /* Build DTables */
         {   size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
             if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
@@ -3918,7 +3916,7 @@
     }
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
+    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
         if (ZSTDv07_isError(frameHeaderSize)) {
             ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
             return;
diff --git a/lib/zstd.h b/lib/zstd.h
index 53470c1..a1910ee 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -71,7 +71,7 @@
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  0
+#define ZSTD_VERSION_RELEASE  1
 
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library version */
@@ -82,16 +82,16 @@
 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
 ZSTDLIB_API const char* ZSTD_versionString(void);   /* requires v1.3.0+ */
 
-/***************************************
-*  Default constant
-***************************************/
+/* *************************************
+ *  Default constant
+ ***************************************/
 #ifndef ZSTD_CLEVEL_DEFAULT
 #  define ZSTD_CLEVEL_DEFAULT 3
 #endif
 
-/***************************************
-*  Constants
-***************************************/
+/* *************************************
+ *  Constants
+ ***************************************/
 
 /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
 #define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
@@ -183,9 +183,14 @@
 ***************************************/
 /*= Compression context
  *  When compressing many times,
- *  it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
  *  This will make workload friendlier for system's memory.
- *  Use one context per thread for parallel execution in multi-threaded environments. */
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use a separate context per thread for parallel execution.
+ */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -380,6 +385,7 @@
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -389,6 +395,7 @@
      ZSTD_c_experimentalParam3=1000,
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
 } ZSTD_cParameter;
 
 typedef struct {
@@ -657,17 +664,33 @@
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective endOp);
 
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
 
-/*******************************************************************************
- * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
- * ZSTD_compressStream2(). It is redundant, but is still fully supported.
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that more time ends up being spent crossing the interface than in compression itself.
+ * In such cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the number of round trips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * The following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
  * Advanced parameters and dictionary compression can only be used through the
  * new API.
  ******************************************************************************/
 
-/**
+/*!
  * Equivalent to:
  *
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -675,16 +698,16 @@
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
  */
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
-/**
+/*!
  * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
  * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
  * the next read size (if non-zero and not an error). ZSTD_compressStream2()
- * returns the number of bytes left to flush (if non-zero and not an error).
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
  */
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 
 
@@ -969,7 +992,7 @@
 #endif  /* ZSTD_H_235446 */
 
 
-/****************************************************************************************
+/* **************************************************************************************
  *   ADVANCED AND EXPERIMENTAL FUNCTIONS
  ****************************************************************************************
  * The definitions in the following section are considered experimental.
@@ -1037,6 +1060,10 @@
 #define ZSTD_LDM_HASHRATELOG_MIN     0
 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
 
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+
 /* internal */
 #define ZSTD_HASHLOG3_MAX           17
 
@@ -1162,7 +1189,7 @@
  *            however it does mean that all frame data must be present and valid. */
 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
 
-/** ZSTD_decompressBound() :
+/*! ZSTD_decompressBound() :
  *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
  *  `srcSize` must be the _exact_ size of this series
  *       (i.e. there should be a frame boundary at `src + srcSize`)
@@ -1409,6 +1436,11 @@
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
@@ -1843,7 +1875,7 @@
     unsigned checksumFlag;
 } ZSTD_frameHeader;
 
-/** ZSTD_getFrameHeader() :
+/*! ZSTD_getFrameHeader() :
  *  decode Frame Header, or requires larger `srcSize`.
  * @return : 0, `zfhPtr` is correctly filled,
  *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
diff --git a/programs/.gitignore b/programs/.gitignore
index 701830c..0a8e18f 100644
--- a/programs/.gitignore
+++ b/programs/.gitignore
@@ -33,4 +33,5 @@
 
 # Misc files
 *.bat
+!windres/generate_res.bat
 dirTest*
diff --git a/programs/README.md b/programs/README.md
index afbebaa..d9ef5dd 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -37,7 +37,7 @@
   `.gz` support is automatically enabled when `zlib` library is detected at build time.
   It's possible to disable `.gz` support, by setting `HAVE_ZLIB=0`.
   Example : `make zstd HAVE_ZLIB=0`
-  It's also possible to force compilation with zlib support, `using HAVE_ZLIB=1`.
+  It's also possible to force compilation with zlib support, using `HAVE_ZLIB=1`.
   In which case, linking stage will fail if `zlib` library cannot be found.
   This is useful to prevent silent feature disabling.
 
@@ -45,7 +45,7 @@
   This is ordered through commands `--format=xz` and `--format=lzma` respectively.
   Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
   `.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
-  It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0` .
+  It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0`.
   Example : `make zstd HAVE_LZMA=0`
   It's also possible to force compilation with lzma support, using `HAVE_LZMA=1`.
   In which case, linking stage will fail if `lzma` library cannot be found.
diff --git a/programs/benchfn.c b/programs/benchfn.c
index 0932d15..2a51a34 100644
--- a/programs/benchfn.c
+++ b/programs/benchfn.c
@@ -15,7 +15,6 @@
 ***************************************/
 #include <stdlib.h>      /* malloc, free */
 #include <string.h>      /* memset */
-#undef NDEBUG            /* assert must not be disabled */
 #include <assert.h>      /* assert */
 
 #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
@@ -54,6 +53,9 @@
     return retValue;                                  \
 }
 
+/* Abort execution if a condition is not met */
+#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
+
 
 /* *************************************
 *  Benchmarking an arbitrary function
@@ -68,13 +70,13 @@
  *           check outcome validity first, using BMK_isValid_runResult() */
 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
 {
-    assert(outcome.error_tag_never_ever_use_directly == 0);
+    CONTROL(outcome.error_tag_never_ever_use_directly == 0);
     return outcome.internal_never_ever_use_directly;
 }
 
 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
 {
-    assert(outcome.error_tag_never_ever_use_directly != 0);
+    CONTROL(outcome.error_tag_never_ever_use_directly != 0);
     return outcome.error_result_never_ever_use_directly;
 }
 
diff --git a/programs/fileio.c b/programs/fileio.c
index 7ada592..569a410 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -175,7 +175,7 @@
 
 #if !defined(BACKTRACE_ENABLE)
 /* automatic detector : backtrace enabled by default on linux+glibc and osx */
-#  if (defined(__linux__) && defined(__GLIBC__)) \
+#  if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
      || (defined(__APPLE__) && defined(__MACH__))
 #    define BACKTRACE_ENABLE 1
 #  else
@@ -269,6 +269,13 @@
         else
             return -1;
     }
+    static __int64 LONG_TELL(FILE* file) {
+        LARGE_INTEGER off, newOff;
+        off.QuadPart = 0;
+        newOff.QuadPart = 0;
+        SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
+        return newOff.QuadPart;
+    }
 #else
 #   define LONG_SEEK fseek
 #   define LONG_TELL ftell
@@ -297,6 +304,7 @@
     int ldmMinMatch;
     int ldmBucketSizeLog;
     int ldmHashRateLog;
+    size_t targetCBlockSize;
     ZSTD_literalCompressionMode_e literalCompressionMode;
 
     /* IO preferences */
@@ -341,6 +349,7 @@
     ret->ldmMinMatch = 0;
     ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
     ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
+    ret->targetCBlockSize = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
     return ret;
 }
@@ -409,6 +418,10 @@
     prefs->rsyncable = rsyncable;
 }
 
+void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
+    prefs->targetCBlockSize = targetCBlockSize;
+}
+
 void FIO_setLiteralCompressionMode(
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode) {
@@ -557,8 +570,11 @@
     }   }
 
     {   FILE* const f = fopen( dstFileName, "wb" );
-        if (f == NULL)
+        if (f == NULL) {
             DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
+        } else {
+            chmod(dstFileName, 00600);
+        }
         return f;
     }
 }
@@ -649,6 +665,8 @@
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
         /* compression level */
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
+        /* target compressed block size */
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
         /* long distance matching */
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
@@ -1158,6 +1176,8 @@
                               const char* dstFileName, const char* srcFileName,
                               int compressionLevel)
 {
+    UTIL_time_t const timeStart = UTIL_getTime();
+    clock_t const cpuStart = clock();
     U64 readsize = 0;
     U64 compressedfilesize = 0;
     U64 const fileSize = UTIL_getFileSize(srcFileName);
@@ -1210,6 +1230,15 @@
         (unsigned long long)readsize, (unsigned long long) compressedfilesize,
          dstFileName);
 
+    /* Elapsed Time and CPU Load */
+    {   clock_t const cpuEnd = clock();
+        double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
+        U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
+        double const timeLength_s = (double)timeLength_ns / 1000000000;
+        double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
+        DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec  (cpu load : %.0f%%)\n",
+                        srcFileName, timeLength_s, cpuLoad_pct);
+    }
     return 0;
 }
 
@@ -1332,15 +1361,12 @@
                          const char* dictFileName, int compressionLevel,
                          ZSTD_compressionParameters comprParams)
 {
-    clock_t const start = clock();
     U64 const fileSize = UTIL_getFileSize(srcFileName);
     U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
 
     cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
     int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
 
-    double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
-    DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
 
     FIO_freeCResources(ress);
     return result;
diff --git a/programs/fileio.h b/programs/fileio.h
index e466337..311f8c0 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -71,6 +71,7 @@
 void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
 void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse);  /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
+void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
 void FIO_setLiteralCompressionMode(
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode);
diff --git a/programs/util.c b/programs/util.c
index 7b827d4..6190bca 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -107,20 +107,12 @@
 U32 UTIL_isLink(const char* infilename)
 {
 /* macro guards, as defined in : https://linux.die.net/man/2/lstat */
-#ifndef __STRICT_ANSI__
-#if defined(_BSD_SOURCE) \
-    || (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
-    || (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
-    || (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
-    || (defined(__APPLE__) && defined(__MACH__)) \
-    || defined(__OpenBSD__) \
-    || defined(__FreeBSD__)
+#if PLATFORM_POSIX_VERSION >= 200112L
     int r;
     stat_t statbuf;
     r = lstat(infilename, &statbuf);
     if (!r && S_ISLNK(statbuf.st_mode)) return 1;
 #endif
-#endif
     (void)infilename;
     return 0;
 }
diff --git a/programs/zstd.1 b/programs/zstd.1
index cb4e127..beca9da 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "December 2018" "zstd 1.3.8" "User Commands"
+.TH "ZSTD" "1" "July 2019" "zstd 1.4.1" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -187,6 +187,10 @@
 suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
 .
 .TP
+\fB\-\-no\-progress\fR
+do not display the progress bar, but keep all other messages\.
+.
+.TP
 \fB\-C\fR, \fB\-\-[no\-]check\fR
 add integrity check computed from uncompressed data (default: enabled)
 .
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index fbb1c04..a13c924 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -141,6 +141,7 @@
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
     DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
     DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
+    DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
 #ifdef ZSTD_MULTITHREAD
     DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
     DISPLAY( " -B#    : select size of each job (default: 0==automatic) \n");
@@ -179,8 +180,8 @@
     DISPLAY( "\n");
     DISPLAY( "Dictionary builder : \n");
     DISPLAY( "--train ## : create a dictionary from a training set of files \n");
-    DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args\n");
-    DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fast cover algorithm with optional args\n");
+    DISPLAY( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n");
+    DISPLAY( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n");
     DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
     DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
     DISPLAY( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
@@ -299,6 +300,7 @@
  * @return 1 means that cover parameters were correct
  * @return 0 in case of malformed parameters
  */
+static const unsigned kDefaultRegression = 1;
 static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
 {
     memset(params, 0, sizeof(*params));
@@ -311,10 +313,23 @@
           params->splitPoint = (double)splitPercentage / 100.0;
           if (stringPtr[0]==',') { stringPtr++; continue; } else break;
         }
+        if (longCommandWArg(&stringPtr, "shrink")) {
+          params->shrinkDictMaxRegression = kDefaultRegression;
+          params->shrinkDict = 1;
+          if (stringPtr[0]=='=') {
+            stringPtr++;
+            params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
+          }
+          if (stringPtr[0]==',') {
+            stringPtr++;
+            continue;
+          }
+          else break;
+        }
         return 0;
     }
     if (stringPtr[0] != 0) return 0;
-    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100));
+    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink=%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
     return 1;
 }
 
@@ -338,10 +353,23 @@
           params->splitPoint = (double)splitPercentage / 100.0;
           if (stringPtr[0]==',') { stringPtr++; continue; } else break;
         }
+        if (longCommandWArg(&stringPtr, "shrink")) {
+          params->shrinkDictMaxRegression = kDefaultRegression;
+          params->shrinkDict = 1;
+          if (stringPtr[0]=='=') {
+            stringPtr++;
+            params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
+          }
+          if (stringPtr[0]==',') {
+            stringPtr++;
+            continue;
+          }
+          else break;
+        }
         return 0;
     }
     if (stringPtr[0] != 0) return 0;
-    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel);
+    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
     return 1;
 }
 
@@ -367,6 +395,8 @@
     params.d = 8;
     params.steps = 4;
     params.splitPoint = 1.0;
+    params.shrinkDict = 0;
+    params.shrinkDictMaxRegression = kDefaultRegression;
     return params;
 }
 
@@ -379,6 +409,8 @@
     params.steps = 4;
     params.splitPoint = 0.75; /* different from default splitPoint of cover */
     params.accel = DEFAULT_ACCEL;
+    params.shrinkDict = 0;
+    params.shrinkDictMaxRegression = kDefaultRegression;
     return params;
 }
 #endif
@@ -555,6 +587,7 @@
     const char* suffix = ZSTD_EXTENSION;
     unsigned maxDictSize = g_defaultMaxDictSize;
     unsigned dictID = 0;
+    size_t targetCBlockSize = 0;
     int dictCLevel = g_defaultDictCLevel;
     unsigned dictSelect = g_defaultSelectivityLevel;
 #ifdef UTIL_HAS_CREATEFILELIST
@@ -588,11 +621,11 @@
     /* preset behaviors */
     if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
     if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
-    if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }     /* supports multiple formats */
-    if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }    /* behave like zcat, also supports multiple formats */
+    if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }     /* supports multiple formats */
+    if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }    /* behave like zcat, also supports multiple formats */
     if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); }        /* behave like gzip */
     if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); }                                                     /* behave like gunzip, also supports multiple formats */
-    if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; forceStdout=1; FIO_overwriteMode(prefs); outFileName=stdoutmark; g_displayLevel=1; }   /* behave like gzcat, also supports multiple formats */
+    if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; }   /* behave like gzcat, also supports multiple formats */
     if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); }    /* behave like lzma */
     if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */
     if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); }          /* behave like xz */
@@ -711,6 +744,7 @@
                     if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
+                    if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--long")) {
                         unsigned ldmWindowLog = 0;
                         ldmFlag = 1;
@@ -1115,6 +1149,7 @@
         FIO_setAdaptMin(prefs, adaptMin);
         FIO_setAdaptMax(prefs, adaptMax);
         FIO_setRsyncable(prefs, rsyncable);
+        FIO_setTargetCBlockSize(prefs, targetCBlockSize);
         FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
         if (adaptMin > cLevel) cLevel = adaptMin;
         if (adaptMax < cLevel) cLevel = adaptMax;
@@ -1124,7 +1159,7 @@
         else
           operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; /* not used when ZSTD_NOCOMPRESS set */
+        (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
         DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
diff --git a/programs/zstdgrep b/programs/zstdgrep
index cb804b8..4879fb0 100755
--- a/programs/zstdgrep
+++ b/programs/zstdgrep
@@ -58,6 +58,9 @@
                     shift 2
                     break
                     ;;
+                -f)
+                    pattern_found=2
+                    ;;
                 *)
                     ;;
             esac
@@ -117,7 +120,11 @@
     set -f
     while [ "$#" -gt 0 ]; do
         # shellcheck disable=SC2086
-        "${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
+        if [ $pattern_found -eq 2 ]; then
+            "${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
+        else
+            "${zcat}" -fq -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
+        fi
         [ "$?" -ne 0 ] && EXIT_CODE=1
         shift
     done
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index 57bc14d..d0a0292 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDGREP" "1" "December 2018" "zstd 1.3.8" "User Commands"
+.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.1" "User Commands"
 .
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
diff --git a/programs/zstdless.1 b/programs/zstdless.1
index ff39742..4e21d5a 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDLESS" "1" "December 2018" "zstd 1.3.8" "User Commands"
+.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.1" "User Commands"
 .
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
diff --git a/tests/.gitignore b/tests/.gitignore
index 1f08c39..4edf6ce 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -55,6 +55,7 @@
 tmp*
 *.zst
 *.gz
+!gzip/hufts-segv.gz
 result
 out
 *.zstd
diff --git a/tests/Makefile b/tests/Makefile
index f11b731..bd2f909 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -215,6 +215,9 @@
 longmatch  : $(ZSTD_OBJECTS) longmatch.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
+bigdict: $(ZSTDMT_OBJECTS) $(PRGDIR)/datagen.c bigdict.c
+	$(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
+
 invalidDictionaries : $(ZSTD_OBJECTS) invalidDictionaries.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
@@ -247,7 +250,7 @@
 	$(MAKE) -C $(ZSTDDIR) clean
 	$(MAKE) -C $(PRGDIR) clean
 	@$(RM) -fR $(TESTARTEFACT)
-	@$(RM) -f core *.o tmp* result* *.gcda dictionary *.zst \
+	@$(RM) -f core *.o tmp* *.tmp result* *.gcda dictionary *.zst \
         $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fullbench-lib$(EXT) fullbench-dll$(EXT) \
@@ -256,7 +259,7 @@
         zstreamtest$(EXT) zstreamtest32$(EXT) \
         datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
         symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
-        decodecorpus$(EXT) checkTag$(EXT)
+        decodecorpus$(EXT) checkTag$(EXT) bigdict$(EXT)
 	@echo Cleaning completed
 
 
@@ -359,6 +362,9 @@
 	-echo 'hello world' > test.txt && $(PRGDIR)/zstd test.txt
 	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep hello test.txt.zst
 	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep weird test.txt.zst && return 1 || return 0
+	-echo 'hello' > pattern.txt
+	env ZCAT=/tmp/zstdcat $(PRGDIR)/zstdgrep -f pattern.txt test.txt.zst
+	$(RM) test.txt test.txt.zst pattern.txt
 
 test-fullbench: fullbench datagen
 	$(QEMU_SYS) ./fullbench -i1
@@ -394,6 +400,9 @@
 test-longmatch: longmatch
 	$(QEMU_SYS) ./longmatch
 
+test-bigdict: bigdict
+	$(QEMU_SYS) ./bigdict
+
 test-invalidDictionaries: invalidDictionaries
 	$(QEMU_SYS) ./invalidDictionaries
 
diff --git a/tests/bigdict.c b/tests/bigdict.c
new file mode 100644
index 0000000..11501f6
--- /dev/null
+++ b/tests/bigdict.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "datagen.h"
+#include "mem.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+static int   /* returns 0 on success, 1 on any failure */
+compress(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+         void* dst, size_t dstCapacity,
+         void const* src, size_t srcSize,
+         void* roundtrip, ZSTD_EndDirective end)   /* roundtrip: scratch area used with a capacity of srcSize bytes */
+{
+    ZSTD_inBuffer in = {src, srcSize, 0};
+    ZSTD_outBuffer out = {dst, dstCapacity, 0};
+    int ended = 0;   /* set to 1 once a ZSTD_e_end call returns 0 */
+
+    while (!ended && (in.pos < in.size || out.pos > 0)) {   /* loop while input remains or the last call still produced output */
+        size_t rc;
+        out.pos = 0;   /* dst is refilled from its start on every call */
+        rc = ZSTD_compressStream2(cctx, &out, &in, end);
+        if (ZSTD_isError(rc))
+            return 1;
+        if (end == ZSTD_e_end && rc == 0)
+            ended = 1;
+        {   /* immediately feed the bytes just produced to the decoder */
+            ZSTD_inBuffer rtIn = {dst, out.pos, 0};
+            ZSTD_outBuffer rtOut = {roundtrip, srcSize, 0};
+            rc = 1;   /* nonzero default: if nothing gets decoded, the "ended" check below fails */
+            while (rtIn.pos < rtIn.size || rtOut.pos > 0) {
+                rtOut.pos = 0;   /* decoded bytes are overwritten each pass and never compared against src */
+                rc = ZSTD_decompressStream(dctx, &rtOut, &rtIn);
+                if (ZSTD_isError(rc)) {
+                    fprintf(stderr, "Decompression error: %s\n", ZSTD_getErrorName(rc));
+                    return 1;
+                }
+                if (rc == 0)   /* decoder reports the frame as complete */
+                    break;
+            }
+            if (ended && rc != 0) {   /* compressor finished the frame but the decoder did not */
+                fprintf(stderr, "Frame not finished!\n");
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int main(int argc, const char** argv)   /* exit code: 0 on success, 1 on any failure */
+{
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    ZSTD_DCtx* dctx = ZSTD_createDCtx();
+    const size_t dataSize = (size_t)1 << 30;   /* 1 GB handed to each compress() call */
+    const size_t outSize = ZSTD_compressBound(dataSize);
+    const size_t bufferSize = (size_t)1 << 31;   /* 2 GB generated; only the first dataSize bytes are compressed */
+    char* buffer = (char*)malloc(bufferSize);
+    void* out = malloc(outSize);
+    void* roundtrip = malloc(dataSize);
+    (void)argc;
+    (void)argv;
+
+    if (!buffer || !out || !roundtrip || !cctx || !dctx) {
+        fprintf(stderr, "Allocation failure\n");
+        return 1;
+    }
+
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 31)))   /* any rejected parameter aborts the test silently */
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_overlapLog, 9)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, ZSTD_btopt)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, 7)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, 7)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, 1)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 10)))
+        return 1;
+    if (ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, 10)))
+        return 1;
+
+    if (ZSTD_isError(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 31)))   /* matches the windowLog of 31 set on the compressor */
+        return 1;
+
+    RDG_genBuffer(buffer, bufferSize, 1.0, 0.0, 0xbeefcafe);   /* third argument is the compressibility, as in fullbench.c */
+
+    /* Compress 11 GB: 10 chunks of 1 GB here, plus a last one below that ends the frame */
+    {
+        int i;
+        for (i = 0; i < 10; ++i) {
+            fprintf(stderr, "Compressing 1 GB\n");
+            if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_continue))
+                return 1;
+        }
+    }
+    fprintf(stderr, "Compressing 1 GB\n");
+    if (compress(cctx, dctx, out, outSize, buffer, dataSize, roundtrip, ZSTD_e_end))   /* final chunk closes the frame */
+        return 1;
+
+    fprintf(stderr, "Success!\n");
+
+    free(roundtrip);   /* reached on the success path only; error paths return without cleanup */
+    free(out);
+    free(buffer);
+    ZSTD_freeDCtx(dctx);
+    ZSTD_freeCCtx(cctx);
+    return 0;
+}
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 9910d3c..df40862 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -840,16 +840,16 @@
     {   unsigned max = MaxLL;
         size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
         assert(!HIST_isError(mostFrequent));
-        if (mostFrequent == nbSeq) {
-            /* do RLE if we have the chance */
-            *op++ = llCodeTable[0];
-            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-            LLtype = set_rle;
-        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                    isSymbolSubset(llCodeTable, nbSeq,
                                   frame->stats.litlengthSymbolSet, 35)) {
             /* maybe do repeat mode if we're allowed to */
             LLtype = set_repeat;
+        } else if (mostFrequent == nbSeq) {
+            /* do RLE if we have the chance */
+            *op++ = llCodeTable[0];
+            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
+            LLtype = set_rle;
         } else if (!(RAND(seed) & 3)) {
             /* maybe use the default distribution */
             FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
@@ -872,14 +872,14 @@
     {   unsigned max = MaxOff;
         size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
         assert(!HIST_isError(mostFrequent));
-        if (mostFrequent == nbSeq) {
-            *op++ = ofCodeTable[0];
-            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-            Offtype = set_rle;
-        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                    isSymbolSubset(ofCodeTable, nbSeq,
                                   frame->stats.offsetSymbolSet, 28)) {
             Offtype = set_repeat;
+        } else if (mostFrequent == nbSeq) {
+            *op++ = ofCodeTable[0];
+            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
+            Offtype = set_rle;
         } else if (!(RAND(seed) & 3)) {
             FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_basic;
@@ -900,14 +900,14 @@
     {   unsigned max = MaxML;
         size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP));   /* cannot fail */
         assert(!HIST_isError(mostFrequent));
-        if (mostFrequent == nbSeq) {
-            *op++ = *mlCodeTable;
-            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-            MLtype = set_rle;
-        } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
+        if (frame->stats.fseInit && !(RAND(seed) & 3) &&
                    isSymbolSubset(mlCodeTable, nbSeq,
                                   frame->stats.matchlengthSymbolSet, 52)) {
             MLtype = set_repeat;
+        } else if (mostFrequent == nbSeq) {
+            *op++ = *mlCodeTable;
+            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
+            MLtype = set_rle;
         } else if (!(RAND(seed) & 3)) {
             /* sometimes do default distribution */
             FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
diff --git a/tests/fullbench.c b/tests/fullbench.c
index b06e2ed..f750ee0 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -15,7 +15,7 @@
 #include "util.h"        /* Compiler options, UTIL_GetFileSize */
 #include <stdlib.h>      /* malloc */
 #include <stdio.h>       /* fprintf, fopen, ftello64 */
-#include <assert.h>      /* assert */
+#include <assert.h>
 
 #include "timefn.h"      /* UTIL_clockSpanNano, UTIL_getTime */
 #include "mem.h"         /* U32 */
@@ -31,8 +31,8 @@
 #include "zstd.h"        /* ZSTD_versionString */
 #include "util.h"        /* time functions */
 #include "datagen.h"
-#include "benchfn.h"       /* CustomBench*/
-#include "benchzstd.h"     /* MB_UNIT */
+#include "benchfn.h"     /* CustomBench */
+#include "benchzstd.h"   /* MB_UNIT */
 
 
 /*_************************************
@@ -51,7 +51,7 @@
 #define DEFAULT_CLEVEL 1
 
 #define COMPRESSIBILITY_DEFAULT 0.50
-static const size_t g_sampleSize = 10000000;
+static const size_t kSampleSizeDefault = 10000000;
 
 #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
 
@@ -61,12 +61,12 @@
 **************************************/
 #define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
 
+#define CONTROL(c)  { if (!(c)) { abort(); } }   /* like assert(), but cannot be disabled */
 
 /*_************************************
 *  Benchmark Parameters
 **************************************/
 static unsigned g_nbIterations = NBLOOPS;
-static double g_compressibility = COMPRESSIBILITY_DEFAULT;
 
 
 /*_*******************************************************
@@ -100,12 +100,12 @@
 static size_t
 local_ZSTD_compress(const void* src, size_t srcSize,
                     void* dst, size_t dstSize,
-                    void* buff2)
+                    void* payload)
 {
     ZSTD_parameters p;
     ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
     p.fParams = f;
-    p.cParams = *(ZSTD_compressionParameters*)buff2;
+    p.cParams = *(ZSTD_compressionParameters*)payload;
     return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p);
     //return ZSTD_compress(dst, dstSize, src, srcSize, cLevel);
 }
@@ -126,7 +126,7 @@
 static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
 {
     (void)src; (void)srcSize; (void)dst; (void)dstSize;
-    return ZSTD_decodeLiteralsBlock((ZSTD_DCtx*)g_zdc, buff2, g_cSize);
+    return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize);
 }
 
 static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
@@ -141,14 +141,14 @@
 static size_t
 local_ZSTD_compressStream(const void* src, size_t srcSize,
                           void* dst, size_t dstCapacity,
-                          void* buff2)
+                          void* payload)
 {
     ZSTD_outBuffer buffOut;
     ZSTD_inBuffer buffIn;
     ZSTD_parameters p;
     ZSTD_frameParameters f = {1 /* contentSizeHeader*/, 0, 0};
     p.fParams = f;
-    p.cParams = *(ZSTD_compressionParameters*)buff2;
+    p.cParams = *(ZSTD_compressionParameters*)payload;
     ZSTD_initCStream_advanced(g_cstream, NULL, 0, p, ZSTD_CONTENTSIZE_UNKNOWN);
     buffOut.dst = dst;
     buffOut.size = dstCapacity;
@@ -162,22 +162,38 @@
 }
 
 static size_t
+local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize,
+                          void* dst, size_t dstCapacity,
+                          void* payload)   /* payload is forwarded unchanged to local_ZSTD_compressStream */
+{
+    ZSTD_CCtx* const cctx = ZSTD_createCCtx();   /* a context is created and freed on every call */
+    size_t r;
+    assert(cctx != NULL);
+
+    r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);   /* NOTE(review): cctx is not passed on; the visible part of local_ZSTD_compressStream initializes g_cstream -- confirm this is intended */
+
+    ZSTD_freeCCtx(cctx);
+
+    return r;   /* result of the delegated compression call */
+}
+
+static size_t
 local_ZSTD_compress_generic_end(const void* src, size_t srcSize,
                                 void* dst, size_t dstCapacity,
-                                void* buff2)
+                                void* payload)
 {
-    (void)buff2;
+    (void)payload;
     return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
 }
 
 static size_t
 local_ZSTD_compress_generic_continue(const void* src, size_t srcSize,
                                      void* dst, size_t dstCapacity,
-                                     void* buff2)
+                                     void* payload)
 {
     ZSTD_outBuffer buffOut;
     ZSTD_inBuffer buffIn;
-    (void)buff2;
+    (void)payload;
     buffOut.dst = dst;
     buffOut.size = dstCapacity;
     buffOut.pos = 0;
@@ -192,9 +208,9 @@
 static size_t
 local_ZSTD_compress_generic_T2_end(const void* src, size_t srcSize,
                                    void* dst, size_t dstCapacity,
-                                   void* buff2)
+                                   void* payload)
 {
-    (void)buff2;
+    (void)payload;
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
     return ZSTD_compress2(g_cstream, dst, dstCapacity, src, srcSize);
 }
@@ -202,11 +218,11 @@
 static size_t
 local_ZSTD_compress_generic_T2_continue(const void* src, size_t srcSize,
                                         void* dst, size_t dstCapacity,
-                                        void* buff2)
+                                        void* payload)
 {
     ZSTD_outBuffer buffOut;
     ZSTD_inBuffer buffIn;
-    (void)buff2;
+    (void)payload;
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_nbWorkers, 2);
     buffOut.dst = dst;
     buffOut.size = dstCapacity;
@@ -242,27 +258,28 @@
 #ifndef ZSTD_DLL_IMPORT
 static size_t local_ZSTD_compressContinue(const void* src, size_t srcSize,
                                           void* dst, size_t dstCapacity,
-                                          void* buff2)
+                                          void* payload)
 {
     ZSTD_parameters p;
     ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
     p.fParams = f;
-    p.cParams = *(ZSTD_compressionParameters*)buff2;
+    p.cParams = *(ZSTD_compressionParameters*)payload;
     ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
     return ZSTD_compressEnd(g_zcc, dst, dstCapacity, src, srcSize);
 }
 
 #define FIRST_BLOCK_SIZE 8
-static size_t local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
-                                                  void* dst, size_t dstCapacity,
-                                                  void* buff2)
+static size_t
+local_ZSTD_compressContinue_extDict(const void* src, size_t srcSize,
+                                    void* dst, size_t dstCapacity,
+                                    void* payload)
 {
     BYTE firstBlockBuf[FIRST_BLOCK_SIZE];
 
     ZSTD_parameters p;
-    ZSTD_frameParameters f = { 1, 0, 0 };
+    ZSTD_frameParameters const f = { 1, 0, 0 };
     p.fParams = f;
-    p.cParams = *(ZSTD_compressionParameters*)buff2;
+    p.cParams = *(ZSTD_compressionParameters*)payload;
     ZSTD_compressBegin_advanced(g_zcc, NULL, 0, p, srcSize);
     memcpy(firstBlockBuf, src, FIRST_BLOCK_SIZE);
 
@@ -318,7 +335,7 @@
     size_t dstBuffSize = ZSTD_compressBound(srcSize);
     BYTE*  dstBuff;
     void*  dstBuff2;
-    void*  buff2;
+    void*  payload;
     const char* benchName;
     BMK_benchFn_t benchFunction;
     int errorcode = 0;
@@ -355,6 +372,9 @@
     case 42:
         benchFunction = local_ZSTD_decompressStream; benchName = "decompressStream";
         break;
+    case 43:
+        benchFunction = local_ZSTD_compressStream_freshCCtx; benchName = "compressStream_freshCCtx";
+        break;
     case 51:
         benchFunction = local_ZSTD_compress_generic_continue; benchName = "compress_generic, continue";
         break;
@@ -379,7 +399,7 @@
         free(dstBuff); free(dstBuff2);
         return 12;
     }
-    buff2 = dstBuff2;
+    payload = dstBuff2;
     if (g_zcc==NULL) g_zcc = ZSTD_createCCtx();
     if (g_zdc==NULL) g_zdc = ZSTD_createDCtx();
     if (g_cstream==NULL) g_cstream = ZSTD_createCStream();
@@ -412,62 +432,66 @@
     switch(benchNb)
     {
     case 1:
-        buff2 = &cparams;
+        payload = &cparams;
         break;
     case 2:
-        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
+        g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
         break;
 #ifndef ZSTD_DLL_IMPORT
     case 11:
-        buff2 = &cparams;
+        payload = &cparams;
         break;
     case 12:
-        buff2 = &cparams;
+        payload = &cparams;
         break;
     case 13 :
-        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
+        g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
         break;
-    case 31:  /* ZSTD_decodeLiteralsBlock */
-        {   blockProperties_t bp;
-            ZSTD_frameHeader zfp;
-            size_t frameHeaderSize, skippedSize;
+    case 31:  /* ZSTD_decodeLiteralsBlock : starts literals block in dstBuff2 */
+        {   size_t frameHeaderSize;
             g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
-            frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
-            if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
-            ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp);  /* Get 1st block type */
-            if (bp.blockType != bt_compressed) {
-                DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
-                goto _cleanOut;
+            frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
+            CONTROL(!ZSTD_isError(frameHeaderSize));
+            /* check block is compressible, hence contains a literals section */
+            {   blockProperties_t bp;
+                ZSTD_getcBlockSize(dstBuff+frameHeaderSize, dstBuffSize, &bp);  /* Get 1st block type */
+                if (bp.blockType != bt_compressed) {
+                    DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
+                    goto _cleanOut;
+            }   }
+            {   size_t const skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
+                memcpy(dstBuff2, dstBuff+skippedSize, g_cSize-skippedSize);
             }
-            skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
-            memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
             srcSize = srcSize > 128 KB ? 128 KB : srcSize;    /* speed relative to block */
             ZSTD_decompressBegin(g_zdc);
             break;
         }
     case 32:   /* ZSTD_decodeSeqHeaders */
         {   blockProperties_t bp;
-            ZSTD_frameHeader zfp;
             const BYTE* ip = dstBuff;
             const BYTE* iend;
-            size_t frameHeaderSize, cBlockSize;
-            ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);   /* it would be better to use direct block compression here */
-            g_cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
-            frameHeaderSize = ZSTD_getFrameHeader(&zfp, dstBuff, ZSTD_FRAMEHEADERSIZE_MIN);
-            if (frameHeaderSize==0) frameHeaderSize = ZSTD_FRAMEHEADERSIZE_MIN;
-            ip += frameHeaderSize;   /* Skip frame Header */
-            cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   /* Get 1st block type */
-            if (bp.blockType != bt_compressed) {
-                DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
-                goto _cleanOut;
+            {   size_t const cSize = ZSTD_compress(dstBuff, dstBuffSize, src, srcSize, cLevel);
+                CONTROL(cSize > ZSTD_FRAMEHEADERSIZE_PREFIX);
             }
-            iend = ip + ZSTD_blockHeaderSize + cBlockSize;   /* End of first block */
-            ip += ZSTD_blockHeaderSize;                      /* skip block header */
+            /* Skip frame Header */
+            {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(dstBuff, ZSTD_FRAMEHEADERSIZE_PREFIX);
+                CONTROL(!ZSTD_isError(frameHeaderSize));
+                ip += frameHeaderSize;
+            }
+            /* Find end of block */
+            {   size_t const cBlockSize = ZSTD_getcBlockSize(ip, dstBuffSize, &bp);   /* Get 1st block type */
+                if (bp.blockType != bt_compressed) {
+                    DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
+                    goto _cleanOut;
+                }
+                iend = ip + ZSTD_blockHeaderSize + cBlockSize;   /* End of first block */
+            }
+            ip += ZSTD_blockHeaderSize;    /* skip block header */
             ZSTD_decompressBegin(g_zdc);
-            assert(iend > ip);
+            CONTROL(iend > ip);
             ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip));   /* skip literal segment */
             g_cSize = (size_t)(iend-ip);
-            memcpy(buff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
+            memcpy(dstBuff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
             srcSize = srcSize > 128 KB ? 128 KB : srcSize;   /* speed relative to block */
             break;
         }
@@ -476,10 +500,13 @@
         goto _cleanOut;
 #endif
     case 41 :
-        buff2 = &cparams;
+        payload = &cparams;
         break;
     case 42 :
-        g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize, cLevel);
+        g_cSize = ZSTD_compress(payload, dstBuffSize, src, srcSize, cLevel);
+        break;
+    case 43 :
+        payload = &cparams;
         break;
 
     /* test functions */
@@ -498,10 +525,10 @@
         BMK_runTime_t bestResult;
         bestResult.sumOfReturn = 0;
         bestResult.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
-        assert(tfs != NULL);
+        CONTROL(tfs != NULL);
 
         bp.benchFn = benchFunction;
-        bp.benchPayload = buff2;
+        bp.benchPayload = payload;
         bp.initFn = NULL;
         bp.initPayload = NULL;
         bp.errorFn = ZSTD_isError;
@@ -549,21 +576,19 @@
 
 
 static int benchSample(U32 benchNb,
+                       size_t benchedSize, double compressibility,
                        int cLevel, ZSTD_compressionParameters cparams)
 {
-    size_t const benchedSize = g_sampleSize;
-    const char* const name = "Sample 10MiB";
-
     /* Allocation */
     void* const origBuff = malloc(benchedSize);
     if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }
 
     /* Fill buffer */
-    RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
+    RDG_genBuffer(origBuff, benchedSize, compressibility, 0.0, 0);
 
     /* bench */
     DISPLAY("\r%70s\r", "");
-    DISPLAY(" %s : \n", name);
+    DISPLAY(" Sample %u bytes : \n", (unsigned)benchedSize);
     if (benchNb) {
         benchMem(benchNb, origBuff, benchedSize, cLevel, cparams);
     } else {  /* 0 == run all tests */
@@ -696,10 +721,11 @@
     usage(exename);
     DISPLAY( "\nAdvanced options :\n");
     DISPLAY( " -b#    : test only function # \n");
-    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
-    DISPLAY( " -P#    : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
     DISPLAY( " -l#    : benchmark functions at that compression level (default : %i)\n", DEFAULT_CLEVEL);
     DISPLAY( " --zstd : custom parameter selection. Format same as zstdcli \n");
+    DISPLAY( " -P#    : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
+    DISPLAY( " -B#    : sample size (default : %u)\n", (unsigned)kSampleSizeDefault);
+    DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
     return 0;
 }
 
@@ -718,13 +744,15 @@
     U32 benchNb = 0, main_pause = 0;
     int cLevel = DEFAULT_CLEVEL;
     ZSTD_compressionParameters cparams = ZSTD_getCParams(cLevel, 0, 0);
+    size_t sampleSize = kSampleSizeDefault;
+    double compressibility = COMPRESSIBILITY_DEFAULT;
 
     DISPLAY(WELCOME_MESSAGE);
     if (argc<1) return badusage(exename);
 
     for (argNb=1; argNb<argc; argNb++) {
         const char* argument = argv[argNb];
-        assert(argument != NULL);
+        CONTROL(argument != NULL);
 
         if (longCommandWArg(&argument, "--zstd=")) {
             for ( ; ;) {
@@ -767,21 +795,29 @@
                     benchNb = readU32FromChar(&argument);
                     break;
 
-                    /* Modify Nb Iterations */
-                case 'i':
+                    /* Select compression level to use */
+                case 'l':
                     argument++;
-                    g_nbIterations = readU32FromChar(&argument);
+                    cLevel = (int)readU32FromChar(&argument);
+                    cparams = ZSTD_getCParams(cLevel, 0, 0);
                     break;
 
                     /* Select compressibility of synthetic sample */
                 case 'P':
                     argument++;
-                    g_compressibility = (double)readU32FromChar(&argument) / 100.;
+                    compressibility = (double)readU32FromChar(&argument) / 100.;
                     break;
-                case 'l':
+
+                    /* Select size of synthetic sample */
+                case 'B':
                     argument++;
-                    cLevel = (int)readU32FromChar(&argument);
-                    cparams = ZSTD_getCParams(cLevel, 0, 0);
+                    sampleSize = (size_t)readU32FromChar(&argument);
+                    break;
+
+                    /* Modify Nb Iterations */
+                case 'i':
+                    argument++;
+                    g_nbIterations = readU32FromChar(&argument);
                     break;
 
                     /* Unknown command */
@@ -798,7 +834,7 @@
 
 
     if (filenamesStart==0)   /* no input file */
-        result = benchSample(benchNb, cLevel, cparams);
+        result = benchSample(benchNb, sampleSize, compressibility, cLevel, cparams);
     else
         result = benchFiles(benchNb, argv+filenamesStart, argc-filenamesStart, cLevel, cparams);
 
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 31b151b..8bf16b1 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -26,8 +26,8 @@
 PRGDIR = ../../programs
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
-	-DZSTD_MULTITHREAD $(CPPFLAGS)
+	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
+	-I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
 FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
 	-Wstrict-prototypes -Wundef \
@@ -47,12 +47,14 @@
 ZSTDCOMP_SRC   := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
 ZSTDDICT_SRC := $(ZSTDDIR)/dictBuilder/*.c
+ZSTDLEGACY_SRC := $(ZSTDDIR)/legacy/*.c
 FUZZ_SRC       := \
 	$(FUZZ_SRC) \
 	$(ZSTDDECOMP_SRC) \
 	$(ZSTDCOMMON_SRC) \
 	$(ZSTDCOMP_SRC) \
-	$(ZSTDDICT_SRC)
+	$(ZSTDDICT_SRC) \
+	$(ZSTDLEGACY_SRC)
 
 FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
 
@@ -69,7 +71,9 @@
 	stream_decompress \
 	block_decompress  \
 	dictionary_round_trip \
-	dictionary_decompress
+	dictionary_decompress \
+	zstd_frame_info \
+	simple_compress
 
 all: $(FUZZ_TARGETS)
 
@@ -100,6 +104,12 @@
 dictionary_decompress: $(FUZZ_HEADERS) $(FUZZ_OBJ) dictionary_decompress.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) dictionary_decompress.o $(LIB_FUZZING_ENGINE) -o $@
 
+simple_compress: $(FUZZ_HEADERS) $(FUZZ_OBJ) simple_compress.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) simple_compress.o $(LIB_FUZZING_ENGINE) -o $@
+
+zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_OBJ) zstd_frame_info.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
 
@@ -122,6 +132,9 @@
 .PHONY: corpora
 corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
 
+.PHONY: seedcorpora
+seedcorpora: $(patsubst %,corpora/%_seed_corpus.zip,$(FUZZ_TARGETS))
+
 regressiontest: corpora
 	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
 	$(PYTHON) ./fuzz.py regression all
@@ -130,7 +143,9 @@
 	@$(MAKE) -C $(ZSTDDIR) clean
 	@$(RM) *.a *.o
 	@$(RM) simple_round_trip stream_round_trip simple_decompress \
-           stream_decompress block_decompress block_round_trip
+           stream_decompress block_decompress block_round_trip \
+           simple_compress dictionary_round_trip dictionary_decompress \
+           zstd_frame_info
 
 cleanall:
 	@$(RM) -r Fuzzer
diff --git a/tests/fuzz/default.options b/tests/fuzz/default.options
deleted file mode 100644
index 8ea8588..0000000
--- a/tests/fuzz/default.options
+++ /dev/null
@@ -1,2 +0,0 @@
-[libfuzzer]
-max_len = 8192
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
index 7d3a767..e900054 100644
--- a/tests/fuzz/dictionary_decompress.c
+++ b/tests/fuzz/dictionary_decompress.c
@@ -20,43 +20,42 @@
 #include "zstd_helpers.h"
 
 static ZSTD_DCtx *dctx = NULL;
-static void* rBuf = NULL;
-static size_t bufSize = 0;
 
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    FUZZ_dict_t dict;
-    size_t neededBufSize;
-
     uint32_t seed = FUZZ_seed(&src, &size);
-    neededBufSize = MAX(20 * size, (size_t)256 << 10);
+    FUZZ_dict_t dict;
+    ZSTD_DDict* ddict = NULL;
+    int i;
 
-    /* Allocate all buffers and contexts if not already allocated */
-    if (neededBufSize > bufSize) {
-        free(rBuf);
-        rBuf = malloc(neededBufSize);
-        bufSize = neededBufSize;
-        FUZZ_ASSERT(rBuf);
-    }
     if (!dctx) {
         dctx = ZSTD_createDCtx();
         FUZZ_ASSERT(dctx);
     }
     dict = FUZZ_train(src, size, &seed);
     if (FUZZ_rand32(&seed, 0, 1) == 0) {
-        ZSTD_decompress_usingDict(dctx,
-                rBuf, neededBufSize,
-                src, size,
-                dict.buff, dict.size);
+        ddict = ZSTD_createDDict(dict.buff, dict.size);
+        FUZZ_ASSERT(ddict);
     } else {
         FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced(
                 dctx, dict.buff, dict.size,
                 (ZSTD_dictLoadMethod_e)FUZZ_rand32(&seed, 0, 1),
                 (ZSTD_dictContentType_e)FUZZ_rand32(&seed, 0, 2)));
-        ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
     }
-
+    /* Run it 10 times over 10 output sizes. Reuse the context and dict. */
+    for (i = 0; i < 10; ++i) {
+        size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
+        void* rBuf = malloc(bufSize);
+        FUZZ_ASSERT(rBuf);
+        if (ddict) {
+            ZSTD_decompress_usingDDict(dctx, rBuf, bufSize, src, size, ddict);
+        } else {
+            ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+        }
+        free(rBuf);
+    }
     free(dict.buff);
+    ZSTD_freeDDict(ddict);
 #ifndef STATEFUL_FUZZING
     ZSTD_freeDCtx(dctx); dctx = NULL;
 #endif
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index cd2a5b4..d993209 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -36,6 +36,8 @@
     'block_decompress',
     'dictionary_round_trip',
     'dictionary_decompress',
+    'zstd_frame_info',
+    'simple_compress',
 ]
 ALL_TARGETS = TARGETS + ['all']
 FUZZ_RNG_SEED_SIZE = 4
diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c
new file mode 100644
index 0000000..aaed403
--- /dev/null
+++ b/tests/fuzz/simple_compress.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target attempts to compress the fuzzed data with the simple
+ * compression function, using an output buffer that may be too small, to
+ * ensure that the compressor never crashes.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fuzz_helpers.h"
+#include "zstd.h"
+
+static ZSTD_CCtx *cctx = NULL;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    uint32_t seed = FUZZ_seed(&src, &size);
+    size_t const maxSize = ZSTD_compressBound(size);
+    int i;
+    if (!cctx) {
+        cctx = ZSTD_createCCtx();
+        FUZZ_ASSERT(cctx);
+    }
+    /* Run it 10 times over 10 output sizes. Reuse the context. */
+    for (i = 0; i < 10; ++i) {
+        int const level = (int)FUZZ_rand32(&seed, 0, 19 + 3) - 3; /* [-3, 19] */
+        size_t const bufSize = FUZZ_rand32(&seed, 0, maxSize);
+        void* rBuf = malloc(bufSize);
+        FUZZ_ASSERT(rBuf);
+        ZSTD_compressCCtx(cctx, rBuf, bufSize, src, size, level);
+        free(rBuf);
+    }
+
+#ifndef STATEFUL_FUZZING
+    ZSTD_freeCCtx(cctx); cctx = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
index bba272c..af3f302 100644
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@@ -19,28 +19,24 @@
 #include "zstd.h"
 
 static ZSTD_DCtx *dctx = NULL;
-static void* rBuf = NULL;
-static size_t bufSize = 0;
 
 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
-    size_t neededBufSize;
 
-    FUZZ_seed(&src, &size);
-    neededBufSize = MAX(20 * size, (size_t)256 << 10);
-
-    /* Allocate all buffers and contexts if not already allocated */
-    if (neededBufSize > bufSize) {
-        free(rBuf);
-        rBuf = malloc(neededBufSize);
-        bufSize = neededBufSize;
-        FUZZ_ASSERT(rBuf);
-    }
+    uint32_t seed = FUZZ_seed(&src, &size);
+    int i;
     if (!dctx) {
         dctx = ZSTD_createDCtx();
         FUZZ_ASSERT(dctx);
     }
-    ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, src, size);
+    /* Run it 10 times over 10 output sizes. Reuse the context. */
+    for (i = 0; i < 10; ++i) {
+        size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size);
+        void* rBuf = malloc(bufSize);
+        FUZZ_ASSERT(rBuf);
+        ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size);
+        free(rBuf);
+    }
 
 #ifndef STATEFUL_FUZZING
     ZSTD_freeDCtx(dctx); dctx = NULL;
diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c
new file mode 100644
index 0000000..7512d5f
--- /dev/null
+++ b/tests/fuzz/zstd_frame_info.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+/**
+ * This fuzz target fuzzes all of the helper functions that consume compressed
+ * input.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "fuzz_helpers.h"
+#include "zstd_helpers.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    ZSTD_frameHeader zfh;
+    /* Consume the seed to be compatible with the corpora of other decompression
+     * fuzzers.
+     */
+    FUZZ_seed(&src, &size);
+    /* You can fuzz any helper functions here that are fast, and take zstd
+     * compressed data as input. For example, the input is not expected to be
+     * a dictionary, so don't fuzz ZSTD_getDictID_fromDict().
+     */
+    ZSTD_getFrameContentSize(src, size);
+    ZSTD_getDecompressedSize(src, size);
+    ZSTD_findFrameCompressedSize(src, size);
+    ZSTD_getDictID_fromFrame(src, size);
+    ZSTD_findDecompressedSize(src, size);
+    ZSTD_decompressBound(src, size);
+    ZSTD_frameHeaderSize(src, size);
+    ZSTD_isFrame(src, size);
+    ZSTD_getFrameHeader(&zfh, src, size);
+    ZSTD_getFrameHeader_advanced(&zfh, src, size, ZSTD_f_zstd1);
+    return 0;
+}
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 1a31c78..f42de9e 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -62,10 +62,12 @@
 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
+#define DISPLAYUPDATE(l, ...) \
+    if (g_displayLevel>=l) { \
+        if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
+        { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
+        if (g_displayLevel>=4) fflush(stderr); } \
+    }
 
 
 /*-*******************************************************
@@ -73,7 +75,7 @@
 *********************************************************/
 #undef MIN
 #undef MAX
-/* Declaring the function is it isn't unused */
+/* Declaring the function, to avoid -Wmissing-prototypes */
 void FUZ_bug976(void);
 void FUZ_bug976(void)
 {   /* these constants shall not depend on MIN() macro */
@@ -247,7 +249,7 @@
 
     /* advanced MT streaming API test */
     if (part <= 4)
-    {   unsigned nbThreads;
+    {   int nbThreads;
         for (nbThreads=1; nbThreads<=4; nbThreads++) {
             int compressionLevel;
             for (compressionLevel=1; compressionLevel<=6; compressionLevel++) {
@@ -261,7 +263,7 @@
                 CHECK_Z( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue) );
                 while ( ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end) ) {}
                 ZSTD_freeCCtx(cctx);
-                DISPLAYLEVEL(3, "compress_generic,-T%u,continue level %i : ",
+                DISPLAYLEVEL(3, "compress_generic,-T%i,continue level %i : ",
                                 nbThreads, compressionLevel);
                 FUZ_displayMallocStats(malcount);
     }   }   }
@@ -768,13 +770,11 @@
             DISPLAYLEVEL(3, "OK \n");
 
             DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
-            { size_t const r = ZSTD_compressBegin(staticCCtx, 1);
-              if (ZSTD_isError(r)) goto _output_error; }
+            CHECK( ZSTD_compressBegin(staticCCtx, 1) );
             DISPLAYLEVEL(3, "OK \n");
 
             DISPLAYLEVEL(3, "test%3i : init CStream for small level %u : ", testNb++, 1);
-            { size_t const r = ZSTD_initCStream(staticCCtx, 1);
-              if (ZSTD_isError(r)) goto _output_error; }
+            CHECK( ZSTD_initCStream(staticCCtx, 1) );
             DISPLAYLEVEL(3, "OK \n");
 
             DISPLAYLEVEL(3, "test%3i : init CStream with dictionary (should fail) : ", testNb++);
@@ -1059,7 +1059,7 @@
     /* Dictionary and dictBuilder tests */
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         size_t const dictBufferCapacity = 16 KB;
-        void* dictBuffer = malloc(dictBufferCapacity);
+        void* const dictBuffer = malloc(dictBufferCapacity);
         size_t const totalSampleSize = 1 MB;
         size_t const sampleUnitSize = 8 KB;
         U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
@@ -1104,6 +1104,22 @@
         }
         DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
 
+        DISPLAYLEVEL(3, "test%3i : COVER dictBuilder with shrinkDict: ", testNb++);
+        { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
+        {   ZDICT_cover_params_t coverParams;
+            memset(&coverParams, 0, sizeof(coverParams));
+            coverParams.steps = 8;
+            coverParams.nbThreads = 4;
+            coverParams.shrinkDict = 1;
+            coverParams.shrinkDictMaxRegression = 1;
+            dictSize = ZDICT_optimizeTrainFromBuffer_cover(
+                dictBuffer, dictBufferCapacity,
+                CNBuffer, samplesSizes, nbSamples/8,  /* less samples for faster tests */
+                &coverParams);
+            if (ZDICT_isError(dictSize)) goto _output_error;
+        }
+        DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
+
         DISPLAYLEVEL(3, "test%3i : Multithreaded FASTCOVER dictBuilder : ", testNb++);
         { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
         {   ZDICT_fastCover_params_t fastCoverParams;
@@ -1118,6 +1134,22 @@
         }
         DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
 
+        DISPLAYLEVEL(3, "test%3i : FASTCOVER dictBuilder with shrinkDict: ", testNb++);
+        { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
+        {   ZDICT_fastCover_params_t fastCoverParams;
+            memset(&fastCoverParams, 0, sizeof(fastCoverParams));
+            fastCoverParams.steps = 8;
+            fastCoverParams.nbThreads = 4;
+            fastCoverParams.shrinkDict = 1;
+            fastCoverParams.shrinkDictMaxRegression = 1;
+            dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(
+                dictBuffer, dictBufferCapacity,
+                CNBuffer, samplesSizes, nbSamples,
+                &fastCoverParams);
+            if (ZDICT_isError(dictSize)) goto _output_error;
+        }
+        DISPLAYLEVEL(3, "OK, created dictionary of size %u \n", (unsigned)dictSize);
+
         DISPLAYLEVEL(3, "test%3i : check dictID : ", testNb++);
         dictID = ZDICT_getDictID(dictBuffer, dictSize);
         if (dictID==0) goto _output_error;
@@ -1164,6 +1196,7 @@
             ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize,
                                             ZSTD_dlm_byRef, ZSTD_dct_auto,
                                             cParams, ZSTD_defaultCMem);
+            assert(cdict != NULL);
             DISPLAYLEVEL(3, "(size : %u) : ", (unsigned)ZSTD_sizeof_CDict(cdict));
             cSize = ZSTD_compress_usingCDict(cctx, compressedBuffer, compressedBufferSize,
                                                  CNBuffer, CNBuffSize, cdict);
@@ -1221,8 +1254,11 @@
         {   ZSTD_frameParameters const fParams = { 0 /* frameSize */, 1 /* checksum */, 1 /* noDictID*/ };
             ZSTD_compressionParameters const cParams = ZSTD_getCParams(1, CNBuffSize, dictSize);
             ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem);
-            cSize = ZSTD_compress_usingCDict_advanced(cctx, compressedBuffer, compressedBufferSize,
-                                                 CNBuffer, CNBuffSize, cdict, fParams);
+            assert(cdict != NULL);
+            cSize = ZSTD_compress_usingCDict_advanced(cctx,
+                                                      compressedBuffer, compressedBufferSize,
+                                                      CNBuffer, CNBuffSize,
+                                                      cdict, fParams);
             ZSTD_freeCDict(cdict);
             if (ZSTD_isError(cSize)) goto _output_error;
         }
@@ -1235,7 +1271,8 @@
         DISPLAYLEVEL(3, "OK (unknown)\n");
 
         DISPLAYLEVEL(3, "test%3i : frame built without dictID should be decompressible : ", testNb++);
-        {   ZSTD_DCtx* const dctx = ZSTD_createDCtx(); assert(dctx != NULL);
+        {   ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+            assert(dctx != NULL);
             CHECKPLUS(r, ZSTD_decompress_usingDict(dctx,
                                            decodedBuffer, CNBuffSize,
                                            compressedBuffer, cSize,
@@ -2459,7 +2496,7 @@
  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
  *  @return 0 and doesn't modify *stringPtr otherwise.
  */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+static int longCommandWArg(const char** stringPtr, const char* longCommand)
 {
     size_t const comSize = strlen(longCommand);
     int const result = !strncmp(*stringPtr, longCommand, comSize);
@@ -2519,7 +2556,7 @@
 
                 case 'i':
                     argument++; maxDuration = 0;
-                    nbTests = readU32FromChar(&argument);
+                    nbTests = (int)readU32FromChar(&argument);
                     break;
 
                 case 'T':
@@ -2539,12 +2576,12 @@
 
                 case 't':
                     argument++;
-                    testNb = readU32FromChar(&argument);
+                    testNb = (int)readU32FromChar(&argument);
                     break;
 
                 case 'P':   /* compressibility % */
                     argument++;
-                    proba = readU32FromChar(&argument);
+                    proba = (int)readU32FromChar(&argument);
                     if (proba>100) proba = 100;
                     break;
 
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 75c179a..98fb313 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -609,8 +609,8 @@
 
 static constraint_t relaxTarget(constraint_t target) {
     target.cMem = (U32)-1;
-    target.cSpeed *= ((double)g_strictness) / 100;
-    target.dSpeed *= ((double)g_strictness) / 100;
+    target.cSpeed = (target.cSpeed * g_strictness) / 100;
+    target.dSpeed = (target.dSpeed * g_strictness) / 100;
     return target;
 }
 
@@ -1737,8 +1737,8 @@
 
     /* optimistic assumption of benchres */
     {   BMK_benchResult_t resultMax = benchres;
-        resultMax.cSpeed *= uncertaintyConstantC * VARIANCE;
-        resultMax.dSpeed *= uncertaintyConstantD * VARIANCE;
+        resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
+        resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
 
         /* disregard infeasible results in feas mode */
         /* disregard if resultMax < winner in infeas mode */
@@ -2429,9 +2429,9 @@
         }
 
         g_lvltarget = winner.result;
-        g_lvltarget.cSpeed *= ((double)g_strictness) / 100;
-        g_lvltarget.dSpeed *= ((double)g_strictness) / 100;
-        g_lvltarget.cSize /= ((double)g_strictness) / 100;
+        g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
+        g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
+        g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
 
         target.cSpeed = (U32)g_lvltarget.cSpeed;
         target.dSpeed = (U32)g_lvltarget.dSpeed;
diff --git a/tests/playTests.sh b/tests/playTests.sh
index bce2710..6938732 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1,7 +1,9 @@
-#!/bin/sh -e
+#!/bin/sh
+
+set -e
 
 die() {
-    $ECHO "$@" 1>&2
+    println "$@" 1>&2
     exit 1
 }
 
@@ -20,7 +22,7 @@
     fi
 
     rm -f tmp1 tmp2
-    $ECHO "roundTripTest: ./datagen $1 $proba | $ZSTD -v$cLevel | $ZSTD -d$dLevel"
+    println "roundTripTest: ./datagen $1 $proba | $ZSTD -v$cLevel | $ZSTD -d$dLevel"
     ./datagen $1 $proba | $MD5SUM > tmp1
     ./datagen $1 $proba | $ZSTD --ultra -v$cLevel | $ZSTD -d$dLevel  | $MD5SUM > tmp2
     $DIFF -q tmp1 tmp2
@@ -41,7 +43,7 @@
     fi
 
     rm -f tmp.zstd tmp.md5.1 tmp.md5.2
-    $ECHO "fileRoundTripTest: ./datagen $1 $local_p > tmp && $ZSTD -v$local_c -c tmp | $ZSTD -d$local_d"
+    println "fileRoundTripTest: ./datagen $1 $local_p > tmp && $ZSTD -v$local_c -c tmp | $ZSTD -d$local_d"
     ./datagen $1 $local_p > tmp
     < tmp $MD5SUM > tmp.md5.1
     $ZSTD --ultra -v$local_c -c tmp | $ZSTD -d$local_d | $MD5SUM > tmp.md5.2
@@ -49,9 +51,17 @@
 }
 
 truncateLastByte() {
-	dd bs=1 count=$(($(wc -c < "$1") - 1)) if="$1"
+    dd bs=1 count=$(($(wc -c < "$1") - 1)) if="$1"
 }
 
+println() {
+    printf '%b\n' "${*}"
+}
+
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+PRGDIR="$SCRIPT_DIR/../programs"
+TESTDIR="$SCRIPT_DIR/../tests"
 UNAME=$(uname)
 
 isTerminal=false
@@ -86,16 +96,11 @@
   SunOS) DIFF="gdiff" ;;
 esac
 
-ECHO="echo -e"
-case "$UNAME" in
-  Darwin) ECHO="echo" ;;
-esac
-
-$ECHO "\nStarting playTests.sh isWindows=$isWindows ZSTD='$ZSTD'"
+println "\nStarting playTests.sh isWindows=$isWindows ZSTD='$ZSTD'"
 
 [ -n "$ZSTD" ] || die "ZSTD variable must be defined!"
 
-if [ -n "$(echo hello | $ZSTD -v -T2 2>&1 > $INTOVOID | grep 'multi-threading is disabled')" ]
+if echo hello | $ZSTD -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled'
 then
     hasMT=""
 else
@@ -104,103 +109,103 @@
 
 
 
-$ECHO "\n===>  simple tests "
+println "\n===>  simple tests "
 
 ./datagen > tmp
-$ECHO "test : basic compression "
+println "test : basic compression "
 $ZSTD -f tmp                      # trivial compression case, creates tmp.zst
-$ECHO "test : basic decompression"
+println "test : basic decompression"
 $ZSTD -df tmp.zst                 # trivial decompression case (overwrites tmp)
-$ECHO "test : too large compression level => auto-fix"
+println "test : too large compression level => auto-fix"
 $ZSTD -99 -f tmp  # too large compression level, automatic sized down
 $ZSTD -5000000000 -f tmp && die "too large numeric value : must fail"
-$ECHO "test : --fast aka negative compression levels"
+println "test : --fast aka negative compression levels"
 $ZSTD --fast -f tmp  # == -1
 $ZSTD --fast=3 -f tmp  # == -3
 $ZSTD --fast=200000 -f tmp  # too low compression level, automatic fixed
 $ZSTD --fast=5000000000 -f tmp && die "too large numeric value : must fail"
 $ZSTD -c --fast=0 tmp > $INTOVOID && die "--fast must not accept value 0"
-$ECHO "test : too large numeric argument"
+println "test : too large numeric argument"
 $ZSTD --fast=9999999999 -f tmp  && die "should have refused numeric value"
-$ECHO "test : set compression level with environment variable ZSTD_CLEVEL"
+println "test : set compression level with environment variable ZSTD_CLEVEL"
 ZSTD_CLEVEL=12  $ZSTD -f tmp # positive compression level
 ZSTD_CLEVEL=-12 $ZSTD -f tmp # negative compression level
 ZSTD_CLEVEL=+12 $ZSTD -f tmp # valid: verbose '+' sign
-ZSTD_CLEVEL=    $ZSTD -f tmp # empty env var, warn and revert to default setting
+ZSTD_CLEVEL=''  $ZSTD -f tmp # empty env var, warn and revert to default setting
 ZSTD_CLEVEL=-   $ZSTD -f tmp # malformed env var, warn and revert to default setting
 ZSTD_CLEVEL=a   $ZSTD -f tmp # malformed env var, warn and revert to default setting
 ZSTD_CLEVEL=+a  $ZSTD -f tmp # malformed env var, warn and revert to default setting
 ZSTD_CLEVEL=3a7 $ZSTD -f tmp # malformed env var, warn and revert to default setting
 ZSTD_CLEVEL=50000000000  $ZSTD -f tmp # numeric value too large, warn and revert to default setting
-$ECHO "test : override ZSTD_CLEVEL with command line option"
+println "test : override ZSTD_CLEVEL with command line option"
 ZSTD_CLEVEL=12  $ZSTD --fast=3 -f tmp # overridden by command line option
-$ECHO "test : compress to stdout"
+println "test : compress to stdout"
 $ZSTD tmp -c > tmpCompressed
 $ZSTD tmp --stdout > tmpCompressed       # long command format
-$ECHO "test : compress to named file"
+println "test : compress to named file"
 rm tmpCompressed
 $ZSTD tmp -o tmpCompressed
 test -f tmpCompressed   # file must be created
-$ECHO "test : -o must be followed by filename (must fail)"
+println "test : -o must be followed by filename (must fail)"
 $ZSTD tmp -of tmpCompressed && die "-o must be followed by filename "
-$ECHO "test : force write, correct order"
+println "test : force write, correct order"
 $ZSTD tmp -fo tmpCompressed
-$ECHO "test : forgotten argument"
+println "test : forgotten argument"
 cp tmp tmp2
 $ZSTD tmp2 -fo && die "-o must be followed by filename "
-$ECHO "test : implied stdout when input is stdin"
-$ECHO bob | $ZSTD | $ZSTD -d
+println "test : implied stdout when input is stdin"
+println bob | $ZSTD | $ZSTD -d
 if [ "$isTerminal" = true ]; then
-$ECHO "test : compressed data to terminal"
-$ECHO bob | $ZSTD && die "should have refused : compressed data to terminal"
-$ECHO "test : compressed data from terminal (a hang here is a test fail, zstd is wrongly waiting on data from terminal)"
+println "test : compressed data to terminal"
+println bob | $ZSTD && die "should have refused : compressed data to terminal"
+println "test : compressed data from terminal (a hang here is a test fail, zstd is wrongly waiting on data from terminal)"
 $ZSTD -d > $INTOVOID && die "should have refused : compressed data from terminal"
 fi
-$ECHO "test : null-length file roundtrip"
-$ECHO -n '' | $ZSTD - --stdout | $ZSTD -d --stdout
-$ECHO "test : ensure small file doesn't add 3-bytes null block"
+println "test : null-length file roundtrip"
+println -n '' | $ZSTD - --stdout | $ZSTD -d --stdout
+println "test : ensure small file doesn't add 3-bytes null block"
 ./datagen -g1 > tmp1
 $ZSTD tmp1 -c | wc -c | grep "14"
 $ZSTD < tmp1  | wc -c | grep "14"
-$ECHO "test : decompress file with wrong suffix (must fail)"
+println "test : decompress file with wrong suffix (must fail)"
 $ZSTD -d tmpCompressed && die "wrong suffix error not detected!"
 $ZSTD -df tmp && die "should have refused : wrong extension"
-$ECHO "test : decompress into stdout"
+println "test : decompress into stdout"
 $ZSTD -d tmpCompressed -c > tmpResult    # decompression using stdout
 $ZSTD --decompress tmpCompressed -c > tmpResult
 $ZSTD --decompress tmpCompressed --stdout > tmpResult
-$ECHO "test : decompress from stdin into stdout"
+println "test : decompress from stdin into stdout"
 $ZSTD -dc   < tmp.zst > $INTOVOID   # combine decompression, stdin & stdout
 $ZSTD -dc - < tmp.zst > $INTOVOID
 $ZSTD -d    < tmp.zst > $INTOVOID   # implicit stdout when stdin is used
 $ZSTD -d  - < tmp.zst > $INTOVOID
-$ECHO "test : impose memory limitation (must fail)"
+println "test : impose memory limitation (must fail)"
 $ZSTD -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed"
 $ZSTD -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
 $ZSTD -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
 $ZSTD -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
-$ECHO "test : overwrite protection"
+println "test : overwrite protection"
 $ZSTD -q tmp && die "overwrite check failed!"
-$ECHO "test : force overwrite"
+println "test : force overwrite"
 $ZSTD -q -f tmp
 $ZSTD -q --force tmp
-$ECHO "test : overwrite readonly file"
+println "test : overwrite readonly file"
 rm -f tmpro tmpro.zst
-$ECHO foo > tmpro.zst
-$ECHO foo > tmpro
+println foo > tmpro.zst
+println foo > tmpro
 chmod 400 tmpro.zst
 $ZSTD -q tmpro && die "should have refused to overwrite read-only file"
 $ZSTD -q -f tmpro
-$ECHO "test: --no-progress flag"
+println "test: --no-progress flag"
 $ZSTD tmpro -c --no-progress | $ZSTD -d -f -o "$INTOVOID" --no-progress
 $ZSTD tmpro -cv --no-progress | $ZSTD -dv -f -o "$INTOVOID" --no-progress
 rm -f tmpro tmpro.zst
-$ECHO "test: overwrite input file (must fail)"
+println "test: overwrite input file (must fail)"
 $ZSTD tmp -fo tmp && die "zstd compression overwrote the input file"
 $ZSTD tmp.zst -dfo tmp.zst && die "zstd decompression overwrote the input file"
-$ECHO "test: detect that input file does not exist"
+println "test: detect that input file does not exist"
 $ZSTD nothere && die "zstd hasn't detected that input file does not exist"
-$ECHO "test: --[no-]compress-literals"
+println "test: --[no-]compress-literals"
 $ZSTD tmp -c --no-compress-literals -1       | $ZSTD -t
 $ZSTD tmp -c --no-compress-literals --fast=1 | $ZSTD -t
 $ZSTD tmp -c --no-compress-literals -19      | $ZSTD -t
@@ -210,27 +215,27 @@
 $ZSTD -b --fast=1 -i1e1 tmp --compress-literals
 $ZSTD -b --fast=1 -i1e1 tmp --no-compress-literals
 
-$ECHO "test : file removal"
+println "test : file removal"
 $ZSTD -f --rm tmp
 test ! -f tmp  # tmp should no longer be present
 $ZSTD -f -d --rm tmp.zst
 test ! -f tmp.zst   # tmp.zst should no longer be present
-$ECHO "test : should quietly not remove non-regular file"
-$ECHO hello > tmp
+println "test : should quietly not remove non-regular file"
+println hello > tmp
 $ZSTD tmp -f -o "$DEVDEVICE" 2>tmplog > "$INTOVOID"
 grep -v "Refusing to remove non-regular file" tmplog
 rm -f tmplog
 $ZSTD tmp -f -o "$INTOVOID" 2>&1 | grep -v "Refusing to remove non-regular file"
-$ECHO "test : --rm on stdin"
-$ECHO a | $ZSTD --rm > $INTOVOID   # --rm should remain silent
+println "test : --rm on stdin"
+println a | $ZSTD --rm > $INTOVOID   # --rm should remain silent
 rm tmp
 $ZSTD -f tmp && die "tmp not present : should have failed"
 test ! -f tmp.zst  # tmp.zst should not be created
-$ECHO "test : -d -f do not delete destination when source is not present"
+println "test : -d -f do not delete destination when source is not present"
 touch tmp    # create destination file
 $ZSTD -d -f tmp.zst && die "attempt to decompress a non existing file"
 test -f tmp  # destination file should still be present
-$ECHO "test : -f do not delete destination when source is not present"
+println "test : -f do not delete destination when source is not present"
 rm tmp         # erase source file
 touch tmp.zst  # create destination file
 $ZSTD -f tmp && die "attempt to compress a non existing file"
@@ -238,9 +243,9 @@
 rm tmp*
 
 
-$ECHO "test : compress multiple files"
-$ECHO hello > tmp1
-$ECHO world > tmp2
+println "test : compress multiple files"
+println hello > tmp1
+println world > tmp2
 $ZSTD tmp1 tmp2 -o "$INTOVOID" -f
 $ZSTD tmp1 tmp2 -c | $ZSTD -t
 $ZSTD tmp1 tmp2 -o tmp.zst
@@ -261,11 +266,11 @@
 rm tmp*
 
 
-$ECHO "\n===>  Advanced compression parameters "
-$ECHO "Hello world!" | $ZSTD --zstd=windowLog=21,      - -o tmp.zst && die "wrong parameters not detected!"
-$ECHO "Hello world!" | $ZSTD --zstd=windowLo=21        - -o tmp.zst && die "wrong parameters not detected!"
-$ECHO "Hello world!" | $ZSTD --zstd=windowLog=21,slog  - -o tmp.zst && die "wrong parameters not detected!"
-$ECHO "Hello world!" | $ZSTD --zstd=strategy=10        - -o tmp.zst && die "parameter out of bound not detected!"  # > btultra2 : does not exist
+println "\n===>  Advanced compression parameters "
+println "Hello world!" | $ZSTD --zstd=windowLog=21,      - -o tmp.zst && die "wrong parameters not detected!"
+println "Hello world!" | $ZSTD --zstd=windowLo=21        - -o tmp.zst && die "wrong parameters not detected!"
+println "Hello world!" | $ZSTD --zstd=windowLog=21,slog  - -o tmp.zst && die "wrong parameters not detected!"
+println "Hello world!" | $ZSTD --zstd=strategy=10        - -o tmp.zst && die "parameter out of bound not detected!"  # > btultra2 : does not exist
 test ! -f tmp.zst  # tmp.zst should not be created
 roundTripTest -g512K
 roundTripTest -g512K " --zstd=mml=3,tlen=48,strat=6"
@@ -276,17 +281,17 @@
 roundTripTest -g64K  "19 --zstd=strat=9"   # btultra2
 
 
-$ECHO "\n===>  Pass-Through mode "
-$ECHO "Hello world 1!" | $ZSTD -df
-$ECHO "Hello world 2!" | $ZSTD -dcf
-$ECHO "Hello world 3!" > tmp1
+println "\n===>  Pass-Through mode "
+println "Hello world 1!" | $ZSTD -df
+println "Hello world 2!" | $ZSTD -dcf
+println "Hello world 3!" > tmp1
 $ZSTD -dcf tmp1
 
 
-$ECHO "\n===>  frame concatenation "
+println "\n===>  frame concatenation "
 
-$ECHO "hello " > hello.tmp
-$ECHO "world!" > world.tmp
+println "hello " > hello.tmp
+println "world!" > world.tmp
 cat hello.tmp world.tmp > helloworld.tmp
 $ZSTD -c hello.tmp > hello.zstd
 $ZSTD -c world.tmp > world.zstd
@@ -294,44 +299,49 @@
 $ZSTD -dc helloworld.zstd > result.tmp
 cat result.tmp
 $DIFF helloworld.tmp result.tmp
-$ECHO "frame concatenation without checksum"
+println "frame concatenation without checksum"
 $ZSTD -c hello.tmp > hello.zstd --no-check
 $ZSTD -c world.tmp > world.zstd --no-check
 cat hello.zstd world.zstd > helloworld.zstd
 $ZSTD -dc helloworld.zstd > result.tmp
 $DIFF helloworld.tmp result.tmp
-$ECHO "testing zstdcat symlink"
+println "testing zstdcat symlink"
 ln -sf $ZSTD zstdcat
 ./zstdcat helloworld.zstd > result.tmp
 $DIFF helloworld.tmp result.tmp
+ln -s helloworld.zstd helloworld.link.zstd
+./zstdcat helloworld.link.zstd > result.tmp
+$DIFF helloworld.tmp result.tmp
 rm zstdcat
 rm result.tmp
-$ECHO "testing zcat symlink"
+println "testing zcat symlink"
 ln -sf $ZSTD zcat
 ./zcat helloworld.zstd > result.tmp
 $DIFF helloworld.tmp result.tmp
+./zcat helloworld.link.zstd > result.tmp
+$DIFF helloworld.tmp result.tmp
 rm zcat
 rm ./*.tmp ./*.zstd
-$ECHO "frame concatenation tests completed"
+println "frame concatenation tests completed"
 
 
 if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] && [ "$UNAME" != "OpenBSD" ] ; then
-$ECHO "\n**** flush write error test **** "
+println "\n**** flush write error test **** "
 
-$ECHO "$ECHO foo | $ZSTD > /dev/full"
-$ECHO foo | $ZSTD > /dev/full && die "write error not detected!"
-$ECHO "$ECHO foo | $ZSTD | $ZSTD -d > /dev/full"
-$ECHO foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
+println "println foo | $ZSTD > /dev/full"
+println foo | $ZSTD > /dev/full && die "write error not detected!"
+println "println foo | $ZSTD | $ZSTD -d > /dev/full"
+println foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
 
 fi
 
 
 if [ "$isWindows" = false ] && [ "$UNAME" != 'SunOS' ] ; then
 
-$ECHO "\n===>  symbolic link test "
+println "\n===>  symbolic link test "
 
 rm -f hello.tmp world.tmp world2.tmp hello.tmp.zst world.tmp.zst
-$ECHO "hello world" > hello.tmp
+println "hello world" > hello.tmp
 ln -s hello.tmp world.tmp
 ln -s hello.tmp world2.tmp
 $ZSTD world.tmp hello.tmp || true
@@ -349,7 +359,7 @@
 fi
 
 
-$ECHO "\n===>  test sparse file support "
+println "\n===>  test sparse file support "
 
 ./datagen -g5M  -P100 > tmpSparse
 $ZSTD tmpSparse -c | $ZSTD -dv -o tmpSparseRegen
@@ -362,10 +372,10 @@
 ./datagen -s1 -g1200007 -P100 | $ZSTD | $ZSTD -dv --sparse -c > tmpSparseOdd   # Odd size file (to not finish on an exact nb of blocks)
 ./datagen -s1 -g1200007 -P100 | $DIFF -s - tmpSparseOdd
 ls -ls tmpSparseOdd  # look at file size and block size on disk
-$ECHO "\n Sparse Compatibility with Console :"
-$ECHO "Hello World 1 !" | $ZSTD | $ZSTD -d -c
-$ECHO "Hello World 2 !" | $ZSTD | $ZSTD -d | cat
-$ECHO "\n Sparse Compatibility with Append :"
+println "\n Sparse Compatibility with Console :"
+println "Hello World 1 !" | $ZSTD | $ZSTD -d -c
+println "Hello World 2 !" | $ZSTD | $ZSTD -d | cat
+println "\n Sparse Compatibility with Append :"
 ./datagen -P100 -g1M > tmpSparse1M
 cat tmpSparse1M tmpSparse1M > tmpSparse2M
 $ZSTD -v -f tmpSparse1M -o tmpSparseCompressed
@@ -376,187 +386,193 @@
 rm tmpSparse*
 
 
-$ECHO "\n===>  multiple files tests "
+println "\n===>  multiple files tests "
 
 ./datagen -s1        > tmp1 2> $INTOVOID
 ./datagen -s2 -g100K > tmp2 2> $INTOVOID
 ./datagen -s3 -g1M   > tmp3 2> $INTOVOID
-$ECHO "compress tmp* : "
+println "compress tmp* : "
 $ZSTD -f tmp*
 ls -ls tmp*
 rm tmp1 tmp2 tmp3
-$ECHO "decompress tmp* : "
-$ZSTD -df *.zst
+println "decompress tmp* : "
+$ZSTD -df ./*.zst
 ls -ls tmp*
-$ECHO "compress tmp* into stdout > tmpall : "
+println "compress tmp* into stdout > tmpall : "
 $ZSTD -c tmp1 tmp2 tmp3 > tmpall
 ls -ls tmp*  # check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst)
-$ECHO "decompress tmpall* into stdout > tmpdec : "
+println "decompress tmpall* into stdout > tmpdec : "
 cp tmpall tmpall2
 $ZSTD -dc tmpall* > tmpdec
 ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
-$ECHO "compress multiple files including a missing one (notHere) : "
+println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
 
-$ECHO "\n===>  dictionary tests "
+println "\n===>  dictionary tests "
 
-$ECHO "- test with raw dict (content only) "
+println "- test with raw dict (content only) "
 ./datagen > tmpDict
 ./datagen -g1M | $MD5SUM > tmp1
 ./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2
 $DIFF -q tmp1 tmp2
-$ECHO "- Create first dictionary "
-TESTFILE=../programs/zstdcli.c
-$ZSTD --train *.c ../programs/*.c -o tmpDict
-cp $TESTFILE tmp
-$ECHO "- Test dictionary compression with tmpDict as an input file and dictionary"
+println "- Create first dictionary "
+TESTFILE="$PRGDIR"/zstdcli.c
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
+cp "$TESTFILE" tmp
+println "- Test dictionary compression with tmpDict as an input file and dictionary"
 $ZSTD -f tmpDict -D tmpDict && die "compression error not detected!"
-$ECHO "- Dictionary compression roundtrip"
+println "- Dictionary compression roundtrip"
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
-$ECHO "- Dictionary compression with btlazy2 strategy"
+$DIFF "$TESTFILE" result
+println "- Dictionary compression with btlazy2 strategy"
 $ZSTD -f tmp -D tmpDict --zstd=strategy=6
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
+$DIFF "$TESTFILE" result
 if [ -n "$hasMT" ]
 then
-    $ECHO "- Test dictionary compression with multithreading "
+    println "- Test dictionary compression with multithreading "
     ./datagen -g5M | $ZSTD -T2 -D tmpDict | $ZSTD -t -D tmpDict   # fails with v1.3.2
 fi
-$ECHO "- Create second (different) dictionary "
-$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
+println "- Create second (different) dictionary "
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
-$ECHO "- Create dictionary with short dictID"
-$ZSTD --train *.c ../programs/*.c --dictID=1 -o tmpDict1
+println "- Create dictionary with short dictID"
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
-$ECHO "- Create dictionary with wrong dictID parameter order (must fail)"
-$ZSTD --train *.c ../programs/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument "
-$ECHO "- Create dictionary with size limit"
-$ZSTD --train *.c ../programs/*.c -o tmpDict2 --maxdict=4K -v
-$ECHO "- Create dictionary with small size limit"
-$ZSTD --train *.c ../programs/*.c -o tmpDict3 --maxdict=1K -v
-$ECHO "- Create dictionary with wrong parameter order (must fail)"
-$ZSTD --train *.c ../programs/*.c -o tmpDict3 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument "
-$ECHO "- Compress without dictID"
+println "- Create dictionary with wrong dictID parameter order (must fail)"
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID -o 1 tmpDict1 && die "wrong order : --dictID must be followed by argument "
+println "- Create dictionary with size limit"
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K -v
+println "- Create dictionary with small size limit"
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict3 --maxdict=1K -v
+println "- Create dictionary with wrong parameter order (must fail)"
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict3 --maxdict -v 4K && die "wrong order : --maxdict must be followed by argument "
+println "- Compress without dictID"
 $ZSTD -f tmp -D tmpDict1 --no-dictID
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
-$ECHO "- Compress with wrong argument order (must fail)"
+$DIFF "$TESTFILE" result
+println "- Compress with wrong argument order (must fail)"
 $ZSTD tmp -Df tmpDict1 -c > $INTOVOID && die "-D must be followed by dictionary name "
-$ECHO "- Compress multiple files with dictionary"
+println "- Compress multiple files with dictionary"
 rm -rf dirTestDict
 mkdir dirTestDict
-cp *.c dirTestDict
-cp ../programs/*.c dirTestDict
-cp ../programs/*.h dirTestDict
+cp "$TESTDIR"/*.c dirTestDict
+cp "$PRGDIR"/*.c dirTestDict
+cp "$PRGDIR"/*.h dirTestDict
 $MD5SUM dirTestDict/* > tmph1
 $ZSTD -f --rm dirTestDict/* -D tmpDictC
 $ZSTD -d --rm dirTestDict/*.zst -D tmpDictC  # note : use internal checksum by default
 case "$UNAME" in
-  Darwin) $ECHO "md5sum -c not supported on OS-X : test skipped" ;;  # not compatible with OS-X's md5
+  Darwin) println "md5sum -c not supported on OS-X : test skipped" ;;  # not compatible with OS-X's md5
   *) $MD5SUM -c tmph1 ;;
 esac
 rm -rf dirTestDict
-$ECHO "- dictionary builder on bogus input"
-$ECHO "Hello World" > tmp
+println "- dictionary builder on bogus input"
+println "Hello World" > tmp
 $ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
 ./datagen -P0 -g10M > tmp
 $ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
-$ECHO "- Test -o before --train"
+println "- Test -o before --train"
 rm -f tmpDict dictionary
-$ZSTD -o tmpDict --train *.c ../programs/*.c
+$ZSTD -o tmpDict --train "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f tmpDict
-$ZSTD --train *.c ../programs/*.c
+$ZSTD --train "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
 rm tmp* dictionary
 
 
-$ECHO "\n===>  fastCover dictionary builder : advanced options "
-
-TESTFILE=../programs/zstdcli.c
+println "\n===>  fastCover dictionary builder : advanced options "
+TESTFILE="$PRGDIR"/zstdcli.c
 ./datagen > tmpDict
-$ECHO "- Create first dictionary"
-$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 *.c ../programs/*.c -o tmpDict
-cp $TESTFILE tmp
+println "- Create first dictionary"
+$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
+cp "$TESTFILE" tmp
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
-$ECHO "- Create second (different) dictionary"
-$ZSTD --train-fastcover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$DIFF "$TESTFILE" result
+println "- Create second (different) dictionary"
+$ZSTD --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
-$ECHO "- Create dictionary with short dictID"
-$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 *.c ../programs/*.c --dictID=1 -o tmpDict1
+$ZSTD --train-fastcover=k=56,d=8 && die "Create dictionary without input file"
+println "- Create dictionary with short dictID"
+$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
-$ECHO "- Create dictionary with size limit"
-$ZSTD --train-fastcover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
-$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
-$ZSTD --train-fastcover=split=90 -r *.c ../programs/*.c
-$ZSTD --train-fastcover=split=80 -r *.c ../programs/*.c
-$ECHO "- Create dictionary using all samples for both training and testing"
-$ZSTD --train-fastcover=split=100 -r *.c ../programs/*.c
-$ECHO "- Create dictionary using f=16"
-$ZSTD --train-fastcover=f=16 -r *.c ../programs/*.c
-$ECHO "- Create dictionary using accel=2"
-$ZSTD --train-fastcover=accel=2 -r *.c ../programs/*.c
-$ECHO "- Create dictionary using accel=10"
-$ZSTD --train-fastcover=accel=10 -r *.c ../programs/*.c
-$ECHO "- Create dictionary with multithreading"
-$ZSTD --train-fastcover -T4 -r *.c ../programs/*.c
-$ECHO "- Test -o before --train-fastcover"
+println "- Create dictionaries with shrink-dict flag enabled"
+$ZSTD --train-fastcover=steps=256,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict
+$ZSTD --train-fastcover=steps=256,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict1
+$ZSTD --train-fastcover=steps=256,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpShrinkDict2
+println "- Create dictionary with size limit"
+$ZSTD --train-fastcover=steps=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K
+println "- Compare size of dictionary from 90% training samples with 80% training samples"
+$ZSTD --train-fastcover=split=90 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=split=80 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Create dictionary using all samples for both training and testing"
+$ZSTD --train-fastcover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Create dictionary using f=16"
+$ZSTD --train-fastcover=f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15"
+println "- Create dictionary using accel=2"
+$ZSTD --train-fastcover=accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Create dictionary using accel=10"
+$ZSTD --train-fastcover=accel=10 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Create dictionary with multithreading"
+$ZSTD --train-fastcover -T4 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Test -o before --train-fastcover"
 rm -f tmpDict dictionary
-$ZSTD -o tmpDict --train-fastcover *.c ../programs/*.c
+$ZSTD -o tmpDict --train-fastcover "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f tmpDict
-$ZSTD --train-fastcover *.c ../programs/*.c
+$ZSTD --train-fastcover "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
 rm tmp* dictionary
 
 
-$ECHO "\n===>  legacy dictionary builder "
+println "\n===>  legacy dictionary builder "
 
-TESTFILE=../programs/zstdcli.c
+TESTFILE="$PRGDIR"/zstdcli.c
 ./datagen > tmpDict
-$ECHO "- Create first dictionary"
-$ZSTD --train-legacy=selectivity=8 *.c ../programs/*.c -o tmpDict
-cp $TESTFILE tmp
+println "- Create first dictionary"
+$ZSTD --train-legacy=selectivity=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
+cp "$TESTFILE" tmp
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
-$ECHO "- Create second (different) dictionary"
-$ZSTD --train-legacy=s=5 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$DIFF "$TESTFILE" result
+$ZSTD --train-legacy=s=8 && die "Create dictionary without input files (should error)"
+println "- Create second (different) dictionary"
+$ZSTD --train-legacy=s=5 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
-$ECHO "- Create dictionary with short dictID"
-$ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
+println "- Create dictionary with short dictID"
+$ZSTD --train-legacy -s5 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
-$ECHO "- Create dictionary with size limit"
-$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
-$ECHO "- Test -o before --train-legacy"
+println "- Create dictionary with size limit"
+$ZSTD --train-legacy -s9 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K
+println "- Test -o before --train-legacy"
 rm -f tmpDict dictionary
-$ZSTD -o tmpDict --train-legacy *.c ../programs/*.c
+$ZSTD -o tmpDict --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f tmpDict
-$ZSTD --train-legacy *.c ../programs/*.c
+$ZSTD --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
 rm tmp* dictionary
 
 
-$ECHO "\n===>  integrity tests "
+println "\n===>  integrity tests "
 
-$ECHO "test one file (tmp1.zst) "
+println "test one file (tmp1.zst) "
 ./datagen > tmp1
 $ZSTD tmp1
 $ZSTD -t tmp1.zst
 $ZSTD --test tmp1.zst
-$ECHO "test multiple files (*.zst) "
-$ZSTD -t *.zst
-$ECHO "test bad files (*) "
-$ZSTD -t * && die "bad files not detected !"
+println "test multiple files (*.zst) "
+$ZSTD -t ./*.zst
+println "test bad files (*) "
+$ZSTD -t ./* && die "bad files not detected !"
 $ZSTD -t tmp1 && die "bad file not detected !"
 cp tmp1 tmp2.zst
 $ZSTD -t tmp2.zst && die "bad file not detected !"
 ./datagen -g0 > tmp3
 $ZSTD -t tmp3 && die "bad file not detected !"   # detects 0-sized files as bad
-$ECHO "test --rm and --test combined "
+println "test --rm and --test combined "
 $ZSTD -t --rm tmp1.zst
 test -f tmp1.zst   # check file is still present
 split -b16384 tmp1.zst tmpSplit.
@@ -565,40 +581,40 @@
 
 
 
-$ECHO "\n===>  golden files tests "
+println "\n===>  golden files tests "
 
-$ZSTD -t -r files
-$ZSTD -c -r files | $ZSTD -t
+$ZSTD -t -r "$TESTDIR/files"
+$ZSTD -c -r "$TESTDIR/files" | $ZSTD -t
 
 
-$ECHO "\n===>  benchmark mode tests "
+println "\n===>  benchmark mode tests "
 
-$ECHO "bench one file"
+println "bench one file"
 ./datagen > tmp1
 $ZSTD -bi0 tmp1
-$ECHO "bench multiple levels"
+println "bench multiple levels"
 $ZSTD -i0b0e3 tmp1
-$ECHO "bench negative level"
+println "bench negative level"
 $ZSTD -bi0 --fast tmp1
-$ECHO "with recursive and quiet modes"
+println "with recursive and quiet modes"
 $ZSTD -rqi1b1e2 tmp1
-$ECHO "benchmark decompression only"
+println "benchmark decompression only"
 $ZSTD -f tmp1
 $ZSTD -b -d -i1 tmp1.zst
 
-$ECHO "\n===>  zstd compatibility tests "
+println "\n===>  zstd compatibility tests "
 
 ./datagen > tmp
 rm -f tmp.zst
 $ZSTD --format=zstd -f tmp
 test -f tmp.zst
 
-$ECHO "\n===>  gzip compatibility tests "
+println "\n===>  gzip compatibility tests "
 
 GZIPMODE=1
 $ZSTD --format=gzip -V || GZIPMODE=0
 if [ $GZIPMODE -eq 1 ]; then
-    $ECHO "gzip support detected"
+    println "gzip support detected"
     GZIPEXE=1
     gzip -V || GZIPEXE=0
     if [ $GZIPEXE -eq 1 ]; then
@@ -609,14 +625,14 @@
         $ZSTD -d -f -v tmp.gz
         rm tmp*
     else
-        $ECHO "gzip binary not detected"
+        println "gzip binary not detected"
     fi
 else
-    $ECHO "gzip mode not supported"
+    println "gzip mode not supported"
 fi
 
 
-$ECHO "\n===>  gzip frame tests "
+println "\n===>  gzip frame tests "
 
 if [ $GZIPMODE -eq 1 ]; then
     ./datagen > tmp
@@ -626,7 +642,7 @@
     truncateLastByte tmp.gz | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
     rm tmp*
 else
-    $ECHO "gzip mode not supported"
+    println "gzip mode not supported"
 fi
 
 if [ $GZIPMODE -eq 1 ]; then
@@ -636,16 +652,16 @@
     test -f tmp.zst
 fi
 
-$ECHO "\n===>  xz compatibility tests "
+println "\n===>  xz compatibility tests "
 
 LZMAMODE=1
 $ZSTD --format=xz -V || LZMAMODE=0
 if [ $LZMAMODE -eq 1 ]; then
-    $ECHO "xz support detected"
+    println "xz support detected"
     XZEXE=1
     xz -Q -V && lzma -Q -V || XZEXE=0
     if [ $XZEXE -eq 1 ]; then
-        $ECHO "Testing zstd xz and lzma support"
+        println "Testing zstd xz and lzma support"
         ./datagen > tmp
         $ZSTD --format=lzma -f tmp
         $ZSTD --format=xz -f tmp
@@ -656,18 +672,18 @@
         $ZSTD -d -f -v tmp.xz
         $ZSTD -d -f -v tmp.lzma
         rm tmp*
-        $ECHO "Creating symlinks"
+        println "Creating symlinks"
         ln -s $ZSTD ./xz
         ln -s $ZSTD ./unxz
         ln -s $ZSTD ./lzma
         ln -s $ZSTD ./unlzma
-        $ECHO "Testing xz and lzma symlinks"
+        println "Testing xz and lzma symlinks"
         ./datagen > tmp
         ./xz tmp
         xz -Q -d tmp.xz
         ./lzma tmp
         lzma -Q -d tmp.lzma
-        $ECHO "Testing unxz and unlzma symlinks"
+        println "Testing unxz and unlzma symlinks"
         xz -Q tmp
         ./xz -d tmp.xz
         lzma -Q tmp
@@ -675,14 +691,14 @@
         rm xz unxz lzma unlzma
         rm tmp*
     else
-        $ECHO "xz binary not detected"
+        println "xz binary not detected"
     fi
 else
-    $ECHO "xz mode not supported"
+    println "xz mode not supported"
 fi
 
 
-$ECHO "\n===>  xz frame tests "
+println "\n===>  xz frame tests "
 
 if [ $LZMAMODE -eq 1 ]; then
     ./datagen > tmp
@@ -694,15 +710,15 @@
     truncateLastByte tmp.lzma | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
     rm tmp*
 else
-    $ECHO "xz mode not supported"
+    println "xz mode not supported"
 fi
 
-$ECHO "\n===>  lz4 compatibility tests "
+println "\n===>  lz4 compatibility tests "
 
 LZ4MODE=1
 $ZSTD --format=lz4 -V || LZ4MODE=0
 if [ $LZ4MODE -eq 1 ]; then
-    $ECHO "lz4 support detected"
+    println "lz4 support detected"
     LZ4EXE=1
     lz4 -V || LZ4EXE=0
     if [ $LZ4EXE -eq 1 ]; then
@@ -713,14 +729,14 @@
         $ZSTD -d -f -v tmp.lz4
         rm tmp*
     else
-        $ECHO "lz4 binary not detected"
+        println "lz4 binary not detected"
     fi
 else
-    $ECHO "lz4 mode not supported"
+    println "lz4 mode not supported"
 fi
 
 
-$ECHO "\n===>  lz4 frame tests "
+println "\n===>  lz4 frame tests "
 
 if [ $LZ4MODE -eq 1 ]; then
     ./datagen > tmp
@@ -730,10 +746,10 @@
     truncateLastByte tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
     rm tmp*
 else
-    $ECHO "lz4 mode not supported"
+    println "lz4 mode not supported"
 fi
 
-$ECHO "\n===> suffix list test"
+println "\n===> suffix list test"
 
 ! $ZSTD -d tmp.abc 2> tmplg
 
@@ -750,7 +766,7 @@
     grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi
 
-$ECHO "\n===>  zstd round-trip tests "
+println "\n===>  zstd round-trip tests "
 
 roundTripTest
 roundTripTest -g15K       # TableID==3
@@ -763,7 +779,7 @@
 
 fileRoundTripTest -g500K
 
-$ECHO "\n===>  zstd long distance matching round-trip tests "
+println "\n===>  zstd long distance matching round-trip tests "
 roundTripTest -g0 "2 --single-thread --long"
 roundTripTest -g1000K "1 --single-thread --long"
 roundTripTest -g517K "6 --single-thread --long"
@@ -775,62 +791,62 @@
 roundTripTest -g96K "5 --single-thread"
 if [ -n "$hasMT" ]
 then
-    $ECHO "\n===>  zstdmt round-trip tests "
+    println "\n===>  zstdmt round-trip tests "
     roundTripTest -g4M "1 -T0"
     roundTripTest -g8M "3 -T2"
     roundTripTest -g8000K "2 --threads=2"
     fileRoundTripTest -g4M "19 -T2 -B1M"
 
-    $ECHO "\n===>  zstdmt long distance matching round-trip tests "
+    println "\n===>  zstdmt long distance matching round-trip tests "
     roundTripTest -g8M "3 --long=24 -T2"
 
-    $ECHO "\n===>  ovLog tests "
+    println "\n===>  ovLog tests "
     ./datagen -g2MB > tmp
     refSize=$($ZSTD tmp -6 -c --zstd=wlog=18         | wc -c)
     ov9Size=$($ZSTD tmp -6 -c --zstd=wlog=18,ovlog=9 | wc -c)
     ov1Size=$($ZSTD tmp -6 -c --zstd=wlog=18,ovlog=1 | wc -c)
-    if [ $refSize -eq $ov9Size ]; then
+    if [ "$refSize" -eq "$ov9Size" ]; then
         echo ov9Size should be different from refSize
         exit 1
     fi
-    if [ $refSize -eq $ov1Size ]; then
+    if [ "$refSize" -eq "$ov1Size" ]; then
         echo ov1Size should be different from refSize
         exit 1
     fi
-    if [ $ov9Size -ge $ov1Size ]; then
-        echo ov9Size=$ov9Size should be smaller than ov1Size=$ov1Size
+    if [ "$ov9Size" -ge "$ov1Size" ]; then
+        echo ov9Size="$ov9Size" should be smaller than ov1Size="$ov1Size"
         exit 1
     fi
 
 else
-    $ECHO "\n===>  no multithreading, skipping zstdmt tests "
+    println "\n===>  no multithreading, skipping zstdmt tests "
 fi
 
 rm tmp*
 
-$ECHO "\n===>  zstd --list/-l single frame tests "
+println "\n===>  zstd --list/-l single frame tests "
 ./datagen > tmp1
 ./datagen > tmp2
 ./datagen > tmp3
 $ZSTD tmp*
-$ZSTD -l *.zst
-$ZSTD -lv *.zst | grep "Decompressed Size:"  # check that decompressed size is present in header
-$ZSTD --list *.zst
-$ZSTD --list -v *.zst
+$ZSTD -l ./*.zst
+$ZSTD -lv ./*.zst | grep "Decompressed Size:"  # check that decompressed size is present in header
+$ZSTD --list ./*.zst
+$ZSTD --list -v ./*.zst
 
-$ECHO "\n===>  zstd --list/-l multiple frame tests "
+println "\n===>  zstd --list/-l multiple frame tests "
 cat tmp1.zst tmp2.zst > tmp12.zst
 cat tmp12.zst tmp3.zst > tmp123.zst
-$ZSTD -l *.zst
-$ZSTD -lv *.zst
+$ZSTD -l ./*.zst
+$ZSTD -lv ./*.zst
 
-$ECHO "\n===>  zstd --list/-l error detection tests "
+println "\n===>  zstd --list/-l error detection tests "
 $ZSTD -l tmp1 tmp1.zst && die "-l must fail on non-zstd file"
 $ZSTD --list tmp* && die "-l must fail on non-zstd file"
 $ZSTD -lv tmp1* && die "-l must fail on non-zstd file"
 $ZSTD --list -v tmp2 tmp12.zst && die "-l must fail on non-zstd file"
 
-$ECHO "test : detect truncated compressed file "
+println "test : detect truncated compressed file "
 TEST_DATA_FILE=truncatable-input.txt
 FULL_COMPRESSED_FILE=${TEST_DATA_FILE}.zst
 TRUNCATED_COMPRESSED_FILE=truncated-input.txt.zst
@@ -843,7 +859,7 @@
 rm $FULL_COMPRESSED_FILE
 rm $TRUNCATED_COMPRESSED_FILE
 
-$ECHO "\n===>  zstd --list/-l errors when presented with stdin / no files"
+println "\n===>  zstd --list/-l errors when presented with stdin / no files"
 $ZSTD -l && die "-l must fail on empty list of files"
 $ZSTD -l - && die "-l does not work on stdin"
 $ZSTD -l < tmp1.zst && die "-l does not work on stdin"
@@ -852,7 +868,7 @@
 $ZSTD -l - tmp1.zst < tmp1.zst && die "-l does not work on stdin"
 $ZSTD -l tmp1.zst < tmp2.zst # this will check tmp1.zst, but not tmp2.zst, which is not an error : zstd simply doesn't read stdin in this case. It must not error just because stdin is not a tty
 
-$ECHO "\n===>  zstd --list/-l test with null files "
+println "\n===>  zstd --list/-l test with null files "
 ./datagen -g0 > tmp5
 $ZSTD tmp5
 $ZSTD -l tmp5.zst
@@ -860,12 +876,12 @@
 $ZSTD -lv tmp5.zst | grep "Decompressed Size: 0.00 KB (0 B)"  # check that 0 size is present in header
 $ZSTD -lv tmp5* && die "-l must fail on non-zstd file"
 
-$ECHO "\n===>  zstd --list/-l test with no content size field "
+println "\n===>  zstd --list/-l test with no content size field "
 ./datagen -g513K | $ZSTD > tmp6.zst
 $ZSTD -l tmp6.zst
 $ZSTD -lv tmp6.zst | grep "Decompressed Size:"  && die "Field :Decompressed Size: should not be available in this compressed file"
 
-$ECHO "\n===>   zstd --list/-l test with no checksum "
+println "\n===>   zstd --list/-l test with no checksum "
 $ZSTD -f --no-check tmp1
 $ZSTD -l tmp1.zst
 $ZSTD -lv tmp1.zst
@@ -873,7 +889,7 @@
 rm tmp*
 
 
-$ECHO "\n===>   zstd long distance matching tests "
+println "\n===>   zstd long distance matching tests "
 roundTripTest -g0 " --single-thread --long"
 roundTripTest -g9M "2 --single-thread --long"
 # Test parameter parsing
@@ -883,29 +899,32 @@
 roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB"
 
 
-$ECHO "\n===>   adaptive mode "
-roundTripTest -g270000000 " --adapt"
-roundTripTest -g27000000 " --adapt=min=1,max=4"
-$ECHO "===>   test: --adapt must fail on incoherent bounds "
-./datagen > tmp
-$ZSTD -f -vv --adapt=min=10,max=9 tmp && die "--adapt must fail on incoherent bounds"
+if [ -n "$hasMT" ]
+then
+    println "\n===>   adaptive mode "
+    roundTripTest -g270000000 " --adapt"
+    roundTripTest -g27000000 " --adapt=min=1,max=4"
+    println "===>   test: --adapt must fail on incoherent bounds "
+    ./datagen > tmp
+    $ZSTD -f -vv --adapt=min=10,max=9 tmp && die "--adapt must fail on incoherent bounds"
 
-$ECHO "\n===>   rsyncable mode "
-roundTripTest -g10M " --rsyncable"
-roundTripTest -g10M " --rsyncable -B100K"
-$ECHO "===>   test: --rsyncable must fail with --single-thread"
-$ZSTD -f -vv --rsyncable --single-thread tmp && die "--rsyncable must fail with --single-thread"
+    println "\n===>   rsyncable mode "
+    roundTripTest -g10M " --rsyncable"
+    roundTripTest -g10M " --rsyncable -B100K"
+    println "===>   test: --rsyncable must fail with --single-thread"
+    $ZSTD -f -vv --rsyncable --single-thread tmp && die "--rsyncable must fail with --single-thread"
+fi
 
 
 if [ "$1" != "--test-large-data" ]; then
-    $ECHO "Skipping large data tests"
+    println "Skipping large data tests"
     exit 0
 fi
 
 
 #############################################################################
 
-$ECHO "\n===>   large files tests "
+println "\n===>   large files tests "
 
 roundTripTest -g270000000 1
 roundTripTest -g250000000 2
@@ -937,7 +956,7 @@
 fileRoundTripTest -g4193M -P99 1
 
 
-$ECHO "\n===>   zstd long, long distance matching round-trip tests "
+println "\n===>   zstd long, long distance matching round-trip tests "
 roundTripTest -g270000000 "1 --single-thread --long"
 roundTripTest -g130000000 -P60 "5 --single-thread --long"
 roundTripTest -g35000000 -P70 "8 --single-thread --long"
@@ -949,45 +968,54 @@
 
 if [ -n "$hasMT" ]
 then
-    $ECHO "\n===>   zstdmt long round-trip tests "
+    println "\n===>   zstdmt long round-trip tests "
     roundTripTest -g80000000 -P99 "19 -T2" " "
     roundTripTest -g5000000000 -P99 "1 -T2" " "
     roundTripTest -g500000000 -P97 "1 -T999" " "
     fileRoundTripTest -g4103M -P98 " -T0" " "
     roundTripTest -g400000000 -P97 "1 --long=24 -T2" " "
+    # Exposes the bug in https://github.com/facebook/zstd/pull/1678
+    # This test fails on 4 different travis builds at the time of writing
+    # because it needs to allocate 8 GB of memory.
+    # roundTripTest -g10G -P99 "1 -T1 --long=31 --zstd=clog=27 --fast=1000"
 else
-    $ECHO "\n**** no multithreading, skipping zstdmt tests **** "
+    println "\n**** no multithreading, skipping zstdmt tests **** "
 fi
 
 
-$ECHO "\n===>  cover dictionary builder : advanced options "
+println "\n===>  cover dictionary builder : advanced options "
 
-TESTFILE=../programs/zstdcli.c
+TESTFILE="$PRGDIR"/zstdcli.c
 ./datagen > tmpDict
-$ECHO "- Create first dictionary"
-$ZSTD --train-cover=k=46,d=8,split=80 *.c ../programs/*.c -o tmpDict
-cp $TESTFILE tmp
+println "- Create first dictionary"
+$ZSTD --train-cover=k=46,d=8,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
+cp "$TESTFILE" tmp
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -fo result
-$DIFF $TESTFILE result
-$ECHO "- Create second (different) dictionary"
-$ZSTD --train-cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$DIFF "$TESTFILE" result
+$ZSTD --train-cover=k=56,d=8 && die "Create dictionary without input file (should error)"
+println "- Create second (different) dictionary"
+$ZSTD --train-cover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
-$ECHO "- Create dictionary with short dictID"
-$ZSTD --train-cover=k=46,d=8,split=80 *.c ../programs/*.c --dictID=1 -o tmpDict1
+println "- Create dictionary using shrink-dict flag"
+$ZSTD --train-cover=steps=256,shrink "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict
+$ZSTD --train-cover=steps=256,shrink=1 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict1
+$ZSTD --train-cover=steps=256,shrink=5 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpShrinkDict2
+println "- Create dictionary with short dictID"
+$ZSTD --train-cover=k=46,d=8,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
-$ECHO "- Create dictionary with size limit"
-$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
-$ECHO "- Compare size of dictionary from 90% training samples with 80% training samples"
-$ZSTD --train-cover=split=90 -r *.c ../programs/*.c
-$ZSTD --train-cover=split=80 -r *.c ../programs/*.c
-$ECHO "- Create dictionary using all samples for both training and testing"
-$ZSTD --train-cover=split=100 -r *.c ../programs/*.c
-$ECHO "- Test -o before --train-cover"
+println "- Create dictionary with size limit"
+$ZSTD --train-cover=steps=8 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict2 --maxdict=4K
+println "- Compare size of dictionary from 90% training samples with 80% training samples"
+$ZSTD --train-cover=split=90 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-cover=split=80 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Create dictionary using all samples for both training and testing"
+$ZSTD --train-cover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+println "- Test -o before --train-cover"
 rm -f tmpDict dictionary
-$ZSTD -o tmpDict --train-cover *.c ../programs/*.c
+$ZSTD -o tmpDict --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f tmpDict
-$ZSTD --train-cover *.c ../programs/*.c
+$ZSTD --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
 rm -f tmp* dictionary
 
diff --git a/tests/poolTests.c b/tests/poolTests.c
index 272e961..26d57fb 100644
--- a/tests/poolTests.c
+++ b/tests/poolTests.c
@@ -90,6 +90,7 @@
 
 typedef struct {
     ZSTD_pthread_mutex_t mut;
+    int countdown;
     int val;
     int max;
     ZSTD_pthread_cond_t cond;
@@ -97,48 +98,56 @@
 
 static void waitLongFn(void *opaque) {
   poolTest_t* const test = (poolTest_t*) opaque;
-  UTIL_sleepMilli(10);
   ZSTD_pthread_mutex_lock(&test->mut);
-  test->val = test->val + 1;
-  if (test->val == test->max)
-    ZSTD_pthread_cond_signal(&test->cond);
+  test->val++;
+  if (test->val > test->max)
+      test->max = test->val;
+  ZSTD_pthread_mutex_unlock(&test->mut);
+
+  UTIL_sleepMilli(10);
+
+  ZSTD_pthread_mutex_lock(&test->mut);
+  test->val--;
+  test->countdown--;
+  if (test->countdown == 0)
+      ZSTD_pthread_cond_signal(&test->cond);
   ZSTD_pthread_mutex_unlock(&test->mut);
 }
 
 static int testThreadReduction_internal(POOL_ctx* ctx, poolTest_t test)
 {
     int const nbWaits = 16;
-    UTIL_time_t startTime;
-    U64 time4threads, time2threads;
 
+    test.countdown = nbWaits;
     test.val = 0;
-    test.max = nbWaits;
+    test.max = 0;
 
-    startTime = UTIL_getTime();
     {   int i;
         for (i=0; i<nbWaits; i++)
             POOL_add(ctx, &waitLongFn, &test);
     }
     ZSTD_pthread_mutex_lock(&test.mut);
-    ZSTD_pthread_cond_wait(&test.cond, &test.mut);
-    ASSERT_EQ(test.val, nbWaits);
+    while (test.countdown > 0)
+        ZSTD_pthread_cond_wait(&test.cond, &test.mut);
+    ASSERT_EQ(test.val, 0);
+    ASSERT_EQ(test.max, 4);
     ZSTD_pthread_mutex_unlock(&test.mut);
-    time4threads = UTIL_clockSpanNano(startTime);
 
     ASSERT_EQ( POOL_resize(ctx, 2/*nbThreads*/) , 0 );
+    test.countdown = nbWaits;
     test.val = 0;
-    startTime = UTIL_getTime();
+    test.max = 0;
     {   int i;
         for (i=0; i<nbWaits; i++)
             POOL_add(ctx, &waitLongFn, &test);
     }
     ZSTD_pthread_mutex_lock(&test.mut);
-    ZSTD_pthread_cond_wait(&test.cond, &test.mut);
-    ASSERT_EQ(test.val, nbWaits);
+    while (test.countdown > 0)
+        ZSTD_pthread_cond_wait(&test.cond, &test.mut);
+    ASSERT_EQ(test.val, 0);
+    ASSERT_EQ(test.max, 2);
     ZSTD_pthread_mutex_unlock(&test.mut);
-    time2threads = UTIL_clockSpanNano(startTime);
 
-    if (time4threads >= time2threads) return 1;   /* check 4 threads were effectively faster than 2 */
     return 0;
 }
 
@@ -246,7 +255,7 @@
       printf("FAIL: thread reduction not effective \n");
       return 1;
   } else {
-      printf("SUCCESS: thread reduction effective (slower execution) \n");
+      printf("SUCCESS: thread reduction effective \n");
   }
 
   if (testAbruptEnding()) {
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 7ac94f2..e66787f 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -2,59 +2,59 @@
 silesia.tar,                        level -5,                           compress simple,                    6738558
 silesia.tar,                        level -3,                           compress simple,                    6446362
 silesia.tar,                        level -1,                           compress simple,                    6186038
-silesia.tar,                        level 0,                            compress simple,                    4875008
+silesia.tar,                        level 0,                            compress simple,                    4861374
 silesia.tar,                        level 1,                            compress simple,                    5334825
-silesia.tar,                        level 3,                            compress simple,                    4875008
-silesia.tar,                        level 4,                            compress simple,                    4813507
-silesia.tar,                        level 5,                            compress simple,                    4722235
-silesia.tar,                        level 6,                            compress simple,                    4672194
-silesia.tar,                        level 7,                            compress simple,                    4606658
-silesia.tar,                        level 9,                            compress simple,                    4554098
-silesia.tar,                        level 13,                           compress simple,                    4491702
-silesia.tar,                        level 16,                           compress simple,                    4381277
-silesia.tar,                        level 19,                           compress simple,                    4281514
-silesia.tar,                        uncompressed literals,              compress simple,                    4875008
-silesia.tar,                        uncompressed literals optimal,      compress simple,                    4281514
+silesia.tar,                        level 3,                            compress simple,                    4861374
+silesia.tar,                        level 4,                            compress simple,                    4799583
+silesia.tar,                        level 5,                            compress simple,                    4722271
+silesia.tar,                        level 6,                            compress simple,                    4672231
+silesia.tar,                        level 7,                            compress simple,                    4606657
+silesia.tar,                        level 9,                            compress simple,                    4554099
+silesia.tar,                        level 13,                           compress simple,                    4491706
+silesia.tar,                        level 16,                           compress simple,                    4381265
+silesia.tar,                        level 19,                           compress simple,                    4281551
+silesia.tar,                        uncompressed literals,              compress simple,                    4861374
+silesia.tar,                        uncompressed literals optimal,      compress simple,                    4281551
 silesia.tar,                        huffman literals,                   compress simple,                    6186038
 silesia,                            level -5,                           compress cctx,                      6737567
 silesia,                            level -3,                           compress cctx,                      6444663
 silesia,                            level -1,                           compress cctx,                      6178442
-silesia,                            level 0,                            compress cctx,                      4862377
+silesia,                            level 0,                            compress cctx,                      4849491
 silesia,                            level 1,                            compress cctx,                      5313144
-silesia,                            level 3,                            compress cctx,                      4862377
-silesia,                            level 4,                            compress cctx,                      4800629
+silesia,                            level 3,                            compress cctx,                      4849491
+silesia,                            level 4,                            compress cctx,                      4786913
 silesia,                            level 5,                            compress cctx,                      4710178
 silesia,                            level 6,                            compress cctx,                      4659996
 silesia,                            level 7,                            compress cctx,                      4596234
 silesia,                            level 9,                            compress cctx,                      4543862
 silesia,                            level 13,                           compress cctx,                      4482073
-silesia,                            level 16,                           compress cctx,                      4377391
+silesia,                            level 16,                           compress cctx,                      4377389
 silesia,                            level 19,                           compress cctx,                      4293262
-silesia,                            long distance mode,                 compress cctx,                      4862377
-silesia,                            multithreaded,                      compress cctx,                      4862377
-silesia,                            multithreaded long distance mode,   compress cctx,                      4862377
-silesia,                            small window log,                   compress cctx,                      7115734
+silesia,                            long distance mode,                 compress cctx,                      4849491
+silesia,                            multithreaded,                      compress cctx,                      4849491
+silesia,                            multithreaded long distance mode,   compress cctx,                      4849491
+silesia,                            small window log,                   compress cctx,                      7112784
 silesia,                            small hash log,                     compress cctx,                      6554898
 silesia,                            small chain log,                    compress cctx,                      4931093
-silesia,                            explicit params,                    compress cctx,                      4813352
-silesia,                            uncompressed literals,              compress cctx,                      4862377
+silesia,                            explicit params,                    compress cctx,                      4794609
+silesia,                            uncompressed literals,              compress cctx,                      4849491
 silesia,                            uncompressed literals optimal,      compress cctx,                      4293262
 silesia,                            huffman literals,                   compress cctx,                      6178442
-silesia,                            multithreaded with advanced params, compress cctx,                      4862377
+silesia,                            multithreaded with advanced params, compress cctx,                      4849491
 github,                             level -5,                           compress cctx,                      205285
 github,                             level -5 with dict,                 compress cctx,                      47294
 github,                             level -3,                           compress cctx,                      190643
 github,                             level -3 with dict,                 compress cctx,                      48047
 github,                             level -1,                           compress cctx,                      175568
 github,                             level -1 with dict,                 compress cctx,                      43527
-github,                             level 0,                            compress cctx,                      136397
-github,                             level 0 with dict,                  compress cctx,                      41536
+github,                             level 0,                            compress cctx,                      136311
+github,                             level 0 with dict,                  compress cctx,                      41534
 github,                             level 1,                            compress cctx,                      142450
 github,                             level 1 with dict,                  compress cctx,                      42157
-github,                             level 3,                            compress cctx,                      136397
-github,                             level 3 with dict,                  compress cctx,                      41536
+github,                             level 3,                            compress cctx,                      136311
+github,                             level 3 with dict,                  compress cctx,                      41534
 github,                             level 4,                            compress cctx,                      136144
-github,                             level 4 with dict,                  compress cctx,                      41721
+github,                             level 4 with dict,                  compress cctx,                      41725
 github,                             level 5,                            compress cctx,                      135106
 github,                             level 5 with dict,                  compress cctx,                      38934
 github,                             level 6,                            compress cctx,                      135108
@@ -69,82 +69,82 @@
 github,                             level 16 with dict,                 compress cctx,                      37568
 github,                             level 19,                           compress cctx,                      133717
 github,                             level 19 with dict,                 compress cctx,                      37567
-github,                             long distance mode,                 compress cctx,                      141473
-github,                             multithreaded,                      compress cctx,                      141473
-github,                             multithreaded long distance mode,   compress cctx,                      141473
-github,                             small window log,                   compress cctx,                      141473
+github,                             long distance mode,                 compress cctx,                      141101
+github,                             multithreaded,                      compress cctx,                      141101
+github,                             multithreaded long distance mode,   compress cctx,                      141101
+github,                             small window log,                   compress cctx,                      141101
 github,                             small hash log,                     compress cctx,                      138943
 github,                             small chain log,                    compress cctx,                      139239
 github,                             explicit params,                    compress cctx,                      140924
-github,                             uncompressed literals,              compress cctx,                      136397
+github,                             uncompressed literals,              compress cctx,                      136311
 github,                             uncompressed literals optimal,      compress cctx,                      133717
 github,                             huffman literals,                   compress cctx,                      175568
-github,                             multithreaded with advanced params, compress cctx,                      141473
+github,                             multithreaded with advanced params, compress cctx,                      141101
 silesia,                            level -5,                           zstdcli,                            6882514
 silesia,                            level -3,                           zstdcli,                            6568406
 silesia,                            level -1,                           zstdcli,                            6183433
-silesia,                            level 0,                            zstdcli,                            4862425
+silesia,                            level 0,                            zstdcli,                            4849539
 silesia,                            level 1,                            zstdcli,                            5314157
-silesia,                            level 3,                            zstdcli,                            4862425
-silesia,                            level 4,                            zstdcli,                            4800677
+silesia,                            level 3,                            zstdcli,                            4849539
+silesia,                            level 4,                            zstdcli,                            4786961
 silesia,                            level 5,                            zstdcli,                            4710226
 silesia,                            level 6,                            zstdcli,                            4660044
 silesia,                            level 7,                            zstdcli,                            4596282
 silesia,                            level 9,                            zstdcli,                            4543910
 silesia,                            level 13,                           zstdcli,                            4482121
-silesia,                            level 16,                           zstdcli,                            4377439
+silesia,                            level 16,                           zstdcli,                            4377437
 silesia,                            level 19,                           zstdcli,                            4293310
-silesia,                            long distance mode,                 zstdcli,                            4853437
-silesia,                            multithreaded,                      zstdcli,                            4862425
-silesia,                            multithreaded long distance mode,   zstdcli,                            4853437
-silesia,                            small window log,                   zstdcli,                            7126434
+silesia,                            long distance mode,                 zstdcli,                            4839698
+silesia,                            multithreaded,                      zstdcli,                            4849539
+silesia,                            multithreaded long distance mode,   zstdcli,                            4839698
+silesia,                            small window log,                   zstdcli,                            7123892
 silesia,                            small hash log,                     zstdcli,                            6554946
 silesia,                            small chain log,                    zstdcli,                            4931141
-silesia,                            explicit params,                    zstdcli,                            4815380
-silesia,                            uncompressed literals,              zstdcli,                            5155472
-silesia,                            uncompressed literals optimal,      zstdcli,                            4325475
+silesia,                            explicit params,                    zstdcli,                            4797048
+silesia,                            uncompressed literals,              zstdcli,                            5128008
+silesia,                            uncompressed literals optimal,      zstdcli,                            4325482
 silesia,                            huffman literals,                   zstdcli,                            5331158
-silesia,                            multithreaded with advanced params, zstdcli,                            5155472
+silesia,                            multithreaded with advanced params, zstdcli,                            5128008
 silesia.tar,                        level -5,                           zstdcli,                            6738906
 silesia.tar,                        level -3,                           zstdcli,                            6448409
 silesia.tar,                        level -1,                           zstdcli,                            6186908
-silesia.tar,                        level 0,                            zstdcli,                            4875136
+silesia.tar,                        level 0,                            zstdcli,                            4861462
 silesia.tar,                        level 1,                            zstdcli,                            5336255
-silesia.tar,                        level 3,                            zstdcli,                            4875136
-silesia.tar,                        level 4,                            zstdcli,                            4814531
-silesia.tar,                        level 5,                            zstdcli,                            4723284
-silesia.tar,                        level 6,                            zstdcli,                            4673591
-silesia.tar,                        level 7,                            zstdcli,                            4608342
-silesia.tar,                        level 9,                            zstdcli,                            4554700
-silesia.tar,                        level 13,                           zstdcli,                            4491706
-silesia.tar,                        level 16,                           zstdcli,                            4381281
-silesia.tar,                        level 19,                           zstdcli,                            4281518
-silesia.tar,                        no source size,                     zstdcli,                            4875132
-silesia.tar,                        long distance mode,                 zstdcli,                            4866975
-silesia.tar,                        multithreaded,                      zstdcli,                            4875136
-silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4866975
-silesia.tar,                        small window log,                   zstdcli,                            7130434
+silesia.tar,                        level 3,                            zstdcli,                            4861462
+silesia.tar,                        level 4,                            zstdcli,                            4800482
+silesia.tar,                        level 5,                            zstdcli,                            4723312
+silesia.tar,                        level 6,                            zstdcli,                            4673616
+silesia.tar,                        level 7,                            zstdcli,                            4608346
+silesia.tar,                        level 9,                            zstdcli,                            4554702
+silesia.tar,                        level 13,                           zstdcli,                            4491710
+silesia.tar,                        level 16,                           zstdcli,                            4381269
+silesia.tar,                        level 19,                           zstdcli,                            4281555
+silesia.tar,                        no source size,                     zstdcli,                            4861458
+silesia.tar,                        long distance mode,                 zstdcli,                            4853140
+silesia.tar,                        multithreaded,                      zstdcli,                            4861462
+silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4853140
+silesia.tar,                        small window log,                   zstdcli,                            7127964
 silesia.tar,                        small hash log,                     zstdcli,                            6587841
-silesia.tar,                        small chain log,                    zstdcli,                            4943259
-silesia.tar,                        explicit params,                    zstdcli,                            4839202
-silesia.tar,                        uncompressed literals,              zstdcli,                            5158134
-silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4321098
+silesia.tar,                        small chain log,                    zstdcli,                            4943269
+silesia.tar,                        explicit params,                    zstdcli,                            4822318
+silesia.tar,                        uncompressed literals,              zstdcli,                            5129548
+silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4320914
 silesia.tar,                        huffman literals,                   zstdcli,                            5347560
-silesia.tar,                        multithreaded with advanced params, zstdcli,                            5158134
+silesia.tar,                        multithreaded with advanced params, zstdcli,                            5129548
 github,                             level -5,                           zstdcli,                            207285
 github,                             level -5 with dict,                 zstdcli,                            48718
 github,                             level -3,                           zstdcli,                            192643
 github,                             level -3 with dict,                 zstdcli,                            47395
 github,                             level -1,                           zstdcli,                            177568
 github,                             level -1 with dict,                 zstdcli,                            45170
-github,                             level 0,                            zstdcli,                            138397
-github,                             level 0 with dict,                  zstdcli,                            43170
+github,                             level 0,                            zstdcli,                            138311
+github,                             level 0 with dict,                  zstdcli,                            43148
 github,                             level 1,                            zstdcli,                            144450
 github,                             level 1 with dict,                  zstdcli,                            43682
-github,                             level 3,                            zstdcli,                            138397
-github,                             level 3 with dict,                  zstdcli,                            43170
+github,                             level 3,                            zstdcli,                            138311
+github,                             level 3 with dict,                  zstdcli,                            43148
 github,                             level 4,                            zstdcli,                            138144
-github,                             level 4 with dict,                  zstdcli,                            43306
+github,                             level 4 with dict,                  zstdcli,                            43251
 github,                             level 5,                            zstdcli,                            137106
 github,                             level 5 with dict,                  zstdcli,                            40938
 github,                             level 6,                            zstdcli,                            137108
@@ -159,83 +159,83 @@
 github,                             level 16 with dict,                 zstdcli,                            39577
 github,                             level 19,                           zstdcli,                            135717
 github,                             level 19 with dict,                 zstdcli,                            39576
-github,                             long distance mode,                 zstdcli,                            138397
-github,                             multithreaded,                      zstdcli,                            138397
-github,                             multithreaded long distance mode,   zstdcli,                            138397
-github,                             small window log,                   zstdcli,                            138397
+github,                             long distance mode,                 zstdcli,                            138311
+github,                             multithreaded,                      zstdcli,                            138311
+github,                             multithreaded long distance mode,   zstdcli,                            138311
+github,                             small window log,                   zstdcli,                            138311
 github,                             small hash log,                     zstdcli,                            137467
 github,                             small chain log,                    zstdcli,                            138314
 github,                             explicit params,                    zstdcli,                            136140
-github,                             uncompressed literals,              zstdcli,                            169004
+github,                             uncompressed literals,              zstdcli,                            167915
 github,                             uncompressed literals optimal,      zstdcli,                            158824
 github,                             huffman literals,                   zstdcli,                            144450
-github,                             multithreaded with advanced params, zstdcli,                            169004
+github,                             multithreaded with advanced params, zstdcli,                            167915
 silesia,                            level -5,                           advanced one pass,                  6737567
 silesia,                            level -3,                           advanced one pass,                  6444663
 silesia,                            level -1,                           advanced one pass,                  6178442
-silesia,                            level 0,                            advanced one pass,                  4862377
+silesia,                            level 0,                            advanced one pass,                  4849491
 silesia,                            level 1,                            advanced one pass,                  5313144
-silesia,                            level 3,                            advanced one pass,                  4862377
-silesia,                            level 4,                            advanced one pass,                  4800629
+silesia,                            level 3,                            advanced one pass,                  4849491
+silesia,                            level 4,                            advanced one pass,                  4786913
 silesia,                            level 5,                            advanced one pass,                  4710178
 silesia,                            level 6,                            advanced one pass,                  4659996
 silesia,                            level 7,                            advanced one pass,                  4596234
 silesia,                            level 9,                            advanced one pass,                  4543862
 silesia,                            level 13,                           advanced one pass,                  4482073
-silesia,                            level 16,                           advanced one pass,                  4377391
+silesia,                            level 16,                           advanced one pass,                  4377389
 silesia,                            level 19,                           advanced one pass,                  4293262
-silesia,                            no source size,                     advanced one pass,                  4862377
-silesia,                            long distance mode,                 advanced one pass,                  4853389
-silesia,                            multithreaded,                      advanced one pass,                  4862377
-silesia,                            multithreaded long distance mode,   advanced one pass,                  4853389
-silesia,                            small window log,                   advanced one pass,                  7126386
+silesia,                            no source size,                     advanced one pass,                  4849491
+silesia,                            long distance mode,                 advanced one pass,                  4839650
+silesia,                            multithreaded,                      advanced one pass,                  4849491
+silesia,                            multithreaded long distance mode,   advanced one pass,                  4839650
+silesia,                            small window log,                   advanced one pass,                  7123844
 silesia,                            small hash log,                     advanced one pass,                  6554898
 silesia,                            small chain log,                    advanced one pass,                  4931093
-silesia,                            explicit params,                    advanced one pass,                  4815369
-silesia,                            uncompressed literals,              advanced one pass,                  5155424
-silesia,                            uncompressed literals optimal,      advanced one pass,                  4325427
+silesia,                            explicit params,                    advanced one pass,                  4797035
+silesia,                            uncompressed literals,              advanced one pass,                  5127960
+silesia,                            uncompressed literals optimal,      advanced one pass,                  4325434
 silesia,                            huffman literals,                   advanced one pass,                  5326210
-silesia,                            multithreaded with advanced params, advanced one pass,                  5155424
+silesia,                            multithreaded with advanced params, advanced one pass,                  5127960
 silesia.tar,                        level -5,                           advanced one pass,                  6738558
 silesia.tar,                        level -3,                           advanced one pass,                  6446362
 silesia.tar,                        level -1,                           advanced one pass,                  6186038
-silesia.tar,                        level 0,                            advanced one pass,                  4875008
+silesia.tar,                        level 0,                            advanced one pass,                  4861374
 silesia.tar,                        level 1,                            advanced one pass,                  5334825
-silesia.tar,                        level 3,                            advanced one pass,                  4875008
-silesia.tar,                        level 4,                            advanced one pass,                  4813507
-silesia.tar,                        level 5,                            advanced one pass,                  4722235
-silesia.tar,                        level 6,                            advanced one pass,                  4672194
-silesia.tar,                        level 7,                            advanced one pass,                  4606658
-silesia.tar,                        level 9,                            advanced one pass,                  4554098
-silesia.tar,                        level 13,                           advanced one pass,                  4491702
-silesia.tar,                        level 16,                           advanced one pass,                  4381277
-silesia.tar,                        level 19,                           advanced one pass,                  4281514
-silesia.tar,                        no source size,                     advanced one pass,                  4875008
-silesia.tar,                        long distance mode,                 advanced one pass,                  4861218
-silesia.tar,                        multithreaded,                      advanced one pass,                  4874631
-silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4860683
-silesia.tar,                        small window log,                   advanced one pass,                  7130394
+silesia.tar,                        level 3,                            advanced one pass,                  4861374
+silesia.tar,                        level 4,                            advanced one pass,                  4799583
+silesia.tar,                        level 5,                            advanced one pass,                  4722271
+silesia.tar,                        level 6,                            advanced one pass,                  4672231
+silesia.tar,                        level 7,                            advanced one pass,                  4606657
+silesia.tar,                        level 9,                            advanced one pass,                  4554099
+silesia.tar,                        level 13,                           advanced one pass,                  4491706
+silesia.tar,                        level 16,                           advanced one pass,                  4381265
+silesia.tar,                        level 19,                           advanced one pass,                  4281551
+silesia.tar,                        no source size,                     advanced one pass,                  4861374
+silesia.tar,                        long distance mode,                 advanced one pass,                  4848046
+silesia.tar,                        multithreaded,                      advanced one pass,                  4860726
+silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4847343
+silesia.tar,                        small window log,                   advanced one pass,                  7127924
 silesia.tar,                        small hash log,                     advanced one pass,                  6587833
-silesia.tar,                        small chain log,                    advanced one pass,                  4943255
-silesia.tar,                        explicit params,                    advanced one pass,                  4829974
-silesia.tar,                        uncompressed literals,              advanced one pass,                  5157992
-silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4321094
+silesia.tar,                        small chain log,                    advanced one pass,                  4943266
+silesia.tar,                        explicit params,                    advanced one pass,                  4808543
+silesia.tar,                        uncompressed literals,              advanced one pass,                  5129447
+silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4320910
 silesia.tar,                        huffman literals,                   advanced one pass,                  5347283
-silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5158545
+silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5129766
 github,                             level -5,                           advanced one pass,                  205285
 github,                             level -5 with dict,                 advanced one pass,                  46718
 github,                             level -3,                           advanced one pass,                  190643
 github,                             level -3 with dict,                 advanced one pass,                  45395
 github,                             level -1,                           advanced one pass,                  175568
 github,                             level -1 with dict,                 advanced one pass,                  43170
-github,                             level 0,                            advanced one pass,                  136397
-github,                             level 0 with dict,                  advanced one pass,                  41170
+github,                             level 0,                            advanced one pass,                  136311
+github,                             level 0 with dict,                  advanced one pass,                  41148
 github,                             level 1,                            advanced one pass,                  142450
 github,                             level 1 with dict,                  advanced one pass,                  41682
-github,                             level 3,                            advanced one pass,                  136397
-github,                             level 3 with dict,                  advanced one pass,                  41170
+github,                             level 3,                            advanced one pass,                  136311
+github,                             level 3 with dict,                  advanced one pass,                  41148
 github,                             level 4,                            advanced one pass,                  136144
-github,                             level 4 with dict,                  advanced one pass,                  41306
+github,                             level 4 with dict,                  advanced one pass,                  41251
 github,                             level 5,                            advanced one pass,                  135106
 github,                             level 5 with dict,                  advanced one pass,                  38938
 github,                             level 6,                            advanced one pass,                  135108
@@ -250,84 +250,84 @@
 github,                             level 16 with dict,                 advanced one pass,                  37577
 github,                             level 19,                           advanced one pass,                  133717
 github,                             level 19 with dict,                 advanced one pass,                  37576
-github,                             no source size,                     advanced one pass,                  136397
-github,                             long distance mode,                 advanced one pass,                  136397
-github,                             multithreaded,                      advanced one pass,                  136397
-github,                             multithreaded long distance mode,   advanced one pass,                  136397
-github,                             small window log,                   advanced one pass,                  136397
+github,                             no source size,                     advanced one pass,                  136311
+github,                             long distance mode,                 advanced one pass,                  136311
+github,                             multithreaded,                      advanced one pass,                  136311
+github,                             multithreaded long distance mode,   advanced one pass,                  136311
+github,                             small window log,                   advanced one pass,                  136311
 github,                             small hash log,                     advanced one pass,                  135467
 github,                             small chain log,                    advanced one pass,                  136314
 github,                             explicit params,                    advanced one pass,                  137670
-github,                             uncompressed literals,              advanced one pass,                  167004
+github,                             uncompressed literals,              advanced one pass,                  165915
 github,                             uncompressed literals optimal,      advanced one pass,                  156824
 github,                             huffman literals,                   advanced one pass,                  142450
-github,                             multithreaded with advanced params, advanced one pass,                  167004
+github,                             multithreaded with advanced params, advanced one pass,                  165915
 silesia,                            level -5,                           advanced one pass small out,        6737567
 silesia,                            level -3,                           advanced one pass small out,        6444663
 silesia,                            level -1,                           advanced one pass small out,        6178442
-silesia,                            level 0,                            advanced one pass small out,        4862377
+silesia,                            level 0,                            advanced one pass small out,        4849491
 silesia,                            level 1,                            advanced one pass small out,        5313144
-silesia,                            level 3,                            advanced one pass small out,        4862377
-silesia,                            level 4,                            advanced one pass small out,        4800629
+silesia,                            level 3,                            advanced one pass small out,        4849491
+silesia,                            level 4,                            advanced one pass small out,        4786913
 silesia,                            level 5,                            advanced one pass small out,        4710178
 silesia,                            level 6,                            advanced one pass small out,        4659996
 silesia,                            level 7,                            advanced one pass small out,        4596234
 silesia,                            level 9,                            advanced one pass small out,        4543862
 silesia,                            level 13,                           advanced one pass small out,        4482073
-silesia,                            level 16,                           advanced one pass small out,        4377391
+silesia,                            level 16,                           advanced one pass small out,        4377389
 silesia,                            level 19,                           advanced one pass small out,        4293262
-silesia,                            no source size,                     advanced one pass small out,        4862377
-silesia,                            long distance mode,                 advanced one pass small out,        4853389
-silesia,                            multithreaded,                      advanced one pass small out,        4862377
-silesia,                            multithreaded long distance mode,   advanced one pass small out,        4853389
-silesia,                            small window log,                   advanced one pass small out,        7126386
+silesia,                            no source size,                     advanced one pass small out,        4849491
+silesia,                            long distance mode,                 advanced one pass small out,        4839650
+silesia,                            multithreaded,                      advanced one pass small out,        4849491
+silesia,                            multithreaded long distance mode,   advanced one pass small out,        4839650
+silesia,                            small window log,                   advanced one pass small out,        7123844
 silesia,                            small hash log,                     advanced one pass small out,        6554898
 silesia,                            small chain log,                    advanced one pass small out,        4931093
-silesia,                            explicit params,                    advanced one pass small out,        4815369
-silesia,                            uncompressed literals,              advanced one pass small out,        5155424
-silesia,                            uncompressed literals optimal,      advanced one pass small out,        4325427
+silesia,                            explicit params,                    advanced one pass small out,        4797035
+silesia,                            uncompressed literals,              advanced one pass small out,        5127960
+silesia,                            uncompressed literals optimal,      advanced one pass small out,        4325434
 silesia,                            huffman literals,                   advanced one pass small out,        5326210
-silesia,                            multithreaded with advanced params, advanced one pass small out,        5155424
+silesia,                            multithreaded with advanced params, advanced one pass small out,        5127960
 silesia.tar,                        level -5,                           advanced one pass small out,        6738558
 silesia.tar,                        level -3,                           advanced one pass small out,        6446362
 silesia.tar,                        level -1,                           advanced one pass small out,        6186038
-silesia.tar,                        level 0,                            advanced one pass small out,        4875008
+silesia.tar,                        level 0,                            advanced one pass small out,        4861374
 silesia.tar,                        level 1,                            advanced one pass small out,        5334825
-silesia.tar,                        level 3,                            advanced one pass small out,        4875008
-silesia.tar,                        level 4,                            advanced one pass small out,        4813507
-silesia.tar,                        level 5,                            advanced one pass small out,        4722235
-silesia.tar,                        level 6,                            advanced one pass small out,        4672194
-silesia.tar,                        level 7,                            advanced one pass small out,        4606658
-silesia.tar,                        level 9,                            advanced one pass small out,        4554098
-silesia.tar,                        level 13,                           advanced one pass small out,        4491702
-silesia.tar,                        level 16,                           advanced one pass small out,        4381277
-silesia.tar,                        level 19,                           advanced one pass small out,        4281514
-silesia.tar,                        no source size,                     advanced one pass small out,        4875008
-silesia.tar,                        long distance mode,                 advanced one pass small out,        4861218
-silesia.tar,                        multithreaded,                      advanced one pass small out,        4874631
-silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4860683
-silesia.tar,                        small window log,                   advanced one pass small out,        7130394
+silesia.tar,                        level 3,                            advanced one pass small out,        4861374
+silesia.tar,                        level 4,                            advanced one pass small out,        4799583
+silesia.tar,                        level 5,                            advanced one pass small out,        4722271
+silesia.tar,                        level 6,                            advanced one pass small out,        4672231
+silesia.tar,                        level 7,                            advanced one pass small out,        4606657
+silesia.tar,                        level 9,                            advanced one pass small out,        4554099
+silesia.tar,                        level 13,                           advanced one pass small out,        4491706
+silesia.tar,                        level 16,                           advanced one pass small out,        4381265
+silesia.tar,                        level 19,                           advanced one pass small out,        4281551
+silesia.tar,                        no source size,                     advanced one pass small out,        4861374
+silesia.tar,                        long distance mode,                 advanced one pass small out,        4848046
+silesia.tar,                        multithreaded,                      advanced one pass small out,        4860726
+silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4847343
+silesia.tar,                        small window log,                   advanced one pass small out,        7127924
 silesia.tar,                        small hash log,                     advanced one pass small out,        6587833
-silesia.tar,                        small chain log,                    advanced one pass small out,        4943255
-silesia.tar,                        explicit params,                    advanced one pass small out,        4829974
-silesia.tar,                        uncompressed literals,              advanced one pass small out,        5157992
-silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4321094
+silesia.tar,                        small chain log,                    advanced one pass small out,        4943266
+silesia.tar,                        explicit params,                    advanced one pass small out,        4808543
+silesia.tar,                        uncompressed literals,              advanced one pass small out,        5129447
+silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4320910
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5347283
-silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5158545
+silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5129766
 github,                             level -5,                           advanced one pass small out,        205285
 github,                             level -5 with dict,                 advanced one pass small out,        46718
 github,                             level -3,                           advanced one pass small out,        190643
 github,                             level -3 with dict,                 advanced one pass small out,        45395
 github,                             level -1,                           advanced one pass small out,        175568
 github,                             level -1 with dict,                 advanced one pass small out,        43170
-github,                             level 0,                            advanced one pass small out,        136397
-github,                             level 0 with dict,                  advanced one pass small out,        41170
+github,                             level 0,                            advanced one pass small out,        136311
+github,                             level 0 with dict,                  advanced one pass small out,        41148
 github,                             level 1,                            advanced one pass small out,        142450
 github,                             level 1 with dict,                  advanced one pass small out,        41682
-github,                             level 3,                            advanced one pass small out,        136397
-github,                             level 3 with dict,                  advanced one pass small out,        41170
+github,                             level 3,                            advanced one pass small out,        136311
+github,                             level 3 with dict,                  advanced one pass small out,        41148
 github,                             level 4,                            advanced one pass small out,        136144
-github,                             level 4 with dict,                  advanced one pass small out,        41306
+github,                             level 4 with dict,                  advanced one pass small out,        41251
 github,                             level 5,                            advanced one pass small out,        135106
 github,                             level 5 with dict,                  advanced one pass small out,        38938
 github,                             level 6,                            advanced one pass small out,        135108
@@ -342,84 +342,84 @@
 github,                             level 16 with dict,                 advanced one pass small out,        37577
 github,                             level 19,                           advanced one pass small out,        133717
 github,                             level 19 with dict,                 advanced one pass small out,        37576
-github,                             no source size,                     advanced one pass small out,        136397
-github,                             long distance mode,                 advanced one pass small out,        136397
-github,                             multithreaded,                      advanced one pass small out,        136397
-github,                             multithreaded long distance mode,   advanced one pass small out,        136397
-github,                             small window log,                   advanced one pass small out,        136397
+github,                             no source size,                     advanced one pass small out,        136311
+github,                             long distance mode,                 advanced one pass small out,        136311
+github,                             multithreaded,                      advanced one pass small out,        136311
+github,                             multithreaded long distance mode,   advanced one pass small out,        136311
+github,                             small window log,                   advanced one pass small out,        136311
 github,                             small hash log,                     advanced one pass small out,        135467
 github,                             small chain log,                    advanced one pass small out,        136314
 github,                             explicit params,                    advanced one pass small out,        137670
-github,                             uncompressed literals,              advanced one pass small out,        167004
+github,                             uncompressed literals,              advanced one pass small out,        165915
 github,                             uncompressed literals optimal,      advanced one pass small out,        156824
 github,                             huffman literals,                   advanced one pass small out,        142450
-github,                             multithreaded with advanced params, advanced one pass small out,        167004
+github,                             multithreaded with advanced params, advanced one pass small out,        165915
 silesia,                            level -5,                           advanced streaming,                 6882466
 silesia,                            level -3,                           advanced streaming,                 6568358
 silesia,                            level -1,                           advanced streaming,                 6183385
-silesia,                            level 0,                            advanced streaming,                 4862377
+silesia,                            level 0,                            advanced streaming,                 4849491
 silesia,                            level 1,                            advanced streaming,                 5314109
-silesia,                            level 3,                            advanced streaming,                 4862377
-silesia,                            level 4,                            advanced streaming,                 4800629
+silesia,                            level 3,                            advanced streaming,                 4849491
+silesia,                            level 4,                            advanced streaming,                 4786913
 silesia,                            level 5,                            advanced streaming,                 4710178
 silesia,                            level 6,                            advanced streaming,                 4659996
 silesia,                            level 7,                            advanced streaming,                 4596234
 silesia,                            level 9,                            advanced streaming,                 4543862
 silesia,                            level 13,                           advanced streaming,                 4482073
-silesia,                            level 16,                           advanced streaming,                 4377391
+silesia,                            level 16,                           advanced streaming,                 4377389
 silesia,                            level 19,                           advanced streaming,                 4293262
-silesia,                            no source size,                     advanced streaming,                 4862341
-silesia,                            long distance mode,                 advanced streaming,                 4853389
-silesia,                            multithreaded,                      advanced streaming,                 4862377
-silesia,                            multithreaded long distance mode,   advanced streaming,                 4853389
-silesia,                            small window log,                   advanced streaming,                 7126389
+silesia,                            no source size,                     advanced streaming,                 4849455
+silesia,                            long distance mode,                 advanced streaming,                 4839650
+silesia,                            multithreaded,                      advanced streaming,                 4849491
+silesia,                            multithreaded long distance mode,   advanced streaming,                 4839650
+silesia,                            small window log,                   advanced streaming,                 7123846
 silesia,                            small hash log,                     advanced streaming,                 6554898
 silesia,                            small chain log,                    advanced streaming,                 4931093
-silesia,                            explicit params,                    advanced streaming,                 4815380
-silesia,                            uncompressed literals,              advanced streaming,                 5155424
-silesia,                            uncompressed literals optimal,      advanced streaming,                 4325427
+silesia,                            explicit params,                    advanced streaming,                 4797048
+silesia,                            uncompressed literals,              advanced streaming,                 5127960
+silesia,                            uncompressed literals optimal,      advanced streaming,                 4325434
 silesia,                            huffman literals,                   advanced streaming,                 5331110
-silesia,                            multithreaded with advanced params, advanced streaming,                 5155424
+silesia,                            multithreaded with advanced params, advanced streaming,                 5127960
 silesia.tar,                        level -5,                           advanced streaming,                 6982738
 silesia.tar,                        level -3,                           advanced streaming,                 6641264
 silesia.tar,                        level -1,                           advanced streaming,                 6190789
-silesia.tar,                        level 0,                            advanced streaming,                 4875010
+silesia.tar,                        level 0,                            advanced streaming,                 4861376
 silesia.tar,                        level 1,                            advanced streaming,                 5336879
-silesia.tar,                        level 3,                            advanced streaming,                 4875010
-silesia.tar,                        level 4,                            advanced streaming,                 4813507
-silesia.tar,                        level 5,                            advanced streaming,                 4722240
-silesia.tar,                        level 6,                            advanced streaming,                 4672203
-silesia.tar,                        level 7,                            advanced streaming,                 4606658
-silesia.tar,                        level 9,                            advanced streaming,                 4554105
-silesia.tar,                        level 13,                           advanced streaming,                 4491703
-silesia.tar,                        level 16,                           advanced streaming,                 4381277
-silesia.tar,                        level 19,                           advanced streaming,                 4281514
-silesia.tar,                        no source size,                     advanced streaming,                 4875006
-silesia.tar,                        long distance mode,                 advanced streaming,                 4861218
-silesia.tar,                        multithreaded,                      advanced streaming,                 4875132
-silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4866971
-silesia.tar,                        small window log,                   advanced streaming,                 7130394
+silesia.tar,                        level 3,                            advanced streaming,                 4861376
+silesia.tar,                        level 4,                            advanced streaming,                 4799583
+silesia.tar,                        level 5,                            advanced streaming,                 4722276
+silesia.tar,                        level 6,                            advanced streaming,                 4672240
+silesia.tar,                        level 7,                            advanced streaming,                 4606657
+silesia.tar,                        level 9,                            advanced streaming,                 4554106
+silesia.tar,                        level 13,                           advanced streaming,                 4491707
+silesia.tar,                        level 16,                           advanced streaming,                 4381284
+silesia.tar,                        level 19,                           advanced streaming,                 4281511
+silesia.tar,                        no source size,                     advanced streaming,                 4861372
+silesia.tar,                        long distance mode,                 advanced streaming,                 4848046
+silesia.tar,                        multithreaded,                      advanced streaming,                 4861458
+silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4853136
+silesia.tar,                        small window log,                   advanced streaming,                 7127924
 silesia.tar,                        small hash log,                     advanced streaming,                 6587834
-silesia.tar,                        small chain log,                    advanced streaming,                 4943260
-silesia.tar,                        explicit params,                    advanced streaming,                 4830002
-silesia.tar,                        uncompressed literals,              advanced streaming,                 5157995
-silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4321094
+silesia.tar,                        small chain log,                    advanced streaming,                 4943271
+silesia.tar,                        explicit params,                    advanced streaming,                 4808570
+silesia.tar,                        uncompressed literals,              advanced streaming,                 5129450
+silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4320841
 silesia.tar,                        huffman literals,                   advanced streaming,                 5352306
-silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5158130
+silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5129544
 github,                             level -5,                           advanced streaming,                 205285
 github,                             level -5 with dict,                 advanced streaming,                 46718
 github,                             level -3,                           advanced streaming,                 190643
 github,                             level -3 with dict,                 advanced streaming,                 45395
 github,                             level -1,                           advanced streaming,                 175568
 github,                             level -1 with dict,                 advanced streaming,                 43170
-github,                             level 0,                            advanced streaming,                 136397
-github,                             level 0 with dict,                  advanced streaming,                 41170
+github,                             level 0,                            advanced streaming,                 136311
+github,                             level 0 with dict,                  advanced streaming,                 41148
 github,                             level 1,                            advanced streaming,                 142450
 github,                             level 1 with dict,                  advanced streaming,                 41682
-github,                             level 3,                            advanced streaming,                 136397
-github,                             level 3 with dict,                  advanced streaming,                 41170
+github,                             level 3,                            advanced streaming,                 136311
+github,                             level 3 with dict,                  advanced streaming,                 41148
 github,                             level 4,                            advanced streaming,                 136144
-github,                             level 4 with dict,                  advanced streaming,                 41306
+github,                             level 4 with dict,                  advanced streaming,                 41251
 github,                             level 5,                            advanced streaming,                 135106
 github,                             level 5 with dict,                  advanced streaming,                 38938
 github,                             level 6,                            advanced streaming,                 135108
@@ -434,33 +434,33 @@
 github,                             level 16 with dict,                 advanced streaming,                 37577
 github,                             level 19,                           advanced streaming,                 133717
 github,                             level 19 with dict,                 advanced streaming,                 37576
-github,                             no source size,                     advanced streaming,                 136397
-github,                             long distance mode,                 advanced streaming,                 136397
-github,                             multithreaded,                      advanced streaming,                 136397
-github,                             multithreaded long distance mode,   advanced streaming,                 136397
-github,                             small window log,                   advanced streaming,                 136397
+github,                             no source size,                     advanced streaming,                 136311
+github,                             long distance mode,                 advanced streaming,                 136311
+github,                             multithreaded,                      advanced streaming,                 136311
+github,                             multithreaded long distance mode,   advanced streaming,                 136311
+github,                             small window log,                   advanced streaming,                 136311
 github,                             small hash log,                     advanced streaming,                 135467
 github,                             small chain log,                    advanced streaming,                 136314
 github,                             explicit params,                    advanced streaming,                 137670
-github,                             uncompressed literals,              advanced streaming,                 167004
+github,                             uncompressed literals,              advanced streaming,                 165915
 github,                             uncompressed literals optimal,      advanced streaming,                 156824
 github,                             huffman literals,                   advanced streaming,                 142450
-github,                             multithreaded with advanced params, advanced streaming,                 167004
+github,                             multithreaded with advanced params, advanced streaming,                 165915
 silesia,                            level -5,                           old streaming,                      6882466
 silesia,                            level -3,                           old streaming,                      6568358
 silesia,                            level -1,                           old streaming,                      6183385
-silesia,                            level 0,                            old streaming,                      4862377
+silesia,                            level 0,                            old streaming,                      4849491
 silesia,                            level 1,                            old streaming,                      5314109
-silesia,                            level 3,                            old streaming,                      4862377
-silesia,                            level 4,                            old streaming,                      4800629
+silesia,                            level 3,                            old streaming,                      4849491
+silesia,                            level 4,                            old streaming,                      4786913
 silesia,                            level 5,                            old streaming,                      4710178
 silesia,                            level 6,                            old streaming,                      4659996
 silesia,                            level 7,                            old streaming,                      4596234
 silesia,                            level 9,                            old streaming,                      4543862
 silesia,                            level 13,                           old streaming,                      4482073
-silesia,                            level 16,                           old streaming,                      4377391
+silesia,                            level 16,                           old streaming,                      4377389
 silesia,                            level 19,                           old streaming,                      4293262
-silesia,                            no source size,                     old streaming,                      4862341
+silesia,                            no source size,                     old streaming,                      4849455
 silesia,                            long distance mode,                 old streaming,                      12000408
 silesia,                            multithreaded,                      old streaming,                      12000408
 silesia,                            multithreaded long distance mode,   old streaming,                      12000408
@@ -468,25 +468,25 @@
 silesia,                            small hash log,                     old streaming,                      12000408
 silesia,                            small chain log,                    old streaming,                      12000408
 silesia,                            explicit params,                    old streaming,                      12000408
-silesia,                            uncompressed literals,              old streaming,                      4862377
+silesia,                            uncompressed literals,              old streaming,                      4849491
 silesia,                            uncompressed literals optimal,      old streaming,                      4293262
 silesia,                            huffman literals,                   old streaming,                      6183385
 silesia,                            multithreaded with advanced params, old streaming,                      12000408
 silesia.tar,                        level -5,                           old streaming,                      6982738
 silesia.tar,                        level -3,                           old streaming,                      6641264
 silesia.tar,                        level -1,                           old streaming,                      6190789
-silesia.tar,                        level 0,                            old streaming,                      4875010
+silesia.tar,                        level 0,                            old streaming,                      4861376
 silesia.tar,                        level 1,                            old streaming,                      5336879
-silesia.tar,                        level 3,                            old streaming,                      4875010
-silesia.tar,                        level 4,                            old streaming,                      4813507
-silesia.tar,                        level 5,                            old streaming,                      4722240
-silesia.tar,                        level 6,                            old streaming,                      4672203
-silesia.tar,                        level 7,                            old streaming,                      4606658
-silesia.tar,                        level 9,                            old streaming,                      4554105
-silesia.tar,                        level 13,                           old streaming,                      4491703
-silesia.tar,                        level 16,                           old streaming,                      4381277
-silesia.tar,                        level 19,                           old streaming,                      4281514
-silesia.tar,                        no source size,                     old streaming,                      4875006
+silesia.tar,                        level 3,                            old streaming,                      4861376
+silesia.tar,                        level 4,                            old streaming,                      4799583
+silesia.tar,                        level 5,                            old streaming,                      4722276
+silesia.tar,                        level 6,                            old streaming,                      4672240
+silesia.tar,                        level 7,                            old streaming,                      4606657
+silesia.tar,                        level 9,                            old streaming,                      4554106
+silesia.tar,                        level 13,                           old streaming,                      4491707
+silesia.tar,                        level 16,                           old streaming,                      4381284
+silesia.tar,                        level 19,                           old streaming,                      4281511
+silesia.tar,                        no source size,                     old streaming,                      4861372
 silesia.tar,                        long distance mode,                 old streaming,                      12022046
 silesia.tar,                        multithreaded,                      old streaming,                      12022046
 silesia.tar,                        multithreaded long distance mode,   old streaming,                      12022046
@@ -494,8 +494,8 @@
 silesia.tar,                        small hash log,                     old streaming,                      12022046
 silesia.tar,                        small chain log,                    old streaming,                      12022046
 silesia.tar,                        explicit params,                    old streaming,                      12022046
-silesia.tar,                        uncompressed literals,              old streaming,                      4875010
-silesia.tar,                        uncompressed literals optimal,      old streaming,                      4281514
+silesia.tar,                        uncompressed literals,              old streaming,                      4861376
+silesia.tar,                        uncompressed literals optimal,      old streaming,                      4281511
 silesia.tar,                        huffman literals,                   old streaming,                      6190789
 silesia.tar,                        multithreaded with advanced params, old streaming,                      12022046
 github,                             level -5,                           old streaming,                      205285
@@ -504,14 +504,14 @@
 github,                             level -3 with dict,                 old streaming,                      45395
 github,                             level -1,                           old streaming,                      175568
 github,                             level -1 with dict,                 old streaming,                      43170
-github,                             level 0,                            old streaming,                      136397
-github,                             level 0 with dict,                  old streaming,                      41170
+github,                             level 0,                            old streaming,                      136311
+github,                             level 0 with dict,                  old streaming,                      41148
 github,                             level 1,                            old streaming,                      142450
 github,                             level 1 with dict,                  old streaming,                      41682
-github,                             level 3,                            old streaming,                      136397
-github,                             level 3 with dict,                  old streaming,                      41170
+github,                             level 3,                            old streaming,                      136311
+github,                             level 3 with dict,                  old streaming,                      41148
 github,                             level 4,                            old streaming,                      136144
-github,                             level 4 with dict,                  old streaming,                      41306
+github,                             level 4 with dict,                  old streaming,                      41251
 github,                             level 5,                            old streaming,                      135106
 github,                             level 5 with dict,                  old streaming,                      38938
 github,                             level 6,                            old streaming,                      135108
@@ -526,7 +526,7 @@
 github,                             level 16 with dict,                 old streaming,                      37577
 github,                             level 19,                           old streaming,                      133717
 github,                             level 19 with dict,                 old streaming,                      37576
-github,                             no source size,                     old streaming,                      141003
+github,                             no source size,                     old streaming,                      140631
 github,                             long distance mode,                 old streaming,                      412933
 github,                             multithreaded,                      old streaming,                      412933
 github,                             multithreaded long distance mode,   old streaming,                      412933
@@ -534,25 +534,25 @@
 github,                             small hash log,                     old streaming,                      412933
 github,                             small chain log,                    old streaming,                      412933
 github,                             explicit params,                    old streaming,                      412933
-github,                             uncompressed literals,              old streaming,                      136397
+github,                             uncompressed literals,              old streaming,                      136311
 github,                             uncompressed literals optimal,      old streaming,                      133717
 github,                             huffman literals,                   old streaming,                      175568
 github,                             multithreaded with advanced params, old streaming,                      412933
 silesia,                            level -5,                           old streaming advanced,             6882466
 silesia,                            level -3,                           old streaming advanced,             6568358
 silesia,                            level -1,                           old streaming advanced,             6183385
-silesia,                            level 0,                            old streaming advanced,             4862377
+silesia,                            level 0,                            old streaming advanced,             4849491
 silesia,                            level 1,                            old streaming advanced,             5314109
-silesia,                            level 3,                            old streaming advanced,             4862377
-silesia,                            level 4,                            old streaming advanced,             4800629
+silesia,                            level 3,                            old streaming advanced,             4849491
+silesia,                            level 4,                            old streaming advanced,             4786913
 silesia,                            level 5,                            old streaming advanced,             4710178
 silesia,                            level 6,                            old streaming advanced,             4659996
 silesia,                            level 7,                            old streaming advanced,             4596234
 silesia,                            level 9,                            old streaming advanced,             4543862
 silesia,                            level 13,                           old streaming advanced,             4482073
-silesia,                            level 16,                           old streaming advanced,             4377391
+silesia,                            level 16,                           old streaming advanced,             4377389
 silesia,                            level 19,                           old streaming advanced,             4293262
-silesia,                            no source size,                     old streaming advanced,             4862341
+silesia,                            no source size,                     old streaming advanced,             4849455
 silesia,                            long distance mode,                 old streaming advanced,             12000408
 silesia,                            multithreaded,                      old streaming advanced,             12000408
 silesia,                            multithreaded long distance mode,   old streaming advanced,             12000408
@@ -560,25 +560,25 @@
 silesia,                            small hash log,                     old streaming advanced,             12000408
 silesia,                            small chain log,                    old streaming advanced,             12000408
 silesia,                            explicit params,                    old streaming advanced,             12000408
-silesia,                            uncompressed literals,              old streaming advanced,             4862377
+silesia,                            uncompressed literals,              old streaming advanced,             4849491
 silesia,                            uncompressed literals optimal,      old streaming advanced,             4293262
 silesia,                            huffman literals,                   old streaming advanced,             6183385
 silesia,                            multithreaded with advanced params, old streaming advanced,             12000408
 silesia.tar,                        level -5,                           old streaming advanced,             6982738
 silesia.tar,                        level -3,                           old streaming advanced,             6641264
 silesia.tar,                        level -1,                           old streaming advanced,             6190789
-silesia.tar,                        level 0,                            old streaming advanced,             4875010
+silesia.tar,                        level 0,                            old streaming advanced,             4861376
 silesia.tar,                        level 1,                            old streaming advanced,             5336879
-silesia.tar,                        level 3,                            old streaming advanced,             4875010
-silesia.tar,                        level 4,                            old streaming advanced,             4813507
-silesia.tar,                        level 5,                            old streaming advanced,             4722240
-silesia.tar,                        level 6,                            old streaming advanced,             4672203
-silesia.tar,                        level 7,                            old streaming advanced,             4606658
-silesia.tar,                        level 9,                            old streaming advanced,             4554105
-silesia.tar,                        level 13,                           old streaming advanced,             4491703
-silesia.tar,                        level 16,                           old streaming advanced,             4381277
-silesia.tar,                        level 19,                           old streaming advanced,             4281514
-silesia.tar,                        no source size,                     old streaming advanced,             4875006
+silesia.tar,                        level 3,                            old streaming advanced,             4861376
+silesia.tar,                        level 4,                            old streaming advanced,             4799583
+silesia.tar,                        level 5,                            old streaming advanced,             4722276
+silesia.tar,                        level 6,                            old streaming advanced,             4672240
+silesia.tar,                        level 7,                            old streaming advanced,             4606657
+silesia.tar,                        level 9,                            old streaming advanced,             4554106
+silesia.tar,                        level 13,                           old streaming advanced,             4491707
+silesia.tar,                        level 16,                           old streaming advanced,             4381284
+silesia.tar,                        level 19,                           old streaming advanced,             4281511
+silesia.tar,                        no source size,                     old streaming advanced,             4861372
 silesia.tar,                        long distance mode,                 old streaming advanced,             12022046
 silesia.tar,                        multithreaded,                      old streaming advanced,             12022046
 silesia.tar,                        multithreaded long distance mode,   old streaming advanced,             12022046
@@ -586,8 +586,8 @@
 silesia.tar,                        small hash log,                     old streaming advanced,             12022046
 silesia.tar,                        small chain log,                    old streaming advanced,             12022046
 silesia.tar,                        explicit params,                    old streaming advanced,             12022046
-silesia.tar,                        uncompressed literals,              old streaming advanced,             4875010
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4281514
+silesia.tar,                        uncompressed literals,              old streaming advanced,             4861376
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4281511
 silesia.tar,                        huffman literals,                   old streaming advanced,             6190789
 silesia.tar,                        multithreaded with advanced params, old streaming advanced,             12022046
 github,                             level -5,                           old streaming advanced,             205285
@@ -596,14 +596,14 @@
 github,                             level -3 with dict,                 old streaming advanced,             45395
 github,                             level -1,                           old streaming advanced,             175568
 github,                             level -1 with dict,                 old streaming advanced,             43170
-github,                             level 0,                            old streaming advanced,             136397
-github,                             level 0 with dict,                  old streaming advanced,             41170
+github,                             level 0,                            old streaming advanced,             136311
+github,                             level 0 with dict,                  old streaming advanced,             41148
 github,                             level 1,                            old streaming advanced,             142450
 github,                             level 1 with dict,                  old streaming advanced,             41682
-github,                             level 3,                            old streaming advanced,             136397
-github,                             level 3 with dict,                  old streaming advanced,             41170
+github,                             level 3,                            old streaming advanced,             136311
+github,                             level 3 with dict,                  old streaming advanced,             41148
 github,                             level 4,                            old streaming advanced,             136144
-github,                             level 4 with dict,                  old streaming advanced,             41306
+github,                             level 4 with dict,                  old streaming advanced,             41251
 github,                             level 5,                            old streaming advanced,             135106
 github,                             level 5 with dict,                  old streaming advanced,             38938
 github,                             level 6,                            old streaming advanced,             135108
@@ -618,7 +618,7 @@
 github,                             level 16 with dict,                 old streaming advanced,             37577
 github,                             level 19,                           old streaming advanced,             133717
 github,                             level 19 with dict,                 old streaming advanced,             37576
-github,                             no source size,                     old streaming advanced,             141003
+github,                             no source size,                     old streaming advanced,             140631
 github,                             long distance mode,                 old streaming advanced,             412933
 github,                             multithreaded,                      old streaming advanced,             412933
 github,                             multithreaded long distance mode,   old streaming advanced,             412933
@@ -626,25 +626,25 @@
 github,                             small hash log,                     old streaming advanced,             412933
 github,                             small chain log,                    old streaming advanced,             412933
 github,                             explicit params,                    old streaming advanced,             412933
-github,                             uncompressed literals,              old streaming advanced,             136397
+github,                             uncompressed literals,              old streaming advanced,             136311
 github,                             uncompressed literals optimal,      old streaming advanced,             133717
 github,                             huffman literals,                   old streaming advanced,             175568
 github,                             multithreaded with advanced params, old streaming advanced,             412933
 silesia,                            level -5,                           old streaming cdcit,                6882466
 silesia,                            level -3,                           old streaming cdcit,                6568358
 silesia,                            level -1,                           old streaming cdcit,                6183385
-silesia,                            level 0,                            old streaming cdcit,                4862377
+silesia,                            level 0,                            old streaming cdcit,                4849491
 silesia,                            level 1,                            old streaming cdcit,                5314109
-silesia,                            level 3,                            old streaming cdcit,                4862377
-silesia,                            level 4,                            old streaming cdcit,                4800629
+silesia,                            level 3,                            old streaming cdcit,                4849491
+silesia,                            level 4,                            old streaming cdcit,                4786913
 silesia,                            level 5,                            old streaming cdcit,                4710178
 silesia,                            level 6,                            old streaming cdcit,                4659996
 silesia,                            level 7,                            old streaming cdcit,                4596234
 silesia,                            level 9,                            old streaming cdcit,                4543862
 silesia,                            level 13,                           old streaming cdcit,                4482073
-silesia,                            level 16,                           old streaming cdcit,                4377391
+silesia,                            level 16,                           old streaming cdcit,                4377389
 silesia,                            level 19,                           old streaming cdcit,                4293262
-silesia,                            no source size,                     old streaming cdcit,                4862341
+silesia,                            no source size,                     old streaming cdcit,                4849455
 silesia,                            long distance mode,                 old streaming cdcit,                12000408
 silesia,                            multithreaded,                      old streaming cdcit,                12000408
 silesia,                            multithreaded long distance mode,   old streaming cdcit,                12000408
@@ -652,25 +652,25 @@
 silesia,                            small hash log,                     old streaming cdcit,                12000408
 silesia,                            small chain log,                    old streaming cdcit,                12000408
 silesia,                            explicit params,                    old streaming cdcit,                12000408
-silesia,                            uncompressed literals,              old streaming cdcit,                4862377
+silesia,                            uncompressed literals,              old streaming cdcit,                4849491
 silesia,                            uncompressed literals optimal,      old streaming cdcit,                4293262
 silesia,                            huffman literals,                   old streaming cdcit,                6183385
 silesia,                            multithreaded with advanced params, old streaming cdcit,                12000408
 silesia.tar,                        level -5,                           old streaming cdcit,                6982738
 silesia.tar,                        level -3,                           old streaming cdcit,                6641264
 silesia.tar,                        level -1,                           old streaming cdcit,                6190789
-silesia.tar,                        level 0,                            old streaming cdcit,                4875010
+silesia.tar,                        level 0,                            old streaming cdcit,                4861376
 silesia.tar,                        level 1,                            old streaming cdcit,                5336879
-silesia.tar,                        level 3,                            old streaming cdcit,                4875010
-silesia.tar,                        level 4,                            old streaming cdcit,                4813507
-silesia.tar,                        level 5,                            old streaming cdcit,                4722240
-silesia.tar,                        level 6,                            old streaming cdcit,                4672203
-silesia.tar,                        level 7,                            old streaming cdcit,                4606658
-silesia.tar,                        level 9,                            old streaming cdcit,                4554105
-silesia.tar,                        level 13,                           old streaming cdcit,                4491703
-silesia.tar,                        level 16,                           old streaming cdcit,                4381277
-silesia.tar,                        level 19,                           old streaming cdcit,                4281514
-silesia.tar,                        no source size,                     old streaming cdcit,                4875006
+silesia.tar,                        level 3,                            old streaming cdcit,                4861376
+silesia.tar,                        level 4,                            old streaming cdcit,                4799583
+silesia.tar,                        level 5,                            old streaming cdcit,                4722276
+silesia.tar,                        level 6,                            old streaming cdcit,                4672240
+silesia.tar,                        level 7,                            old streaming cdcit,                4606657
+silesia.tar,                        level 9,                            old streaming cdcit,                4554106
+silesia.tar,                        level 13,                           old streaming cdcit,                4491707
+silesia.tar,                        level 16,                           old streaming cdcit,                4381284
+silesia.tar,                        level 19,                           old streaming cdcit,                4281511
+silesia.tar,                        no source size,                     old streaming cdcit,                4861372
 silesia.tar,                        long distance mode,                 old streaming cdcit,                12022046
 silesia.tar,                        multithreaded,                      old streaming cdcit,                12022046
 silesia.tar,                        multithreaded long distance mode,   old streaming cdcit,                12022046
@@ -678,8 +678,8 @@
 silesia.tar,                        small hash log,                     old streaming cdcit,                12022046
 silesia.tar,                        small chain log,                    old streaming cdcit,                12022046
 silesia.tar,                        explicit params,                    old streaming cdcit,                12022046
-silesia.tar,                        uncompressed literals,              old streaming cdcit,                4875010
-silesia.tar,                        uncompressed literals optimal,      old streaming cdcit,                4281514
+silesia.tar,                        uncompressed literals,              old streaming cdcit,                4861376
+silesia.tar,                        uncompressed literals optimal,      old streaming cdcit,                4281511
 silesia.tar,                        huffman literals,                   old streaming cdcit,                6190789
 silesia.tar,                        multithreaded with advanced params, old streaming cdcit,                12022046
 github,                             level -5,                           old streaming cdcit,                205285
@@ -688,14 +688,14 @@
 github,                             level -3 with dict,                 old streaming cdcit,                45395
 github,                             level -1,                           old streaming cdcit,                175568
 github,                             level -1 with dict,                 old streaming cdcit,                43170
-github,                             level 0,                            old streaming cdcit,                136397
-github,                             level 0 with dict,                  old streaming cdcit,                41170
+github,                             level 0,                            old streaming cdcit,                136311
+github,                             level 0 with dict,                  old streaming cdcit,                41148
 github,                             level 1,                            old streaming cdcit,                142450
 github,                             level 1 with dict,                  old streaming cdcit,                41682
-github,                             level 3,                            old streaming cdcit,                136397
-github,                             level 3 with dict,                  old streaming cdcit,                41170
+github,                             level 3,                            old streaming cdcit,                136311
+github,                             level 3 with dict,                  old streaming cdcit,                41148
 github,                             level 4,                            old streaming cdcit,                136144
-github,                             level 4 with dict,                  old streaming cdcit,                41306
+github,                             level 4 with dict,                  old streaming cdcit,                41251
 github,                             level 5,                            old streaming cdcit,                135106
 github,                             level 5 with dict,                  old streaming cdcit,                38938
 github,                             level 6,                            old streaming cdcit,                135108
@@ -710,7 +710,7 @@
 github,                             level 16 with dict,                 old streaming cdcit,                37577
 github,                             level 19,                           old streaming cdcit,                133717
 github,                             level 19 with dict,                 old streaming cdcit,                37576
-github,                             no source size,                     old streaming cdcit,                141003
+github,                             no source size,                     old streaming cdcit,                140631
 github,                             long distance mode,                 old streaming cdcit,                412933
 github,                             multithreaded,                      old streaming cdcit,                412933
 github,                             multithreaded long distance mode,   old streaming cdcit,                412933
@@ -718,25 +718,25 @@
 github,                             small hash log,                     old streaming cdcit,                412933
 github,                             small chain log,                    old streaming cdcit,                412933
 github,                             explicit params,                    old streaming cdcit,                412933
-github,                             uncompressed literals,              old streaming cdcit,                136397
+github,                             uncompressed literals,              old streaming cdcit,                136311
 github,                             uncompressed literals optimal,      old streaming cdcit,                133717
 github,                             huffman literals,                   old streaming cdcit,                175568
 github,                             multithreaded with advanced params, old streaming cdcit,                412933
 silesia,                            level -5,                           old streaming advanced cdict,       6882466
 silesia,                            level -3,                           old streaming advanced cdict,       6568358
 silesia,                            level -1,                           old streaming advanced cdict,       6183385
-silesia,                            level 0,                            old streaming advanced cdict,       4862377
+silesia,                            level 0,                            old streaming advanced cdict,       4849491
 silesia,                            level 1,                            old streaming advanced cdict,       5314109
-silesia,                            level 3,                            old streaming advanced cdict,       4862377
-silesia,                            level 4,                            old streaming advanced cdict,       4800629
+silesia,                            level 3,                            old streaming advanced cdict,       4849491
+silesia,                            level 4,                            old streaming advanced cdict,       4786913
 silesia,                            level 5,                            old streaming advanced cdict,       4710178
 silesia,                            level 6,                            old streaming advanced cdict,       4659996
 silesia,                            level 7,                            old streaming advanced cdict,       4596234
 silesia,                            level 9,                            old streaming advanced cdict,       4543862
 silesia,                            level 13,                           old streaming advanced cdict,       4482073
-silesia,                            level 16,                           old streaming advanced cdict,       4377391
+silesia,                            level 16,                           old streaming advanced cdict,       4377389
 silesia,                            level 19,                           old streaming advanced cdict,       4293262
-silesia,                            no source size,                     old streaming advanced cdict,       4862341
+silesia,                            no source size,                     old streaming advanced cdict,       4849455
 silesia,                            long distance mode,                 old streaming advanced cdict,       12000408
 silesia,                            multithreaded,                      old streaming advanced cdict,       12000408
 silesia,                            multithreaded long distance mode,   old streaming advanced cdict,       12000408
@@ -744,25 +744,25 @@
 silesia,                            small hash log,                     old streaming advanced cdict,       12000408
 silesia,                            small chain log,                    old streaming advanced cdict,       12000408
 silesia,                            explicit params,                    old streaming advanced cdict,       12000408
-silesia,                            uncompressed literals,              old streaming advanced cdict,       4862377
+silesia,                            uncompressed literals,              old streaming advanced cdict,       4849491
 silesia,                            uncompressed literals optimal,      old streaming advanced cdict,       4293262
 silesia,                            huffman literals,                   old streaming advanced cdict,       6183385
 silesia,                            multithreaded with advanced params, old streaming advanced cdict,       12000408
 silesia.tar,                        level -5,                           old streaming advanced cdict,       6982738
 silesia.tar,                        level -3,                           old streaming advanced cdict,       6641264
 silesia.tar,                        level -1,                           old streaming advanced cdict,       6190789
-silesia.tar,                        level 0,                            old streaming advanced cdict,       4875010
+silesia.tar,                        level 0,                            old streaming advanced cdict,       4861376
 silesia.tar,                        level 1,                            old streaming advanced cdict,       5336879
-silesia.tar,                        level 3,                            old streaming advanced cdict,       4875010
-silesia.tar,                        level 4,                            old streaming advanced cdict,       4813507
-silesia.tar,                        level 5,                            old streaming advanced cdict,       4722240
-silesia.tar,                        level 6,                            old streaming advanced cdict,       4672203
-silesia.tar,                        level 7,                            old streaming advanced cdict,       4606658
-silesia.tar,                        level 9,                            old streaming advanced cdict,       4554105
-silesia.tar,                        level 13,                           old streaming advanced cdict,       4491703
-silesia.tar,                        level 16,                           old streaming advanced cdict,       4381277
-silesia.tar,                        level 19,                           old streaming advanced cdict,       4281514
-silesia.tar,                        no source size,                     old streaming advanced cdict,       4875006
+silesia.tar,                        level 3,                            old streaming advanced cdict,       4861376
+silesia.tar,                        level 4,                            old streaming advanced cdict,       4799583
+silesia.tar,                        level 5,                            old streaming advanced cdict,       4722276
+silesia.tar,                        level 6,                            old streaming advanced cdict,       4672240
+silesia.tar,                        level 7,                            old streaming advanced cdict,       4606657
+silesia.tar,                        level 9,                            old streaming advanced cdict,       4554106
+silesia.tar,                        level 13,                           old streaming advanced cdict,       4491707
+silesia.tar,                        level 16,                           old streaming advanced cdict,       4381284
+silesia.tar,                        level 19,                           old streaming advanced cdict,       4281511
+silesia.tar,                        no source size,                     old streaming advanced cdict,       4861372
 silesia.tar,                        long distance mode,                 old streaming advanced cdict,       12022046
 silesia.tar,                        multithreaded,                      old streaming advanced cdict,       12022046
 silesia.tar,                        multithreaded long distance mode,   old streaming advanced cdict,       12022046
@@ -770,8 +770,8 @@
 silesia.tar,                        small hash log,                     old streaming advanced cdict,       12022046
 silesia.tar,                        small chain log,                    old streaming advanced cdict,       12022046
 silesia.tar,                        explicit params,                    old streaming advanced cdict,       12022046
-silesia.tar,                        uncompressed literals,              old streaming advanced cdict,       4875010
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced cdict,       4281514
+silesia.tar,                        uncompressed literals,              old streaming advanced cdict,       4861376
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced cdict,       4281511
 silesia.tar,                        huffman literals,                   old streaming advanced cdict,       6190789
 silesia.tar,                        multithreaded with advanced params, old streaming advanced cdict,       12022046
 github,                             level -5,                           old streaming advanced cdict,       205285
@@ -780,14 +780,14 @@
 github,                             level -3 with dict,                 old streaming advanced cdict,       45395
 github,                             level -1,                           old streaming advanced cdict,       175568
 github,                             level -1 with dict,                 old streaming advanced cdict,       43170
-github,                             level 0,                            old streaming advanced cdict,       136397
-github,                             level 0 with dict,                  old streaming advanced cdict,       41170
+github,                             level 0,                            old streaming advanced cdict,       136311
+github,                             level 0 with dict,                  old streaming advanced cdict,       41148
 github,                             level 1,                            old streaming advanced cdict,       142450
 github,                             level 1 with dict,                  old streaming advanced cdict,       41682
-github,                             level 3,                            old streaming advanced cdict,       136397
-github,                             level 3 with dict,                  old streaming advanced cdict,       41170
+github,                             level 3,                            old streaming advanced cdict,       136311
+github,                             level 3 with dict,                  old streaming advanced cdict,       41148
 github,                             level 4,                            old streaming advanced cdict,       136144
-github,                             level 4 with dict,                  old streaming advanced cdict,       41306
+github,                             level 4 with dict,                  old streaming advanced cdict,       41251
 github,                             level 5,                            old streaming advanced cdict,       135106
 github,                             level 5 with dict,                  old streaming advanced cdict,       38938
 github,                             level 6,                            old streaming advanced cdict,       135108
@@ -802,7 +802,7 @@
 github,                             level 16 with dict,                 old streaming advanced cdict,       37577
 github,                             level 19,                           old streaming advanced cdict,       133717
 github,                             level 19 with dict,                 old streaming advanced cdict,       37576
-github,                             no source size,                     old streaming advanced cdict,       141003
+github,                             no source size,                     old streaming advanced cdict,       140631
 github,                             long distance mode,                 old streaming advanced cdict,       412933
 github,                             multithreaded,                      old streaming advanced cdict,       412933
 github,                             multithreaded long distance mode,   old streaming advanced cdict,       412933
@@ -810,7 +810,7 @@
 github,                             small hash log,                     old streaming advanced cdict,       412933
 github,                             small chain log,                    old streaming advanced cdict,       412933
 github,                             explicit params,                    old streaming advanced cdict,       412933
-github,                             uncompressed literals,              old streaming advanced cdict,       136397
+github,                             uncompressed literals,              old streaming advanced cdict,       136311
 github,                             uncompressed literals optimal,      old streaming advanced cdict,       133717
 github,                             huffman literals,                   old streaming advanced cdict,       175568
 github,                             multithreaded with advanced params, old streaming advanced cdict,       412933
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 55c14ad..97d4e33 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1184,6 +1184,58 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    /* Small Sequence Section bug */
+    DISPLAYLEVEL(3, "test%3i : decompress blocks with small sequences section : ", testNb++);
+    {   /* This test consists of 3 blocks. Each block has one sequence.
+            The sequence has literal length of 10, match length of 10 and offset of 10.
+            The sequence values and compression modes for the blocks are as follows:
+            The order of values is ll, ml, of.
+              - First block  : (10, 7, 13) (rle, rle, rle)
+                 - size of sequences section: 6 bytes (1 byte for nbSeq, 1 byte for encoding mode, 3 bytes for rle, 1 byte bitstream)
+              - Second block : (10, 7, 1) (repeat, repeat, rle)
+                 - size of sequences section: 4 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte for rle, 1 byte bitstream)
+              - Third block  : (10, 7, 1) (repeat, repeat, repeat)
+                 - size of sequences section: 3 bytes (1 byte for nbSeq, 1 byte for encoding mode, 1 byte bitstream) */
+
+        unsigned char compressed[] = {
+            0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x3c, 0x35, 0x01, 0x00, 0xf0, 0x85, 0x08,
+            0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
+            0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac,
+            0x69, 0x94, 0x89, 0x1c, 0x03, 0x44, 0x0a, 0x07, 0x00, 0xb4, 0x04, 0x80,
+            0x40, 0x0a, 0xa4
+        };
+        unsigned int compressedSize = 51;
+        unsigned char decompressed[] = {
+            0x85, 0x08, 0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x85, 0x08,
+            0xc2, 0xc4, 0x70, 0xcf, 0xd7, 0xc0, 0x96, 0x7e, 0x4c, 0x6b, 0xa9, 0x8b,
+            0xbc, 0xc5, 0xb6, 0xd9, 0x7f, 0x4c, 0x4c, 0x6b, 0xa9, 0x8b, 0xbc, 0xc5,
+            0xb6, 0xd9, 0x7f, 0x4c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94,
+            0x89, 0x1c, 0xf1, 0x05, 0xa6, 0x54, 0xef, 0xac, 0x69, 0x94, 0x89, 0x1c
+        };
+        unsigned int decompressedSize = 60;
+
+        ZSTD_DStream* const zds = ZSTD_createDStream();
+        if (zds==NULL) goto _output_error;
+
+        CHECK_Z( ZSTD_initDStream(zds) );
+        inBuff.src = compressed;
+        inBuff.size = compressedSize;
+        inBuff.pos = 0;
+        outBuff.dst = decodedBuffer;
+        outBuff.size = CNBufferSize;
+        outBuff.pos = 0;
+
+        CHECK(ZSTD_decompressStream(zds, &outBuff, &inBuff) != 0,
+              "Decompress did not reach the end of frame");
+        CHECK(inBuff.pos != inBuff.size, "Decompress did not fully consume input");
+        CHECK(outBuff.pos != decompressedSize, "Decompressed size does not match");
+        CHECK(memcmp(outBuff.dst, decompressed, decompressedSize) != 0,
+              "Decompressed data does not match");
+
+        ZSTD_freeDStream(zds);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : dictionary + uncompressible block + reusing tables checks offset table validity: ", testNb++);
     {   ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(
             dictionary.start, dictionary.filled,
diff --git a/zlibWrapper/.gitignore b/zlibWrapper/.gitignore
index 6167ca4..b037ae6 100644
--- a/zlibWrapper/.gitignore
+++ b/zlibWrapper/.gitignore
@@ -1,11 +1,14 @@
+# object artifacts
+*.o
+
 # Default result files
 _*
-example.*
+example
 example_zstd.*
 example_gz.*
-fitblk.*
+fitblk
 fitblk_zstd.*
-zwrapbench.*
+zwrapbench
 foo.gz
 
 minigzip