Zstd 1.5.0 Release

Zstd 1.5.0 Release
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4529743..c634737 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -36,33 +36,6 @@
             make armbuild   V=1; make clean
             make -C tests test-legacy test-longmatch; make clean
             make -C lib libzstd-nomt; make clean
-  # This step is only run on release tags.
-  # It publishes the source tarball as artifacts and if the GITHUB_TOKEN
-  # environment variable is set it will publish the source tarball to the
-  # tagged release.
-  publish-github-release:
-    docker:
-      - image: fbopensource/zstd-circleci-primary:0.0.1
-    environment:
-      CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
-    steps:
-      - checkout
-      - run:
-          name: Publish
-          command: |
-            export VERSION=$(echo $CIRCLE_TAG | tail -c +2)
-            export ZSTD_VERSION=zstd-$VERSION
-            git archive $CIRCLE_TAG --prefix $ZSTD_VERSION/ --format tar \
-                        -o $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar > $ZSTD_VERSION.tar.sha256
-            zstd -19 $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
-            gzip -k -9 $ZSTD_VERSION.tar
-            sha256sum $ZSTD_VERSION.tar.gz > $ZSTD_VERSION.tar.gz.sha256
-            mkdir -p $CIRCLE_ARTIFACTS
-            cp $ZSTD_VERSION.tar* $CIRCLE_ARTIFACTS
-      - store_artifacts:
-          path: /tmp/circleci-artifacts
   # This step should only be run in a cron job
   regression-test:
     docker:
diff --git a/.github/workflows/generic-dev.yml b/.github/workflows/generic-dev.yml
index d6e9b44..ae4ee7e 100644
--- a/.github/workflows/generic-dev.yml
+++ b/.github/workflows/generic-dev.yml
@@ -30,6 +30,9 @@
 
   test:
     runs-on: ubuntu-latest
+    env:
+      DEVNULLRIGHTS: 1
+      READFROMBLOCKDEVICE: 1
     steps:
     - uses: actions/checkout@v2
     - name: make test
@@ -55,14 +58,12 @@
         CC=gcc-7 CFLAGS=-Werror make -j all
         make clean
         LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
-        make -C tests zbufftest-dll
 
     # candidate test (to check) : underlink test
     # LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
-    # zbufftest-dll : test that a user program can link to multi-threaded libzstd without specifying -pthread
 
   gcc-8-asan-ubsan-testzstd:
-    runs-on: ubuntu-16.04 # fails on 18.04
+    runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
     - name: gcc-8 + ASan + UBSan + Test Zstd
@@ -126,6 +127,16 @@
         make libc6install
         CFLAGS="-O2 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
 
+  clang-msan-fuzz:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: clang + MSan + Fuzz Test
+      run: |
+        sudo apt-get update
+        sudo apt-get install clang
+        CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest
+
   asan-ubsan-msan-regression:
     runs-on: ubuntu-latest
     steps:
diff --git a/.github/workflows/generic-release.yml b/.github/workflows/generic-release.yml
index 1ee26f3..cb91fb2 100644
--- a/.github/workflows/generic-release.yml
+++ b/.github/workflows/generic-release.yml
@@ -25,14 +25,6 @@
         make test
         # make -c lib all (need to fix. not working right now)
 
-  zbuff:
-    runs-on: ubuntu-16.04
-    steps:
-    - uses: actions/checkout@v2
-    - name: zbuff test
-      run: |
-        make -C tests test-zbuff
-
   tsan:
     runs-on: ubuntu-latest
     steps:
diff --git a/.github/workflows/publish-release-artifacts.yml b/.github/workflows/publish-release-artifacts.yml
new file mode 100644
index 0000000..952cb26
--- /dev/null
+++ b/.github/workflows/publish-release-artifacts.yml
@@ -0,0 +1,68 @@
+name: publish-release-artifacts
+
+on:
+  release:
+    types:
+      - created
+
+jobs:
+  publish-release-artifacts:
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/')
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Archive
+        env:
+          RELEASE_SIGNING_KEY: ${{ secrets.RELEASE_SIGNING_KEY }}
+          RELEASE_SIGNING_KEY_PASSPHRASE: ${{ secrets.RELEASE_SIGNING_KEY_PASSPHRASE }}
+        run: |
+          # compute file name
+          export TAG="$(echo "$GITHUB_REF" | sed -n 's_^refs/tags/__p')"
+          if [ -z "$TAG" ]; then
+            echo "action must be run on a tag. GITHUB_REF is not a tag: $GITHUB_REF"
+            exit 1
+          fi
+          # Attempt to extract "1.2.3" from "v1.2.3" to maintain artifact name backwards compat.
+          # Otherwise, degrade to using full tag.
+          export VERSION="$(echo "$TAG" | sed 's_^v\([0-9]\+\.[0-9]\+\.[0-9]\+\)$_\1_')"
+          export ZSTD_VERSION="zstd-$VERSION"
+
+          # archive
+          git archive $TAG \
+              --prefix $ZSTD_VERSION/ \
+              --format tar \
+              -o $ZSTD_VERSION.tar
+
+          # Do the rest of the work in a sub-dir so we can glob everything we want to publish.
+          mkdir artifacts/
+          mv $ZSTD_VERSION.tar artifacts/
+          cd artifacts/
+
+          # compress
+          zstd -k -19 $ZSTD_VERSION.tar
+          gzip -k  -9 $ZSTD_VERSION.tar
+
+          # we only publish the compressed tarballs
+          rm $ZSTD_VERSION.tar
+
+          # hash
+          sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
+          sha256sum $ZSTD_VERSION.tar.gz  > $ZSTD_VERSION.tar.gz.sha256
+
+          # sign
+          if [ -n "$RELEASE_SIGNING_KEY" ]; then
+            export GPG_BATCH_OPTS="--batch --no-use-agent --pinentry-mode loopback --no-tty --yes"
+            echo "$RELEASE_SIGNING_KEY" | gpg $GPG_BATCH_OPTS --import
+            gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.zst.sig $ZSTD_VERSION.tar.zst
+            gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.gz.sig  $ZSTD_VERSION.tar.gz
+          fi
+
+      - name: Publish
+        uses: skx/github-action-publish-binaries@release-1.3
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          args: artifacts/*
diff --git a/.gitignore b/.gitignore
index ae277e9..ea574d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@
 *.code-workspace
 compile_commands.json
 .clangd
+perf.data
+perf.data.old
diff --git a/.travis.yml b/.travis.yml
index 7fcae88..b0e7048 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -125,7 +125,7 @@
     # meson dedicated test
     - name: Xenial (Meson + clang)    # ~15mn
       if: branch = release
-      dist: xenial
+      dist: bionic
       language: cpp
       compiler: clang
       install:
diff --git a/CHANGELOG b/CHANGELOG
index 9f3c8f7..3b15165 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,59 @@
+v1.5.0  (May 11, 2021)
+api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
+  `ZSTD_defaultCLevel()`
+  `ZSTD_getDictID_fromCDict()`
+api: Several experimental functions have been deprecated and will emit a compiler warning (#2582, @senhuang42)
+  `ZSTD_compress_advanced()`
+  `ZSTD_compress_usingCDict_advanced()`
+  `ZSTD_compressBegin_advanced()`
+  `ZSTD_compressBegin_usingCDict_advanced()`
+  `ZSTD_initCStream_srcSize()`
+  `ZSTD_initCStream_usingDict()`
+  `ZSTD_initCStream_usingCDict()`
+  `ZSTD_initCStream_advanced()`
+  `ZSTD_initCStream_usingCDict_advanced()`
+  `ZSTD_resetCStream()`
+api: ZSTDMT_NBWORKERS_MAX reduced to 64 for 32-bit environments (@Cyan4973)
+perf: Significant speed improvements for middle compression levels (#2494, @senhuang42 @terrelln)
+perf: Block splitter to improve compression ratio, enabled by default for high compression levels (#2447, @senhuang42)
+perf: Decompression loop refactor, speed improvements on `clang` and for `--long` modes (#2614 #2630, @Cyan4973)
+perf: Reduced stack usage during compression and decompression entropy stage (#2522 #2524, @terrelln)
+bug: Improve setting permissions of created files (#2525, @felixhandte)
+bug: Fix large dictionary non-determinism (#2607, @terrelln)
+bug: Fix non-determinism test failures on Linux i686 (#2606, @terrelln)
+bug: Fix various dedicated dictionary search bugs (#2540 #2586, @senhuang42 @felixhandte)
+bug: Ensure `ZSTD_estimateCCtxSize*() `monotonically increases with compression level (#2538, @senhuang42)
+bug: Fix --patch-from mode parameter bound bug with small files (#2637, @occivink)
+bug: Fix UBSAN error in decompression (#2625, @terrelln)
+bug: Fix superblock compression divide by zero bug (#2592, @senhuang42)
+bug: Make the number of physical CPU cores detection more robust (#2517, @PaulBone)
+doc: Improve `zdict.h` dictionary training API documentation (#2622, @terrelln)
+doc: Note that public `ZSTD_free*()` functions accept NULL pointers (#2521, @animalize)
+doc: Add style guide docs for open source contributors (#2626, @Cyan4973)
+tests: Better regression test coverage for different dictionary modes (#2559, @senhuang42)
+tests: Better test coverage of index reduction (#2603, @terrelln)
+tests: OSS-Fuzz coverage for seekable format (#2617, @senhuang42)
+tests: Test coverage for ZSTD threadpool API (#2604, @senhuang42)
+build: Dynamic library built multithreaded by default (#2584, @senhuang42)
+build: Move  `zstd_errors.h`  and  `zdict.h`  to  `lib/`  root (#2597, @terrelln)
+build: Allow `ZSTDMT_JOBSIZE_MIN` to be configured at compile-time, reduce default to 512KB (#2611, @Cyan4973)
+build: Single file library build script moved to `build/` directory (#2618, @felixhandte)
+build: `ZBUFF_*()` is no longer built by default (#2583, @senhuang42)
+build: Fixed Meson build (#2548, @SupervisedThinking @kloczek)
+build: Fix excessive compiler warnings with clang-cl and CMake (#2600, @nickhutchinson)
+build: Detect presence of `md5` on Darwin (#2609, @felixhandte)
+build: Avoid SIGBUS on armv6 (#2633, @bmwiedmann)
+cli: `--progress` flag added to always display progress bar (#2595, @senhuang42)
+cli: Allow reading from block devices with `--force` (#2613, @felixhandte)
+cli: Fix CLI filesize display bug (#2550, @Cyan4973)
+cli: Fix windows CLI `--filelist` end-of-line bug (#2620, @Cyan4973)
+contrib: Various fixes for linux kernel patch (#2539, @terrelln)
+contrib: Seekable format - Decompression hanging edge case fix (#2516, @senhuang42)
+contrib: Seekable format - New seek table-only API  (#2113 #2518, @mdittmer @Cyan4973)
+contrib: Seekable format - Fix seek table descriptor check when loading (#2534, @foxeng)
+contrib: Seekable format - Decompression fix for large offsets, (#2594, @azat)
+misc: Automatically published release tarballs available on Github (#2535, @felixhandte)
+
 v1.4.9  (Mar 1, 2021)
 bug: Use `umask()` to Constrain Created File Permissions (#2495, @felixhandte)
 bug: Make Simple Single-Pass Functions Ignore Advanced Parameters (#2498, @terrelln)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 44f2393..5effa26 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -399,7 +399,105 @@
 outlined on that page and do not file a public issue.
 
 ## Coding Style
+It's a pretty long topic, which is difficult to summarize in a single paragraph.
+As a rule of thumbs, try to imitate the coding style of
+similar lines of codes around your contribution.
+The following is a non-exhaustive list of rules employed in zstd code base:
+
+### C90
+This code base is following strict C90 standard,
+with 2 extensions : 64-bit `long long` types, and variadic macros.
+This rule is applied strictly to code within `lib/` and `programs/`.
+Sub-project in `contrib/` are allowed to use other conventions.
+
+### C++ direct compatibility : symbol mangling
+All public symbol declarations must be wrapped in `extern “C” { … }`,
+so that this project can be compiled as C++98 code,
+and linked into C++ applications.
+
+### Minimal Frugal
+This design requirement is fundamental to preserve the portability of the code base.
+#### Dependencies
+- Reduce dependencies to the minimum possible level.
+  Any dependency should be considered “bad” by default,
+  and only tolerated because it provides a service in a better way than can be achieved locally.
+  The only external dependencies this repository tolerates are
+  standard C libraries, and in rare cases, system level headers.
+- Within `lib/`, this policy is even more drastic.
+  The only external dependencies allowed are `<assert.h>`, `<stdlib.h>`, `<string.h>`,
+  and even then, not directly.
+  In particular, no function shall ever allocate on heap directly,
+  and must use instead `ZSTD_malloc()` and equivalent.
+  Other accepted non-symbol headers are `<stddef.h>` and `<limits.h>`.
+- Within the project, there is a strict hierarchy of dependencies that must be respected.
+  `programs/` is allowed to depend on `lib/`, but only its public API.
+  Within `lib/`, `lib/common` doesn't depend on any other directory.
+  `lib/compress` and `lib/decompress` shall not depend on each other.
+  `lib/dictBuilder` can depend on `lib/common` and `lib/compress`, but not `lib/decompress`.
+#### Resources
+- Functions in `lib/` must use very little stack space,
+  several dozens of bytes max.
+  Everything larger must use the heap allocator,
+  or require a scratch buffer to be emplaced manually.
+
+### Naming
+* All public symbols are prefixed with `ZSTD_`
+  + private symbols, with a scope limited to their own unit, are free of this restriction.
+    However, since `libzstd` source code can be amalgamated,
+    each symbol name must attempt to be (and remain) unique.
+    Avoid too generic names that could become ground for future collisions.
+    This generally implies usage of some form of prefix.
+* For symbols (functions and variables), naming convention is `PREFIX_camelCase`.
+  + In some advanced cases, one can also find :
+    - `PREFIX_prefix2_camelCase`
+    - `PREFIX_camelCase_extendedQualifier`
+* Multi-words names generally consist of an action followed by object:
+  - for example : `ZSTD_createCCtx()`
+* Prefer positive actions
+  - `goBackward` rather than `notGoForward`
+* Type names (`struct`, etc.) follow similar convention,
+  except that they are allowed and even invited to start by an Uppercase letter.
+  Example : `ZSTD_CCtx`, `ZSTD_CDict`
+* Macro names are all Capital letters.
+  The same composition rules (`PREFIX_NAME_QUALIFIER`) apply.
+* File names are all lowercase letters.
+  The convention is `snake_case`.
+  File names **must** be unique across the entire code base,
+  even when they stand in clearly separated directories.
+
+### Qualifiers
+* This code base is `const` friendly, if not `const` fanatical.
+  Any variable that can be `const` (aka. read-only) **must** be `const`.
+  Any pointer which content will not be modified must be `const`.
+  This property is then controlled at compiler level.
+  `const` variables are an important signal to readers that this variable isn’t modified.
+  Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function.
+* If a function must be inlined, mention it explicitly,
+  using project's own portable macros, such as `FORCE_INLINE_ATTR`,
+  defined in `lib/common/compiler.h`.
+
+### Debugging
+* **Assertions** are welcome, and should be used very liberally,
+  to control any condition the code expects for its correct execution.
+  These assertion checks will be run in debug builds, and disabled in production.
+* For traces, this project provides its own debug macros,
+  in particular `DEBUGLOG(level, ...)`, defined in `lib/common/debug.h`.
+
+### Code documentation
+* Avoid code documentation that merely repeats what the code is already stating.
+  Whenever applicable, prefer employing the code as the primary way to convey explanations.
+  Example 1 : `int nbTokens = n;` instead of `int i = n; /* i is a nb of tokens *./`.
+  Example 2 : `assert(size > 0);` instead of `/* here, size should be positive */`.
+* At declaration level, the documentation explains how to use the function or variable
+  and when applicable why it's needed, of the scenarios where it can be useful.
+* At implementation level, the documentation explains the general outline of the algorithm employed,
+  and when applicable why this specific choice was preferred.
+
+### General layout
 * 4 spaces for indentation rather than tabs
+* Code documentation shall directly precede function declaration or implementation
+* Function implementations and its code documentation should be preceded and followed by an empty line
+
 
 ## License
 By contributing to Zstandard, you agree that your contributions will be licensed
diff --git a/Makefile b/Makefile
index 8522d9d..c1908f0 100644
--- a/Makefile
+++ b/Makefile
@@ -57,8 +57,8 @@
 	$(MAKE) -C $(PRGDIR) zstd32
 	$(MAKE) -C $(TESTDIR) all32
 
-.PHONY: lib lib-release
-lib lib-release :
+.PHONY: lib lib-release lib-mt lib-nomt
+lib lib-release lib-mt lib-nomt:
 	$(Q)$(MAKE) -C $(ZSTDDIR) $@
 
 .PHONY: zstd zstd-release
@@ -122,8 +122,8 @@
 	$(MAKE) -C contrib/seekable_format/examples all
 	$(MAKE) -C contrib/seekable_format/tests test
 	$(MAKE) -C contrib/largeNbDicts all
-	cd contrib/single_file_libs/ ; ./build_decoder_test.sh
-	cd contrib/single_file_libs/ ; ./build_library_test.sh
+	cd build/single_file_libs/ ; ./build_decoder_test.sh
+	cd build/single_file_libs/ ; ./build_library_test.sh
 
 .PHONY: cleanTabs
 cleanTabs:
@@ -151,7 +151,6 @@
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku))
 
 HOST_OS = POSIX
-CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
 
 HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 EGREP_OPTIONS ?=
@@ -179,7 +178,7 @@
 	    done \
 	} | column -t -s $$'\t'
 
-.PHONY: install armtest usan asan uasan
+.PHONY: install armtest usan asan uasan msan asan32
 install:
 	$(Q)$(MAKE) -C $(ZSTDDIR) $@
 	$(Q)$(MAKE) -C $(PRGDIR) $@
@@ -193,22 +192,19 @@
 travis-install:
 	$(MAKE) install PREFIX=~/install_test_dir
 
-.PHONY: gcc5build
+.PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build
 gcc5build: clean
 	gcc-5 -v
 	CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: gcc6build
 gcc6build: clean
 	gcc-6 -v
 	CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: gcc7build
 gcc7build: clean
 	gcc-7 -v
 	CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
 
-.PHONY: clangbuild
 clangbuild: clean
 	clang -v
 	CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all
@@ -229,6 +225,7 @@
 ppc64build: clean
 	CC=powerpc-linux-gnu-gcc CFLAGS="-m64 -Werror" $(MAKE) -j allzstd
 
+.PHONY: armfuzz aarch64fuzz ppcfuzz ppc64fuzz
 armfuzz: clean
 	CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
 
@@ -242,7 +239,7 @@
 ppc64fuzz: clean
 	CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
 
-.PHONY: cxxtest
+.PHONY: cxxtest gcc5test gcc6test armtest aarch64test ppctest ppc64test
 cxxtest: CXXFLAGS += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
 cxxtest: clean
 	$(MAKE) -C $(PRGDIR) all CC="$(CXX) -Wno-deprecated" CFLAGS="$(CXXFLAGS)"   # adding -Wno-deprecated to avoid clang++ warning on dealing with C files directly
@@ -271,6 +268,7 @@
 	$(MAKE) -C $(TESTDIR) datagen   # use native, faster
 	$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests
 
+.PHONY: arm-ppc-compilation
 arm-ppc-compilation:
 	$(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
 	$(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
@@ -288,7 +286,6 @@
 
 # run UBsan with -fsanitize-recover=pointer-overflow
 # this only works with recent compilers such as gcc 8+
-
 usan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror"
 
@@ -316,13 +313,16 @@
 tsan-%: clean
 	LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS=--no-big-tests
 
+.PHONY: apt-install
 apt-install:
 	sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install $(APT_PACKAGES)
 
+.PHONY: apt-add-repo
 apt-add-repo:
 	sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
 	sudo apt-get update -y -qq
 
+.PHONY: ppcinstall arminstall valgrindinstall libc6install gcc6install gcc7install gcc8install gpp6install clang38install lz4install
 ppcinstall:
 	APT_PACKAGES="qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu" $(MAKE) apt-install
 
@@ -357,16 +357,18 @@
 endif
 
 
+CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
+
 ifneq (,$(filter MSYS%,$(shell uname)))
 HOST_OS = MSYS
 CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
 endif
 
-
 #------------------------------------------------------------------------
 # target specific tests
 #------------------------------------------------------------------------
 ifneq (,$(filter $(HOST_OS),MSYS POSIX))
+.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
 cmakebuild:
 	cmake --version
 	$(RM) -r $(BUILDIR)/cmake/build
diff --git a/TESTING.md b/TESTING.md
index b851d1c..32b133b 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -40,5 +40,4 @@
 - Versions test (ensuring `zstd` can decode files from all previous versions)
 - `pzstd` with asan and tsan, as well as in 32-bits mode
 - Testing `zstd` with legacy mode off
-- Testing `zbuff` (old streaming API)
 - Entire test suite and make install on macOS
diff --git a/appveyor.yml b/appveyor.yml
index 1721593..c6ab786 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -52,6 +52,15 @@
       PLATFORM: "Win32"
       CONFIGURATION: "Release"
 
+    - COMPILER: "clang-cl"
+      HOST:     "cmake-visual"
+      PLATFORM: "x64"
+      CONFIGURATION: "Release"
+      CMAKE_GENERATOR: "Visual Studio 15 2017"
+      CMAKE_GENERATOR_PLATFORM: "x64"
+      CMAKE_GENERATOR_TOOLSET: "LLVM"
+      APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
+
   install:
   - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
   - SET PATH_ORIGINAL=%PATH%
@@ -154,6 +163,15 @@
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
     )
+  - if [%HOST%]==[cmake-visual] (
+      ECHO *** &&
+      ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
+      PUSHD build\cmake &&
+      cmake -DBUILD_TESTING=ON . &&
+      cmake --build . --config %CONFIGURATION% -j4 &&
+      POPD &&
+      ECHO ***
+    )
 
   test_script:
   - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
@@ -223,13 +241,21 @@
       PLATFORM: "Win32"
       CONFIGURATION: "Release"
 
+    - COMPILER: "clang-cl"
+      HOST:     "cmake-visual"
+      PLATFORM: "x64"
+      CONFIGURATION: "Release"
+      CMAKE_GENERATOR: "Visual Studio 15 2017"
+      CMAKE_GENERATOR_PLATFORM: "x64"
+      CMAKE_GENERATOR_TOOLSET: "LLVM"
+      APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
+
   install:
   - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
   - SET PATH_ORIGINAL=%PATH%
   - if [%HOST%]==[cygwin] (
       ECHO Installing Cygwin Packages &&
       C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
-        gcc-g++,^
         gcc,^
         cmake,^
         make
@@ -252,8 +278,8 @@
       C:\cygwin64\bin\bash --login -c "
         set -e;
         cd build/cmake;
-        CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_ZSTREAM_FLAGS=-T30s .;
-        make -j4;
+        CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .;
+        make VERBOSE=1 -j;
         ctest -V -L Medium;
       "
     )
@@ -281,6 +307,15 @@
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
       COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
     )
+  - if [%HOST%]==[cmake-visual] (
+      ECHO *** &&
+      ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
+      PUSHD build\cmake &&
+      cmake -DBUILD_TESTING=ON . &&
+      cmake --build . --config %CONFIGURATION% -j4 &&
+      POPD &&
+      ECHO ***
+    )
 
 
   test_script:
diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj
index 5752643..5e349dc 100644
--- a/build/VS2008/fullbench/fullbench.vcproj
+++ b/build/VS2008/fullbench/fullbench.vcproj
@@ -463,7 +463,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj
index d48bc0f..32f2846 100644
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@@ -483,7 +483,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -511,7 +511,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index 6f26529..c7eec57 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -559,7 +559,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
@@ -575,7 +575,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj
index 5eb49f9..88c1aee 100644
--- a/build/VS2008/zstdlib/zstdlib.vcproj
+++ b/build/VS2008/zstdlib/zstdlib.vcproj
@@ -495,7 +495,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -523,7 +523,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
index 20932fa..2e0a042 100644
--- a/build/VS2010/fullbench/fullbench.vcxproj
+++ b/build/VS2010/fullbench/fullbench.vcxproj
@@ -190,7 +190,7 @@
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\common\pool.h" />
     <ClInclude Include="..\..\..\lib\common\threading.h" />
diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
index 8427572..91974ec 100644
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@@ -196,7 +196,7 @@
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
@@ -211,7 +211,7 @@
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
     <ClInclude Include="..\..\..\lib\decompress\zstd_ddict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
+    <ClInclude Include="..\..\..\lib\zdict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\cover.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\programs\datagen.h" />
diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
index 0957d41..a0aa897 100644
--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
@@ -44,9 +44,6 @@
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\fastcover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
@@ -64,12 +61,11 @@
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\bitstream.h" />
     <ClInclude Include="..\..\..\lib\common\error_private.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\mem.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
-    <ClInclude Include="..\..\..\lib\deprecated\zbuff.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj
index 2034293..17c08d7 100644
--- a/build/VS2010/libzstd/libzstd.vcxproj
+++ b/build/VS2010/libzstd/libzstd.vcxproj
@@ -44,9 +44,6 @@
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
-    <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\fastcover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
@@ -64,12 +61,11 @@
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\bitstream.h" />
     <ClInclude Include="..\..\..\lib\common\error_private.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\common\mem.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
-    <ClInclude Include="..\..\..\lib\deprecated\zbuff.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index e389f96..46e22f4 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -70,14 +70,14 @@
     <ClInclude Include="..\..\..\lib\common\threading.h" />
     <ClInclude Include="..\..\..\lib\common\xxhash.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
-    <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
+    <ClInclude Include="..\..\..\lib\zdict.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\cover.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
     <ClInclude Include="..\..\..\lib\common\fse.h" />
     <ClInclude Include="..\..\..\lib\common\huf.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\common\zstd_internal.h" />
-    <ClInclude Include="..\..\..\lib\common\zstd_errors.h" />
+    <ClInclude Include="..\..\..\lib\zstd_errors.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_literals.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_compress_sequences.h" />
diff --git a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
index 6238971..e23b9d6 100644
--- a/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
+++ b/build/cmake/CMakeModules/AddZstdCompilationFlags.cmake
@@ -26,7 +26,12 @@
         EnableCompilerFlag("-std=c++11" false true)
         #Set c99 by default
         EnableCompilerFlag("-std=c99" true false)
-        EnableCompilerFlag("-Wall" true true)
+        if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND MSVC)
+            # clang-cl normally maps -Wall to -Weverything.
+            EnableCompilerFlag("/clang:-Wall" true true)
+        else ()
+            EnableCompilerFlag("-Wall" true true)
+        endif ()
         EnableCompilerFlag("-Wextra" true true)
         EnableCompilerFlag("-Wundef" true true)
         EnableCompilerFlag("-Wshadow" true true)
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 179c976..5f75665 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -24,28 +24,24 @@
 file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
 file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
 file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c)
-file(GLOB DeprecatedSources ${LIBRARY_DIR}/deprecated/*.c)
 
 set(Sources
         ${CommonSources}
         ${CompressSources}
         ${DecompressSources}
-        ${DictBuilderSources}
-        ${DeprecatedSources})
+        ${DictBuilderSources})
 
 file(GLOB CommonHeaders ${LIBRARY_DIR}/common/*.h)
 file(GLOB CompressHeaders ${LIBRARY_DIR}/compress/*.h)
 file(GLOB DecompressHeaders ${LIBRARY_DIR}/decompress/*.h)
 file(GLOB DictBuilderHeaders ${LIBRARY_DIR}/dictBuilder/*.h)
-file(GLOB DeprecatedHeaders ${LIBRARY_DIR}/deprecated/*.h)
 
 set(Headers
         ${LIBRARY_DIR}/zstd.h
         ${CommonHeaders}
         ${CompressHeaders}
         ${DecompressHeaders}
-        ${DictBuilderHeaders}
-        ${DeprecatedHeaders})
+        ${DictBuilderHeaders})
 
 if (ZSTD_LEGACY_SUPPORT)
     set(LIBRARY_LEGACY_DIR ${LIBRARY_DIR}/legacy)
@@ -162,8 +158,8 @@
 # install target
 install(FILES
     "${LIBRARY_DIR}/zstd.h"
-    "${LIBRARY_DIR}/dictBuilder/zdict.h"
-    "${LIBRARY_DIR}/common/zstd_errors.h"
+    "${LIBRARY_DIR}/zdict.h"
+    "${LIBRARY_DIR}/zstd_errors.h"
     DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
 
 install(TARGETS ${library_targets}
diff --git a/build/cmake/tests/.gitignore b/build/cmake/tests/.gitignore
index 2ab62a3..ca2947f 100644
--- a/build/cmake/tests/.gitignore
+++ b/build/cmake/tests/.gitignore
@@ -3,5 +3,4 @@
 fullbench
 fuzzer
 paramgrill
-zbufftest
 
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 34eca91..8bba6ea 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -57,13 +57,15 @@
 # fullbench
 #
 add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+set_property(TARGET fullbench APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(fullbench libzstd_static)
-add_test(NAME fullbench COMMAND fullbench)
+add_test(NAME fullbench COMMAND fullbench ${ZSTD_FULLBENCH_FLAGS})
 
 #
 # fuzzer
 #
 add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c)
+set_property(TARGET fuzzer APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(fuzzer libzstd_static)
 AddTestFlagsOption(ZSTD_FUZZER_FLAGS "$ENV{FUZZERTEST} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the fuzzer test (see `fuzzer -h` for usage)")
@@ -76,6 +78,7 @@
 # zstreamtest
 #
 add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
+set_property(TARGET zstreamtest APPEND PROPERTY COMPILE_OPTIONS "-Wno-deprecated-declarations")
 target_link_libraries(zstreamtest libzstd_static)
 AddTestFlagsOption(ZSTD_ZSTREAM_FLAGS "$ENV{ZSTREAM_TESTTIME} $ENV{FUZZER_FLAGS}"
     "Semicolon-separated list of flags to pass to the zstreamtest test (see `zstreamtest -h` for usage)")
diff --git a/build/meson/contrib/pzstd/meson.build b/build/meson/contrib/pzstd/meson.build
index 8f3822f..dcf2136 100644
--- a/build/meson/contrib/pzstd/meson.build
+++ b/build/meson/contrib/pzstd/meson.build
@@ -18,7 +18,7 @@
   join_paths(zstd_rootdir, 'contrib/pzstd/SkippableFrame.cpp')]
 pzstd = executable('pzstd',
   pzstd_sources,
-  cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic' ],
+  cpp_args: [ '-DNDEBUG', '-Wno-shadow', '-pedantic', '-Wno-deprecated-declarations' ],
   include_directories: pzstd_includes,
   dependencies: [ libzstd_dep, thread_dep ],
   install: true)
diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build
index 17806c8..5cc9fee 100644
--- a/build/meson/lib/meson.build
+++ b/build/meson/lib/meson.build
@@ -14,8 +14,7 @@
   join_paths(zstd_rootdir, 'lib/common'),
   join_paths(zstd_rootdir, 'lib/compress'),
   join_paths(zstd_rootdir, 'lib/decompress'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder'),
-  join_paths(zstd_rootdir, 'lib/deprecated'))]
+  join_paths(zstd_rootdir, 'lib/dictBuilder'))]
 
 libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
   join_paths(zstd_rootdir, 'lib/common/fse_decompress.c'),
@@ -44,10 +43,7 @@
   join_paths(zstd_rootdir, 'lib/dictBuilder/cover.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/fastcover.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/divsufsort.c'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_common.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_compress.c'),
-  join_paths(zstd_rootdir, 'lib/deprecated/zbuff_decompress.c')]
+  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c')]
 
 # Explicit define legacy support
 add_project_arguments('-DZSTD_LEGACY_SUPPORT=@0@'.format(legacy_level),
@@ -127,5 +123,5 @@
   url: 'http://www.zstd.net/')
 
 install_headers(join_paths(zstd_rootdir, 'lib/zstd.h'),
-  join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.h'),
-  join_paths(zstd_rootdir, 'lib/common/zstd_errors.h'))
+  join_paths(zstd_rootdir, 'lib/zdict.h'),
+  join_paths(zstd_rootdir, 'lib/zstd_errors.h'))
diff --git a/build/meson/tests/meson.build b/build/meson/tests/meson.build
index 0587f9a..1b23363 100644
--- a/build/meson/tests/meson.build
+++ b/build/meson/tests/meson.build
@@ -57,7 +57,7 @@
 fuzzer = executable('fuzzer',
   fuzzer_sources,
   include_directories: test_includes,
-  dependencies: libzstd_dep,
+  dependencies: [ libzstd_dep, thread_dep ],
   install: false)
 
 zstreamtest_sources = [join_paths(zstd_rootdir, 'programs/datagen.c'),
diff --git a/contrib/single_file_libs/.gitignore b/build/single_file_libs/.gitignore
similarity index 100%
rename from contrib/single_file_libs/.gitignore
rename to build/single_file_libs/.gitignore
diff --git a/contrib/single_file_libs/README.md b/build/single_file_libs/README.md
similarity index 96%
rename from contrib/single_file_libs/README.md
rename to build/single_file_libs/README.md
index d88e8fc..1705b76 100644
--- a/contrib/single_file_libs/README.md
+++ b/build/single_file_libs/README.md
@@ -11,7 +11,7 @@
 
 Create `zstddeclib.c` from the Zstd source using:
 ```
-cd zstd/contrib/single_file_libs
+cd zstd/build/single_file_libs
 ./combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c
 ```
 Then add the resulting file to your project (see the [example files](examples)).
@@ -25,7 +25,7 @@
 
 Create `zstd.c` from the Zstd source using:
 ```
-cd zstd/contrib/single_file_libs
+cd zstd/build/single_file_libs
 ./combine.sh -r ../../lib -o zstd.c zstd-in.c
 ```
 It's possible to create a compressor-only library but since the decompressor is so small in comparison this doesn't bring much of a gain (but for the curious, simply remove the files in the _decompress_ section at the end of `zstd-in.c`).
diff --git a/contrib/single_file_libs/build_decoder_test.sh b/build/single_file_libs/build_decoder_test.sh
similarity index 100%
rename from contrib/single_file_libs/build_decoder_test.sh
rename to build/single_file_libs/build_decoder_test.sh
diff --git a/contrib/single_file_libs/build_library_test.sh b/build/single_file_libs/build_library_test.sh
similarity index 100%
rename from contrib/single_file_libs/build_library_test.sh
rename to build/single_file_libs/build_library_test.sh
diff --git a/contrib/single_file_libs/combine.sh b/build/single_file_libs/combine.sh
similarity index 100%
rename from contrib/single_file_libs/combine.sh
rename to build/single_file_libs/combine.sh
diff --git a/contrib/single_file_libs/create_single_file_decoder.sh b/build/single_file_libs/create_single_file_decoder.sh
similarity index 100%
rename from contrib/single_file_libs/create_single_file_decoder.sh
rename to build/single_file_libs/create_single_file_decoder.sh
diff --git a/contrib/single_file_libs/create_single_file_library.sh b/build/single_file_libs/create_single_file_library.sh
similarity index 100%
rename from contrib/single_file_libs/create_single_file_library.sh
rename to build/single_file_libs/create_single_file_library.sh
diff --git a/contrib/single_file_libs/examples/README.md b/build/single_file_libs/examples/README.md
similarity index 100%
rename from contrib/single_file_libs/examples/README.md
rename to build/single_file_libs/examples/README.md
diff --git a/contrib/single_file_libs/examples/emscripten.c b/build/single_file_libs/examples/emscripten.c
similarity index 100%
rename from contrib/single_file_libs/examples/emscripten.c
rename to build/single_file_libs/examples/emscripten.c
diff --git a/contrib/single_file_libs/examples/roundtrip.c b/build/single_file_libs/examples/roundtrip.c
similarity index 100%
rename from contrib/single_file_libs/examples/roundtrip.c
rename to build/single_file_libs/examples/roundtrip.c
diff --git a/contrib/single_file_libs/examples/shell.html b/build/single_file_libs/examples/shell.html
similarity index 100%
rename from contrib/single_file_libs/examples/shell.html
rename to build/single_file_libs/examples/shell.html
diff --git a/contrib/single_file_libs/examples/simple.c b/build/single_file_libs/examples/simple.c
similarity index 100%
rename from contrib/single_file_libs/examples/simple.c
rename to build/single_file_libs/examples/simple.c
diff --git a/contrib/single_file_libs/examples/testcard-dxt1.inl b/build/single_file_libs/examples/testcard-dxt1.inl
similarity index 100%
rename from contrib/single_file_libs/examples/testcard-dxt1.inl
rename to build/single_file_libs/examples/testcard-dxt1.inl
diff --git a/contrib/single_file_libs/examples/testcard-zstd.inl b/build/single_file_libs/examples/testcard-zstd.inl
similarity index 100%
rename from contrib/single_file_libs/examples/testcard-zstd.inl
rename to build/single_file_libs/examples/testcard-zstd.inl
diff --git a/contrib/single_file_libs/examples/testcard.png b/build/single_file_libs/examples/testcard.png
similarity index 100%
rename from contrib/single_file_libs/examples/testcard.png
rename to build/single_file_libs/examples/testcard.png
Binary files differ
diff --git a/contrib/single_file_libs/zstd-in.c b/build/single_file_libs/zstd-in.c
similarity index 100%
rename from contrib/single_file_libs/zstd-in.c
rename to build/single_file_libs/zstd-in.c
diff --git a/contrib/single_file_libs/zstddeclib-in.c b/build/single_file_libs/zstddeclib-in.c
similarity index 100%
rename from contrib/single_file_libs/zstddeclib-in.c
rename to build/single_file_libs/zstddeclib-in.c
diff --git a/contrib/VS2005/fullbench/fullbench.vcproj b/contrib/VS2005/fullbench/fullbench.vcproj
index c67490c..98f8593 100644
--- a/contrib/VS2005/fullbench/fullbench.vcproj
+++ b/contrib/VS2005/fullbench/fullbench.vcproj
@@ -390,7 +390,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/fuzzer/fuzzer.vcproj b/contrib/VS2005/fuzzer/fuzzer.vcproj
index c64c503..d182535 100644
--- a/contrib/VS2005/fuzzer/fuzzer.vcproj
+++ b/contrib/VS2005/fuzzer/fuzzer.vcproj
@@ -426,7 +426,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -454,7 +454,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/zstd/zstd.vcproj b/contrib/VS2005/zstd/zstd.vcproj
index 46cabbf..78645d1 100644
--- a/contrib/VS2005/zstd/zstd.vcproj
+++ b/contrib/VS2005/zstd/zstd.vcproj
@@ -454,7 +454,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -482,7 +482,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/VS2005/zstdlib/zstdlib.vcproj b/contrib/VS2005/zstdlib/zstdlib.vcproj
index f77df78..67ddd2d 100644
--- a/contrib/VS2005/zstdlib/zstdlib.vcproj
+++ b/contrib/VS2005/zstdlib/zstdlib.vcproj
@@ -372,18 +372,6 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_common.c"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_compress.c"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\deprecated\zbuff_decompress.c"
-				>
-			</File>
-			<File
 				RelativePath="..\..\..\lib\dictBuilder\zdict.c"
 				>
 			</File>
@@ -458,7 +446,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zstd_errors.h"
+				RelativePath="..\..\..\lib\zstd_errors.h"
 				>
 			</File>
 			<File
@@ -486,11 +474,7 @@
 				>
 			</File>
 			<File
-				RelativePath="..\..\..\lib\common\zbuff.h"
-				>
-			</File>
-			<File
-				RelativePath="..\..\..\lib\dictBuilder\zdict.h"
+				RelativePath="..\..\..\lib\zdict.h"
 				>
 			</File>
 			<File
diff --git a/contrib/freestanding_lib/freestanding.py b/contrib/freestanding_lib/freestanding.py
index 32d4fcb..1971687 100755
--- a/contrib/freestanding_lib/freestanding.py
+++ b/contrib/freestanding_lib/freestanding.py
@@ -27,6 +27,8 @@
     "common/pool.h",
     "common/threading.c",
     "common/threading.h",
+    "common/zstd_trace.c",
+    "common/zstd_trace.h",
     "compress/zstdmt_compress.h",
     "compress/zstdmt_compress.c",
 ]
@@ -430,7 +432,7 @@
             external_xxhash: bool, xxh64_state: Optional[str],
             xxh64_prefix: Optional[str], rewritten_includes: [(str, str)],
             defs: [(str, Optional[str])], replaces: [(str, str)],
-            undefs: [str], excludes: [str]
+            undefs: [str], excludes: [str], seds: [str],
     ):
         self._zstd_deps = zstd_deps
         self._mem = mem
@@ -444,6 +446,7 @@
         self._replaces = replaces
         self._undefs = undefs
         self._excludes = excludes
+        self._seds = seds
 
     def _dst_lib_file_paths(self):
         """
@@ -471,24 +474,25 @@
         dst_path = os.path.join(self._dst_lib, lib_path)
         self._log(f"\tCopying: {src_path} -> {dst_path}")
         shutil.copyfile(src_path, dst_path)
-    
+
     def _copy_source_lib(self):
         self._log("Copying source library into output library")
 
         assert os.path.exists(self._src_lib)
         os.makedirs(self._dst_lib, exist_ok=True)
         self._copy_file("zstd.h")
+        self._copy_file("zstd_errors.h")
         for subdir in INCLUDED_SUBDIRS:
             src_dir = os.path.join(self._src_lib, subdir)
             dst_dir = os.path.join(self._dst_lib, subdir)
-            
+
             assert os.path.exists(src_dir)
             os.makedirs(dst_dir, exist_ok=True)
 
             for filename in os.listdir(src_dir):
                 lib_path = os.path.join(subdir, filename)
                 self._copy_file(lib_path)
-    
+
     def _copy_zstd_deps(self):
         dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h")
         self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}")
@@ -508,7 +512,7 @@
         assert not (undef and value is not None)
         for filepath in self._dst_lib_file_paths():
             file = FileLines(filepath)
-    
+
     def _hardwire_defines(self):
         self._log("Hardwiring macros")
         partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs)
@@ -536,7 +540,7 @@
                         skipped.append(line)
                         if end_re.search(line) is not None:
                             assert begin_re.search(line) is None
-                            self._log(f"\t\tRemoving excluded section: {exclude}") 
+                            self._log(f"\t\tRemoving excluded section: {exclude}")
                             for s in skipped:
                                 self._log(f"\t\t\t- {s}")
                             emit = True
@@ -559,12 +563,12 @@
                 e = match.end('include')
                 file.lines[i] = line[:s] + rewritten + line[e:]
             file.write()
-    
+
     def _rewrite_includes(self):
         self._log("Rewriting includes")
         for original, rewritten in self._rewritten_includes:
             self._rewrite_include(original, rewritten)
-    
+
     def _replace_xxh64_prefix(self):
         if self._xxh64_prefix is None:
             return
@@ -596,6 +600,48 @@
                 file.lines[i] = line
             file.write()
 
+    def _parse_sed(self, sed):
+        assert sed[0] == 's'
+        delim = sed[1]
+        match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed)
+        assert match is not None
+        regex = re.compile(match.group(1))
+        format_str = match.group(2)
+        is_global = match.group(3) == 'g'
+        return regex, format_str, is_global
+
+    def _process_sed(self, sed):
+        self._log(f"Processing sed: {sed}")
+        regex, format_str, is_global = self._parse_sed(sed)
+
+        for filepath in self._dst_lib_file_paths():
+            file = FileLines(filepath)
+            for i, line in enumerate(file.lines):
+                modified = False
+                while True:
+                    match = regex.search(line)
+                    if match is None:
+                        break
+                    replacement = format_str.format(match.groups(''), match.groupdict(''))
+                    b = match.start()
+                    e = match.end()
+                    line = line[:b] + replacement + line[e:]
+                    modified = True
+                    if not is_global:
+                        break
+                if modified:
+                    self._log(f"\t- {file.lines[i][:-1]}")
+                    self._log(f"\t+ {line[:-1]}")
+                file.lines[i] = line
+            file.write()
+
+    def _process_seds(self):
+        self._log("Processing seds")
+        for sed in self._seds:
+            self._process_sed(sed)
+
+
+
     def go(self):
         self._copy_source_lib()
         self._copy_zstd_deps()
@@ -604,6 +650,7 @@
         self._remove_excludes()
         self._rewrite_includes()
         self._replace_xxh64_prefix()
+        self._process_seds()
 
 
 def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]:
@@ -641,6 +688,7 @@
     parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'")
     parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64")
     parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')")
+    parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.")
     parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)")
     parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed mutliple times)")
     parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition")
@@ -656,6 +704,11 @@
         if name in args.undefs:
             raise RuntimeError(f"{name} is both defined and undefined!")
 
+    # Always set tracing to 0
+    if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs):
+        args.defs.append(("ZSTD_NO_TRACE", None))
+        args.defs.append(("ZSTD_TRACE", "0"))
+
     args.replaces = parse_pair(args.replaces)
     for name, _ in args.replaces:
         if name in args.undefs or name in args.defs:
@@ -688,7 +741,8 @@
         args.defs,
         args.replaces,
         args.undefs,
-        args.excludes
+        args.excludes,
+        args.seds,
     ).go()
 
 if __name__ == "__main__":
diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile
index 1725e7d..c391df7 100644
--- a/contrib/linux-kernel/Makefile
+++ b/contrib/linux-kernel/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -22,6 +22,10 @@
 		--xxh64-prefix 'xxh64' \
 		--rewrite-include '<limits\.h>=<linux/limits.h>' \
 		--rewrite-include '<stddef\.h>=<linux/types.h>' \
+		--rewrite-include '"\.\./zstd.h"=<linux/zstd.h>' \
+		--rewrite-include '"(\.\./)?zstd_errors.h"=<linux/zstd_errors.h>' \
+		--sed 's,/\*\*\*,/* *,g' \
+		--sed 's,/\*\*,/*,g' \
 		-DZSTD_NO_INTRINSICS \
 		-DZSTD_NO_UNUSED_FUNCTIONS \
 		-DZSTD_LEGACY_SUPPORT=0 \
@@ -47,7 +51,10 @@
 		-RZSTDLIB_VISIBILITY= \
 		-RZSTDERRORLIB_VISIBILITY= \
 		-DZSTD_HAVE_WEAK_SYMBOLS=0 \
-		-DZSTD_TRACE=0
+		-DZSTD_TRACE=0 \
+		-DZSTD_NO_TRACE
+	mv linux/lib/zstd/zstd.h linux/include/linux/zstd_lib.h
+	mv linux/lib/zstd/zstd_errors.h linux/include/linux/
 	cp linux_zstd.h linux/include/linux/zstd.h
 	cp zstd_compress_module.c linux/lib/zstd
 	cp zstd_decompress_module.c linux/lib/zstd
@@ -62,15 +69,18 @@
 	rm -f $(LINUX)/include/linux/zstd_errors.h
 	rm -rf $(LINUX)/lib/zstd
 	cp linux/include/linux/zstd.h $(LINUX)/include/linux
+	cp linux/include/linux/zstd_lib.h $(LINUX)/include/linux
+	cp linux/include/linux/zstd_errors.h $(LINUX)/include/linux
 	cp -r linux/lib/zstd $(LINUX)/lib
 
 import-upstream:
 	rm -rf $(LINUX)/lib/zstd
 	mkdir $(LINUX)/lib/zstd
-	cp ../../lib/zstd.h $(LINUX)/lib/zstd
+	cp ../../lib/zstd.h $(LINUX)/include/linux/zstd_lib.h
 	cp -r ../../lib/common $(LINUX)/lib/zstd
 	cp -r ../../lib/compress $(LINUX)/lib/zstd
 	cp -r ../../lib/decompress $(LINUX)/lib/zstd
+	mv $(LINUX)/lib/zstd/zstd_errors.h $(LINUX)/include/linux
 	rm $(LINUX)/lib/zstd/common/threading.*
 	rm $(LINUX)/lib/zstd/common/pool.*
 	rm $(LINUX)/lib/zstd/common/xxhash.*
diff --git a/contrib/linux-kernel/decompress_sources.h b/contrib/linux-kernel/decompress_sources.h
index 907753e..f35bef0 100644
--- a/contrib/linux-kernel/decompress_sources.h
+++ b/contrib/linux-kernel/decompress_sources.h
@@ -1,4 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 /*
  * This file includes every .c file needed for decompression.
diff --git a/contrib/linux-kernel/linux.mk b/contrib/linux-kernel/linux.mk
index 06bf079..19485e3 100644
--- a/contrib/linux-kernel/linux.mk
+++ b/contrib/linux-kernel/linux.mk
@@ -1,4 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
diff --git a/contrib/linux-kernel/linux_zstd.h b/contrib/linux-kernel/linux_zstd.h
index dcd1ec1..446ecab 100644
--- a/contrib/linux-kernel/linux_zstd.h
+++ b/contrib/linux-kernel/linux_zstd.h
@@ -1,18 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
  */
 
 #ifndef LINUX_ZSTD_H
@@ -27,6 +22,8 @@
 
 /* ======   Dependency   ====== */
 #include <linux/types.h>
+#include <linux/zstd_errors.h>
+#include <linux/zstd_lib.h>
 
 /* ======   Helper Functions   ====== */
 /**
@@ -46,12 +43,17 @@
 unsigned int zstd_is_error(size_t code);
 
 /**
+ * enum zstd_error_code - zstd error codes
+ */
+typedef ZSTD_ErrorCode zstd_error_code;
+
+/**
  * zstd_get_error_code() - translates an error function result to an error code
  * @code:  The function result for which zstd_is_error(code) is true.
  *
  * Return: A unique error code for this error.
  */
-int zstd_get_error_code(size_t code);
+zstd_error_code zstd_get_error_code(size_t code);
 
 /**
  * zstd_get_error_name() - translates an error function result to a string
@@ -61,76 +63,67 @@
  */
 const char *zstd_get_error_name(size_t code);
 
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
 /* ======   Parameter Selection   ====== */
 
 /**
  * enum zstd_strategy - zstd compression search strategy
  *
- * From faster to stronger.
+ * From faster to stronger. See zstd_lib.h.
  */
-enum zstd_strategy {
-	zstd_fast = 1,
-	zstd_dfast = 2,
-	zstd_greedy = 3,
-	zstd_lazy = 4,
-	zstd_lazy2 = 5,
-	zstd_btlazy2 = 6,
-	zstd_btopt = 7,
-	zstd_btultra = 8,
-	zstd_btultra2 = 9
-};
+typedef ZSTD_strategy zstd_strategy;
 
 /**
  * struct zstd_compression_parameters - zstd compression parameters
- * @window_log:    Log of the largest match distance. Larger means more
- *                 compression, and more memory needed during decompression.
- * @chain_log:     Fully searched segment. Larger means more compression,
- *                 slower, and more memory (useless for fast).
- * @hash_log:      Dispatch table. Larger means more compression,
- *                 slower, and more memory.
- * @search_log:    Number of searches. Larger means more compression and slower.
- * @search_length: Match length searched. Larger means faster decompression,
- *                 sometimes less compression.
- * @target_length: Acceptable match size for optimal parser (only). Larger means
- *                 more compression, and slower.
- * @strategy:      The zstd compression strategy.
+ * @windowLog:    Log of the largest match distance. Larger means more
+ *                compression, and more memory needed during decompression.
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
+ * @hashLog:      Dispatch table. Larger means more compression,
+ *                slower, and more memory.
+ * @searchLog:    Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ *                sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ *                more compression, and slower.
+ * @strategy:     The zstd compression strategy.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_compression_parameters {
-	unsigned int window_log;
-	unsigned int chain_log;
-	unsigned int hash_log;
-	unsigned int search_log;
-	unsigned int search_length;
-	unsigned int target_length;
-	enum zstd_strategy strategy;
-};
+typedef ZSTD_compressionParameters zstd_compression_parameters;
 
 /**
  * struct zstd_frame_parameters - zstd frame parameters
- * @content_size_flag: Controls whether content size will be present in the
- *                     frame header (when known).
- * @checksum_flag:     Controls whether a 32-bit checksum is generated at the
- *                     end of the frame for error detection.
- * @no_dict_id_flag:   Controls whether dictID will be saved into the frame
- *                     header when using dictionary compression.
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
  *
- * The default value is all fields set to 0.
+ * The default value is all fields set to 0. See zstd_lib.h.
  */
-struct zstd_frame_parameters {
-	unsigned int content_size_flag;
-	unsigned int checksum_flag;
-	unsigned int no_dict_id_flag;
-};
+typedef ZSTD_frameParameters zstd_frame_parameters;
 
 /**
  * struct zstd_parameters - zstd parameters
- * @cparams: The compression parameters.
- * @fparams: The frame parameters.
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
  */
-struct zstd_parameters {
-	struct zstd_compression_parameters cparams;
-	struct zstd_frame_parameters fparams;
-};
+typedef ZSTD_parameters zstd_parameters;
 
 /**
  * zstd_get_params() - returns zstd_parameters for selected level
@@ -140,12 +133,12 @@
  *
  * Return:              The selected zstd_parameters.
  */
-struct zstd_parameters zstd_get_params(int level,
+zstd_parameters zstd_get_params(int level,
 	unsigned long long estimated_src_size);
 
 /* ======   Single-pass Compression   ====== */
 
-typedef struct ZSTD_CCtx_s zstd_cctx;
+typedef ZSTD_CCtx zstd_cctx;
 
 /**
  * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
@@ -158,8 +151,7 @@
  * Return:      A lower bound on the size of the workspace that is passed to
  *              zstd_init_cctx().
  */
-size_t zstd_cctx_workspace_bound(
-	const struct zstd_compression_parameters *parameters);
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
 
 /**
  * zstd_init_cctx() - initialize a zstd compression context
@@ -186,11 +178,11 @@
  *                zstd_is_error().
  */
 size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size, const struct zstd_parameters *parameters);
+	const void *src, size_t src_size, const zstd_parameters *parameters);
 
 /* ======   Single-pass Decompression   ====== */
 
-typedef struct ZSTD_DCtx_s zstd_dctx;
+typedef ZSTD_DCtx zstd_dctx;
 
 /**
  * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
@@ -236,12 +228,10 @@
  * @size: Size of the input buffer.
  * @pos:  Position where reading stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_in_buffer {
-	const void *src;
-	size_t size;
-	size_t pos;
-};
+typedef ZSTD_inBuffer zstd_in_buffer;
 
 /**
  * struct zstd_out_buffer - output buffer for streaming
@@ -249,16 +239,14 @@
  * @size: Size of the output buffer.
  * @pos:  Position where writing stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_out_buffer {
-	void *dst;
-	size_t size;
-	size_t pos;
-};
+typedef ZSTD_outBuffer zstd_out_buffer;
 
 /* ======   Streaming Compression   ====== */
 
-typedef struct ZSTD_CCtx_s zstd_cstream;
+typedef ZSTD_CStream zstd_cstream;
 
 /**
  * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
@@ -267,8 +255,7 @@
  * Return:   A lower bound on the size of the workspace that is passed to
  *           zstd_init_cstream().
  */
-size_t zstd_cstream_workspace_bound(
-	const struct zstd_compression_parameters *cparams);
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
 
 /**
  * zstd_init_cstream() - initialize a zstd streaming compression context
@@ -285,7 +272,7 @@
  *
  * Return:            The zstd streaming compression context or NULL on error.
  */
-zstd_cstream *zstd_init_cstream(const struct zstd_parameters *parameters,
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
 	unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
 
 /**
@@ -320,8 +307,8 @@
  *           function call or an error, which can be checked using
  *           zstd_is_error().
  */
-size_t zstd_compress_stream(zstd_cstream *cstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input);
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
 
 /**
  * zstd_flush_stream() - flush internal buffers into output
@@ -336,7 +323,7 @@
  * Return:   The number of bytes still present within internal buffers or an
  *           error, which can be checked using zstd_is_error().
  */
-size_t zstd_flush_stream(zstd_cstream *cstream, struct zstd_out_buffer *output);
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /**
  * zstd_end_stream() - flush internal buffers into output and end the frame
@@ -350,11 +337,11 @@
  * Return:   The number of bytes still present within internal buffers or an
  *           error, which can be checked using zstd_is_error().
  */
-size_t zstd_end_stream(zstd_cstream *cstream, struct zstd_out_buffer *output);
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /* ======   Streaming Decompression   ====== */
 
-typedef struct ZSTD_DCtx_s zstd_dstream;
+typedef ZSTD_DStream zstd_dstream;
 
 /**
  * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
@@ -411,8 +398,8 @@
  *           using zstd_is_error(). The size hint will never load more than the
  *           frame.
  */
-size_t zstd_decompress_stream(zstd_dstream *dstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input);
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
 
 /* ======   Frame Inspection Functions ====== */
 
@@ -431,20 +418,21 @@
 
 /**
  * struct zstd_frame_params - zstd frame parameters stored in the frame header
- * @frame_content_size: The frame content size, or 0 if not present.
- * @window_size:        The window size, or 0 if the frame is a skippable frame.
- * @dict_id:            The dictionary id, or 0 if not present.
- * @checksum_flag:      Whether a checksum was used.
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable)
+ * @headerSize:       The size of the frame header.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
  */
-struct zstd_frame_params {
-	unsigned long long frame_content_size;
-	unsigned int window_size;
-	unsigned int dict_id;
-	unsigned int checksum_flag;
-};
+typedef ZSTD_frameHeader zstd_frame_header;
 
 /**
- * zstd_get_frame_params() - extracts parameters from a zstd or skippable frame
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
  * @params:   On success the frame parameters are written here.
  * @src:      The source buffer. It must point to a zstd or skippable frame.
  * @src_size: The size of the source buffer.
@@ -453,7 +441,7 @@
  *            must be provided to make forward progress. Otherwise it returns
  *            an error, which can be checked using zstd_is_error().
  */
-size_t zstd_get_frame_params(struct zstd_frame_params *params, const void *src,
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
 	size_t src_size);
 
 #endif  /* LINUX_ZSTD_H */
diff --git a/contrib/linux-kernel/mem.h b/contrib/linux-kernel/mem.h
index 63f1a68..4b5db57 100644
--- a/contrib/linux-kernel/mem.h
+++ b/contrib/linux-kernel/mem.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile
index 80bce74..2908839 100644
--- a/contrib/linux-kernel/test/Makefile
+++ b/contrib/linux-kernel/test/Makefile
@@ -1,15 +1,24 @@
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 
 LINUX := ../linux
 LINUX_ZSTDLIB := $(LINUX)/lib/zstd
 
-CPPFLAGS += -I$(LINUX)/include -I$(LINUX_ZSTDLIB) -Iinclude -DNDEBUG
+CPPFLAGS += -I$(LINUX)/include -I$(LINUX_ZSTDLIB) -Iinclude -DNDEBUG -Wno-deprecated-declarations
 # Don't poison the workspace, it currently doesn't work with static allocation and workspace reuse
 CPPFLAGS += -DZSTD_ASAN_DONT_POISON_WORKSPACE
 
 LINUX_ZSTD_MODULE     := $(wildcard $(LINUX_ZSTDLIB)/*.c)
-LINUX_ZSTD_COMMON     := $(wildcard $(LINUX_ZSTDLIB)/common/*.c) 
-LINUX_ZSTD_COMPRESS   := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c) 
-LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c) 
+LINUX_ZSTD_COMMON     := $(wildcard $(LINUX_ZSTDLIB)/common/*.c)
+LINUX_ZSTD_COMPRESS   := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c)
+LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c)
 LINUX_ZSTD_FILES      := $(LINUX_ZSTD_MODULE) $(LINUX_ZSTD_COMMON) $(LINUX_ZSTD_COMPRESS) $(LINUX_ZSTD_DECOMPRESS)
 LINUX_ZSTD_OBJECTS    := $(LINUX_ZSTD_FILES:.c=.o)
 
@@ -29,6 +38,7 @@
 
 .PHONY:
 clean:
+	$(RM) -f $(LINUX_ZSTDLIB)/*.o
 	$(RM) -f $(LINUX_ZSTDLIB)/**/*.o
 	$(RM) -f *.o *.a
 	$(RM) -f test
diff --git a/contrib/linux-kernel/test/include/asm/unaligned.h b/contrib/linux-kernel/test/include/asm/unaligned.h
index 6576b37..02c2d74 100644
--- a/contrib/linux-kernel/test/include/asm/unaligned.h
+++ b/contrib/linux-kernel/test/include/asm/unaligned.h
@@ -4,13 +4,23 @@
 #include <assert.h>
 #include <linux/types.h>
 
-#define _LITTLE_ENDIAN 1
+#ifndef __LITTLE_ENDIAN
+# if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN__)
+#  define __LITTLE_ENDIAN 1
+# endif
+#endif
+
+#ifdef __LITTLE_ENDIAN
+# define _IS_LITTLE_ENDIAN 1
+#else
+# define _IS_LITTLE_ENDIAN 0
+#endif
 
 static unsigned _isLittleEndian(void)
 {
     const union { uint32_t u; uint8_t c[4]; } one = { 1 };
-    assert(_LITTLE_ENDIAN == one.c[0]);
-    return _LITTLE_ENDIAN;
+    assert(_IS_LITTLE_ENDIAN == one.c[0]);
+    return _IS_LITTLE_ENDIAN;
 }
 
 static uint16_t _swap16(uint16_t in)
@@ -165,7 +175,7 @@
     (void)0;                                                                   \
   })
 
-#if _LITTLE_ENDIAN
+#if _IS_LITTLE_ENDIAN
 #  define get_unaligned __get_unaligned_le
 #  define put_unaligned __put_unaligned_le
 #else
diff --git a/contrib/linux-kernel/test/include/linux/compiler.h b/contrib/linux-kernel/test/include/linux/compiler.h
index 58c7118..ea3422e 100644
--- a/contrib/linux-kernel/test/include/linux/compiler.h
+++ b/contrib/linux-kernel/test/include/linux/compiler.h
@@ -14,4 +14,8 @@
 #define inline __inline __attribute__((unused))
 #endif
 
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
 #endif
diff --git a/contrib/linux-kernel/test/include/linux/kernel.h b/contrib/linux-kernel/test/include/linux/kernel.h
index 9b481ef..1f702ab 100644
--- a/contrib/linux-kernel/test/include/linux/kernel.h
+++ b/contrib/linux-kernel/test/include/linux/kernel.h
@@ -12,4 +12,8 @@
 
 #define WARN_ON(x)
 
+#define PTR_ALIGN(p, a) (typeof(p))ALIGN((unsigned long long)(p), (a))
+#define ALIGN(x, a)         ALIGN_MASK((x), (a) - 1)
+#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
 #endif
diff --git a/contrib/linux-kernel/test/static_test.c b/contrib/linux-kernel/test/static_test.c
index 6ac534d..50c594c 100644
--- a/contrib/linux-kernel/test/static_test.c
+++ b/contrib/linux-kernel/test/static_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c
index 47af82f..9064be7 100644
--- a/contrib/linux-kernel/test/test.c
+++ b/contrib/linux-kernel/test/test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 7-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -57,10 +57,10 @@
   fprintf(stderr, "testing btrfs use cases... ");
   size_t const size = MIN(data->dataSize, 128 * 1024);
   for (int level = -1; level < 16; ++level) {
-    struct zstd_parameters params = zstd_get_params(level, size);
-    CONTROL(params.cparams.window_log <= 17);
+    zstd_parameters params = zstd_get_params(level, size);
+    CONTROL(params.cParams.windowLog <= 17);
     size_t const workspaceSize =
-        MAX(zstd_cstream_workspace_bound(&params.cparams),
+        MAX(zstd_cstream_workspace_bound(&params.cParams),
             zstd_dstream_workspace_bound(size));
     void *workspace = malloc(workspaceSize);
     CONTROL(workspace != NULL);
@@ -72,8 +72,8 @@
     {
       zstd_cstream *cctx = zstd_init_cstream(&params, size, workspace, workspaceSize);
       CONTROL(cctx != NULL);
-      struct zstd_out_buffer out = {NULL, 0, 0};
-      struct zstd_in_buffer in = {NULL, 0, 0};
+      zstd_out_buffer out = {NULL, 0, 0};
+      zstd_in_buffer in = {NULL, 0, 0};
       for (;;) {
         if (in.pos == in.size) {
           in.src = ip;
@@ -107,10 +107,10 @@
     op = data->data2;
     oend = op + size;
     {
-      zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cparams.window_log, workspace, workspaceSize);
+      zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cParams.windowLog, workspace, workspaceSize);
       CONTROL(dctx != NULL);
-      struct zstd_out_buffer out = {NULL, 0, 0};
-      struct zstd_in_buffer in = {NULL, 0, 0};
+      zstd_out_buffer out = {NULL, 0, 0};
+      zstd_in_buffer in = {NULL, 0, 0};
       for (;;) {
         if (in.pos == in.size) {
           in.src = ip;
@@ -144,8 +144,8 @@
     fprintf(stderr, "Testing decompress unzstd... ");
     size_t cSize;
     {
-        struct zstd_parameters params = zstd_get_params(19, 0);
-        size_t const wkspSize = zstd_cctx_workspace_bound(&params.cparams);
+        zstd_parameters params = zstd_get_params(19, 0);
+        size_t const wkspSize = zstd_cctx_workspace_bound(&params.cParams);
         void* wksp = malloc(wkspSize);
         CONTROL(wksp != NULL);
         zstd_cctx* cctx = zstd_init_cctx(wksp, wkspSize);
@@ -169,6 +169,13 @@
     fprintf(stderr, "Ok\n");
 }
 
+static void test_f2fs() {
+  fprintf(stderr, "testing f2fs uses... ");
+  CONTROL(zstd_min_clevel() < 0);
+  CONTROL(zstd_max_clevel() == 22);
+  fprintf(stderr, "Ok\n");
+}
+
 static char *g_stack = NULL;
 
 static void __attribute__((noinline)) use(void *x) {
@@ -195,6 +202,7 @@
 
 static void test_stack_usage(test_data_t const *data) {
   set_stack();
+  test_f2fs();
   test_btrfs(data);
   test_decompress_unzstd(data);
   check_stack();
@@ -202,6 +210,7 @@
 
 int main(void) {
   test_data_t data = create_test_data();
+  test_f2fs();
   test_btrfs(&data);
   test_decompress_unzstd(&data);
   test_stack_usage(&data);
diff --git a/contrib/linux-kernel/zstd_compress_module.c b/contrib/linux-kernel/zstd_compress_module.c
index bab79af..37d08ff 100644
--- a/contrib/linux-kernel/zstd_compress_module.c
+++ b/contrib/linux-kernel/zstd_compress_module.c
@@ -1,75 +1,33 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/zstd.h>
 
-#include "zstd.h"
 #include "common/zstd_deps.h"
 #include "common/zstd_internal.h"
 
-static void zstd_check_structs(void) {
-	/* Check that the structs have the same size. */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_parameters) ==
-		sizeof(struct zstd_parameters));
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_compressionParameters) ==
-		sizeof(struct zstd_compression_parameters));
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_frameParameters) ==
-		sizeof(struct zstd_frame_parameters));
-	/* Zstd guarantees that the layout of the structs never change. Verify it. */
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_parameters, cParams) ==
-		offsetof(struct zstd_parameters, cparams));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_parameters, fParams) ==
-		offsetof(struct zstd_parameters, fparams));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, windowLog) ==
-		offsetof(struct zstd_compression_parameters, window_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, chainLog) ==
-		offsetof(struct zstd_compression_parameters, chain_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, hashLog) ==
-		offsetof(struct zstd_compression_parameters, hash_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, searchLog) ==
-		offsetof(struct zstd_compression_parameters, search_log));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, minMatch) ==
-		offsetof(struct zstd_compression_parameters, search_length));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, targetLength) ==
-		offsetof(struct zstd_compression_parameters, target_length));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_compressionParameters, strategy) ==
-		offsetof(struct zstd_compression_parameters, strategy));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, contentSizeFlag) ==
-		offsetof(struct zstd_frame_parameters, content_size_flag));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, checksumFlag) ==
-		offsetof(struct zstd_frame_parameters, checksum_flag));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_frameParameters, noDictIDFlag) ==
-		offsetof(struct zstd_frame_parameters, no_dict_id_flag));
-	/* Check that the strategies are the same. This can change. */
-	ZSTD_STATIC_ASSERT((int)ZSTD_fast == (int)zstd_fast);
-	ZSTD_STATIC_ASSERT((int)ZSTD_dfast == (int)zstd_dfast);
-	ZSTD_STATIC_ASSERT((int)ZSTD_greedy == (int)zstd_greedy);
-	ZSTD_STATIC_ASSERT((int)ZSTD_lazy == (int)zstd_lazy);
-	ZSTD_STATIC_ASSERT((int)ZSTD_lazy2 == (int)zstd_lazy2);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btlazy2 == (int)zstd_btlazy2);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btopt == (int)zstd_btopt);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btultra == (int)zstd_btultra);
-	ZSTD_STATIC_ASSERT((int)ZSTD_btultra2 == (int)zstd_btultra2);
-	/* Check input buffer */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_inBuffer) == sizeof(struct zstd_in_buffer));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, src) ==
-		offsetof(struct zstd_in_buffer, src));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, size) ==
-		offsetof(struct zstd_in_buffer, size));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_inBuffer, pos) ==
-		offsetof(struct zstd_in_buffer, pos));
-	/* Check output buffer */
-	ZSTD_STATIC_ASSERT(sizeof(ZSTD_outBuffer) ==
-		sizeof(struct zstd_out_buffer));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, dst) ==
-		offsetof(struct zstd_out_buffer, dst));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, size) ==
-		offsetof(struct zstd_out_buffer, size));
-	ZSTD_STATIC_ASSERT(offsetof(ZSTD_outBuffer, pos) ==
-		offsetof(struct zstd_out_buffer, pos));
+int zstd_min_clevel(void)
+{
+	return ZSTD_minCLevel();
 }
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+	return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
 
 size_t zstd_compress_bound(size_t src_size)
 {
@@ -77,26 +35,16 @@
 }
 EXPORT_SYMBOL(zstd_compress_bound);
 
-struct zstd_parameters zstd_get_params(int level,
+zstd_parameters zstd_get_params(int level,
 	unsigned long long estimated_src_size)
 {
-	const ZSTD_parameters params = ZSTD_getParams(level, estimated_src_size, 0);
-	struct zstd_parameters out;
-
-	/* no-op */
-	zstd_check_structs();
-	ZSTD_memcpy(&out, &params, sizeof(out));
-	return out;
+	return ZSTD_getParams(level, estimated_src_size, 0);
 }
 EXPORT_SYMBOL(zstd_get_params);
 
-size_t zstd_cctx_workspace_bound(
-	const struct zstd_compression_parameters *cparams)
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
 {
-	ZSTD_compressionParameters p;
-
-	ZSTD_memcpy(&p, cparams, sizeof(p));
-	return ZSTD_estimateCCtxSize_usingCParams(p);
+	return ZSTD_estimateCCtxSize_usingCParams(*cparams);
 }
 EXPORT_SYMBOL(zstd_cctx_workspace_bound);
 
@@ -109,29 +57,21 @@
 EXPORT_SYMBOL(zstd_init_cctx);
 
 size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size, const struct zstd_parameters *parameters)
+	const void *src, size_t src_size, const zstd_parameters *parameters)
 {
-	ZSTD_parameters p;
-
-	ZSTD_memcpy(&p, parameters, sizeof(p));
-	return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, p);
+	return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, *parameters);
 }
 EXPORT_SYMBOL(zstd_compress_cctx);
 
-size_t zstd_cstream_workspace_bound(
-	const struct zstd_compression_parameters *cparams)
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
 {
-	ZSTD_compressionParameters p;
-
-	ZSTD_memcpy(&p, cparams, sizeof(p));
-	return ZSTD_estimateCStreamSize_usingCParams(p);
+	return ZSTD_estimateCStreamSize_usingCParams(*cparams);
 }
 EXPORT_SYMBOL(zstd_cstream_workspace_bound);
 
-zstd_cstream *zstd_init_cstream(const struct zstd_parameters *parameters,
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
 	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
 {
-	ZSTD_parameters p;
 	zstd_cstream *cstream;
 	size_t ret;
 
@@ -146,8 +86,7 @@
 	if (pledged_src_size == 0)
 		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
 
-	ZSTD_memcpy(&p, parameters, sizeof(p));
-	ret = ZSTD_initCStream_advanced(cstream, NULL, 0, p, pledged_src_size);
+	ret = ZSTD_initCStream_advanced(cstream, NULL, 0, *parameters, pledged_src_size);
 	if (ZSTD_isError(ret))
 		return NULL;
 
@@ -162,43 +101,22 @@
 }
 EXPORT_SYMBOL(zstd_reset_cstream);
 
-size_t zstd_compress_stream(zstd_cstream *cstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input)
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
 {
-	ZSTD_outBuffer o;
-	ZSTD_inBuffer i;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ZSTD_memcpy(&i, input, sizeof(i));
-	ret = ZSTD_compressStream(cstream, &o, &i);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	ZSTD_memcpy(input, &i, sizeof(i));
-	return ret;
+	return ZSTD_compressStream(cstream, output, input);
 }
 EXPORT_SYMBOL(zstd_compress_stream);
 
-size_t zstd_flush_stream(zstd_cstream *cstream, struct zstd_out_buffer *output)
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
 {
-	ZSTD_outBuffer o;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ret = ZSTD_flushStream(cstream, &o);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	return ret;
+	return ZSTD_flushStream(cstream, output);
 }
 EXPORT_SYMBOL(zstd_flush_stream);
 
-size_t zstd_end_stream(zstd_cstream *cstream, struct zstd_out_buffer *output)
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
 {
-	ZSTD_outBuffer o;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ret = ZSTD_endStream(cstream, &o);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	return ret;
+	return ZSTD_endStream(cstream, output);
 }
 EXPORT_SYMBOL(zstd_end_stream);
 
diff --git a/contrib/linux-kernel/zstd_decompress_module.c b/contrib/linux-kernel/zstd_decompress_module.c
index 988fdb5..15005cd 100644
--- a/contrib/linux-kernel/zstd_decompress_module.c
+++ b/contrib/linux-kernel/zstd_decompress_module.c
@@ -1,13 +1,20 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/zstd.h>
 
-#include "zstd.h"
 #include "common/zstd_deps.h"
-#include "common/zstd_errors.h"
 
 /* Common symbols. zstd_compress must depend on zstd_decompress. */
 
@@ -17,7 +24,7 @@
 }
 EXPORT_SYMBOL(zstd_is_error);
 
-int zstd_get_error_code(size_t code)
+zstd_error_code zstd_get_error_code(size_t code)
 {
 	return ZSTD_getErrorCode(code);
 }
@@ -74,19 +81,10 @@
 }
 EXPORT_SYMBOL(zstd_reset_dstream);
 
-size_t zstd_decompress_stream(zstd_dstream *dstream,
-	struct zstd_out_buffer *output, struct zstd_in_buffer *input)
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
 {
-	ZSTD_outBuffer o;
-	ZSTD_inBuffer i;
-	size_t ret;
-
-	ZSTD_memcpy(&o, output, sizeof(o));
-	ZSTD_memcpy(&i, input, sizeof(i));
-	ret = ZSTD_decompressStream(dstream, &o, &i);
-	ZSTD_memcpy(output, &o, sizeof(o));
-	ZSTD_memcpy(input, &i, sizeof(i));
-	return ret;
+	return ZSTD_decompressStream(dstream, output, input);
 }
 EXPORT_SYMBOL(zstd_decompress_stream);
 
@@ -96,27 +94,12 @@
 }
 EXPORT_SYMBOL(zstd_find_frame_compressed_size);
 
-size_t zstd_get_frame_params(struct zstd_frame_params *params, const void *src,
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
 	size_t src_size)
 {
-	ZSTD_frameHeader h;
-	const size_t ret = ZSTD_getFrameHeader(&h, src, src_size);
-
-	if (ret != 0)
-		return ret;
-
-	if (h.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
-		params->frame_content_size = h.frameContentSize;
-	else
-		params->frame_content_size = 0;
-
-	params->window_size = h.windowSize;
-	params->dict_id = h.dictID;
-	params->checksum_flag = h.checksumFlag;
-
-	return ret;
+	return ZSTD_getFrameHeader(header, src, src_size);
 }
-EXPORT_SYMBOL(zstd_get_frame_params);
+EXPORT_SYMBOL(zstd_get_frame_header);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/contrib/linux-kernel/zstd_deps.h b/contrib/linux-kernel/zstd_deps.h
index f9fd4c0..853b724 100644
--- a/contrib/linux-kernel/zstd_deps.h
+++ b/contrib/linux-kernel/zstd_deps.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -71,7 +72,7 @@
 #endif /* ZSTD_DEPS_MATH64 */
 #endif /* ZSTD_DEPS_NEED_MATH64 */
 
-/* 
+/*
  * This is only requested when DEBUGLEVEL >= 1, meaning
  * it is disabled in production.
  * Need:
@@ -88,7 +89,7 @@
 #endif /* ZSTD_DEPS_ASSERT */
 #endif /* ZSTD_DEPS_NEED_ASSERT */
 
-/* 
+/*
  * This is only requested when DEBUGLEVEL >= 2, meaning
  * it is disabled in production.
  * Need:
@@ -105,7 +106,7 @@
 #endif /* ZSTD_DEPS_IO */
 #endif /* ZSTD_DEPS_NEED_IO */
 
-/* 
+/*
  * Only requested when MSAN is enabled.
  * Need:
  * intptr_t
diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile
index 8d2b193..25265e7 100644
--- a/contrib/pzstd/Makefile
+++ b/contrib/pzstd/Makefile
@@ -30,6 +30,9 @@
 CPPFLAGS ?=
 LDFLAGS  ?=
 
+# PZstd uses legacy APIs
+CFLAGS   += -Wno-deprecated-declarations
+
 # Include flags
 PZSTD_INC  = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I.
 GTEST_INC  = -isystem googletest/googletest/include
diff --git a/contrib/pzstd/Options.h b/contrib/pzstd/Options.h
index f4f2aaa..924543a 100644
--- a/contrib/pzstd/Options.h
+++ b/contrib/pzstd/Options.h
@@ -9,6 +9,9 @@
 #pragma once
 
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
+                                         * and uses deprecated functions
+                                         */
 #include "zstd.h"
 #undef ZSTD_STATIC_LINKING_ONLY
 
diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h
index 033adef..c667c88 100644
--- a/contrib/pzstd/Pzstd.h
+++ b/contrib/pzstd/Pzstd.h
@@ -17,6 +17,9 @@
 #include "utils/ThreadPool.h"
 #include "utils/WorkQueue.h"
 #define ZSTD_STATIC_LINKING_ONLY
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
+                                         * and uses deprecated functions
+                                         */
 #include "zstd.h"
 #undef ZSTD_STATIC_LINKING_ONLY
 
diff --git a/contrib/seekable_format/examples/Makefile b/contrib/seekable_format/examples/Makefile
index 543780f..9df6b75 100644
--- a/contrib/seekable_format/examples/Makefile
+++ b/contrib/seekable_format/examples/Makefile
@@ -13,7 +13,7 @@
 ZSTDLIB_NAME = libzstd.a
 ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
 
-CPPFLAGS += -I../ -I../../../lib -I../../../lib/common
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I../ -I../../../lib -I../../../lib/common
 
 CFLAGS ?= -O3
 CFLAGS += -g
diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c
index 69644d2..4118b0a 100644
--- a/contrib/seekable_format/examples/parallel_compression.c
+++ b/contrib/seekable_format/examples/parallel_compression.c
@@ -21,7 +21,6 @@
 #  define SLEEP(x) usleep(x * 1000)
 #endif
 
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #include "pool.h"      // use zstd thread pool for demo
diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c
index 7050e0f..e9e2013 100644
--- a/contrib/seekable_format/examples/seekable_decompression.c
+++ b/contrib/seekable_format/examples/seekable_decompression.c
@@ -99,6 +99,9 @@
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
diff --git a/contrib/seekable_format/examples/seekable_decompression_mem.c b/contrib/seekable_format/examples/seekable_decompression_mem.c
index c36d222..e7b1c65 100644
--- a/contrib/seekable_format/examples/seekable_decompression_mem.c
+++ b/contrib/seekable_format/examples/seekable_decompression_mem.c
@@ -104,6 +104,9 @@
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
diff --git a/contrib/seekable_format/tests/Makefile b/contrib/seekable_format/tests/Makefile
index b00657f..d51deb3 100644
--- a/contrib/seekable_format/tests/Makefile
+++ b/contrib/seekable_format/tests/Makefile
@@ -13,16 +13,16 @@
 ZSTDLIB_NAME = libzstd.a
 ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
 
-CPPFLAGS += -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common
+CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I../ -I$(ZSTDLIB_PATH) -I$(ZSTDLIB_PATH)/common
 
 CFLAGS ?= -O3
-CFLAGS += -g
+CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wconversion \
+          -Wformat=2 -Wstrict-aliasing=1
 
 SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)
 
 .PHONY: default clean test
-
-default: seekable_tests
+default: test
 
 test: seekable_tests
 	./seekable_tests
@@ -30,9 +30,9 @@
 $(ZSTDLIB):
 	$(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)
 
-seekable_tests : seekable_tests.c $(SEEKABLE_OBJS)
+seekable_tests : $(SEEKABLE_OBJS)
 
 clean:
-	@rm -f core *.o tmp* result* *.zst \
+	@$(RM) core *.o tmp* result* *.zst \
 		seekable_tests
 	@echo Cleaning completed
diff --git a/contrib/seekable_format/tests/seekable_tests.c b/contrib/seekable_format/tests/seekable_tests.c
index f2556b5..a482638 100644
--- a/contrib/seekable_format/tests/seekable_tests.c
+++ b/contrib/seekable_format/tests/seekable_tests.c
@@ -1,6 +1,8 @@
 #include <stddef.h>
 #include <stdint.h>
+#include <stdlib.h>  // malloc
 #include <stdio.h>
+#include <assert.h>
 
 #include "zstd_seekable.h"
 
@@ -8,7 +10,83 @@
 int main(int argc, const char** argv)
 {
     unsigned testNb = 1;
+    (void)argc; (void)argv;
     printf("Beginning zstd seekable format tests...\n");
+
+    printf("Test %u - simple round trip: ", testNb++);
+    {   size_t const inSize = 4000;
+        void* const inBuffer = malloc(inSize);
+        assert(inBuffer != NULL);
+
+        size_t const seekCapacity = 5000;
+        void* const seekBuffer = malloc(seekCapacity);
+        assert(seekBuffer != NULL);
+        size_t seekSize;
+
+        size_t const outCapacity = inSize;
+        void* const outBuffer = malloc(outCapacity);
+        assert(outBuffer != NULL);
+
+        ZSTD_seekable_CStream* const zscs = ZSTD_seekable_createCStream();
+        assert(zscs != NULL);
+
+        { size_t const initStatus = ZSTD_seekable_initCStream(zscs, 9, 0 /* checksumFlag */, (unsigned)inSize /* maxFrameSize */);
+          assert(!ZSTD_isError(initStatus));
+        }
+
+        {   ZSTD_outBuffer outb = { .dst=seekBuffer, .pos=0, .size=seekCapacity };
+            ZSTD_inBuffer inb = { .src=inBuffer, .pos=0, .size=inSize };
+
+            size_t const cStatus = ZSTD_seekable_compressStream(zscs, &outb, &inb);
+            assert(!ZSTD_isError(cStatus));
+            assert(inb.pos == inb.size);
+
+            size_t const endStatus = ZSTD_seekable_endStream(zscs, &outb);
+            assert(!ZSTD_isError(endStatus));
+            seekSize = outb.pos;
+        }
+
+        ZSTD_seekable* const stream = ZSTD_seekable_create();
+        assert(stream != NULL);
+        { size_t const initStatus = ZSTD_seekable_initBuff(stream, seekBuffer, seekSize);
+          assert(!ZSTD_isError(initStatus)); }
+
+        { size_t const decStatus = ZSTD_seekable_decompress(stream, outBuffer, outCapacity, 0);
+          assert(decStatus == inSize); }
+
+        /* unit test ZSTD_seekTable functions */
+        ZSTD_seekTable* const zst = ZSTD_seekTable_create_fromSeekable(stream);
+        assert(zst != NULL);
+
+        unsigned const nbFrames = ZSTD_seekTable_getNumFrames(zst);
+        assert(nbFrames > 0);
+
+        unsigned long long const frame0Offset = ZSTD_seekTable_getFrameCompressedOffset(zst, 0);
+        assert(frame0Offset == 0);
+
+        unsigned long long const content0Offset = ZSTD_seekTable_getFrameDecompressedOffset(zst, 0);
+        assert(content0Offset == 0);
+
+        size_t const cSize = ZSTD_seekTable_getFrameCompressedSize(zst, 0);
+        assert(!ZSTD_isError(cSize));
+        assert(cSize <= seekCapacity);
+
+        size_t const origSize = ZSTD_seekTable_getFrameDecompressedSize(zst, 0);
+        assert(origSize == inSize);
+
+        unsigned const fo1idx = ZSTD_seekTable_offsetToFrameIndex(zst, 1);
+        assert(fo1idx == 0);
+
+        free(inBuffer);
+        free(seekBuffer);
+        free(outBuffer);
+        ZSTD_seekable_freeCStream(zscs);
+        ZSTD_seekTable_free(zst);
+        ZSTD_seekable_free(stream);
+    }
+    printf("Success!\n");
+
+
     printf("Test %u - check that seekable decompress does not hang: ", testNb++);
     {   /* Github issue #2335 */
         const size_t compressed_size = 17;
@@ -25,7 +103,7 @@
             '\x00',
             '\x00',
             '\x00',
-            ';',
+            (uint8_t)('\x03'),
             (uint8_t)('\xb1'),
             (uint8_t)('\xea'),
             (uint8_t)('\x92'),
@@ -34,6 +112,61 @@
         const size_t uncompressed_size = 32;
         uint8_t uncompressed_data[32];
 
+        ZSTD_seekable* const stream = ZSTD_seekable_create();
+        assert(stream != NULL);
+        {   size_t const status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
+            if (ZSTD_isError(status)) {
+                ZSTD_seekable_free(stream);
+                goto _test_error;
+        }   }
+
+        /* Should return an error, but not hang */
+        {   const size_t offset = 2;
+            size_t const status = ZSTD_seekable_decompress(stream, uncompressed_data, uncompressed_size, offset);
+            if (!ZSTD_isError(status)) {
+                ZSTD_seekable_free(stream);
+                goto _test_error;
+        }   }
+
+        ZSTD_seekable_free(stream);
+    }
+    printf("Success!\n");
+
+    printf("Test %u - check #2 that seekable decompress does not hang: ", testNb++);
+    {   /* Github issue #FIXME */
+        const size_t compressed_size = 27;
+        const uint8_t compressed_data[27] = {
+            (uint8_t)'\x28',
+            (uint8_t)'\xb5',
+            (uint8_t)'\x2f',
+            (uint8_t)'\xfd',
+            (uint8_t)'\x00',
+            (uint8_t)'\x32',
+            (uint8_t)'\x91',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x5e',
+            (uint8_t)'\x2a',
+            (uint8_t)'\x4d',
+            (uint8_t)'\x18',
+            (uint8_t)'\x09',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\x00',
+            (uint8_t)'\xb1',
+            (uint8_t)'\xea',
+            (uint8_t)'\x92',
+            (uint8_t)'\x8f',
+        };
+        const size_t uncompressed_size = 400;
+        uint8_t uncompressed_data[400];
+
         ZSTD_seekable* stream = ZSTD_seekable_create();
         size_t status = ZSTD_seekable_initBuff(stream, compressed_data, compressed_size);
         if (ZSTD_isError(status)) {
diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h
index 7ffd1ba..d2807cf 100644
--- a/contrib/seekable_format/zstd_seekable.h
+++ b/contrib/seekable_format/zstd_seekable.h
@@ -29,6 +29,7 @@
 
 typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
 typedef struct ZSTD_seekable_s ZSTD_seekable;
+typedef struct ZSTD_seekTable_s ZSTD_seekTable;
 
 /*-****************************************************************************
 *  Seekable compression - HowTo
@@ -107,6 +108,7 @@
 ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
 ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);
 
+
 /*-****************************************************************************
 *  Seekable decompression - HowTo
 *  A ZSTD_seekable object is required to tracking the seekTable.
@@ -161,13 +163,42 @@
 ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
 
 #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
-/*===== Seek Table access functions =====*/
-ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset);
+/*===== Seekable seek table access functions =====*/
+ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex);
+ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long offset);
+
+
+/*-****************************************************************************
+*  Direct exploitation of the seekTable
+*
+*  Memory constrained use cases that manage multiple archives
+*  benefit from retaining multiple archive seek tables
+*  without retaining a ZSTD_seekable instance for each.
+*
+*  Below API allow the above-mentioned use cases
+*  to initialize a ZSTD_seekable, extract its (smaller) ZSTD_seekTable,
+*  then throw the ZSTD_seekable away to save memory.
+*
+*  Standard ZSTD operations can then be used
+*  to decompress frames based on seek table offsets.
+******************************************************************************/
+
+/*===== Independent seek table management =====*/
+ZSTDLIB_API ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs);
+ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st);
+
+/*===== Direct seek table access functions =====*/
+ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st);
+ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex);
+ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long offset);
+
 
 /*===== Seekable advanced I/O API =====*/
 typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c
index 3db2918..242bd2a 100644
--- a/contrib/seekable_format/zstdseek_compress.c
+++ b/contrib/seekable_format/zstdseek_compress.c
@@ -12,13 +12,13 @@
 #include <assert.h>
 
 #define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "zstd_errors.h"
 #include "mem.h"
+
 #include "zstd_seekable.h"
 
 #define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }
@@ -63,19 +63,18 @@
     int writingSeekTable;
 };
 
-size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
+static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
 {
     /* allocate some initial space */
     size_t const FRAMELOG_STARTING_CAPACITY = 16;
     fl->entries = (framelogEntry_t*)malloc(
             sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
     if (fl->entries == NULL) return ERROR(memory_allocation);
-    fl->capacity = FRAMELOG_STARTING_CAPACITY;
-
+    fl->capacity = (U32)FRAMELOG_STARTING_CAPACITY;
     return 0;
 }
 
-size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
+static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
 {
     if (fl != NULL) free(fl->entries);
     return 0;
@@ -83,7 +82,7 @@
 
 ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
 {
-    ZSTD_frameLog* fl = malloc(sizeof(ZSTD_frameLog));
+    ZSTD_frameLog* const fl = (ZSTD_frameLog*)malloc(sizeof(ZSTD_frameLog));
     if (fl == NULL) return NULL;
 
     if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) {
@@ -106,10 +105,9 @@
     return 0;
 }
 
-ZSTD_seekable_CStream* ZSTD_seekable_createCStream()
+ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void)
 {
-    ZSTD_seekable_CStream* zcs = malloc(sizeof(ZSTD_seekable_CStream));
-
+    ZSTD_seekable_CStream* const zcs = (ZSTD_seekable_CStream*)malloc(sizeof(ZSTD_seekable_CStream));
     if (zcs == NULL) return NULL;
 
     memset(zcs, 0, sizeof(*zcs));
@@ -134,7 +132,6 @@
     ZSTD_freeCStream(zcs->cstream);
     ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
     free(zcs);
-
     return 0;
 }
 
@@ -152,9 +149,8 @@
         return ERROR(frameParameter_unsupported);
     }
 
-    zcs->maxFrameSize = maxFrameSize
-                                ? maxFrameSize
-                                : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
+    zcs->maxFrameSize = maxFrameSize ?
+                        maxFrameSize : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
 
     zcs->framelog.checksumFlag = checksumFlag;
     if (zcs->framelog.checksumFlag) {
@@ -180,7 +176,7 @@
     if (fl->size == fl->capacity) {
         /* exponential size increase for constant amortized runtime */
         size_t const newCapacity = fl->capacity * 2;
-        framelogEntry_t* const newEntries = realloc(fl->entries,
+        framelogEntry_t* const newEntries = (framelogEntry_t*)realloc(fl->entries,
                 sizeof(framelogEntry_t) * newCapacity);
 
         if (newEntries == NULL) return ERROR(memory_allocation);
@@ -204,7 +200,7 @@
     /* end the frame */
     size_t ret = ZSTD_endStream(zcs->cstream, output);
 
-    zcs->frameCSize += output->pos - prevOutPos;
+    zcs->frameCSize += (U32)(output->pos - prevOutPos);
 
     /* need to flush before doing the rest */
     if (ret) return ret;
@@ -224,8 +220,7 @@
     zcs->frameDSize = 0;
 
     ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only);
-    if (zcs->framelog.checksumFlag)
-        XXH64_reset(&zcs->xxhState, 0);
+    if (zcs->framelog.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
 
     return 0;
 }
@@ -248,8 +243,8 @@
             XXH64_update(&zcs->xxhState, inBase, inTmp.pos);
         }
 
-        zcs->frameCSize += output->pos - prevOutPos;
-        zcs->frameDSize += inTmp.pos;
+        zcs->frameCSize += (U32)(output->pos - prevOutPos);
+        zcs->frameDSize += (U32)inTmp.pos;
 
         input->pos += inTmp.pos;
 
@@ -290,7 +285,7 @@
         memcpy((BYTE*)output->dst + output->pos,
                tmp + (fl->seekTablePos - offset), lenWrite);
         output->pos += lenWrite;
-        fl->seekTablePos += lenWrite;
+        fl->seekTablePos += (U32)lenWrite;
 
         if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
     }
@@ -339,8 +334,7 @@
 
     if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
     if (fl->seekTablePos < seekTableLen - 4) {
-        BYTE sfd = 0;
-        sfd |= (fl->checksumFlag) << 7;
+        BYTE const sfd = (BYTE)((fl->checksumFlag) << 7);
 
         ((BYTE*)output->dst)[output->pos] = sfd;
         output->pos++;
diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c
index 4f76158..5eed024 100644
--- a/contrib/seekable_format/zstdseek_decompress.c
+++ b/contrib/seekable_format/zstdseek_decompress.c
@@ -60,7 +60,6 @@
 #include <assert.h>
 
 #define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
 #include "xxhash.h"
 
 #define ZSTD_STATIC_LINKING_ONLY
@@ -107,7 +106,8 @@
 
 static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
 {
-    buffWrapper_t* buff = (buffWrapper_t*) opaque;
+    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
+    assert(buff != NULL);
     if (buff->pos + n > buff->size) return -1;
     memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
     buff->pos += n;
@@ -118,15 +118,17 @@
 {
     buffWrapper_t* const buff = (buffWrapper_t*) opaque;
     unsigned long long newOffset;
+    assert(buff != NULL);
     switch (origin) {
     case SEEK_SET:
-        newOffset = offset;
+        assert(offset >= 0);
+        newOffset = (unsigned long long)offset;
         break;
     case SEEK_CUR:
-        newOffset = (unsigned long long)buff->pos + offset;
+        newOffset = (unsigned long long)((long long)buff->pos + offset);
         break;
     case SEEK_END:
-        newOffset = (unsigned long long)buff->size + offset;
+        newOffset = (unsigned long long)((long long)buff->size + offset);
         break;
     default:
         assert(0);  /* not possible */
@@ -144,18 +146,18 @@
     U32 checksum;
 } seekEntry_t;
 
-typedef struct {
+struct ZSTD_seekTable_s {
     seekEntry_t* entries;
     size_t tableLen;
 
     int checksumFlag;
-} seekTable_t;
+};
 
 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
 
 struct ZSTD_seekable_s {
     ZSTD_DStream* dstream;
-    seekTable_t seekTable;
+    ZSTD_seekTable seekTable;
     ZSTD_seekable_customFile src;
 
     U64 decompressedOffset;
@@ -173,8 +175,7 @@
 
 ZSTD_seekable* ZSTD_seekable_create(void)
 {
-    ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
-
+    ZSTD_seekable* const zs = (ZSTD_seekable*)malloc(sizeof(ZSTD_seekable));
     if (zs == NULL) return NULL;
 
     /* also initializes stage to zsds_init */
@@ -195,7 +196,35 @@
     ZSTD_freeDStream(zs->dstream);
     free(zs->seekTable.entries);
     free(zs);
+    return 0;
+}
 
+ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs)
+{
+    ZSTD_seekTable* const st = (ZSTD_seekTable*)malloc(sizeof(ZSTD_seekTable));
+    if (st==NULL) return NULL;
+
+    st->checksumFlag = zs->seekTable.checksumFlag;
+    st->tableLen = zs->seekTable.tableLen;
+
+    /* Allocate an extra entry at the end to match logic of initial allocation */
+    size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1);
+    seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize);
+    if (entries==NULL) {
+        free(st);
+        return NULL;
+    }
+
+    memcpy(entries, zs->seekTable.entries, entriesSize);
+    st->entries = entries;
+    return st;
+}
+
+size_t ZSTD_seekTable_free(ZSTD_seekTable* st)
+{
+    if (st == NULL) return 0; /* support free on null */
+    free(st->entries);
+    free(st);
     return 0;
 }
 
@@ -203,19 +232,24 @@
  *  Performs a binary search to find the last frame with a decompressed offset
  *  <= pos
  *  @return : the frame's index */
-unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
+unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos)
+{
+    return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos);
+}
+
+unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos)
 {
     U32 lo = 0;
-    U32 hi = (U32)zs->seekTable.tableLen;
-    assert(zs->seekTable.tableLen <= UINT_MAX);
+    U32 hi = (U32)st->tableLen;
+    assert(st->tableLen <= UINT_MAX);
 
-    if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
-        return (U32)zs->seekTable.tableLen;
+    if (pos >= st->entries[st->tableLen].dOffset) {
+        return (unsigned)st->tableLen;
     }
 
     while (lo + 1 < hi) {
         U32 const mid = lo + ((hi - lo) >> 1);
-        if (zs->seekTable.entries[mid].dOffset <= pos) {
+        if (st->entries[mid].dOffset <= pos) {
             lo = mid;
         } else {
             hi = mid;
@@ -224,36 +258,61 @@
     return lo;
 }
 
-unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
+unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs)
 {
-    assert(zs->seekTable.tableLen <= UINT_MAX);
-    return (unsigned)zs->seekTable.tableLen;
+    return ZSTD_seekTable_getNumFrames(&zs->seekTable);
 }
 
-unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-    return zs->seekTable.entries[frameIndex].cOffset;
+    assert(st->tableLen <= UINT_MAX);
+    return (unsigned)st->tableLen;
 }
 
-unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-    return zs->seekTable.entries[frameIndex].dOffset;
+    return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex);
 }
 
-size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
 {
-    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
-    return zs->seekTable.entries[frameIndex + 1].cOffset -
-           zs->seekTable.entries[frameIndex].cOffset;
+    if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return st->entries[frameIndex].cOffset;
 }
 
-size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
+unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex)
 {
-    if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
-    return zs->seekTable.entries[frameIndex + 1].dOffset -
-           zs->seekTable.entries[frameIndex].dOffset;
+    return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex);
+}
+
+unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return st->entries[frameIndex].dOffset;
+}
+
+size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
+{
+    return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex);
+}
+
+size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge);
+    return st->entries[frameIndex + 1].cOffset -
+           st->entries[frameIndex].cOffset;
+}
+
+size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex)
+{
+    return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex);
+}
+
+size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex)
+{
+    if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge);
+    return st->entries[frameIndex + 1].dOffset -
+           st->entries[frameIndex].dOffset;
 }
 
 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
@@ -272,10 +331,9 @@
         checksumFlag = sfd >> 7;
 
         /* check reserved bits */
-        if ((checksumFlag >> 2) & 0x1f) {
+        if ((sfd >> 2) & 0x1f) {
             return ERROR(corruption_detected);
-        }
-    }
+    }   }
 
     {   U32 const numFrames = MEM_readLE32(zs->inBuff);
         U32 const sizePerEntry = 8 + (checksumFlag?4:0);
@@ -283,12 +341,9 @@
         U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;
 
         U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
-        {
-            U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
-
+        {   U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
             CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
             CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
-
             remaining -= toRead;
         }
 
@@ -301,19 +356,15 @@
 
         {   /* Allocate an extra entry at the end so that we can do size
              * computations on the last element without special case */
-            seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
+            seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
 
             U32 idx = 0;
             U32 pos = 8;
 
-
             U64 cOffset = 0;
             U64 dOffset = 0;
 
-            if (!entries) {
-                free(entries);
-                return ERROR(memory_allocation);
-            }
+            if (entries == NULL) return ERROR(memory_allocation);
 
             /* compute cumulative positions */
             for (; idx < numFrames; idx++) {
@@ -381,26 +432,37 @@
 
 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
 {
+    unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset;
+    if (offset + len > eos) {
+        len = eos - offset;
+    }
+
     U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
     U32 noOutputProgressCount = 0;
+    size_t srcBytesRead = 0;
     do {
         /* check if we can continue from a previous decompress job */
         if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
             zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
             zs->curFrame = targetFrame;
 
+            assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX);
             CHECK_IO(zs->src.seek(zs->src.opaque,
-                                  zs->seekTable.entries[targetFrame].cOffset,
+                                  (long long)zs->seekTable.entries[targetFrame].cOffset,
                                   SEEK_SET));
             zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
             XXH64_reset(&zs->xxhState, 0);
             ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only);
+            if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) {
+                return ERROR(seekableIO);
+            }
         }
 
         while (zs->decompressedOffset < offset + len) {
             size_t toRead;
             ZSTD_outBuffer outTmp;
             size_t prevOutPos;
+            size_t prevInPos;
             size_t forwardProgress;
             if (zs->decompressedOffset < offset) {
                 /* dummy decompressions until we get to the target offset */
@@ -410,6 +472,7 @@
             }
 
             prevOutPos = outTmp.pos;
+            prevInPos = zs->in.pos;
             toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
             if (ZSTD_isError(toRead)) {
                 return toRead;
@@ -428,6 +491,7 @@
                 noOutputProgressCount = 0;
             }
             zs->decompressedOffset += forwardProgress;
+            srcBytesRead += zs->in.pos - prevInPos;
 
             if (toRead == 0) {
                 /* frame complete */
@@ -442,6 +506,8 @@
                 if (zs->decompressedOffset < offset + len) {
                     /* go back to the start and force a reset of the stream */
                     targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
+                    /* in this case it will fail later with corruption_detected, since last block does not have checksum */
+                    assert(targetFrame != zs->seekTable.tableLen);
                 }
                 break;
             }
@@ -453,7 +519,7 @@
                 zs->in.size = toRead;
                 zs->in.pos = 0;
             }
-        }
+        }  /* while (zs->decompressedOffset < offset + len) */
     } while (zs->decompressedOffset != offset + len);
 
     return len;
@@ -465,8 +531,7 @@
         return ERROR(frameIndex_tooLarge);
     }
 
-    {
-        size_t const decompressedSize =
+    {   size_t const decompressedSize =
                 zs->seekTable.entries[frameIndex + 1].dOffset -
                 zs->seekTable.entries[frameIndex].dOffset;
         if (dstSize < decompressedSize) {
diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile
index b57b608..a9c601e 100644
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c
index 0bf7579..935f60d 100644
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index b4b34b5..62e6f0d 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h
index eeeb61b..d89c835 100644
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 450f19b..010f10a 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.9 Manual</title>
+<title>zstd 1.5.0 Manual</title>
 </head>
 <body>
-<h1>zstd 1.4.9 Manual</h1>
+<h1>zstd 1.5.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
@@ -12,15 +12,15 @@
 <li><a href="#Chapter2">Version</a></li>
 <li><a href="#Chapter3">Simple API</a></li>
 <li><a href="#Chapter4">Explicit context</a></li>
-<li><a href="#Chapter5">Advanced compression API</a></li>
-<li><a href="#Chapter6">Advanced decompression API</a></li>
+<li><a href="#Chapter5">Advanced compression API (Requires v1.4.0+)</a></li>
+<li><a href="#Chapter6">Advanced decompression API (Requires v1.4.0+)</a></li>
 <li><a href="#Chapter7">Streaming</a></li>
 <li><a href="#Chapter8">Streaming compression - HowTo</a></li>
 <li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
 <li><a href="#Chapter10">Simple dictionary API</a></li>
 <li><a href="#Chapter11">Bulk processing dictionary API</a></li>
 <li><a href="#Chapter12">Dictionary helper functions</a></li>
-<li><a href="#Chapter13">Advanced dictionary and prefix API</a></li>
+<li><a href="#Chapter13">Advanced dictionary and prefix API (Requires v1.4.0+)</a></li>
 <li><a href="#Chapter14">experimental API (static linking only)</a></li>
 <li><a href="#Chapter15">Frame size functions</a></li>
 <li><a href="#Chapter16">Memory management</a></li>
@@ -141,8 +141,9 @@
 size_t      ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case single-pass scenario */<b>
 unsigned    ZSTD_isError(size_t code);          </b>/*!< tells if a `size_t` function result is an error code */<b>
 const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable string from an error code */<b>
-int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed */<b>
+int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed, requires v1.4.0+ */<b>
 int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
+int         ZSTD_defaultCLevel(void);           </b>/*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */<b>
 </pre></b><BR>
 <a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
 
@@ -157,7 +158,7 @@
  
 </pre><b><pre>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTD_CCtx* ZSTD_createCCtx(void);
-size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                          void* dst, size_t dstCapacity,
@@ -179,7 +180,7 @@
   Use one context per thread for parallel execution. 
 </pre><b><pre>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
-size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
@@ -190,7 +191,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter5"></a><h2>Advanced compression API</h2><pre></pre>
+<a name="Chapter5"></a><h2>Advanced compression API (Requires v1.4.0+)</h2><pre></pre>
 
 <pre><b>typedef enum { ZSTD_fast=1,
                ZSTD_dfast=2,
@@ -270,7 +271,6 @@
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
-
     </b>/* LDM mode parameters */<b>
     ZSTD_c_enableLongDistanceMatching=160, </b>/* Enable long distance matching.<b>
                                      * This parameter is designed to improve compression ratio
@@ -327,7 +327,7 @@
     ZSTD_c_jobSize=401,      </b>/* Size of a compression job. This value is enforced only when nbWorkers >= 1.<b>
                               * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
                               * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
                               * The minimum size is automatically and transparently enforced. */
     ZSTD_c_overlapLog=402,   </b>/* Control the overlap size, as a fraction of window size.<b>
                               * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@@ -357,6 +357,8 @@
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
      * ZSTD_c_validateSequences
+     * ZSTD_c_splitBlocks
+     * ZSTD_c_useRowMatchFinder
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -372,7 +374,10 @@
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
      ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009
+     ZSTD_c_experimentalParam12=1009,
+     ZSTD_c_experimentalParam13=1010,
+     ZSTD_c_experimentalParam14=1011,
+     ZSTD_c_experimentalParam15=1012
 } ZSTD_cParameter;
 </b></pre><BR>
 <pre><b>typedef struct {
@@ -456,7 +461,7 @@
  
 </p></pre><BR>
 
-<a name="Chapter6"></a><h2>Advanced decompression API</h2><pre></pre>
+<a name="Chapter6"></a><h2>Advanced decompression API (Requires v1.4.0+)</h2><pre></pre>
 
 <pre><b>typedef enum {
 
@@ -587,7 +592,7 @@
 <pre><b>typedef ZSTD_CCtx ZSTD_CStream;  </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b>
 </b></pre><BR>
 <h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void);
-size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <h3>Streaming compression functions</h3><pre></pre><b><pre>typedef enum {
     ZSTD_e_continue=0, </b>/* collect more data, encoder decides when to output compressed result, for optimal compression ratio */<b>
@@ -681,7 +686,7 @@
 <pre><b>typedef ZSTD_DCtx ZSTD_DStream;  </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b>
 </b></pre><BR>
 <h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
-size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+size_t ZSTD_freeDStream(ZSTD_DStream* zds);  </b>/* accept NULL pointer */<b>
 </pre></b><BR>
 <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 <pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
@@ -697,7 +702,7 @@
                                int compressionLevel);
 </b><p>  Compression at an explicit compression level using a Dictionary.
   A dictionary can be any arbitrary data segment (also called a prefix),
-  or a buffer with specified information (see dictBuilder/zdict.h).
+  or a buffer with specified information (see zdict.h).
   Note : This function loads the dictionary, resulting in significant startup delay.
          It's intended for a dictionary used only once.
   Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
@@ -732,7 +737,8 @@
 </p></pre><BR>
 
 <pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-</b><p>  Function frees memory allocated by ZSTD_createCDict(). 
+</b><p>  Function frees memory allocated by ZSTD_createCDict().
+  If a NULL pointer is passed, no operation is performed. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
@@ -751,7 +757,8 @@
 </p></pre><BR>
 
 <pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-</b><p>  Function frees memory allocated with ZSTD_createDDict() 
+</b><p>  Function frees memory allocated with ZSTD_createDDict()
+  If a NULL pointer is passed, no operation is performed. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
@@ -770,6 +777,12 @@
   It can still be loaded, but as a content-only dictionary. 
 </p></pre><BR>
 
+<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+</b><p>  Provides the dictID of the dictionary loaded into `cdict`.
+  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
+</p></pre><BR>
+
 <pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 </b><p>  Provides the dictID of the dictionary loaded into `ddict`.
   If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
@@ -788,7 +801,7 @@
   When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
 </p></pre><BR>
 
-<a name="Chapter13"></a><h2>Advanced dictionary and prefix API</h2><pre>
+<a name="Chapter13"></a><h2>Advanced dictionary and prefix API (Requires v1.4.0+)</h2><pre>
  This API allows dictionaries to be used with ZSTD_compress2(),
  ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
  only reset with the context is reset with ZSTD_reset_parameters or
@@ -1058,6 +1071,12 @@
   ZSTD_lcm_uncompressed = 2   </b>/**< Always emit uncompressed literals. */<b>
 } ZSTD_literalCompressionMode_e;
 </b></pre><BR>
+<pre><b>typedef enum {
+  ZSTD_urm_auto = 0,                   </b>/* Automatically determine whether or not we use row matchfinder */<b>
+  ZSTD_urm_disableRowMatchFinder = 1,  </b>/* Never use row matchfinder */<b>
+  ZSTD_urm_enableRowMatchFinder = 2    </b>/* Always use row matchfinder when applicable */<b>
+} ZSTD_useRowMatchFinderMode_e;
+</b></pre><BR>
 <a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
 
 <pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
@@ -1294,12 +1313,6 @@
   note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef 
 </p></pre><BR>
 
-<pre><b>unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
-</b><p>  Provides the dictID of the dictionary loaded into `cdict`.
-  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
-  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
-</p></pre><BR>
-
 <pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 </b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  `estimatedSrcSize` value is optional, select 0 if not known 
@@ -1323,24 +1336,26 @@
   This function never fails (wide contract) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2")
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params);
 </b><p>  Note : this function is now DEPRECATED.
          It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
-  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x 
+  This prototype will generate compilation warnings. 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                   void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize,
                             const ZSTD_CDict* cdict,
                                   ZSTD_frameParameters fParams);
-</b><p>  Note : this function is now REDUNDANT.
+</b><p>  Note : this function is now DEPRECATED.
          It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
-  This prototype will be marked as deprecated and generate compilation warning in some future version 
+  This prototype will generate compilation warnings. 
 </p></pre><BR>
 
 <pre><b>size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
@@ -1367,7 +1382,7 @@
 </p></pre><BR>
 
 <pre><b>ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  </b>/* accept NULL pointer */<b>
 </b><p>  Quick howto :
   - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
   - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
@@ -1379,7 +1394,7 @@
                                     These parameters will be applied to
                                     all subsequent frames.
   - ZSTD_compressStream2() : Do compression using the CCtx.
-  - ZSTD_freeCCtxParams() : Free the memory.
+  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
 
   This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
   for static allocation of CCtx for single-threaded compression.
@@ -1493,8 +1508,10 @@
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
-</b><p>  Instruct the decoder context about what kind of data to decode next.
+<pre><b>ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+</b><p>  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
+  Instruct the decoder context about what kind of data to decode next.
   This instruction is mandatory to decode data without a fully-formed header,
   such ZSTD_f_zstd1_magicless for example.
  @return : 0, or an error code (which can be tested using ZSTD_isError()). 
@@ -1517,11 +1534,11 @@
 <BR></pre>
 
 <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></pre></b><BR>
-<pre><b>size_t
-ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
              int compressionLevel,
              unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and equivalent to:
+</b><p> This function is DEPRECATED, and equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -1530,15 +1547,15 @@
  pledgedSrcSize must be correct. If it is not known at init time, use
  ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
  "0" also disables frame content size field. It may be enabled in the future.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
          const void* dict, size_t dictSize,
                int compressionLevel);
-</b><p> This function is deprecated, and is equivalent to:
+</b><p> This function is DEPRECATED, and is equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
      ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@@ -1547,16 +1564,16 @@
  dict == NULL or dictSize < 8, in which case no dict is used.
  Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
  it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
         const void* dict, size_t dictSize,
               ZSTD_parameters params,
               unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and is approximately equivalent to:
+</b><p> This function is DEPRECATED, and is approximately equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      // Pseudocode: Set each zstd parameter and leave the rest as-is.
      for ((param, value) : params) {
@@ -1568,22 +1585,23 @@
  dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
  pledgedSrcSize must be correct.
  If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
-</b><p> This function is deprecated, and equivalent to:
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+</b><p> This function is DEPRECATED, and equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_refCDict(zcs, cdict);
-
+ 
  note : cdict will just be referenced, and must outlive compression session
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t
-ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                    const ZSTD_CDict* cdict,
                          ZSTD_frameParameters fParams,
                          unsigned long long pledgedSrcSize);
@@ -1599,14 +1617,18 @@
  same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
  pledgedSrcSize must be correct. If srcSize is not known at init time, use
  value ZSTD_CONTENTSIZE_UNKNOWN.
- Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
-</b><p> This function is deprecated, and is equivalent to:
+<pre><b>ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+</b><p> This function is DEPRECATED, and is equivalent to:
      ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
      ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
+       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
+       explicitly specified.
 
   start a new frame, using same parameters from previous frame.
   This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@@ -1616,7 +1638,7 @@
   For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
   but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  @return : 0, or an error code (which can be tested using ZSTD_isError())
-  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+  This prototype will generate compilation warnings.
  
 </p></pre><BR>
 
@@ -1687,8 +1709,7 @@
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
 
   Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
-  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
   It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
@@ -1712,11 +1733,11 @@
 
 <h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
-size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </pre></b><BR>
+<pre><b>size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
+</b></pre><BR>
 <a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
diff --git a/examples/Makefile b/examples/Makefile
index 1787bf2..8d7361d 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/common.h b/examples/common.h
index a3f743b..181f6b3 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c
index 5599770..0eee650 100644
--- a/examples/dictionary_compression.c
+++ b/examples/dictionary_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c
index 6bf7705..107cfc1 100644
--- a/examples/dictionary_decompression.c
+++ b/examples/dictionary_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c
index e03ce86..5d2a28f 100644
--- a/examples/multiple_simple_compression.c
+++ b/examples/multiple_simple_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c
index 5a92a31..d4efc8e 100644
--- a/examples/multiple_streaming_compression.c
+++ b/examples/multiple_streaming_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_compression.c b/examples/simple_compression.c
index 3d6c3ae..27a65b1 100644
--- a/examples/simple_compression.c
+++ b/examples/simple_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c
index 4a179d1..59c1fd4 100644
--- a/examples/simple_decompression.c
+++ b/examples/simple_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c
index 6a039dc..e20bcde 100644
--- a/examples/streaming_compression.c
+++ b/examples/streaming_compression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c
index 7489eae..5a6551b 100644
--- a/examples/streaming_compression_thread_pool.c
+++ b/examples/streaming_compression_thread_pool.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Martin Liska, SUSE, Facebook, Inc.
+ * Copyright (c) Martin Liska, SUSE, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index 3f8e460..6dc4c22 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c
index 8c1e9be..a5219ef 100644
--- a/examples/streaming_memory_usage.c
+++ b/examples/streaming_memory_usage.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/BUCK b/lib/BUCK
index 637c20d..60c6bbb 100644
--- a/lib/BUCK
+++ b/lib/BUCK
@@ -65,9 +65,7 @@
     name='zdict',
     header_namespace='',
     visibility=['PUBLIC'],
-    exported_headers=subdir_glob([
-        ('dictBuilder', 'zdict.h'),
-    ]),
+    exported_headers=['zdict.h'],
     headers=subdir_glob([
         ('dictBuilder', 'divsufsort.h'),
         ('dictBuilder', 'cover.h'),
@@ -131,10 +129,10 @@
     name='errors',
     header_namespace='',
     visibility=['PUBLIC'],
-    exported_headers=subdir_glob([
-        ('common', 'error_private.h'),
-        ('common', 'zstd_errors.h'),
-    ]),
+    exported_headers=[
+        'zstd_errors.h',
+        'common/error_private.h',
+    ]
     srcs=['common/error_private.c'],
 )
 
diff --git a/lib/Makefile b/lib/Makefile
index 39ff304..9109476 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,9 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
+# Note: by default, the static library is built single-threaded and dynamic library is built
+# multi-threaded. It is possible to force multi or single threaded builds by appending
+# -mt or -nomt to the build target (like lib-mt for multi-threaded, lib-nomt for single-threaded).
 .PHONY: default
 default: lib-release
 
@@ -68,6 +71,10 @@
 CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
 FLAGS     = $(CPPFLAGS) $(CFLAGS)
 
+CPPFLAGS_DYNLIB  = -DZSTD_MULTITHREAD # dynamic library build defaults to multi-threaded
+LDFLAGS_DYNLIB   = -pthread
+CPPFLAGS_STATLIB =                    # static library build defaults to single-threaded
+
 HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 GREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
@@ -91,7 +98,7 @@
 ZSTD_LIB_COMPRESSION ?= 1
 ZSTD_LIB_DECOMPRESSION ?= 1
 ZSTD_LIB_DICTBUILDER ?= 1
-ZSTD_LIB_DEPRECATED ?= 1
+ZSTD_LIB_DEPRECATED ?= 0
 
 # Legacy support
 ZSTD_LEGACY_SUPPORT ?= 5
@@ -176,7 +183,9 @@
 
 ifndef BUILD_DIR
 ifeq ($(UNAME), Darwin)
-  HASH ?= md5
+  ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
+    HASH ?= md5
+  endif
 else ifeq ($(UNAME), FreeBSD)
   HASH ?= gmd5sum
 else ifeq ($(UNAME), NetBSD)
@@ -222,6 +231,7 @@
 
 
 .PHONY: libzstd.a  # must be run every time
+libzstd.a: CPPFLAGS += $(CPPFLAGS_STATLIB)
 
 ifndef BUILD_DIR
 # determine BUILD_DIR from compilation flags
@@ -238,7 +248,10 @@
 $(ZSTD_STATLIB): ARFLAGS = rcs
 $(ZSTD_STATLIB): | $(ZSTD_STATLIB_DIR)
 $(ZSTD_STATLIB): $(ZSTD_STATLIB_OBJ)
-	@echo compiling static library
+  # Check for multithread flag at target execution time
+	$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
+    @echo compiling multi-threaded static library $(LIBVER),\
+    @echo compiling single-threaded static library $(LIBVER))
 	$(AR) $(ARFLAGS) $@ $^
 
 libzstd.a: $(ZSTD_STATLIB)
@@ -257,8 +270,9 @@
 
 LIBZSTD = libzstd.$(SHARED_EXT_VER)
 .PHONY: $(LIBZSTD)  # must be run every time
-$(LIBZSTD): CFLAGS += -fPIC -fvisibility=hidden
-$(LIBZSTD): LDFLAGS += -shared
+$(LIBZSTD): CPPFLAGS += $(CPPFLAGS_DYNLIB)
+$(LIBZSTD): CFLAGS   += -fPIC -fvisibility=hidden
+$(LIBZSTD): LDFLAGS  += -shared $(LDFLAGS_DYNLIB)
 
 ifndef BUILD_DIR
 # determine BUILD_DIR from compilation flags
@@ -275,7 +289,10 @@
 
 $(ZSTD_DYNLIB): | $(ZSTD_DYNLIB_DIR)
 $(ZSTD_DYNLIB): $(ZSTD_DYNLIB_OBJ)
-	@echo compiling dynamic library $(LIBVER)
+# Check for multithread flag at target execution time
+	$(if $(filter -DZSTD_MULTITHREAD,$(CPPFLAGS)),\
+    @echo compiling multi-threaded dynamic library $(LIBVER),\
+    @echo compiling single-threaded dynamic library $(LIBVER))
 	$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
 	@echo creating versioned links
 	ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
@@ -297,10 +314,17 @@
 # note : do not define lib-mt or lib-release as .PHONY
 # make does not consider implicit pattern rule for .PHONY target
 
-%-mt : CPPFLAGS += -DZSTD_MULTITHREAD
-%-mt : LDFLAGS  += -pthread
+%-mt : CPPFLAGS_DYNLIB  := -DZSTD_MULTITHREAD
+%-mt : CPPFLAGS_STATLIB := -DZSTD_MULTITHREAD
+%-mt : LDFLAGS_DYNLIB   := -pthread
 %-mt : %
-	@echo multi-threading build completed
+	@echo multi-threaded build completed
+
+%-nomt : CPPFLAGS_DYNLIB  :=
+%-nomt : LDFLAGS_DYNLIB   :=
+%-nomt : CPPFLAGS_STATLIB :=
+%-nomt : %
+	@echo single-threaded build completed
 
 %-release : DEBUGFLAGS :=
 %-release : %
@@ -332,7 +356,8 @@
 # Special case : building library in single-thread mode _and_ without zstdmt_compress.c
 ZSTDMT_FILES = compress/zstdmt_compress.c
 ZSTD_NOMT_FILES = $(filter-out $(ZSTDMT_FILES),$(ZSTD_FILES))
-libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
+libzstd-nomt: CFLAGS += -fPIC -fvisibility=hidden
+libzstd-nomt: LDFLAGS += -shared
 libzstd-nomt: $(ZSTD_NOMT_FILES)
 	@echo compiling single-thread dynamic library $(LIBVER)
 	@echo files : $(ZSTD_NOMT_FILES)
@@ -411,10 +436,12 @@
 install: install-pc install-static install-shared install-includes
 	@echo zstd static and shared library installed
 
+.PHONY: install-pc
 install-pc: libzstd.pc
 	[ -e $(DESTDIR)$(PKGCONFIGDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
 	$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
 
+.PHONY: install-static
 install-static:
 	# only generate libzstd.a if it's not already present
 	[ -e libzstd.a ] || $(MAKE) libzstd.a-release
@@ -422,6 +449,7 @@
 	@echo Installing static library
 	$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
 
+.PHONY: install-shared
 install-shared:
 	# only generate libzstd.so if it's not already present
 	[ -e $(LIBZSTD) ] || $(MAKE) libzstd-release
@@ -431,12 +459,13 @@
 	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
 	ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
 
+.PHONY: install-includes
 install-includes:
 	[ -e $(DESTDIR)$(INCLUDEDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
 	@echo Installing includes
 	$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
-	$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
-	$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
+	$(INSTALL_DATA) zdict.h $(DESTDIR)$(INCLUDEDIR)
 
 .PHONY: uninstall
 uninstall:
diff --git a/lib/README.md b/lib/README.md
index db9170a..f781ac5 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -19,12 +19,16 @@
 
 #### Multithreading support
 
-Multithreading is disabled by default when building with `make`.
+When building with `make`, by default the dynamic library is multithreaded and static library is single-threaded (for compatibility reasons).
+
 Enabling multithreading requires 2 conditions :
 - set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
 - for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
 
-Both conditions are automatically applied when invoking `make lib-mt` target.
+For convenience, we provide a build target to generate multi and single threaded libraries:
+- Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
+- Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
+- By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
 
 When linking a POSIX program with a multithreaded version of `libzstd`,
 note that it's necessary to invoke the `-pthread` flag during link stage.
@@ -42,8 +46,8 @@
 
 Optional advanced features are exposed via :
 
-- `lib/common/zstd_errors.h` : translates `size_t` function results
-                               into a `ZSTD_ErrorCode`, for accurate error handling.
+- `lib/zstd_errors.h` : translates `size_t` function results
+                        into a `ZSTD_ErrorCode`, for accurate error handling.
 
 - `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
                           it unlocks access to the experimental API,
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index aa526b4..2e5a933 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * bitstream
  * Part of FSE library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index 1142002..a951d0a 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/cpu.h b/lib/common/cpu.h
index 1b4d26d..8acd33b 100644
--- a/lib/common/cpu.h
+++ b/lib/common/cpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/debug.c b/lib/common/debug.c
index cd1742c..bb863c9 100644
--- a/lib/common/debug.c
+++ b/lib/common/debug.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/debug.h b/lib/common/debug.h
index b45cda8..3b2a320 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index 033c075..41cd695 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Common functions of New Generation Entropy library
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index b6db380..6d1135f 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index 9d36e89..6d8b9f7 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,8 +21,8 @@
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include "zstd_deps.h"    /* size_t */
-#include "zstd_errors.h"  /* enum list */
+#include "../zstd_errors.h"  /* enum list */
+#include "zstd_deps.h"       /* size_t */
 
 
 /* ****************************************
diff --git a/lib/common/fse.h b/lib/common/fse.h
index 5292446..19dd4fe 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * FSE : Finite State Entropy codec
  * Public Prototypes declaration
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -352,7 +352,7 @@
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index c575db0..f4ff58f 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy decoder
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -310,6 +310,12 @@
     return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
 }
 
+typedef struct {
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+    FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
 FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
         void* dst, size_t dstCapacity,
         const void* cSrc, size_t cSrcSize,
@@ -318,33 +324,37 @@
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
-    short counting[FSE_MAX_SYMBOL_VALUE+1];
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-    FSE_DTable* const dtable = (FSE_DTable*)workSpace;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
     /* normal FSE decoding mode */
-    size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
-    if (FSE_isError(NCountLength)) return NCountLength;
-    if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
-    assert(NCountLength <= cSrcSize);
-    ip += NCountLength;
-    cSrcSize -= NCountLength;
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
 
     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
-    wkspSize -= FSE_DTABLE_SIZE(tableLog);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
-    CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
 
     {
-        const void* ptr = dtable;
+        const void* ptr = wksp->dtable;
         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
         const U32 fastMode = DTableH->fastMode;
 
         /* select fast mode (static) */
-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
     }
 }
 
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 43213e5..3d47ced 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman codec,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -192,6 +192,7 @@
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@@ -278,7 +279,7 @@
  *  a required workspace size greater than that specified in the following
  *  macro.
  */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 9813bfc..9f3b81a 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -143,9 +143,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
+#  if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -308,7 +306,7 @@
 
 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
 {
-    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+    return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16);
 }
 
 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
diff --git a/lib/common/pool.c b/lib/common/pool.c
index 5b092cc..ea70b8b 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/pool.h b/lib/common/pool.h
index d66942a..e18aa07 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index f0ac381..926b336 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -1,6 +1,6 @@
 /*
  *  xxHash - Fast Hash algorithm
- *  Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ *  Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - xxHash homepage: http://www.xxhash.com
@@ -30,9 +30,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+#  if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
   defined(__ICCARM__)
 #    define XXH_FORCE_MEMORY_ACCESS 1
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 31c488b..16c1f16 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash source repository : https://github.com/Cyan4973/xxHash
diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c
index 009b466..3d7e35b 100644
--- a/lib/common/zstd_common.c
+++ b/lib/common/zstd_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/zstd_deps.h b/lib/common/zstd_deps.h
index eebc4fd..1421134 100644
--- a/lib/common/zstd_deps.h
+++ b/lib/common/zstd_deps.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 53a982c..68252e9 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,11 @@
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
+#ifndef ZSTD_NO_TRACE
+#  include "zstd_trace.h"
+#else
+#  define ZSTD_TRACE 0
+#endif
 
 #if defined (__cplusplus)
 extern "C" {
@@ -347,11 +352,18 @@
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;         /* Offset code of the sequence */
+    U32 offset;         /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
     U16 litLength;
     U16 matchLength;
 } seqDef;
 
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+    ZSTD_llt_none = 0,             /* no longLengthType */
+    ZSTD_llt_literalLength = 1,    /* represents a long literal */
+    ZSTD_llt_matchLength = 2       /* represents a long match */
+} ZSTD_longLengthType_e;
+
 typedef struct {
     seqDef* sequencesStart;
     seqDef* sequences;      /* ptr to end of sequences */
@@ -363,12 +375,12 @@
     size_t maxNbSeq;
     size_t maxNbLit;
 
-    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
-     * the existing value of the litLength or matchLength by 0x10000. 
+     * the existing value of the litLength or matchLength by 0x10000.
      */
-    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
-    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+    ZSTD_longLengthType_e   longLengthType;
+    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
@@ -378,7 +390,7 @@
 
 /**
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
 {
@@ -386,10 +398,10 @@
     seqLen.litLength = seq->litLength;
     seqLen.matchLength = seq->matchLength + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthID == 1) {
+        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
             seqLen.litLength += 0xFFFF;
         }
-        if (seqStore->longLengthID == 2) {
+        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
             seqLen.matchLength += 0xFFFF;
         }
     }
diff --git a/lib/common/zstd_trace.c b/lib/common/zstd_trace.c
deleted file mode 100644
index 36f3b5d..0000000
--- a/lib/common/zstd_trace.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016-2021, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-#include "zstd_trace.h"
-#include "../zstd.h"
-
-#include "compiler.h"
-
-#if ZSTD_TRACE && ZSTD_HAVE_WEAK_SYMBOLS
-
-ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
-{
-    (void)cctx;
-    return 0;
-}
-
-ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
-{
-    (void)ctx;
-    (void)trace;
-}
-
-ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
-{
-    (void)dctx;
-    return 0;
-}
-
-ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
-{
-    (void)ctx;
-    (void)trace;
-}
-
-#endif
diff --git a/lib/common/zstd_trace.h b/lib/common/zstd_trace.h
index 487617c..2da5640 100644
--- a/lib/common/zstd_trace.h
+++ b/lib/common/zstd_trace.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -114,14 +114,15 @@
  * @returns Non-zero if tracing is enabled. The return value is
  *          passed to ZSTD_trace_compress_end().
  */
-ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
+    struct ZSTD_CCtx_s const* cctx);
 
 /**
  * Trace the end of a compression call.
  * @param ctx The return value of ZSTD_trace_compress_begin().
  * @param trace The zstd tracing info.
  */
-void ZSTD_trace_compress_end(
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
     ZSTD_TraceCtx ctx,
     ZSTD_Trace const* trace);
 
@@ -132,14 +133,15 @@
  * @returns Non-zero if tracing is enabled. The return value is
  *          passed to ZSTD_trace_compress_end().
  */
-ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
+    struct ZSTD_DCtx_s const* dctx);
 
 /**
  * Trace the end of a decompression call.
  * @param ctx The return value of ZSTD_trace_decompress_begin().
  * @param trace The zstd tracing info.
  */
-void ZSTD_trace_decompress_end(
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
     ZSTD_TraceCtx ctx,
     ZSTD_Trace const* trace);
 
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 887bdc8..b4297ec 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/hist.c b/lib/compress/hist.c
index 9af1aa1..073c57e 100644
--- a/lib/compress/hist.c
+++ b/lib/compress/hist.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/hist.h b/lib/compress/hist.h
index 43c5787..228ed48 100644
--- a/lib/compress/hist.h
+++ b/lib/compress/hist.h
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 71438d7..485906e 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -59,7 +59,15 @@
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
 {
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
@@ -67,33 +75,30 @@
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
 
-    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
-    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
     /* init conditions */
     if (wtSize <= 1) return 0;  /* Not compressible */
 
     /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
     }
 
     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
         op += hSize;
     }
 
     /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
         if (cSize == 0) return 0;   /* not enough space for compressed data */
         op += cSize;
     }
@@ -102,29 +107,33 @@
 }
 
 
-/*! HUF_writeCTable() :
-    `CTable` : Huffman tree to save, using huf representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
     BYTE* op = (BYTE*)dst;
     U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
 
-     /* check conditions */
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
-    bitsToWeight[0] = 0;
+    wksp->bitsToWeight[0] = 0;
     for (n=1; n<huffLog+1; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
 
     /* attempt weights compression by FSE */
-    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
             op[0] = (BYTE)hSize;
             return hSize+1;
@@ -134,12 +143,22 @@
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 }
 
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
 
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
 {
@@ -732,7 +751,10 @@
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_buildCTable_wksp_tables buildCTable_wksp;
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
 } HUF_compress_tables_t;
 
 /* HUF_compress_internal() :
@@ -795,7 +817,7 @@
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                             maxSymbolValue, huffLog,
-                                            &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
         /* Zero unused symbols in CTable, so we can check it for validity */
@@ -804,7 +826,8 @@
     }
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
         /* Check if using previous huffman table is beneficial */
         if (repeat && *repeat != HUF_repeat_none) {
             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 93c4075..b7ee298 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
 #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
 #include "../common/cpu.h"
 #include "../common/mem.h"
-#include "../common/zstd_trace.h"
 #include "hist.h"           /* HIST_countFast_wksp */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
 #include "../common/fse.h"
@@ -73,6 +72,10 @@
     ZSTD_customMem customMem;
     U32 dictID;
     int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+                                                     * row-based matchfinder. Unless the cdict is reloaded, we will use
+                                                     * the same greedy/lazy matchfinder at compression time.
+                                                     */
 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -203,6 +206,49 @@
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
+/* Returns true if the strategy supports using a row based matchfinder */
+static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+}
+
+/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) {
+    assert(mode != ZSTD_urm_auto);
+    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder);
+}
+
+/* Returns row matchfinder usage enum given an initial mode and cParams */
+static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode,
+                                                                   const ZSTD_compressionParameters* const cParams) {
+#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
+    int const kHasSIMD128 = 1;
+#else
+    int const kHasSIMD128 = 0;
+#endif
+    if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+    mode = ZSTD_urm_disableRowMatchFinder;
+    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+    if (kHasSIMD128) {
+        if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder;
+    } else {
+        if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder;
+    }
+    return mode;
+}
+
+/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+                                   const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                   const U32 forDDSDict) {
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
+     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+     */
+    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+}
+
 /* Returns 1 if compression parameters are such that we should
  * enable long distance matching (wlog >= 27, strategy >= btopt).
  * Returns 0 otherwise.
@@ -211,6 +257,14 @@
     return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
 }
 
+/* Returns 1 if compression parameters are such that we should
+ * enable blockSplitter (wlog >= 17, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17;
+}
+
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -219,6 +273,7 @@
     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
     cctxParams.cParams = cParams;
 
+    /* Adjust advanced params according to cParams */
     if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
         DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
         cctxParams.ldmParams.enableLdm = 1;
@@ -228,6 +283,12 @@
         assert(cctxParams.ldmParams.hashRateLog < 32);
     }
 
+    if (ZSTD_CParams_useBlockSplitter(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
+        cctxParams.splitBlocks = 1;
+    }
+
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
     assert(!ZSTD_checkCParams(cParams));
     return cctxParams;
 }
@@ -286,6 +347,8 @@
      * But, set it for tracing anyway.
      */
     cctxParams->compressionLevel = compressionLevel;
+    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder);
 }
 
 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
@@ -486,6 +549,21 @@
         bounds.upperBound = 1;
         return bounds;
 
+    case ZSTD_c_splitBlocks:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_useRowMatchFinder:
+        bounds.lowerBound = (int)ZSTD_urm_auto;
+        bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
+        return bounds;
+
+    case ZSTD_c_deterministicRefPrefix:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:
         bounds.error = ERROR(parameter_unsupported);
         return bounds;
@@ -547,6 +625,9 @@
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
     default:
         return 0;
     }
@@ -599,6 +680,9 @@
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -810,6 +894,21 @@
         CCtxParams->validateSequences = value;
         return CCtxParams->validateSequences;
 
+    case ZSTD_c_splitBlocks:
+        BOUNDCHECK(ZSTD_c_splitBlocks, value);
+        CCtxParams->splitBlocks = value;
+        return CCtxParams->splitBlocks;
+
+    case ZSTD_c_useRowMatchFinder:
+        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+        CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
+        return CCtxParams->useRowMatchFinder;
+
+    case ZSTD_c_deterministicRefPrefix:
+        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+        CCtxParams->deterministicRefPrefix = !!value;
+        return CCtxParams->deterministicRefPrefix;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -933,6 +1032,15 @@
     case ZSTD_c_validateSequences :
         *value = (int)CCtxParams->validateSequences;
         break;
+    case ZSTD_c_splitBlocks :
+        *value = (int)CCtxParams->splitBlocks;
+        break;
+    case ZSTD_c_useRowMatchFinder :
+        *value = (int)CCtxParams->useRowMatchFinder;
+        break;
+    case ZSTD_c_deterministicRefPrefix:
+        *value = (int)CCtxParams->deterministicRefPrefix;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -1299,9 +1407,14 @@
 
 static size_t
 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                       const U32 enableDedicatedDictSearch,
                        const U32 forCCtx)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* chain table size should be 0 for fast or row-hash strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
@@ -1311,24 +1424,34 @@
                             + hSize * sizeof(U32)
                             + h3Size * sizeof(U32);
     size_t const optPotentialSpace =
-        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+                                            : 0;
     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                 ? optPotentialSpace
                                 : 0;
+    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+
     DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                 (U32)chainSize, (U32)hSize, (U32)h3Size);
-    return tableSpace + optSpace;
+    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
 }
 
 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         const ZSTD_compressionParameters* cParams,
         const ldmParams_t* ldmParams,
         const int isStatic,
+        const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
         const size_t buffInSize,
         const size_t buffOutSize,
         const U64 pledgedSrcSize)
@@ -1338,16 +1461,16 @@
     U32    const divider = (cParams->minMatch==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
-    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
 
     size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
     size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
     size_t const ldmSeqSpace = ldmParams->enableLdm ?
-        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
 
 
     size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1373,25 +1496,45 @@
 {
     ZSTD_compressionParameters const cParams =
                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+                                                                                         &cParams);
 
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     /* estimateCCtxSize is for one-shot compression. So no buffers should
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-    return ZSTD_estimateCCtxSize_usingCParams(cParams);
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
 }
 
 size_t ZSTD_estimateCCtxSize(int compressionLevel)
@@ -1399,6 +1542,7 @@
     int level;
     size_t memBudget = 0;
     for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
         size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
         if (newMB > memBudget) memBudget = newMB;
     }
@@ -1417,17 +1561,29 @@
         size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
+        ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
             ZSTD_CONTENTSIZE_UNKNOWN);
     }
 }
 
 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
@@ -1552,20 +1708,27 @@
     ZSTD_resetTarget_CCtx
 } ZSTD_resetTarget_e;
 
+
 static size_t
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       ZSTD_cwksp* ws,
                 const ZSTD_compressionParameters* cParams,
+                const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
                 const ZSTD_compResetPolicy_e crp,
                 const ZSTD_indexResetPolicy_e forceResetIndex,
                 const ZSTD_resetTarget_e forWho)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* disable chain table allocation for fast or row-based strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
 
     DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
     if (forceResetIndex == ZSTDirp_reset) {
         ZSTD_window_init(&ms->window);
         ZSTD_cwksp_mark_tables_dirty(ws);
@@ -1604,11 +1767,23 @@
         ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
     }
 
+    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+        {   /* Row match finder needs an additional table of hashes ("tags") */
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+        }
+        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+            U32 const rowLog = cParams->searchLog < 5 ? 4 : 5;
+            assert(cParams->hashLog > rowLog);
+            ms->rowHashLog = cParams->hashLog - rowLog;
+        }
+    }
+
     ms->cParams = *cParams;
 
     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                     "failed a workspace allocation in ZSTD_reset_matchState");
-
     return 0;
 }
 
@@ -1625,61 +1800,85 @@
     return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
 }
 
+/** ZSTD_dictTooBig():
+ * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
+ * one go generically. So we ensure that in that case we reset the tables to zero,
+ * so that we can load as much of the dictionary as possible.
+ */
+static int ZSTD_dictTooBig(size_t const loadedDictSize)
+{
+    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
+}
+
 /*! ZSTD_resetCCtx_internal() :
-    note : `params` are assumed fully validated at this stage */
+ * @param loadedDictSize The size of the dictionary to be loaded
+ * into the context, if any. If no dictionary is used, or the
+ * dictionary is being attached / copied, then pass 0.
+ * note : `params` are assumed fully validated at this stage.
+ */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params params,
+                                      ZSTD_CCtx_params const* params,
                                       U64 const pledgedSrcSize,
+                                      size_t const loadedDictSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
     ZSTD_cwksp* const ws = &zc->workspace;
-    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
-                (U32)pledgedSrcSize, params.cParams.windowLog);
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
+                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
 
     zc->isFirstBlock = 1;
 
-    if (params.ldmParams.enableLdm) {
+    /* Set applied params early so we can modify them for LDM,
+     * and point params at the applied params.
+     */
+    zc->appliedParams = *params;
+    params = &zc->appliedParams;
+
+    assert(params->useRowMatchFinder != ZSTD_urm_auto);
+    if (params->ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
-        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
-        assert(params.ldmParams.hashRateLog < 32);
+        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+        assert(params->ldmParams.hashRateLog < 32);
     }
 
-    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
-        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                 ? windowSize + blockSize
                 : 0;
-        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
 
         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
         ZSTD_indexResetPolicy_e needsIndexReset =
-            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
 
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                 buffInSize, buffOutSize, pledgedSrcSize);
+        int resizeWorkspace;
+
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
         if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
 
-        /* Check if workspace is large enough, alloc a new one if needed */
-        {
+        {   /* Check if workspace is large enough, alloc a new one if needed */
             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-
+            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
-            if (workspaceTooSmall || workspaceWasteful) {
+            if (resizeWorkspace) {
                 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                             ZSTD_cwksp_sizeof(ws) >> 10,
                             neededSpace >> 10);
@@ -1707,8 +1906,7 @@
         ZSTD_cwksp_clear(ws);
 
         /* init params */
-        zc->appliedParams = params;
-        zc->blockState.matchState.cParams = params.cParams;
+        zc->blockState.matchState.cParams = params->cParams;
         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
         zc->consumedSrcSize = 0;
         zc->producedCSize = 0;
@@ -1739,11 +1937,11 @@
         zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
 
         /* ldm bucketOffsets table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
             size_t const numBuckets =
-                  ((size_t)1) << (params.ldmParams.hashLog -
-                                  params.ldmParams.bucketSizeLog);
+                  ((size_t)1) << (params->ldmParams.hashLog -
+                                  params->ldmParams.bucketSizeLog);
             zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
             ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
         }
@@ -1759,32 +1957,28 @@
         FORWARD_IF_ERROR(ZSTD_reset_matchState(
             &zc->blockState.matchState,
             ws,
-            &params.cParams,
+            &params->cParams,
+            params->useRowMatchFinder,
             crp,
             needsIndexReset,
             ZSTD_resetTarget_CCtx), "");
 
         /* ldm hash table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
-            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
             zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
             ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
             zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
             zc->maxNbLdmSequences = maxNbLdmSeq;
 
             ZSTD_window_init(&zc->ldmState.window);
-            ZSTD_window_clear(&zc->ldmState.window);
             zc->ldmState.loadedDictEnd = 0;
         }
 
-        /* Due to alignment, when reusing a workspace, we can actually consume
-         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
-         */
-        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
-               ZSTD_cwksp_used(ws) <= neededSpace + 3);
-
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+
         zc->initialized = 1;
 
         return 0;
@@ -1840,6 +2034,8 @@
                         U64 pledgedSrcSize,
                         ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
     {
         ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
@@ -1855,7 +2051,9 @@
         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                      cdict->dictContentSize, ZSTD_cpm_attachDict);
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_makeClean, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
     }
@@ -1899,15 +2097,17 @@
     const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
 
     assert(!cdict->matchState.dedicatedDictSearch);
-
-    DEBUGLOG(4, "copying dictionary into context");
+    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
 
     {   unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
         params.cParams = *cdict_cParams;
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_leaveDirty, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
         assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
@@ -1915,17 +2115,30 @@
     }
 
     ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+    assert(params.useRowMatchFinder != ZSTD_urm_auto);
 
     /* copy tables */
-    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
+                                                            ? ((size_t)1 << cdict_cParams->chainLog)
+                                                            : 0;
         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
 
         ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                cdict->matchState.hashTable,
                hSize * sizeof(U32));
-        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                cdict->matchState.chainTable,
                chainSize * sizeof(U32));
+        }
+        /* copy tag table */
+        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+                cdict->matchState.tagTable,
+                tagTableSize);
+        }
     }
 
     /* Zero the hashTable3, since the cdict never fills it */
@@ -1989,16 +2202,18 @@
                             U64 pledgedSrcSize,
                             ZSTD_buffered_policy_e zbuff)
 {
-    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                     "Can't copy a ctx that's not in init stage.");
-
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
     {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
         /* Copy only compression parameters related to tables. */
         params.cParams = srcCCtx->appliedParams.cParams;
+        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
+        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
         params.fParams = fParams;
-        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+                                /* loadedDictSize */ 0,
                                 ZSTDcrp_leaveDirty, zbuff);
         assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
         assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
@@ -2010,7 +2225,11 @@
     ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
 
     /* copy tables */
-    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+                                                         srcCCtx->appliedParams.useRowMatchFinder,
+                                                         0 /* forDDSDict */)
+                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+                                    : 0;
         size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
         int const h3log = srcCCtx->blockState.matchState.hashLog3;
         size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
@@ -2124,7 +2343,7 @@
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (params->cParams.strategy != ZSTD_fast) {
+    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
         U32 const chainSize = (U32)1 << params->cParams.chainLog;
         if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
@@ -2161,9 +2380,9 @@
         ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
     }
-    if (seqStorePtr->longLengthID==1)
+    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-    if (seqStorePtr->longLengthID==2)
+    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
@@ -2177,10 +2396,158 @@
     return (cctxParams->targetCBlockSize != 0);
 }
 
-/* ZSTD_entropyCompressSequences_internal():
- * actually compresses both literals and sequences */
+/* ZSTD_blockSplitterEnabled():
+ * Returns if block splitting param is being used
+ * If used, compression will do best effort to split a block in order to improve compression ratio.
+ * Returns 1 if true, 0 otherwise. */
+static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks);
+    return (cctxParams->splitBlocks != 0);
+}
+
+/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
+ * and size of the sequences statistics
+ */
+typedef struct {
+    U32 LLtype;
+    U32 Offtype;
+    U32 MLtype;
+    size_t size;
+    size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_symbolEncodingTypeStats_t;
+
+/* ZSTD_buildSequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
+ * Modifies `nextEntropy` to have the appropriate values as a side effect.
+ * nbSeq must be greater than 0.
+ *
+ * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+                              BYTE* dst, const BYTE* const dstEnd,
+                              ZSTD_strategy strategy, unsigned* countWorkspace,
+                              void* entropyWorkspace, size_t entropyWkspSize) {
+    BYTE* const ostart = dst;
+    const BYTE* const oend = dstEnd;
+    BYTE* op = ostart;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    stats.lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    assert(op <= oend);
+    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
+                countWorkspace, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->litlengthCTable,
+                sizeof(prevEntropy->litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.LLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
+                countWorkspace, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->offcodeCTable,
+                sizeof(prevEntropy->offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.Offtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
+                countWorkspace, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->matchlengthCTable,
+                sizeof(prevEntropy->matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.MLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    stats.size = (size_t)(op-ostart);
+    return stats;
+}
+
+/* ZSTD_entropyCompressSeqStore_internal():
+ * compresses both literals and sequences
+ * Returns compressed size of block, or a zstd error.
+ */
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                           const ZSTD_entropyCTables_t* prevEntropy,
                                 ZSTD_entropyCTables_t* nextEntropy,
                           const ZSTD_CCtx_params* cctxParams,
@@ -2194,22 +2561,20 @@
     FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
     FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
-    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
     const BYTE* const llCodeTable = seqStorePtr->llCode;
     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-    BYTE* seqHead;
-    BYTE* lastNCount = NULL;
+    size_t lastCountSize;
 
     entropyWorkspace = count + (MaxSeq + 1);
     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
 
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
 
@@ -2249,95 +2614,20 @@
         ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
         return (size_t)(op - ostart);
     }
-
-    /* seqHead : flags for FSE encoding type */
-    seqHead = op++;
-    assert(op <= oend);
-
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->fse.litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                count, max, llCodeTable, nbSeq,
-                LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                prevEntropy->fse.litlengthCTable,
-                sizeof(prevEntropy->fse.litlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for Offsets */
-    {   unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->fse.offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                count, max, ofCodeTable, nbSeq,
-                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                prevEntropy->fse.offcodeCTable,
-                sizeof(prevEntropy->fse.offcodeCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for MatchLengths */
-    {   unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                count, max, mlCodeTable, nbSeq,
-                ML_defaultNorm, ML_defaultNormLog, MaxML,
-                prevEntropy->fse.matchlengthCTable,
-                sizeof(prevEntropy->fse.matchlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-
-    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+    {
+        ZSTD_symbolEncodingTypeStats_t stats;
+        BYTE* seqHead = op++;
+        /* build stats for sequences */
+        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                             &prevEntropy->fse, &nextEntropy->fse,
+                                              op, oend,
+                                              strategy, count,
+                                              entropyWorkspace, entropyWkspSize);
+        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+        lastCountSize = stats.lastCountSize;
+        op += stats.size;
+    }
 
     {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                         op, (size_t)(oend - op),
@@ -2357,9 +2647,9 @@
          * In this exceedingly rare case, we will simply emit an uncompressed
          * block, since it isn't worth optimizing.
          */
-        if (lastNCount && (op - lastNCount) < 4) {
-            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
-            assert(op - lastNCount == 3);
+        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
+            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(lastCountSize + bitstreamSize == 3);
             DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                         "emitting an uncompressed block.");
             return 0;
@@ -2371,7 +2661,7 @@
 }
 
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                        const ZSTD_entropyCTables_t* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
                        const ZSTD_CCtx_params* cctxParams,
@@ -2380,7 +2670,7 @@
                              void* entropyWorkspace, size_t entropyWkspSize,
                              int bmi2)
 {
-    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                             dst, dstCapacity,
                             entropyWorkspace, entropyWkspSize, bmi2);
@@ -2390,20 +2680,20 @@
      */
     if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
         return 0;  /* block not compressed */
-    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
 
     /* Check compressibility */
     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
         if (cSize >= maxCSize) return 0;  /* block not compressed */
     }
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
     return cSize;
 }
 
 /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
 {
     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
@@ -2451,7 +2741,28 @@
     ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
 
     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+            { ZSTD_compressBlock_greedy_row,
+            ZSTD_compressBlock_lazy_row,
+            ZSTD_compressBlock_lazy2_row },
+            { ZSTD_compressBlock_greedy_extDict_row,
+            ZSTD_compressBlock_lazy_extDict_row,
+            ZSTD_compressBlock_lazy2_extDict_row },
+            { ZSTD_compressBlock_greedy_dictMatchState_row,
+            ZSTD_compressBlock_lazy_dictMatchState_row,
+            ZSTD_compressBlock_lazy2_dictMatchState_row },
+            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+        };
+        DEBUGLOG(4, "Selecting a row-based matchfinder");
+        assert(useRowMatchFinder != ZSTD_urm_auto);
+        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+    } else {
+        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    }
     assert(selectedCompressor != NULL);
     return selectedCompressor;
 }
@@ -2467,7 +2778,7 @@
 {
     ssPtr->lit = ssPtr->litStart;
     ssPtr->sequences = ssPtr->sequencesStart;
-    ssPtr->longLengthID = 0;
+    ssPtr->longLengthType = ZSTD_llt_none;
 }
 
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
@@ -2520,6 +2831,7 @@
                 ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
         } else if (zc->appliedParams.ldmParams.enableLdm) {
@@ -2536,10 +2848,13 @@
                 ZSTD_ldm_blockCompress(&ldmSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
         } else {   /* not long range mode */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+                                                                                    zc->appliedParams.useRowMatchFinder,
+                                                                                    dictMode);
             ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
@@ -2573,9 +2888,9 @@
         outSeqs[i].rep = 0;
 
         if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
+            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                 outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
+            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                 outSeqs[i].matchLength += 0x10000;
             }
         }
@@ -2686,11 +3001,713 @@
     return nbSeqs < 4 && nbLits < 10;
 }
 
-static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
 {
-    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
-    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
-    zc->blockState.nextCBlock = tmp;
+    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+    bs->prevCBlock = bs->nextCBlock;
+    bs->nextCBlock = tmp;
+}
+
+/* Writes the block header */
+static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+    MEM_writeLE24(op, cBlockHeader);
+    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
+}
+
+/** ZSTD_buildBlockEntropyStats_literals() :
+ *  Builds entropy for the literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE workspace
+ *  @return : size of huffman description table or error code */
+static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#ifndef COMPRESS_LITERALS_SIZE_MIN
+#define COMPRESS_LITERALS_SIZE_MIN 63
+#endif
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+
+/* ZSTD_buildDummySequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
+ * and updates nextEntropy to the appropriate repeatMode.
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
+    nextEntropy->litlength_repeatMode = FSE_repeat_none;
+    nextEntropy->offcode_repeatMode = FSE_repeat_none;
+    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+    return stats;
+}
+
+/** ZSTD_buildBlockEntropyStats_sequences() :
+ *  Builds entropy for the sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE wksp.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+    unsigned* countWorkspace = (unsigned*)workspace;
+    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
+    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
+    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                          prevEntropy, nextEntropy, op, oend,
+                                          strategy, countWorkspace,
+                                          entropyWorkspace, entropyWorkspaceSize)
+                       : ZSTD_buildDummySequencesStatistics(nextEntropy);
+    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
+    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
+    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
+    fseMetadata->lastCountSize = stats.lastCountSize;
+    return stats.size;
+}
+
+
+/** ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  Requires workspace size ENTROPY_WORKSPACE_SIZE
+ *
+ *  @return : 0 on success or error code
+ */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
+    return 0;
+}
+
+/* Returns the size estimate for the literals section (header + content) of a block */
+static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+    U32 singleStream = litSize < 256;
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
+static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+                        const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        (void)defaultMax;
+        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
+        return nbSeq * 10;
+    }
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits >> 3;
+}
+
+/* Returns the size estimate for the sequences section (header + content) of a block */
+static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+                                         fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+                                         fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+                                         fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+/* Returns the size estimate for a given stream of literals, of, ll, ml */
+static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+                                     const BYTE* ofCodeTable,
+                                     const BYTE* llCodeTable,
+                                     const BYTE* mlCodeTable,
+                                     size_t nbSeq,
+                                     const ZSTD_entropyCTables_t* entropy,
+                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                     void* workspace, size_t wkspSize,
+                                     int writeLitEntropy, int writeSeqEntropy) {
+    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return seqSize + literalsSize + ZSTD_blockHeaderSize;
+}
+
+/* Builds entropy statistics and uses them for blocksize estimation.
+ *
+ * Returns the estimated compressed size of the seqStore, or a zstd error.
+ */
+static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+                    &zc->blockState.prevCBlock->entropy,
+                    &zc->blockState.nextCBlock->entropy,
+                    &zc->appliedParams,
+                    &entropyMetadata,
+                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
+                    &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+                    (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
+}
+
+/* Returns literals bytes represented in a seqStore */
+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+    size_t literalsBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        literalsBytes += seq.litLength;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+            literalsBytes += 0x10000;
+        }
+    }
+    return literalsBytes;
+}
+
+/* Returns match bytes represented in a seqStore */
+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+    size_t matchBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        matchBytes += seq.matchLength + MINMATCH;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+            matchBytes += 0x10000;
+        }
+    }
+    return matchBytes;
+}
+
+/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
+ * Stores the result in resultSeqStore.
+ */
+static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+                               const seqStore_t* originalSeqStore,
+                                     size_t startIdx, size_t endIdx) {
+    BYTE* const litEnd = originalSeqStore->lit;
+    size_t literalsBytes;
+    size_t literalsBytesPreceding = 0;
+
+    *resultSeqStore = *originalSeqStore;
+    if (startIdx > 0) {
+        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    }
+
+    /* Move longLengthPos into the correct position if necessary */
+    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+            resultSeqStore->longLengthType = ZSTD_llt_none;
+        } else {
+            resultSeqStore->longLengthPos -= (U32)startIdx;
+        }
+    }
+    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    resultSeqStore->litStart += literalsBytesPreceding;
+    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+        resultSeqStore->lit = litEnd;
+    } else {
+        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+    }
+    resultSeqStore->llCode += startIdx;
+    resultSeqStore->mlCode += startIdx;
+    resultSeqStore->ofCode += startIdx;
+}
+
+/**
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
+ */
+static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
+    U32 const adjustedOffCode = offCode + ll0;
+    assert(offCode < ZSTD_REP_NUM);
+    if (adjustedOffCode == ZSTD_REP_NUM) {
+        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+        assert(rep[0] > 0);
+        return rep[0] - 1;
+    }
+    return rep[adjustedOffCode];
+}
+
+/**
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
+ * the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
+ * accordance with the seqStore.
+ */
+static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+                                          seqStore_t* const seqStore, U32 const nbSeq) {
+    U32 idx = 0;
+    for (; idx < nbSeq; ++idx) {
+        seqDef* const seq = seqStore->sequencesStart + idx;
+        U32 const ll0 = (seq->litLength == 0);
+        U32 offCode = seq->offset - 1;
+        assert(seq->offset > 0);
+        if (offCode <= ZSTD_REP_MOVE) {
+            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+             * the repcode with the offset it actually references, determined by the compression
+             * repcode history.
+             */
+            if (dRawOffset != cRawOffset) {
+                seq->offset = cRawOffset + ZSTD_REP_NUM;
+            }
+        }
+        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+         */
+        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
+        *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+    }
+}
+
+/* ZSTD_compressSeqStore_singleBlock():
+ * Compresses a seqStore into a block with a block header, into the buffer dst.
+ *
+ * Returns the total size of that block (including header) or a ZSTD error code.
+ */
+static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+                                                repcodes_t* const dRep, repcodes_t* const cRep,
+                                                void* dst, size_t dstCapacity,
+                                                const void* src, size_t srcSize,
+                                                U32 lastBlock, U32 isPartition) {
+    const U32 rleMaxLength = 25;
+    BYTE* op = (BYTE*)dst;
+    const BYTE* ip = (const BYTE*)src;
+    size_t cSize;
+    size_t cSeqsSize;
+
+    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
+    repcodes_t const dRepOriginal = *dRep;
+    if (isPartition)
+        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+
+    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
+                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                &zc->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+                srcSize,
+                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                zc->bmi2);
+    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
+
+    if (!zc->isFirstBlock &&
+        cSeqsSize < rleMaxLength &&
+        ZSTD_isRLE((BYTE const*)src, srcSize)) {
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+        * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+        * This is only an issue for zstd <= v1.4.3
+        */
+        cSeqsSize = 1;
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        return 0;
+    }
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    if (cSeqsSize == 0) {
+        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else if (cSeqsSize == 1) {
+        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else {
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+        cSize = ZSTD_blockHeaderSize + cSeqsSize;
+        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+    }
+    return cSize;
+}
+
+/* Struct to keep track of where we are in our recursive calls. */
+typedef struct {
+    U32* splitLocations;    /* Array of split indices */
+    size_t idx;             /* The current index within splitLocations being worked on */
+} seqStoreSplits;
+
+#define MIN_SEQUENCES_BLOCK_SPLITTING 300
+#define MAX_NB_SPLITS 196
+
+/* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
+ * maximum of 128 KB, this value is actually impossible to reach.
+ */
+static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                                         const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+    seqStore_t fullSeqStoreChunk;
+    seqStore_t firstHalfSeqStore;
+    seqStore_t secondHalfSeqStore;
+    size_t estimatedOriginalSize;
+    size_t estimatedFirstHalfSize;
+    size_t estimatedSecondHalfSize;
+    size_t midIdx = (startIdx + endIdx)/2;
+
+    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
+        return;
+    }
+    ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+    ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
+    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
+    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
+    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+        return;
+    }
+    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+        splits->splitLocations[splits->idx] = (U32)midIdx;
+        splits->idx++;
+        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+    }
+}
+
+/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
+ *
+ * Returns the number of splits made (which equals the size of the partition table - 1).
+ */
+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+    seqStoreSplits splits = {partitions, 0};
+    if (nbSeq <= 4) {
+        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
+        /* Refuse to try and split anything with less than 4 sequences */
+        return 0;
+    }
+    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+    splits.splitLocations[splits.idx] = nbSeq;
+    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+    return splits.idx;
+}
+
+/* ZSTD_compressBlock_splitBlock():
+ * Attempts to split a given block into multiple blocks to improve compression ratio.
+ *
+ * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+ */
+static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                                     const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
+    size_t cSize = 0;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 partitions[MAX_NB_SPLITS];
+    size_t i = 0;
+    size_t srcBytesTotal = 0;
+    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+    seqStore_t nextSeqStore;
+    seqStore_t currSeqStore;
+
+    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
+     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
+     * separate repcode histories that simulate repcode history on compression and decompression side,
+     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
+     *
+     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
+     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
+     *    a nocompress/RLE block.
+     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
+     *    the replacement offset value rather than the original repcode to update the repcode history.
+     *    dRep also will be the final repcode history sent to the next block.
+     *
+     * See ZSTD_seqStore_resolveOffCodes() for more details.
+     */
+    repcodes_t dRep;
+    repcodes_t cRep;
+    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    if (numSplits == 0) {
+        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+                                                                   &dRep, &cRep,
+                                                                    op, dstCapacity,
+                                                                    ip, blockSize,
+                                                                    lastBlock, 0 /* isPartition */);
+        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
+        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+        return cSizeSingleBlock;
+    }
+
+    ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
+    for (i = 0; i <= numSplits; ++i) {
+        size_t srcBytes;
+        size_t cSizeChunk;
+        U32 const lastPartition = (i == numSplits);
+        U32 lastBlockEntireSrc = 0;
+
+        srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
+        srcBytesTotal += srcBytes;
+        if (lastPartition) {
+            /* This is the final partition, need to account for possible last literals */
+            srcBytes += blockSize - srcBytesTotal;
+            lastBlockEntireSrc = lastBlock;
+        } else {
+            ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
+        }
+
+        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
+                                                      &dRep, &cRep,
+                                                       op, dstCapacity,
+                                                       ip, srcBytes,
+                                                       lastBlockEntireSrc, 1 /* isPartition */);
+        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
+        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
+
+        ip += srcBytes;
+        op += cSizeChunk;
+        dstCapacity -= cSizeChunk;
+        cSize += cSizeChunk;
+        currSeqStore = nextSeqStore;
+        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+    }
+    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
+     * for the next block.
+     */
+    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 lastBlock) {
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 nbSeq;
+    size_t cSize;
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) {
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+            return cSize;
+        }
+        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
+    }
+
+    assert(zc->appliedParams.splitBlocks == 1);
+    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
+    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
+    return cSize;
 }
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
@@ -2716,12 +3733,12 @@
 
     if (zc->seqCollector.collectSequences) {
         ZSTD_copyBlockSequences(zc);
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
 
     /* encode sequences and literals */
-    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
             dst, dstCapacity,
@@ -2750,7 +3767,7 @@
 
 out:
     if (!ZSTD_isError(cSize) && cSize > 1) {
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
     }
     /* We check that dictionaries have offset codes available for the first
      * block. After the first block, the offcode table might not have large
@@ -2803,7 +3820,7 @@
                 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                     return cSize;
                 }
             }
@@ -2843,9 +3860,9 @@
                                          void const* ip,
                                          void const* iend)
 {
-    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
-        U32 const maxDist = (U32)1 << params->cParams.windowLog;
-        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const maxDist = (U32)1 << params->cParams.windowLog;
+    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
         U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
         ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
         ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
@@ -2868,7 +3885,7 @@
 *   Frame is supposed already started (header already produced)
 *   @return : compressed size, or an error code
 */
-static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                      U32 lastFrameChunk)
@@ -2908,6 +3925,10 @@
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                 assert(cSize > 0);
                 assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
+                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
             } else {
                 cSize = ZSTD_compressBlock_internal(cctx,
                                         op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -3063,11 +4084,12 @@
 
     if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
 
-    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
         ms->nextToUpdate = ms->window.dictLimit;
     }
     if (cctx->appliedParams.ldmParams.enableLdm) {
-        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
     }
 
     if (!frame) {
@@ -3135,63 +4157,86 @@
 {
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
-
-    ZSTD_window_update(&ms->window, src, srcSize);
-    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
-
-    if (params->ldmParams.enableLdm && ls != NULL) {
-        ZSTD_window_update(&ls->window, src, srcSize);
-        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
-    }
+    int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;
 
     /* Assert that we the ms params match the params we're being given */
     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
 
+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+         * Dictionaries right at the edge will immediately trigger overflow
+         * correction, but I don't want to insert extra constraints here.
+         */
+        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+        /* We must have cleared our windows when our source is this large. */
+        assert(ZSTD_window_isEmpty(ms->window));
+        if (loadLdmDict)
+            assert(ZSTD_window_isEmpty(ls->window));
+        /* If the dictionary is too large, only load the suffix of the dictionary. */
+        if (srcSize > maxDictSize) {
+            ip = iend - maxDictSize;
+            src = ip;
+            srcSize = maxDictSize;
+        }
+    }
+
+    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
+    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+    ms->forceNonContiguous = params->deterministicRefPrefix;
+
+    if (loadLdmDict) {
+        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    while (iend - ip > HASH_READ_SIZE) {
-        size_t const remaining = (size_t)(iend - ip);
-        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
-        const BYTE* const ichunk = ip + chunk;
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
 
-        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
 
-        if (params->ldmParams.enableLdm && ls != NULL)
-            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
 
-        switch(params->cParams.strategy)
-        {
-        case ZSTD_fast:
-            ZSTD_fillHashTable(ms, ichunk, dtlm);
-            break;
-        case ZSTD_dfast:
-            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
-            break;
-
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
-                assert(chunk == remaining); /* must load everything in one go */
-                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
-            } else if (chunk >= HASH_READ_SIZE) {
-                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
             }
-            break;
-
-        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            if (chunk >= HASH_READ_SIZE)
-                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
-            break;
-
-        default:
-            assert(0);  /* not possible : not a valid strategy id */
         }
+        break;
 
-        ip = ichunk;
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3330,7 +4375,6 @@
     const BYTE* const dictEnd = dictPtr + dictSize;
     size_t dictID;
     size_t eSize;
-
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(dictSize >= 8);
     assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
@@ -3401,8 +4445,9 @@
                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                     ZSTD_buffered_policy_e zbuff)
 {
+    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
 #if ZSTD_TRACE
-    cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
 #endif
     DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
     /* params are supposed to be fully validated at this point */
@@ -3418,7 +4463,8 @@
         return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
     }
 
-    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     dictContentSize,
                                      ZSTDcrp_makeClean, zbuff) , "");
     {   size_t const dictID = cdict ?
                 ZSTD_compress_insertDictionary(
@@ -3433,7 +4479,7 @@
         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
         assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
-        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+        cctx->dictContentSize = dictContentSize;
     }
     return 0;
 }
@@ -3533,7 +4579,7 @@
 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
 {
 #if ZSTD_TRACE
-    if (cctx->traceCtx) {
+    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
         int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
         ZSTD_Trace trace;
         ZSTD_memset(&trace, 0, sizeof(trace));
@@ -3586,15 +4632,14 @@
                          const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
-    ZSTD_CCtx_params cctxParams;
     DEBUGLOG(4, "ZSTD_compress_advanced");
     FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
     return ZSTD_compress_advanced_internal(cctx,
                                            dst, dstCapacity,
                                            src, srcSize,
                                            dict, dictSize,
-                                           &cctxParams);
+                                           &cctx->simpleApiParams);
 }
 
 /* Internal */
@@ -3618,14 +4663,13 @@
                          const void* dict, size_t dictSize,
                                int compressionLevel)
 {
-    ZSTD_CCtx_params cctxParams;
     {
         ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
         assert(params.fParams.contentSizeFlag == 1);
-        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
     }
     DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
 }
 
 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3669,7 +4713,10 @@
     DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
     return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
          + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+          * in case we are using DDS with row-hash. */
+         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
+                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
 }
@@ -3700,9 +4747,6 @@
     assert(!ZSTD_checkCParams(params.cParams));
     cdict->matchState.cParams = params.cParams;
     cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
-        cdict->matchState.dedicatedDictSearch = 0;
-    }
     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
         cdict->dictContent = dictBuffer;
     } else {
@@ -3723,6 +4767,7 @@
         &cdict->matchState,
         &cdict->workspace,
         &params.cParams,
+        params.useRowMatchFinder,
         ZSTDcrp_makeClean,
         ZSTDirp_reset,
         ZSTD_resetTarget_CDict), "");
@@ -3746,14 +4791,17 @@
 
 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                       ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                      U32 enableDedicatedDictSearch,
+                                      ZSTD_customMem customMem)
 {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
     {   size_t const workspaceSize =
             ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
             (dictLoadMethod == ZSTD_dlm_byRef ? 0
              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
@@ -3772,7 +4820,7 @@
         ZSTD_cwksp_move(&cdict->workspace, &ws);
         cdict->customMem = customMem;
         cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-
+        cdict->useRowMatchFinder = useRowMatchFinder;
         return cdict;
     }
 }
@@ -3824,10 +4872,13 @@
             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
     cctxParams.cParams = cParams;
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
 
     cdict = ZSTD_createCDict_advanced_internal(dictSize,
                         dictLoadMethod, cctxParams.cParams,
+                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
@@ -3896,7 +4947,9 @@
                                  ZSTD_dictContentType_e dictContentType,
                                  ZSTD_compressionParameters cParams)
 {
-    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
+    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
     size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
@@ -3921,6 +4974,8 @@
 
     ZSTD_CCtxParams_init(&params, 0);
     params.cParams = cParams;
+    params.useRowMatchFinder = useRowMatchFinder;
+    cdict->useRowMatchFinder = useRowMatchFinder;
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                               dict, dictSize,
@@ -3947,15 +5002,15 @@
     return cdict->dictID;
 }
 
-
-/* ZSTD_compressBegin_usingCDict_advanced() :
- * cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict_advanced(
+/* ZSTD_compressBegin_usingCDict_internal() :
+ * Implementation of various ZSTD_compressBegin_usingCDict* functions.
+ */
+static size_t ZSTD_compressBegin_usingCDict_internal(
     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
 {
     ZSTD_CCtx_params cctxParams;
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
     RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
     /* Initialize the cctxParams from the cdict */
     {
@@ -3987,23 +5042,46 @@
                                         ZSTDb_not_buffered);
 }
 
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * This function is DEPRECATED.
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
+}
+
 /* ZSTD_compressBegin_usingCDict() :
- * pledgedSrcSize=0 means "unknown"
- * if pledgedSrcSize>0, it will enable contentSizeFlag */
+ * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
-    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
+/*! ZSTD_compress_usingCDict_internal():
+ * Implementation of various ZSTD_compress_usingCDict* functions.
+ */
+static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict_advanced():
+ * This function is DEPRECATED.
+ */
 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
 {
-    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
-    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 /*! ZSTD_compress_usingCDict() :
@@ -4017,7 +5095,7 @@
                                 const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -4427,8 +5505,13 @@
     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
     ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
     assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-    if (cctx->cdict)
-        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    if (cctx->cdict && !cctx->localDict.cdict) {
+        /* Let the cdict's compression level take priority over the requested params.
+         * But do not take the cdict's compression level if the "cdict" is actually a localDict
+         * generated from ZSTD_initLocalDict().
+         */
+        params.compressionLevel = cctx->cdict->compressionLevel;
+    }
     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
     if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
     {
@@ -4447,13 +5530,20 @@
         params.ldmParams.enableLdm = 1;
     }
 
+    if (ZSTD_CParams_useBlockSplitter(&params.cParams)) {
+        DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
+        params.splitBlocks = 1;
+    }
+
+    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
+
 #ifdef ZSTD_MULTITHREAD
     if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
         params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
     }
     if (params.nbWorkers > 0) {
 #if ZSTD_TRACE
-        cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+        cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
 #endif
         /* mt context creation */
         if (cctx->mtctx == NULL) {
@@ -4921,7 +6011,7 @@
             continue;
         }
 
-        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                 &cctx->appliedParams,
                                 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
@@ -4953,7 +6043,7 @@
         } else {
             U32 cBlockHeader;
             /* Error checking and repcodes update */
-            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
             if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
 
@@ -5054,6 +6144,7 @@
 #define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
 
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" - for any srcSize > 256 KB */
@@ -5186,7 +6277,10 @@
 static int ZSTD_dedicatedDictSearch_isSupported(
         ZSTD_compressionParameters const* cParams)
 {
-    return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2);
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog > cParams->chainLog)
+        && (cParams->chainLog <= 24);
 }
 
 /**
@@ -5204,6 +6298,9 @@
         case ZSTD_lazy:
         case ZSTD_lazy2:
             cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
+                cParams->hashLog = ZSTD_HASHLOG_MIN;
+            }
             break;
         case ZSTD_btlazy2:
         case ZSTD_btopt:
@@ -5252,6 +6349,7 @@
     else row = compressionLevel;
 
     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
         /* acceleration factor */
         if (compressionLevel < 0) {
             int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 6083ed6..3b04fd0 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,6 @@
 *  Dependencies
 ***************************************/
 #include "../common/zstd_internal.h"
-#include "../common/zstd_trace.h"  /* ZSTD_TraceCtx */
 #include "zstd_cwksp.h"
 #ifdef ZSTD_MULTITHREAD
 #  include "zstdmt_compress.h"
@@ -82,6 +81,53 @@
     ZSTD_fseCTables_t fse;
 } ZSTD_entropyCTables_t;
 
+/***********************************************
+*  Entropy buffer statistics structs and funcs *
+***********************************************/
+/** ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/** ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+/** ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  @return : 0 on success or error code */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize);
+
+/*********************************
+*  Compression internals structs *
+*********************************/
+
 typedef struct {
     U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
     U32 len;            /* Raw length of match */
@@ -142,14 +188,21 @@
 } ZSTD_compressedBlockState_t;
 
 typedef struct {
-    BYTE const* nextSrc;    /* next block here to continue on current prefix */
-    BYTE const* base;       /* All regular indexes relative to this position */
-    BYTE const* dictBase;   /* extDict indexes relative to this position */
-    U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more valid data */
+    BYTE const* nextSrc;       /* next block here to continue on current prefix */
+    BYTE const* base;          /* All regular indexes relative to this position */
+    BYTE const* dictBase;      /* extDict indexes relative to this position */
+    U32 dictLimit;             /* below that point, need extDict */
+    U32 lowLimit;              /* below that point, no more valid data */
+    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
+                                * ZSTD_window_init(). Useful for debugging coredumps
+                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
+                                */
 } ZSTD_window_t;
 
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+
+#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
+
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
@@ -161,9 +214,17 @@
                              */
     U32 nextToUpdate;       /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+
+    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
+
+    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
+
     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                                * dedicated dictionary search structure.
                                */
@@ -256,6 +317,15 @@
     ZSTD_sequenceFormat_e blockDelimiters;
     int validateSequences;
 
+    /* Block splitting */
+    int splitBlocks;
+
+    /* Param for deciding whether to use row-based matchfinder */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+
+    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
+    int deterministicRefPrefix;
+
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@@ -279,6 +349,7 @@
     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
     ZSTD_CCtx_params requestedParams;
     ZSTD_CCtx_params appliedParams;
+    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
     U32   dictID;
     size_t dictContentSize;
 
@@ -371,7 +442,7 @@
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
 
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -548,8 +619,8 @@
 
     /* literal Length */
     if (litLength>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 1;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
@@ -559,8 +630,8 @@
 
     /* match Length */
     if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 2;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].matchLength = (U16)mlBase;
@@ -811,6 +882,13 @@
     window->dictLimit = end;
 }
 
+MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
+{
+    return window.dictLimit == 1 &&
+           window.lowLimit == 1 &&
+           (window.nextSrc - window.base) == 1;
+}
+
 /**
  * ZSTD_window_hasExtDict():
  * Returns non-zero if the window has a non-empty extDict.
@@ -834,15 +912,69 @@
             ZSTD_noDict;
 }
 
+/* Defining this macro to non-zero tells zstd to run the overflow correction
+ * code much more frequently. This is very inefficient, and should only be
+ * used for tests and fuzzers.
+ */
+#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
+#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
+#  else
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
+#  endif
+#endif
+
+/**
+ * ZSTD_window_canOverflowCorrect():
+ * Returns non-zero if the indices are large enough for overflow correction
+ * to work correctly without impacting compression ratio.
+ */
+MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
+                                              U32 cycleLog,
+                                              U32 maxDist,
+                                              U32 loadedDictEnd,
+                                              void const* src)
+{
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const curr = (U32)((BYTE const*)src - window.base);
+    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
+
+    /* Adjust the min index to backoff the overflow correction frequency,
+     * so we don't waste too much CPU in overflow correction. If this
+     * computation overflows we don't really care, we just need to make
+     * sure it is at least minIndexToOverflowCorrect.
+     */
+    U32 const adjustment = window.nbOverflowCorrections + 1;
+    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
+                                  minIndexToOverflowCorrect);
+    U32 const indexLargeEnough = curr > adjustedIndex;
+
+    /* Only overflow correct early if the dictionary is invalidated already,
+     * so we don't hurt compression ratio.
+     */
+    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
+
+    return indexLargeEnough && dictionaryInvalidated;
+}
+
 /**
  * ZSTD_window_needOverflowCorrection():
  * Returns non-zero if the indices are getting too large and need overflow
  * protection.
  */
 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  U32 cycleLog,
+                                                  U32 maxDist,
+                                                  U32 loadedDictEnd,
+                                                  void const* src,
                                                   void const* srcEnd)
 {
     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
+            return 1;
+        }
+    }
     return curr > ZSTD_CURRENT_MAX;
 }
 
@@ -854,7 +986,6 @@
  *
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
- * NOTE: (maxDist & cycleMask) must be zero.
  */
 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
@@ -878,17 +1009,25 @@
      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
      */
-    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const cycleMask = cycleSize - 1;
     U32 const curr = (U32)((BYTE const*)src - window->base);
     U32 const currentCycle0 = curr & cycleMask;
     /* Exclude zero so that newCurrent - maxDist >= 1. */
-    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
-    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
+    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
     U32 const correction = curr - newCurrent;
-    assert((maxDist & cycleMask) == 0);
+    /* maxDist must be a power of two so that:
+     *   (newCurrent & cycleMask) == (curr & cycleMask)
+     * This is required to not corrupt the chains / binary tree.
+     */
+    assert((maxDist & (maxDist - 1)) == 0);
+    assert((curr & cycleMask) == (newCurrent & cycleMask));
     assert(curr > newCurrent);
-    /* Loose bound, should be around 1<<29 (see above) */
-    assert(correction > 1<<28);
+    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        /* Loose bound, should be around 1<<29 (see above) */
+        assert(correction > 1<<28);
+    }
 
     window->base += correction;
     window->dictBase += correction;
@@ -904,6 +1043,8 @@
     assert(window->lowLimit <= newCurrent);
     assert(window->dictLimit <= newCurrent);
 
+    ++window->nbOverflowCorrections;
+
     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
              window->lowLimit);
     return correction;
@@ -1013,6 +1154,7 @@
     window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
     window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
     window->nextSrc = window->base + 1;   /* see issue #1241 */
+    window->nbOverflowCorrections = 0;
 }
 
 /**
@@ -1023,7 +1165,8 @@
  * Returns non-zero if the segment is contiguous.
  */
 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-                                  void const* src, size_t srcSize)
+                                  void const* src, size_t srcSize,
+                                  int forceNonContiguous)
 {
     BYTE const* const ip = (BYTE const*)src;
     U32 contiguous = 1;
@@ -1033,7 +1176,7 @@
     assert(window->base != NULL);
     assert(window->dictBase != NULL);
     /* Check if blocks follow each other */
-    if (src != window->nextSrc) {
+    if (src != window->nextSrc || forceNonContiguous) {
         /* not contiguous */
         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c
index 1d9188d..008337b 100644
--- a/lib/compress/zstd_compress_literals.c
+++ b/lib/compress/zstd_compress_literals.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -117,7 +117,7 @@
         }
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
diff --git a/lib/compress/zstd_compress_literals.h b/lib/compress/zstd_compress_literals.h
index c8ebe2f..9904c0c 100644
--- a/lib/compress/zstd_compress_literals.h
+++ b/lib/compress/zstd_compress_literals.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c
index 4bccf01..611eabd 100644
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -85,6 +85,8 @@
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -232,6 +234,11 @@
     return set_compressed;
 }
 
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
 size_t
 ZSTD_buildCTable(void* dst, size_t dstCapacity,
                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
@@ -258,7 +265,7 @@
         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
         return 0;
     case set_compressed: {
-        S16 norm[MaxSeq + 1];
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
         size_t nbSeq_1 = nbSeq;
         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
         if (count[codeTable[nbSeq-1]] > 1) {
@@ -266,11 +273,12 @@
             nbSeq_1--;
         }
         assert(nbSeq_1 > 1);
-        assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
-        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
             return NCountSize;
         }
     }
diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h
index 3629b3f..7991364 100644
--- a/lib/compress/zstd_compress_sequences.h
+++ b/lib/compress/zstd_compress_sequences.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 6a7b023..e4e4506 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,288 +15,10 @@
 
 #include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
 #include "hist.h"                     /* HIST_countFast_wksp */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"   /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
 #include "zstd_compress_sequences.h"
 #include "zstd_compress_literals.h"
 
-/*-*************************************
-*  Superblock entropy buffer structs
-***************************************/
-/** ZSTD_hufCTablesMetadata_t :
- *  Stores Literals Block Type for a super-block in hType, and
- *  huffman tree description in hufDesBuffer.
- *  hufDesSize refers to the size of huffman tree description in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
-typedef struct {
-    symbolEncodingType_e hType;
-    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
-    size_t hufDesSize;
-} ZSTD_hufCTablesMetadata_t;
-
-/** ZSTD_fseCTablesMetadata_t :
- *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
- *  fse tables in fseTablesBuffer.
- *  fseTablesSize refers to the size of fse tables in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
-typedef struct {
-    symbolEncodingType_e llType;
-    symbolEncodingType_e ofType;
-    symbolEncodingType_e mlType;
-    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
-    size_t fseTablesSize;
-    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
-} ZSTD_fseCTablesMetadata_t;
-
-typedef struct {
-    ZSTD_hufCTablesMetadata_t hufMetadata;
-    ZSTD_fseCTablesMetadata_t fseMetadata;
-} ZSTD_entropyCTablesMetadata_t;
-
-
-/** ZSTD_buildSuperBlockEntropy_literal() :
- *  Builds entropy for the super-block literals.
- *  Stores literals block type (raw, rle, compressed, repeat) and
- *  huffman description table to hufMetadata.
- *  @return : size of huffman description table or error code */
-static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
-                                            const ZSTD_hufCTables_t* prevHuf,
-                                                  ZSTD_hufCTables_t* nextHuf,
-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                  const int disableLiteralsCompression,
-                                                  void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-    BYTE* const nodeWksp = countWkspStart + countWkspSize;
-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-    unsigned maxSymbolValue = 255;
-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-    HUF_repeat repeat = prevHuf->repeatMode;
-
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralsCompression) {
-        DEBUGLOG(5, "set_basic - disabled");
-        hufMetadata->hType = set_basic;
-        return 0;
-    }
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) {
-            DEBUGLOG(5, "set_basic - too small");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Scan input and build symbol stats */
-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-        if (largest == srcSize) {
-            DEBUGLOG(5, "set_rle");
-            hufMetadata->hType = set_rle;
-            return 0;
-        }
-        if (largest <= (srcSize >> 7)+4) {
-            DEBUGLOG(5, "set_basic - no gain");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Validate the previous Huffman table */
-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-        repeat = HUF_repeat_none;
-    }
-
-    /* Build Huffman Tree */
-    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                    maxSymbolValue, huffLog,
-                                                    nodeWksp, nodeWkspSize);
-        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-        huffLog = (U32)maxBits;
-        {   /* Build and write the CTable */
-            size_t const newCSize = HUF_estimateCompressedSize(
-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t const hSize = HUF_writeCTable(
-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
-            /* Check against repeating the previous CTable */
-            if (repeat != HUF_repeat_none) {
-                size_t const oldCSize = HUF_estimateCompressedSize(
-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-                    DEBUGLOG(5, "set_repeat - smaller");
-                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                    hufMetadata->hType = set_repeat;
-                    return 0;
-                }
-            }
-            if (newCSize + hSize >= srcSize) {
-                DEBUGLOG(5, "set_basic - no gains");
-                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                hufMetadata->hType = set_basic;
-                return 0;
-            }
-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-            hufMetadata->hType = set_compressed;
-            nextHuf->repeatMode = HUF_repeat_check;
-            return hSize;
-        }
-    }
-}
-
-/** ZSTD_buildSuperBlockEntropy_sequences() :
- *  Builds entropy for the super-block sequences.
- *  Stores symbol compression modes and fse table to fseMetadata.
- *  @return : size of fse tables or error code */
-static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
-                                              const ZSTD_fseCTables_t* prevEntropy,
-                                                    ZSTD_fseCTables_t* nextEntropy,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                    void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
-    BYTE* const cTableWksp = countWkspStart + countWkspSize;
-    const size_t cTableWkspSize = wkspEnd-cTableWksp;
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    BYTE* const ostart = fseMetadata->fseTablesBuffer;
-    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-    BYTE* op = ostart;
-
-    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
-    ZSTD_memset(workspace, 0, wkspSize);
-
-    fseMetadata->lastCountSize = 0;
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   U32 LLtype;
-        unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->llType = (symbolEncodingType_e) LLtype;
-    }   }
-    /* build CTable for Offsets */
-    {   U32 Offtype;
-        unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
-    }   }
-    /* build CTable for MatchLengths */
-    {   U32 MLtype;
-        unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
-    }   }
-    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
-    return op-ostart;
-}
-
-
-/** ZSTD_buildSuperBlockEntropy() :
- *  Builds entropy for the super-block.
- *  @return : 0 on success or error code */
-static size_t
-ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
-                      const ZSTD_entropyCTables_t* prevEntropy,
-                            ZSTD_entropyCTables_t* nextEntropy,
-                      const ZSTD_CCtx_params* cctxParams,
-                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                            void* workspace, size_t wkspSize)
-{
-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
-    entropyMetadata->hufMetadata.hufDesSize =
-        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
-                                            &prevEntropy->huf, &nextEntropy->huf,
-                                            &entropyMetadata->hufMetadata,
-                                            ZSTD_disableLiteralsCompression(cctxParams),
-                                            workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
-    entropyMetadata->fseMetadata.fseTablesSize =
-        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                              cctxParams,
-                                              &entropyMetadata->fseMetadata,
-                                              workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
-    return 0;
-}
-
 /** ZSTD_compressSubBlock_literal() :
  *  Compresses literals section for a sub-block.
  *  When we have to write the Huffman table we will sometimes choose a header
@@ -643,8 +365,9 @@
                                                   void* workspace, size_t wkspSize,
                                                   int writeEntropy)
 {
-    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
     size_t cSeqSizeEstimate = 0;
+    if (nbSeq == 0) return sequencesSectionHeaderSize;
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
                                          nbSeq, fseTables->offcodeCTable, NULL,
                                          OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@@ -830,7 +553,7 @@
                                unsigned lastBlock) {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
-    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
           &zc->blockState.prevCBlock->entropy,
           &zc->blockState.nextCBlock->entropy,
           &zc->appliedParams,
diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h
index 8138afa..176f9b1 100644
--- a/lib/compress/zstd_compress_superblock.h
+++ b/lib/compress/zstd_compress_superblock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index daec8bd..2656d26 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -35,6 +35,10 @@
 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
 #endif
 
+
+/* Set our tables and aligneds to align by 64 bytes */
+#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
+
 /*-*************************************
 *  Structures
 ***************************************/
@@ -117,10 +121,11 @@
  * - Tables: these are any of several different datastructures (hash tables,
  *   chain tables, binary trees) that all respect a common format: they are
  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
- *   Their sizes depend on the cparams.
+ *   Their sizes depend on the cparams. These tables are 64-byte aligned.
  *
  * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used.
+ *   alignment, but don't require any initialization before they're used. These
+ *   buffers are each aligned to 64 bytes.
  *
  * - Buffers: these buffers are used for various purposes that don't require
  *   any alignment or initialization before they're used. This means they can
@@ -133,8 +138,7 @@
  *
  * 1. Objects
  * 2. Buffers
- * 3. Aligned
- * 4. Tables
+ * 3. Aligned/Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
@@ -187,6 +191,8 @@
  * Since tables aren't currently redzoned, you don't need to call through this
  * to figure out how much space you need for the matchState tables. Everything
  * else is though.
+ *
+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
  */
 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
     if (size == 0)
@@ -198,30 +204,110 @@
 #endif
 }
 
-MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+/**
+ * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
+ * Used to determine the number of bytes required for a given "aligned".
+ */
+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
+    return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
+}
+
+/**
+ * Returns the amount of additional space the cwksp must allocate
+ * for internal purposes (currently only alignment).
+ */
+MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
+    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
+     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
+     * to align the beginning of the aligned secion.
+     *
+     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
+     * aligneds being sized in multiples of 64 bytes.
+     */
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    return slackSpace;
+}
+
+
+/**
+ * Return the number of additional bytes required to align a pointer to the given number of bytes.
+ * alignBytes must be a power of two.
+ */
+MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
+    size_t const alignBytesMask = alignBytes - 1;
+    size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
+    assert((alignBytes & alignBytesMask) == 0);
+    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+    return bytes;
+}
+
+/**
+ * Internal function. Do not use directly.
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
+ * counts from the end of the wksp. (as opposed to the object/table segment)
+ *
+ * Returns a pointer to the beginning of that space.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
+    void* const alloc = (BYTE*)ws->allocStart - bytes;
+    void* const bottom = ws->tableEnd;
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+    return alloc;
+}
+
+/**
+ * Moves the cwksp to the next phase, and does any necessary allocations.
+ * Returns a 0 on success, or zstd error
+ */
+MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
         ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
+        /* Going from allocating objects to allocating buffers */
         if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                 phase >= ZSTD_cwksp_alloc_buffers) {
             ws->tableValidEnd = ws->objectEnd;
         }
+
+        /* Going from allocating buffers to allocating aligneds/tables */
         if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                 phase >= ZSTD_cwksp_alloc_aligned) {
-            /* If unaligned allocations down from a too-large top have left us
-             * unaligned, we need to realign our alloc ptr. Technically, this
-             * can consume space that is unaccounted for in the neededSpace
-             * calculation. However, I believe this can only happen when the
-             * workspace is too large, and specifically when it is too large
-             * by a larger margin than the space that will be consumed. */
-            /* TODO: cleaner, compiler warning friendly way to do this??? */
-            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
-            if (ws->allocStart < ws->tableValidEnd) {
-                ws->tableValidEnd = ws->allocStart;
+            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
+                size_t const bytesToAlign =
+                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
+                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
+                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
+                                memory_allocation, "aligned phase - alignment initial allocation failed!");
+            }
+            {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
+                void* const alloc = ws->objectEnd;
+                size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                void* const end = (BYTE*)alloc + bytesToAlign;
+                DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
+                RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
+                                "table phase - alignment initial allocation failed!");
+                ws->objectEnd = end;
+                ws->tableEnd = end;
+                ws->tableValidEnd = end;
             }
         }
         ws->phase = phase;
+        ZSTD_cwksp_assert_internal_consistency(ws);
     }
+    return 0;
 }
 
 /**
@@ -237,38 +323,25 @@
 MEM_STATIC void* ZSTD_cwksp_reserve_internal(
         ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
     void* alloc;
-    void* bottom = ws->tableEnd;
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
-    alloc = (BYTE *)ws->allocStart - bytes;
-
-    if (bytes == 0)
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
         return NULL;
+    }
 
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* over-reserve space */
-    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #endif
 
-    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(alloc >= bottom);
-    if (alloc < bottom) {
-        DEBUGLOG(4, "cwksp: alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    if (alloc < ws->tableValidEnd) {
-        ws->tableValidEnd = alloc;
-    }
-    ws->allocStart = alloc;
+    alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
 
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
      * either size. */
-    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
-    if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-        __asan_unpoison_memory_region(alloc, bytes);
+    if (alloc) {
+        alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+        if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
+            __asan_unpoison_memory_region(alloc, bytes);
+        }
     }
 #endif
 
@@ -283,28 +356,36 @@
 }
 
 /**
- * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
-    assert((bytes & (sizeof(U32)-1)) == 0);
-    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+    void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
+                                            ZSTD_cwksp_alloc_aligned);
+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+    return ptr;
 }
 
 /**
- * Aligned on sizeof(unsigned). These buffers have the special property that
+ * Aligned on 64 bytes. These buffers have the special property that
  * their values remain constrained, allowing us to re-use them without
  * memset()-ing them.
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
     const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-    void* alloc = ws->tableEnd;
-    void* end = (BYTE *)alloc + bytes;
-    void* top = ws->allocStart;
+    void* alloc;
+    void* end;
+    void* top;
+
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+        return NULL;
+    }
+    alloc = ws->tableEnd;
+    end = (BYTE *)alloc + bytes;
+    top = ws->allocStart;
 
     DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
         alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
     assert((bytes & (sizeof(U32)-1)) == 0);
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
     ZSTD_cwksp_assert_internal_consistency(ws);
     assert(end <= top);
     if (end > top) {
@@ -320,6 +401,8 @@
     }
 #endif
 
+    assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
+    assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
     return alloc;
 }
 
@@ -527,6 +610,24 @@
 *  Functions Checking Free Space
 ***************************************/
 
+/* ZSTD_alignmentSpaceWithinBounds() :
+ * Returns if the estimated space needed for a wksp is within an acceptable limit of the
+ * actual amount of space used.
+ */
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
+                                                        size_t const estimatedSpace, int resizedWorkspace) {
+    if (resizedWorkspace) {
+        /* Resized/newly allocated wksp should have exact bounds */
+        return ZSTD_cwksp_used(ws) == estimatedSpace;
+    } else {
+        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
+         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
+         */
+        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
+    }
+}
+
+
 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
 }
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index 9ed8fa6..d0d3a78 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -409,7 +409,7 @@
         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
 
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -477,7 +477,7 @@
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (repIndex2 > dictStartIndex))
+                    & (offset_2 < current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h
index b17cf3e..e16b7b0 100644
--- a/lib/compress/zstd_double_fast.h
+++ b/lib/compress/zstd_double_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 8f8dfcd..4edc04d 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -416,9 +416,9 @@
         const BYTE* const repMatch = repBase + repIndex;
         hashTable[h] = curr;   /* update hash table */
         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
-        assert(offset_1 <= curr +1);   /* check repIndex */
 
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -453,7 +453,7 @@
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h
index 1a5fd33..0d4a0c1 100644
--- a/lib/compress/zstd_fast.h
+++ b/lib/compress/zstd_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 3d35ee4..3d523e8 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -438,43 +438,9 @@
     }
 }
 
-
-
-/* *********************************
-*  Hash Chain
+/***********************************
+* Dedicated dict search
 ***********************************/
-#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-                        ZSTD_matchState_t* ms,
-                        const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
-{
-    U32* const hashTable  = ms->hashTable;
-    const U32 hashLog = cParams->hashLog;
-    U32* const chainTable = ms->chainTable;
-    const U32 chainMask = (1 << cParams->chainLog) - 1;
-    const BYTE* const base = ms->window.base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-
-    while(idx < target) { /* catch up */
-        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    ms->nextToUpdate = target;
-    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-}
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
 {
@@ -500,11 +466,10 @@
     U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
     U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
     U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
-
     U32 hashIdx;
 
     assert(ms->cParams.chainLog <= 24);
-    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(ms->cParams.hashLog > ms->cParams.chainLog);
     assert(idx != 0);
     assert(tmpMinChain <= minChain);
 
@@ -535,7 +500,7 @@
             if (count == cacheSize) {
                 for (count = 0; count < chainLimit;) {
                     if (i < minChain) {
-                        if (!i || countBeyondMinChain++ > cacheSize) {
+                        if (!i || ++countBeyondMinChain > cacheSize) {
                             /* only allow pulling `cacheSize` number of entries
                              * into the cache or chainTable beyond `minChain`,
                              * to replace the entries pulled out of the
@@ -591,6 +556,139 @@
     ms->nextToUpdate = target;
 }
 
+/* Returns the longest match length found in the dedicated dict search structure.
+ * If none are longer than the argument ml, then ml will be returned.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
+                                            const ZSTD_matchState_t* const dms,
+                                            const BYTE* const ip, const BYTE* const iLimit,
+                                            const BYTE* const prefixStart, const U32 curr,
+                                            const U32 dictLimit, const size_t ddsIdx) {
+    const U32 ddsLowestIndex  = dms->window.dictLimit;
+    const BYTE* const ddsBase = dms->window.base;
+    const BYTE* const ddsEnd  = dms->window.nextSrc;
+    const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+    const U32 ddsIndexDelta   = dictLimit - ddsSize;
+    const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+    const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+    U32 ddsAttempt;
+    U32 matchIndex;
+
+    for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+        PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 const chainIndex = chainPackedPointer >> 8;
+
+        PREFETCH_L1(&dms->chainTable[chainIndex]);
+    }
+
+    for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+        size_t currentMl=0;
+        const BYTE* match;
+        matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+        match = ddsBase + matchIndex;
+
+        if (!matchIndex) {
+            return ml;
+        }
+
+        /* guaranteed by table construction */
+        (void)ddsLowestIndex;
+        assert(matchIndex >= ddsLowestIndex);
+        assert(match+4 <= ddsEnd);
+        if (MEM_read32(match) == MEM_read32(ip)) {
+            /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+            currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) {
+                /* best possible, avoids read overflow on next attempt */
+                return ml;
+            }
+        }
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 chainIndex = chainPackedPointer >> 8;
+        U32 const chainLength = chainPackedPointer & 0xFF;
+        U32 const chainAttempts = nbAttempts - ddsAttempt;
+        U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+        U32 chainAttempt;
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+            PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+        }
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->chainTable[chainIndex];
+            match = ddsBase + matchIndex;
+
+            /* guaranteed by table construction */
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+    return ml;
+}
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
 
 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
@@ -661,90 +759,8 @@
     }
 
     if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32 ddsLowestIndex  = dms->window.dictLimit;
-        const BYTE* const ddsBase = dms->window.base;
-        const BYTE* const ddsEnd  = dms->window.nextSrc;
-        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
-        const U32 ddsIndexDelta   = dictLimit - ddsSize;
-        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
-        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
-        U32 ddsAttempt;
-
-        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
-            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 const chainIndex = chainPackedPointer >> 8;
-
-            PREFETCH_L1(&dms->chainTable[chainIndex]);
-        }
-
-        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
-            size_t currentMl=0;
-            const BYTE* match;
-            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
-            match = ddsBase + matchIndex;
-
-            if (!matchIndex) {
-                return ml;
-            }
-
-            /* guaranteed by table construction */
-            (void)ddsLowestIndex;
-            assert(matchIndex >= ddsLowestIndex);
-            assert(match+4 <= ddsEnd);
-            if (MEM_read32(match) == MEM_read32(ip)) {
-                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-            }
-
-            /* save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                if (ip+currentMl == iLimit) {
-                    /* best possible, avoids read overflow on next attempt */
-                    return ml;
-                }
-            }
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 chainIndex = chainPackedPointer >> 8;
-            U32 const chainLength = chainPackedPointer & 0xFF;
-            U32 const chainAttempts = nbAttempts - ddsAttempt;
-            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
-            U32 chainAttempt;
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
-                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
-            }
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
-                size_t currentMl=0;
-                const BYTE* match;
-                matchIndex = dms->chainTable[chainIndex];
-                match = ddsBase + matchIndex;
-
-                /* guaranteed by table construction */
-                assert(matchIndex >= ddsLowestIndex);
-                assert(match+4 <= ddsEnd);
-                if (MEM_read32(match) == MEM_read32(ip)) {
-                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-                }
-
-                /* save best solution */
-                if (currentMl > ml) {
-                    ml = currentMl;
-                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-                }
-            }
-        }
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
     } else if (dictMode == ZSTD_dictMatchState) {
         const U32* const dmsChainTable = dms->chainTable;
         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
@@ -845,11 +861,657 @@
     }
 }
 
+/* *********************************
+* (SIMD) Row-based matchfinder
+***********************************/
+/* Constants for row-based hash */
+#define ZSTD_ROW_HASH_TAG_OFFSET 1                               /* byte offset of hashes in the match state's tagTable from the beginning of a row */
+#define ZSTD_ROW_HASH_TAG_BITS 8                                 /* nb bits to use for the tag */
+#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
+
+#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
+
+typedef U32 ZSTD_VecMask;   /* Clarifies when we are interacting with a U32 representing a mask of matches */
+
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
+
+#include <emmintrin.h>
+typedef __m128i ZSTD_Vec128;
+
+/* Returns a 128-bit container with 128-bits from src */
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+  return _mm_loadu_si128((ZSTD_Vec128 const*)src);
+}
+
+/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+  return _mm_set1_epi8((char)val);
+}
+
+/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask
+ * into a 32-bit mask that is the MSB of each byte.
+ * */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+  return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+}
+
+typedef struct {
+  __m128i fst;
+  __m128i snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_read(ptr);
+  v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+  return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_set8(val);
+  v.snd = ZSTD_Vec128_set8(val);
+  return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+  ZSTD_VecMask fstMask;
+  ZSTD_VecMask sndMask;
+  fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+  sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+  return fstMask | (sndMask << 16);
+}
+
+#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
+
+#include <arm_neon.h>
+typedef uint8x16_t ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+  return vld1q_u8((const BYTE* const)src);
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+  return vdupq_n_u8(val);
+}
+
+/* Mimics '_mm_movemask_epi8()' from SSE */
+static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
+    /* Shift out everything but the MSB bits in each byte */
+    uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
+    /* Merge the even lanes together with vsra (right shift and add) */
+    uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
+    uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
+    uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
+    /* Extract the low 8 bits from each lane, merge */
+    return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8);
+}
+
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+  return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
+}
+
+typedef struct {
+    uint8x16_t fst;
+    uint8x16_t snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_read(ptr);
+  v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+  return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+  ZSTD_Vec256 v;
+  v.fst = ZSTD_Vec128_set8(val);
+  v.snd = ZSTD_Vec128_set8(val);
+  return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+  ZSTD_VecMask fstMask;
+  ZSTD_VecMask sndMask;
+  fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+  sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+  return fstMask | (sndMask << 16);
+}
+
+#else /* Scalar fallback version */
+
+#define VEC128_NB_SIZE_T (16 / sizeof(size_t))
+typedef struct {
+    size_t vec[VEC128_NB_SIZE_T];
+} ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    ZSTD_Vec128 ret;
+    ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    ZSTD_Vec128 ret = { {0} };
+    int startBit = sizeof(size_t) * 8 - 8;
+    for (;startBit >= 0; startBit -= 8) {
+        unsigned j = 0;
+        for (;j < VEC128_NB_SIZE_T; ++j) {
+            ret.vec[j] |= ((size_t)val << startBit);
+        }
+    }
+    return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    ZSTD_VecMask res = 0;
+    unsigned i = 0;
+    unsigned l = 0;
+    for (; i < VEC128_NB_SIZE_T; ++i) {
+        const size_t cmp1 = x.vec[i];
+        const size_t cmp2 = y.vec[i];
+        unsigned j = 0;
+        for (; j < sizeof(size_t); ++j, ++l) {
+            if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
+                res |= ((U32)1 << (j+i*sizeof(size_t)));
+            }
+        }
+    }
+    return res;
+}
+
+#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T
+typedef struct {
+    size_t vec[VEC256_NB_SIZE_T];
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) {
+    ZSTD_Vec256 ret;
+    ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 ret = { {0} };
+    int startBit = sizeof(size_t) * 8 - 8;
+    for (;startBit >= 0; startBit -= 8) {
+        unsigned j = 0;
+        for (;j < VEC256_NB_SIZE_T; ++j) {
+            ret.vec[j] |= ((size_t)val << startBit);
+        }
+    }
+    return ret;
+}
+
+/* Compare x to y, byte by byte, generating a "matches" bitfield */
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    ZSTD_VecMask res = 0;
+    unsigned i = 0;
+    unsigned l = 0;
+    for (; i < VEC256_NB_SIZE_T; ++i) {
+        const size_t cmp1 = x.vec[i];
+        const size_t cmp2 = y.vec[i];
+        unsigned j = 0;
+        for (; j < sizeof(size_t); ++j, ++l) {
+            if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
+                res |= ((U32)1 << (j+i*sizeof(size_t)));
+            }
+        }
+    }
+    return res;
+}
+
+#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */
+
+/* ZSTD_VecMask_next():
+ * Starting from the LSB, returns the idx of the next non-zero bit.
+ * Basically counting the nb of trailing zeroes.
+ */
+static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    return _BitScanForward(&r, val) ? (U32)r : 0;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)
+    return (U32)__builtin_ctz(val);
+#   else
+    /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */
+    static const U32 multiplyDeBruijnBitPosition[32] =
+    {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+	return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27];
+#   endif
+}
+
+/* ZSTD_VecMask_rotateRight():
+ * Rotates a bitfield to the right by "rotation" bits.
+ * If the rotation is greater than totalBits, the returned mask is 0.
+ */
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) {
+  if (rotation == 0)
+    return mask;
+  switch (totalBits) {
+    default:
+      assert(0);
+    case 16:
+      return (mask >> rotation) | (U16)(mask << (16 - rotation));
+    case 32:
+      return (mask >> rotation) | (U32)(mask << (32 - rotation));
+  }
+}
+
+/* ZSTD_row_nextIndex():
+ * Returns the next index to insert at within a tagTable row, and updates the "head"
+ * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
+  U32 const next = (*tagRow - 1) & rowMask;
+  *tagRow = (BYTE)next;
+  return next;
+}
+
+/* ZSTD_isAligned():
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
+ */
+MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
+    assert((align & (align - 1)) == 0);
+    return (((size_t)ptr) & (align - 1)) == 0;
+}
+
+/* ZSTD_row_prefetch():
+ * Performs prefetching for the hashTable and tagTable at a given row.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+    PREFETCH_L1(hashTable + relRow);
+    if (rowLog == 5) {
+        PREFETCH_L1(hashTable + relRow + 16);
+    }
+    PREFETCH_L1(tagTable + relRow);
+    assert(rowLog == 4 || rowLog == 5);
+    assert(ZSTD_isAligned(hashTable + relRow, 64));                 /* prefetched hash row always 64-byte aligned */
+    assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */
+}
+
+/* ZSTD_row_fillHashCache():
+ * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
+ * but not beyond iLimit.
+ */
+static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+                                   U32 const rowLog, U32 const mls,
+                                   U32 idx, const BYTE* const iLimit)
+{
+    U32 const* const hashTable = ms->hashTable;
+    U16 const* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
+    U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
+
+    for (; idx < lim; ++idx) {
+        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+        ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
+    }
+
+    DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
+                                                     ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
+                                                     ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
+}
+
+/* ZSTD_row_nextCachedHash():
+ * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+                                                  U16 const* tagTable, BYTE const* base,
+                                                  U32 idx, U32 const hashLog,
+                                                  U32 const rowLog, U32 const mls)
+{
+    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+    U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+    ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+    {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
+        cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
+        return hash;
+    }
+}
+
+/* ZSTD_row_update_internal():
+ * Inserts the byte at ip into the appropriate position in the hash table.
+ * Determines the relative row, and the position within the {16, 32} entry row to insert at.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                                                    U32 const mls, U32 const rowLog,
+                                                    U32 const rowMask, U32 const useCache)
+{
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target);
+    for (; idx < target; ++idx) {
+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls)
+                                  : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
+                                                       Explicit cast allows us to get exact desired position within each row */
+        U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+
+        assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
+        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+        row[pos] = idx;
+    }
+    ms->nextToUpdate = target;
+}
+
+/* ZSTD_row_update():
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
+ * processing.
+ */
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
+    const U32 rowMask = (1u << rowLog) - 1;
+    const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+
+    DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
+}
+
+/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
+ * the hash at the nth position in a row of the tagTable.
+ */
+FORCE_INLINE_TEMPLATE
+ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) {
+    ZSTD_VecMask matches = 0;
+    if (rowEntries == 16) {
+        ZSTD_Vec128 hashes        = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+        ZSTD_Vec128 expandedTags  = ZSTD_Vec128_set8(tag);
+        matches                   = ZSTD_Vec128_cmpMask8(hashes, expandedTags);
+    } else if (rowEntries == 32) {
+        ZSTD_Vec256 hashes        = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
+        ZSTD_Vec256 expandedTags  = ZSTD_Vec256_set8(tag);
+        matches                   = ZSTD_Vec256_cmpMask8(hashes, expandedTags);
+    } else {
+        assert(0);
+    }
+    /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
+        to match up with the actual layout of the entries within the hashTable */
+    return ZSTD_VecMask_rotateRight(matches, head, rowEntries);
+}
+
+/* The high-level approach of the SIMD row based match finder is as follows:
+ * - Figure out where to insert the new entry:
+ *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
+ *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ *        which row to insert into.
+ *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable.
+ *      - Also insert the "tag" into the equivalent row and position in the tagTable.
+ *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
+ *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
+ *                  for alignment/performance reasons, leaving some bytes unused.
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *   generate a bitfield that we can cycle through to check the collisions in the hash table.
+ * - Pick the longest match.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_RowFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode,
+                        const U32 rowLog)
+{
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32* const hashCache = ms->hashCache;
+    const U32 hashLog = ms->rowHashLog;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 rowEntries = (1U << rowLog);
+    const U32 rowMask = rowEntries - 1;
+    const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+    U32 nbAttempts = 1U << cappedSearchLog;
+    size_t ml=4-1;
+
+    /* DMS/DDS variables that may be referenced laster */
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    size_t ddsIdx;
+    U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
+    U32 dmsTag;
+    U32* dmsRow;
+    BYTE* dmsTagRow;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+        {   /* Prefetch DDS hashtable entry */
+            ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
+            PREFETCH_L1(&dms->hashTable[ddsIdx]);
+        }
+        ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
+    }
+
+    if (dictMode == ZSTD_dictMatchState) {
+        /* Prefetch DMS rows */
+        U32* const dmsHashTable = dms->hashTable;
+        U16* const dmsTagTable = dms->tagTable;
+        U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+        dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
+        dmsRow = dmsHashTable + dmsRelRow;
+        ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
+    }
+
+    /* Update the hashTable and tagTable up to (but not including) ip */
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+    {   /* Get the hash for ip, compute the appropriate row */
+        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);
+        U32 const head = *tagRow & rowMask;
+        U32 matchBuffer[32 /* maximum nb entries per row */];
+        size_t numMatches = 0;
+        size_t currMatch = 0;
+        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+
+        /* Cycle through the matches and prefetch */
+        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+            U32 const matchIndex = row[matchPos];
+            assert(numMatches < rowEntries);
+            if (matchIndex < lowLimit)
+                break;
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                PREFETCH_L1(base + matchIndex);
+            } else {
+                PREFETCH_L1(dictBase + matchIndex);
+            }
+            matchBuffer[numMatches++] = matchIndex;
+        }
+
+        /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
+           in ZSTD_row_update_internal() at the next search. */
+        {
+            U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+            row[pos] = ms->nextToUpdate++;
+        }
+
+        /* Return the longest match */
+        for (; currMatch < numMatches; ++currMatch) {
+            U32 const matchIndex = matchBuffer[currMatch];
+            size_t currentMl=0;
+            assert(matchIndex < curr);
+            assert(matchIndex >= lowLimit);
+
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                const BYTE* const match = base + matchIndex;
+                assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+                if (match[ml] == ip[ml])   /* potentially better */
+                    currentMl = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex;
+                assert(match+4 <= dictEnd);
+                if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+            }
+
+            /* Save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
+    } else if (dictMode == ZSTD_dictMatchState) {
+        /* TODO: Measure and potentially add prefetching to DMS */
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+
+        {   U32 const head = *dmsTagRow & rowMask;
+            U32 matchBuffer[32 /* maximum nb row entries */];
+            size_t numMatches = 0;
+            size_t currMatch = 0;
+            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
+
+            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+                U32 const matchIndex = dmsRow[matchPos];
+                if (matchIndex < dmsLowestIndex)
+                    break;
+                PREFETCH_L1(dmsBase + matchIndex);
+                matchBuffer[numMatches++] = matchIndex;
+            }
+
+            /* Return the longest match */
+            for (; currMatch < numMatches; ++currMatch) {
+                U32 const matchIndex = matchBuffer[currMatch];
+                size_t currentMl=0;
+                assert(matchIndex >= dmsLowestIndex);
+                assert(matchIndex < curr);
+
+                {   const BYTE* const match = dmsBase + matchIndex;
+                    assert(match+4 <= dmsEnd);
+                    if (MEM_read32(match) == MEM_read32(ip))
+                        currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+                }
+
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                    if (ip+currentMl == iLimit) break;
+                }
+            }
+        }
+    }
+    return ml;
+}
+
+/* Inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog);
+    case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog);
+    case 7 :
+    case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
+    switch(cappedSearchLog)
+    {
+    default :
+    case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4);
+    case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5);
+    }
+}
+
 
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
 
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_compressBlock_lazy_generic(
@@ -863,10 +1525,11 @@
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 prefixLowestIndex = ms->window.dictLimit;
     const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
 
     typedef size_t (*searchMax_f)(
                         ZSTD_matchState_t* ms,
@@ -878,26 +1541,30 @@
      * that should never occur (extDict modes go to the other implementation
      * below and there is no DDSS for binary tree search yet).
      */
-    const searchMax_f searchFuncs[4][2] = {
+    const searchMax_f searchFuncs[4][3] = {
         {
             ZSTD_HcFindBestMatch_selectMLS,
-            ZSTD_BtFindBestMatch_selectMLS
+            ZSTD_BtFindBestMatch_selectMLS,
+            ZSTD_RowFindBestMatch_selectRowLog
         },
         {
             NULL,
+            NULL,
             NULL
         },
         {
             ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
-            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+            ZSTD_BtFindBestMatch_dictMatchState_selectMLS,
+            ZSTD_RowFindBestMatch_dictMatchState_selectRowLog
         },
         {
             ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
-            NULL
+            NULL,
+            ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog
         }
     };
 
-    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+    searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod];
     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
 
     const int isDMS = dictMode == ZSTD_dictMatchState;
@@ -915,9 +1582,7 @@
 
     assert(searchMax != NULL);
 
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
-
-    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
         U32 const curr = (U32)(ip - base);
@@ -933,6 +1598,12 @@
         assert(offset_2 <= dictAndPrefixLength);
     }
 
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                            ms->nextToUpdate, ilimit);
+    }
+
     /* Match Loop */
 #if defined(__GNUC__) && defined(__x86_64__)
     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
@@ -1198,6 +1869,70 @@
     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
 }
 
+/* Row-based matchfinder */
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+}
 
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_extDict_generic(
@@ -1210,7 +1945,7 @@
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 dictLimit = ms->window.dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
@@ -1218,18 +1953,28 @@
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
     const U32 windowLog = ms->cParams.windowLog;
+    const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
 
     typedef size_t (*searchMax_f)(
                         ZSTD_matchState_t* ms,
                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
+    const searchMax_f searchFuncs[3] = {
+        ZSTD_HcFindBestMatch_extDict_selectMLS,
+        ZSTD_BtFindBestMatch_extDict_selectMLS,
+        ZSTD_RowFindBestMatch_extDict_selectRowLog
+    };
+    searchMax_f searchMax = searchFuncs[(int)searchMethod];
     U32 offset_1 = rep[0], offset_2 = rep[1];
 
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
 
     /* init */
     ip += (ip == prefixStart);
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                               ms->nextToUpdate, ilimit);
+    }
 
     /* Match Loop */
 #if defined(__GNUC__) && defined(__x86_64__)
@@ -1249,7 +1994,8 @@
             const U32 repIndex = (U32)(curr+1 - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+               & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1280,7 +2026,8 @@
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
-                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                   & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1311,7 +2058,8 @@
                     const U32 repIndex = (U32)(curr - offset_1);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
-                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                       & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1357,7 +2105,8 @@
             const U32 repIndex = repCurrent - offset_2;
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+               & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1410,3 +2159,26 @@
 {
     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
+
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+}
diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h
index 87a3971..150f7b3 100644
--- a/lib/compress/zstd_lazy.h
+++ b/lib/compress/zstd_lazy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,6 +26,7 @@
 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
 
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
 
@@ -43,6 +44,15 @@
 size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -56,6 +66,15 @@
 size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -66,6 +85,15 @@
 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -76,9 +104,19 @@
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+        
 
 #if defined (__cplusplus)
 }
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index fb4b8a0..fa4ebea 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -57,6 +57,33 @@
     }
 }
 
+/** ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into the hash without registering any
+ * splits. This effectively resets the hash state. This is used when skipping
+ * over data, either at the beginning of a block, or skipping sections.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+                                BYTE const* data, size_t minMatchLength)
+{
+    U64 hash = state->rolling;
+    size_t n = 0;
+
+#define GEAR_ITER_ONCE() do {                                  \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1;                                                \
+    } while (0)
+    while (n + 3 < minMatchLength) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < minMatchLength) {
+        GEAR_ITER_ONCE();
+    }
+#undef GEAR_ITER_ONCE
+}
+
 /** ZSTD_ldm_gear_feed():
  *
  * Registers in the splits array all the split points found in the first
@@ -255,7 +282,7 @@
     while (ip < iend) {
         size_t hashed;
         unsigned n;
-        
+
         numSplits = 0;
         hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
 
@@ -327,16 +354,8 @@
 
     /* Initialize the rolling hash state with the first minMatchLength bytes */
     ZSTD_ldm_gear_init(&hashState, params);
-    {
-        size_t n = 0;
-
-        while (n < minMatchLength) {
-            numSplits = 0;
-            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
-                                    splits, &numSplits);
-        }
-        ip += minMatchLength;
-    }
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+    ip += minMatchLength;
 
     while (ip < ilimit) {
         size_t hashed;
@@ -361,6 +380,7 @@
         for (n = 0; n < numSplits; n++) {
             size_t forwardMatchLength = 0, backwardMatchLength = 0,
                    bestMatchLength = 0, mLength;
+            U32 offset;
             BYTE const* const split = candidates[n].split;
             U32 const checksum = candidates[n].checksum;
             U32 const hash = candidates[n].hash;
@@ -428,9 +448,9 @@
             }
 
             /* Match found */
+            offset = (U32)(split - base) - bestEntry->offset;
             mLength = forwardMatchLength + backwardMatchLength;
             {
-                U32 const offset = (U32)(split - base) - bestEntry->offset;
                 rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
 
                 /* Out of sequence storage */
@@ -447,6 +467,21 @@
             ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
 
             anchor = split + forwardMatchLength;
+
+            /* If we find a match that ends after the data that we've hashed
+             * then we have a repeating, overlapping, pattern. E.g. all zeros.
+             * If one repetition of the pattern matches our `stopMask` then all
+             * repetitions will. We don't need to insert them all into out table,
+             * only the first one. So skip over overlapping matches.
+             * This is a major speed boost (20x) for compressing a single byte
+             * repeated, when that byte ends up in the table.
+             */
+            if (anchor > ip + hashed) {
+                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+                /* Continue the outter loop at anchor (ip + hashed == anchor). */
+                ip = anchor - hashed;
+                break;
+            }
         }
 
         ip += hashed;
@@ -500,7 +535,7 @@
 
         assert(chunkStart < iend);
         /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -622,12 +657,13 @@
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->minMatch;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h
index e9137f0..393466f 100644
--- a/lib/compress/zstd_ldm.h
+++ b/lib/compress/zstd_ldm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -66,6 +66,7 @@
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
             void const* src, size_t srcSize);
 
 /**
diff --git a/lib/compress/zstd_ldm_geartab.h b/lib/compress/zstd_ldm_geartab.h
index d24c1f6..e5c24d8 100644
--- a/lib/compress/zstd_ldm_geartab.h
+++ b/lib/compress/zstd_ldm_geartab.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 6ec368b..402a7e5 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index bf31ccc..627255f 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index e28e8bb..22aa3e1 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -512,7 +512,7 @@
         if (dictSize > 0) {
             if (dictContentType == ZSTD_dct_rawContent) {
                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
             } else {
@@ -569,7 +569,7 @@
             assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                    seqStore.size == 0 && seqStore.capacity > 0);
             assert(src.size <= serialState->params.jobSize);
-            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+            ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
             error = ZSTD_ldm_generateSequences(
                 &serialState->ldmState, &seqStore,
                 &serialState->params.ldmParams, src.start, src.size);
@@ -695,6 +695,10 @@
         {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
             if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
         }
+        if (!job->firstJob) {
+            size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+            if (ZSTD_isError(err)) JOB_ERROR(err);
+        }
         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
@@ -750,6 +754,12 @@
             if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
             lastCBlockSize = cSize;
     }   }
+    if (!job->firstJob) {
+        /* Double check that we don't have an ext-dict, because then our
+         * repcode invalidation doesn't work.
+         */
+        assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+    }
     ZSTD_CCtx_trace(cctx, 0);
 
 _endJob:
@@ -1240,9 +1250,8 @@
 
     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
-        U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
-        U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
-        assert(jobSizeMB >= 1);
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
         DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
         mtctx->rsync.hash = 0;
         mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index c69031b..2fee2ec 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -32,11 +32,11 @@
 
 
 /* ===   Constants   === */
-#ifndef ZSTDMT_NBWORKERS_MAX
-#  define ZSTDMT_NBWORKERS_MAX 200
+#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
+#  define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
 #endif
-#ifndef ZSTDMT_JOBSIZE_MIN
-#  define ZSTDMT_JOBSIZE_MIN (1 MB)
+#ifndef ZSTDMT_JOBSIZE_MIN   /* a different value can be selected at compile time */
+#  define ZSTDMT_JOBSIZE_MIN (512 KB)
 #endif
 #define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index 7699c92..b93c9a0 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * huff0 huffman decoder,
  * part of Finite State Entropy library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  *  You can contact the author at :
  *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -528,13 +528,15 @@
 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
-                           U32 nbBitsBaseline, U16 baseSeq)
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
 {
     HUF_DEltX2 DElt;
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
 
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
     /* get pre-calculated rankVal */
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill skipped values */
     if (minWeight>1) {
@@ -569,14 +571,18 @@
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
-                           const U32 nbBitsBaseline)
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
 {
-    U32 rankVal[HUF_TABLELOG_MAX + 1];
+    U32* rankVal = wksp;
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
     U32 s;
 
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
 
     /* fill DTable */
     for (s=0; s<sortedListSize; s++) {
@@ -594,7 +600,7 @@
             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                            rankValOrigin[nbBits], minWeight,
                            sortedList+sortedRank, sortedListSize-sortedRank,
-                           nbBitsBaseline, symbol);
+                           nbBitsBaseline, symbol, wksp, wkspSize);
         } else {
             HUF_DEltX2 DElt;
             MEM_writeLE16(&(DElt.sequence), symbol);
@@ -608,6 +614,15 @@
     }
 }
 
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                        const void* src, size_t srcSize,
                              void* workSpace, size_t wkspSize)
@@ -620,47 +635,32 @@
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
     U32 *rankStart;
 
-    rankValCol_t* rankVal;
-    U32* rankStats;
-    U32* rankStart0;
-    sortedSymbol_t* sortedSymbol;
-    BYTE* weightList;
-    size_t spaceUsed32 = 0;
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
 
-    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-    rankStats = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 1;
-    rankStart0 = (U32 *)workSpace + spaceUsed32;
-    spaceUsed32 += HUF_TABLELOG_MAX + 2;
-    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
-    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
-    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
 
-    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
-
-    rankStart = rankStart0 + 1;
-    ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
 
     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
-    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
     if (HUF_isError(iSize)) return iSize;
 
     /* check result */
     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
 
     /* Get start index of each weight */
     {   U32 w, nextRankStart = 0;
         for (w=1; w<maxW+1; w++) {
             U32 curr = nextRankStart;
-            nextRankStart += rankStats[w];
+            nextRankStart += wksp->rankStats[w];
             rankStart[w] = curr;
         }
         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
@@ -670,37 +670,38 @@
     /* sort symbols by weight */
     {   U32 s;
         for (s=0; s<nbSymbols; s++) {
-            U32 const w = weightList[s];
+            U32 const w = wksp->weightList[s];
             U32 const r = rankStart[w]++;
-            sortedSymbol[r].symbol = (BYTE)s;
-            sortedSymbol[r].weight = (BYTE)w;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
         }
         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
     }
 
     /* Build rankVal */
-    {   U32* const rankVal0 = rankVal[0];
+    {   U32* const rankVal0 = wksp->rankVal[0];
         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
             U32 nextRankVal = 0;
             U32 w;
             for (w=1; w<maxW+1; w++) {
                 U32 curr = nextRankVal;
-                nextRankVal += rankStats[w] << (w+rescale);
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
                 rankVal0[w] = curr;
         }   }
         {   U32 const minBits = tableLog+1 - maxW;
             U32 consumed;
             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-                U32* const rankValPtr = rankVal[consumed];
+                U32* const rankValPtr = wksp->rankVal[consumed];
                 U32 w;
                 for (w = 1; w < maxW+1; w++) {
                     rankValPtr[w] = rankVal0[w] >> consumed;
     }   }   }   }
 
     HUF_fillDTableX2(dt, maxTableLog,
-                   sortedSymbol, sizeOfSort,
-                   rankStart0, rankVal, maxW,
-                   tableLog+1);
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
 
     dtd.tableLog = (BYTE)maxTableLog;
     dtd.tableType = 1;
@@ -1225,7 +1226,7 @@
     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
 }
-#endif 
+#endif
 
 #ifndef HUF_FORCE_DECOMPRESS_X1
 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c
index 443fe6b..ce33547 100644
--- a/lib/decompress/zstd_ddict.c
+++ b/lib/decompress/zstd_ddict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_ddict.h b/lib/decompress/zstd_ddict.h
index 2f17445..bd03268 100644
--- a/lib/decompress/zstd_ddict.h
+++ b/lib/decompress/zstd_ddict.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 1513950..910bc03 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -58,7 +58,6 @@
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
-#include "../common/zstd_trace.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #define HUF_STATIC_LINKING_ONLY
@@ -789,7 +788,7 @@
 static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
 {
 #if ZSTD_TRACE
-    if (dctx->traceCtx) {
+    if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) {
         ZSTD_Trace trace;
         ZSTD_memset(&trace, 0, sizeof(trace));
         trace.version = ZSTD_VERSION_NUMBER;
@@ -1384,7 +1383,7 @@
 {
     assert(dctx != NULL);
 #if ZSTD_TRACE
-    dctx->traceCtx = ZSTD_trace_decompress_begin(dctx);
+    dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0;
 #endif
     dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
     dctx->stage = ZSTDds_getFrameHeaderSize;
diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c
index b71bc20..349dcdc 100644
--- a/lib/decompress/zstd_decompress_block.c
+++ b/lib/decompress/zstd_decompress_block.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -658,7 +658,6 @@
     size_t litLength;
     size_t matchLength;
     size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -672,9 +671,6 @@
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -936,10 +932,9 @@
         : 0)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
 
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
     ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
@@ -1014,14 +1009,6 @@
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-    if (prefetch == ZSTD_p_prefetch) {
-        size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
-        seqState->pos = pos + seq.matchLength;
-    }
-
     /* ANS state update
      * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
      * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
@@ -1122,7 +1109,6 @@
     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1156,13 +1142,14 @@
          * If you see most cycles served out of the DSB you've hit the good case.
          * If it is pretty even then you may be in an okay case.
          *
-         * I've been able to reproduce this issue on the following CPUs:
+         * This issue has been reproduced on the following CPUs:
          *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
          *               Use Instruments->Counters to get DSB/MITE cycles.
          *               I never got performance swings, but I was able to
          *               go from the good case of mostly DSB to half of the
          *               cycles served from MITE.
          *   - Coffeelake: Intel i9-9900k
+         *   - Coffeelake: Intel i7-9700k
          *
          * I haven't been able to reproduce the instability or DSB misses on any
          * of the following CPUS:
@@ -1175,33 +1162,35 @@
          *
          *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
          */
+        __asm__(".p2align 6");
+        __asm__("nop");
         __asm__(".p2align 5");
         __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
         __asm__(".p2align 4");
+#  endif
 #endif
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-            BIT_reloadDStream(&(seqState.DStream));
             op += oneSeqSize;
-            /* gcc and clang both don't like early returns in this loop.
-             * Instead break and check for an error at the end of the loop.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
-                error = oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
                 break;
-            }
-            if (UNLIKELY(!--nbSeq)) break;
+            BIT_reloadDStream(&(seqState.DStream));
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1232,6 +1221,24 @@
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
@@ -1254,18 +1261,17 @@
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1277,21 +1283,23 @@
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
diff --git a/lib/decompress/zstd_decompress_block.h b/lib/decompress/zstd_decompress_block.h
index 891cf98..049a0cd 100644
--- a/lib/decompress/zstd_decompress_block.h
+++ b/lib/decompress/zstd_decompress_block.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/decompress/zstd_decompress_internal.h b/lib/decompress/zstd_decompress_internal.h
index 3fcec6c..ebda0c9 100644
--- a/lib/decompress/zstd_decompress_internal.h
+++ b/lib/decompress/zstd_decompress_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,7 +21,6 @@
  *********************************************************/
 #include "../common/mem.h"             /* BYTE, U16, U32 */
 #include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
-#include "../common/zstd_trace.h"      /* ZSTD_TraceCtx */
 
 
 
diff --git a/lib/deprecated/zbuff.h b/lib/deprecated/zbuff.h
index ed98b46..b83ea0f 100644
--- a/lib/deprecated/zbuff.h
+++ b/lib/deprecated/zbuff.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_common.c b/lib/deprecated/zbuff_common.c
index cb370cb..e7d01a0 100644
--- a/lib/deprecated/zbuff_common.c
+++ b/lib/deprecated/zbuff_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_compress.c b/lib/deprecated/zbuff_compress.c
index 972afd8..2e72267 100644
--- a/lib/deprecated/zbuff_compress.c
+++ b/lib/deprecated/zbuff_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/deprecated/zbuff_decompress.c b/lib/deprecated/zbuff_decompress.c
index baf1829..d73c0f3 100644
--- a/lib/deprecated/zbuff_decompress.c
+++ b/lib/deprecated/zbuff_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
index f069b63..8364444 100644
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,15 +26,16 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
 
 /*-*************************************
 *  Constants
diff --git a/lib/dictBuilder/cover.h b/lib/dictBuilder/cover.h
index 7bbeaac..1aacddd 100644
--- a/lib/dictBuilder/cover.h
+++ b/lib/dictBuilder/cover.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,6 +8,10 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include <stdio.h>  /* fprintf */
 #include <stdlib.h> /* malloc, free, qsort */
 #include <string.h> /* memset */
@@ -16,10 +20,7 @@
 #include "../common/pool.h"
 #include "../common/threading.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
 
 /**
  * COVER_best_t is used for two purposes:
diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c
index 5f880e4..ed789f9 100644
--- a/lib/dictBuilder/fastcover.c
+++ b/lib/dictBuilder/fastcover.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,16 +16,17 @@
 #include <string.h> /* memset */
 #include <time.h>   /* clock */
 
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
 #include "../common/mem.h" /* read */
 #include "../common/pool.h"
 #include "../common/threading.h"
-#include "cover.h"
 #include "../common/zstd_internal.h" /* includes zstd.h */
 #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
+#include "../zdict.h"
+#include "cover.h"
 
 
 /*-*************************************
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 4df5a94..459cbe4 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -41,18 +41,19 @@
 #include <stdio.h>         /* fprintf, fopen, ftello64 */
 #include <time.h>          /* clock */
 
-#include "../common/mem.h"           /* read */
-#include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
-#define HUF_STATIC_LINKING_ONLY
-#include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
-#include "../common/zstd_internal.h" /* includes zstd.h */
-#include "../common/xxhash.h"        /* XXH64 */
-#include "divsufsort.h"
 #ifndef ZDICT_STATIC_LINKING_ONLY
 #  define ZDICT_STATIC_LINKING_ONLY
 #endif
-#include "zdict.h"
+#define HUF_STATIC_LINKING_ONLY
+
+#include "../common/mem.h"           /* read */
+#include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
+#include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/xxhash.h"        /* XXH64 */
 #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
+#include "../zdict.h"
+#include "divsufsort.h"
 
 
 /*-*************************************
diff --git a/lib/dll/example/Makefile b/lib/dll/example/Makefile
index a1cc189..03b034d 100644
--- a/lib/dll/example/Makefile
+++ b/lib/dll/example/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_legacy.h b/lib/legacy/zstd_legacy.h
index 3f21d22..a6f1174 100644
--- a/lib/legacy/zstd_legacy.h
+++ b/lib/legacy/zstd_legacy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v01.c b/lib/legacy/zstd_v01.c
index e2b93ed..7ab5547 100644
--- a/lib/legacy/zstd_v01.c
+++ b/lib/legacy/zstd_v01.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v01.h b/lib/legacy/zstd_v01.h
index f85d65d..f777eb6 100644
--- a/lib/legacy/zstd_v01.h
+++ b/lib/legacy/zstd_v01.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 0368cb6..89fdc71 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v02.h b/lib/legacy/zstd_v02.h
index 4dc2c7f..1b37195 100644
--- a/lib/legacy/zstd_v02.h
+++ b/lib/legacy/zstd_v02.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index e2a35b4..5262d51 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v03.h b/lib/legacy/zstd_v03.h
index 89fa3b1..7a00d43 100644
--- a/lib/legacy/zstd_v03.h
+++ b/lib/legacy/zstd_v03.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 9c73d4a..bee1b99 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v04.h b/lib/legacy/zstd_v04.h
index 8d93215..66b97ab 100644
--- a/lib/legacy/zstd_v04.h
+++ b/lib/legacy/zstd_v04.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 6027a7b..eb8966b 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v05.h b/lib/legacy/zstd_v05.h
index 074ff1b..bd423bf 100644
--- a/lib/legacy/zstd_v05.h
+++ b/lib/legacy/zstd_v05.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index 76fc838..fcb16d4 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v06.h b/lib/legacy/zstd_v06.h
index 1fa8f9d..9e32b76 100644
--- a/lib/legacy/zstd_v06.h
+++ b/lib/legacy/zstd_v06.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index 1239d81..0d0e466 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/legacy/zstd_v07.h b/lib/legacy/zstd_v07.h
index 3982a04..bc35cfa 100644
--- a/lib/legacy/zstd_v07.h
+++ b/lib/legacy/zstd_v07.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/dictBuilder/zdict.h b/lib/zdict.h
similarity index 71%
rename from lib/dictBuilder/zdict.h
rename to lib/zdict.h
index 190ffa5..75b05db 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/zdict.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@
 #  define ZDICTLIB_API ZDICTLIB_VISIBILITY
 #endif
 
+/*******************************************************************************
+ * Zstd dictionary builder
+ *
+ * FAQ
+ * ===
+ * Why should I use a dictionary?
+ * ------------------------------
+ *
+ * Zstd can use dictionaries to improve compression ratio of small data.
+ * Traditionally small files don't compress well because there is very little
+ * repetion in a single sample, since it is small. But, if you are compressing
+ * many similar files, like a bunch of JSON records that share the same
+ * structure, you can train a dictionary on ahead of time on some samples of
+ * these files. Then, zstd can use the dictionary to find repetitions that are
+ * present across samples. This can vastly improve compression ratio.
+ *
+ * When is a dictionary useful?
+ * ----------------------------
+ *
+ * Dictionaries are useful when compressing many small files that are similar.
+ * The larger a file is, the less benefit a dictionary will have. Generally,
+ * we don't expect dictionary compression to be effective past 100KB. And the
+ * smaller a file is, the more we would expect the dictionary to help.
+ *
+ * How do I use a dictionary?
+ * --------------------------
+ *
+ * Simply pass the dictionary to the zstd compressor with
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
+ * more advanced functions that allow selecting some options, see zstd.h for
+ * complete documentation.
+ *
+ * What is a zstd dictionary?
+ * --------------------------
+ *
+ * A zstd dictionary has two pieces: Its header, and its content. The header
+ * contains a magic number, the dictionary ID, and entropy tables. These
+ * entropy tables allow zstd to save on header costs in the compressed file,
+ * which really matters for small data. The content is just bytes, which are
+ * repeated content that is common across many samples.
+ *
+ * What is a raw content dictionary?
+ * ---------------------------------
+ *
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
+ * content dictionary.
+ *
+ * How do I train a dictionary?
+ * ----------------------------
+ *
+ * Gather samples from your use case. These samples should be similar to each
+ * other. If you have several use cases, you could try to train one dictionary
+ * per use case.
+ *
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
+ * dictionary. There are a few advanced versions of this function, but this
+ * is a great starting point. If you want to further tune your dictionary
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
+ *
+ * If the dictionary training function fails, that is likely because you
+ * either passed too few samples, or a dictionary would not be effective
+ * for your data. Look at the messages that the dictionary trainer printed,
+ * if it doesn't say too few samples, then a dictionary would not be effective.
+ *
+ * How large should my dictionary be?
+ * ----------------------------------
+ *
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
+ * dictionary larger than that. But, most use cases can get away with a
+ * smaller dictionary. The advanced dictionary builders can automatically
+ * shrink the dictionary for you, and select a the smallest size that
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
+ * A smaller dictionary can save memory, and potentially speed up
+ * compression.
+ *
+ * How many samples should I provide to the dictionary builder?
+ * ------------------------------------------------------------
+ *
+ * We generally recommend passing ~100x the size of the dictionary
+ * in samples. A few thousand should suffice. Having too few samples
+ * can hurt the dictionaries effectiveness. Having more samples will
+ * only improve the dictionaries effectiveness. But having too many
+ * samples can slow down the dictionary builder.
+ *
+ * How do I determine if a dictionary will be effective?
+ * -----------------------------------------------------
+ *
+ * Simply train a dictionary and try it out. You can use zstd's built in
+ * benchmarking tool to test the dictionary effectiveness.
+ *
+ *   # Benchmark levels 1-3 without a dictionary
+ *   zstd -b1e3 -r /path/to/my/files
+ *   # Benchmark levels 1-3 with a dictioanry
+ *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
+ *
+ * When should I retrain a dictionary?
+ * -----------------------------------
+ *
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
+ * effectiveness drops as the data you are compressing changes. Generally, we do
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
+ * at which they decay depends on your use case. Internally, we regularly
+ * retrain dictionaries, and if the new dictionary performs significantly
+ * better than the old dictionary, we will ship the new dictionary.
+ *
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
+ * -------------------------------------------------------------------------
+ *
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
+ * using a third-party dictionary builder, you can turn it into a zstd
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
+ * provide some samples of the data. It will add the zstd header to the
+ * raw content, which contains a dictionary ID and entropy tables, which
+ * will improve compression ratio, and allow zstd to write the dictionary ID
+ * into the frame, if you so choose.
+ *
+ * Do I have to use zstd's dictionary builder?
+ * -------------------------------------------
+ *
+ * No! You can construct dictionary content however you please, it is just
+ * bytes. It will always be valid as a raw content dictionary. If you want
+ * a zstd dictionary, which can improve compression ratio, use
+ * `ZDICT_finalizeDictionary()`.
+ *
+ * What is the attack surface of a zstd dictionary?
+ * ------------------------------------------------
+ *
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
+ * zstd should never crash, or access out-of-bounds memory no matter what
+ * the dictionary is. However, if an attacker can control the dictionary
+ * during decompression, they can cause zstd to generate arbitrary bytes,
+ * just like if they controlled the compressed data.
+ *
+ ******************************************************************************/
+
 
 /*! ZDICT_trainFromBuffer():
  *  Train a dictionary from an array of samples.
@@ -64,7 +203,14 @@
 typedef struct {
     int      compressionLevel;   /*< optimize for a specific zstd compression level; 0 means default */
     unsigned notificationLevel;  /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value) */
+    unsigned dictID;             /*< force dictID value; 0 means auto mode (32-bits random value)
+                                  *   NOTE: The zstd format reserves some dictionary IDs for future use.
+                                  *         You may use them in private settings, but be warned that they
+                                  *         may be used by zstd in a public dictionary registry in the future.
+                                  *         These dictionary IDs are:
+                                  *           - low range  : <= 32767
+                                  *           - high range : >= (2^31)
+                                  */
 } ZDICT_params_t;
 
 /*! ZDICT_finalizeDictionary():
diff --git a/lib/zstd.h b/lib/zstd.h
index 222339d..4651e6c 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -71,8 +71,8 @@
 
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  9
+#define ZSTD_VERSION_MINOR    5
+#define ZSTD_VERSION_RELEASE  0
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 
 /*! ZSTD_versionNumber() :
@@ -109,7 +109,6 @@
 #define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
 
 
-
 /***************************************
 *  Simple API
 ***************************************/
@@ -166,7 +165,7 @@
  * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
 
-/*! ZSTD_findFrameCompressedSize() :
+/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
  * `src` should point to the start of a ZSTD frame or skippable frame.
  * `srcSize` must be >= first frame size
  * @return : the compressed size of the first frame starting at `src`,
@@ -180,8 +179,9 @@
 ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
 ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
-ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
 ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
 
 
 /***************************************
@@ -199,7 +199,7 @@
  */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
 
 /*! ZSTD_compressCCtx() :
  *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
@@ -222,7 +222,7 @@
  *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
-ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
 
 /*! ZSTD_decompressDCtx() :
  *  Same as ZSTD_decompress(),
@@ -234,9 +234,9 @@
                                  const void* src, size_t srcSize);
 
 
-/***************************************
-*  Advanced compression API
-***************************************/
+/*********************************************
+*  Advanced compression API (Requires v1.4.0+)
+**********************************************/
 
 /* API design :
  *   Parameters are pushed one by one into an existing context,
@@ -266,7 +266,6 @@
                          Only the order (from fast to strong) is guaranteed */
 } ZSTD_strategy;
 
-
 typedef enum {
 
     /* compression parameters
@@ -332,7 +331,6 @@
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
-
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -389,7 +387,7 @@
     ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
                               * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
                               * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
                               * The minimum size is automatically and transparently enforced. */
     ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
                               * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@@ -419,6 +417,8 @@
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
      * ZSTD_c_validateSequences
+     * ZSTD_c_splitBlocks
+     * ZSTD_c_useRowMatchFinder
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -434,7 +434,10 @@
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
      ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009
+     ZSTD_c_experimentalParam12=1009,
+     ZSTD_c_experimentalParam13=1010,
+     ZSTD_c_experimentalParam14=1011,
+     ZSTD_c_experimentalParam15=1012
 } ZSTD_cParameter;
 
 typedef struct {
@@ -519,9 +522,9 @@
                              const void* src, size_t srcSize);
 
 
-/***************************************
-*  Advanced decompression API
-***************************************/
+/***********************************************
+*  Advanced decompression API (Requires v1.4.0+)
+************************************************/
 
 /* The advanced API pushes parameters one by one into an existing DCtx context.
  * Parameters are sticky, and remain valid for all following frames
@@ -667,7 +670,7 @@
                                  /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
 /*===== ZSTD_CStream management functions =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
-ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
 
 /*===== Streaming compression functions =====*/
 typedef enum {
@@ -683,7 +686,7 @@
                         : note : multithreaded compression will block to flush as much output as possible. */
 } ZSTD_EndDirective;
 
-/*! ZSTD_compressStream2() :
+/*! ZSTD_compressStream2() : Requires v1.4.0+
  *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
  *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
@@ -729,11 +732,11 @@
 
 
 /* *****************************************************************************
- * This following is a legacy streaming API.
+ * This following is a legacy streaming API, available since v1.0+ .
  * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
  * It is redundant, but remains fully supported.
- * Advanced parameters and dictionary compression can only be used through the
- * new API.
+ * Streaming in combination with advanced parameters and dictionary compression
+ * can only be used through the new API.
  ******************************************************************************/
 
 /*!
@@ -788,7 +791,7 @@
                                  /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
 /*===== ZSTD_DStream management functions =====*/
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
-ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
 
 /*===== Streaming decompression functions =====*/
 
@@ -811,7 +814,7 @@
 /*! ZSTD_compress_usingDict() :
  *  Compression at an explicit compression level using a Dictionary.
  *  A dictionary can be any arbitrary data segment (also called a prefix),
- *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  or a buffer with specified information (see zdict.h).
  *  Note : This function loads the dictionary, resulting in significant startup delay.
  *         It's intended for a dictionary used only once.
  *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
@@ -854,7 +857,8 @@
                                          int compressionLevel);
 
 /*! ZSTD_freeCDict() :
- *  Function frees memory allocated by ZSTD_createCDict(). */
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
@@ -876,7 +880,8 @@
 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
 
 /*! ZSTD_freeDDict() :
- *  Function frees memory allocated with ZSTD_createDDict() */
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
@@ -892,19 +897,25 @@
  *  Dictionary helper functions
  *******************************/
 
-/*! ZSTD_getDictID_fromDict() :
+/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
  *  Provides the dictID stored within dictionary.
  *  if @return == 0, the dictionary is not conformant with Zstandard specification.
  *  It can still be loaded, but as a content-only dictionary. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 
-/*! ZSTD_getDictID_fromDDict() :
+/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
  *  Provides the dictID of the dictionary loaded into `ddict`.
  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 
-/*! ZSTD_getDictID_fromFrame() :
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
  *  Provides the dictID required to decompressed the frame stored within `src`.
  *  If @return == 0, the dictID could not be decoded.
  *  This could for one of the following reasons :
@@ -918,7 +929,7 @@
 
 
 /*******************************************************************************
- * Advanced dictionary and prefix API
+ * Advanced dictionary and prefix API (Requires v1.4.0+)
  *
  * This API allows dictionaries to be used with ZSTD_compress2(),
  * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
@@ -927,7 +938,7 @@
  ******************************************************************************/
 
 
-/*! ZSTD_CCtx_loadDictionary() :
+/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal CDict from `dict` buffer.
  *  Decompression will have to use same dictionary.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -946,7 +957,7 @@
  *           to precisely select how dictionary content must be interpreted. */
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_CCtx_refCDict() :
+/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used for all next compressed frames.
  *  Note that compression parameters are enforced from within CDict,
  *  and supersede any compression parameter previously set within CCtx.
@@ -960,7 +971,7 @@
  *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 
-/*! ZSTD_CCtx_refPrefix() :
+/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) for next compressed frame.
  *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
  *  Decompression will need same prefix to properly regenerate data.
@@ -981,7 +992,7 @@
 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
                                  const void* prefix, size_t prefixSize);
 
-/*! ZSTD_DCtx_loadDictionary() :
+/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal DDict from dict buffer,
  *  to be used to decompress next frames.
  *  The dictionary remains valid for all future frames, until explicitly invalidated.
@@ -998,7 +1009,7 @@
  */
 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_DCtx_refDDict() :
+/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used to decompress next frames.
  *  The dictionary remains active for decompression of future frames using same DCtx.
  *
@@ -1016,7 +1027,7 @@
  */
 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 
-/*! ZSTD_DCtx_refPrefix() :
+/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) to decompress next frame.
  *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
  *  and must use the same prefix as the one used during compression.
@@ -1037,7 +1048,7 @@
 
 /* ===   Memory management   === */
 
-/*! ZSTD_sizeof_*() :
+/*! ZSTD_sizeof_*() : Requires v1.4.0+
  *  These functions give the _current_ memory usage of selected object.
  *  Note that object memory usage can evolve (increase or decrease) over time. */
 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@@ -1062,6 +1073,28 @@
 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
 
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+#  define ZSTD_DEPRECATED(message) ZSTDLIB_API  /* disable deprecation warnings */
+#else
+#  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+#    define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+#  elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+#  elif defined(__GNUC__) && (__GNUC__ >= 3)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_API
+#  endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
 /****************************************************************************************
  *   experimental API (static linking only)
  ****************************************************************************************
@@ -1268,6 +1301,11 @@
   ZSTD_lcm_uncompressed = 2   /**< Always emit uncompressed literals. */
 } ZSTD_literalCompressionMode_e;
 
+typedef enum {
+  ZSTD_urm_auto = 0,                   /* Automatically determine whether or not we use row matchfinder */
+  ZSTD_urm_disableRowMatchFinder = 1,  /* Never use row matchfinder */
+  ZSTD_urm_enableRowMatchFinder = 2    /* Always use row matchfinder when applicable */
+} ZSTD_useRowMatchFinderMode_e;
 
 /***************************************
 *  Frame size functions
@@ -1538,11 +1576,11 @@
  * Note that the lifetime of such pool must exist while being used.
  * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
  * to use an internal thread pool).
- * ZSTD_freeThreadPool frees a thread pool.
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
  */
 typedef struct POOL_ctx_s ZSTD_threadPool;
 ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
-ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
 ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 
 
@@ -1575,12 +1613,6 @@
  *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 
-/*! ZSTD_getDictID_fromCDict() :
- *  Provides the dictID of the dictionary loaded into `cdict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
-
 /*! ZSTD_getCParams() :
  * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  * `estimatedSrcSize` value is optional, select 0 if not known */
@@ -1607,18 +1639,20 @@
 /*! ZSTD_compress_advanced() :
  *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
-ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2")
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict,size_t dictSize,
                                           ZSTD_parameters params);
 
 /*! ZSTD_compress_usingCDict_advanced() :
- *  Note : this function is now REDUNDANT.
+ *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning in some future version */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_CDict* cdict,
@@ -1680,7 +1714,7 @@
 
 /* Controls how the literals are compressed (default is auto).
  * The value must be of type ZSTD_literalCompressionMode_e.
- * See ZSTD_literalCompressionMode_t enum definition for details.
+ * See ZSTD_literalCompressionMode_e enum definition for details.
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
@@ -1832,6 +1866,46 @@
  */
 #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
 
+/* ZSTD_c_splitBlocks
+ * Default is 0 == disabled. Set to 1 to enable block splitting.
+ *
+ * Will attempt to split blocks in order to improve compression ratio at the cost of speed.
+ */
+#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13
+
+/* ZSTD_c_useRowMatchFinder
+ * Default is ZSTD_urm_auto.
+ * Controlled with ZSTD_useRowMatchFinderMode_e enum.
+ *
+ * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library
+ * will decide at runtime whether to use the row-based matchfinder based on support for SIMD
+ * instructions as well as the windowLog.
+ *
+ * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder.
+ * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder.
+ */
+#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+
+/* ZSTD_c_deterministicRefPrefix
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Zstd produces different results for prefix compression when the prefix is
+ * directly adjacent to the data about to be compressed vs. when it isn't.
+ * This is because zstd detects that the two buffers are contiguous and it can
+ * use a more efficient match finding algorithm. However, this produces different
+ * results than when the two buffers are non-contiguous. This flag forces zstd
+ * to always load the prefix in non-contiguous mode, even if it happens to be
+ * adjacent to the data, to guarantee determinism.
+ *
+ * If you really care about determinism when using a dictionary or prefix,
+ * like when doing delta compression, you should select this option. It comes
+ * at a speed penalty of about ~2.5% if the dictionary and data happened to be
+ * contiguous, and is free if they weren't contiguous. We don't expect that
+ * intentionally making the dictionary and data contiguous will be worth the
+ * cost to memcpy() the data.
+ */
+#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
@@ -1852,13 +1926,13 @@
  *                                    These parameters will be applied to
  *                                    all subsequent frames.
  *  - ZSTD_compressStream2() : Do compression using the CCtx.
- *  - ZSTD_freeCCtxParams() : Free the memory.
+ *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
  *
  *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
  *  for static allocation of CCtx for single-threaded compression.
  */
 ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
 
 /*! ZSTD_CCtxParams_reset() :
  *  Reset params to default values.
@@ -1877,7 +1951,7 @@
  */
 ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 
-/*! ZSTD_CCtxParams_setParameter() :
+/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
  *  Similar to ZSTD_CCtx_setParameter.
  *  Set one compression parameter, selected by enum ZSTD_cParameter.
  *  Parameters must be applied to a ZSTD_CCtx using
@@ -2043,11 +2117,13 @@
 
 
 /*! ZSTD_DCtx_setFormat() :
+ *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
  *  Instruct the decoder context about what kind of data to decode next.
  *  This instruction is mandatory to decode data without a fully-formed header,
  *  such ZSTD_f_zstd1_magicless for example.
  * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
-ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 
 /*! ZSTD_decompressStream_simpleArgs() :
  *  Same as ZSTD_decompressStream(),
@@ -2071,7 +2147,7 @@
 /*=====   Advanced Streaming compression functions  =====*/
 
 /*! ZSTD_initCStream_srcSize() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -2080,15 +2156,15 @@
  * pledgedSrcSize must be correct. If it is not known at init time, use
  * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
  * "0" also disables frame content size field. It may be enabled in the future.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
                          int compressionLevel,
                          unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingDict() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
  *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@@ -2097,15 +2173,15 @@
  * dict == NULL or dictSize < 8, in which case no dict is used.
  * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
  * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
                      const void* dict, size_t dictSize,
                            int compressionLevel);
 
 /*! ZSTD_initCStream_advanced() :
- * This function is deprecated, and is approximately equivalent to:
+ * This function is DEPRECATED, and is approximately equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
  *     for ((param, value) : params) {
@@ -2117,23 +2193,24 @@
  * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
  * pledgedSrcSize must be correct.
  * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                           ZSTD_parameters params,
                           unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingCDict() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, cdict);
- *
+ * 
  * note : cdict will just be referenced, and must outlive compression session
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
 
 /*! ZSTD_initCStream_usingCDict_advanced() :
  *   This function is DEPRECATED, and is approximately equivalent to:
@@ -2148,18 +2225,21 @@
  * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
  * pledgedSrcSize must be correct. If srcSize is not known at init time, use
  * value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                const ZSTD_CDict* cdict,
                                      ZSTD_frameParameters fParams,
                                      unsigned long long pledgedSrcSize);
 
 /*! ZSTD_resetCStream() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
+ *       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
+ *       explicitly specified.
  *
  *  start a new frame, using same parameters from previous frame.
  *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@@ -2169,9 +2249,10 @@
  *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
  *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  * @return : 0, or an error code (which can be tested using ZSTD_isError())
- *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ *  This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
 
 typedef struct {
@@ -2258,8 +2339,7 @@
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
 
   Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
-  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
   It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
@@ -2284,15 +2364,17 @@
 /*=====   Buffer-less streaming compression functions  =====*/
 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
-ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
 
 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-
+/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
 /**
   Buffer-less streaming decompression (synchronous mode)
 
diff --git a/lib/common/zstd_errors.h b/lib/zstd_errors.h
similarity index 98%
rename from lib/common/zstd_errors.h
rename to lib/zstd_errors.h
index 3ba57e1..fa3686b 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/zstd_errors.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/Makefile b/programs/Makefile
index 936f3c8..599fb02 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -100,8 +100,12 @@
 ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o)
 
 UNAME := $(shell uname)
+
+ifndef BUILD_DIR
 ifeq ($(UNAME), Darwin)
-  HASH ?= md5
+  ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
+    HASH ?= md5
+  endif
 else ifeq ($(UNAME), FreeBSD)
   HASH ?= gmd5sum
 else ifeq ($(UNAME), NetBSD)
@@ -112,7 +116,6 @@
 HASH ?= md5sum
 HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
 
-ifndef BUILD_DIR
 HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ")
 ifeq ($(HAVE_HASH),0)
   $(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
@@ -276,16 +279,19 @@
 zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ)
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
+.PHONY: zstd-nomt
 zstd-nomt : THREAD_CPP :=
 zstd-nomt : THREAD_LD  :=
 zstd-nomt : THREAD_MSG := - multi-threading disabled
 zstd-nomt : zstd
 
+.PHONY: zstd-nogz
 zstd-nogz : ZLIBCPP :=
 zstd-nogz : ZLIBLD  :=
 zstd-nogz : ZLIB_MSG := - gzip support is disabled
 zstd-nogz : zstd
 
+.PHONY: zstd-noxz
 zstd-noxz : LZMACPP :=
 zstd-noxz : LZMALD  :=
 zstd-noxz : LZMA_MSG := - xz/lzma support is disabled
@@ -300,6 +306,7 @@
 
 
 ## zstd-pgo: zstd executable optimized with PGO.
+.PHONY: zstd-pgo
 zstd-pgo :
 	$(MAKE) clean
 	$(MAKE) zstd MOREFLAGS=-fprofile-generate
diff --git a/programs/README.md b/programs/README.md
index cf7f5ba..7fd7104 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -224,7 +224,8 @@
 that `zstd` will use for compression, which by default is `1`.
 This functionality only exists when `zstd` is compiled with multithread support.
 `0` means "use as many threads as detected cpu cores on local system".
-The max # of threads is capped at: `ZSTDMT_NBWORKERS_MAX==200`.
+The max # of threads is capped at `ZSTDMT_NBWORKERS_MAX`,
+which is either 64 in 32-bit mode, or 256 for 64-bit environments.
 
 This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments.
 One such scenario is `tar --zstd`.
diff --git a/programs/benchfn.c b/programs/benchfn.c
index ce39f41..1aadbdd 100644
--- a/programs/benchfn.c
+++ b/programs/benchfn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/benchfn.h b/programs/benchfn.h
index 8c36831..590f292 100644
--- a/programs/benchfn.h
+++ b/programs/benchfn.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 314a343..49c0349 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,7 +36,7 @@
 #include "datagen.h"     /* RDG_genBuffer */
 #include "../lib/common/xxhash.h"
 #include "benchzstd.h"
-#include "../lib/common/zstd_errors.h"
+#include "../lib/zstd_errors.h"
 
 
 /* *************************************
@@ -67,18 +67,10 @@
 /* *************************************
 *  console display
 ***************************************/
-#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
 #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
 
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (displayLevel>=4) fflush(stderr); } } }
-
 
 /* *************************************
 *  Exceptions
@@ -137,7 +129,8 @@
         0, /* ldmHashLog */
         0, /* ldmBuckSizeLog */
         0,  /* ldmHashRateLog */
-        ZSTD_lcm_auto /* literalCompressionMode */
+        ZSTD_lcm_auto, /* literalCompressionMode */
+        0 /* useRowMatchFinder */
     };
     return res;
 }
@@ -175,6 +168,7 @@
         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
     }
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
@@ -766,7 +760,7 @@
         }
         {   FILE* const f = fopen(fileNamesTable[n], "rb");
             if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
-            DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
+            DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
             if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
             {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
                 if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index 7692685..9b40dcc 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -117,6 +117,7 @@
     int ldmBucketSizeLog;
     int ldmHashRateLog;
     ZSTD_literalCompressionMode_e literalCompressionMode;
+    int useRowMatchFinder;  /* use row-based matchfinder if possible */
 } BMK_advancedParams_t;
 
 /* returns default parameters used by nonAdvanced functions */
diff --git a/programs/datagen.c b/programs/datagen.c
index 835e2c0..3b4f9e5 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/datagen.h b/programs/datagen.h
index 97e0c2b..b76ae2a 100644
--- a/programs/datagen.h
+++ b/programs/datagen.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/dibio.c b/programs/dibio.c
index c6d267c..d6c9f6d 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/dibio.h b/programs/dibio.h
index 98ba911..f65ed9b 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@
 *  Dependencies
 ***************************************/
 #define ZDICT_STATIC_LINKING_ONLY
-#include "../lib/dictBuilder/zdict.h"     /* ZDICT_params_t */
+#include "../lib/zdict.h"     /* ZDICT_params_t */
 
 
 /*-*************************************
diff --git a/programs/fileio.c b/programs/fileio.c
index 1f1cbb9..5693ac3 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -25,9 +25,10 @@
 ***************************************/
 #include "platform.h"   /* Large Files support, SET_BINARY_MODE */
 #include "util.h"       /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
-#include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
+#include <stdio.h>      /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
 #include <stdlib.h>     /* malloc, free */
 #include <string.h>     /* strcmp, strlen */
+#include <fcntl.h>      /* O_WRONLY */
 #include <assert.h>
 #include <errno.h>      /* errno */
 #include <limits.h>     /* INT_MAX */
@@ -44,7 +45,7 @@
 
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
 #include "../lib/zstd.h"
-#include "../lib/common/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
+#include "../lib/zstd_errors.h"  /* ZSTD_error_frameParameter_windowTooLarge */
 
 #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
 #  include <zlib.h>
@@ -73,6 +74,14 @@
 
 #define FNSPACE 30
 
+/* Default file permissions 0666 (modulated by umask) */
+#if !defined(_WIN32)
+/* These macros aren't defined on windows. */
+#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
+#else
+#define DEFAULT_FILE_PERMISSIONS (0666)
+#endif
+
 /*-*************************************
 *  Macros
 ***************************************/
@@ -84,10 +93,10 @@
 
 struct FIO_display_prefs_s {
     int displayLevel;   /* 0 : no display;  1: errors;  2: + result + interaction + warnings;  3: + progression;  4: + information */
-    U32 noProgress;
+    FIO_progressSetting_e progressSetting;
 };
 
-static FIO_display_prefs_t g_display_prefs = {2, 0};
+static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
 
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
@@ -96,10 +105,10 @@
 static const U64 g_refreshRate = SEC_TO_MICRO / 6;
 static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
 
-#define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
+#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
 #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
 #define DISPLAYUPDATE(l, ...) {                              \
-        if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \
+        if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \
             if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
                 DELAY_NEXT_UPDATE();                         \
                 DISPLAY(__VA_ARGS__);                        \
@@ -298,6 +307,7 @@
     int blockSize;
     int overlapLog;
     U32 adaptiveMode;
+    U32 useRowMatchFinder;
     int rsyncable;
     int minAdaptLevel;
     int maxAdaptLevel;
@@ -323,6 +333,7 @@
     int excludeCompressedFiles;
     int patchFromMode;
     int contentSize;
+    int allowBlockDevices;
 };
 
 /*-*************************************
@@ -383,6 +394,7 @@
     ret->testMode = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
     ret->excludeCompressedFiles = 0;
+    ret->allowBlockDevices = 0;
     return ret;
 }
 
@@ -418,7 +430,7 @@
 
 void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
 
-void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; }
+void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
 
 
 /*-*************************************
@@ -450,6 +462,8 @@
 
 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
 
+void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
+
 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
     if (blockSize && prefs->nbWorkers==0)
         DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
@@ -468,6 +482,10 @@
     prefs->adaptiveMode = adapt;
 }
 
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
+    prefs->useRowMatchFinder = useRowMatchFinder;
+}
+
 void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
     if ((rsyncable>0) && (prefs->nbWorkers==0))
         EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
@@ -588,11 +606,12 @@
 }
 
 /** FIO_openSrcFile() :
- *  condition : `srcFileName` must be non-NULL.
+ *  condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
  * @result : FILE* to `srcFileName`, or NULL if it fails */
-static FILE* FIO_openSrcFile(const char* srcFileName)
+static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName)
 {
     stat_t statbuf;
+    int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
     assert(srcFileName != NULL);
     if (!strcmp (srcFileName, stdinmark)) {
         DISPLAYLEVEL(4,"Using stdin for input \n");
@@ -608,6 +627,7 @@
 
     if (!UTIL_isRegularFileStat(&statbuf)
      && !UTIL_isFIFOStat(&statbuf)
+     && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf))
     ) {
         DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
                         srcFileName);
@@ -626,7 +646,8 @@
  * @result : FILE* to `dstFileName`, or NULL if it fails */
 static FILE*
 FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
-                const char* srcFileName, const char* dstFileName)
+                const char* srcFileName, const char* dstFileName,
+                const int mode)
 {
     if (prefs->testMode) return NULL;  /* do not open file in test mode */
 
@@ -653,7 +674,6 @@
 
     if (UTIL_isRegularFile(dstFileName)) {
         /* Check if destination file already exists */
-        FILE* const fCheck = fopen( dstFileName, "rb" );
 #if !defined(_WIN32)
         /* this test does not work on Windows :
          * `NUL` and `nul` are detected as regular files */
@@ -662,26 +682,39 @@
                         dstFileName);
         }
 #endif
-        if (fCheck != NULL) {  /* dst file exists, authorization prompt */
-            fclose(fCheck);
-            if (!prefs->overwrite) {
-                if (g_display_prefs.displayLevel <= 1) {
-                    /* No interaction possible */
-                    DISPLAY("zstd: %s already exists; not overwritten  \n",
-                            dstFileName);
-                    return NULL;
-                }
-                DISPLAY("zstd: %s already exists; ", dstFileName);
-                if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
-                    return NULL;
+        if (!prefs->overwrite) {
+            if (g_display_prefs.displayLevel <= 1) {
+                /* No interaction possible */
+                DISPLAY("zstd: %s already exists; not overwritten  \n",
+                        dstFileName);
+                return NULL;
             }
-            /* need to unlink */
-            FIO_removeFile(dstFileName);
-    }   }
+            DISPLAY("zstd: %s already exists; ", dstFileName);
+            if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten  \n", "yY", fCtx->hasStdinInput))
+                return NULL;
+        }
+        /* need to unlink */
+        FIO_removeFile(dstFileName);
+    }
 
-    {   const int old_umask = UTIL_umask(0177); /* u-x,go-rwx */
-        FILE* const f = fopen( dstFileName, "wb" );
-        UTIL_umask(old_umask);
+    {
+#if defined(_WIN32)
+        /* Windows requires opening the file as a "binary" file to avoid
+         * mangling. This macro doesn't exist on unix. */
+        const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
+        const int fd = _open(dstFileName, openflags, mode);
+        FILE* f = NULL;
+        if (fd != -1) {
+            f = _fdopen(fd, "wb");
+        }
+#else
+        const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
+        const int fd = open(dstFileName, openflags, mode);
+        FILE* f = NULL;
+        if (fd != -1) {
+            f = fdopen(fd, "wb");
+        }
+#endif
         if (f == NULL) {
             DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
         }
@@ -918,7 +951,7 @@
     FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
     if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
         DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
-    comprParams->windowLog = MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog);
+    comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
     if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
         if (!prefs->ldmFlag)
             DISPLAYLEVEL(1, "long mode automatically triggered\n");
@@ -986,6 +1019,7 @@
     if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
         CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
     }
+    CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
     /* compression parameters */
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
     CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
@@ -1370,24 +1404,25 @@
                                 (unsigned)(zfp.consumed >> 20),
                                 (unsigned)(zfp.produced >> 20),
                                 cShare );
-                } else {   /* summarized notifications if == 2 */
-                    DISPLAYLEVEL(2, "\r%79s\r", "");    /* Clear out the current displayed line */
+                } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) {
+                    /* Require level 2 or forcibly displayed progress counter for summarized updates */
+                    DISPLAYLEVEL(1, "\r%79s\r", "");    /* Clear out the current displayed line */
                     if (fCtx->nbFilesTotal > 1) {
                         size_t srcFileNameSize = strlen(srcFileName);
                         /* Ensure that the string we print is roughly the same size each time */
                         if (srcFileNameSize > 18) {
                             const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
-                            DISPLAYLEVEL(2, "Compress: %u/%u files. Current: ...%s ",
-                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
+                            DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ",
+                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
                         } else {
-                            DISPLAYLEVEL(2, "Compress: %u/%u files. Current: %*s ",
-                                         fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
+                            DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ",
+                                        fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
                         }
                     }
-                    DISPLAYLEVEL(2, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
+                    DISPLAYLEVEL(1, "Read : %2u ", (unsigned)(zfp.consumed >> 20));
                     if (fileSize != UTIL_FILESIZE_UNKNOWN)
                         DISPLAYLEVEL(2, "/ %2u ", (unsigned)(fileSize >> 20));
-                    DISPLAYLEVEL(2, "MB ==> %2.f%%", cShare);
+                    DISPLAYLEVEL(1, "MB ==> %2.f%%", cShare);
                     DELAY_NEXT_UPDATE();
                 }
 
@@ -1509,7 +1544,7 @@
     U64 readsize = 0;
     U64 compressedfilesize = 0;
     U64 const fileSize = UTIL_getFileSize(srcFileName);
-    DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize);
+    DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
 
     /* compression format selection */
     switch (prefs->compressionType) {
@@ -1603,23 +1638,24 @@
     int closeDstFile = 0;
     int result;
     stat_t statbuf;
-    int transfer_permissions = 0;
     assert(ress.srcFile != NULL);
     if (ress.dstFile == NULL) {
+        int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
+        if ( strcmp (srcFileName, stdinmark)
+          && UTIL_stat(srcFileName, &statbuf)
+          && UTIL_isRegularFileStat(&statbuf) ) {
+            dstFilePermissions = statbuf.st_mode;
+        }
+
         closeDstFile = 1;
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
         if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
         /* Must only be added after FIO_openDstFile() succeeds.
          * Otherwise we may delete the destination file if it already exists,
          * and the user presses Ctrl-C when asked if they wish to overwrite.
          */
         addHandler(dstFileName);
-
-        if ( strcmp (srcFileName, stdinmark)
-          && UTIL_stat(srcFileName, &statbuf)
-          && UTIL_isRegularFileStat(&statbuf) )
-            transfer_permissions = 1;
     }
 
     result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1639,11 +1675,6 @@
           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
           ) {
             FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
-        } else if (transfer_permissions) {
-            DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: transferring permissions into dst: %s \n", dstFileName);
-            UTIL_setFileStat(dstFileName, &statbuf);
-        } else {
-            DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: do not transfer permissions into dst: %s \n", dstFileName);
         }
     }
 
@@ -1702,7 +1733,7 @@
         return 0;
     }
 
-    ress.srcFile = FIO_openSrcFile(srcFileName);
+    ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
     if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
 
     result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1815,7 +1846,7 @@
             FIO_freeCResources(&ress);
             return 1;
         }
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
         if (ress.dstFile == NULL) {  /* could not open outFileName */
             error = 1;
         } else {
@@ -2134,7 +2165,7 @@
         /* Write block */
         storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
         frameSize += outBuff.pos;
-        if (!fCtx->hasStdoutOutput) {
+        if (!fCtx->hasStdoutOutput || g_display_prefs.progressSetting == FIO_ps_always) {
             if (fCtx->nbFilesTotal > 1) {
                 size_t srcFileNameSize = strlen(srcFileName);
                 if (srcFileNameSize > 18) {
@@ -2505,13 +2536,19 @@
 {
     int result;
     stat_t statbuf;
-    int transfer_permissions = 0;
     int releaseDstFile = 0;
 
     if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
+        int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
+        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
+          && UTIL_stat(srcFileName, &statbuf)
+          && UTIL_isRegularFileStat(&statbuf) ) {
+            dstFilePermissions = statbuf.st_mode;
+        }
+
         releaseDstFile = 1;
 
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName);
+        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
         if (ress.dstFile==NULL) return 1;
 
         /* Must only be added after FIO_openDstFile() succeeds.
@@ -2519,11 +2556,6 @@
          * and the user presses Ctrl-C when asked if they wish to overwrite.
          */
         addHandler(dstFileName);
-
-        if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
-          && UTIL_stat(srcFileName, &statbuf)
-          && UTIL_isRegularFileStat(&statbuf) )
-            transfer_permissions = 1;
     }
 
     result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
@@ -2541,8 +2573,6 @@
           && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
           ) {
             FIO_removeFile(dstFileName);  /* remove decompression artefact; note: don't do anything special if remove() fails */
-        } else if ( transfer_permissions /* file permissions correctly extracted from src */ ) {
-            UTIL_setFileStat(dstFileName, &statbuf);  /* transfer file permissions from src into dst */
         }
     }
 
@@ -2565,7 +2595,7 @@
         return 1;
     }
 
-    srcFile = FIO_openSrcFile(srcFileName);
+    srcFile = FIO_openSrcFile(prefs, srcFileName);
     if (srcFile==NULL) return 1;
     ress.srcBufferLoaded = 0;
 
@@ -2744,7 +2774,7 @@
             return 1;
         }
         if (!prefs->testMode) {
-            ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName);
+            ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
             if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
         }
         for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
@@ -2915,7 +2945,7 @@
 getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
 {
     InfoError status;
-    FILE* const srcFile = FIO_openSrcFile(inFileName);
+    FILE* const srcFile = FIO_openSrcFile(NULL, inFileName);
     ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
 
     info->compressedSize = UTIL_getFileSize(inFileName);
diff --git a/programs/fileio.h b/programs/fileio.h
index 007440b..9d97ec8 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -68,6 +68,8 @@
 
 typedef struct FIO_display_prefs_s FIO_display_prefs_t;
 
+typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e;
+
 /*-*************************************
 *  Parameters
 ***************************************/
@@ -77,6 +79,7 @@
 void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt);
 void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel);
 void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel);
+void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder);
 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize);
 void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag);
 void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag);
@@ -99,9 +102,10 @@
         FIO_prefs_t* const prefs,
         ZSTD_literalCompressionMode_e mode);
 
-void FIO_setNoProgress(unsigned noProgress);
+void FIO_setProgressSetting(FIO_progressSetting_e progressSetting);
 void FIO_setNotificationLevel(int level);
 void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
+void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices);
 void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value);
 void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
 
diff --git a/programs/platform.h b/programs/platform.h
index b4c6ee9..b858e3b 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,6 +22,7 @@
 ****************************************/
 #if defined(_MSC_VER)
 #  define _CRT_SECURE_NO_WARNINGS    /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
+#  define _CRT_NONSTDC_NO_WARNINGS   /* Disable C4996 complaining about posix function names */
 #  if (_MSC_VER <= 1800)             /* 1800 == Visual Studio 2013 */
 #    define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
 #    define snprintf sprintf_s       /* snprintf unsupported by Visual <= 2013 */
diff --git a/programs/timefn.c b/programs/timefn.c
index f04b8cc..64577b0 100644
--- a/programs/timefn.c
+++ b/programs/timefn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/timefn.h b/programs/timefn.h
index 9118004..3fcd78a 100644
--- a/programs/timefn.h
+++ b/programs/timefn.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/util.c b/programs/util.c
index 3fd4cd1..8d190c6 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -159,15 +159,6 @@
     return chmod(filename, permissions);
 }
 
-int UTIL_umask(int mode) {
-#if PLATFORM_POSIX_VERSION > 0
-    return umask(mode);
-#else
-    /* do nothing, fake return value */
-    return mode;
-#endif
-}
-
 int UTIL_setFileStat(const char *filename, const stat_t *statbuf)
 {
     int res = 0;
@@ -269,6 +260,17 @@
     return 0;
 }
 
+/* UTIL_isBlockDevStat : distinguish named pipes */
+int UTIL_isBlockDevStat(const stat_t* statbuf)
+{
+/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
+#if PLATFORM_POSIX_VERSION >= 200112L
+    if (S_ISBLK(statbuf->st_mode)) return 1;
+#endif
+    (void)statbuf;
+    return 0;
+}
+
 int UTIL_isLink(const char* infilename)
 {
 /* macro guards, as defined in : https://linux.die.net/man/2/lstat */
@@ -321,9 +323,7 @@
 static size_t readLineFromFile(char* buf, size_t len, FILE* file)
 {
     assert(!feof(file));
-    /* Work around Cygwin problem when len == 1 it returns NULL. */
-    if (len <= 1) return 0;
-    CONTROL( fgets(buf, (int) len, file) );
+    if ( fgets(buf, (int) len, file) == NULL ) return 0;
     {   size_t linelen = strlen(buf);
         if (strlen(buf)==0) return 0;
         if (buf[linelen-1] == '\n') linelen--;
@@ -983,7 +983,7 @@
 }
 
 FileNamesTable*
-UTIL_createExpandedFNT(const char** inputNames, size_t nbIfns, int followLinks)
+UTIL_createExpandedFNT(const char* const* inputNames, size_t nbIfns, int followLinks)
 {
     unsigned nbFiles;
     char* buf = (char*)malloc(LIST_SIZE_INCREASE);
@@ -1212,12 +1212,17 @@
                 /* fall back on the sysconf value */
                 goto failed;
         }   }
-        if (siblings && cpu_cores) {
+        if (siblings && cpu_cores && siblings > cpu_cores) {
             ratio = siblings / cpu_cores;
         }
+
+        if (ratio && numPhysicalCores > ratio) {
+            numPhysicalCores = numPhysicalCores / ratio;
+        }
+
 failed:
         fclose(cpuinfo);
-        return numPhysicalCores = numPhysicalCores / ratio;
+        return numPhysicalCores;
     }
 }
 
diff --git a/programs/util.h b/programs/util.h
index 0e696f0..24cce44 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,7 +22,7 @@
 #include "platform.h"     /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
 #include <stddef.h>       /* size_t, ptrdiff_t */
 #include <sys/types.h>    /* stat, utime */
-#include <sys/stat.h>     /* stat, chmod, umask */
+#include <sys/stat.h>     /* stat, chmod */
 #include "../lib/common/mem.h"          /* U64 */
 
 
@@ -143,6 +143,7 @@
 int UTIL_isRegularFileStat(const stat_t* statbuf);
 int UTIL_isDirectoryStat(const stat_t* statbuf);
 int UTIL_isFIFOStat(const stat_t* statbuf);
+int UTIL_isBlockDevStat(const stat_t* statbuf);
 U64 UTIL_getFileSizeStat(const stat_t* statbuf);
 
 /**
@@ -152,11 +153,6 @@
  */
 int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions);
 
-/**
- * Wraps umask(). Does nothing when the platform doesn't have that concept.
- */
-int UTIL_umask(int mode);
-
 /*
  * In the absence of a pre-existing stat result on the file in question, these
  * functions will do a stat() call internally and then use that result to
@@ -277,7 +273,7 @@
  *        or NULL in case of error
  */
 FileNamesTable*
-UTIL_createExpandedFNT(const char** filenames, size_t nbFilenames, int followLinks);
+UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
 
 
 /*-****************************************
diff --git a/programs/windres/verrsrc.h b/programs/windres/verrsrc.h
index 6afb48a..c1b60e9 100644
--- a/programs/windres/verrsrc.h
+++ b/programs/windres/verrsrc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/zstd.1 b/programs/zstd.1
index a8fc277..861f938 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "December 2020" "zstd 1.4.8" "User Commands"
+.TH "ZSTD" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -105,7 +105,7 @@
 \fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
 .
 .IP "\(bu" 4
-\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
+\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
 .
 .IP "\(bu" 4
 \fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
@@ -156,7 +156,7 @@
 \fB\-o FILE\fR: save result into \fBFILE\fR
 .
 .IP "\(bu" 4
-\fB\-f\fR, \fB\-\-force\fR: overwrite output without prompting, and (de)compress symbolic links
+\fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\.
 .
 .IP "\(bu" 4
 \fB\-c\fR, \fB\-\-stdout\fR: force write to standard output, even if it is the console
@@ -218,7 +218,7 @@
 \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
 .
 .P
-\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \'ZSTD_NBTHREADS\fBhas a default value of (\fR1\fB), and is capped at ZSTDMT_NBWORKERS_MAX==200\.\fRzstd` must be compiled with multithread support for this to have any effect\.
+\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at ZSTDMT_NBWORKERS_MAX==200\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
 .
 .P
 They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
@@ -343,6 +343,9 @@
 .
 .SH "ADVANCED COMPRESSION OPTIONS"
 .
+.SS "\-B#:"
+Select the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to (slightly) different compressed frames\.
+.
 .SS "\-\-zstd[=options]:"
 \fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
 .
@@ -481,9 +484,6 @@
 .P
 \fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
 .
-.SS "\-B#:"
-Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\.
-.
 .SH "BUGS"
 Report bugs at: https://github\.com/facebook/zstd/issues
 .
diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 1e4a7f4..ae50928 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@@ -115,7 +115,8 @@
 * `-T#`, `--threads=#`:
     Compress using `#` working threads (default: 1).
     If `#` is 0, attempt to detect and use the number of physical CPU cores.
-    In all cases, the nb of threads is capped to ZSTDMT_NBWORKERS_MAX==200.
+    In all cases, the nb of threads is capped to `ZSTDMT_NBWORKERS_MAX`,
+    which is either 64 in 32-bit mode, or 256 for 64-bit environments.
     This modifier does nothing if `zstd` is compiled without multithread support.
 * `--single-thread`:
     Does not spawn a thread for compression, use a single thread for both I/O and compression.
@@ -202,7 +203,7 @@
     save result into `FILE`
 * `-f`, `--force`:
     disable input and output checks. Allows overwriting existing files, input
-    from console, output to stdout, operating on links, etc.
+    from console, output to stdout, operating on links, block devices, etc.
 * `-c`, `--stdout`:
     force write to standard output, even if it is the console
 * `--[no-]sparse`:
@@ -215,7 +216,7 @@
     This setting overrides default and can force sparse mode over stdout.
 * `--rm`:
     remove source file(s) after successful compression or decompression. If used in combination with
-    -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation. 
+    -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation.
 * `-k`, `--keep`:
     keep source file(s) after successful compression or decompression.
     This is the default behavior.
@@ -281,11 +282,11 @@
 
 `ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
 If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
-'ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200. `zstd` must be
+`ZSTD_NBTHREADS` has a default value of (`1`), and is capped at ZSTDMT_NBWORKERS_MAX==200. `zstd` must be
 compiled with multithread support for this to have any effect.
 
 They can both be overridden by corresponding command line arguments:
-`-#` for compression level and `-T#` for number of compression threads. 
+`-#` for compression level and `-T#` for number of compression threads.
 
 
 DICTIONARY BUILDER
@@ -423,6 +424,16 @@
 
 ADVANCED COMPRESSION OPTIONS
 ----------------------------
+### -B#:
+Select the size of each compression job.
+This parameter is only available when multi-threading is enabled.
+Each compression job is run in parallel, so this value indirectly impacts the nb of active threads.
+Default job size varies depending on compression level (generally  `4 * windowSize`).
+`-B#` makes it possible to manually select a custom size.
+Note that job size must respect a minimum value which is enforced transparently.
+This minimum is either 512 KB, or `overlapSize`, whichever is largest.
+Different job sizes will lead to (slightly) different compressed frames.
+
 ### --zstd[=options]:
 `zstd` provides 22 predefined compression levels.
 The selected or default predefined compression level can be changed with
@@ -566,13 +577,6 @@
 
 `--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
 
-### -B#:
-Select the size of each compression job.
-This parameter is available only when multi-threading is enabled.
-Default value is `4 * windowSize`, which means it varies depending on compression level.
-`-B#` makes it possible to select a custom value.
-Note that job size must respect a minimum value which is enforced transparently.
-This minimum is either 1 MB, or `overlapSize`, whichever is largest.
 
 BUGS
 ----
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index f263e96..239aaf4 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -148,7 +148,8 @@
     DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n");
     DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n");
     DISPLAY_F(f, " -f     : disable input and output checks. Allows overwriting existing files,\n");
-    DISPLAY_F(f, "          input from console, output to stdout, operating on links, etc.\n");
+    DISPLAY_F(f, "          input from console, output to stdout, operating on links,\n");
+    DISPLAY_F(f, "          block devices, etc.\n");
     DISPLAY_F(f, "--rm    : remove source file(s) after successful de/compression \n");
     DISPLAY_F(f, " -k     : preserve source file(s) (default) \n");
     DISPLAY_F(f, " -h/-H  : display help/long help and exit \n");
@@ -166,7 +167,8 @@
 
     DISPLAYOUT( " -v     : verbose mode; specify multiple times to increase verbosity \n");
     DISPLAYOUT( " -q     : suppress warnings; specify twice to suppress errors too \n");
-    DISPLAYOUT( "--no-progress : do not display the progress counter \n");
+    DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n");
+    DISPLAYOUT( "                  note: any (de)compressed output to terminal will mix with progress counter text. \n");
 
 #ifdef UTIL_HAS_CREATEFILELIST
     DISPLAYOUT( " -r     : operate recursively on directories \n");
@@ -205,6 +207,7 @@
     DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);
     DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);
     DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");
+    DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n");
 # ifdef ZSTD_MULTITHREAD
     DISPLAYOUT( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
     DISPLAYOUT( " -B#    : select size of each job (default: 0==automatic) \n");
@@ -723,6 +726,7 @@
 {
     int argNb,
         followLinks = 0,
+        allowBlockDevices = 0,
         forceStdin = 0,
         forceStdout = 0,
         hasStdout = 0,
@@ -730,6 +734,7 @@
         main_pause = 0,
         nbWorkers = 0,
         adapt = 0,
+        useRowMatchFinder = 0,
         adaptMin = MINCLEVEL,
         adaptMax = MAXCLEVEL,
         rsyncable = 0,
@@ -836,7 +841,7 @@
                 if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
                 if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
                 if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
-                if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; continue; }
+                if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; }
                 if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }
                 if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }
                 if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
@@ -857,6 +862,8 @@
                 if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }
                 if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }
                 if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
+                if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; }
+                if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; }
                 if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
                 if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
                 if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
@@ -873,7 +880,8 @@
                 if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
                 if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
                 if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
-                if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
+                if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; }
+                if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; }
                 if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
 
                 /* long commands with arguments */
@@ -1020,7 +1028,7 @@
                 case 'D': argument++; NEXT_FIELD(dictFileName); break;
 
                     /* Overwrite */
-                case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; argument++; break;
+                case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; argument++; break;
 
                     /* Verbose mode */
                 case 'v': g_displayLevel++; argument++; break;
@@ -1196,6 +1204,7 @@
         benchParams.ldmFlag = ldmFlag;
         benchParams.ldmMinMatch = (int)g_ldmMinMatch;
         benchParams.ldmHashLog = (int)g_ldmHashLog;
+        benchParams.useRowMatchFinder = useRowMatchFinder;
         if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
             benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
         }
@@ -1326,6 +1335,7 @@
     FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);
     FIO_determineHasStdinInput(fCtx, filenames);
     FIO_setNotificationLevel(g_displayLevel);
+    FIO_setAllowBlockDevices(prefs, allowBlockDevices);
     FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
     if (memLimit == 0) {
         if (compressionParams.windowLog == 0) {
@@ -1348,6 +1358,7 @@
         if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
         if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
         FIO_setAdaptiveMode(prefs, (unsigned)adapt);
+        FIO_setUseRowMatchFinder(prefs, useRowMatchFinder);
         FIO_setAdaptMin(prefs, adaptMin);
         FIO_setAdaptMax(prefs, adaptMax);
         FIO_setRsyncable(prefs, rsyncable);
@@ -1387,7 +1398,7 @@
         else
             operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
 #else
-        (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */
+        (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */
         DISPLAY("Compression not supported \n");
 #endif
     } else {  /* decompression or test */
diff --git a/programs/zstdcli_trace.c b/programs/zstdcli_trace.c
index cd220b9..b3b977f 100644
--- a/programs/zstdcli_trace.c
+++ b/programs/zstdcli_trace.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/zstdcli_trace.h b/programs/zstdcli_trace.h
index 6ed3908..38c27dc 100644
--- a/programs/zstdcli_trace.h
+++ b/programs/zstdcli_trace.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index 4d143b5..bf96185 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDGREP" "1" "December 2020" "zstd 1.4.8" "User Commands"
+.TH "ZSTDGREP" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
diff --git a/programs/zstdless.1 b/programs/zstdless.1
index 43f2354..f08bc19 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@@ -1,5 +1,5 @@
 .
-.TH "ZSTDLESS" "1" "December 2020" "zstd 1.4.8" "User Commands"
+.TH "ZSTDLESS" "1" "May 2021" "zstd 1.5.0" "User Commands"
 .
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
diff --git a/tests/DEPRECATED-test-zstd-speed.py b/tests/DEPRECATED-test-zstd-speed.py
index ab699cf..665e0a7 100755
--- a/tests/DEPRECATED-test-zstd-speed.py
+++ b/tests/DEPRECATED-test-zstd-speed.py
@@ -2,7 +2,7 @@
 # THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py
 
 # ################################################################
-# Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+# Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/Makefile b/tests/Makefile
index f060505..8555300 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -24,11 +24,12 @@
 PYTHON ?= python3
 TESTARTEFACT := versionsTest
 
-DEBUGLEVEL ?= 1
+DEBUGLEVEL ?= 2
 export DEBUGLEVEL  # transmit value to sub-makefiles
 DEBUGFLAGS  = -g -DDEBUGLEVEL=$(DEBUGLEVEL)
 CPPFLAGS   += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-              -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
+              -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) \
+			  -DZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY=1
 ifeq ($(OS),Windows_NT)   # MinGW assumed
 CPPFLAGS   += -D__USE_MINGW_ANSI_STDIO   # compatibility with %zu formatting
 endif
@@ -37,7 +38,7 @@
               -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
               -Wstrict-prototypes -Wundef                                     \
               -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings      \
-              -Wredundant-decls -Wmissing-prototypes
+              -Wredundant-decls -Wmissing-prototypes -Wno-deprecated-declarations
 CFLAGS     += $(DEBUGFLAGS)
 CPPFLAGS   += $(MOREFLAGS)
 
@@ -46,7 +47,6 @@
 ZSTDCOMP_FILES   := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
 ZSTD_FILES  := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
-ZBUFF_FILES := $(ZSTDDIR)/deprecated/*.c
 ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c
 
 ZSTD_F1 := $(wildcard $(ZSTD_FILES))
@@ -131,7 +131,7 @@
 	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 
 fullbench32: CPPFLAGS += -m32
-fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP)
+fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) -Wno-deprecated-declarations
 fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD)
 fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG  # turn off assert() for speed measurements
 fullbench fullbench32 : $(ZSTD_FILES)
@@ -147,7 +147,7 @@
 #	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
 	$(LINK.c) $^ $(LDLIBS) -o $@$(EXT)
 
-fuzzer : CPPFLAGS += $(MULTITHREAD_CPP)
+fuzzer : CPPFLAGS += $(MULTITHREAD_CPP) -Wno-deprecated-declarations
 fuzzer : LDFLAGS += $(MULTITHREAD_LD)
 fuzzer : $(ZSTDMT_OBJECTS)
 fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
@@ -160,15 +160,6 @@
 fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
-zbufftest zbufftest32 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated
-zbufftest zbufftest32 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations   # required to silence deprecation warnings
-zbufftest32 : CFLAGS +=  -m32
-zbufftest zbufftest32 : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
-	$(LINK.c) $^ -o $@$(EXT)
-
-zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
-
 ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c
 ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES)
 ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES)
@@ -240,8 +231,8 @@
         $(PRGDIR)/zstd$(EXT) $(PRGDIR)/zstd32$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fullbench-lib$(EXT) fullbench-dll$(EXT) \
-        fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \
-        fuzzer-dll$(EXT) zstreamtest-dll$(EXT) zbufftest-dll$(EXT) \
+        fuzzer$(EXT) fuzzer32$(EXT) \
+        fuzzer-dll$(EXT) zstreamtest-dll$(EXT) \
         zstreamtest$(EXT) zstreamtest32$(EXT) \
         datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \
         symbols$(EXT) invalidDictionaries$(EXT) legacy$(EXT) poolTests$(EXT) \
@@ -345,12 +336,6 @@
 test-fuzzer32: fuzzer32
 	$(QEMU_SYS) ./fuzzer32 -v $(FUZZERTEST) $(FUZZER_FLAGS)
 
-test-zbuff: zbufftest
-	$(QEMU_SYS) ./zbufftest $(ZSTREAM_TESTTIME)
-
-test-zbuff32: zbufftest32
-	$(QEMU_SYS) ./zbufftest32 $(ZSTREAM_TESTTIME)
-
 test-zstream: zstreamtest
 	$(QEMU_SYS) ./zstreamtest -v $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
 	$(QEMU_SYS) ./zstreamtest --newapi -t1 $(ZSTREAM_TESTTIME) $(FUZZER_FLAGS)
diff --git a/tests/README.md b/tests/README.md
index 1e40c46..cd255e9 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -8,7 +8,6 @@
 - `paramgrill` : parameter tester for zstd
 - `test-zstd-speed.py` : script for testing zstd speed difference between commits
 - `test-zstd-versions.py` : compatibility test between zstd versions stored on Github (v0.1+)
-- `zbufftest`  : Test tool to check ZBUFF (a buffered streaming API) integrity
 - `zstreamtest` : Fuzzer test tool for zstd streaming API
 - `legacy` : Test tool to test decoding of legacy zstd frames
 - `decodecorpus` : Tool to generate valid Zstandard frames, for verifying decoder implementations
diff --git a/tests/automated_benchmarking.py b/tests/automated_benchmarking.py
index 3230821..458bda4 100644
--- a/tests/automated_benchmarking.py
+++ b/tests/automated_benchmarking.py
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2021-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/bigdict.c b/tests/bigdict.c
index 4d08ca1..fb08925 100644
--- a/tests/bigdict.c
+++ b/tests/bigdict.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/checkTag.c b/tests/checkTag.c
index 76664e0..f6c5e97 100644
--- a/tests/checkTag.c
+++ b/tests/checkTag.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/datagencli.c b/tests/datagencli.c
index 2ca5631..ecc05f9 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index e469191..fa6a2d6 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 5dc42ee..a71117e 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,6 +12,7 @@
 /*_************************************
 *  Includes
 **************************************/
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still bench some deprecated functions */
 #include "util.h"        /* Compiler options, UTIL_GetFileSize */
 #include <stdlib.h>      /* malloc */
 #include <stdio.h>       /* fprintf, fopen, ftello64 */
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 9bd280c..93d935a 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -16,9 +16,11 @@
 decompress_dstSize_tooSmall
 fse_read_ncount
 sequence_compression_api
+seekable_roundtrip
 fuzz-*.log
 rt_lib_*
 d_lib_*
+crash-*
 
 # misc
 trace
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index f3a561d..ccb574b 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2016-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -25,10 +25,11 @@
 
 ZSTDDIR = ../../lib
 PRGDIR = ../../programs
+CONTRIBDIR = ../../contrib
 
 FUZZ_CPPFLAGS := -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
 	-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(ZSTDDIR)/legacy \
-	-I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
+	-I$(CONTRIBDIR)/seekable_format -I$(PRGDIR) -DZSTD_MULTITHREAD -DZSTD_LEGACY_SUPPORT=1 $(CPPFLAGS)
 FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
 	-Wstrict-prototypes -Wundef \
@@ -46,6 +47,9 @@
 FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h
 FUZZ_SRC := $(PRGDIR)/util.c ./fuzz_helpers.c ./zstd_helpers.c ./fuzz_data_producer.c
 
+SEEKABLE_HEADERS = $(CONTRIBDIR)/seekable_format/zstd_seekable.h
+SEEKABLE_OBJS = $(CONTRIBDIR)/seekable_format/zstdseek_compress.c $(CONTRIBDIR)/seekable_format/zstdseek_decompress.c
+
 ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c
 ZSTDCOMP_SRC   := $(ZSTDDIR)/compress/*.c
 ZSTDDECOMP_SRC := $(ZSTDDIR)/decompress/*.c
@@ -98,7 +102,8 @@
 	dictionary_stream_round_trip \
 	decompress_dstSize_tooSmall \
 	fse_read_ncount \
-	sequence_compression_api
+	sequence_compression_api \
+	seekable_roundtrip
 
 all: libregression.a $(FUZZ_TARGETS)
 
@@ -192,6 +197,9 @@
 sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequence_compression_api.o $(LIB_FUZZING_ENGINE) -o $@
 
+seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS)  rt_fuzz_seekable_roundtrip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
 
diff --git a/tests/fuzz/block_decompress.c b/tests/fuzz/block_decompress.c
index 2f98781..bdbf769 100644
--- a/tests/fuzz/block_decompress.c
+++ b/tests/fuzz/block_decompress.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/block_round_trip.c b/tests/fuzz/block_round_trip.c
index c88850f..46a84c7 100644
--- a/tests/fuzz/block_round_trip.c
+++ b/tests/fuzz/block_round_trip.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/decompress_dstSize_tooSmall.c b/tests/fuzz/decompress_dstSize_tooSmall.c
index 76806e3..3f7607b 100644
--- a/tests/fuzz/decompress_dstSize_tooSmall.c
+++ b/tests/fuzz/decompress_dstSize_tooSmall.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_decompress.c b/tests/fuzz/dictionary_decompress.c
index 1703521..33c58c8 100644
--- a/tests/fuzz/dictionary_decompress.c
+++ b/tests/fuzz/dictionary_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_loader.c b/tests/fuzz/dictionary_loader.c
index 53c2526..5b60bc4 100644
--- a/tests/fuzz/dictionary_loader.c
+++ b/tests/fuzz/dictionary_loader.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c
index f08f024..7bff4bd 100644
--- a/tests/fuzz/dictionary_round_trip.c
+++ b/tests/fuzz/dictionary_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/dictionary_stream_round_trip.c b/tests/fuzz/dictionary_stream_round_trip.c
index 95e8470..9af712f 100644
--- a/tests/fuzz/dictionary_stream_round_trip.c
+++ b/tests/fuzz/dictionary_stream_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fse_read_ncount.c b/tests/fuzz/fse_read_ncount.c
index fc809ef..c323860 100644
--- a/tests/fuzz/fse_read_ncount.c
+++ b/tests/fuzz/fse_read_ncount.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
index c9ba39c..810daa2 100644
--- a/tests/fuzz/fuzz.h
+++ b/tests/fuzz/fuzz.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 05a7787..d8dfa77 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 # ################################################################
-# Copyright (c) 2016-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -62,6 +62,7 @@
     'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
+    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
 }
 TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c
index 738409c..beb0155 100644
--- a/tests/fuzz/fuzz_data_producer.c
+++ b/tests/fuzz/fuzz_data_producer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h
index 1cfcad9..045aaff 100644
--- a/tests/fuzz/fuzz_data_producer.h
+++ b/tests/fuzz/fuzz_data_producer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz_helpers.c b/tests/fuzz/fuzz_helpers.c
index 8d62ee9..61c0deb 100644
--- a/tests/fuzz/fuzz_helpers.c
+++ b/tests/fuzz/fuzz_helpers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/fuzz_helpers.h b/tests/fuzz/fuzz_helpers.h
index 7085e14..c180478 100644
--- a/tests/fuzz/fuzz_helpers.h
+++ b/tests/fuzz/fuzz_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/raw_dictionary_round_trip.c b/tests/fuzz/raw_dictionary_round_trip.c
index c6e44db..0e65176 100644
--- a/tests/fuzz/raw_dictionary_round_trip.c
+++ b/tests/fuzz/raw_dictionary_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/regression_driver.c b/tests/fuzz/regression_driver.c
index 326cfdc..e6d2dec 100644
--- a/tests/fuzz/regression_driver.c
+++ b/tests/fuzz/regression_driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/seekable_roundtrip.c b/tests/fuzz/seekable_roundtrip.c
new file mode 100644
index 0000000..dcdcaae
--- /dev/null
+++ b/tests/fuzz/seekable_roundtrip.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd.h"
+#include "zstd_seekable.h"
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+static ZSTD_seekable *stream = NULL;
+static ZSTD_seekable_CStream *zscs = NULL;
+static const size_t kSeekableOverheadSize = ZSTD_seekTableFooterSize;
+
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    /* Give a random portion of src data to the producer, to use for
+    parameter generation. The rest will be used for (de)compression */
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    size = FUZZ_dataProducer_reserveDataPrefix(producer);
+    size_t const compressedBufferSize = ZSTD_compressBound(size) + kSeekableOverheadSize;
+    uint8_t* compressedBuffer = (uint8_t*)malloc(compressedBufferSize);
+    uint8_t* decompressedBuffer = (uint8_t*)malloc(size);
+
+    int const cLevel = FUZZ_dataProducer_int32Range(producer, ZSTD_minCLevel(), ZSTD_maxCLevel());
+    unsigned const checksumFlag = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    size_t const uncompressedSize = FUZZ_dataProducer_uint32Range(producer, 0, size);
+    size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, size - uncompressedSize);
+    size_t seekSize;
+
+    if (!zscs) {
+        zscs = ZSTD_seekable_createCStream();
+        FUZZ_ASSERT(zscs);
+    }
+    if (!stream) {
+        stream = ZSTD_seekable_create();
+        FUZZ_ASSERT(stream);
+    }
+
+    {   /* Perform a compression */
+        size_t const initStatus = ZSTD_seekable_initCStream(zscs, cLevel, checksumFlag, size);
+        size_t endStatus;
+        ZSTD_outBuffer out = { .dst=compressedBuffer, .pos=0, .size=compressedBufferSize };
+        ZSTD_inBuffer  in  = { .src=src, .pos=0, .size=size };
+        FUZZ_ASSERT(!ZSTD_isError(initStatus));
+
+        do {
+            size_t cSize = ZSTD_seekable_compressStream(zscs, &out, &in);
+            FUZZ_ASSERT(!ZSTD_isError(cSize));
+        } while (in.pos != in.size);
+
+        FUZZ_ASSERT(in.pos == in.size);
+        endStatus = ZSTD_seekable_endStream(zscs, &out);
+        FUZZ_ASSERT(!ZSTD_isError(endStatus));
+        seekSize = out.pos;
+    }
+
+    {   /* Decompress at an offset */
+        size_t const initStatus = ZSTD_seekable_initBuff(stream, compressedBuffer, seekSize);
+        size_t decompressedBytesTotal = 0;
+        size_t dSize;
+
+        FUZZ_ZASSERT(initStatus);
+        do {
+            dSize = ZSTD_seekable_decompress(stream, decompressedBuffer, uncompressedSize, offset);
+            FUZZ_ASSERT(!ZSTD_isError(dSize));
+            decompressedBytesTotal += dSize;
+        } while (decompressedBytesTotal < uncompressedSize && dSize > 0);
+        FUZZ_ASSERT(decompressedBytesTotal == uncompressedSize);
+    }
+
+    FUZZ_ASSERT_MSG(!FUZZ_memcmp(src+offset, decompressedBuffer, uncompressedSize), "Corruption!");
+
+    free(decompressedBuffer);
+    free(compressedBuffer);
+    FUZZ_dataProducer_free(producer);
+
+#ifndef STATEFUL_FUZZING
+    ZSTD_seekable_free(stream); stream = NULL;
+    ZSTD_seekable_freeCStream(zscs); zscs = NULL;
+#endif
+    return 0;
+}
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index d9003d4..cc840bf 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_compress.c b/tests/fuzz/simple_compress.c
index f02223f..3716d0d 100644
--- a/tests/fuzz/simple_compress.c
+++ b/tests/fuzz/simple_compress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c
index 4ecc22b..dfff11c 100644
--- a/tests/fuzz/simple_decompress.c
+++ b/tests/fuzz/simple_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index f0e1096..c9fac26 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/stream_decompress.c b/tests/fuzz/stream_decompress.c
index 6117aca..e0cdd34 100644
--- a/tests/fuzz/stream_decompress.c
+++ b/tests/fuzz/stream_decompress.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/stream_round_trip.c b/tests/fuzz/stream_round_trip.c
index a09b04a..719eac4 100644
--- a/tests/fuzz/stream_round_trip.c
+++ b/tests/fuzz/stream_round_trip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/zstd_frame_info.c b/tests/fuzz/zstd_frame_info.c
index 8b3608a..9ce645d 100644
--- a/tests/fuzz/zstd_frame_info.c
+++ b/tests/fuzz/zstd_frame_info.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 8ef7786..4d889de 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -91,9 +91,13 @@
     /* Set misc parameters */
     setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
     setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
+    setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
+    setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
     setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
+    setRand(cctx, ZSTD_c_splitBlocks, 0, 1, producer);
+    setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
       setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
     }
diff --git a/tests/fuzz/zstd_helpers.h b/tests/fuzz/zstd_helpers.h
index 46c6d09..7813884 100644
--- a/tests/fuzz/zstd_helpers.h
+++ b/tests/fuzz/zstd_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 5f707e0..1ea6521 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -30,6 +30,7 @@
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressContinue, ZSTD_compressBlock */
 #include "debug.h"        /* DEBUG_STATIC_ASSERT */
 #include "fse.h"
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still test some deprecated functions */
 #include "zstd.h"         /* ZSTD_VERSION_STRING */
 #include "zstd_errors.h"  /* ZSTD_getErrorCode */
 #define ZDICT_STATIC_LINKING_ONLY
@@ -42,6 +43,7 @@
 #include "timefn.h"       /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
 /* must be included after util.h, due to ERROR macro redefinition issue on Visual Studio */
 #include "zstd_internal.h"  /* ZSTD_WORKSPACETOOLARGE_MAXDURATION, ZSTD_WORKSPACETOOLARGE_FACTOR, KB, MB */
+#include "threading.h"    /* ZSTD_pthread_create, ZSTD_pthread_join */
 
 
 /*-************************************
@@ -335,6 +337,126 @@
     }
 }
 
+#ifdef ZSTD_MULTITHREAD
+typedef struct {
+    ZSTD_CCtx* cctx;
+    ZSTD_threadPool* pool;
+    void* CNBuffer;
+    size_t CNBuffSize;
+    void* compressedBuffer;
+    size_t compressedBufferSize;
+    void* decodedBuffer;
+    int err;
+} threadPoolTests_compressionJob_payload;
+
+static void* threadPoolTests_compressionJob(void* payload) {
+    threadPoolTests_compressionJob_payload* args = (threadPoolTests_compressionJob_payload*)payload;
+    size_t cSize;
+    if (ZSTD_isError(ZSTD_CCtx_refThreadPool(args->cctx, args->pool))) args->err = 1;
+    cSize = ZSTD_compress2(args->cctx, args->compressedBuffer, args->compressedBufferSize, args->CNBuffer, args->CNBuffSize);
+    if (ZSTD_isError(cSize)) args->err = 1;
+    if (ZSTD_isError(ZSTD_decompress(args->decodedBuffer, args->CNBuffSize, args->compressedBuffer, cSize))) args->err = 1;
+    return payload;
+}
+
+static int threadPoolTests(void) {
+    int testResult = 0;
+    size_t err;
+
+    size_t const CNBuffSize = 5 MB;
+    void* const CNBuffer = malloc(CNBuffSize);
+    size_t const compressedBufferSize = ZSTD_compressBound(CNBuffSize);
+    void* const compressedBuffer = malloc(compressedBufferSize);
+    void* const decodedBuffer = malloc(CNBuffSize);
+
+    size_t const kPoolNumThreads = 8;
+
+    RDG_genBuffer(CNBuffer, CNBuffSize, 0.5, 0.5, 0);
+
+    DISPLAYLEVEL(3, "thread pool test : threadPool re-use roundtrips: ");
+    {
+        ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        ZSTD_threadPool* pool = ZSTD_createThreadPool(kPoolNumThreads);
+
+        size_t nbThreads = 1;
+        for (; nbThreads <= kPoolNumThreads; ++nbThreads) {
+            ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+            ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, (int)nbThreads);
+            err = ZSTD_CCtx_refThreadPool(cctx, pool);
+            if (ZSTD_isError(err)) {
+                DISPLAYLEVEL(3, "refThreadPool error!\n");
+                ZSTD_freeCCtx(cctx);
+                goto _output_error;
+            }
+            err = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+            if (ZSTD_isError(err)) {
+                DISPLAYLEVEL(3, "Compression error!\n");
+                ZSTD_freeCCtx(cctx);
+                goto _output_error;
+            }
+            err = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, err);
+            if (ZSTD_isError(err)) {
+                DISPLAYLEVEL(3, "Decompression error!\n");
+                ZSTD_freeCCtx(cctx);
+                goto _output_error;
+            }
+        }
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeThreadPool(pool);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "thread pool test : threadPool simultaneous usage: ");
+    {
+        void* const decodedBuffer2 = malloc(CNBuffSize);
+        void* const compressedBuffer2 = malloc(compressedBufferSize);
+        ZSTD_threadPool* pool = ZSTD_createThreadPool(kPoolNumThreads);
+        ZSTD_CCtx* cctx1 = ZSTD_createCCtx();
+        ZSTD_CCtx* cctx2 = ZSTD_createCCtx();
+
+        ZSTD_pthread_t t1;
+        ZSTD_pthread_t t2;
+        threadPoolTests_compressionJob_payload p1 = {cctx1, pool, CNBuffer, CNBuffSize,
+                                                     compressedBuffer, compressedBufferSize, decodedBuffer, 0 /* err */};
+        threadPoolTests_compressionJob_payload p2 = {cctx2, pool, CNBuffer, CNBuffSize,
+                                                     compressedBuffer2, compressedBufferSize, decodedBuffer2, 0 /* err */};
+
+        ZSTD_CCtx_setParameter(cctx1, ZSTD_c_nbWorkers, 2);
+        ZSTD_CCtx_setParameter(cctx2, ZSTD_c_nbWorkers, 2);
+        ZSTD_CCtx_refThreadPool(cctx1, pool);
+        ZSTD_CCtx_refThreadPool(cctx2, pool);
+
+        ZSTD_pthread_create(&t1, NULL, threadPoolTests_compressionJob, &p1);
+        ZSTD_pthread_create(&t2, NULL, threadPoolTests_compressionJob, &p2);
+        ZSTD_pthread_join(t1, NULL);
+        ZSTD_pthread_join(t2, NULL);
+
+        assert(!memcmp(decodedBuffer, decodedBuffer2, CNBuffSize));
+        free(decodedBuffer2);
+        free(compressedBuffer2);
+
+        ZSTD_freeThreadPool(pool);
+        ZSTD_freeCCtx(cctx1);
+        ZSTD_freeCCtx(cctx2);
+
+        if (p1.err || p2.err) goto _output_error;
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+_end:
+    free(CNBuffer);
+    free(compressedBuffer);
+    free(decodedBuffer);
+    return testResult;
+
+_output_error:
+    testResult = 1;
+    DISPLAY("Error detected in Unit tests ! \n");
+    goto _end;
+}
+#endif /* ZSTD_MULTITHREAD */
+
 /*=============================================
 *   Unit tests
 =============================================*/
@@ -374,6 +496,12 @@
         DISPLAYLEVEL(3, "%i (OK) \n", mcl);
     }
 
+    DISPLAYLEVEL(3, "test%3u : default compression level : ", testNb++);
+    {   int const defaultCLevel = ZSTD_defaultCLevel();
+        if (defaultCLevel != ZSTD_CLEVEL_DEFAULT) goto _output_error;
+        DISPLAYLEVEL(3, "%i (OK) \n", defaultCLevel);
+    }
+
     DISPLAYLEVEL(3, "test%3u : ZSTD_versionNumber : ", testNb++);
     {   unsigned const vn = ZSTD_versionNumber();
         DISPLAYLEVEL(3, "%u (OK) \n", vn);
@@ -760,6 +888,50 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : testing dict compression for determinism : ", testNb++);
+    {
+        size_t const testSize = 1024;
+        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        char* dict = (char*)malloc(2 * testSize);
+        int ldmEnabled, level;
+
+        RDG_genBuffer(dict, testSize, 0.5, 0.5, seed);
+        RDG_genBuffer(CNBuffer, testSize, 0.6, 0.6, seed);
+        memcpy(dict + testSize, CNBuffer, testSize);
+        for (level = 1; level <= 5; ++level) {
+            for (ldmEnabled = 0; ldmEnabled <= 1; ++ldmEnabled) {
+                size_t cSize0;
+                XXH64_hash_t compressedChecksum0;
+
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ldmEnabled));
+                CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1));
+
+                CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, testSize);
+                CHECK_Z(cSize);
+                CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, testSize, compressedBuffer, cSize, dict, testSize));
+
+                cSize0 = cSize;
+                compressedChecksum0 = XXH64(compressedBuffer, cSize, 0);
+
+                CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, testSize));
+                cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, dict + testSize, testSize);
+                CHECK_Z(cSize);
+
+                if (cSize != cSize0) goto _output_error;
+                if (XXH64(compressedBuffer, cSize, 0) != compressedChecksum0) goto _output_error;
+            }
+        }
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeDCtx(dctx);
+        free(dict);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : LDM + opt parser with small uncompressible block ", testNb++);
     {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
         ZSTD_DCtx* dctx = ZSTD_createDCtx();
@@ -1544,6 +1716,15 @@
         ZSTD_freeCCtx(cctx);
     }
 
+    DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++)
+    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1) );
+        cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
+        CHECK(cSize);
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
     {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
         size_t cSize1, cSize2;
@@ -1574,6 +1755,7 @@
 
     DISPLAYLEVEL(3, "test%3i : setting multithreaded parameters : ", testNb++)
     {   ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
+        int const jobSize = 512 KB;
         int value;
         /* Check that the overlap log and job size are unset. */
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
@@ -1582,19 +1764,18 @@
         CHECK_EQ(value, 0);
         /* Set and check the overlap log and job size. */
         CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) );
-        CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, 2 MB) );
+        CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) );
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
         CHECK_EQ(value, 5);
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
-        CHECK_EQ(value, 2 MB);
+        CHECK_EQ(value, jobSize);
         /* Set the number of workers and check the overlap log and job size. */
         CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) );
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
         CHECK_EQ(value, 5);
         CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
-        CHECK_EQ(value, 2 MB);
+        CHECK_EQ(value, jobSize);
         ZSTD_freeCCtxParams(params);
-
     }
     DISPLAYLEVEL(3, "OK \n");
 
@@ -1723,10 +1904,7 @@
 
         DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++);
         {   size_t const testSize = CNBuffSize / 3;
-            {   ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);
-                p.fParams.contentSizeFlag = 1;
-                CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
-            }
+            CHECK( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) );
             CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
 
             CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
@@ -1742,13 +1920,14 @@
             size_t const contentSize = 9 KB;
             const void* const dict = (const char*)CNBuffer;
             const void* const contentStart = (const char*)dict + flatdictSize;
+            /* These upper bounds are generally within a few bytes of the compressed size */
             size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
                                                        3770, 3770, 3770, 3750, 3750,
                                                        3742, 3670, 3670, 3660, 3660,
                                                        3660, 3660, 3660, 3660, 3660,
                                                        3660, 3660, 3660 };
             size_t const target_wdict_cSize[22+1] =  { 2830, 2890, 2890, 2820, 2940,
-                                                       2950, 2950, 2921, 2900, 2891,
+                                                       2950, 2950, 2925, 2900, 2891,
                                                        2910, 2910, 2910, 2770, 2760,
                                                        2750, 2750, 2750, 2750, 2750,
                                                        2750, 2750, 2750 };
@@ -1785,6 +1964,22 @@
                 DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n",
                                 l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
             }
+            /* Dict compression with DMS */
+            for ( l=1 ; l <= maxLevel; l++) {
+                size_t wdict_cSize;
+                CHECK_Z( ZSTD_CCtx_loadDictionary(ctxOrig, dict, flatdictSize) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
+                CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
+                wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
+                if (wdict_cSize > target_wdict_cSize[l]) {
+                    DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",
+                                    l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]);
+                    goto _output_error;
+                }
+                DISPLAYLEVEL(4, "level %i with dictionary and compress2 : max expected %u >= reached %u \n",
+                                l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize);
+            }
 
             DISPLAYLEVEL(4, "compression efficiency tests OK \n");
         }
@@ -2530,12 +2725,8 @@
         int const compressionLevel = -1;
 
         assert(cctx != NULL);
-        {   ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0);
-            size_t const cSize_1pass = ZSTD_compress_advanced(cctx,
-                                        compressedBuffer, compressedBufferSize,
-                                        CNBuffer, srcSize,
-                                        NULL, 0,
-                                        params);
+        {   size_t const cSize_1pass = ZSTD_compress(compressedBuffer, compressedBufferSize,
+                                                     CNBuffer, srcSize, compressionLevel);
             if (ZSTD_isError(cSize_1pass)) goto _output_error;
 
             CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
@@ -3181,7 +3372,58 @@
     }
     DISPLAYLEVEL(3, "OK \n");
 
-#endif
+    DISPLAYLEVEL(3, "test%3i : check compression mem usage monotonicity over levels for estimateCCtxSize() : ", testNb++);
+    {
+        int level = 1;
+        size_t prevSize = 0;
+        for (; level < ZSTD_maxCLevel(); ++level) {
+            size_t const currSize = ZSTD_estimateCCtxSize(level);
+            if (prevSize > currSize) {
+                DISPLAYLEVEL(3, "Error! previous cctx size: %zu at level: %d is larger than current cctx size: %zu at level: %d",
+                             prevSize, level-1, currSize, level);
+                goto _output_error;
+            }
+            prevSize = currSize;
+        }
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : check estimateCCtxSize() always larger or equal to ZSTD_estimateCCtxSize_usingCParams() : ", testNb++);
+    {
+        size_t const kSizeIncrement = 2 KB;
+        int level = -3;
+
+        for (; level <= ZSTD_maxCLevel(); ++level) {
+            size_t dictSize = 0;
+            for (; dictSize <= 256 KB; dictSize += 8 * kSizeIncrement) {
+                size_t srcSize = 2 KB;
+                for (; srcSize < 300 KB; srcSize += kSizeIncrement) {
+                    ZSTD_compressionParameters const cParams = ZSTD_getCParams(level, srcSize, dictSize);
+                    size_t const cctxSizeUsingCParams = ZSTD_estimateCCtxSize_usingCParams(cParams);
+                    size_t const cctxSizeUsingLevel = ZSTD_estimateCCtxSize(level);
+                    if (cctxSizeUsingLevel < cctxSizeUsingCParams
+                     || ZSTD_isError(cctxSizeUsingCParams)
+                     || ZSTD_isError(cctxSizeUsingLevel)) {
+                        DISPLAYLEVEL(3, "error! l: %d dict: %zu srcSize: %zu cctx size cpar: %zu, cctx size level: %zu\n",
+                                     level, dictSize, srcSize, cctxSizeUsingCParams, cctxSizeUsingLevel);
+                        goto _output_error;
+                    }
+                }
+            }
+        }
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
+    DISPLAYLEVEL(3, "test%3i : thread pool API tests : \n", testNb++)
+    {
+        int const threadPoolTestResult = threadPoolTests();
+        if (threadPoolTestResult) {
+            goto _output_error;
+        }
+    }
+    DISPLAYLEVEL(3, "thread pool tests OK \n");
+
+#endif /* ZSTD_MULTITHREAD */
 
 _end:
     free(CNBuffer);
diff --git a/tests/gzip/Makefile b/tests/gzip/Makefile
index ac953bd..a50350f 100644
--- a/tests/gzip/Makefile
+++ b/tests/gzip/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2017-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/invalidDictionaries.c b/tests/invalidDictionaries.c
index 48ba6ae..b71f741 100644
--- a/tests/invalidDictionaries.c
+++ b/tests/invalidDictionaries.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/legacy.c b/tests/legacy.c
index d6407a7..ac4938f 100644
--- a/tests/legacy.c
+++ b/tests/legacy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/libzstd_partial_builds.sh b/tests/libzstd_partial_builds.sh
index bee2dbd..05dad8f 100755
--- a/tests/libzstd_partial_builds.sh
+++ b/tests/libzstd_partial_builds.sh
@@ -21,7 +21,7 @@
     $ECHO "$@ correctly not present"  # for some reason, this $ECHO must exist, otherwise mustBeAbsent() always fails (??)
 }
 
-# default compilation : all features enabled
+# default compilation : all features enabled - no zbuff
 $ECHO "testing default library compilation"
 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -29,10 +29,10 @@
 isPresent "zstd_decompress.o"
 isPresent "zdict.o"
 isPresent "zstd_v07.o"
-isPresent "zbuff_compress.o"
+mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
-# compression disabled => also disable zdict and zbuff
+# compression disabled => also disable zdict
 $ECHO "testing with compression disabled"
 ZSTD_LIB_COMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -43,7 +43,7 @@
 mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
-# decompression disabled => also disable legacy and zbuff
+# decompression disabled => also disable legacy
 $ECHO "testing with decompression disabled"
 ZSTD_LIB_DECOMPRESSION=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
 nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
@@ -65,6 +65,17 @@
 mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
+# deprecated function enabled => zbuff present
+$ECHO "testing with deprecated functions enabled"
+ZSTD_LIB_DEPRECATED=1 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
+nm $DIR/../lib/libzstd.a | $GREP "\.o" > tmplog
+isPresent "zstd_compress.o"
+isPresent "zstd_decompress.o"
+isPresent "zdict.o"
+isPresent "zstd_v07.o"
+isPresent "zbuff_compress.o"
+$RM $DIR/../lib/libzstd.a tmplog
+
 # dictionary builder disabled => only remove zdict
 $ECHO "testing with dictionary builder disabled"
 ZSTD_LIB_DICTBUILDER=0 CFLAGS= make -C $DIR/../lib libzstd.a > $INTOVOID
@@ -73,7 +84,7 @@
 isPresent "zstd_decompress.o"
 mustBeAbsent "zdict.o"
 isPresent "zstd_v07.o"
-isPresent "zbuff_compress.o"
+mustBeAbsent "zbuff_compress.o"
 $RM $DIR/../lib/libzstd.a tmplog
 
 # both decompression and dictionary builder disabled => only compression remains
diff --git a/tests/longmatch.c b/tests/longmatch.c
index 075b152..a171c0e 100644
--- a/tests/longmatch.c
+++ b/tests/longmatch.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 8962182..a0cfa58 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/playTests.sh b/tests/playTests.sh
index da0e67a..fa748c0 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -124,6 +124,23 @@
     Darwin | FreeBSD | OpenBSD | NetBSD) MTIME="stat -f %m" ;;
 esac
 
+GET_PERMS="stat -c %a"
+case "$UNAME" in
+    Darwin | FreeBSD | OpenBSD | NetBSD) GET_PERMS="stat -f %Lp" ;;
+esac
+
+assertFilePermissions() {
+    STAT1=$($GET_PERMS "$1")
+    STAT2=$2
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match expected ($STAT1 != $STAT2)"
+}
+
+assertSamePermissions() {
+    STAT1=$($GET_PERMS "$1")
+    STAT2=$($GET_PERMS "$2")
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)"
+}
+
 DIFF="diff"
 case "$UNAME" in
   SunOS) DIFF="gdiff" ;;
@@ -192,7 +209,7 @@
 zstd tmp -c > tmpCompressed
 zstd tmp --stdout > tmpCompressed       # long command format
 println "test : compress to named file"
-rm tmpCompressed
+rm -f tmpCompressed
 zstd tmp -o tmpCompressed
 test -f tmpCompressed   # file must be created
 println "test : force write, correct order"
@@ -346,7 +363,7 @@
 zstd tmp -f -o "$INTOVOID" 2>&1 | grep -v "Refusing to remove non-regular file"
 println "test : --rm on stdin"
 println a | zstd --rm > $INTOVOID   # --rm should remain silent
-rm tmp
+rm -f tmp
 zstd -f tmp && die "tmp not present : should have failed"
 test ! -f tmp.zst  # tmp.zst should not be created
 println "test : -d -f do not delete destination when source is not present"
@@ -354,7 +371,7 @@
 zstd -d -f tmp.zst && die "attempt to decompress a non existing file"
 test -f tmp  # destination file should still be present
 println "test : -f do not delete destination when source is not present"
-rm tmp         # erase source file
+rm -f tmp         # erase source file
 touch tmp.zst  # create destination file
 zstd -f tmp && die "attempt to compress a non existing file"
 test -f tmp.zst  # destination file should still be present
@@ -368,7 +385,7 @@
 dd bs=1048576 count=1 if=/dev/zero of=tmp
 zstd -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
 $DIFF -s tmp1 tmp
-rm tmp*
+rm -f tmp*
 
 
 println "\n===>  compress multiple files"
@@ -415,7 +432,7 @@
 test -f tmp1.zst
 test -f tmp2.zst
 test -f tmp3.zst
-rm tmp1 tmp2 tmp3
+rm -f tmp1 tmp2 tmp3
 println "decompress tmp* : "
 zstd -df ./*.zst
 test -f tmp1
@@ -430,7 +447,7 @@
 test -f tmpdec  # should check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
 println "compress multiple files including a missing one (notHere) : "
 zstd -f tmp1 notHere tmp2 && die "missing file not detected!"
-rm tmp*
+rm -f tmp*
 
 
 if [ "$isWindows" = false ] ; then
@@ -445,6 +462,96 @@
     rm -rf tmp*
 fi
 
+println "\n===>  zstd created file permissions tests"
+if [ "$isWindows" = false ] ; then
+    rm -f tmp1 tmp2 tmp1.zst tmp2.zst tmp1.out tmp2.out # todo: remove
+
+    ORIGINAL_UMASK=$(umask)
+    umask 0000
+
+    datagen > tmp1
+    datagen > tmp2
+    assertFilePermissions tmp1 666
+    assertFilePermissions tmp2 666
+
+    println "test : copy 666 permissions in file -> file compression "
+    zstd -f tmp1 -o tmp1.zst
+    assertSamePermissions tmp1 tmp1.zst
+    println "test : copy 666 permissions in file -> file decompression "
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertSamePermissions tmp1.zst tmp1.out
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : copy 400 permissions in file -> file compression (write to a read-only file) "
+    chmod 0400 tmp1
+    assertFilePermissions tmp1 400
+    zstd -f tmp1 -o tmp1.zst
+    assertSamePermissions tmp1 tmp1.zst
+    println "test : copy 400 permissions in file -> file decompression (write to a read-only file) "
+    zstd -f -d tmp1.zst -o tmp1
+    assertSamePermissions tmp1.zst tmp1
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : check created permissions from stdin input in compression "
+    zstd -f -o tmp1.zst < tmp1
+    assertFilePermissions tmp1.zst 666
+    println "test : check created permissions from stdin input in decompression "
+    zstd -f -d -o tmp1.out < tmp1.zst
+    assertFilePermissions tmp1.out 666
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : check created permissions from multiple inputs in compression "
+    zstd -f tmp1 tmp2 -o tmp1.zst
+    assertFilePermissions tmp1.zst 666
+    println "test : check created permissions from multiple inputs in decompression "
+    cp tmp1.zst tmp2.zst
+    zstd -f -d tmp1.zst tmp2.zst -o tmp1.out
+    assertFilePermissions tmp1.out 666
+
+    rm -f tmp1.zst tmp2.zst tmp1.out tmp2.out
+
+    println "test : check permissions on pre-existing output file in compression "
+    chmod 0600 tmp1
+    touch tmp1.zst
+    chmod 0400 tmp1.zst
+    zstd -f tmp1 -o tmp1.zst
+    assertFilePermissions tmp1.zst 600
+    println "test : check permissions on pre-existing output file in decompression "
+    chmod 0400 tmp1.zst
+    touch tmp1.out
+    chmod 0200 tmp1.out
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertFilePermissions tmp1.out 400
+
+    rm -f tmp1.zst tmp1.out
+
+    umask 0666
+    chmod 0666 tmp1 tmp2
+
+    println "test : respect umask when copying permissions in file -> file compression "
+    zstd -f tmp1 -o tmp1.zst
+    assertFilePermissions tmp1.zst 0
+    println "test : respect umask when copying permissions in file -> file decompression "
+    chmod 0666 tmp1.zst
+    zstd -f -d tmp1.zst -o tmp1.out
+    assertFilePermissions tmp1.out 0
+
+    rm -f tmp1.zst tmp1.out
+
+    println "test : respect umask when compressing from stdin input "
+    zstd -f -o tmp1.zst < tmp1
+    assertFilePermissions tmp1.zst 0
+    println "test : respect umask when decompressing from stdin input "
+    chmod 0666 tmp1.zst
+    zstd -f -d -o tmp1.out < tmp1.zst
+    assertFilePermissions tmp1.out 0
+
+    rm -f tmp1 tmp2 tmp1.zst tmp2.zst tmp1.out tmp2.out
+    umask $ORIGINAL_UMASK
+fi
 
 if [ -n "$DEVNULLRIGHTS" ] ; then
     # these tests requires sudo rights, which is uncommon.
@@ -459,6 +566,22 @@
     ls -las $INTOVOID | grep "rw-rw-rw-"
 fi
 
+if [ -n "$READFROMBLOCKDEVICE" ] ; then
+    # This creates a temporary block device, which is only possible on unix-y
+    # systems, is somewhat invasive, and requires sudo. For these reasons, you
+    # have to specifically ask for this test.
+    println "\n===> checking that zstd can read from a block device"
+    datagen -g65536 > tmp.img
+    sudo losetup -fP tmp.img
+    LOOP_DEV=$(losetup -a | grep 'tmp\.img' | cut -f1 -d:)
+    [ -z "$LOOP_DEV" ] && die "failed to get loopback device"
+    sudoZstd $LOOP_DEV -c > tmp.img.zst && die "should fail without -f"
+    sudoZstd -f $LOOP_DEV -c > tmp.img.zst
+    zstd -d tmp.img.zst -o tmp.img.copy
+    sudo losetup -d $LOOP_DEV
+    $DIFF -s tmp.img tmp.img.copy || die "round trip failed"
+    rm -f tmp.img tmp.img.zst tmp.img.copy
+fi
 
 println "\n===>  compress multiple files into an output directory, --output-dir-flat"
 println henlo > tmp1
@@ -651,16 +774,16 @@
 ln -s helloworld.zst helloworld.link.zst
 $EXE_PREFIX ./zstdcat helloworld.link.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
-rm zstdcat
-rm result.tmp
+rm -f zstdcat
+rm -f result.tmp
 println "testing zcat symlink"
 ln -sf "$ZSTD_BIN" zcat
 $EXE_PREFIX ./zcat helloworld.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
 $EXE_PREFIX ./zcat helloworld.link.zst > result.tmp
 $DIFF helloworld.tmp result.tmp
-rm zcat
-rm ./*.tmp ./*.zstd
+rm -f zcat
+rm -f ./*.tmp ./*.zstd
 println "frame concatenation tests completed"
 
 
@@ -722,7 +845,7 @@
 zstd -d -v -f tmpSparseCompressed -c >> tmpSparseRegenerated
 ls -ls tmpSparse*  # look at file size and block size on disk
 $DIFF tmpSparse2M tmpSparseRegenerated
-rm tmpSparse*
+rm -f tmpSparse*
 
 
 println "\n===>  stream-size mode"
@@ -805,6 +928,8 @@
 zstd -f tmp -D tmpDict
 zstd -d tmp.zst -D tmpDict -fo result
 $DIFF "$TESTFILE" result
+println "- Dictionary compression with hlog < clog"
+zstd -6f tmp -D tmpDict --zstd=clog=25,hlog=23
 println "- Dictionary compression with btlazy2 strategy"
 zstd -f tmp -D tmpDict --zstd=strategy=6
 zstd -d tmp.zst -D tmpDict -fo result
@@ -866,7 +991,7 @@
   println "- Create dictionary with multithreading enabled"
   zstd --train -T0 "$TESTDIR"/*.c "$PRGDIR"/*.c -o tmpDict
 fi
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  fastCover dictionary builder : advanced options "
@@ -908,7 +1033,7 @@
 test -f tmpDict
 zstd --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  legacy dictionary builder "
@@ -936,7 +1061,7 @@
 test -f tmpDict
 zstd --train-legacy "$TESTDIR"/*.c "$PRGDIR"/*.c
 test -f dictionary
-rm tmp* dictionary
+rm -f tmp* dictionary
 
 
 println "\n===>  integrity tests "
@@ -1008,7 +1133,7 @@
         gzip -t -v tmp.gz
         gzip -f tmp
         zstd -d -f -v tmp.gz
-        rm tmp*
+        rm -f tmp*
     else
         println "gzip binary not detected"
     fi
@@ -1025,7 +1150,7 @@
     zstd -f tmp
     cat tmp.gz tmp.zst tmp.gz tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.gz | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "gzip mode not supported"
 fi
@@ -1056,7 +1181,7 @@
         lzma -Q -f -k --lzma1 tmp
         zstd -d -f -v tmp.xz
         zstd -d -f -v tmp.lzma
-        rm tmp*
+        rm -f tmp*
         println "Creating symlinks"
         ln -s "$ZSTD_BIN" ./xz
         ln -s "$ZSTD_BIN" ./unxz
@@ -1073,8 +1198,8 @@
         ./xz -d tmp.xz
         lzma -Q tmp
         ./lzma -d tmp.lzma
-        rm xz unxz lzma unlzma
-        rm tmp*
+        rm -f xz unxz lzma unlzma
+        rm -f tmp*
     else
         println "xz binary not detected"
     fi
@@ -1093,7 +1218,7 @@
     cat tmp.xz tmp.lzma tmp.zst tmp.lzma tmp.xz tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.xz | zstd -t > $INTOVOID && die "incomplete frame not detected !"
     truncateLastByte tmp.lzma | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "xz mode not supported"
 fi
@@ -1112,7 +1237,7 @@
         lz4 -t -v tmp.lz4
         lz4 -f -m tmp   # ensure result is sent into tmp.lz4, not stdout
         zstd -d -f -v tmp.lz4
-        rm tmp*
+        rm -f tmp*
     else
         println "lz4 binary not detected"
     fi
@@ -1128,7 +1253,7 @@
     zstd -f tmp
     cat tmp.lz4 tmp.zst tmp.lz4 tmp.zst | zstd -d -f -o tmp
     truncateLastByte tmp.lz4 | zstd -t > $INTOVOID && die "incomplete frame not detected !"
-    rm tmp*
+    rm -f tmp*
 else
     println "\nlz4 mode not supported"
 fi
@@ -1162,7 +1287,7 @@
 datagen > tmp
 tar cf tmp.tar tmp
 zstd tmp.tar -o tmp.tzst
-rm tmp.tar
+rm -f tmp.tar
 zstd -d tmp.tzst
 [ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
 rm -f tmp.tar tmp.tzst
@@ -1223,7 +1348,7 @@
     println "\n===>  zstdmt round-trip tests "
     roundTripTest -g4M "1 -T0"
     roundTripTest -g8M "3 -T2"
-    roundTripTest -g8M "19 -T0 --long"
+    roundTripTest -g8M "19 --long"
     roundTripTest -g8000K "2 --threads=2"
     fileRoundTripTest -g4M "19 -T2 -B1M"
 
@@ -1241,7 +1366,7 @@
     ZSTD_NBTHREADS=50000000000 zstd -f mt_tmp # numeric value too large, warn and revert to default setting=
     ZSTD_NBTHREADS=2  zstd -f mt_tmp # correct usage
     ZSTD_NBTHREADS=1  zstd -f mt_tmp # correct usage: single thread
-    rm mt_tmp*
+    rm -f mt_tmp*
 
     println "\n===>  ovLog tests "
     datagen -g2MB > tmp
@@ -1265,7 +1390,7 @@
     println "\n===>  no multithreading, skipping zstdmt tests "
 fi
 
-rm tmp*
+rm -f tmp*
 
 println "\n===>  zstd --list/-l single frame tests "
 datagen > tmp1
@@ -1298,9 +1423,9 @@
 dd bs=1 count=100 if=$FULL_COMPRESSED_FILE of=$TRUNCATED_COMPRESSED_FILE
 zstd --list $TRUNCATED_COMPRESSED_FILE && die "-l must fail on truncated file"
 
-rm $TEST_DATA_FILE
-rm $FULL_COMPRESSED_FILE
-rm $TRUNCATED_COMPRESSED_FILE
+rm -f $TEST_DATA_FILE
+rm -f $FULL_COMPRESSED_FILE
+rm -f $TRUNCATED_COMPRESSED_FILE
 
 println "\n===>  zstd --list/-l errors when presented with stdin / no files"
 zstd -l && die "-l must fail on empty list of files"
@@ -1344,7 +1469,7 @@
 zstd -b1e10i0 --trace tmp.trace tmp1
 zstd -b1e10i0 --trace tmp.trace tmp1 tmp2 tmp3
 
-rm tmp*
+rm -f tmp*
 
 
 println "\n===>   zstd long distance matching tests "
@@ -1428,6 +1553,14 @@
 zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered"
 rm -rf tmp*
 
+println "\n===> patch-from very large dictionary and file test"
+datagen -g550000000 -P0 > tmp_dict
+datagen -g100000000 -P1 > tmp_patch
+zstd --long=30 -1f --patch-from tmp_dict tmp_patch
+zstd --long=30 -df --patch-from tmp_dict tmp_patch.zst -o tmp_patch_recon
+$DIFF -s tmp_patch_recon tmp_patch
+rm -rf tmp*
+
 println "\n===> patch-from --stream-size test"
 datagen -g1000 -P50 > tmp_dict
 datagen -g1000 -P10 > tmp_patch
diff --git a/tests/poolTests.c b/tests/poolTests.c
index c7fcfa8..08f31c0 100644
--- a/tests/poolTests.c
+++ b/tests/poolTests.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/rateLimiter.py b/tests/rateLimiter.py
index fbec8c2..2629372 100755
--- a/tests/rateLimiter.py
+++ b/tests/rateLimiter.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # ################################################################
-# Copyright (c) 2018-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/Makefile b/tests/regression/Makefile
index 6874d81..d0d7bcf 100644
--- a/tests/regression/Makefile
+++ b/tests/regression/Makefile
@@ -1,5 +1,5 @@
 # ################################################################
-# Copyright (c) 2015-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/config.c b/tests/regression/config.c
index ee0a71e..4c66dd1 100644
--- a/tests/regression/config.c
+++ b/tests/regression/config.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,20 +28,134 @@
     };
 
 /* Define a config for each level we want to test with. */
-#define LEVEL(x)                                                \
-    param_value_t const level_##x##_param_values[] = {          \
-        {.param = ZSTD_c_compressionLevel, .value = x},         \
-    };                                                          \
-    config_t const level_##x = {                                \
-        .name = "level " #x,                                    \
-        .cli_args = "-" #x,                                     \
-        .param_values = PARAM_VALUES(level_##x##_param_values), \
-    };                                                          \
-    config_t const level_##x##_dict = {                         \
-        .name = "level " #x " with dict",                       \
-        .cli_args = "-" #x,                                     \
-        .param_values = PARAM_VALUES(level_##x##_param_values), \
-        .use_dictionary = 1,                                    \
+#define LEVEL(x)                                                                  \
+    param_value_t const level_##x##_param_values[] = {                            \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dms[] = {                        \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},         \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dds[] = {                        \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 1},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},         \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dictcopy[] = {                   \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceCopy},           \
+    };                                                                            \
+    param_value_t const level_##x##_param_values_dictload[] = {                   \
+        {.param = ZSTD_c_compressionLevel, .value = x},                           \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                  \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceLoad},           \
+    };                                                                            \
+    config_t const level_##x = {                                                  \
+        .name = "level " #x,                                                      \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values),                   \
+    };                                                                            \
+    config_t const level_##x##_dict = {                                           \
+        .name = "level " #x " with dict",                                         \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values),                   \
+        .use_dictionary = 1,                                                      \
+    };                                                                            \
+    config_t const level_##x##_dict_dms = {                                       \
+        .name = "level " #x " with dict dms",                                     \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dms),               \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_dds = {                                       \
+        .name = "level " #x " with dict dds",                                     \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dds),               \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_copy = {                                      \
+        .name = "level " #x " with dict copy",                                    \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dictcopy),          \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };                                                                            \
+    config_t const level_##x##_dict_load = {                                      \
+        .name = "level " #x " with dict load",                                    \
+        .cli_args = "-" #x,                                                       \
+        .param_values = PARAM_VALUES(level_##x##_param_values_dictload),          \
+        .use_dictionary = 1,                                                      \
+        .advanced_api_only = 1,                                                   \
+    };
+
+/* Define a config specifically to test row hash based levels and settings.
+ */
+#define ROW_LEVEL(x, y)                                                            \
+    param_value_t const row_##y##_level_##x##_param_values[] = {                   \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dms[] = {               \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},          \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dds[] = {               \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 1},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach},          \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dictcopy[] = {          \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceCopy},            \
+    };                                                                             \
+    param_value_t const row_##y##_level_##x##_param_values_dictload[] = {          \
+        {.param = ZSTD_c_useRowMatchFinder, .value = y},                           \
+        {.param = ZSTD_c_compressionLevel, .value = x},                            \
+        {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0},                   \
+        {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceLoad},            \
+    };                                                                             \
+    config_t const row_##y##_level_##x = {                                         \
+        .name = "level " #x " row " #y,                                            \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values),          \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_dms = {                              \
+        .name = "level " #x " row " #y " with dict dms",                           \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dms),      \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_dds = {                              \
+        .name = "level " #x " row " #y " with dict dds",                           \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dds),      \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_copy = {                             \
+        .name = "level " #x " row " #y" with dict copy",                          \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictcopy), \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
+    };                                                                             \
+    config_t const row_##y##_level_##x##_dict_load = {                             \
+        .name = "level " #x " row " #y " with dict load",                          \
+        .cli_args = "-" #x,                                                        \
+        .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictload), \
+        .use_dictionary = 1,                                                       \
+        .advanced_api_only = 1,                                                    \
     };
 
 #define PARAM_VALUES(pv) \
@@ -51,6 +165,7 @@
 
 #undef LEVEL
 #undef FAST_LEVEL
+#undef ROW_LEVEL
 
 static config_t no_pledged_src_size = {
     .name = "no source size",
@@ -194,8 +309,10 @@
 static config_t const* g_configs[] = {
 
 #define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict,
-#define LEVEL(x) &level_##x, &level_##x##_dict,
+#define LEVEL(x) &level_##x, &level_##x##_dict, &level_##x##_dict_dms, &level_##x##_dict_dds, &level_##x##_dict_copy, &level_##x##_dict_load,
+#define ROW_LEVEL(x, y) &row_##y##_level_##x, &row_##y##_level_##x##_dict_dms, &row_##y##_level_##x##_dict_dds, &row_##y##_level_##x##_dict_copy, &row_##y##_level_##x##_dict_load,
 #include "levels.h"
+#undef ROW_LEVEL
 #undef LEVEL
 #undef FAST_LEVEL
 
diff --git a/tests/regression/config.h b/tests/regression/config.h
index 9c4562c..dd88937 100644
--- a/tests/regression/config.h
+++ b/tests/regression/config.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -53,6 +53,11 @@
      * when the method allows it. Defaults to yes.
      */
     int no_pledged_src_size;
+    /**
+     * Boolean parameter that says that this config should only be used
+     * for methods that use the advanced compression API
+     */
+    int advanced_api_only;
 } config_t;
 
 /**
diff --git a/tests/regression/data.c b/tests/regression/data.c
index b06c691..341b02d 100644
--- a/tests/regression/data.c
+++ b/tests/regression/data.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>   /* free() */
 
 #include <sys/stat.h>
 
diff --git a/tests/regression/data.h b/tests/regression/data.h
index 9f2fc89..e54e6a1 100644
--- a/tests/regression/data.h
+++ b/tests/regression/data.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/levels.h b/tests/regression/levels.h
index aedc1ce..3b211f8 100644
--- a/tests/regression/levels.h
+++ b/tests/regression/levels.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,6 +14,9 @@
 #ifndef FAST_LEVEL
 # error FAST_LEVEL(x) must be defined
 #endif
+#ifndef ROW_LEVEL
+# error ROW_LEVEL(x, y) must be defined
+#endif
 
 /**
  * The levels are chosen to trigger every strategy in every source size,
@@ -31,12 +34,22 @@
 
 LEVEL(3)
 LEVEL(4)
+/* ROW_LEVEL triggers the row hash (force enabled and disabled) with different
+ * dictionary strategies, and 16/32 row entries based on the level/searchLog.
+ * 1 == disabled, 2 == enabled.
+ */
+ROW_LEVEL(5, 1)
+ROW_LEVEL(5, 2)
 LEVEL(5)
 LEVEL(6)
+ROW_LEVEL(7, 1)
+ROW_LEVEL(7, 2)
 LEVEL(7)
 
 LEVEL(9)
 
+ROW_LEVEL(12, 1)
+ROW_LEVEL(12, 2)
 LEVEL(13)
 
 LEVEL(16)
diff --git a/tests/regression/method.c b/tests/regression/method.c
index c4fe94a..55b1154 100644
--- a/tests/regression/method.c
+++ b/tests/regression/method.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -102,6 +102,9 @@
      */
     if (base->data->type != data_type_file)
         return result_error(result_error_skip);
+    
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
 
     if (config->use_dictionary || config->no_pledged_src_size)
         return result_error(result_error_skip);
@@ -151,6 +154,9 @@
 
     if (base->data->type != data_type_dir)
         return result_error(result_error_skip);
+    
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
 
     int const level = config_get_level(config);
 
@@ -254,6 +260,9 @@
     if (config->cli_args == NULL)
         return result_error(result_error_skip);
 
+    if (config->advanced_api_only)
+        return result_error(result_error_skip);
+
     /* We don't support no pledged source size with directories. Too slow. */
     if (state->data->type == data_type_dir && config->no_pledged_src_size)
         return result_error(result_error_skip);
@@ -523,6 +532,10 @@
     result = result_error(result_error_skip);
     goto out;
   }
+  if (config->advanced_api_only) {
+    result = result_error(result_error_skip);
+    goto out;
+  }
   if (init_cstream(state, zcs, config, advanced, cdict ? &cd : NULL)) {
     result = result_error(result_error_compression_error);
     goto out;
@@ -651,7 +664,7 @@
 };
 
 method_t const old_streaming_cdict = {
-    .name = "old streaming cdcit",
+    .name = "old streaming cdict",
     .create = buffer_state_create,
     .compress = old_streaming_compress_cdict,
     .destroy = buffer_state_destroy,
diff --git a/tests/regression/method.h b/tests/regression/method.h
index d89e64d..1a36a93 100644
--- a/tests/regression/method.h
+++ b/tests/regression/method.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/result.c b/tests/regression/result.c
index 5fc37c0..1f879c1 100644
--- a/tests/regression/result.c
+++ b/tests/regression/result.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/result.h b/tests/regression/result.h
index 7b45616..197fa90 100644
--- a/tests/regression/result.h
+++ b/tests/regression/result.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/regression/results.csv b/tests/regression/results.csv
index 0fb1d16..b94d550 100644
--- a/tests/regression/results.csv
+++ b/tests/regression/results.csv
@@ -6,10 +6,10 @@
 silesia.tar,                        level 1,                            compress simple,                    5334885
 silesia.tar,                        level 3,                            compress simple,                    4861425
 silesia.tar,                        level 4,                            compress simple,                    4799630
-silesia.tar,                        level 5,                            compress simple,                    4722324
-silesia.tar,                        level 6,                            compress simple,                    4672279
-silesia.tar,                        level 7,                            compress simple,                    4606715
-silesia.tar,                        level 9,                            compress simple,                    4554147
+silesia.tar,                        level 5,                            compress simple,                    4719256
+silesia.tar,                        level 6,                            compress simple,                    4677721
+silesia.tar,                        level 7,                            compress simple,                    4613541
+silesia.tar,                        level 9,                            compress simple,                    4555426
 silesia.tar,                        level 13,                           compress simple,                    4491764
 silesia.tar,                        level 16,                           compress simple,                    4381332
 silesia.tar,                        level 19,                           compress simple,                    4281605
@@ -23,10 +23,10 @@
 github.tar,                         level 1,                            compress simple,                    39265
 github.tar,                         level 3,                            compress simple,                    38441
 github.tar,                         level 4,                            compress simple,                    38467
-github.tar,                         level 5,                            compress simple,                    39788
-github.tar,                         level 6,                            compress simple,                    39603
-github.tar,                         level 7,                            compress simple,                    39206
-github.tar,                         level 9,                            compress simple,                    36717
+github.tar,                         level 5,                            compress simple,                    39693
+github.tar,                         level 6,                            compress simple,                    39621
+github.tar,                         level 7,                            compress simple,                    39213
+github.tar,                         level 9,                            compress simple,                    36758
 github.tar,                         level 13,                           compress simple,                    35621
 github.tar,                         level 16,                           compress simple,                    40255
 github.tar,                         level 19,                           compress simple,                    32837
@@ -40,10 +40,10 @@
 silesia,                            level 1,                            compress cctx,                      5313204
 silesia,                            level 3,                            compress cctx,                      4849552
 silesia,                            level 4,                            compress cctx,                      4786970
-silesia,                            level 5,                            compress cctx,                      4710236
-silesia,                            level 6,                            compress cctx,                      4660056
-silesia,                            level 7,                            compress cctx,                      4596296
-silesia,                            level 9,                            compress cctx,                      4543925
+silesia,                            level 5,                            compress cctx,                      4707794
+silesia,                            level 6,                            compress cctx,                      4666383
+silesia,                            level 7,                            compress cctx,                      4603381
+silesia,                            level 9,                            compress cctx,                      4546001
 silesia,                            level 13,                           compress cctx,                      4482135
 silesia,                            level 16,                           compress cctx,                      4377465
 silesia,                            level 19,                           compress cctx,                      4293330
@@ -53,7 +53,7 @@
 silesia,                            small window log,                   compress cctx,                      7084179
 silesia,                            small hash log,                     compress cctx,                      6555021
 silesia,                            small chain log,                    compress cctx,                      4931148
-silesia,                            explicit params,                    compress cctx,                      4794677
+silesia,                            explicit params,                    compress cctx,                      4794479
 silesia,                            uncompressed literals,              compress cctx,                      4849552
 silesia,                            uncompressed literals optimal,      compress cctx,                      4293330
 silesia,                            huffman literals,                   compress cctx,                      6178460
@@ -73,13 +73,13 @@
 github,                             level 4,                            compress cctx,                      136199
 github,                             level 4 with dict,                  compress cctx,                      41725
 github,                             level 5,                            compress cctx,                      135121
-github,                             level 5 with dict,                  compress cctx,                      38934
+github,                             level 5 with dict,                  compress cctx,                      38759
 github,                             level 6,                            compress cctx,                      135122
-github,                             level 6 with dict,                  compress cctx,                      38628
+github,                             level 6 with dict,                  compress cctx,                      38669
 github,                             level 7,                            compress cctx,                      135122
-github,                             level 7 with dict,                  compress cctx,                      38745
+github,                             level 7 with dict,                  compress cctx,                      38755
 github,                             level 9,                            compress cctx,                      135122
-github,                             level 9 with dict,                  compress cctx,                      39341
+github,                             level 9 with dict,                  compress cctx,                      39398
 github,                             level 13,                           compress cctx,                      134064
 github,                             level 13 with dict,                 compress cctx,                      39948
 github,                             level 16,                           compress cctx,                      134064
@@ -97,30 +97,30 @@
 github,                             uncompressed literals optimal,      compress cctx,                      134064
 github,                             huffman literals,                   compress cctx,                      175568
 github,                             multithreaded with advanced params, compress cctx,                      141102
-silesia,                            level -5,                           zstdcli,                            6882553
-silesia,                            level -3,                           zstdcli,                            6568424
-silesia,                            level -1,                           zstdcli,                            6183451
+silesia,                            level -5,                           zstdcli,                            6737655
+silesia,                            level -3,                           zstdcli,                            6444725
+silesia,                            level -1,                           zstdcli,                            6178508
 silesia,                            level 0,                            zstdcli,                            4849600
-silesia,                            level 1,                            zstdcli,                            5314210
+silesia,                            level 1,                            zstdcli,                            5313252
 silesia,                            level 3,                            zstdcli,                            4849600
 silesia,                            level 4,                            zstdcli,                            4787018
-silesia,                            level 5,                            zstdcli,                            4710284
-silesia,                            level 6,                            zstdcli,                            4660104
-silesia,                            level 7,                            zstdcli,                            4596344
-silesia,                            level 9,                            zstdcli,                            4543973
+silesia,                            level 5,                            zstdcli,                            4707842
+silesia,                            level 6,                            zstdcli,                            4666431
+silesia,                            level 7,                            zstdcli,                            4603429
+silesia,                            level 9,                            zstdcli,                            4546049
 silesia,                            level 13,                           zstdcli,                            4482183
-silesia,                            level 16,                           zstdcli,                            4377513
-silesia,                            level 19,                           zstdcli,                            4293378
-silesia,                            long distance mode,                 zstdcli,                            4840792
+silesia,                            level 16,                           zstdcli,                            4360299
+silesia,                            level 19,                           zstdcli,                            4283285
+silesia,                            long distance mode,                 zstdcli,                            4840806
 silesia,                            multithreaded,                      zstdcli,                            4849600
-silesia,                            multithreaded long distance mode,   zstdcli,                            4840792
-silesia,                            small window log,                   zstdcli,                            7111012
-silesia,                            small hash log,                     zstdcli,                            6555069
-silesia,                            small chain log,                    zstdcli,                            4931196
-silesia,                            explicit params,                    zstdcli,                            4797112
+silesia,                            multithreaded long distance mode,   zstdcli,                            4840806
+silesia,                            small window log,                   zstdcli,                            7095967
+silesia,                            small hash log,                     zstdcli,                            6526189
+silesia,                            small chain log,                    zstdcli,                            4912245
+silesia,                            explicit params,                    zstdcli,                            4795856
 silesia,                            uncompressed literals,              zstdcli,                            5128030
-silesia,                            uncompressed literals optimal,      zstdcli,                            4325520
-silesia,                            huffman literals,                   zstdcli,                            5331216
+silesia,                            uncompressed literals optimal,      zstdcli,                            4317944
+silesia,                            huffman literals,                   zstdcli,                            5326316
 silesia,                            multithreaded with advanced params, zstdcli,                            5128030
 silesia.tar,                        level -5,                           zstdcli,                            6738934
 silesia.tar,                        level -3,                           zstdcli,                            6448419
@@ -129,23 +129,23 @@
 silesia.tar,                        level 1,                            zstdcli,                            5336318
 silesia.tar,                        level 3,                            zstdcli,                            4861512
 silesia.tar,                        level 4,                            zstdcli,                            4800529
-silesia.tar,                        level 5,                            zstdcli,                            4723364
-silesia.tar,                        level 6,                            zstdcli,                            4673663
-silesia.tar,                        level 7,                            zstdcli,                            4608403
-silesia.tar,                        level 9,                            zstdcli,                            4554751
+silesia.tar,                        level 5,                            zstdcli,                            4720121
+silesia.tar,                        level 6,                            zstdcli,                            4678661
+silesia.tar,                        level 7,                            zstdcli,                            4614424
+silesia.tar,                        level 9,                            zstdcli,                            4556062
 silesia.tar,                        level 13,                           zstdcli,                            4491768
-silesia.tar,                        level 16,                           zstdcli,                            4381336
-silesia.tar,                        level 19,                           zstdcli,                            4281609
+silesia.tar,                        level 16,                           zstdcli,                            4356831
+silesia.tar,                        level 19,                           zstdcli,                            4264491
 silesia.tar,                        no source size,                     zstdcli,                            4861508
-silesia.tar,                        long distance mode,                 zstdcli,                            4853153
+silesia.tar,                        long distance mode,                 zstdcli,                            4853226
 silesia.tar,                        multithreaded,                      zstdcli,                            4861512
-silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4853153
+silesia.tar,                        multithreaded long distance mode,   zstdcli,                            4853226
 silesia.tar,                        small window log,                   zstdcli,                            7101576
-silesia.tar,                        small hash log,                     zstdcli,                            6587959
-silesia.tar,                        small chain log,                    zstdcli,                            4943310
-silesia.tar,                        explicit params,                    zstdcli,                            4822362
+silesia.tar,                        small hash log,                     zstdcli,                            6529290
+silesia.tar,                        small chain log,                    zstdcli,                            4917022
+silesia.tar,                        explicit params,                    zstdcli,                            4821274
 silesia.tar,                        uncompressed literals,              zstdcli,                            5129559
-silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4320931
+silesia.tar,                        uncompressed literals optimal,      zstdcli,                            4307457
 silesia.tar,                        huffman literals,                   zstdcli,                            5347610
 silesia.tar,                        multithreaded with advanced params, zstdcli,                            5129559
 github,                             level -5,                           zstdcli,                            207285
@@ -163,13 +163,13 @@
 github,                             level 4,                            zstdcli,                            138199
 github,                             level 4 with dict,                  zstdcli,                            43251
 github,                             level 5,                            zstdcli,                            137121
-github,                             level 5 with dict,                  zstdcli,                            40741
+github,                             level 5 with dict,                  zstdcli,                            40728
 github,                             level 6,                            zstdcli,                            137122
-github,                             level 6 with dict,                  zstdcli,                            40632
+github,                             level 6 with dict,                  zstdcli,                            40630
 github,                             level 7,                            zstdcli,                            137122
-github,                             level 7 with dict,                  zstdcli,                            40771
+github,                             level 7 with dict,                  zstdcli,                            40747
 github,                             level 9,                            zstdcli,                            137122
-github,                             level 9 with dict,                  zstdcli,                            41332
+github,                             level 9 with dict,                  zstdcli,                            41338
 github,                             level 13,                           zstdcli,                            136064
 github,                             level 13 with dict,                 zstdcli,                            41743
 github,                             level 16,                           zstdcli,                            136064
@@ -187,28 +187,28 @@
 github,                             uncompressed literals optimal,      zstdcli,                            159227
 github,                             huffman literals,                   zstdcli,                            144465
 github,                             multithreaded with advanced params, zstdcli,                            167915
-github.tar,                         level -5,                           zstdcli,                            46751
-github.tar,                         level -5 with dict,                 zstdcli,                            43975
-github.tar,                         level -3,                           zstdcli,                            43541
-github.tar,                         level -3 with dict,                 zstdcli,                            40809
-github.tar,                         level -1,                           zstdcli,                            42469
-github.tar,                         level -1 with dict,                 zstdcli,                            41126
+github.tar,                         level -5,                           zstdcli,                            46860
+github.tar,                         level -5 with dict,                 zstdcli,                            44575
+github.tar,                         level -3,                           zstdcli,                            43758
+github.tar,                         level -3 with dict,                 zstdcli,                            41451
+github.tar,                         level -1,                           zstdcli,                            42494
+github.tar,                         level -1 with dict,                 zstdcli,                            41135
 github.tar,                         level 0,                            zstdcli,                            38445
 github.tar,                         level 0 with dict,                  zstdcli,                            37999
-github.tar,                         level 1,                            zstdcli,                            39346
-github.tar,                         level 1 with dict,                  zstdcli,                            38313
+github.tar,                         level 1,                            zstdcli,                            39269
+github.tar,                         level 1 with dict,                  zstdcli,                            38284
 github.tar,                         level 3,                            zstdcli,                            38445
 github.tar,                         level 3 with dict,                  zstdcli,                            37999
 github.tar,                         level 4,                            zstdcli,                            38471
 github.tar,                         level 4 with dict,                  zstdcli,                            37952
-github.tar,                         level 5,                            zstdcli,                            39792
-github.tar,                         level 5 with dict,                  zstdcli,                            39231
-github.tar,                         level 6,                            zstdcli,                            39607
-github.tar,                         level 6 with dict,                  zstdcli,                            38669
-github.tar,                         level 7,                            zstdcli,                            39210
-github.tar,                         level 7 with dict,                  zstdcli,                            37958
-github.tar,                         level 9,                            zstdcli,                            36721
-github.tar,                         level 9 with dict,                  zstdcli,                            36886
+github.tar,                         level 5,                            zstdcli,                            39697
+github.tar,                         level 5 with dict,                  zstdcli,                            39032
+github.tar,                         level 6,                            zstdcli,                            39625
+github.tar,                         level 6 with dict,                  zstdcli,                            38614
+github.tar,                         level 7,                            zstdcli,                            39217
+github.tar,                         level 7 with dict,                  zstdcli,                            37871
+github.tar,                         level 9,                            zstdcli,                            36762
+github.tar,                         level 9 with dict,                  zstdcli,                            36641
 github.tar,                         level 13,                           zstdcli,                            35625
 github.tar,                         level 13 with dict,                 zstdcli,                            38730
 github.tar,                         level 16,                           zstdcli,                            40259
@@ -217,16 +217,16 @@
 github.tar,                         level 19 with dict,                 zstdcli,                            32899
 github.tar,                         no source size,                     zstdcli,                            38442
 github.tar,                         no source size with dict,           zstdcli,                            38004
-github.tar,                         long distance mode,                 zstdcli,                            39726
+github.tar,                         long distance mode,                 zstdcli,                            39730
 github.tar,                         multithreaded,                      zstdcli,                            38445
-github.tar,                         multithreaded long distance mode,   zstdcli,                            39726
-github.tar,                         small window log,                   zstdcli,                            199432
+github.tar,                         multithreaded long distance mode,   zstdcli,                            39730
+github.tar,                         small window log,                   zstdcli,                            198544
 github.tar,                         small hash log,                     zstdcli,                            129874
 github.tar,                         small chain log,                    zstdcli,                            41673
-github.tar,                         explicit params,                    zstdcli,                            41199
+github.tar,                         explicit params,                    zstdcli,                            41227
 github.tar,                         uncompressed literals,              zstdcli,                            41126
 github.tar,                         uncompressed literals optimal,      zstdcli,                            35392
-github.tar,                         huffman literals,                   zstdcli,                            38804
+github.tar,                         huffman literals,                   zstdcli,                            38781
 github.tar,                         multithreaded with advanced params, zstdcli,                            41126
 silesia,                            level -5,                           advanced one pass,                  6737607
 silesia,                            level -3,                           advanced one pass,                  6444677
@@ -235,23 +235,29 @@
 silesia,                            level 1,                            advanced one pass,                  5313204
 silesia,                            level 3,                            advanced one pass,                  4849552
 silesia,                            level 4,                            advanced one pass,                  4786970
-silesia,                            level 5,                            advanced one pass,                  4710236
-silesia,                            level 6,                            advanced one pass,                  4660056
-silesia,                            level 7,                            advanced one pass,                  4596296
-silesia,                            level 9,                            advanced one pass,                  4543925
+silesia,                            level 5 row 1,                      advanced one pass,                  4710236
+silesia,                            level 5 row 2,                      advanced one pass,                  4707794
+silesia,                            level 5,                            advanced one pass,                  4707794
+silesia,                            level 6,                            advanced one pass,                  4666383
+silesia,                            level 7 row 1,                      advanced one pass,                  4596296
+silesia,                            level 7 row 2,                      advanced one pass,                  4603381
+silesia,                            level 7,                            advanced one pass,                  4603381
+silesia,                            level 9,                            advanced one pass,                  4546001
+silesia,                            level 12 row 1,                     advanced one pass,                  4519288
+silesia,                            level 12 row 2,                     advanced one pass,                  4521397
 silesia,                            level 13,                           advanced one pass,                  4482135
-silesia,                            level 16,                           advanced one pass,                  4377465
-silesia,                            level 19,                           advanced one pass,                  4293330
+silesia,                            level 16,                           advanced one pass,                  4360251
+silesia,                            level 19,                           advanced one pass,                  4283237
 silesia,                            no source size,                     advanced one pass,                  4849552
-silesia,                            long distance mode,                 advanced one pass,                  4840744
+silesia,                            long distance mode,                 advanced one pass,                  4840738
 silesia,                            multithreaded,                      advanced one pass,                  4849552
-silesia,                            multithreaded long distance mode,   advanced one pass,                  4840744
+silesia,                            multithreaded long distance mode,   advanced one pass,                  4840758
 silesia,                            small window log,                   advanced one pass,                  7095919
-silesia,                            small hash log,                     advanced one pass,                  6555021
-silesia,                            small chain log,                    advanced one pass,                  4931148
-silesia,                            explicit params,                    advanced one pass,                  4797095
+silesia,                            small hash log,                     advanced one pass,                  6526141
+silesia,                            small chain log,                    advanced one pass,                  4912197
+silesia,                            explicit params,                    advanced one pass,                  4795856
 silesia,                            uncompressed literals,              advanced one pass,                  5127982
-silesia,                            uncompressed literals optimal,      advanced one pass,                  4325472
+silesia,                            uncompressed literals optimal,      advanced one pass,                  4317896
 silesia,                            huffman literals,                   advanced one pass,                  5326268
 silesia,                            multithreaded with advanced params, advanced one pass,                  5127982
 silesia.tar,                        level -5,                           advanced one pass,                  6738593
@@ -261,23 +267,29 @@
 silesia.tar,                        level 1,                            advanced one pass,                  5334885
 silesia.tar,                        level 3,                            advanced one pass,                  4861425
 silesia.tar,                        level 4,                            advanced one pass,                  4799630
-silesia.tar,                        level 5,                            advanced one pass,                  4722324
-silesia.tar,                        level 6,                            advanced one pass,                  4672279
-silesia.tar,                        level 7,                            advanced one pass,                  4606715
-silesia.tar,                        level 9,                            advanced one pass,                  4554147
+silesia.tar,                        level 5 row 1,                      advanced one pass,                  4722324
+silesia.tar,                        level 5 row 2,                      advanced one pass,                  4719256
+silesia.tar,                        level 5,                            advanced one pass,                  4719256
+silesia.tar,                        level 6,                            advanced one pass,                  4677721
+silesia.tar,                        level 7 row 1,                      advanced one pass,                  4606715
+silesia.tar,                        level 7 row 2,                      advanced one pass,                  4613541
+silesia.tar,                        level 7,                            advanced one pass,                  4613541
+silesia.tar,                        level 9,                            advanced one pass,                  4555426
+silesia.tar,                        level 12 row 1,                     advanced one pass,                  4529459
+silesia.tar,                        level 12 row 2,                     advanced one pass,                  4530256
 silesia.tar,                        level 13,                           advanced one pass,                  4491764
-silesia.tar,                        level 16,                           advanced one pass,                  4381332
-silesia.tar,                        level 19,                           advanced one pass,                  4281605
+silesia.tar,                        level 16,                           advanced one pass,                  4356827
+silesia.tar,                        level 19,                           advanced one pass,                  4264487
 silesia.tar,                        no source size,                     advanced one pass,                  4861425
-silesia.tar,                        long distance mode,                 advanced one pass,                  4847735
+silesia.tar,                        long distance mode,                 advanced one pass,                  4847754
 silesia.tar,                        multithreaded,                      advanced one pass,                  4861508
-silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4853149
+silesia.tar,                        multithreaded long distance mode,   advanced one pass,                  4853222
 silesia.tar,                        small window log,                   advanced one pass,                  7101530
-silesia.tar,                        small hash log,                     advanced one pass,                  6587951
-silesia.tar,                        small chain log,                    advanced one pass,                  4943307
-silesia.tar,                        explicit params,                    advanced one pass,                  4808589
+silesia.tar,                        small hash log,                     advanced one pass,                  6529232
+silesia.tar,                        small chain log,                    advanced one pass,                  4917041
+silesia.tar,                        explicit params,                    advanced one pass,                  4807380
 silesia.tar,                        uncompressed literals,              advanced one pass,                  5129458
-silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4320927
+silesia.tar,                        uncompressed literals optimal,      advanced one pass,                  4307453
 silesia.tar,                        huffman literals,                   advanced one pass,                  5347335
 silesia.tar,                        multithreaded with advanced params, advanced one pass,                  5129555
 github,                             level -5,                           advanced one pass,                  205285
@@ -288,26 +300,100 @@
 github,                             level -1 with dict,                 advanced one pass,                  43170
 github,                             level 0,                            advanced one pass,                  136335
 github,                             level 0 with dict,                  advanced one pass,                  41148
+github,                             level 0 with dict dms,              advanced one pass,                  41148
+github,                             level 0 with dict dds,              advanced one pass,                  41148
+github,                             level 0 with dict copy,             advanced one pass,                  41124
+github,                             level 0 with dict load,             advanced one pass,                  42252
 github,                             level 1,                            advanced one pass,                  142465
 github,                             level 1 with dict,                  advanced one pass,                  41682
+github,                             level 1 with dict dms,              advanced one pass,                  41682
+github,                             level 1 with dict dds,              advanced one pass,                  41682
+github,                             level 1 with dict copy,             advanced one pass,                  41674
+github,                             level 1 with dict load,             advanced one pass,                  43755
 github,                             level 3,                            advanced one pass,                  136335
 github,                             level 3 with dict,                  advanced one pass,                  41148
+github,                             level 3 with dict dms,              advanced one pass,                  41148
+github,                             level 3 with dict dds,              advanced one pass,                  41148
+github,                             level 3 with dict copy,             advanced one pass,                  41124
+github,                             level 3 with dict load,             advanced one pass,                  42252
 github,                             level 4,                            advanced one pass,                  136199
 github,                             level 4 with dict,                  advanced one pass,                  41251
+github,                             level 4 with dict dms,              advanced one pass,                  41251
+github,                             level 4 with dict dds,              advanced one pass,                  41251
+github,                             level 4 with dict copy,             advanced one pass,                  41216
+github,                             level 4 with dict load,             advanced one pass,                  41159
+github,                             level 5 row 1,                      advanced one pass,                  135121
+github,                             level 5 row 1 with dict dms,        advanced one pass,                  38938
+github,                             level 5 row 1 with dict dds,        advanced one pass,                  38732
+github,                             level 5 row 1 with dict copy,       advanced one pass,                  38934
+github,                             level 5 row 1 with dict load,       advanced one pass,                  40725
+github,                             level 5 row 2,                      advanced one pass,                  134584
+github,                             level 5 row 2 with dict dms,        advanced one pass,                  38758
+github,                             level 5 row 2 with dict dds,        advanced one pass,                  38728
+github,                             level 5 row 2 with dict copy,       advanced one pass,                  38759
+github,                             level 5 row 2 with dict load,       advanced one pass,                  41518
 github,                             level 5,                            advanced one pass,                  135121
-github,                             level 5 with dict,                  advanced one pass,                  38938
+github,                             level 5 with dict,                  advanced one pass,                  38758
+github,                             level 5 with dict dms,              advanced one pass,                  38758
+github,                             level 5 with dict dds,              advanced one pass,                  38728
+github,                             level 5 with dict copy,             advanced one pass,                  38759
+github,                             level 5 with dict load,             advanced one pass,                  40725
 github,                             level 6,                            advanced one pass,                  135122
-github,                             level 6 with dict,                  advanced one pass,                  38632
+github,                             level 6 with dict,                  advanced one pass,                  38671
+github,                             level 6 with dict dms,              advanced one pass,                  38671
+github,                             level 6 with dict dds,              advanced one pass,                  38630
+github,                             level 6 with dict copy,             advanced one pass,                  38669
+github,                             level 6 with dict load,             advanced one pass,                  40695
+github,                             level 7 row 1,                      advanced one pass,                  135122
+github,                             level 7 row 1 with dict dms,        advanced one pass,                  38771
+github,                             level 7 row 1 with dict dds,        advanced one pass,                  38771
+github,                             level 7 row 1 with dict copy,       advanced one pass,                  38745
+github,                             level 7 row 1 with dict load,       advanced one pass,                  40695
+github,                             level 7 row 2,                      advanced one pass,                  134584
+github,                             level 7 row 2 with dict dms,        advanced one pass,                  38758
+github,                             level 7 row 2 with dict dds,        advanced one pass,                  38747
+github,                             level 7 row 2 with dict copy,       advanced one pass,                  38755
+github,                             level 7 row 2 with dict load,       advanced one pass,                  41030
 github,                             level 7,                            advanced one pass,                  135122
-github,                             level 7 with dict,                  advanced one pass,                  38771
+github,                             level 7 with dict,                  advanced one pass,                  38758
+github,                             level 7 with dict dms,              advanced one pass,                  38758
+github,                             level 7 with dict dds,              advanced one pass,                  38747
+github,                             level 7 with dict copy,             advanced one pass,                  38755
+github,                             level 7 with dict load,             advanced one pass,                  40695
 github,                             level 9,                            advanced one pass,                  135122
-github,                             level 9 with dict,                  advanced one pass,                  39332
+github,                             level 9 with dict,                  advanced one pass,                  39437
+github,                             level 9 with dict dms,              advanced one pass,                  39437
+github,                             level 9 with dict dds,              advanced one pass,                  39338
+github,                             level 9 with dict copy,             advanced one pass,                  39398
+github,                             level 9 with dict load,             advanced one pass,                  41710
+github,                             level 12 row 1,                     advanced one pass,                  134180
+github,                             level 12 row 1 with dict dms,       advanced one pass,                  39677
+github,                             level 12 row 1 with dict dds,       advanced one pass,                  39677
+github,                             level 12 row 1 with dict copy,      advanced one pass,                  39677
+github,                             level 12 row 1 with dict load,      advanced one pass,                  41166
+github,                             level 12 row 2,                     advanced one pass,                  134180
+github,                             level 12 row 2 with dict dms,       advanced one pass,                  39677
+github,                             level 12 row 2 with dict dds,       advanced one pass,                  39677
+github,                             level 12 row 2 with dict copy,      advanced one pass,                  39677
+github,                             level 12 row 2 with dict load,      advanced one pass,                  41166
 github,                             level 13,                           advanced one pass,                  134064
 github,                             level 13 with dict,                 advanced one pass,                  39743
+github,                             level 13 with dict dms,             advanced one pass,                  39743
+github,                             level 13 with dict dds,             advanced one pass,                  39743
+github,                             level 13 with dict copy,            advanced one pass,                  39948
+github,                             level 13 with dict load,            advanced one pass,                  42626
 github,                             level 16,                           advanced one pass,                  134064
 github,                             level 16 with dict,                 advanced one pass,                  37577
+github,                             level 16 with dict dms,             advanced one pass,                  37577
+github,                             level 16 with dict dds,             advanced one pass,                  37577
+github,                             level 16 with dict copy,            advanced one pass,                  37568
+github,                             level 16 with dict load,            advanced one pass,                  42340
 github,                             level 19,                           advanced one pass,                  134064
 github,                             level 19 with dict,                 advanced one pass,                  37576
+github,                             level 19 with dict dms,             advanced one pass,                  37576
+github,                             level 19 with dict dds,             advanced one pass,                  37576
+github,                             level 19 with dict copy,            advanced one pass,                  37567
+github,                             level 19 with dict load,            advanced one pass,                  39613
 github,                             no source size,                     advanced one pass,                  136335
 github,                             no source size with dict,           advanced one pass,                  41148
 github,                             long distance mode,                 advanced one pass,                  136335
@@ -322,42 +408,116 @@
 github,                             huffman literals,                   advanced one pass,                  142465
 github,                             multithreaded with advanced params, advanced one pass,                  165915
 github.tar,                         level -5,                           advanced one pass,                  46856
-github.tar,                         level -5 with dict,                 advanced one pass,                  43971
+github.tar,                         level -5 with dict,                 advanced one pass,                  44571
 github.tar,                         level -3,                           advanced one pass,                  43754
-github.tar,                         level -3 with dict,                 advanced one pass,                  40805
+github.tar,                         level -3 with dict,                 advanced one pass,                  41447
 github.tar,                         level -1,                           advanced one pass,                  42490
-github.tar,                         level -1 with dict,                 advanced one pass,                  41122
+github.tar,                         level -1 with dict,                 advanced one pass,                  41131
 github.tar,                         level 0,                            advanced one pass,                  38441
 github.tar,                         level 0 with dict,                  advanced one pass,                  37995
+github.tar,                         level 0 with dict dms,              advanced one pass,                  38003
+github.tar,                         level 0 with dict dds,              advanced one pass,                  38003
+github.tar,                         level 0 with dict copy,             advanced one pass,                  37995
+github.tar,                         level 0 with dict load,             advanced one pass,                  37956
 github.tar,                         level 1,                            advanced one pass,                  39265
-github.tar,                         level 1 with dict,                  advanced one pass,                  38309
+github.tar,                         level 1 with dict,                  advanced one pass,                  38280
+github.tar,                         level 1 with dict dms,              advanced one pass,                  38290
+github.tar,                         level 1 with dict dds,              advanced one pass,                  38290
+github.tar,                         level 1 with dict copy,             advanced one pass,                  38280
+github.tar,                         level 1 with dict load,             advanced one pass,                  38729
 github.tar,                         level 3,                            advanced one pass,                  38441
 github.tar,                         level 3 with dict,                  advanced one pass,                  37995
+github.tar,                         level 3 with dict dms,              advanced one pass,                  38003
+github.tar,                         level 3 with dict dds,              advanced one pass,                  38003
+github.tar,                         level 3 with dict copy,             advanced one pass,                  37995
+github.tar,                         level 3 with dict load,             advanced one pass,                  37956
 github.tar,                         level 4,                            advanced one pass,                  38467
 github.tar,                         level 4 with dict,                  advanced one pass,                  37948
-github.tar,                         level 5,                            advanced one pass,                  39788
-github.tar,                         level 5 with dict,                  advanced one pass,                  39715
-github.tar,                         level 6,                            advanced one pass,                  39603
-github.tar,                         level 6 with dict,                  advanced one pass,                  38800
-github.tar,                         level 7,                            advanced one pass,                  39206
-github.tar,                         level 7 with dict,                  advanced one pass,                  38071
-github.tar,                         level 9,                            advanced one pass,                  36717
-github.tar,                         level 9 with dict,                  advanced one pass,                  36898
+github.tar,                         level 4 with dict dms,              advanced one pass,                  37954
+github.tar,                         level 4 with dict dds,              advanced one pass,                  37954
+github.tar,                         level 4 with dict copy,             advanced one pass,                  37948
+github.tar,                         level 4 with dict load,             advanced one pass,                  37927
+github.tar,                         level 5 row 1,                      advanced one pass,                  39788
+github.tar,                         level 5 row 1 with dict dms,        advanced one pass,                  39365
+github.tar,                         level 5 row 1 with dict dds,        advanced one pass,                  39233
+github.tar,                         level 5 row 1 with dict copy,       advanced one pass,                  39715
+github.tar,                         level 5 row 1 with dict load,       advanced one pass,                  39209
+github.tar,                         level 5 row 2,                      advanced one pass,                  39693
+github.tar,                         level 5 row 2 with dict dms,        advanced one pass,                  39024
+github.tar,                         level 5 row 2 with dict dds,        advanced one pass,                  39028
+github.tar,                         level 5 row 2 with dict copy,       advanced one pass,                  39040
+github.tar,                         level 5 row 2 with dict load,       advanced one pass,                  39037
+github.tar,                         level 5,                            advanced one pass,                  39693
+github.tar,                         level 5 with dict,                  advanced one pass,                  39040
+github.tar,                         level 5 with dict dms,              advanced one pass,                  39024
+github.tar,                         level 5 with dict dds,              advanced one pass,                  39028
+github.tar,                         level 5 with dict copy,             advanced one pass,                  39040
+github.tar,                         level 5 with dict load,             advanced one pass,                  39037
+github.tar,                         level 6,                            advanced one pass,                  39621
+github.tar,                         level 6 with dict,                  advanced one pass,                  38622
+github.tar,                         level 6 with dict dms,              advanced one pass,                  38608
+github.tar,                         level 6 with dict dds,              advanced one pass,                  38610
+github.tar,                         level 6 with dict copy,             advanced one pass,                  38622
+github.tar,                         level 6 with dict load,             advanced one pass,                  38962
+github.tar,                         level 7 row 1,                      advanced one pass,                  39206
+github.tar,                         level 7 row 1 with dict dms,        advanced one pass,                  37954
+github.tar,                         level 7 row 1 with dict dds,        advanced one pass,                  37954
+github.tar,                         level 7 row 1 with dict copy,       advanced one pass,                  38071
+github.tar,                         level 7 row 1 with dict load,       advanced one pass,                  38584
+github.tar,                         level 7 row 2,                      advanced one pass,                  39213
+github.tar,                         level 7 row 2 with dict dms,        advanced one pass,                  37848
+github.tar,                         level 7 row 2 with dict dds,        advanced one pass,                  37867
+github.tar,                         level 7 row 2 with dict copy,       advanced one pass,                  37848
+github.tar,                         level 7 row 2 with dict load,       advanced one pass,                  38582
+github.tar,                         level 7,                            advanced one pass,                  39213
+github.tar,                         level 7 with dict,                  advanced one pass,                  37848
+github.tar,                         level 7 with dict dms,              advanced one pass,                  37848
+github.tar,                         level 7 with dict dds,              advanced one pass,                  37867
+github.tar,                         level 7 with dict copy,             advanced one pass,                  37848
+github.tar,                         level 7 with dict load,             advanced one pass,                  38582
+github.tar,                         level 9,                            advanced one pass,                  36758
+github.tar,                         level 9 with dict,                  advanced one pass,                  36457
+github.tar,                         level 9 with dict dms,              advanced one pass,                  36549
+github.tar,                         level 9 with dict dds,              advanced one pass,                  36637
+github.tar,                         level 9 with dict copy,             advanced one pass,                  36457
+github.tar,                         level 9 with dict load,             advanced one pass,                  36350
+github.tar,                         level 12 row 1,                     advanced one pass,                  36435
+github.tar,                         level 12 row 1 with dict dms,       advanced one pass,                  36986
+github.tar,                         level 12 row 1 with dict dds,       advanced one pass,                  36986
+github.tar,                         level 12 row 1 with dict copy,      advanced one pass,                  36609
+github.tar,                         level 12 row 1 with dict load,      advanced one pass,                  36419
+github.tar,                         level 12 row 2,                     advanced one pass,                  36435
+github.tar,                         level 12 row 2 with dict dms,       advanced one pass,                  36986
+github.tar,                         level 12 row 2 with dict dds,       advanced one pass,                  36986
+github.tar,                         level 12 row 2 with dict copy,      advanced one pass,                  36609
+github.tar,                         level 12 row 2 with dict load,      advanced one pass,                  36424
 github.tar,                         level 13,                           advanced one pass,                  35621
 github.tar,                         level 13 with dict,                 advanced one pass,                  38726
+github.tar,                         level 13 with dict dms,             advanced one pass,                  38903
+github.tar,                         level 13 with dict dds,             advanced one pass,                  38903
+github.tar,                         level 13 with dict copy,            advanced one pass,                  38726
+github.tar,                         level 13 with dict load,            advanced one pass,                  36372
 github.tar,                         level 16,                           advanced one pass,                  40255
 github.tar,                         level 16 with dict,                 advanced one pass,                  33639
+github.tar,                         level 16 with dict dms,             advanced one pass,                  33544
+github.tar,                         level 16 with dict dds,             advanced one pass,                  33544
+github.tar,                         level 16 with dict copy,            advanced one pass,                  33639
+github.tar,                         level 16 with dict load,            advanced one pass,                  39353
 github.tar,                         level 19,                           advanced one pass,                  32837
 github.tar,                         level 19 with dict,                 advanced one pass,                  32895
+github.tar,                         level 19 with dict dms,             advanced one pass,                  32672
+github.tar,                         level 19 with dict dds,             advanced one pass,                  32672
+github.tar,                         level 19 with dict copy,            advanced one pass,                  32895
+github.tar,                         level 19 with dict load,            advanced one pass,                  32676
 github.tar,                         no source size,                     advanced one pass,                  38441
 github.tar,                         no source size with dict,           advanced one pass,                  37995
-github.tar,                         long distance mode,                 advanced one pass,                  39722
+github.tar,                         long distance mode,                 advanced one pass,                  39757
 github.tar,                         multithreaded,                      advanced one pass,                  38441
-github.tar,                         multithreaded long distance mode,   advanced one pass,                  39722
+github.tar,                         multithreaded long distance mode,   advanced one pass,                  39726
 github.tar,                         small window log,                   advanced one pass,                  198540
 github.tar,                         small hash log,                     advanced one pass,                  129870
 github.tar,                         small chain log,                    advanced one pass,                  41669
-github.tar,                         explicit params,                    advanced one pass,                  41199
+github.tar,                         explicit params,                    advanced one pass,                  41227
 github.tar,                         uncompressed literals,              advanced one pass,                  41122
 github.tar,                         uncompressed literals optimal,      advanced one pass,                  35388
 github.tar,                         huffman literals,                   advanced one pass,                  38777
@@ -369,23 +529,29 @@
 silesia,                            level 1,                            advanced one pass small out,        5313204
 silesia,                            level 3,                            advanced one pass small out,        4849552
 silesia,                            level 4,                            advanced one pass small out,        4786970
-silesia,                            level 5,                            advanced one pass small out,        4710236
-silesia,                            level 6,                            advanced one pass small out,        4660056
-silesia,                            level 7,                            advanced one pass small out,        4596296
-silesia,                            level 9,                            advanced one pass small out,        4543925
+silesia,                            level 5 row 1,                      advanced one pass small out,        4710236
+silesia,                            level 5 row 2,                      advanced one pass small out,        4707794
+silesia,                            level 5,                            advanced one pass small out,        4707794
+silesia,                            level 6,                            advanced one pass small out,        4666383
+silesia,                            level 7 row 1,                      advanced one pass small out,        4596296
+silesia,                            level 7 row 2,                      advanced one pass small out,        4603381
+silesia,                            level 7,                            advanced one pass small out,        4603381
+silesia,                            level 9,                            advanced one pass small out,        4546001
+silesia,                            level 12 row 1,                     advanced one pass small out,        4519288
+silesia,                            level 12 row 2,                     advanced one pass small out,        4521397
 silesia,                            level 13,                           advanced one pass small out,        4482135
-silesia,                            level 16,                           advanced one pass small out,        4377465
-silesia,                            level 19,                           advanced one pass small out,        4293330
+silesia,                            level 16,                           advanced one pass small out,        4360251
+silesia,                            level 19,                           advanced one pass small out,        4283237
 silesia,                            no source size,                     advanced one pass small out,        4849552
-silesia,                            long distance mode,                 advanced one pass small out,        4840744
+silesia,                            long distance mode,                 advanced one pass small out,        4840738
 silesia,                            multithreaded,                      advanced one pass small out,        4849552
-silesia,                            multithreaded long distance mode,   advanced one pass small out,        4840744
+silesia,                            multithreaded long distance mode,   advanced one pass small out,        4840758
 silesia,                            small window log,                   advanced one pass small out,        7095919
-silesia,                            small hash log,                     advanced one pass small out,        6555021
-silesia,                            small chain log,                    advanced one pass small out,        4931148
-silesia,                            explicit params,                    advanced one pass small out,        4797095
+silesia,                            small hash log,                     advanced one pass small out,        6526141
+silesia,                            small chain log,                    advanced one pass small out,        4912197
+silesia,                            explicit params,                    advanced one pass small out,        4795856
 silesia,                            uncompressed literals,              advanced one pass small out,        5127982
-silesia,                            uncompressed literals optimal,      advanced one pass small out,        4325472
+silesia,                            uncompressed literals optimal,      advanced one pass small out,        4317896
 silesia,                            huffman literals,                   advanced one pass small out,        5326268
 silesia,                            multithreaded with advanced params, advanced one pass small out,        5127982
 silesia.tar,                        level -5,                           advanced one pass small out,        6738593
@@ -395,23 +561,29 @@
 silesia.tar,                        level 1,                            advanced one pass small out,        5334885
 silesia.tar,                        level 3,                            advanced one pass small out,        4861425
 silesia.tar,                        level 4,                            advanced one pass small out,        4799630
-silesia.tar,                        level 5,                            advanced one pass small out,        4722324
-silesia.tar,                        level 6,                            advanced one pass small out,        4672279
-silesia.tar,                        level 7,                            advanced one pass small out,        4606715
-silesia.tar,                        level 9,                            advanced one pass small out,        4554147
+silesia.tar,                        level 5 row 1,                      advanced one pass small out,        4722324
+silesia.tar,                        level 5 row 2,                      advanced one pass small out,        4719256
+silesia.tar,                        level 5,                            advanced one pass small out,        4719256
+silesia.tar,                        level 6,                            advanced one pass small out,        4677721
+silesia.tar,                        level 7 row 1,                      advanced one pass small out,        4606715
+silesia.tar,                        level 7 row 2,                      advanced one pass small out,        4613541
+silesia.tar,                        level 7,                            advanced one pass small out,        4613541
+silesia.tar,                        level 9,                            advanced one pass small out,        4555426
+silesia.tar,                        level 12 row 1,                     advanced one pass small out,        4529459
+silesia.tar,                        level 12 row 2,                     advanced one pass small out,        4530256
 silesia.tar,                        level 13,                           advanced one pass small out,        4491764
-silesia.tar,                        level 16,                           advanced one pass small out,        4381332
-silesia.tar,                        level 19,                           advanced one pass small out,        4281605
+silesia.tar,                        level 16,                           advanced one pass small out,        4356827
+silesia.tar,                        level 19,                           advanced one pass small out,        4264487
 silesia.tar,                        no source size,                     advanced one pass small out,        4861425
-silesia.tar,                        long distance mode,                 advanced one pass small out,        4847735
+silesia.tar,                        long distance mode,                 advanced one pass small out,        4847754
 silesia.tar,                        multithreaded,                      advanced one pass small out,        4861508
-silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4853149
+silesia.tar,                        multithreaded long distance mode,   advanced one pass small out,        4853222
 silesia.tar,                        small window log,                   advanced one pass small out,        7101530
-silesia.tar,                        small hash log,                     advanced one pass small out,        6587951
-silesia.tar,                        small chain log,                    advanced one pass small out,        4943307
-silesia.tar,                        explicit params,                    advanced one pass small out,        4808589
+silesia.tar,                        small hash log,                     advanced one pass small out,        6529232
+silesia.tar,                        small chain log,                    advanced one pass small out,        4917041
+silesia.tar,                        explicit params,                    advanced one pass small out,        4807380
 silesia.tar,                        uncompressed literals,              advanced one pass small out,        5129458
-silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4320927
+silesia.tar,                        uncompressed literals optimal,      advanced one pass small out,        4307453
 silesia.tar,                        huffman literals,                   advanced one pass small out,        5347335
 silesia.tar,                        multithreaded with advanced params, advanced one pass small out,        5129555
 github,                             level -5,                           advanced one pass small out,        205285
@@ -422,26 +594,100 @@
 github,                             level -1 with dict,                 advanced one pass small out,        43170
 github,                             level 0,                            advanced one pass small out,        136335
 github,                             level 0 with dict,                  advanced one pass small out,        41148
+github,                             level 0 with dict dms,              advanced one pass small out,        41148
+github,                             level 0 with dict dds,              advanced one pass small out,        41148
+github,                             level 0 with dict copy,             advanced one pass small out,        41124
+github,                             level 0 with dict load,             advanced one pass small out,        42252
 github,                             level 1,                            advanced one pass small out,        142465
 github,                             level 1 with dict,                  advanced one pass small out,        41682
+github,                             level 1 with dict dms,              advanced one pass small out,        41682
+github,                             level 1 with dict dds,              advanced one pass small out,        41682
+github,                             level 1 with dict copy,             advanced one pass small out,        41674
+github,                             level 1 with dict load,             advanced one pass small out,        43755
 github,                             level 3,                            advanced one pass small out,        136335
 github,                             level 3 with dict,                  advanced one pass small out,        41148
+github,                             level 3 with dict dms,              advanced one pass small out,        41148
+github,                             level 3 with dict dds,              advanced one pass small out,        41148
+github,                             level 3 with dict copy,             advanced one pass small out,        41124
+github,                             level 3 with dict load,             advanced one pass small out,        42252
 github,                             level 4,                            advanced one pass small out,        136199
 github,                             level 4 with dict,                  advanced one pass small out,        41251
+github,                             level 4 with dict dms,              advanced one pass small out,        41251
+github,                             level 4 with dict dds,              advanced one pass small out,        41251
+github,                             level 4 with dict copy,             advanced one pass small out,        41216
+github,                             level 4 with dict load,             advanced one pass small out,        41159
+github,                             level 5 row 1,                      advanced one pass small out,        135121
+github,                             level 5 row 1 with dict dms,        advanced one pass small out,        38938
+github,                             level 5 row 1 with dict dds,        advanced one pass small out,        38732
+github,                             level 5 row 1 with dict copy,       advanced one pass small out,        38934
+github,                             level 5 row 1 with dict load,       advanced one pass small out,        40725
+github,                             level 5 row 2,                      advanced one pass small out,        134584
+github,                             level 5 row 2 with dict dms,        advanced one pass small out,        38758
+github,                             level 5 row 2 with dict dds,        advanced one pass small out,        38728
+github,                             level 5 row 2 with dict copy,       advanced one pass small out,        38759
+github,                             level 5 row 2 with dict load,       advanced one pass small out,        41518
 github,                             level 5,                            advanced one pass small out,        135121
-github,                             level 5 with dict,                  advanced one pass small out,        38938
+github,                             level 5 with dict,                  advanced one pass small out,        38758
+github,                             level 5 with dict dms,              advanced one pass small out,        38758
+github,                             level 5 with dict dds,              advanced one pass small out,        38728
+github,                             level 5 with dict copy,             advanced one pass small out,        38759
+github,                             level 5 with dict load,             advanced one pass small out,        40725
 github,                             level 6,                            advanced one pass small out,        135122
-github,                             level 6 with dict,                  advanced one pass small out,        38632
+github,                             level 6 with dict,                  advanced one pass small out,        38671
+github,                             level 6 with dict dms,              advanced one pass small out,        38671
+github,                             level 6 with dict dds,              advanced one pass small out,        38630
+github,                             level 6 with dict copy,             advanced one pass small out,        38669
+github,                             level 6 with dict load,             advanced one pass small out,        40695
+github,                             level 7 row 1,                      advanced one pass small out,        135122
+github,                             level 7 row 1 with dict dms,        advanced one pass small out,        38771
+github,                             level 7 row 1 with dict dds,        advanced one pass small out,        38771
+github,                             level 7 row 1 with dict copy,       advanced one pass small out,        38745
+github,                             level 7 row 1 with dict load,       advanced one pass small out,        40695
+github,                             level 7 row 2,                      advanced one pass small out,        134584
+github,                             level 7 row 2 with dict dms,        advanced one pass small out,        38758
+github,                             level 7 row 2 with dict dds,        advanced one pass small out,        38747
+github,                             level 7 row 2 with dict copy,       advanced one pass small out,        38755
+github,                             level 7 row 2 with dict load,       advanced one pass small out,        41030
 github,                             level 7,                            advanced one pass small out,        135122
-github,                             level 7 with dict,                  advanced one pass small out,        38771
+github,                             level 7 with dict,                  advanced one pass small out,        38758
+github,                             level 7 with dict dms,              advanced one pass small out,        38758
+github,                             level 7 with dict dds,              advanced one pass small out,        38747
+github,                             level 7 with dict copy,             advanced one pass small out,        38755
+github,                             level 7 with dict load,             advanced one pass small out,        40695
 github,                             level 9,                            advanced one pass small out,        135122
-github,                             level 9 with dict,                  advanced one pass small out,        39332
+github,                             level 9 with dict,                  advanced one pass small out,        39437
+github,                             level 9 with dict dms,              advanced one pass small out,        39437
+github,                             level 9 with dict dds,              advanced one pass small out,        39338
+github,                             level 9 with dict copy,             advanced one pass small out,        39398
+github,                             level 9 with dict load,             advanced one pass small out,        41710
+github,                             level 12 row 1,                     advanced one pass small out,        134180
+github,                             level 12 row 1 with dict dms,       advanced one pass small out,        39677
+github,                             level 12 row 1 with dict dds,       advanced one pass small out,        39677
+github,                             level 12 row 1 with dict copy,      advanced one pass small out,        39677
+github,                             level 12 row 1 with dict load,      advanced one pass small out,        41166
+github,                             level 12 row 2,                     advanced one pass small out,        134180
+github,                             level 12 row 2 with dict dms,       advanced one pass small out,        39677
+github,                             level 12 row 2 with dict dds,       advanced one pass small out,        39677
+github,                             level 12 row 2 with dict copy,      advanced one pass small out,        39677
+github,                             level 12 row 2 with dict load,      advanced one pass small out,        41166
 github,                             level 13,                           advanced one pass small out,        134064
 github,                             level 13 with dict,                 advanced one pass small out,        39743
+github,                             level 13 with dict dms,             advanced one pass small out,        39743
+github,                             level 13 with dict dds,             advanced one pass small out,        39743
+github,                             level 13 with dict copy,            advanced one pass small out,        39948
+github,                             level 13 with dict load,            advanced one pass small out,        42626
 github,                             level 16,                           advanced one pass small out,        134064
 github,                             level 16 with dict,                 advanced one pass small out,        37577
+github,                             level 16 with dict dms,             advanced one pass small out,        37577
+github,                             level 16 with dict dds,             advanced one pass small out,        37577
+github,                             level 16 with dict copy,            advanced one pass small out,        37568
+github,                             level 16 with dict load,            advanced one pass small out,        42340
 github,                             level 19,                           advanced one pass small out,        134064
 github,                             level 19 with dict,                 advanced one pass small out,        37576
+github,                             level 19 with dict dms,             advanced one pass small out,        37576
+github,                             level 19 with dict dds,             advanced one pass small out,        37576
+github,                             level 19 with dict copy,            advanced one pass small out,        37567
+github,                             level 19 with dict load,            advanced one pass small out,        39613
 github,                             no source size,                     advanced one pass small out,        136335
 github,                             no source size with dict,           advanced one pass small out,        41148
 github,                             long distance mode,                 advanced one pass small out,        136335
@@ -456,42 +702,116 @@
 github,                             huffman literals,                   advanced one pass small out,        142465
 github,                             multithreaded with advanced params, advanced one pass small out,        165915
 github.tar,                         level -5,                           advanced one pass small out,        46856
-github.tar,                         level -5 with dict,                 advanced one pass small out,        43971
+github.tar,                         level -5 with dict,                 advanced one pass small out,        44571
 github.tar,                         level -3,                           advanced one pass small out,        43754
-github.tar,                         level -3 with dict,                 advanced one pass small out,        40805
+github.tar,                         level -3 with dict,                 advanced one pass small out,        41447
 github.tar,                         level -1,                           advanced one pass small out,        42490
-github.tar,                         level -1 with dict,                 advanced one pass small out,        41122
+github.tar,                         level -1 with dict,                 advanced one pass small out,        41131
 github.tar,                         level 0,                            advanced one pass small out,        38441
 github.tar,                         level 0 with dict,                  advanced one pass small out,        37995
+github.tar,                         level 0 with dict dms,              advanced one pass small out,        38003
+github.tar,                         level 0 with dict dds,              advanced one pass small out,        38003
+github.tar,                         level 0 with dict copy,             advanced one pass small out,        37995
+github.tar,                         level 0 with dict load,             advanced one pass small out,        37956
 github.tar,                         level 1,                            advanced one pass small out,        39265
-github.tar,                         level 1 with dict,                  advanced one pass small out,        38309
+github.tar,                         level 1 with dict,                  advanced one pass small out,        38280
+github.tar,                         level 1 with dict dms,              advanced one pass small out,        38290
+github.tar,                         level 1 with dict dds,              advanced one pass small out,        38290
+github.tar,                         level 1 with dict copy,             advanced one pass small out,        38280
+github.tar,                         level 1 with dict load,             advanced one pass small out,        38729
 github.tar,                         level 3,                            advanced one pass small out,        38441
 github.tar,                         level 3 with dict,                  advanced one pass small out,        37995
+github.tar,                         level 3 with dict dms,              advanced one pass small out,        38003
+github.tar,                         level 3 with dict dds,              advanced one pass small out,        38003
+github.tar,                         level 3 with dict copy,             advanced one pass small out,        37995
+github.tar,                         level 3 with dict load,             advanced one pass small out,        37956
 github.tar,                         level 4,                            advanced one pass small out,        38467
 github.tar,                         level 4 with dict,                  advanced one pass small out,        37948
-github.tar,                         level 5,                            advanced one pass small out,        39788
-github.tar,                         level 5 with dict,                  advanced one pass small out,        39715
-github.tar,                         level 6,                            advanced one pass small out,        39603
-github.tar,                         level 6 with dict,                  advanced one pass small out,        38800
-github.tar,                         level 7,                            advanced one pass small out,        39206
-github.tar,                         level 7 with dict,                  advanced one pass small out,        38071
-github.tar,                         level 9,                            advanced one pass small out,        36717
-github.tar,                         level 9 with dict,                  advanced one pass small out,        36898
+github.tar,                         level 4 with dict dms,              advanced one pass small out,        37954
+github.tar,                         level 4 with dict dds,              advanced one pass small out,        37954
+github.tar,                         level 4 with dict copy,             advanced one pass small out,        37948
+github.tar,                         level 4 with dict load,             advanced one pass small out,        37927
+github.tar,                         level 5 row 1,                      advanced one pass small out,        39788
+github.tar,                         level 5 row 1 with dict dms,        advanced one pass small out,        39365
+github.tar,                         level 5 row 1 with dict dds,        advanced one pass small out,        39233
+github.tar,                         level 5 row 1 with dict copy,       advanced one pass small out,        39715
+github.tar,                         level 5 row 1 with dict load,       advanced one pass small out,        39209
+github.tar,                         level 5 row 2,                      advanced one pass small out,        39693
+github.tar,                         level 5 row 2 with dict dms,        advanced one pass small out,        39024
+github.tar,                         level 5 row 2 with dict dds,        advanced one pass small out,        39028
+github.tar,                         level 5 row 2 with dict copy,       advanced one pass small out,        39040
+github.tar,                         level 5 row 2 with dict load,       advanced one pass small out,        39037
+github.tar,                         level 5,                            advanced one pass small out,        39693
+github.tar,                         level 5 with dict,                  advanced one pass small out,        39040
+github.tar,                         level 5 with dict dms,              advanced one pass small out,        39024
+github.tar,                         level 5 with dict dds,              advanced one pass small out,        39028
+github.tar,                         level 5 with dict copy,             advanced one pass small out,        39040
+github.tar,                         level 5 with dict load,             advanced one pass small out,        39037
+github.tar,                         level 6,                            advanced one pass small out,        39621
+github.tar,                         level 6 with dict,                  advanced one pass small out,        38622
+github.tar,                         level 6 with dict dms,              advanced one pass small out,        38608
+github.tar,                         level 6 with dict dds,              advanced one pass small out,        38610
+github.tar,                         level 6 with dict copy,             advanced one pass small out,        38622
+github.tar,                         level 6 with dict load,             advanced one pass small out,        38962
+github.tar,                         level 7 row 1,                      advanced one pass small out,        39206
+github.tar,                         level 7 row 1 with dict dms,        advanced one pass small out,        37954
+github.tar,                         level 7 row 1 with dict dds,        advanced one pass small out,        37954
+github.tar,                         level 7 row 1 with dict copy,       advanced one pass small out,        38071
+github.tar,                         level 7 row 1 with dict load,       advanced one pass small out,        38584
+github.tar,                         level 7 row 2,                      advanced one pass small out,        39213
+github.tar,                         level 7 row 2 with dict dms,        advanced one pass small out,        37848
+github.tar,                         level 7 row 2 with dict dds,        advanced one pass small out,        37867
+github.tar,                         level 7 row 2 with dict copy,       advanced one pass small out,        37848
+github.tar,                         level 7 row 2 with dict load,       advanced one pass small out,        38582
+github.tar,                         level 7,                            advanced one pass small out,        39213
+github.tar,                         level 7 with dict,                  advanced one pass small out,        37848
+github.tar,                         level 7 with dict dms,              advanced one pass small out,        37848
+github.tar,                         level 7 with dict dds,              advanced one pass small out,        37867
+github.tar,                         level 7 with dict copy,             advanced one pass small out,        37848
+github.tar,                         level 7 with dict load,             advanced one pass small out,        38582
+github.tar,                         level 9,                            advanced one pass small out,        36758
+github.tar,                         level 9 with dict,                  advanced one pass small out,        36457
+github.tar,                         level 9 with dict dms,              advanced one pass small out,        36549
+github.tar,                         level 9 with dict dds,              advanced one pass small out,        36637
+github.tar,                         level 9 with dict copy,             advanced one pass small out,        36457
+github.tar,                         level 9 with dict load,             advanced one pass small out,        36350
+github.tar,                         level 12 row 1,                     advanced one pass small out,        36435
+github.tar,                         level 12 row 1 with dict dms,       advanced one pass small out,        36986
+github.tar,                         level 12 row 1 with dict dds,       advanced one pass small out,        36986
+github.tar,                         level 12 row 1 with dict copy,      advanced one pass small out,        36609
+github.tar,                         level 12 row 1 with dict load,      advanced one pass small out,        36419
+github.tar,                         level 12 row 2,                     advanced one pass small out,        36435
+github.tar,                         level 12 row 2 with dict dms,       advanced one pass small out,        36986
+github.tar,                         level 12 row 2 with dict dds,       advanced one pass small out,        36986
+github.tar,                         level 12 row 2 with dict copy,      advanced one pass small out,        36609
+github.tar,                         level 12 row 2 with dict load,      advanced one pass small out,        36424
 github.tar,                         level 13,                           advanced one pass small out,        35621
 github.tar,                         level 13 with dict,                 advanced one pass small out,        38726
+github.tar,                         level 13 with dict dms,             advanced one pass small out,        38903
+github.tar,                         level 13 with dict dds,             advanced one pass small out,        38903
+github.tar,                         level 13 with dict copy,            advanced one pass small out,        38726
+github.tar,                         level 13 with dict load,            advanced one pass small out,        36372
 github.tar,                         level 16,                           advanced one pass small out,        40255
 github.tar,                         level 16 with dict,                 advanced one pass small out,        33639
+github.tar,                         level 16 with dict dms,             advanced one pass small out,        33544
+github.tar,                         level 16 with dict dds,             advanced one pass small out,        33544
+github.tar,                         level 16 with dict copy,            advanced one pass small out,        33639
+github.tar,                         level 16 with dict load,            advanced one pass small out,        39353
 github.tar,                         level 19,                           advanced one pass small out,        32837
 github.tar,                         level 19 with dict,                 advanced one pass small out,        32895
+github.tar,                         level 19 with dict dms,             advanced one pass small out,        32672
+github.tar,                         level 19 with dict dds,             advanced one pass small out,        32672
+github.tar,                         level 19 with dict copy,            advanced one pass small out,        32895
+github.tar,                         level 19 with dict load,            advanced one pass small out,        32676
 github.tar,                         no source size,                     advanced one pass small out,        38441
 github.tar,                         no source size with dict,           advanced one pass small out,        37995
-github.tar,                         long distance mode,                 advanced one pass small out,        39722
+github.tar,                         long distance mode,                 advanced one pass small out,        39757
 github.tar,                         multithreaded,                      advanced one pass small out,        38441
-github.tar,                         multithreaded long distance mode,   advanced one pass small out,        39722
+github.tar,                         multithreaded long distance mode,   advanced one pass small out,        39726
 github.tar,                         small window log,                   advanced one pass small out,        198540
 github.tar,                         small hash log,                     advanced one pass small out,        129870
 github.tar,                         small chain log,                    advanced one pass small out,        41669
-github.tar,                         explicit params,                    advanced one pass small out,        41199
+github.tar,                         explicit params,                    advanced one pass small out,        41227
 github.tar,                         uncompressed literals,              advanced one pass small out,        41122
 github.tar,                         uncompressed literals optimal,      advanced one pass small out,        35388
 github.tar,                         huffman literals,                   advanced one pass small out,        38777
@@ -503,23 +823,29 @@
 silesia,                            level 1,                            advanced streaming,                 5314162
 silesia,                            level 3,                            advanced streaming,                 4849552
 silesia,                            level 4,                            advanced streaming,                 4786970
-silesia,                            level 5,                            advanced streaming,                 4710236
-silesia,                            level 6,                            advanced streaming,                 4660056
-silesia,                            level 7,                            advanced streaming,                 4596296
-silesia,                            level 9,                            advanced streaming,                 4543925
+silesia,                            level 5 row 1,                      advanced streaming,                 4710236
+silesia,                            level 5 row 2,                      advanced streaming,                 4707794
+silesia,                            level 5,                            advanced streaming,                 4707794
+silesia,                            level 6,                            advanced streaming,                 4666383
+silesia,                            level 7 row 1,                      advanced streaming,                 4596296
+silesia,                            level 7 row 2,                      advanced streaming,                 4603381
+silesia,                            level 7,                            advanced streaming,                 4603381
+silesia,                            level 9,                            advanced streaming,                 4546001
+silesia,                            level 12 row 1,                     advanced streaming,                 4519288
+silesia,                            level 12 row 2,                     advanced streaming,                 4521397
 silesia,                            level 13,                           advanced streaming,                 4482135
-silesia,                            level 16,                           advanced streaming,                 4377465
-silesia,                            level 19,                           advanced streaming,                 4293330
+silesia,                            level 16,                           advanced streaming,                 4360251
+silesia,                            level 19,                           advanced streaming,                 4283237
 silesia,                            no source size,                     advanced streaming,                 4849516
-silesia,                            long distance mode,                 advanced streaming,                 4840744
+silesia,                            long distance mode,                 advanced streaming,                 4840738
 silesia,                            multithreaded,                      advanced streaming,                 4849552
-silesia,                            multithreaded long distance mode,   advanced streaming,                 4840744
+silesia,                            multithreaded long distance mode,   advanced streaming,                 4840758
 silesia,                            small window log,                   advanced streaming,                 7112062
-silesia,                            small hash log,                     advanced streaming,                 6555021
-silesia,                            small chain log,                    advanced streaming,                 4931148
-silesia,                            explicit params,                    advanced streaming,                 4797112
+silesia,                            small hash log,                     advanced streaming,                 6526141
+silesia,                            small chain log,                    advanced streaming,                 4912197
+silesia,                            explicit params,                    advanced streaming,                 4795887
 silesia,                            uncompressed literals,              advanced streaming,                 5127982
-silesia,                            uncompressed literals optimal,      advanced streaming,                 4325472
+silesia,                            uncompressed literals optimal,      advanced streaming,                 4317896
 silesia,                            huffman literals,                   advanced streaming,                 5331168
 silesia,                            multithreaded with advanced params, advanced streaming,                 5127982
 silesia.tar,                        level -5,                           advanced streaming,                 6982759
@@ -529,23 +855,29 @@
 silesia.tar,                        level 1,                            advanced streaming,                 5336939
 silesia.tar,                        level 3,                            advanced streaming,                 4861427
 silesia.tar,                        level 4,                            advanced streaming,                 4799630
-silesia.tar,                        level 5,                            advanced streaming,                 4722329
-silesia.tar,                        level 6,                            advanced streaming,                 4672288
-silesia.tar,                        level 7,                            advanced streaming,                 4606715
-silesia.tar,                        level 9,                            advanced streaming,                 4554154
+silesia.tar,                        level 5 row 1,                      advanced streaming,                 4722329
+silesia.tar,                        level 5 row 2,                      advanced streaming,                 4719261
+silesia.tar,                        level 5,                            advanced streaming,                 4719261
+silesia.tar,                        level 6,                            advanced streaming,                 4677729
+silesia.tar,                        level 7 row 1,                      advanced streaming,                 4606715
+silesia.tar,                        level 7 row 2,                      advanced streaming,                 4613544
+silesia.tar,                        level 7,                            advanced streaming,                 4613544
+silesia.tar,                        level 9,                            advanced streaming,                 4555432
+silesia.tar,                        level 12 row 1,                     advanced streaming,                 4529459
+silesia.tar,                        level 12 row 2,                     advanced streaming,                 4530258
 silesia.tar,                        level 13,                           advanced streaming,                 4491765
-silesia.tar,                        level 16,                           advanced streaming,                 4381350
-silesia.tar,                        level 19,                           advanced streaming,                 4281562
+silesia.tar,                        level 16,                           advanced streaming,                 4356834
+silesia.tar,                        level 19,                           advanced streaming,                 4264392
 silesia.tar,                        no source size,                     advanced streaming,                 4861423
-silesia.tar,                        long distance mode,                 advanced streaming,                 4847735
+silesia.tar,                        long distance mode,                 advanced streaming,                 4847754
 silesia.tar,                        multithreaded,                      advanced streaming,                 4861508
-silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4853149
+silesia.tar,                        multithreaded long distance mode,   advanced streaming,                 4853222
 silesia.tar,                        small window log,                   advanced streaming,                 7118769
-silesia.tar,                        small hash log,                     advanced streaming,                 6587952
-silesia.tar,                        small chain log,                    advanced streaming,                 4943312
-silesia.tar,                        explicit params,                    advanced streaming,                 4808618
+silesia.tar,                        small hash log,                     advanced streaming,                 6529235
+silesia.tar,                        small chain log,                    advanced streaming,                 4917021
+silesia.tar,                        explicit params,                    advanced streaming,                 4807401
 silesia.tar,                        uncompressed literals,              advanced streaming,                 5129461
-silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4320858
+silesia.tar,                        uncompressed literals optimal,      advanced streaming,                 4307400
 silesia.tar,                        huffman literals,                   advanced streaming,                 5352360
 silesia.tar,                        multithreaded with advanced params, advanced streaming,                 5129555
 github,                             level -5,                           advanced streaming,                 205285
@@ -556,26 +888,100 @@
 github,                             level -1 with dict,                 advanced streaming,                 43170
 github,                             level 0,                            advanced streaming,                 136335
 github,                             level 0 with dict,                  advanced streaming,                 41148
+github,                             level 0 with dict dms,              advanced streaming,                 41148
+github,                             level 0 with dict dds,              advanced streaming,                 41148
+github,                             level 0 with dict copy,             advanced streaming,                 41124
+github,                             level 0 with dict load,             advanced streaming,                 42252
 github,                             level 1,                            advanced streaming,                 142465
 github,                             level 1 with dict,                  advanced streaming,                 41682
+github,                             level 1 with dict dms,              advanced streaming,                 41682
+github,                             level 1 with dict dds,              advanced streaming,                 41682
+github,                             level 1 with dict copy,             advanced streaming,                 41674
+github,                             level 1 with dict load,             advanced streaming,                 43755
 github,                             level 3,                            advanced streaming,                 136335
 github,                             level 3 with dict,                  advanced streaming,                 41148
+github,                             level 3 with dict dms,              advanced streaming,                 41148
+github,                             level 3 with dict dds,              advanced streaming,                 41148
+github,                             level 3 with dict copy,             advanced streaming,                 41124
+github,                             level 3 with dict load,             advanced streaming,                 42252
 github,                             level 4,                            advanced streaming,                 136199
 github,                             level 4 with dict,                  advanced streaming,                 41251
+github,                             level 4 with dict dms,              advanced streaming,                 41251
+github,                             level 4 with dict dds,              advanced streaming,                 41251
+github,                             level 4 with dict copy,             advanced streaming,                 41216
+github,                             level 4 with dict load,             advanced streaming,                 41159
+github,                             level 5 row 1,                      advanced streaming,                 135121
+github,                             level 5 row 1 with dict dms,        advanced streaming,                 38938
+github,                             level 5 row 1 with dict dds,        advanced streaming,                 38732
+github,                             level 5 row 1 with dict copy,       advanced streaming,                 38934
+github,                             level 5 row 1 with dict load,       advanced streaming,                 40725
+github,                             level 5 row 2,                      advanced streaming,                 134584
+github,                             level 5 row 2 with dict dms,        advanced streaming,                 38758
+github,                             level 5 row 2 with dict dds,        advanced streaming,                 38728
+github,                             level 5 row 2 with dict copy,       advanced streaming,                 38759
+github,                             level 5 row 2 with dict load,       advanced streaming,                 41518
 github,                             level 5,                            advanced streaming,                 135121
-github,                             level 5 with dict,                  advanced streaming,                 38938
+github,                             level 5 with dict,                  advanced streaming,                 38758
+github,                             level 5 with dict dms,              advanced streaming,                 38758
+github,                             level 5 with dict dds,              advanced streaming,                 38728
+github,                             level 5 with dict copy,             advanced streaming,                 38759
+github,                             level 5 with dict load,             advanced streaming,                 40725
 github,                             level 6,                            advanced streaming,                 135122
-github,                             level 6 with dict,                  advanced streaming,                 38632
+github,                             level 6 with dict,                  advanced streaming,                 38671
+github,                             level 6 with dict dms,              advanced streaming,                 38671
+github,                             level 6 with dict dds,              advanced streaming,                 38630
+github,                             level 6 with dict copy,             advanced streaming,                 38669
+github,                             level 6 with dict load,             advanced streaming,                 40695
+github,                             level 7 row 1,                      advanced streaming,                 135122
+github,                             level 7 row 1 with dict dms,        advanced streaming,                 38771
+github,                             level 7 row 1 with dict dds,        advanced streaming,                 38771
+github,                             level 7 row 1 with dict copy,       advanced streaming,                 38745
+github,                             level 7 row 1 with dict load,       advanced streaming,                 40695
+github,                             level 7 row 2,                      advanced streaming,                 134584
+github,                             level 7 row 2 with dict dms,        advanced streaming,                 38758
+github,                             level 7 row 2 with dict dds,        advanced streaming,                 38747
+github,                             level 7 row 2 with dict copy,       advanced streaming,                 38755
+github,                             level 7 row 2 with dict load,       advanced streaming,                 41030
 github,                             level 7,                            advanced streaming,                 135122
-github,                             level 7 with dict,                  advanced streaming,                 38771
+github,                             level 7 with dict,                  advanced streaming,                 38758
+github,                             level 7 with dict dms,              advanced streaming,                 38758
+github,                             level 7 with dict dds,              advanced streaming,                 38747
+github,                             level 7 with dict copy,             advanced streaming,                 38755
+github,                             level 7 with dict load,             advanced streaming,                 40695
 github,                             level 9,                            advanced streaming,                 135122
-github,                             level 9 with dict,                  advanced streaming,                 39332
+github,                             level 9 with dict,                  advanced streaming,                 39437
+github,                             level 9 with dict dms,              advanced streaming,                 39437
+github,                             level 9 with dict dds,              advanced streaming,                 39338
+github,                             level 9 with dict copy,             advanced streaming,                 39398
+github,                             level 9 with dict load,             advanced streaming,                 41710
+github,                             level 12 row 1,                     advanced streaming,                 134180
+github,                             level 12 row 1 with dict dms,       advanced streaming,                 39677
+github,                             level 12 row 1 with dict dds,       advanced streaming,                 39677
+github,                             level 12 row 1 with dict copy,      advanced streaming,                 39677
+github,                             level 12 row 1 with dict load,      advanced streaming,                 41166
+github,                             level 12 row 2,                     advanced streaming,                 134180
+github,                             level 12 row 2 with dict dms,       advanced streaming,                 39677
+github,                             level 12 row 2 with dict dds,       advanced streaming,                 39677
+github,                             level 12 row 2 with dict copy,      advanced streaming,                 39677
+github,                             level 12 row 2 with dict load,      advanced streaming,                 41166
 github,                             level 13,                           advanced streaming,                 134064
 github,                             level 13 with dict,                 advanced streaming,                 39743
+github,                             level 13 with dict dms,             advanced streaming,                 39743
+github,                             level 13 with dict dds,             advanced streaming,                 39743
+github,                             level 13 with dict copy,            advanced streaming,                 39948
+github,                             level 13 with dict load,            advanced streaming,                 42626
 github,                             level 16,                           advanced streaming,                 134064
 github,                             level 16 with dict,                 advanced streaming,                 37577
+github,                             level 16 with dict dms,             advanced streaming,                 37577
+github,                             level 16 with dict dds,             advanced streaming,                 37577
+github,                             level 16 with dict copy,            advanced streaming,                 37568
+github,                             level 16 with dict load,            advanced streaming,                 42340
 github,                             level 19,                           advanced streaming,                 134064
 github,                             level 19 with dict,                 advanced streaming,                 37576
+github,                             level 19 with dict dms,             advanced streaming,                 37576
+github,                             level 19 with dict dds,             advanced streaming,                 37576
+github,                             level 19 with dict copy,            advanced streaming,                 37567
+github,                             level 19 with dict load,            advanced streaming,                 39613
 github,                             no source size,                     advanced streaming,                 136335
 github,                             no source size with dict,           advanced streaming,                 41148
 github,                             long distance mode,                 advanced streaming,                 136335
@@ -590,42 +996,116 @@
 github,                             huffman literals,                   advanced streaming,                 142465
 github,                             multithreaded with advanced params, advanced streaming,                 165915
 github.tar,                         level -5,                           advanced streaming,                 46747
-github.tar,                         level -5 with dict,                 advanced streaming,                 43971
+github.tar,                         level -5 with dict,                 advanced streaming,                 44440
 github.tar,                         level -3,                           advanced streaming,                 43537
-github.tar,                         level -3 with dict,                 advanced streaming,                 40805
+github.tar,                         level -3 with dict,                 advanced streaming,                 41112
 github.tar,                         level -1,                           advanced streaming,                 42465
-github.tar,                         level -1 with dict,                 advanced streaming,                 41122
+github.tar,                         level -1 with dict,                 advanced streaming,                 41196
 github.tar,                         level 0,                            advanced streaming,                 38441
 github.tar,                         level 0 with dict,                  advanced streaming,                 37995
+github.tar,                         level 0 with dict dms,              advanced streaming,                 38003
+github.tar,                         level 0 with dict dds,              advanced streaming,                 38003
+github.tar,                         level 0 with dict copy,             advanced streaming,                 37995
+github.tar,                         level 0 with dict load,             advanced streaming,                 37956
 github.tar,                         level 1,                            advanced streaming,                 39342
-github.tar,                         level 1 with dict,                  advanced streaming,                 38309
+github.tar,                         level 1 with dict,                  advanced streaming,                 38293
+github.tar,                         level 1 with dict dms,              advanced streaming,                 38303
+github.tar,                         level 1 with dict dds,              advanced streaming,                 38303
+github.tar,                         level 1 with dict copy,             advanced streaming,                 38293
+github.tar,                         level 1 with dict load,             advanced streaming,                 38766
 github.tar,                         level 3,                            advanced streaming,                 38441
 github.tar,                         level 3 with dict,                  advanced streaming,                 37995
+github.tar,                         level 3 with dict dms,              advanced streaming,                 38003
+github.tar,                         level 3 with dict dds,              advanced streaming,                 38003
+github.tar,                         level 3 with dict copy,             advanced streaming,                 37995
+github.tar,                         level 3 with dict load,             advanced streaming,                 37956
 github.tar,                         level 4,                            advanced streaming,                 38467
 github.tar,                         level 4 with dict,                  advanced streaming,                 37948
-github.tar,                         level 5,                            advanced streaming,                 39788
-github.tar,                         level 5 with dict,                  advanced streaming,                 39715
-github.tar,                         level 6,                            advanced streaming,                 39603
-github.tar,                         level 6 with dict,                  advanced streaming,                 38800
-github.tar,                         level 7,                            advanced streaming,                 39206
-github.tar,                         level 7 with dict,                  advanced streaming,                 38071
-github.tar,                         level 9,                            advanced streaming,                 36717
-github.tar,                         level 9 with dict,                  advanced streaming,                 36898
+github.tar,                         level 4 with dict dms,              advanced streaming,                 37954
+github.tar,                         level 4 with dict dds,              advanced streaming,                 37954
+github.tar,                         level 4 with dict copy,             advanced streaming,                 37948
+github.tar,                         level 4 with dict load,             advanced streaming,                 37927
+github.tar,                         level 5 row 1,                      advanced streaming,                 39788
+github.tar,                         level 5 row 1 with dict dms,        advanced streaming,                 39365
+github.tar,                         level 5 row 1 with dict dds,        advanced streaming,                 39233
+github.tar,                         level 5 row 1 with dict copy,       advanced streaming,                 39715
+github.tar,                         level 5 row 1 with dict load,       advanced streaming,                 39209
+github.tar,                         level 5 row 2,                      advanced streaming,                 39693
+github.tar,                         level 5 row 2 with dict dms,        advanced streaming,                 39024
+github.tar,                         level 5 row 2 with dict dds,        advanced streaming,                 39028
+github.tar,                         level 5 row 2 with dict copy,       advanced streaming,                 39040
+github.tar,                         level 5 row 2 with dict load,       advanced streaming,                 39037
+github.tar,                         level 5,                            advanced streaming,                 39693
+github.tar,                         level 5 with dict,                  advanced streaming,                 39040
+github.tar,                         level 5 with dict dms,              advanced streaming,                 39024
+github.tar,                         level 5 with dict dds,              advanced streaming,                 39028
+github.tar,                         level 5 with dict copy,             advanced streaming,                 39040
+github.tar,                         level 5 with dict load,             advanced streaming,                 39037
+github.tar,                         level 6,                            advanced streaming,                 39621
+github.tar,                         level 6 with dict,                  advanced streaming,                 38622
+github.tar,                         level 6 with dict dms,              advanced streaming,                 38608
+github.tar,                         level 6 with dict dds,              advanced streaming,                 38610
+github.tar,                         level 6 with dict copy,             advanced streaming,                 38622
+github.tar,                         level 6 with dict load,             advanced streaming,                 38962
+github.tar,                         level 7 row 1,                      advanced streaming,                 39206
+github.tar,                         level 7 row 1 with dict dms,        advanced streaming,                 37954
+github.tar,                         level 7 row 1 with dict dds,        advanced streaming,                 37954
+github.tar,                         level 7 row 1 with dict copy,       advanced streaming,                 38071
+github.tar,                         level 7 row 1 with dict load,       advanced streaming,                 38584
+github.tar,                         level 7 row 2,                      advanced streaming,                 39213
+github.tar,                         level 7 row 2 with dict dms,        advanced streaming,                 37848
+github.tar,                         level 7 row 2 with dict dds,        advanced streaming,                 37867
+github.tar,                         level 7 row 2 with dict copy,       advanced streaming,                 37848
+github.tar,                         level 7 row 2 with dict load,       advanced streaming,                 38582
+github.tar,                         level 7,                            advanced streaming,                 39213
+github.tar,                         level 7 with dict,                  advanced streaming,                 37848
+github.tar,                         level 7 with dict dms,              advanced streaming,                 37848
+github.tar,                         level 7 with dict dds,              advanced streaming,                 37867
+github.tar,                         level 7 with dict copy,             advanced streaming,                 37848
+github.tar,                         level 7 with dict load,             advanced streaming,                 38582
+github.tar,                         level 9,                            advanced streaming,                 36758
+github.tar,                         level 9 with dict,                  advanced streaming,                 36457
+github.tar,                         level 9 with dict dms,              advanced streaming,                 36549
+github.tar,                         level 9 with dict dds,              advanced streaming,                 36637
+github.tar,                         level 9 with dict copy,             advanced streaming,                 36457
+github.tar,                         level 9 with dict load,             advanced streaming,                 36350
+github.tar,                         level 12 row 1,                     advanced streaming,                 36435
+github.tar,                         level 12 row 1 with dict dms,       advanced streaming,                 36986
+github.tar,                         level 12 row 1 with dict dds,       advanced streaming,                 36986
+github.tar,                         level 12 row 1 with dict copy,      advanced streaming,                 36609
+github.tar,                         level 12 row 1 with dict load,      advanced streaming,                 36419
+github.tar,                         level 12 row 2,                     advanced streaming,                 36435
+github.tar,                         level 12 row 2 with dict dms,       advanced streaming,                 36986
+github.tar,                         level 12 row 2 with dict dds,       advanced streaming,                 36986
+github.tar,                         level 12 row 2 with dict copy,      advanced streaming,                 36609
+github.tar,                         level 12 row 2 with dict load,      advanced streaming,                 36424
 github.tar,                         level 13,                           advanced streaming,                 35621
 github.tar,                         level 13 with dict,                 advanced streaming,                 38726
+github.tar,                         level 13 with dict dms,             advanced streaming,                 38903
+github.tar,                         level 13 with dict dds,             advanced streaming,                 38903
+github.tar,                         level 13 with dict copy,            advanced streaming,                 38726
+github.tar,                         level 13 with dict load,            advanced streaming,                 36372
 github.tar,                         level 16,                           advanced streaming,                 40255
 github.tar,                         level 16 with dict,                 advanced streaming,                 33639
+github.tar,                         level 16 with dict dms,             advanced streaming,                 33544
+github.tar,                         level 16 with dict dds,             advanced streaming,                 33544
+github.tar,                         level 16 with dict copy,            advanced streaming,                 33639
+github.tar,                         level 16 with dict load,            advanced streaming,                 39353
 github.tar,                         level 19,                           advanced streaming,                 32837
 github.tar,                         level 19 with dict,                 advanced streaming,                 32895
+github.tar,                         level 19 with dict dms,             advanced streaming,                 32672
+github.tar,                         level 19 with dict dds,             advanced streaming,                 32672
+github.tar,                         level 19 with dict copy,            advanced streaming,                 32895
+github.tar,                         level 19 with dict load,            advanced streaming,                 32676
 github.tar,                         no source size,                     advanced streaming,                 38438
 github.tar,                         no source size with dict,           advanced streaming,                 38000
-github.tar,                         long distance mode,                 advanced streaming,                 39722
+github.tar,                         long distance mode,                 advanced streaming,                 39757
 github.tar,                         multithreaded,                      advanced streaming,                 38441
-github.tar,                         multithreaded long distance mode,   advanced streaming,                 39722
+github.tar,                         multithreaded long distance mode,   advanced streaming,                 39726
 github.tar,                         small window log,                   advanced streaming,                 199558
 github.tar,                         small hash log,                     advanced streaming,                 129870
 github.tar,                         small chain log,                    advanced streaming,                 41669
-github.tar,                         explicit params,                    advanced streaming,                 41199
+github.tar,                         explicit params,                    advanced streaming,                 41227
 github.tar,                         uncompressed literals,              advanced streaming,                 41122
 github.tar,                         uncompressed literals optimal,      advanced streaming,                 35388
 github.tar,                         huffman literals,                   advanced streaming,                 38800
@@ -637,16 +1117,16 @@
 silesia,                            level 1,                            old streaming,                      5314162
 silesia,                            level 3,                            old streaming,                      4849552
 silesia,                            level 4,                            old streaming,                      4786970
-silesia,                            level 5,                            old streaming,                      4710236
-silesia,                            level 6,                            old streaming,                      4660056
-silesia,                            level 7,                            old streaming,                      4596296
-silesia,                            level 9,                            old streaming,                      4543925
+silesia,                            level 5,                            old streaming,                      4707794
+silesia,                            level 6,                            old streaming,                      4666383
+silesia,                            level 7,                            old streaming,                      4603381
+silesia,                            level 9,                            old streaming,                      4546001
 silesia,                            level 13,                           old streaming,                      4482135
-silesia,                            level 16,                           old streaming,                      4377465
-silesia,                            level 19,                           old streaming,                      4293330
+silesia,                            level 16,                           old streaming,                      4360251
+silesia,                            level 19,                           old streaming,                      4283237
 silesia,                            no source size,                     old streaming,                      4849516
 silesia,                            uncompressed literals,              old streaming,                      4849552
-silesia,                            uncompressed literals optimal,      old streaming,                      4293330
+silesia,                            uncompressed literals optimal,      old streaming,                      4283237
 silesia,                            huffman literals,                   old streaming,                      6183403
 silesia.tar,                        level -5,                           old streaming,                      6982759
 silesia.tar,                        level -3,                           old streaming,                      6641283
@@ -655,16 +1135,16 @@
 silesia.tar,                        level 1,                            old streaming,                      5336939
 silesia.tar,                        level 3,                            old streaming,                      4861427
 silesia.tar,                        level 4,                            old streaming,                      4799630
-silesia.tar,                        level 5,                            old streaming,                      4722329
-silesia.tar,                        level 6,                            old streaming,                      4672288
-silesia.tar,                        level 7,                            old streaming,                      4606715
-silesia.tar,                        level 9,                            old streaming,                      4554154
+silesia.tar,                        level 5,                            old streaming,                      4719261
+silesia.tar,                        level 6,                            old streaming,                      4677729
+silesia.tar,                        level 7,                            old streaming,                      4613544
+silesia.tar,                        level 9,                            old streaming,                      4555432
 silesia.tar,                        level 13,                           old streaming,                      4491765
-silesia.tar,                        level 16,                           old streaming,                      4381350
-silesia.tar,                        level 19,                           old streaming,                      4281562
+silesia.tar,                        level 16,                           old streaming,                      4356834
+silesia.tar,                        level 19,                           old streaming,                      4264392
 silesia.tar,                        no source size,                     old streaming,                      4861423
 silesia.tar,                        uncompressed literals,              old streaming,                      4861427
-silesia.tar,                        uncompressed literals optimal,      old streaming,                      4281562
+silesia.tar,                        uncompressed literals optimal,      old streaming,                      4264392
 silesia.tar,                        huffman literals,                   old streaming,                      6190795
 github,                             level -5,                           old streaming,                      205285
 github,                             level -5 with dict,                 old streaming,                      46718
@@ -681,13 +1161,13 @@
 github,                             level 4,                            old streaming,                      136199
 github,                             level 4 with dict,                  old streaming,                      41251
 github,                             level 5,                            old streaming,                      135121
-github,                             level 5 with dict,                  old streaming,                      38938
+github,                             level 5 with dict,                  old streaming,                      38758
 github,                             level 6,                            old streaming,                      135122
-github,                             level 6 with dict,                  old streaming,                      38632
+github,                             level 6 with dict,                  old streaming,                      38671
 github,                             level 7,                            old streaming,                      135122
-github,                             level 7 with dict,                  old streaming,                      38771
+github,                             level 7 with dict,                  old streaming,                      38758
 github,                             level 9,                            old streaming,                      135122
-github,                             level 9 with dict,                  old streaming,                      39332
+github,                             level 9 with dict,                  old streaming,                      39437
 github,                             level 13,                           old streaming,                      134064
 github,                             level 13 with dict,                 old streaming,                      39743
 github,                             level 16,                           old streaming,                      134064
@@ -700,27 +1180,27 @@
 github,                             uncompressed literals optimal,      old streaming,                      134064
 github,                             huffman literals,                   old streaming,                      175568
 github.tar,                         level -5,                           old streaming,                      46747
-github.tar,                         level -5 with dict,                 old streaming,                      43971
+github.tar,                         level -5 with dict,                 old streaming,                      44440
 github.tar,                         level -3,                           old streaming,                      43537
-github.tar,                         level -3 with dict,                 old streaming,                      40805
+github.tar,                         level -3 with dict,                 old streaming,                      41112
 github.tar,                         level -1,                           old streaming,                      42465
-github.tar,                         level -1 with dict,                 old streaming,                      41122
+github.tar,                         level -1 with dict,                 old streaming,                      41196
 github.tar,                         level 0,                            old streaming,                      38441
 github.tar,                         level 0 with dict,                  old streaming,                      37995
 github.tar,                         level 1,                            old streaming,                      39342
-github.tar,                         level 1 with dict,                  old streaming,                      38309
+github.tar,                         level 1 with dict,                  old streaming,                      38293
 github.tar,                         level 3,                            old streaming,                      38441
 github.tar,                         level 3 with dict,                  old streaming,                      37995
 github.tar,                         level 4,                            old streaming,                      38467
 github.tar,                         level 4 with dict,                  old streaming,                      37948
-github.tar,                         level 5,                            old streaming,                      39788
-github.tar,                         level 5 with dict,                  old streaming,                      39715
-github.tar,                         level 6,                            old streaming,                      39603
-github.tar,                         level 6 with dict,                  old streaming,                      38800
-github.tar,                         level 7,                            old streaming,                      39206
-github.tar,                         level 7 with dict,                  old streaming,                      38071
-github.tar,                         level 9,                            old streaming,                      36717
-github.tar,                         level 9 with dict,                  old streaming,                      36898
+github.tar,                         level 5,                            old streaming,                      39693
+github.tar,                         level 5 with dict,                  old streaming,                      39040
+github.tar,                         level 6,                            old streaming,                      39621
+github.tar,                         level 6 with dict,                  old streaming,                      38622
+github.tar,                         level 7,                            old streaming,                      39213
+github.tar,                         level 7 with dict,                  old streaming,                      37848
+github.tar,                         level 9,                            old streaming,                      36758
+github.tar,                         level 9 with dict,                  old streaming,                      36457
 github.tar,                         level 13,                           old streaming,                      35621
 github.tar,                         level 13 with dict,                 old streaming,                      38726
 github.tar,                         level 16,                           old streaming,                      40255
@@ -739,23 +1219,23 @@
 silesia,                            level 1,                            old streaming advanced,             5314162
 silesia,                            level 3,                            old streaming advanced,             4849552
 silesia,                            level 4,                            old streaming advanced,             4786970
-silesia,                            level 5,                            old streaming advanced,             4710236
-silesia,                            level 6,                            old streaming advanced,             4660056
-silesia,                            level 7,                            old streaming advanced,             4596296
-silesia,                            level 9,                            old streaming advanced,             4543925
+silesia,                            level 5,                            old streaming advanced,             4707794
+silesia,                            level 6,                            old streaming advanced,             4666383
+silesia,                            level 7,                            old streaming advanced,             4603381
+silesia,                            level 9,                            old streaming advanced,             4546001
 silesia,                            level 13,                           old streaming advanced,             4482135
-silesia,                            level 16,                           old streaming advanced,             4377465
-silesia,                            level 19,                           old streaming advanced,             4293330
+silesia,                            level 16,                           old streaming advanced,             4360251
+silesia,                            level 19,                           old streaming advanced,             4283237
 silesia,                            no source size,                     old streaming advanced,             4849516
 silesia,                            long distance mode,                 old streaming advanced,             4849552
 silesia,                            multithreaded,                      old streaming advanced,             4849552
 silesia,                            multithreaded long distance mode,   old streaming advanced,             4849552
 silesia,                            small window log,                   old streaming advanced,             7112062
-silesia,                            small hash log,                     old streaming advanced,             6555021
-silesia,                            small chain log,                    old streaming advanced,             4931148
-silesia,                            explicit params,                    old streaming advanced,             4797112
+silesia,                            small hash log,                     old streaming advanced,             6526141
+silesia,                            small chain log,                    old streaming advanced,             4912197
+silesia,                            explicit params,                    old streaming advanced,             4795887
 silesia,                            uncompressed literals,              old streaming advanced,             4849552
-silesia,                            uncompressed literals optimal,      old streaming advanced,             4293330
+silesia,                            uncompressed literals optimal,      old streaming advanced,             4283237
 silesia,                            huffman literals,                   old streaming advanced,             6183403
 silesia,                            multithreaded with advanced params, old streaming advanced,             4849552
 silesia.tar,                        level -5,                           old streaming advanced,             6982759
@@ -765,23 +1245,23 @@
 silesia.tar,                        level 1,                            old streaming advanced,             5336939
 silesia.tar,                        level 3,                            old streaming advanced,             4861427
 silesia.tar,                        level 4,                            old streaming advanced,             4799630
-silesia.tar,                        level 5,                            old streaming advanced,             4722329
-silesia.tar,                        level 6,                            old streaming advanced,             4672288
-silesia.tar,                        level 7,                            old streaming advanced,             4606715
-silesia.tar,                        level 9,                            old streaming advanced,             4554154
+silesia.tar,                        level 5,                            old streaming advanced,             4719261
+silesia.tar,                        level 6,                            old streaming advanced,             4677729
+silesia.tar,                        level 7,                            old streaming advanced,             4613544
+silesia.tar,                        level 9,                            old streaming advanced,             4555432
 silesia.tar,                        level 13,                           old streaming advanced,             4491765
-silesia.tar,                        level 16,                           old streaming advanced,             4381350
-silesia.tar,                        level 19,                           old streaming advanced,             4281562
+silesia.tar,                        level 16,                           old streaming advanced,             4356834
+silesia.tar,                        level 19,                           old streaming advanced,             4264392
 silesia.tar,                        no source size,                     old streaming advanced,             4861423
 silesia.tar,                        long distance mode,                 old streaming advanced,             4861427
 silesia.tar,                        multithreaded,                      old streaming advanced,             4861427
 silesia.tar,                        multithreaded long distance mode,   old streaming advanced,             4861427
 silesia.tar,                        small window log,                   old streaming advanced,             7118772
-silesia.tar,                        small hash log,                     old streaming advanced,             6587952
-silesia.tar,                        small chain log,                    old streaming advanced,             4943312
-silesia.tar,                        explicit params,                    old streaming advanced,             4808618
+silesia.tar,                        small hash log,                     old streaming advanced,             6529235
+silesia.tar,                        small chain log,                    old streaming advanced,             4917021
+silesia.tar,                        explicit params,                    old streaming advanced,             4807401
 silesia.tar,                        uncompressed literals,              old streaming advanced,             4861427
-silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4281562
+silesia.tar,                        uncompressed literals optimal,      old streaming advanced,             4264392
 silesia.tar,                        huffman literals,                   old streaming advanced,             6190795
 silesia.tar,                        multithreaded with advanced params, old streaming advanced,             4861427
 github,                             level -5,                           old streaming advanced,             216734
@@ -799,13 +1279,13 @@
 github,                             level 4,                            old streaming advanced,             141104
 github,                             level 4 with dict,                  old streaming advanced,             41084
 github,                             level 5,                            old streaming advanced,             139399
-github,                             level 5 with dict,                  old streaming advanced,             39159
+github,                             level 5 with dict,                  old streaming advanced,             38633
 github,                             level 6,                            old streaming advanced,             139402
-github,                             level 6 with dict,                  old streaming advanced,             38749
+github,                             level 6 with dict,                  old streaming advanced,             38723
 github,                             level 7,                            old streaming advanced,             138676
-github,                             level 7 with dict,                  old streaming advanced,             38746
+github,                             level 7 with dict,                  old streaming advanced,             38744
 github,                             level 9,                            old streaming advanced,             138676
-github,                             level 9 with dict,                  old streaming advanced,             38993
+github,                             level 9 with dict,                  old streaming advanced,             38981
 github,                             level 13,                           old streaming advanced,             138676
 github,                             level 13 with dict,                 old streaming advanced,             39731
 github,                             level 16,                           old streaming advanced,             138676
@@ -839,14 +1319,14 @@
 github.tar,                         level 3 with dict,                  old streaming advanced,             38013
 github.tar,                         level 4,                            old streaming advanced,             38467
 github.tar,                         level 4 with dict,                  old streaming advanced,             38063
-github.tar,                         level 5,                            old streaming advanced,             39788
-github.tar,                         level 5 with dict,                  old streaming advanced,             39310
-github.tar,                         level 6,                            old streaming advanced,             39603
-github.tar,                         level 6 with dict,                  old streaming advanced,             39279
-github.tar,                         level 7,                            old streaming advanced,             39206
-github.tar,                         level 7 with dict,                  old streaming advanced,             38728
-github.tar,                         level 9,                            old streaming advanced,             36717
-github.tar,                         level 9 with dict,                  old streaming advanced,             36504
+github.tar,                         level 5,                            old streaming advanced,             39693
+github.tar,                         level 5 with dict,                  old streaming advanced,             39049
+github.tar,                         level 6,                            old streaming advanced,             39621
+github.tar,                         level 6 with dict,                  old streaming advanced,             38959
+github.tar,                         level 7,                            old streaming advanced,             39213
+github.tar,                         level 7 with dict,                  old streaming advanced,             38573
+github.tar,                         level 9,                            old streaming advanced,             36758
+github.tar,                         level 9 with dict,                  old streaming advanced,             36233
 github.tar,                         level 13,                           old streaming advanced,             35621
 github.tar,                         level 13 with dict,                 old streaming advanced,             36035
 github.tar,                         level 16,                           old streaming advanced,             40255
@@ -861,41 +1341,41 @@
 github.tar,                         small window log,                   old streaming advanced,             199561
 github.tar,                         small hash log,                     old streaming advanced,             129870
 github.tar,                         small chain log,                    old streaming advanced,             41669
-github.tar,                         explicit params,                    old streaming advanced,             41199
+github.tar,                         explicit params,                    old streaming advanced,             41227
 github.tar,                         uncompressed literals,              old streaming advanced,             38441
 github.tar,                         uncompressed literals optimal,      old streaming advanced,             32837
 github.tar,                         huffman literals,                   old streaming advanced,             42465
 github.tar,                         multithreaded with advanced params, old streaming advanced,             38441
-github,                             level -5 with dict,                 old streaming cdcit,                46718
-github,                             level -3 with dict,                 old streaming cdcit,                45395
-github,                             level -1 with dict,                 old streaming cdcit,                43170
-github,                             level 0 with dict,                  old streaming cdcit,                41148
-github,                             level 1 with dict,                  old streaming cdcit,                41682
-github,                             level 3 with dict,                  old streaming cdcit,                41148
-github,                             level 4 with dict,                  old streaming cdcit,                41251
-github,                             level 5 with dict,                  old streaming cdcit,                38938
-github,                             level 6 with dict,                  old streaming cdcit,                38632
-github,                             level 7 with dict,                  old streaming cdcit,                38771
-github,                             level 9 with dict,                  old streaming cdcit,                39332
-github,                             level 13 with dict,                 old streaming cdcit,                39743
-github,                             level 16 with dict,                 old streaming cdcit,                37577
-github,                             level 19 with dict,                 old streaming cdcit,                37576
-github,                             no source size with dict,           old streaming cdcit,                40654
-github.tar,                         level -5 with dict,                 old streaming cdcit,                45018
-github.tar,                         level -3 with dict,                 old streaming cdcit,                41886
-github.tar,                         level -1 with dict,                 old streaming cdcit,                41636
-github.tar,                         level 0 with dict,                  old streaming cdcit,                37956
-github.tar,                         level 1 with dict,                  old streaming cdcit,                38766
-github.tar,                         level 3 with dict,                  old streaming cdcit,                37956
-github.tar,                         level 4 with dict,                  old streaming cdcit,                37927
-github.tar,                         level 5 with dict,                  old streaming cdcit,                39209
-github.tar,                         level 6 with dict,                  old streaming cdcit,                38983
-github.tar,                         level 7 with dict,                  old streaming cdcit,                38584
-github.tar,                         level 9 with dict,                  old streaming cdcit,                36363
-github.tar,                         level 13 with dict,                 old streaming cdcit,                36372
-github.tar,                         level 16 with dict,                 old streaming cdcit,                39353
-github.tar,                         level 19 with dict,                 old streaming cdcit,                32676
-github.tar,                         no source size with dict,           old streaming cdcit,                38000
+github,                             level -5 with dict,                 old streaming cdict,                46718
+github,                             level -3 with dict,                 old streaming cdict,                45395
+github,                             level -1 with dict,                 old streaming cdict,                43170
+github,                             level 0 with dict,                  old streaming cdict,                41148
+github,                             level 1 with dict,                  old streaming cdict,                41682
+github,                             level 3 with dict,                  old streaming cdict,                41148
+github,                             level 4 with dict,                  old streaming cdict,                41251
+github,                             level 5 with dict,                  old streaming cdict,                38758
+github,                             level 6 with dict,                  old streaming cdict,                38671
+github,                             level 7 with dict,                  old streaming cdict,                38758
+github,                             level 9 with dict,                  old streaming cdict,                39437
+github,                             level 13 with dict,                 old streaming cdict,                39743
+github,                             level 16 with dict,                 old streaming cdict,                37577
+github,                             level 19 with dict,                 old streaming cdict,                37576
+github,                             no source size with dict,           old streaming cdict,                40654
+github.tar,                         level -5 with dict,                 old streaming cdict,                45018
+github.tar,                         level -3 with dict,                 old streaming cdict,                41886
+github.tar,                         level -1 with dict,                 old streaming cdict,                41636
+github.tar,                         level 0 with dict,                  old streaming cdict,                37956
+github.tar,                         level 1 with dict,                  old streaming cdict,                38766
+github.tar,                         level 3 with dict,                  old streaming cdict,                37956
+github.tar,                         level 4 with dict,                  old streaming cdict,                37927
+github.tar,                         level 5 with dict,                  old streaming cdict,                39037
+github.tar,                         level 6 with dict,                  old streaming cdict,                38962
+github.tar,                         level 7 with dict,                  old streaming cdict,                38582
+github.tar,                         level 9 with dict,                  old streaming cdict,                36350
+github.tar,                         level 13 with dict,                 old streaming cdict,                36372
+github.tar,                         level 16 with dict,                 old streaming cdict,                39353
+github.tar,                         level 19 with dict,                 old streaming cdict,                32676
+github.tar,                         no source size with dict,           old streaming cdict,                38000
 github,                             level -5 with dict,                 old streaming advanced cdict,       49562
 github,                             level -3 with dict,                 old streaming advanced cdict,       44956
 github,                             level -1 with dict,                 old streaming advanced cdict,       42383
@@ -903,10 +1383,10 @@
 github,                             level 1 with dict,                  old streaming advanced cdict,       42430
 github,                             level 3 with dict,                  old streaming advanced cdict,       41113
 github,                             level 4 with dict,                  old streaming advanced cdict,       41084
-github,                             level 5 with dict,                  old streaming advanced cdict,       39159
-github,                             level 6 with dict,                  old streaming advanced cdict,       38749
-github,                             level 7 with dict,                  old streaming advanced cdict,       38746
-github,                             level 9 with dict,                  old streaming advanced cdict,       38993
+github,                             level 5 with dict,                  old streaming advanced cdict,       38633
+github,                             level 6 with dict,                  old streaming advanced cdict,       38723
+github,                             level 7 with dict,                  old streaming advanced cdict,       38744
+github,                             level 9 with dict,                  old streaming advanced cdict,       38981
 github,                             level 13 with dict,                 old streaming advanced cdict,       39731
 github,                             level 16 with dict,                 old streaming advanced cdict,       40789
 github,                             level 19 with dict,                 old streaming advanced cdict,       37576
@@ -918,10 +1398,10 @@
 github.tar,                         level 1 with dict,                  old streaming advanced cdict,       39002
 github.tar,                         level 3 with dict,                  old streaming advanced cdict,       38013
 github.tar,                         level 4 with dict,                  old streaming advanced cdict,       38063
-github.tar,                         level 5 with dict,                  old streaming advanced cdict,       39310
-github.tar,                         level 6 with dict,                  old streaming advanced cdict,       39279
-github.tar,                         level 7 with dict,                  old streaming advanced cdict,       38728
-github.tar,                         level 9 with dict,                  old streaming advanced cdict,       36504
+github.tar,                         level 5 with dict,                  old streaming advanced cdict,       39049
+github.tar,                         level 6 with dict,                  old streaming advanced cdict,       38959
+github.tar,                         level 7 with dict,                  old streaming advanced cdict,       38573
+github.tar,                         level 9 with dict,                  old streaming advanced cdict,       36233
 github.tar,                         level 13 with dict,                 old streaming advanced cdict,       36035
 github.tar,                         level 16 with dict,                 old streaming advanced cdict,       38736
 github.tar,                         level 19 with dict,                 old streaming advanced cdict,       32876
diff --git a/tests/regression/test.c b/tests/regression/test.c
index 5e28d4c..1de6be8 100644
--- a/tests/regression/test.c
+++ b/tests/regression/test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/roundTripCrash.c b/tests/roundTripCrash.c
index 9e3b861..9aa208c 100644
--- a/tests/roundTripCrash.c
+++ b/tests/roundTripCrash.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/seqgen.c b/tests/seqgen.c
index 3461522..1e340c8 100644
--- a/tests/seqgen.c
+++ b/tests/seqgen.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/seqgen.h b/tests/seqgen.h
index e4948ea..cea3f55 100644
--- a/tests/seqgen.h
+++ b/tests/seqgen.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/test-license.py b/tests/test-license.py
index a7e2ba4..2247765 100755
--- a/tests/test-license.py
+++ b/tests/test-license.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # ################################################################
-# Copyright (c) 2016-2021, Facebook, Inc.
+# Copyright (c) Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
@@ -10,16 +10,12 @@
 # You may select, at your option, one of the above-listed licenses.
 # ################################################################
 
-import datetime
 import enum
 import glob
 import os
+import re
 import sys
 
-YEAR = datetime.datetime.now().year
-
-YEAR_STR = str(YEAR)
-
 ROOT = os.path.join(os.path.dirname(__file__), "..")
 
 RELDIRS = [
@@ -28,22 +24,26 @@
     "lib",
     "programs",
     "tests",
+    "contrib/linux-kernel",
 ]
 
-DIRS = [os.path.join(ROOT, d) for d in RELDIRS]
+REL_EXCLUDES = [
+    "contrib/linux-kernel/test/include",
+]
 
-class File(enum.Enum):
-    C = 1
-    H = 2
-    MAKE = 3
-    PY = 4
+def to_abs(d):
+    return os.path.normpath(os.path.join(ROOT, d)) + "/"
 
-SUFFIX = {
-    File.C: ".c",
-    File.H: ".h",
-    File.MAKE: "Makefile",
-    File.PY: ".py",
-}
+DIRS = [to_abs(d) for d in RELDIRS]
+EXCLUDES = [to_abs(d) for d in REL_EXCLUDES]
+
+SUFFIXES = [
+    ".c",
+    ".h",
+    "Makefile",
+    ".mk",
+    ".py",
+]
 
 # License should certainly be in the first 10 KB.
 MAX_BYTES = 10000
@@ -69,10 +69,13 @@
     # From divsufsort
     "divsufsort.c",
     "divsufsort.h",
+    # License is slightly different because it references GitHub
+    "linux_zstd.h",
 }
 
 
 def valid_copyright(lines):
+    YEAR_REGEX = re.compile("\d\d\d\d|present")
     for line in lines:
         line = line.strip()
         if "Copyright" not in line:
@@ -81,8 +84,9 @@
             return (False, f"Copyright line '{line}' contains 'present'!")
         if "Facebook, Inc" not in line:
             return (False, f"Copyright line '{line}' does not contain 'Facebook, Inc'")
-        if YEAR_STR not in line:
-            return (False, f"Copyright line '{line}' does not contain {YEAR}")
+        year = YEAR_REGEX.search(line)
+        if year is not None:
+            return (False, f"Copyright line '{line}' contains {year.group(0)}; it should be yearless")
         if " (c) " not in line:
             return (False, f"Copyright line '{line}' does not contain ' (c) '!")
         return (True, "")
@@ -107,35 +111,45 @@
     with open(filename, "r") as f:
         lines = f.readlines(MAX_BYTES)
     lines = lines[:min(len(lines), MAX_LINES)]
-                
+
     ok = True
     if os.path.basename(filename) not in COPYRIGHT_EXCEPTIONS:
         c_ok, c_msg = valid_copyright(lines)
         if not c_ok:
-            print(f"{filename}: {c_msg}")
+            print(f"{filename}: {c_msg}", file=sys.stderr)
             ok = False
     if os.path.basename(filename) not in LICENSE_EXCEPTIONS:
         l_ok, l_msg = valid_license(lines)
         if not l_ok:
-            print(f"{filename}: {l_msg}")
+            print(f"{filename}: {l_msg}", file=sys.stderr)
             ok = False
     return ok
 
 
+def exclude(filename):
+    for x in EXCLUDES:
+        if filename.startswith(x):
+            return True
+    return False
+
 def main():
     invalid_files = []
     for directory in DIRS:
-        for suffix in SUFFIX.values():
-            files = set(glob.glob(f"{directory}/*{suffix}"))
-            files |= set(glob.glob(f"{directory}/**/*{suffix}"))
+        for suffix in SUFFIXES:
+            files = set(glob.glob(f"{directory}/**/*{suffix}", recursive=True))
             for filename in files:
+                if exclude(filename):
+                    continue
                 if not valid_file(filename):
                     invalid_files.append(filename)
     if len(invalid_files) > 0:
-        print(f"Invalid files: {invalid_files}")
+        print("Fail!", file=sys.stderr)
+        for f in invalid_files:
+            print(f)
+        return 1
     else:
-        print("Pass!")
-    return len(invalid_files)
+        print("Pass!", file=sys.stderr)
+        return 0
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py
index 4b2dc6a..c86af7d 100755
--- a/tests/test-zstd-versions.py
+++ b/tests/test-zstd-versions.py
@@ -2,7 +2,7 @@
 """Test zstd interoperability between versions"""
 
 # ################################################################
-# Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the
diff --git a/tests/zbufftest.c b/tests/zbufftest.c
deleted file mode 100644
index 557e622..0000000
--- a/tests/zbufftest.c
+++ /dev/null
@@ -1,625 +0,0 @@
-/*
- * Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-
-/*-************************************
-*  Compiler specific
-**************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define _CRT_SECURE_NO_WARNINGS     /* fgets */
-#  pragma warning(disable : 4127)     /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4146)     /* disable: C4146: minus unsigned expression */
-#endif
-
-
-/*-************************************
-*  Includes
-**************************************/
-#include <stdlib.h>       /* free */
-#include <stdio.h>        /* fgets, sscanf */
-#include <string.h>       /* strcmp */
-#include "timefn.h"       /* UTIL_time_t */
-#include "mem.h"
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel */
-#include "zstd.h"         /* ZSTD_compressBound */
-#define ZBUFF_STATIC_LINKING_ONLY  /* ZBUFF_createCCtx_advanced */
-#include "zbuff.h"        /* ZBUFF_isError */
-#include "datagen.h"      /* RDG_genBuffer */
-#define XXH_STATIC_LINKING_ONLY
-#include "xxhash.h"       /* XXH64_* */
-#include "util.h"
-#include "assert.h"
-
-
-/*-************************************
-*  Constants
-**************************************/
-#define KB *(1U<<10)
-#define MB *(1U<<20)
-#define GB *(1U<<30)
-
-static const U32 nbTestsDefault = 10000;
-#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
-#define FUZ_COMPRESSIBILITY_DEFAULT 50
-static const U32 prime1 = 2654435761U;
-static const U32 prime2 = 2246822519U;
-
-
-
-/*-************************************
-*  Display Macros
-**************************************/
-#define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static U32 g_displayLevel = 2;
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stderr); } }
-
-static U64 g_clockTime = 0;
-
-
-/*-*******************************************************
-*  Fuzzer functions
-*********************************************************/
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a)<(b)?(a):(b))
-#define MAX(a,b) ((a)>(b)?(a):(b))
-/*! FUZ_rand() :
-    @return : a 27 bits random value, from a 32-bits `seed`.
-    `seed` is also modified */
-#  define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-static unsigned int FUZ_rand(unsigned int* seedPtr)
-{
-    U32 rand32 = *seedPtr;
-    rand32 *= prime1;
-    rand32 += prime2;
-    rand32  = FUZ_rotl32(rand32, 13);
-    *seedPtr = rand32;
-    return rand32 >> 5;
-}
-
-
-/*
-static unsigned FUZ_highbit32(U32 v32)
-{
-    unsigned nbBits = 0;
-    if (v32==0) return 0;
-    for ( ; v32 ; v32>>=1) nbBits++;
-    return nbBits;
-}
-*/
-
-static void* ZBUFF_allocFunction(void* opaque, size_t size)
-{
-    void* address = malloc(size);
-    (void)opaque;
-    /* DISPLAYLEVEL(4, "alloc %p, %d opaque=%p \n", address, (int)size, opaque); */
-    return address;
-}
-
-static void ZBUFF_freeFunction(void* opaque, void* address)
-{
-    (void)opaque;
-    /* if (address) DISPLAYLEVEL(4, "free %p opaque=%p \n", address, opaque); */
-    free(address);
-}
-
-static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
-{
-    int testResult = 0;
-    size_t CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
-    void* CNBuffer = malloc(CNBufferSize);
-    size_t const skippableFrameSize = 11;
-    size_t const compressedBufferSize = (8 + skippableFrameSize) + ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH);
-    void* compressedBuffer = malloc(compressedBufferSize);
-    size_t const decodedBufferSize = CNBufferSize;
-    void* decodedBuffer = malloc(decodedBufferSize);
-    size_t cSize, readSize, readSkipSize, genSize;
-    U32 testNb=0;
-    ZBUFF_CCtx* zc = ZBUFF_createCCtx_advanced(customMem);
-    ZBUFF_DCtx* zd = ZBUFF_createDCtx_advanced(customMem);
-
-    /* Create compressible test buffer */
-    if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
-        DISPLAY("Not enough memory, aborting\n");
-        goto _output_error;
-    }
-    RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
-
-    /* generate skippable frame */
-    MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
-    MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
-    cSize = skippableFrameSize + 8;
-
-    /* Basic compression test */
-    DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    ZBUFF_compressInitDictionary(zc, CNBuffer, 128 KB, 1);
-    readSize = CNBufferSize;
-    genSize = compressedBufferSize;
-    { size_t const r = ZBUFF_compressContinue(zc, ((char*)compressedBuffer)+cSize, &genSize, CNBuffer, &readSize);
-      if (ZBUFF_isError(r)) goto _output_error; }
-    if (readSize != CNBufferSize) goto _output_error;   /* entire input should be consumed */
-    cSize += genSize;
-    genSize = compressedBufferSize - cSize;
-    { size_t const r = ZBUFF_compressEnd(zc, ((char*)compressedBuffer)+cSize, &genSize);
-      if (r != 0) goto _output_error; }  /* error, or some data not flushed */
-    cSize += genSize;
-    DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
-
-    /* skippable frame test */
-    DISPLAYLEVEL(4, "test%3i : decompress skippable frame : ", testNb++);
-    ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-    readSkipSize = cSize;
-    genSize = CNBufferSize;
-    { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, compressedBuffer, &readSkipSize);
-      if (r != 0) goto _output_error; }
-    if (genSize != 0) goto _output_error;   /* skippable frame len is 0 */
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* Basic decompression test */
-    DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-    readSize = cSize - readSkipSize;
-    genSize = CNBufferSize;
-    { size_t const r = ZBUFF_decompressContinue(zd, decodedBuffer, &genSize, ((char*)compressedBuffer)+readSkipSize, &readSize);
-      if (r != 0) goto _output_error; }  /* should reach end of frame == 0; otherwise, some data left, or an error */
-    if (genSize != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
-    if (readSize+readSkipSize != cSize) goto _output_error;   /* should have read the entire frame */
-    DISPLAYLEVEL(4, "OK \n");
-
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCInSize : ", testNb++); { assert(ZBUFF_recommendedCInSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedCOutSize : ", testNb++); { assert(ZBUFF_recommendedCOutSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDInSize : ", testNb++); { assert(ZBUFF_recommendedDInSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-    DISPLAYLEVEL(4, "test%3i : ZBUFF_recommendedDOutSize : ", testNb++); { assert(ZBUFF_recommendedDOutSize() != 0); } DISPLAYLEVEL(4, "OK \n");
-
-    /* check regenerated data is byte exact */
-    DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-    {   size_t i;
-        for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
-    }   }
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* Byte-by-byte decompression test */
-    DISPLAYLEVEL(4, "test%3i : decompress byte-by-byte : ", testNb++);
-    {   size_t r, pIn=0, pOut=0;
-        do
-        {   ZBUFF_decompressInitDictionary(zd, CNBuffer, 128 KB);
-            r = 1;
-            while (r) {
-                size_t inS = 1;
-                size_t outS = 1;
-                r = ZBUFF_decompressContinue(zd, ((BYTE*)decodedBuffer)+pOut, &outS, ((BYTE*)compressedBuffer)+pIn, &inS);
-                pIn += inS;
-                pOut += outS;
-            }
-            readSize = pIn;
-            genSize = pOut;
-        } while (genSize==0);
-    }
-    if (genSize != CNBufferSize) goto _output_error;   /* should regenerate the same amount */
-    if (readSize != cSize) goto _output_error;   /* should have read the entire frame */
-    DISPLAYLEVEL(4, "OK \n");
-
-    /* check regenerated data is byte exact */
-    DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-    {   size_t i;
-        for (i=0; i<CNBufferSize; i++) {
-            if (((BYTE*)decodedBuffer)[i] != ((BYTE*)CNBuffer)[i]) goto _output_error;
-    }   }
-    DISPLAYLEVEL(4, "OK \n");
-
-_end:
-    ZBUFF_freeCCtx(zc);
-    ZBUFF_freeDCtx(zd);
-    free(CNBuffer);
-    free(compressedBuffer);
-    free(decodedBuffer);
-    return testResult;
-
-_output_error:
-    testResult = 1;
-    DISPLAY("Error detected in Unit tests ! \n");
-    goto _end;
-}
-
-
-static size_t findDiff(const void* buf1, const void* buf2, size_t max)
-{
-    const BYTE* b1 = (const BYTE*)buf1;
-    const BYTE* b2 = (const BYTE*)buf2;
-    size_t u;
-    for (u=0; u<max; u++) {
-        if (b1[u] != b2[u]) break;
-    }
-    return u;
-}
-
-static size_t FUZ_rLogLength(U32* seed, U32 logLength)
-{
-    size_t const lengthMask = ((size_t)1 << logLength) - 1;
-    return (lengthMask+1) + (FUZ_rand(seed) & lengthMask);
-}
-
-static size_t FUZ_randomLength(U32* seed, U32 maxLog)
-{
-    U32 const logLength = FUZ_rand(seed) % maxLog;
-    return FUZ_rLogLength(seed, logLength);
-}
-
-#define CHECK(cond, ...) if (cond) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \
-                         DISPLAY(" (seed %u, test nb %u)  \n", seed, testNb); goto _output_error; }
-
-static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
-{
-    static const U32 maxSrcLog = 24;
-    static const U32 maxSampleLog = 19;
-    BYTE* cNoiseBuffer[5];
-    size_t const srcBufferSize = (size_t)1<<maxSrcLog;
-    BYTE* copyBuffer;
-    size_t const copyBufferSize= srcBufferSize + (1<<maxSampleLog);
-    BYTE* cBuffer;
-    size_t const cBufferSize   = ZSTD_compressBound(srcBufferSize);
-    BYTE* dstBuffer;
-    size_t dstBufferSize = srcBufferSize;
-    U32 result = 0;
-    U32 testNb = 0;
-    U32 coreSeed = seed;
-    ZBUFF_CCtx* zc;
-    ZBUFF_DCtx* zd;
-    UTIL_time_t startClock = UTIL_getTime();
-
-    /* allocations */
-    zc = ZBUFF_createCCtx();
-    zd = ZBUFF_createDCtx();
-    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
-    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
-    copyBuffer= (BYTE*)malloc (copyBufferSize);
-    dstBuffer = (BYTE*)malloc (dstBufferSize);
-    cBuffer   = (BYTE*)malloc (cBufferSize);
-    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4] ||
-           !copyBuffer || !dstBuffer || !cBuffer || !zc || !zd,
-           "Not enough memory, fuzzer tests cancelled");
-
-    /* Create initial samples */
-    RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed);    /* pure noise */
-    RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed);    /* barely compressible */
-    RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
-    RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed);    /* highly compressible */
-    RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed);    /* sparse content */
-    memset(copyBuffer, 0x65, copyBufferSize);                             /* make copyBuffer considered initialized */
-
-    /* catch up testNb */
-    for (testNb=1; testNb < startTest; testNb++)
-        FUZ_rand(&coreSeed);
-
-    /* test loop */
-    for ( ; (testNb <= nbTests) || (UTIL_clockSpanMicro(startClock) < g_clockTime) ; testNb++ ) {
-        U32 lseed;
-        const BYTE* srcBuffer;
-        const BYTE* dict;
-        size_t maxTestSize, dictSize;
-        size_t cSize, totalTestSize, totalCSize, totalGenSize;
-        size_t errorCode;
-        U32 n, nbChunks;
-        XXH64_state_t xxhState;
-        U64 crcOrig;
-
-        /* init */
-        DISPLAYUPDATE(2, "\r%6u", testNb);
-        if (nbTests >= testNb) DISPLAYUPDATE(2, "/%6u   ", nbTests);
-        FUZ_rand(&coreSeed);
-        lseed = coreSeed ^ prime1;
-
-        /* states full reset (unsynchronized) */
-        /* some issues only happen when reusing states in a specific sequence of parameters */
-        if ((FUZ_rand(&lseed) & 0xFF) == 131) { ZBUFF_freeCCtx(zc); zc = ZBUFF_createCCtx(); }
-        if ((FUZ_rand(&lseed) & 0xFF) == 132) { ZBUFF_freeDCtx(zd); zd = ZBUFF_createDCtx(); }
-
-        /* srcBuffer selection [0-4] */
-        {   U32 buffNb = FUZ_rand(&lseed) & 0x7F;
-            if (buffNb & 7) buffNb=2;   /* most common : compressible (P) */
-            else {
-                buffNb >>= 3;
-                if (buffNb & 7) {
-                    const U32 tnb[2] = { 1, 3 };   /* barely/highly compressible */
-                    buffNb = tnb[buffNb >> 3];
-                } else {
-                    const U32 tnb[2] = { 0, 4 };   /* not compressible / sparse */
-                    buffNb = tnb[buffNb >> 3];
-            }   }
-            srcBuffer = cNoiseBuffer[buffNb];
-        }
-
-        /* compression init */
-        {   U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
-            U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1;
-            maxTestSize = FUZ_rLogLength(&lseed, testLog);
-            dictSize  = (FUZ_rand(&lseed)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0;
-            /* random dictionary selection */
-            {   size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize);
-                dict = srcBuffer + dictStart;
-            }
-            {   ZSTD_parameters params = ZSTD_getParams(cLevel, 0, dictSize);
-                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                {   size_t const initError = ZBUFF_compressInit_advanced(zc, dict, dictSize, params, ZSTD_CONTENTSIZE_UNKNOWN);
-                    CHECK (ZBUFF_isError(initError),"init error : %s", ZBUFF_getErrorName(initError));
-        }   }   }
-
-        /* multi-segments compression test */
-        XXH64_reset(&xxhState, 0);
-        nbChunks    = (FUZ_rand(&lseed) & 127) + 2;
-        for (n=0, cSize=0, totalTestSize=0 ; (n<nbChunks) && (totalTestSize < maxTestSize) ; n++) {
-            /* compress random chunk into random size dst buffer */
-            {   size_t readChunkSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - readChunkSize);
-
-                size_t const compressionError = ZBUFF_compressContinue(zc, cBuffer+cSize, &dstBuffSize, srcBuffer+srcStart, &readChunkSize);
-                CHECK (ZBUFF_isError(compressionError), "compression error : %s", ZBUFF_getErrorName(compressionError));
-
-                XXH64_update(&xxhState, srcBuffer+srcStart, readChunkSize);
-                memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, readChunkSize);
-                cSize += dstBuffSize;
-                totalTestSize += readChunkSize;
-            }
-
-            /* random flush operation, to mess around */
-            if ((FUZ_rand(&lseed) & 15) == 0) {
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                size_t const flushError = ZBUFF_compressFlush(zc, cBuffer+cSize, &dstBuffSize);
-                CHECK (ZBUFF_isError(flushError), "flush error : %s", ZBUFF_getErrorName(flushError));
-                cSize += dstBuffSize;
-        }   }
-
-        /* final frame epilogue */
-        {   size_t remainingToFlush = (size_t)(-1);
-            while (remainingToFlush) {
-                size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t dstBuffSize = MIN(cBufferSize - cSize, randomDstSize);
-                U32 const enoughDstSize = dstBuffSize >= remainingToFlush;
-                remainingToFlush = ZBUFF_compressEnd(zc, cBuffer+cSize, &dstBuffSize);
-                CHECK (ZBUFF_isError(remainingToFlush), "flush error : %s", ZBUFF_getErrorName(remainingToFlush));
-                CHECK (enoughDstSize && remainingToFlush, "ZBUFF_compressEnd() not fully flushed (%u remaining), but enough space available", (U32)remainingToFlush);
-                cSize += dstBuffSize;
-        }   }
-        crcOrig = XXH64_digest(&xxhState);
-
-        /* multi - fragments decompression test */
-        ZBUFF_decompressInitDictionary(zd, dict, dictSize);
-        errorCode = 1;
-        for (totalCSize = 0, totalGenSize = 0 ; errorCode ; ) {
-            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-            errorCode = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
-            CHECK (ZBUFF_isError(errorCode), "decompression error : %s", ZBUFF_getErrorName(errorCode));
-            totalGenSize += dstBuffSize;
-            totalCSize += readCSrcSize;
-        }
-        CHECK (errorCode != 0, "frame not fully decoded");
-        CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
-        CHECK (totalCSize != cSize, "compressed data should be fully read")
-        { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0);
-          if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize);
-          CHECK (crcDest!=crcOrig, "decompressed data corrupted"); }
-
-        /*=====   noisy/erroneous src decompression test   =====*/
-
-        /* add some noise */
-        {   U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2;
-            U32 nn; for (nn=0; nn<nbNoiseChunks; nn++) {
-                size_t const randomNoiseSize = FUZ_randomLength(&lseed, maxSampleLog);
-                size_t const noiseSize  = MIN((cSize/3) , randomNoiseSize);
-                size_t const noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseSize);
-                size_t const cStart = FUZ_rand(&lseed) % (cSize - noiseSize);
-                memcpy(cBuffer+cStart, srcBuffer+noiseStart, noiseSize);
-        }   }
-
-        /* try decompression on noisy data */
-        ZBUFF_decompressInit(zd);
-        totalCSize = 0;
-        totalGenSize = 0;
-        while ( (totalCSize < cSize) && (totalGenSize < dstBufferSize) ) {
-            size_t readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog);
-            size_t dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize);
-            size_t const decompressError = ZBUFF_decompressContinue(zd, dstBuffer+totalGenSize, &dstBuffSize, cBuffer+totalCSize, &readCSrcSize);
-            if (ZBUFF_isError(decompressError)) break;   /* error correctly detected */
-            totalGenSize += dstBuffSize;
-            totalCSize += readCSrcSize;
-    }   }
-    DISPLAY("\r%u fuzzer tests completed   \n", testNb);
-
-_cleanup:
-    ZBUFF_freeCCtx(zc);
-    ZBUFF_freeDCtx(zd);
-    free(cNoiseBuffer[0]);
-    free(cNoiseBuffer[1]);
-    free(cNoiseBuffer[2]);
-    free(cNoiseBuffer[3]);
-    free(cNoiseBuffer[4]);
-    free(copyBuffer);
-    free(cBuffer);
-    free(dstBuffer);
-    return result;
-
-_output_error:
-    result = 1;
-    goto _cleanup;
-}
-
-
-/*-*******************************************************
-*  Command line
-*********************************************************/
-static int FUZ_usage(const char* programName)
-{
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [args]\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "Arguments :\n");
-    DISPLAY( " -i#    : Nb of tests (default:%u) \n", nbTestsDefault);
-    DISPLAY( " -s#    : Select seed (default:prompt user)\n");
-    DISPLAY( " -t#    : Select starting test number (default:0)\n");
-    DISPLAY( " -P#    : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT);
-    DISPLAY( " -v     : verbose\n");
-    DISPLAY( " -p     : pause at the end\n");
-    DISPLAY( " -h     : display help and exit\n");
-    return 0;
-}
-
-
-int main(int argc, const char** argv)
-{
-    U32 seed=0;
-    int seedset=0;
-    int argNb;
-    int nbTests = nbTestsDefault;
-    int testNb = 0;
-    int proba = FUZ_COMPRESSIBILITY_DEFAULT;
-    int result=0;
-    U32 mainPause = 0;
-    const char* programName = argv[0];
-    ZSTD_customMem customMem = { ZBUFF_allocFunction, ZBUFF_freeFunction, NULL };
-    ZSTD_customMem customNULL = { NULL, NULL, NULL };
-
-    /* Check command line */
-    for(argNb=1; argNb<argc; argNb++) {
-        const char* argument = argv[argNb];
-        if(!argument) continue;   /* Protection if argument empty */
-
-        /* Parsing commands. Aggregated commands are allowed */
-        if (argument[0]=='-') {
-            argument++;
-
-            while (*argument!=0) {
-                switch(*argument)
-                {
-                case 'h':
-                    return FUZ_usage(programName);
-                case 'v':
-                    argument++;
-                    g_displayLevel=4;
-                    break;
-                case 'q':
-                    argument++;
-                    g_displayLevel--;
-                    break;
-                case 'p': /* pause at the end */
-                    argument++;
-                    mainPause = 1;
-                    break;
-
-                case 'i':
-                    argument++;
-                    nbTests=0; g_clockTime=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        nbTests *= 10;
-                        nbTests += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 'T':
-                    argument++;
-                    nbTests=0; g_clockTime=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        g_clockTime *= 10;
-                        g_clockTime += *argument - '0';
-                        argument++;
-                    }
-                    if (*argument=='m') g_clockTime *=60, argument++;
-                    if (*argument=='n') argument++;
-                    g_clockTime *= SEC_TO_MICRO;
-                    break;
-
-                case 's':
-                    argument++;
-                    seed=0;
-                    seedset=1;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        seed *= 10;
-                        seed += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 't':
-                    argument++;
-                    testNb=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        testNb *= 10;
-                        testNb += *argument - '0';
-                        argument++;
-                    }
-                    break;
-
-                case 'P':   /* compressibility % */
-                    argument++;
-                    proba=0;
-                    while ((*argument>='0') && (*argument<='9')) {
-                        proba *= 10;
-                        proba += *argument - '0';
-                        argument++;
-                    }
-                    if (proba<0) proba=0;
-                    if (proba>100) proba=100;
-                    break;
-
-                default:
-                    return FUZ_usage(programName);
-                }
-    }   }   }   /* for(argNb=1; argNb<argc; argNb++) */
-
-    /* Get Seed */
-    DISPLAY("Starting zstd_buffered tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), ZSTD_VERSION_STRING);
-
-    if (!seedset) {
-        time_t const t = time(NULL);
-        U32 const h = XXH32(&t, sizeof(t), 1);
-        seed = h % 10000;
-    }
-    DISPLAY("Seed = %u\n", seed);
-    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);
-
-    if (nbTests<=0) nbTests=1;
-
-    if (testNb==0) {
-        result = basicUnitTests(0, ((double)proba) / 100, customNULL);  /* constant seed for predictability */
-        if (!result) {
-            DISPLAYLEVEL(4, "Unit tests using customMem :\n")
-            result = basicUnitTests(0, ((double)proba) / 100, customMem);  /* use custom memory allocation functions */
-    }   }
-
-    if (!result)
-        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
-
-    if (mainPause) {
-        int unused;
-        DISPLAY("Press Enter \n");
-        unused = getchar();
-        (void)unused;
-    }
-    return result;
-}
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 688fec6..bbef903 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,6 +28,7 @@
 #include <assert.h>       /* assert */
 #include "timefn.h"       /* UTIL_time_t, UTIL_getTime */
 #include "mem.h"
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, we still test some deprecated functions */
 #define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
 #include "zstd.h"         /* ZSTD_compressBound */
 #include "zstd_errors.h"  /* ZSTD_error_srcSize_wrong */
@@ -321,7 +322,9 @@
 
     /* Basic compression test using dict */
     DISPLAYLEVEL(3, "test%3i : skipframe + compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    CHECK_Z( ZSTD_initCStream_usingDict(zc, CNBuffer, dictSize, 1 /* cLevel */) );
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_loadDictionary(zc, CNBuffer, dictSize) );
     outBuff.dst = (char*)(compressedBuffer)+cSize;
     assert(compressedBufferSize > cSize);
     outBuff.size = compressedBufferSize - cSize;
@@ -368,7 +371,7 @@
     }
 
     /* Attempt bad compression parameters */
-    DISPLAYLEVEL(3, "test%3i : use bad compression parameters : ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : use bad compression parameters with ZSTD_initCStream_advanced : ", testNb++);
     {   size_t r;
         ZSTD_parameters params = ZSTD_getParams(1, 0, 0);
         params.cParams.minMatch = 2;
@@ -539,7 +542,10 @@
     DISPLAYLEVEL(3, "OK\n");
     /* _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
-    CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) );
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -559,7 +565,10 @@
 
     /* wrong _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : too large srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    ZSTD_initCStream_srcSize(zc, 1, CNBufferSize+1);
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize+1) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -574,7 +583,10 @@
 
     /* wrong _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : too small srcSize : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    ZSTD_initCStream_srcSize(zc, 1, CNBufferSize-1);
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_refCDict(zc, NULL) );
+    CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 1) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize-1) );
     outBuff.dst = (char*)(compressedBuffer);
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -587,9 +599,9 @@
     }
 
     DISPLAYLEVEL(3, "test%3i : wrong srcSize !contentSizeFlag : %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH-1);
-    {   ZSTD_parameters params = ZSTD_getParams(1, CNBufferSize, 0);
-        params.fParams.contentSizeFlag = 0;
-        CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, CNBufferSize - MIN(CNBufferSize, 200 KB)));
+    {   CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_contentSizeFlag, 0) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, CNBufferSize - MIN(CNBufferSize, 200 KB)) );
         outBuff.dst = (char*)compressedBuffer;
         outBuff.size = compressedBufferSize;
         outBuff.pos = 0;
@@ -609,7 +621,9 @@
     /* use 1 */
     {   size_t const inSize = 513;
         DISPLAYLEVEL(5, "use1 ");
-        ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize);   /* needs btopt + search3 to trigger hashLog3 */
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 19) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, inSize) );
         inBuff.src = CNBuffer;
         inBuff.size = inSize;
         inBuff.pos = 0;
@@ -626,7 +640,9 @@
     /* use 2 */
     {   size_t const inSize = 1025;   /* will not continue, because tables auto-adjust and are therefore different size */
         DISPLAYLEVEL(5, "use2 ");
-        ZSTD_initCStream_advanced(zc, NULL, 0, ZSTD_getParams(19, inSize, 0), inSize);   /* needs btopt + search3 to trigger hashLog3 */
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, 19) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, inSize) );
         inBuff.src = CNBuffer;
         inBuff.size = inSize;
         inBuff.pos = 0;
@@ -672,7 +688,7 @@
     cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1);
     CHECK_Z(cSize);
     {   ZSTD_DCtx* dctx = ZSTD_createDCtx();
-        size_t const dctxSize0 = ZSTD_sizeof_DCtx(dctx);        
+        size_t const dctxSize0 = ZSTD_sizeof_DCtx(dctx);
         size_t dctxSize1;
         CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1));
 
@@ -735,7 +751,7 @@
             CHECK(ZSTD_getErrorCode(r) != ZSTD_error_dstBuffer_wrong, "Must error but got %s", ZSTD_getErrorName(r));
         }
         DISPLAYLEVEL(3, "OK \n");
-        
+
         DISPLAYLEVEL(3, "test%3i : ZSTD_decompressStream() buffered output : ", testNb++);
         ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
         CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 0));
@@ -1274,7 +1290,7 @@
     if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error;
     DISPLAYLEVEL(3, "OK \n");
 
-    DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly : ", testNb++);
+    DISPLAYLEVEL(3, "test%3i : pledgedSrcSize == 0 behaves properly with ZSTD_initCStream_advanced : ", testNb++);
     {   ZSTD_parameters params = ZSTD_getParams(5, 0, 0);
         params.fParams.contentSizeFlag = 1;
         CHECK_Z( ZSTD_initCStream_advanced(zc, NULL, 0, params, 0) );
@@ -1290,7 +1306,8 @@
     cSize = outBuff.pos;
     if (ZSTD_findDecompressedSize(compressedBuffer, cSize) != 0) goto _output_error;
 
-    ZSTD_resetCStream(zc, 0); /* resetCStream should treat 0 as unknown */
+    CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+    CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
     outBuff.dst = compressedBuffer;
     outBuff.size = compressedBufferSize;
     outBuff.pos = 0;
@@ -1434,7 +1451,8 @@
         CHECK_Z(ZSTD_initCStream_srcSize(zc, 11, ZSTD_CONTENTSIZE_UNKNOWN));
         CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level));
         CHECK(level != 11, "Compression level does not match");
-        ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN);
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
         CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_compressionLevel, &level));
         CHECK(level != 11, "Compression level does not match");
     }
@@ -1444,7 +1462,8 @@
     {   ZSTD_parameters const params = ZSTD_getParams(9, 0, 0);
         CHECK_Z(ZSTD_initCStream_advanced(zc, NULL, 0, params, ZSTD_CONTENTSIZE_UNKNOWN));
         CHECK(badParameters(zc, params), "Compression parameters do not match");
-        ZSTD_resetCStream(zc, ZSTD_CONTENTSIZE_UNKNOWN);
+        CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+        CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, ZSTD_CONTENTSIZE_UNKNOWN) );
         CHECK(badParameters(zc, params), "Compression parameters do not match");
     }
     DISPLAYLEVEL(3, "OK \n");
@@ -1836,8 +1855,9 @@
             && oldTestLog /* at least one test happened */ && resetAllowed) {
             maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2);
             maxTestSize = MIN(maxTestSize, srcBufferSize-16);
-            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize;
-                CHECK_Z( ZSTD_resetCStream(zc, pledgedSrcSize) );
+            {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; 
+                CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+                CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
             }
         } else {
             U32 const testLog = FUZ_rand(&lseed) % maxSrcLog;
@@ -1855,11 +1875,13 @@
                 dict = srcBuffer + dictStart;
             }
             {   U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize;
-                ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize);
-                params.fParams.checksumFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
-                params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
-                CHECK_Z ( ZSTD_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
+                CHECK_Z( ZSTD_CCtx_reset(zc, ZSTD_reset_session_only) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, cLevel) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_contentSizeFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_c_dictIDFlag, FUZ_rand(&lseed) & 1) );
+                CHECK_Z( ZSTD_CCtx_setPledgedSrcSize(zc, pledgedSrcSize) );
+                CHECK_Z( ZSTD_CCtx_loadDictionary(zc, dict, dictSize) );
         }   }
 
         /* multi-segments compression test */
@@ -2215,6 +2237,7 @@
                 }
 
                 if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
+                if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );
 
                 /* Apply parameters */
                 if (opaqueAPI) {
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index 127f611..5993e51 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -264,9 +264,22 @@
                     ZSTD_outBuffer outBuffer;
                     ZSTD_CStream* zbc = ZSTD_createCStream();
                     size_t rSize;
+                    ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
+
+                    if (!cctxParams) EXM_THROW(1, "ZSTD_createCCtxParams() allocation failure");
                     if (zbc == NULL) EXM_THROW(1, "ZSTD_createCStream() allocation failure");
-                    rSize = ZSTD_initCStream_advanced(zbc, dictBuffer, dictBufferSize, zparams, avgSize);
-                    if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_initCStream_advanced() failed : %s", ZSTD_getErrorName(rSize));
+
+                    {   int initErr = 0;
+                        initErr |= ZSTD_isError(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only));
+                        initErr |= ZSTD_isError(ZSTD_CCtxParams_init_advanced(cctxParams, zparams));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_setParametersUsingCCtxParams(zbc, cctxParams));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(zbc, avgSize));
+                        initErr |= ZSTD_isError(ZSTD_CCtx_loadDictionary(zbc, dictBuffer, dictBufferSize));
+
+                        ZSTD_freeCCtxParams(cctxParams);
+                        if (initErr) EXM_THROW(1, "CCtx init failed!");
+                    }
+
                     do {
                         U32 blockNb;
                         for (blockNb=0; blockNb<nbBlocks; blockNb++) {
diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c
index b73bf59..ceb2393 100644
--- a/zlibWrapper/zstd_zlibwrapper.c
+++ b/zlibWrapper/zstd_zlibwrapper.c
@@ -205,12 +205,21 @@
     if (zwc == NULL || zwc->zbc == NULL) return Z_STREAM_ERROR;
 
     if (!pledgedSrcSize) pledgedSrcSize = zwc->pledgedSrcSize;
-    {   ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
-        size_t initErr;
+    {   unsigned initErr = 0;
+        ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
+        ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
+        if (!cctxParams) return Z_STREAM_ERROR;
         LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d minMatch=%d strategy=%d\n",
                     (int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.minMatch, params.cParams.strategy);
-        initErr = ZSTD_initCStream_advanced(zwc->zbc, dict, dictSize, params, pledgedSrcSize);
-        if (ZSTD_isError(initErr)) return Z_STREAM_ERROR;
+
+        initErr |= ZSTD_isError(ZSTD_CCtx_reset(zwc->zbc, ZSTD_reset_session_only));
+        initErr |= ZSTD_isError(ZSTD_CCtxParams_init_advanced(cctxParams, params));
+        initErr |= ZSTD_isError(ZSTD_CCtx_setParametersUsingCCtxParams(zwc->zbc, cctxParams));
+        initErr |= ZSTD_isError(ZSTD_CCtx_setPledgedSrcSize(zwc->zbc, pledgedSrcSize));
+        initErr |= ZSTD_isError(ZSTD_CCtx_loadDictionary(zwc->zbc, dict, dictSize));
+
+        ZSTD_freeCCtxParams(cctxParams);
+        if (initErr) return Z_STREAM_ERROR;
     }
 
     return Z_OK;