Merge pull request #1699 from felixhandte/seekable-gitignore Add New Seekable Compression Example to .gitignore

commit: 2314906b688a08fa6efdcb1f44ee71903d9d444e [log] [tgz]
author: Felix Handte <w@felixhandte.com> Wed Jul 24 19:07:55 2019 -0400
committer: GitHub <noreply@github.com> Wed Jul 24 19:07:55 2019 -0400
tree: ae0b0e1b38ab10e72468ce2833ee425a1f734ef7
parent: 1f16245e9ea9a4f9231d93493c6e913e795159cc [diff]
parent: 15da57820d0c6efa72648bd8171c0856e3fbfd5d [diff]
diff --git a/CHANGELOG b/CHANGELOG
index a3a1f61..e5b5afd 100644
--- a/CHANGELOG
+++ b/CHANGELOG

@@ -1,3 +1,9 @@
+v1.4.2
+bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
+bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
+misc: Validate blocks are smaller than size limit by @vivekmg (#1685)
+misc: Restructure source files by @ephiepark (#1679)
+
 v1.4.1
 bug: Fix data corruption in niche use cases by @terrelln (#1659)
 bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)

diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c
index dcc186b..627a691 100644
--- a/contrib/largeNbDicts/largeNbDicts.c
+++ b/contrib/largeNbDicts/largeNbDicts.c

@@ -559,7 +559,7 @@
         CONTROL(BMK_isSuccessful_runOutcome(outcome));
 
         BMK_runTime_t const result = BMK_extract_runTime(outcome);
-        U64 const dTime_ns = result.nanoSecPerRun;
+        double const dTime_ns = result.nanoSecPerRun;
         double const dTime_sec = (double)dTime_ns / 1000000000;
         size_t const srcSize = result.sumOfReturn;
         double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index f281120..6d8e74f 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html

@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.1 Manual</title>
+<title>zstd 1.4.2 Manual</title>
 </head>
 <body>
-<h1>zstd 1.4.1 Manual</h1>
+<h1>zstd 1.4.2 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>

diff --git a/lib/zstd.h b/lib/zstd.h
index a1910ee..4a1f816 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h

@@ -71,7 +71,7 @@
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  1
+#define ZSTD_VERSION_RELEASE  2
 
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber(void);   /**< to check runtime library version */

diff --git a/programs/README.md b/programs/README.md
index d9ef5dd..c3a5590 100644
--- a/programs/README.md
+++ b/programs/README.md

@@ -157,8 +157,8 @@
 
 Dictionary builder :
 --train ## : create a dictionary from a training set of files
---train-cover[=k=#,d=#,steps=#,split=#] : use the cover algorithm with optional args
---train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#] : use the fastcover algorithm with optional args
+--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args
+--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,shrink[=#],accel=#] : use the fastcover algorithm with optional args
 --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
  -o file : `file` is dictionary name (default: dictionary)
 --maxdict=# : limit dictionary to specified size (default: 112640)

diff --git a/programs/zstd.1 b/programs/zstd.1
index beca9da..b25d353 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1

@@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "July 2019" "zstd 1.4.1" "User Commands"
+.TH "ZSTD" "1" "July 2019" "zstd 1.4.2" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@@ -229,11 +229,11 @@
 A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
 .
 .TP
-\fB\-\-train\-cover[=k#,d=#,steps=#,split=#]\fR
-Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\.
+\fB\-\-train\-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If the \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is given without a value, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
 .
 .IP
-Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. With \fIshrink\fR enabled, training starts from a truncated dictionary of minimum size and doubles its size until the compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
 .
 .IP
 Examples:
@@ -253,6 +253,12 @@
 .IP
 \fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
 .
+.IP
+\fBzstd \-\-train\-cover=shrink FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
+.
 .TP
 \fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
 Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and a different default value of split\. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.

diff --git a/programs/zstd.1.md b/programs/zstd.1.md
index 93c6fa4..3ab2667 100644
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md

@@ -244,13 +244,15 @@
     This compares favorably to 4 bytes default.
     However, it's up to the dictionary manager to not assign twice the same ID to
     2 different dictionaries.
-* `--train-cover[=k#,d=#,steps=#,split=#]`:
+* `--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]`:
     Select parameters for the default dictionary builder algorithm named cover.
     If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
     If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
     If _steps_ is not specified, then the default value of 40 is used.
     If _split_ is not specified or split <= 0, then the default value of 100 is used.
     Requires that _d_ <= _k_.
+    If the _shrink_ flag is not used, then the default value for _shrinkDict_ of 0 is used.
+    If _shrink_ is given without a value, then the default value for _shrinkDictMaxRegression_ of 1 is used.
 
     Selects segments of size _k_ with highest score to put in the dictionary.
     The score of a segment is computed by the sum of the frequencies of all the
@@ -262,6 +264,9 @@
     If _split_ is 100, all input samples are used for both training and testing
     to find optimal _d_ and _k_ to build dictionary.
     Supports multithreading if `zstd` is compiled with threading support.
+    With _shrink_ enabled, training starts from a truncated dictionary of minimum size
+    and doubles its size until the compression ratio of the truncated dictionary is at most
+    _shrinkDictMaxRegression%_ worse than the compression ratio of the largest dictionary.
 
     Examples:
 
@@ -275,6 +280,10 @@
 
     `zstd --train-cover=k=50,split=60 FILEs`
 
+    `zstd --train-cover=shrink FILEs`
+
+    `zstd --train-cover=shrink=2 FILEs`
+
 * `--train-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]`:
     Same as cover but with extra parameters _f_ and _accel_ and a different default value of split.
     If _split_ is not specified, then it tries _split_ = 75.

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index a13c924..de286cd 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c

@@ -294,13 +294,14 @@
 
 
 #ifndef ZSTD_NODICT
+
+static const unsigned kDefaultRegression = 1;
 /**
  * parseCoverParameters() :
  * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
  * @return 1 means that cover parameters were correct
  * @return 0 in case of malformed parameters
  */
-static const unsigned kDefaultRegression = 1;
 static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
 {
     memset(params, 0, sizeof(*params));

diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index d0a0292..0bc6ed7 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1

@@ -1,5 +1,5 @@
 .
-.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.1" "User Commands"
+.TH "ZSTDGREP" "1" "July 2019" "zstd 1.4.2" "User Commands"
 .
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files

diff --git a/programs/zstdless.1 b/programs/zstdless.1
index 4e21d5a..73e9504 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1

@@ -1,5 +1,5 @@
 .
-.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.1" "User Commands"
+.TH "ZSTDLESS" "1" "July 2019" "zstd 1.4.2" "User Commands"
 .
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
commit	2314906b688a08fa6efdcb1f44ee71903d9d444e	[log] [tgz]
author	Felix Handte <w@felixhandte.com>	Wed Jul 24 19:07:55 2019 -0400
committer	GitHub <noreply@github.com>	Wed Jul 24 19:07:55 2019 -0400
tree	ae0b0e1b38ab10e72468ce2833ee425a1f734ef7
parent	1f16245e9ea9a4f9231d93493c6e913e795159cc [diff]
parent	15da57820d0c6efa72648bd8171c0856e3fbfd5d [diff]