Merge pull request #1443 from lz4/dictAPI promote dictionary API to stable GitOrigin-RevId: 76c48c1e43131c282b43e02efbb9a64378fbcffa Change-Id: I31d119de550f7aaf76654406837256f78d5b6ce4
diff --git a/lib/lz4.h b/lib/lz4.h index 5c79972..6202400 100644 --- a/lib/lz4.h +++ b/lib/lz4.h
@@ -129,8 +129,8 @@ /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 5 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) @@ -370,7 +370,7 @@ */ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); -/*! LZ4_loadDictSlow() : v1.9.5+ +/*! LZ4_loadDictSlow() : v1.10.0+ * Same as LZ4_loadDict(), * but uses a bit more cpu to reference the dictionary content more thoroughly. * This is expected to slightly improve compression ratio. @@ -379,6 +379,42 @@ */ LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); +/*! LZ4_attach_dictionary() : stable since v1.10.0 + * + * This allows efficient re-use of a static dictionary multiple times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references @dictionaryStream in-place. + * + * Several assumptions are made about the state of @dictionaryStream. + * Currently, only states which have been prepared by LZ4_loadDict() or + * LZ4_loadDictSlow() should be expected to work. + * + * Alternatively, the provided @dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. 
+ * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. + * @dictionaryStream (and its source buffer) must remain in-place / accessible / unchanged + * through the completion of the compression session. + * + * Note: there is no equivalent LZ4_attach_*() method on the decompression side + * because there is no initialization cost, hence no need to share the cost across multiple sessions. + * To decompress LZ4 blocks using a dictionary, attached or not, + * just employ the regular LZ4_setStreamDecode() for streaming, + * or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression. + */ +LZ4LIB_API void +LZ4_attach_dictionary(LZ4_stream_t* workingStream, + const LZ4_stream_t* dictionaryStream); + /*! LZ4_compress_fast_continue() : * Compress 'src' content using data from previously compressed blocks, for better compression ratio. * 'dst' buffer must be already allocated. @@ -580,37 +616,6 @@ */ int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration); -/*! LZ4_attach_dictionary() : - * This is an experimental API that allows - * efficient use of a static dictionary many times. - * - * Rather than re-loading the dictionary buffer into a working context before - * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a - * working LZ4_stream_t, this function introduces a no-copy setup mechanism, - * in which the working stream references the dictionary stream in-place. - * - * Several assumptions are made about the state of the dictionary stream. - * Currently, only streams which have been prepared by LZ4_loadDict() should - * be expected to work. 
- * - * Alternatively, the provided dictionaryStream may be NULL, - * in which case any existing dictionary stream is unset. - * - * If a dictionary is provided, it replaces any pre-existing stream history. - * The dictionary contents are the only history that can be referenced and - * logically immediately precede the data compressed in the first subsequent - * compression call. - * - * The dictionary will only remain attached to the working stream through the - * first compression call, at the end of which it is cleared. The dictionary - * stream (and source buffer) must remain in-place / accessible / unchanged - * through the completion of the first compression call on the stream. - */ -LZ4LIB_STATIC_API void -LZ4_attach_dictionary(LZ4_stream_t* workingStream, - const LZ4_stream_t* dictionaryStream); - - /*! In-place compression and decompression * * It's possible to have input and output sharing the same buffer,
diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 88d59df..b8ae322 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h
@@ -513,6 +513,109 @@ LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx); /* always successful */ +/********************************** + * Dictionary compression API + *********************************/ + +/* A Dictionary is useful for the compression of small messages (KB range). + * It dramatically improves compression efficiency. + * + * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful. + * Better results are generally achieved by using Zstandard's Dictionary Builder + * to generate a high-quality dictionary from a set of samples. + * + * The same dictionary will have to be used on the decompression side + * for decoding to be successful. + * To help identify the correct dictionary at decoding stage, + * the frame header allows optional embedding of a dictID field. + */ + +/*! LZ4F_compressBegin_usingDict() : stable since v1.10 + * Inits dictionary compression streaming, and writes the frame header into dstBuffer. + * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. + * @prefsPtr is optional : one may provide NULL as argument, + * however, it's the only way to provide dictID in the frame header. + * @dictBuffer must outlive the compression session. + * @return : number of bytes written into dstBuffer for the header, + * or an error code (which can be tested using LZ4F_isError()) + * NOTE: The LZ4Frame spec allows each independent block to be compressed with the dictionary, + * but this entry supports a more limited scenario, where only the first block uses the dictionary. + * This is still useful for small data, which only need one block anyway. + * For larger inputs, one may be more interested in LZ4F_compressFrame_usingCDict() below. + */ +LZ4FLIB_API size_t +LZ4F_compressBegin_usingDict(LZ4F_cctx* cctx, + void* dstBuffer, size_t dstCapacity, + const void* dictBuffer, size_t dictSize, + const LZ4F_preferences_t* prefsPtr); + +/*! 
LZ4F_decompress_usingDict() : stable since v1.10 + * Same as LZ4F_decompress(), using a predefined dictionary. + * Dictionary is used "in place", without any preprocessing. +** It must remain accessible throughout the entire frame decoding. */ +LZ4FLIB_API size_t +LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr, + void* dstBuffer, size_t* dstSizePtr, + const void* srcBuffer, size_t* srcSizePtr, + const void* dict, size_t dictSize, + const LZ4F_decompressOptions_t* decompressOptionsPtr); + +/***************************************** + * Bulk processing dictionary compression + *****************************************/ + +/* Loading a dictionary has a cost, since it involves construction of tables. + * The Bulk processing dictionary API makes it possible to share this cost + * over an arbitrary number of compression jobs, even concurrently, + * markedly improving compression latency for these cases. + * + * Note that there is no corresponding bulk API for the decompression side, + * because dictionary does not carry any initialization cost for decompression. + * Use the regular LZ4F_decompress_usingDict() there. + */ +typedef struct LZ4F_CDict_s LZ4F_CDict; + +/*! LZ4F_createCDict() : stable since v1.10 + * When compressing multiple messages / blocks using the same dictionary, it's recommended to initialize it just once. + * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. + * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after LZ4F_CDict creation, since its content is copied within CDict. */ +LZ4FLIB_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize); +LZ4FLIB_API void LZ4F_freeCDict(LZ4F_CDict* CDict); + +/*! LZ4F_compressFrame_usingCDict() : stable since v1.10 + * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary. 
+ * @cctx must point to a context created by LZ4F_createCompressionContext(). + * If @cdict==NULL, compress without a dictionary. + * @dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). + * If this condition is not respected, function will fail (@return an errorCode). + * The LZ4F_preferences_t structure is optional : one may provide NULL as argument, + * but it's not recommended, as it's the only way to provide @dictID in the frame header. + * @return : number of bytes written into @dst, + * or an error code if it fails (can be tested using LZ4F_isError()) + * Note: for larger inputs generating multiple independent blocks, + * this entry point uses the dictionary for each block. */ +LZ4FLIB_API size_t +LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* preferencesPtr); + +/*! LZ4F_compressBegin_usingCDict() : stable since v1.10 + * Inits streaming dictionary compression, and writes the frame header into dstBuffer. + * @dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. + * @prefsPtr is optional : one may provide NULL as argument, + * note however that it's the only way to insert a @dictID in the frame header. + * @cdict must outlive the compression session. + * @return : number of bytes written into dstBuffer for the header, + * or an error code, which can be tested using LZ4F_isError(). */ +LZ4FLIB_API size_t +LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx, + void* dstBuffer, size_t dstCapacity, + const LZ4F_CDict* cdict, + const LZ4F_preferences_t* prefsPtr); + #if defined (__cplusplus) } @@ -613,104 +716,6 @@ const LZ4F_compressOptions_t* cOptPtr); /********************************** - * Dictionary compression API - *********************************/ - -/* A Dictionary is useful for the compression of small messages (KB range). - * It dramatically improves compression efficiency. 
- * - * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful. - * Better results are generally achieved by using Zstandard's Dictionary Builder - * to generate a high-quality dictionary from a set of samples. - * - * The same dictionary will have to be used on the decompression side - * for decoding to be successful. - * To help identify the correct dictionary at decoding stage, - * the frame header allows optional embedding of a dictID field. - */ - -/*! LZ4F_compressBegin_usingDict() : - * Inits dictionary compression streaming, and writes the frame header into dstBuffer. - * `dstCapacity` must be >= LZ4F_HEADER_SIZE_MAX bytes. - * `prefsPtr` is optional : you may provide NULL as argument, - * however, it's the only way to provide dictID in the frame header. - * `dictBuffer` must outlive the compression session. - * @return : number of bytes written into dstBuffer for the header, - * or an error code (which can be tested using LZ4F_isError()) - * NOTE: this entry point doesn't fully exploit the spec, - * which allows each independent block to be compressed with the dictionary. - * Currently, only the first block uses the dictionary. - * This is still technically compliant, but less efficient for large inputs. - */ -LZ4FLIB_STATIC_API size_t -LZ4F_compressBegin_usingDict(LZ4F_cctx* cctx, - void* dstBuffer, size_t dstCapacity, - const void* dictBuffer, size_t dictSize, - const LZ4F_preferences_t* prefsPtr); - -/*! LZ4F_decompress_usingDict() : - * Same as LZ4F_decompress(), using a predefined dictionary. - * Dictionary is used "in place", without any preprocessing. -** It must remain accessible throughout the entire frame decoding. 
*/ -LZ4FLIB_STATIC_API size_t -LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr, - void* dstBuffer, size_t* dstSizePtr, - const void* srcBuffer, size_t* srcSizePtr, - const void* dict, size_t dictSize, - const LZ4F_decompressOptions_t* decompressOptionsPtr); - -/********************************** - * Bulk processing dictionary API - *********************************/ - -/* Loading a dictionary has a cost, since it involves construction of tables. - * The Bulk processing dictionary API makes it possible to share this cost - * over an arbitrary number of compression jobs, even concurrently, - * markedly improving compression latency for these cases. - */ -typedef struct LZ4F_CDict_s LZ4F_CDict; - -/*! LZ4_createCDict() : - * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once. - * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. - * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */ -LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize); -LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict); - -/*! LZ4_compressFrame_usingCDict() : - * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary. - * cctx must point to a context created by LZ4F_createCompressionContext(). - * If cdict==NULL, compress without a dictionary. - * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). - * If this condition is not respected, function will fail (@return an errorCode). - * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, - * but it's not recommended, as it's the only way to provide dictID in the frame header. - * @return : number of bytes written into dstBuffer. 
- * or an error code if it fails (can be tested using LZ4F_isError()) */ -LZ4FLIB_STATIC_API size_t -LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const LZ4F_CDict* cdict, - const LZ4F_preferences_t* preferencesPtr); - -/*! LZ4F_compressBegin_usingCDict() : - * Inits streaming dictionary compression, and writes the frame header into dstBuffer. - * `dstCapacity` must be >= LZ4F_HEADER_SIZE_MAX bytes. - * `prefsPtr` is optional : you may provide NULL as argument, - * however, it's the only way to provide dictID in the frame header. - * `cdict` must outlive the compression session. - * @return : number of bytes written into dstBuffer for the header, - * or an error code (which can be tested using LZ4F_isError()) */ -LZ4FLIB_STATIC_API size_t -LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx, - void* dstBuffer, size_t dstCapacity, - const LZ4F_CDict* cdict, - const LZ4F_preferences_t* prefsPtr); - - -/********************************** * Custom memory allocation *********************************/