blob: 3ad8857460908fb5e2a60200cee6bd9b9fad829e [file] [log] [blame]
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include "random.h"
#include "io.h"
#include "util.h"
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/*-*************************************
* Constants
***************************************/
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
#define DEFAULT_k 200
#define DEFAULT_OUTPUTFILE "defaultDict"
#define DEFAULT_DICTID 0
/*-*************************************
* RANDOM
***************************************/
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
unsigned maxDictSize,
ZDICT_random_params_t *params) {
unsigned const displayLevel = params->zParams.notificationLevel;
void* const dictBuffer = malloc(maxDictSize);
int result = 0;
/* Checks */
if (!dictBuffer)
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
{ size_t dictSize;
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *params);
DISPLAYLEVEL(2, "k=%u\n", params->k);
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _done;
}
/* save dict */
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
saveDict(dictFileName, dictBuffer, dictSize);
}
/* clean up */
_done:
free(dictBuffer);
return result;
}
int main(int argCount, const char* argv[])
{
int displayLevel = 2;
const char* programName = argv[0];
int operationResult = 0;
/* Initialize arguments to default values */
unsigned k = DEFAULT_k;
const char* outputFile = DEFAULT_OUTPUTFILE;
unsigned dictID = DEFAULT_DICTID;
unsigned maxDictSize = g_defaultMaxDictSize;
/* Initialize table to store input files */
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
unsigned filenameIdx = 0;
/* Parse arguments */
for (int i = 1; i < argCount; i++) {
const char* argument = argv[i];
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "in=")) {
filenameTable[filenameIdx] = argument;
filenameIdx++;
continue;
}
if (longCommandWArg(&argument, "out=")) {
outputFile = argument;
continue;
}
DISPLAYLEVEL(1, "Incorrect parameters\n");
operationResult = 1;
return operationResult;
}
char* fileNamesBuf = NULL;
unsigned fileNamesNb = filenameIdx;
int followLinks = 0; /* follow directory recursively */
const char** extendedFileList = NULL;
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
&fileNamesNb, followLinks);
if (extendedFileList) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
free((void*)filenameTable);
filenameTable = extendedFileList;
filenameIdx = fileNamesNb;
}
size_t blockSize = 0;
ZDICT_random_params_t params;
ZDICT_params_t zParams;
zParams.compressionLevel = DEFAULT_CLEVEL;
zParams.notificationLevel = displayLevel;
zParams.dictID = dictID;
params.zParams = zParams;
params.k = k;
sampleInfo* info = getSampleInfo(filenameTable,
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params);
/* Free allocated memory */
UTIL_freeFileList(extendedFileList, fileNamesBuf);
freeSampleInfo(info);
return operationResult;
}