/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Transformations on dictionary words. */
#ifndef BROTLI_DEC_TRANSFORM_H_ | |
#define BROTLI_DEC_TRANSFORM_H_ | |
#include "../common/types.h" | |
#include "./port.h" | |
#if defined(__cplusplus) || defined(c_plusplus) | |
extern "C" { | |
#endif | |
/* Elementary operations that can be applied to a dictionary word.

   The numeric values are load-bearing, not just labels:
     - kIdentity..kOmitLast9 (0..9) are used directly as the number of
       trailing bytes to drop from the word;
     - kOmitFirst1..kOmitFirst9 (12..20) are converted to the number of
       leading bytes to drop by subtracting (kOmitFirst1 - 1).
   Keep each group contiguous and in this order. */
enum WordTransformType {
  kIdentity       = 0,
  kOmitLast1      = 1,
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,
  kUppercaseFirst = 10,
  kUppercaseAll   = 11,
  kOmitFirst1     = 12,
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};
/* One row of the transform table: emit a prefix, then the (modified)
   dictionary word, then a suffix. */
typedef struct {
  const uint8_t prefix_id;  /* Byte offset of the prefix in kPrefixSuffix. */
  const uint8_t transform;  /* A WordTransformType value. */
  const uint8_t suffix_id;  /* Byte offset of the suffix in kPrefixSuffix. */
} Transform;
/* All prefix and suffix strings, packed back to back and separated by NUL
   bytes.  A string is addressed by the byte offset of its first character
   (the kPFix_* constants) and read up to the next NUL.  Offset 0 is a NUL,
   i.e. the empty string.  The final entry ("ous ") is terminated by the
   literal's implicit NUL, which brings the total to exactly 208 bytes. */
static const char kPrefixSuffix[208] =
    "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
    " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
    " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
    " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
/* Byte offsets into kPrefixSuffix, one per distinct prefix/suffix string.
   Some entries deliberately point into the middle of a longer string and
   share its tail: kPFix_SPtheSP (9) and kPFix_eSP (12) both lie inside
   " of the " which starts at offset 6. */
enum {
  /* Abbreviations used in the names below:
       EMPTY = ""
       SP = " "
       DQUOT = "\""
       SQUOT = "'"
       CLOSEBR = "]"
       OPEN = "("
       SLASH = "/"
       NBSP = non-breaking space "\xc2\xa0"
  */
  kPFix_EMPTY = 0,
  kPFix_SP = 1,
  kPFix_COMMASP = 3,
  kPFix_SPofSPtheSP = 6,
  kPFix_SPtheSP = 9,
  kPFix_eSP = 12,
  kPFix_SPofSP = 15,
  kPFix_sSP = 20,
  kPFix_DOT = 23,
  kPFix_SPandSP = 25,
  kPFix_SPinSP = 31,
  kPFix_DQUOT = 36,
  kPFix_SPtoSP = 38,
  kPFix_DQUOTGT = 43,
  kPFix_NEWLINE = 46,
  kPFix_DOTSP = 48,
  kPFix_CLOSEBR = 51,
  kPFix_SPforSP = 53,
  kPFix_SPaSP = 59,
  kPFix_SPthatSP = 63,
  kPFix_SQUOT = 70,
  kPFix_SPwithSP = 72,
  kPFix_SPfromSP = 79,
  kPFix_SPbySP = 86,
  kPFix_OPEN = 91,
  kPFix_DOTSPTheSP = 93,
  kPFix_SPonSP = 100,
  kPFix_SPasSP = 105,
  kPFix_SPisSP = 110,
  kPFix_ingSP = 115,
  kPFix_NEWLINETAB = 120,
  kPFix_COLON = 123,
  kPFix_edSP = 125,
  kPFix_EQDQUOT = 129,
  kPFix_SPatSP = 132,
  kPFix_lySP = 137,
  kPFix_COMMA = 141,
  kPFix_EQSQUOT = 143,
  kPFix_DOTcomSLASH = 146,
  kPFix_DOTSPThisSP = 152,
  kPFix_SPnotSP = 160,
  kPFix_erSP = 166,
  kPFix_alSP = 170,
  kPFix_fulSP = 174,
  kPFix_iveSP = 179,
  kPFix_lessSP = 184,
  kPFix_estSP = 190,
  kPFix_izeSP = 195,
  kPFix_NBSP = 200,
  kPFix_ousSP = 203
};
static const Transform kTransforms[] = { | |
{ kPFix_EMPTY, kIdentity, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SP }, | |
{ kPFix_SP, kIdentity, kPFix_SP }, | |
{ kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, | |
{ kPFix_SP, kIdentity, kPFix_EMPTY }, | |
{ kPFix_sSP, kIdentity, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPofSP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPandSP }, | |
{ kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, | |
{ kPFix_COMMASP, kIdentity, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_COMMASP }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPinSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, | |
{ kPFix_eSP, kIdentity, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, | |
{ kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPforSP }, | |
{ kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPaSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DOTSP }, | |
{ kPFix_DOT, kIdentity, kPFix_EMPTY }, | |
{ kPFix_SP, kIdentity, kPFix_COMMASP }, | |
{ kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPbySP }, | |
{ kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, | |
{ kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPonSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPasSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPisSP }, | |
{ kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, | |
{ kPFix_EMPTY, kIdentity, kPFix_COLON }, | |
{ kPFix_SP, kIdentity, kPFix_DOTSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_edSP }, | |
{ kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_OPEN }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, | |
{ kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPatSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_lySP }, | |
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, | |
{ kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, | |
{ kPFix_DOT, kIdentity, kPFix_OPEN }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_SP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, | |
{ kPFix_SP, kIdentity, kPFix_DOT }, | |
{ kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, | |
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_COMMA }, | |
{ kPFix_DOT, kIdentity, kPFix_SP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, | |
{ kPFix_SP, kIdentity, kPFix_EQDQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_erSP }, | |
{ kPFix_SP, kUppercaseAll, kPFix_SP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_alSP }, | |
{ kPFix_SP, kUppercaseAll, kPFix_EMPTY }, | |
{ kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, | |
{ kPFix_SP, kIdentity, kPFix_OPEN }, | |
{ kPFix_EMPTY, kIdentity, kPFix_fulSP }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_iveSP }, | |
{ kPFix_EMPTY, kIdentity, kPFix_lessSP }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_estSP }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_DOT }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, | |
{ kPFix_SP, kIdentity, kPFix_EQSQUOT }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, | |
{ kPFix_EMPTY, kIdentity, kPFix_izeSP }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, | |
{ kPFix_NBSP, kIdentity, kPFix_EMPTY }, | |
{ kPFix_SP, kIdentity, kPFix_COMMA }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, | |
{ kPFix_EMPTY, kIdentity, kPFix_ousSP }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, | |
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_COMMA }, | |
{ kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, | |
{ kPFix_SP, kUppercaseAll, kPFix_COMMASP }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, | |
{ kPFix_SP, kUppercaseAll, kPFix_DOT }, | |
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, | |
{ kPFix_SP, kUppercaseAll, kPFix_DOTSP }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, | |
{ kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, | |
{ kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, | |
}; | |
/* Number of rows in kTransforms; valid transform ids are
   0 .. kNumTransforms - 1.  The cast makes the size_t -> int narrowing
   explicit (the row count is far below INT_MAX). */
static const int kNumTransforms =
    (int)(sizeof(kTransforms) / sizeof(kTransforms[0]));
/* Upper-cases, in place, the character starting at |p| using a deliberately
   crude model, and returns how many bytes that character is taken to occupy.
   Only the first byte decides the length:

     p[0] <  0xc0 : 1 byte.  'a'..'z' has bit 5 flipped (becomes 'A'..'Z');
                    any other value is left untouched.
     p[0] <  0xe0 : 2 bytes. Bit 5 of p[1] is flipped unconditionally.
     otherwise    : 3 bytes. p[2] is XORed with 5.

   No length is passed in, so the caller must make sure p[1] (or p[2]) is
   writable whenever p[0] >= 0xc0 (or >= 0xe0). */
static int ToUpperCase(uint8_t* p) {
  if (p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for utf-8. */
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  p[2] ^= 5;
  return 3;
}
static BROTLI_NOINLINE int TransformDictionaryWord( | |
uint8_t* dst, const uint8_t* word, int len, int transform) { | |
int idx = 0; | |
{ | |
const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; | |
while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } | |
} | |
{ | |
const int t = kTransforms[transform].transform; | |
int i = 0; | |
int skip = t - (kOmitFirst1 - 1); | |
if (skip > 0) { | |
word += skip; | |
len -= skip; | |
} else if (t <= kOmitLast9) { | |
len -= t; | |
} | |
while (i < len) { dst[idx++] = word[i++]; } | |
if (t == kUppercaseFirst) { | |
ToUpperCase(&dst[idx - len]); | |
} else if (t == kUppercaseAll) { | |
uint8_t* uppercase = &dst[idx - len]; | |
while (len > 0) { | |
int step = ToUpperCase(uppercase); | |
uppercase += step; | |
len -= step; | |
} | |
} | |
} | |
{ | |
const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; | |
while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } | |
return idx; | |
} | |
} | |
#if defined(__cplusplus) || defined(c_plusplus) | |
} /* extern "C" */ | |
#endif | |
#endif /* BROTLI_DEC_TRANSFORM_H_ */ |