/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Transformations on dictionary words. */

#ifndef BROTLI_DEC_TRANSFORM_H_
#define BROTLI_DEC_TRANSFORM_H_

#include <brotli/types.h>
#include "./port.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/* Elementary operations that a transform applies to the dictionary word
   itself (independently of the prefix and suffix that are added around it).

   The numeric values are significant and must not be changed:
     - kOmitLastN  == N, so the value is directly the number of trailing
       bytes to drop;
     - kOmitFirstN == kOmitFirst1 + (N - 1), so the number of leading bytes
       to drop is (value - (kOmitFirst1 - 1)).
   TransformDictionaryWord depends on both relations. */
enum WordTransformType {
  kIdentity       = 0,
  kOmitLast1      = 1,
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,
  kUppercaseFirst = 10,
  kUppercaseAll   = 11,
  kOmitFirst1     = 12,
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};

/* One row of the transform table.
   prefix_id and suffix_id are byte offsets into kPrefixSuffix at which a
   NUL-terminated string starts; transform holds a WordTransformType value
   describing what is done to the dictionary word placed between them. */
typedef struct {
  const uint8_t prefix_id;
  const uint8_t transform;
  const uint8_t suffix_id;
} Transform;

/* All prefix and suffix strings packed back to back, each terminated by a
   NUL byte. Entries are addressed by byte offset (see the kPFix_* constants);
   an offset may point into the middle of a longer entry in order to reuse its
   tail (for example " the " is the tail of " of the "). Offset 0 is the empty
   string. The final terminator is the implicit NUL of the string literal,
   which makes the array exactly 208 bytes long. */
static const char kPrefixSuffix[208] =
    "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
    " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
    " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
    " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";

/* Byte offsets of the individual prefix/suffix strings inside kPrefixSuffix.
   Each constant is the index of the first byte of a NUL-terminated string.

   Abbreviations used in the constant names:
     EMPTY   = ""
     SP      = " "
     DQUOT   = "\""
     SQUOT   = "'"
     CLOSEBR = "]"
     OPEN    = "("
     SLASH   = "/"
     NBSP    = non-breaking space "\xc2\xa0"
*/
enum {
  kPFix_EMPTY = 0,
  kPFix_SP = 1,
  kPFix_COMMASP = 3,
  kPFix_SPofSPtheSP = 6,
  kPFix_SPtheSP = 9,
  kPFix_eSP = 12,
  kPFix_SPofSP = 15,
  kPFix_sSP = 20,
  kPFix_DOT = 23,
  kPFix_SPandSP = 25,
  kPFix_SPinSP = 31,
  kPFix_DQUOT = 36,
  kPFix_SPtoSP = 38,
  kPFix_DQUOTGT = 43,
  kPFix_NEWLINE = 46,
  kPFix_DOTSP = 48,
  kPFix_CLOSEBR = 51,
  kPFix_SPforSP = 53,
  kPFix_SPaSP = 59,
  kPFix_SPthatSP = 63,
  kPFix_SQUOT = 70,
  kPFix_SPwithSP = 72,
  kPFix_SPfromSP = 79,
  kPFix_SPbySP = 86,
  kPFix_OPEN = 91,
  kPFix_DOTSPTheSP = 93,
  kPFix_SPonSP = 100,
  kPFix_SPasSP = 105,
  kPFix_SPisSP = 110,
  kPFix_ingSP = 115,
  kPFix_NEWLINETAB = 120,
  kPFix_COLON = 123,
  kPFix_edSP = 125,
  kPFix_EQDQUOT = 129,
  kPFix_SPatSP = 132,
  kPFix_lySP = 137,
  kPFix_COMMA = 141,
  kPFix_EQSQUOT = 143,
  kPFix_DOTcomSLASH = 146,
  kPFix_DOTSPThisSP = 152,
  kPFix_SPnotSP = 160,
  kPFix_erSP = 166,
  kPFix_alSP = 170,
  kPFix_fulSP = 174,
  kPFix_iveSP = 179,
  kPFix_lessSP = 184,
  kPFix_estSP = 190,
  kPFix_izeSP = 195,
  kPFix_NBSP = 200,
  kPFix_ousSP = 203
};

static const Transform kTransforms[] = {
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SP },
|
||
|
{ kPFix_SP, kIdentity, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
|
||
|
{ kPFix_SP, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_sSP, kIdentity, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPofSP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPandSP },
|
||
|
{ kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
|
||
|
{ kPFix_COMMASP, kIdentity, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_COMMASP },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPinSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
|
||
|
{ kPFix_eSP, kIdentity, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
|
||
|
{ kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPforSP },
|
||
|
{ kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPaSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DOTSP },
|
||
|
{ kPFix_DOT, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_SP, kIdentity, kPFix_COMMASP },
|
||
|
{ kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPbySP },
|
||
|
{ kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
|
||
|
{ kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPonSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPasSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPisSP },
|
||
|
{ kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_COLON },
|
||
|
{ kPFix_SP, kIdentity, kPFix_DOTSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_edSP },
|
||
|
{ kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_OPEN },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
|
||
|
{ kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPatSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_lySP },
|
||
|
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
|
||
|
{ kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
|
||
|
{ kPFix_DOT, kIdentity, kPFix_OPEN },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
|
||
|
{ kPFix_SP, kIdentity, kPFix_DOT },
|
||
|
{ kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_COMMA },
|
||
|
{ kPFix_DOT, kIdentity, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
|
||
|
{ kPFix_SP, kIdentity, kPFix_EQDQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_erSP },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_SP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_alSP },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_EMPTY },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
|
||
|
{ kPFix_SP, kIdentity, kPFix_OPEN },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_fulSP },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_iveSP },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_lessSP },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_estSP },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_DOT },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
|
||
|
{ kPFix_SP, kIdentity, kPFix_EQSQUOT },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_izeSP },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
|
||
|
{ kPFix_NBSP, kIdentity, kPFix_EMPTY },
|
||
|
{ kPFix_SP, kIdentity, kPFix_COMMA },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
|
||
|
{ kPFix_EMPTY, kIdentity, kPFix_ousSP },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
|
||
|
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_COMMA },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_COMMASP },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_DOT },
|
||
|
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_DOTSP },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
|
||
|
{ kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
|
||
|
{ kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
|
||
|
};
|
||
|
|
||
|
/* Number of rows in kTransforms, i.e. one past the largest valid transform
   id. The element count is a size_t; the cast makes the narrowing to int
   explicit (the table is far smaller than INT_MAX). */
static const int kNumTransforms =
    (int)(sizeof(kTransforms) / sizeof(kTransforms[0]));

/* Applies the "uppercase" step to the single character that starts at p and
   returns how many bytes that character is considered to occupy (1, 2 or 3).

   The character width is chosen from the first byte only:
     - p[0] <  0xc0: one byte; ASCII 'a'..'z' is mapped to 'A'..'Z', any
       other value is left untouched;
     - p[0] <  0xe0: two bytes; bit 5 of p[1] is flipped;
     - otherwise   : three bytes; p[2] is XOR-ed with 5.
   This is deliberately not real Unicode case mapping. Note that p[1] / p[2]
   are modified without any length check, so the caller must make sure those
   bytes are addressable. */
static int ToUpperCase(uint8_t* p) {
  if (p[0] < 0xc0) {
    if (p[0] >= 'a' && p[0] <= 'z') {
      /* Upper and lower case ASCII letters differ only in bit 5. */
      p[0] ^= 32;
    }
    return 1;
  }
  /* An overly simplified uppercasing model for UTF-8. */
  if (p[0] < 0xe0) {
    p[1] ^= 32;
    return 2;
  }
  /* An arbitrary transform for three byte characters. */
  p[2] ^= 5;
  return 3;
}

static BROTLI_NOINLINE int TransformDictionaryWord(
|
||
|
uint8_t* dst, const uint8_t* word, int len, int transform) {
|
||
|
int idx = 0;
|
||
|
{
|
||
|
const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
|
||
|
while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
|
||
|
}
|
||
|
{
|
||
|
const int t = kTransforms[transform].transform;
|
||
|
int i = 0;
|
||
|
int skip = t - (kOmitFirst1 - 1);
|
||
|
if (skip > 0) {
|
||
|
word += skip;
|
||
|
len -= skip;
|
||
|
} else if (t <= kOmitLast9) {
|
||
|
len -= t;
|
||
|
}
|
||
|
while (i < len) { dst[idx++] = word[i++]; }
|
||
|
if (t == kUppercaseFirst) {
|
||
|
ToUpperCase(&dst[idx - len]);
|
||
|
} else if (t == kUppercaseAll) {
|
||
|
uint8_t* uppercase = &dst[idx - len];
|
||
|
while (len > 0) {
|
||
|
int step = ToUpperCase(uppercase);
|
||
|
uppercase += step;
|
||
|
len -= step;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
{
|
||
|
const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
|
||
|
while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
|
||
|
return idx;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

#endif /* BROTLI_DEC_TRANSFORM_H_ */