/* Copyright 2013 Google Inc. All Rights Reserved. Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* Transformations on dictionary words. */ #ifndef BROTLI_DEC_TRANSFORM_H_ #define BROTLI_DEC_TRANSFORM_H_ #include #include "./port.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif enum WordTransformType { kIdentity = 0, kOmitLast1 = 1, kOmitLast2 = 2, kOmitLast3 = 3, kOmitLast4 = 4, kOmitLast5 = 5, kOmitLast6 = 6, kOmitLast7 = 7, kOmitLast8 = 8, kOmitLast9 = 9, kUppercaseFirst = 10, kUppercaseAll = 11, kOmitFirst1 = 12, kOmitFirst2 = 13, kOmitFirst3 = 14, kOmitFirst4 = 15, kOmitFirst5 = 16, kOmitFirst6 = 17, kOmitFirst7 = 18, kOmitFirst8 = 19, kOmitFirst9 = 20 }; typedef struct { const uint8_t prefix_id; const uint8_t transform; const uint8_t suffix_id; } Transform; static const char kPrefixSuffix[208] = "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0" " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0" " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0" " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous "; enum { /* EMPTY = "" SP = " " DQUOT = "\"" SQUOT = "'" CLOSEBR = "]" OPEN = "(" SLASH = "/" NBSP = non-breaking space "\0xc2\xa0" */ kPFix_EMPTY = 0, kPFix_SP = 1, kPFix_COMMASP = 3, kPFix_SPofSPtheSP = 6, kPFix_SPtheSP = 9, kPFix_eSP = 12, kPFix_SPofSP = 15, kPFix_sSP = 20, kPFix_DOT = 23, kPFix_SPandSP = 25, kPFix_SPinSP = 31, kPFix_DQUOT = 36, kPFix_SPtoSP = 38, kPFix_DQUOTGT = 43, kPFix_NEWLINE = 46, kPFix_DOTSP = 48, kPFix_CLOSEBR = 51, kPFix_SPforSP = 53, kPFix_SPaSP = 59, kPFix_SPthatSP = 63, kPFix_SQUOT = 70, kPFix_SPwithSP = 72, kPFix_SPfromSP = 79, kPFix_SPbySP = 86, kPFix_OPEN = 91, kPFix_DOTSPTheSP = 93, kPFix_SPonSP = 100, kPFix_SPasSP = 105, kPFix_SPisSP = 110, kPFix_ingSP = 115, kPFix_NEWLINETAB = 120, kPFix_COLON = 123, kPFix_edSP = 125, kPFix_EQDQUOT = 129, kPFix_SPatSP = 132, kPFix_lySP = 137, kPFix_COMMA = 141, kPFix_EQSQUOT = 143, kPFix_DOTcomSLASH = 146, kPFix_DOTSPThisSP = 152, kPFix_SPnotSP = 160, kPFix_erSP = 166, kPFix_alSP = 170, kPFix_fulSP = 174, kPFix_iveSP = 179, kPFix_lessSP = 184, kPFix_estSP = 190, kPFix_izeSP = 195, kPFix_NBSP = 200, kPFix_ousSP = 203 }; static const Transform kTransforms[] = { { kPFix_EMPTY, kIdentity, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SP }, { kPFix_SP, kIdentity, kPFix_SP }, { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY }, { kPFix_EMPTY, kUppercaseFirst, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_SPtheSP }, { kPFix_SP, kIdentity, kPFix_EMPTY }, { kPFix_sSP, kIdentity, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_SPofSP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SPandSP }, { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY }, { kPFix_COMMASP, kIdentity, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_COMMASP }, { kPFix_SP, kUppercaseFirst, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_SPinSP }, { kPFix_EMPTY, kIdentity, kPFix_SPtoSP }, { kPFix_eSP, kIdentity, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_DQUOT }, { kPFix_EMPTY, kIdentity, kPFix_DOT }, { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT }, { kPFix_EMPTY, kIdentity, kPFix_NEWLINE }, { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR }, { kPFix_EMPTY, kIdentity, kPFix_SPforSP }, { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SPaSP }, { kPFix_EMPTY, kIdentity, kPFix_SPthatSP }, { kPFix_SP, kUppercaseFirst, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_DOTSP }, { kPFix_DOT, kIdentity, kPFix_EMPTY }, { kPFix_SP, kIdentity, kPFix_COMMASP }, { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SPwithSP }, { kPFix_EMPTY, kIdentity, kPFix_SQUOT }, { kPFix_EMPTY, kIdentity, kPFix_SPfromSP }, { kPFix_EMPTY, kIdentity, kPFix_SPbySP }, { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY }, { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY }, { kPFix_SPtheSP, kIdentity, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP }, { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SPonSP }, { kPFix_EMPTY, kIdentity, kPFix_SPasSP }, { kPFix_EMPTY, kIdentity, kPFix_SPisSP }, { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast1, kPFix_ingSP }, { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB }, { kPFix_EMPTY, kIdentity, kPFix_COLON }, { kPFix_SP, kIdentity, kPFix_DOTSP }, { kPFix_EMPTY, kIdentity, kPFix_edSP }, { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY }, { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_OPEN }, { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP }, { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_SPatSP }, { kPFix_EMPTY, kIdentity, kPFix_lySP }, { kPFix_SPtheSP, kIdentity, kPFix_SPofSP }, { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY }, { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY }, { kPFix_SP, kUppercaseFirst, kPFix_COMMASP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT }, { kPFix_DOT, kIdentity, kPFix_OPEN }, { kPFix_EMPTY, kUppercaseAll, kPFix_SP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT }, { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT }, { kPFix_SP, kIdentity, kPFix_DOT }, { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY }, { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT }, { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP }, { kPFix_EMPTY, kIdentity, kPFix_COMMA }, { kPFix_DOT, kIdentity, kPFix_SP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN }, { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT }, { kPFix_EMPTY, kIdentity, kPFix_SPnotSP }, { kPFix_SP, kIdentity, kPFix_EQDQUOT }, { kPFix_EMPTY, kIdentity, kPFix_erSP }, { kPFix_SP, kUppercaseAll, kPFix_SP }, { kPFix_EMPTY, kIdentity, kPFix_alSP }, { kPFix_SP, kUppercaseAll, kPFix_EMPTY }, { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT }, { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT }, { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP }, { kPFix_SP, kIdentity, kPFix_OPEN }, { kPFix_EMPTY, kIdentity, kPFix_fulSP }, { kPFix_SP, kUppercaseFirst, kPFix_DOTSP }, { kPFix_EMPTY, kIdentity, kPFix_iveSP }, { kPFix_EMPTY, kIdentity, kPFix_lessSP }, { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT }, { kPFix_EMPTY, kIdentity, kPFix_estSP }, { kPFix_SP, kUppercaseFirst, kPFix_DOT }, { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT }, { kPFix_SP, kIdentity, kPFix_EQSQUOT }, { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA }, { kPFix_EMPTY, kIdentity, kPFix_izeSP }, { kPFix_EMPTY, kUppercaseAll, kPFix_DOT }, { kPFix_NBSP, kIdentity, kPFix_EMPTY }, { kPFix_SP, kIdentity, kPFix_COMMA }, { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT }, { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT }, { kPFix_EMPTY, kIdentity, kPFix_ousSP }, { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP }, { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT }, { kPFix_SP, kUppercaseFirst, kPFix_COMMA }, { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT }, { kPFix_SP, kUppercaseAll, kPFix_COMMASP }, { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA }, { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN }, { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP }, { kPFix_SP, kUppercaseAll, kPFix_DOT }, { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT }, { kPFix_SP, kUppercaseAll, kPFix_DOTSP }, { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT }, { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT }, { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT }, }; static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); static int ToUpperCase(uint8_t* p) { if (p[0] < 0xc0) { if (p[0] >= 'a' && p[0] <= 'z') { p[0] ^= 32; } return 1; } /* An overly simplified uppercasing model for UTF-8. */ if (p[0] < 0xe0) { p[1] ^= 32; return 2; } /* An arbitrary transform for three byte characters. */ p[2] ^= 5; return 3; } static BROTLI_NOINLINE int TransformDictionaryWord( uint8_t* dst, const uint8_t* word, int len, int transform) { int idx = 0; { const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id]; while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } } { const int t = kTransforms[transform].transform; int i = 0; int skip = t - (kOmitFirst1 - 1); if (skip > 0) { word += skip; len -= skip; } else if (t <= kOmitLast9) { len -= t; } while (i < len) { dst[idx++] = word[i++]; } if (t == kUppercaseFirst) { ToUpperCase(&dst[idx - len]); } else if (t == kUppercaseAll) { uint8_t* uppercase = &dst[idx - len]; while (len > 0) { int step = ToUpperCase(uppercase); uppercase += step; len -= step; } } } { const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id]; while (*suffix) { dst[idx++] = (uint8_t)*suffix++; } return idx; } } #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ #endif #endif /* BROTLI_DEC_TRANSFORM_H_ */