fse_compress.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. /*
  2. * FSE : Finite State Entropy encoder
  3. * Copyright (C) 2013-2015, Yann Collet.
  4. *
  5. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are
  9. * met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following disclaimer
  15. * in the documentation and/or other materials provided with the
  16. * distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * This program is free software; you can redistribute it and/or modify it under
  31. * the terms of the GNU General Public License version 2 as published by the
  32. * Free Software Foundation. This program is dual-licensed; you may select
  33. * either version 2 of the GNU General Public License ("GPL") or BSD license
  34. * ("BSD").
  35. *
  36. * You can contact the author at :
  37. * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  38. */
  39. /* **************************************************************
  40. * Compiler specifics
  41. ****************************************************************/
  42. #define FORCE_INLINE static __always_inline
  43. /* **************************************************************
  44. * Includes
  45. ****************************************************************/
  46. #include "bitstream.h"
  47. #include "fse.h"
  48. #include <linux/compiler.h>
  49. #include <linux/kernel.h>
  50. #include <linux/math64.h>
  51. #include <linux/string.h> /* memcpy, memset */
  52. /* **************************************************************
  53. * Error Management
  54. ****************************************************************/
  55. #define FSE_STATIC_ASSERT(c) \
  56. { \
  57. enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
  58. } /* use only *after* variable declarations */
  59. /* **************************************************************
  60. * Templates
  61. ****************************************************************/
  62. /*
  63. designed to be included
  64. for type-specific functions (template emulation in C)
  65. Objective is to write these functions only once, for improved maintenance
  66. */
  67. /* safety checks */
  68. #ifndef FSE_FUNCTION_EXTENSION
  69. #error "FSE_FUNCTION_EXTENSION must be defined"
  70. #endif
  71. #ifndef FSE_FUNCTION_TYPE
  72. #error "FSE_FUNCTION_TYPE must be defined"
  73. #endif
  74. /* Function names */
  75. #define FSE_CAT(X, Y) X##Y
  76. #define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
  77. #define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
  78. /* Function templates */
  79. /* FSE_buildCTable_wksp() :
  80. * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
  81. * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
  82. * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
  83. */
  84. size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
  85. {
  86. U32 const tableSize = 1 << tableLog;
  87. U32 const tableMask = tableSize - 1;
  88. void *const ptr = ct;
  89. U16 *const tableU16 = ((U16 *)ptr) + 2;
  90. void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
  91. FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
  92. U32 const step = FSE_TABLESTEP(tableSize);
  93. U32 highThreshold = tableSize - 1;
  94. U32 *cumul;
  95. FSE_FUNCTION_TYPE *tableSymbol;
  96. size_t spaceUsed32 = 0;
  97. cumul = (U32 *)workspace + spaceUsed32;
  98. spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
  99. tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
  100. spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
  101. if ((spaceUsed32 << 2) > workspaceSize)
  102. return ERROR(tableLog_tooLarge);
  103. workspace = (U32 *)workspace + spaceUsed32;
  104. workspaceSize -= (spaceUsed32 << 2);
  105. /* CTable header */
  106. tableU16[-2] = (U16)tableLog;
  107. tableU16[-1] = (U16)maxSymbolValue;
  108. /* For explanations on how to distribute symbol values over the table :
  109. * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
  110. /* symbol start positions */
  111. {
  112. U32 u;
  113. cumul[0] = 0;
  114. for (u = 1; u <= maxSymbolValue + 1; u++) {
  115. if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
  116. cumul[u] = cumul[u - 1] + 1;
  117. tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
  118. } else {
  119. cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
  120. }
  121. }
  122. cumul[maxSymbolValue + 1] = tableSize + 1;
  123. }
  124. /* Spread symbols */
  125. {
  126. U32 position = 0;
  127. U32 symbol;
  128. for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
  129. int nbOccurences;
  130. for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
  131. tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
  132. position = (position + step) & tableMask;
  133. while (position > highThreshold)
  134. position = (position + step) & tableMask; /* Low proba area */
  135. }
  136. }
  137. if (position != 0)
  138. return ERROR(GENERIC); /* Must have gone through all positions */
  139. }
  140. /* Build table */
  141. {
  142. U32 u;
  143. for (u = 0; u < tableSize; u++) {
  144. FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
  145. tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
  146. }
  147. }
  148. /* Build Symbol Transformation Table */
  149. {
  150. unsigned total = 0;
  151. unsigned s;
  152. for (s = 0; s <= maxSymbolValue; s++) {
  153. switch (normalizedCounter[s]) {
  154. case 0: break;
  155. case -1:
  156. case 1:
  157. symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
  158. symbolTT[s].deltaFindState = total - 1;
  159. total++;
  160. break;
  161. default: {
  162. U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
  163. U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
  164. symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
  165. symbolTT[s].deltaFindState = total - normalizedCounter[s];
  166. total += normalizedCounter[s];
  167. }
  168. }
  169. }
  170. }
  171. return 0;
  172. }
  173. /*-**************************************************************
  174. * FSE NCount encoding-decoding
  175. ****************************************************************/
  176. size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
  177. {
  178. size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
  179. return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
  180. }
  181. static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
  182. unsigned writeIsSafe)
  183. {
  184. BYTE *const ostart = (BYTE *)header;
  185. BYTE *out = ostart;
  186. BYTE *const oend = ostart + headerBufferSize;
  187. int nbBits;
  188. const int tableSize = 1 << tableLog;
  189. int remaining;
  190. int threshold;
  191. U32 bitStream;
  192. int bitCount;
  193. unsigned charnum = 0;
  194. int previous0 = 0;
  195. bitStream = 0;
  196. bitCount = 0;
  197. /* Table Size */
  198. bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
  199. bitCount += 4;
  200. /* Init */
  201. remaining = tableSize + 1; /* +1 for extra accuracy */
  202. threshold = tableSize;
  203. nbBits = tableLog + 1;
  204. while (remaining > 1) { /* stops at 1 */
  205. if (previous0) {
  206. unsigned start = charnum;
  207. while (!normalizedCounter[charnum])
  208. charnum++;
  209. while (charnum >= start + 24) {
  210. start += 24;
  211. bitStream += 0xFFFFU << bitCount;
  212. if ((!writeIsSafe) && (out > oend - 2))
  213. return ERROR(dstSize_tooSmall); /* Buffer overflow */
  214. out[0] = (BYTE)bitStream;
  215. out[1] = (BYTE)(bitStream >> 8);
  216. out += 2;
  217. bitStream >>= 16;
  218. }
  219. while (charnum >= start + 3) {
  220. start += 3;
  221. bitStream += 3 << bitCount;
  222. bitCount += 2;
  223. }
  224. bitStream += (charnum - start) << bitCount;
  225. bitCount += 2;
  226. if (bitCount > 16) {
  227. if ((!writeIsSafe) && (out > oend - 2))
  228. return ERROR(dstSize_tooSmall); /* Buffer overflow */
  229. out[0] = (BYTE)bitStream;
  230. out[1] = (BYTE)(bitStream >> 8);
  231. out += 2;
  232. bitStream >>= 16;
  233. bitCount -= 16;
  234. }
  235. }
  236. {
  237. int count = normalizedCounter[charnum++];
  238. int const max = (2 * threshold - 1) - remaining;
  239. remaining -= count < 0 ? -count : count;
  240. count++; /* +1 for extra accuracy */
  241. if (count >= threshold)
  242. count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
  243. bitStream += count << bitCount;
  244. bitCount += nbBits;
  245. bitCount -= (count < max);
  246. previous0 = (count == 1);
  247. if (remaining < 1)
  248. return ERROR(GENERIC);
  249. while (remaining < threshold)
  250. nbBits--, threshold >>= 1;
  251. }
  252. if (bitCount > 16) {
  253. if ((!writeIsSafe) && (out > oend - 2))
  254. return ERROR(dstSize_tooSmall); /* Buffer overflow */
  255. out[0] = (BYTE)bitStream;
  256. out[1] = (BYTE)(bitStream >> 8);
  257. out += 2;
  258. bitStream >>= 16;
  259. bitCount -= 16;
  260. }
  261. }
  262. /* flush remaining bitStream */
  263. if ((!writeIsSafe) && (out > oend - 2))
  264. return ERROR(dstSize_tooSmall); /* Buffer overflow */
  265. out[0] = (BYTE)bitStream;
  266. out[1] = (BYTE)(bitStream >> 8);
  267. out += (bitCount + 7) / 8;
  268. if (charnum > maxSymbolValue + 1)
  269. return ERROR(GENERIC);
  270. return (out - ostart);
  271. }
  272. size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
  273. {
  274. if (tableLog > FSE_MAX_TABLELOG)
  275. return ERROR(tableLog_tooLarge); /* Unsupported */
  276. if (tableLog < FSE_MIN_TABLELOG)
  277. return ERROR(GENERIC); /* Unsupported */
  278. if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
  279. return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
  280. return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
  281. }
  282. /*-**************************************************************
  283. * Counting histogram
  284. ****************************************************************/
  285. /*! FSE_count_simple
  286. This function counts byte values within `src`, and store the histogram into table `count`.
  287. It doesn't use any additional memory.
  288. But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
  289. For this reason, prefer using a table `count` with 256 elements.
  290. @return : count of most numerous element
  291. */
  292. size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
  293. {
  294. const BYTE *ip = (const BYTE *)src;
  295. const BYTE *const end = ip + srcSize;
  296. unsigned maxSymbolValue = *maxSymbolValuePtr;
  297. unsigned max = 0;
  298. memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
  299. if (srcSize == 0) {
  300. *maxSymbolValuePtr = 0;
  301. return 0;
  302. }
  303. while (ip < end)
  304. count[*ip++]++;
  305. while (!count[maxSymbolValue])
  306. maxSymbolValue--;
  307. *maxSymbolValuePtr = maxSymbolValue;
  308. {
  309. U32 s;
  310. for (s = 0; s <= maxSymbolValue; s++)
  311. if (count[s] > max)
  312. max = count[s];
  313. }
  314. return (size_t)max;
  315. }
  316. /* FSE_count_parallel_wksp() :
  317. * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
  318. * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
  319. static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
  320. unsigned *const workSpace)
  321. {
  322. const BYTE *ip = (const BYTE *)source;
  323. const BYTE *const iend = ip + sourceSize;
  324. unsigned maxSymbolValue = *maxSymbolValuePtr;
  325. unsigned max = 0;
  326. U32 *const Counting1 = workSpace;
  327. U32 *const Counting2 = Counting1 + 256;
  328. U32 *const Counting3 = Counting2 + 256;
  329. U32 *const Counting4 = Counting3 + 256;
  330. memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
  331. /* safety checks */
  332. if (!sourceSize) {
  333. memset(count, 0, maxSymbolValue + 1);
  334. *maxSymbolValuePtr = 0;
  335. return 0;
  336. }
  337. if (!maxSymbolValue)
  338. maxSymbolValue = 255; /* 0 == default */
  339. /* by stripes of 16 bytes */
  340. {
  341. U32 cached = ZSTD_read32(ip);
  342. ip += 4;
  343. while (ip < iend - 15) {
  344. U32 c = cached;
  345. cached = ZSTD_read32(ip);
  346. ip += 4;
  347. Counting1[(BYTE)c]++;
  348. Counting2[(BYTE)(c >> 8)]++;
  349. Counting3[(BYTE)(c >> 16)]++;
  350. Counting4[c >> 24]++;
  351. c = cached;
  352. cached = ZSTD_read32(ip);
  353. ip += 4;
  354. Counting1[(BYTE)c]++;
  355. Counting2[(BYTE)(c >> 8)]++;
  356. Counting3[(BYTE)(c >> 16)]++;
  357. Counting4[c >> 24]++;
  358. c = cached;
  359. cached = ZSTD_read32(ip);
  360. ip += 4;
  361. Counting1[(BYTE)c]++;
  362. Counting2[(BYTE)(c >> 8)]++;
  363. Counting3[(BYTE)(c >> 16)]++;
  364. Counting4[c >> 24]++;
  365. c = cached;
  366. cached = ZSTD_read32(ip);
  367. ip += 4;
  368. Counting1[(BYTE)c]++;
  369. Counting2[(BYTE)(c >> 8)]++;
  370. Counting3[(BYTE)(c >> 16)]++;
  371. Counting4[c >> 24]++;
  372. }
  373. ip -= 4;
  374. }
  375. /* finish last symbols */
  376. while (ip < iend)
  377. Counting1[*ip++]++;
  378. if (checkMax) { /* verify stats will fit into destination table */
  379. U32 s;
  380. for (s = 255; s > maxSymbolValue; s--) {
  381. Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
  382. if (Counting1[s])
  383. return ERROR(maxSymbolValue_tooSmall);
  384. }
  385. }
  386. {
  387. U32 s;
  388. for (s = 0; s <= maxSymbolValue; s++) {
  389. count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
  390. if (count[s] > max)
  391. max = count[s];
  392. }
  393. }
  394. while (!count[maxSymbolValue])
  395. maxSymbolValue--;
  396. *maxSymbolValuePtr = maxSymbolValue;
  397. return (size_t)max;
  398. }
  399. /* FSE_countFast_wksp() :
  400. * Same as FSE_countFast(), but using an externally provided scratch buffer.
  401. * `workSpace` size must be table of >= `1024` unsigned */
  402. size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
  403. {
  404. if (sourceSize < 1500)
  405. return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
  406. return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
  407. }
  408. /* FSE_count_wksp() :
  409. * Same as FSE_count(), but using an externally provided scratch buffer.
  410. * `workSpace` size must be table of >= `1024` unsigned */
  411. size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
  412. {
  413. if (*maxSymbolValuePtr < 255)
  414. return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
  415. *maxSymbolValuePtr = 255;
  416. return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
  417. }
  418. /*-**************************************************************
  419. * FSE Compression Code
  420. ****************************************************************/
  421. /*! FSE_sizeof_CTable() :
  422. FSE_CTable is a variable size structure which contains :
  423. `U16 tableLog;`
  424. `U16 maxSymbolValue;`
  425. `U16 nextStateNumber[1 << tableLog];` // This size is variable
  426. `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
  427. Allocation is manual (C standard does not support variable-size structures).
  428. */
  429. size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
  430. {
  431. if (tableLog > FSE_MAX_TABLELOG)
  432. return ERROR(tableLog_tooLarge);
  433. return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
  434. }
  435. /* provides the minimum logSize to safely represent a distribution */
  436. static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
  437. {
  438. U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
  439. U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
  440. U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
  441. return minBits;
  442. }
  443. unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
  444. {
  445. U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
  446. U32 tableLog = maxTableLog;
  447. U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
  448. if (tableLog == 0)
  449. tableLog = FSE_DEFAULT_TABLELOG;
  450. if (maxBitsSrc < tableLog)
  451. tableLog = maxBitsSrc; /* Accuracy can be reduced */
  452. if (minBits > tableLog)
  453. tableLog = minBits; /* Need a minimum to safely represent all symbol values */
  454. if (tableLog < FSE_MIN_TABLELOG)
  455. tableLog = FSE_MIN_TABLELOG;
  456. if (tableLog > FSE_MAX_TABLELOG)
  457. tableLog = FSE_MAX_TABLELOG;
  458. return tableLog;
  459. }
  460. unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
  461. {
  462. return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
  463. }
  464. /* Secondary normalization method.
  465. To be used when primary method fails. */
  466. static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
  467. {
  468. short const NOT_YET_ASSIGNED = -2;
  469. U32 s;
  470. U32 distributed = 0;
  471. U32 ToDistribute;
  472. /* Init */
  473. U32 const lowThreshold = (U32)(total >> tableLog);
  474. U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
  475. for (s = 0; s <= maxSymbolValue; s++) {
  476. if (count[s] == 0) {
  477. norm[s] = 0;
  478. continue;
  479. }
  480. if (count[s] <= lowThreshold) {
  481. norm[s] = -1;
  482. distributed++;
  483. total -= count[s];
  484. continue;
  485. }
  486. if (count[s] <= lowOne) {
  487. norm[s] = 1;
  488. distributed++;
  489. total -= count[s];
  490. continue;
  491. }
  492. norm[s] = NOT_YET_ASSIGNED;
  493. }
  494. ToDistribute = (1 << tableLog) - distributed;
  495. if ((total / ToDistribute) > lowOne) {
  496. /* risk of rounding to zero */
  497. lowOne = (U32)((total * 3) / (ToDistribute * 2));
  498. for (s = 0; s <= maxSymbolValue; s++) {
  499. if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
  500. norm[s] = 1;
  501. distributed++;
  502. total -= count[s];
  503. continue;
  504. }
  505. }
  506. ToDistribute = (1 << tableLog) - distributed;
  507. }
  508. if (distributed == maxSymbolValue + 1) {
  509. /* all values are pretty poor;
  510. probably incompressible data (should have already been detected);
  511. find max, then give all remaining points to max */
  512. U32 maxV = 0, maxC = 0;
  513. for (s = 0; s <= maxSymbolValue; s++)
  514. if (count[s] > maxC)
  515. maxV = s, maxC = count[s];
  516. norm[maxV] += (short)ToDistribute;
  517. return 0;
  518. }
  519. if (total == 0) {
  520. /* all of the symbols were low enough for the lowOne or lowThreshold */
  521. for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
  522. if (norm[s] > 0)
  523. ToDistribute--, norm[s]++;
  524. return 0;
  525. }
  526. {
  527. U64 const vStepLog = 62 - tableLog;
  528. U64 const mid = (1ULL << (vStepLog - 1)) - 1;
  529. U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
  530. U64 tmpTotal = mid;
  531. for (s = 0; s <= maxSymbolValue; s++) {
  532. if (norm[s] == NOT_YET_ASSIGNED) {
  533. U64 const end = tmpTotal + (count[s] * rStep);
  534. U32 const sStart = (U32)(tmpTotal >> vStepLog);
  535. U32 const sEnd = (U32)(end >> vStepLog);
  536. U32 const weight = sEnd - sStart;
  537. if (weight < 1)
  538. return ERROR(GENERIC);
  539. norm[s] = (short)weight;
  540. tmpTotal = end;
  541. }
  542. }
  543. }
  544. return 0;
  545. }
  546. size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
  547. {
  548. /* Sanity checks */
  549. if (tableLog == 0)
  550. tableLog = FSE_DEFAULT_TABLELOG;
  551. if (tableLog < FSE_MIN_TABLELOG)
  552. return ERROR(GENERIC); /* Unsupported size */
  553. if (tableLog > FSE_MAX_TABLELOG)
  554. return ERROR(tableLog_tooLarge); /* Unsupported size */
  555. if (tableLog < FSE_minTableLog(total, maxSymbolValue))
  556. return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
  557. {
  558. U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
  559. U64 const scale = 62 - tableLog;
  560. U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
  561. U64 const vStep = 1ULL << (scale - 20);
  562. int stillToDistribute = 1 << tableLog;
  563. unsigned s;
  564. unsigned largest = 0;
  565. short largestP = 0;
  566. U32 lowThreshold = (U32)(total >> tableLog);
  567. for (s = 0; s <= maxSymbolValue; s++) {
  568. if (count[s] == total)
  569. return 0; /* rle special case */
  570. if (count[s] == 0) {
  571. normalizedCounter[s] = 0;
  572. continue;
  573. }
  574. if (count[s] <= lowThreshold) {
  575. normalizedCounter[s] = -1;
  576. stillToDistribute--;
  577. } else {
  578. short proba = (short)((count[s] * step) >> scale);
  579. if (proba < 8) {
  580. U64 restToBeat = vStep * rtbTable[proba];
  581. proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
  582. }
  583. if (proba > largestP)
  584. largestP = proba, largest = s;
  585. normalizedCounter[s] = proba;
  586. stillToDistribute -= proba;
  587. }
  588. }
  589. if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
  590. /* corner case, need another normalization method */
  591. size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
  592. if (FSE_isError(errorCode))
  593. return errorCode;
  594. } else
  595. normalizedCounter[largest] += (short)stillToDistribute;
  596. }
  597. return tableLog;
  598. }
  599. /* fake FSE_CTable, for raw (uncompressed) input */
  600. size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
  601. {
  602. const unsigned tableSize = 1 << nbBits;
  603. const unsigned tableMask = tableSize - 1;
  604. const unsigned maxSymbolValue = tableMask;
  605. void *const ptr = ct;
  606. U16 *const tableU16 = ((U16 *)ptr) + 2;
  607. void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
  608. FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
  609. unsigned s;
  610. /* Sanity checks */
  611. if (nbBits < 1)
  612. return ERROR(GENERIC); /* min size */
  613. /* header */
  614. tableU16[-2] = (U16)nbBits;
  615. tableU16[-1] = (U16)maxSymbolValue;
  616. /* Build table */
  617. for (s = 0; s < tableSize; s++)
  618. tableU16[s] = (U16)(tableSize + s);
  619. /* Build Symbol Transformation Table */
  620. {
  621. const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
  622. for (s = 0; s <= maxSymbolValue; s++) {
  623. symbolTT[s].deltaNbBits = deltaNbBits;
  624. symbolTT[s].deltaFindState = s - 1;
  625. }
  626. }
  627. return 0;
  628. }
  629. /* fake FSE_CTable, for rle input (always same symbol) */
  630. size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
  631. {
  632. void *ptr = ct;
  633. U16 *tableU16 = ((U16 *)ptr) + 2;
  634. void *FSCTptr = (U32 *)ptr + 2;
  635. FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
  636. /* header */
  637. tableU16[-2] = (U16)0;
  638. tableU16[-1] = (U16)symbolValue;
  639. /* Build table */
  640. tableU16[0] = 0;
  641. tableU16[1] = 0; /* just in case */
  642. /* Build Symbol Transformation Table */
  643. symbolTT[symbolValue].deltaNbBits = 0;
  644. symbolTT[symbolValue].deltaFindState = 0;
  645. return 0;
  646. }
  647. static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
  648. {
  649. const BYTE *const istart = (const BYTE *)src;
  650. const BYTE *const iend = istart + srcSize;
  651. const BYTE *ip = iend;
  652. BIT_CStream_t bitC;
  653. FSE_CState_t CState1, CState2;
  654. /* init */
  655. if (srcSize <= 2)
  656. return 0;
  657. {
  658. size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
  659. if (FSE_isError(initError))
  660. return 0; /* not enough space available to write a bitstream */
  661. }
  662. #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
  663. if (srcSize & 1) {
  664. FSE_initCState2(&CState1, ct, *--ip);
  665. FSE_initCState2(&CState2, ct, *--ip);
  666. FSE_encodeSymbol(&bitC, &CState1, *--ip);
  667. FSE_FLUSHBITS(&bitC);
  668. } else {
  669. FSE_initCState2(&CState2, ct, *--ip);
  670. FSE_initCState2(&CState1, ct, *--ip);
  671. }
  672. /* join to mod 4 */
  673. srcSize -= 2;
  674. if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
  675. FSE_encodeSymbol(&bitC, &CState2, *--ip);
  676. FSE_encodeSymbol(&bitC, &CState1, *--ip);
  677. FSE_FLUSHBITS(&bitC);
  678. }
  679. /* 2 or 4 encoding per loop */
  680. while (ip > istart) {
  681. FSE_encodeSymbol(&bitC, &CState2, *--ip);
  682. if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
  683. FSE_FLUSHBITS(&bitC);
  684. FSE_encodeSymbol(&bitC, &CState1, *--ip);
  685. if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
  686. FSE_encodeSymbol(&bitC, &CState2, *--ip);
  687. FSE_encodeSymbol(&bitC, &CState1, *--ip);
  688. }
  689. FSE_FLUSHBITS(&bitC);
  690. }
  691. FSE_flushCState(&bitC, &CState2);
  692. FSE_flushCState(&bitC, &CState1);
  693. return BIT_closeCStream(&bitC);
  694. }
  695. size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
  696. {
  697. unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
  698. if (fast)
  699. return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
  700. else
  701. return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
  702. }
  703. size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }