entropy_common.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /*
  2. * Common functions of New Generation Entropy library
  3. * Copyright (C) 2016, Yann Collet.
  4. *
  5. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are
  9. * met:
  10. *
  11. * * Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * * Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following disclaimer
  15. * in the documentation and/or other materials provided with the
  16. * distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * This program is free software; you can redistribute it and/or modify it under
  31. * the terms of the GNU General Public License version 2 as published by the
  32. * Free Software Foundation. This program is dual-licensed; you may select
  33. * either version 2 of the GNU General Public License ("GPL") or BSD license
  34. * ("BSD").
  35. *
  36. * You can contact the author at :
  37. * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  38. */
  39. /* *************************************
  40. * Dependencies
  41. ***************************************/
  42. #include "error_private.h" /* ERR_*, ERROR */
  43. #include "fse.h"
  44. #include "huf.h"
  45. #include "mem.h"
  46. /*=== Version ===*/
  47. unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
  48. /*=== Error Management ===*/
  49. unsigned FSE_isError(size_t code) { return ERR_isError(code); }
  50. unsigned HUF_isError(size_t code) { return ERR_isError(code); }
  51. /*-**************************************************************
  52. * FSE NCount encoding-decoding
  53. ****************************************************************/
  54. size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
  55. {
  56. const BYTE *const istart = (const BYTE *)headerBuffer;
  57. const BYTE *const iend = istart + hbSize;
  58. const BYTE *ip = istart;
  59. int nbBits;
  60. int remaining;
  61. int threshold;
  62. U32 bitStream;
  63. int bitCount;
  64. unsigned charnum = 0;
  65. int previous0 = 0;
  66. if (hbSize < 4)
  67. return ERROR(srcSize_wrong);
  68. bitStream = ZSTD_readLE32(ip);
  69. nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
  70. if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
  71. return ERROR(tableLog_tooLarge);
  72. bitStream >>= 4;
  73. bitCount = 4;
  74. *tableLogPtr = nbBits;
  75. remaining = (1 << nbBits) + 1;
  76. threshold = 1 << nbBits;
  77. nbBits++;
  78. while ((remaining > 1) & (charnum <= *maxSVPtr)) {
  79. if (previous0) {
  80. unsigned n0 = charnum;
  81. while ((bitStream & 0xFFFF) == 0xFFFF) {
  82. n0 += 24;
  83. if (ip < iend - 5) {
  84. ip += 2;
  85. bitStream = ZSTD_readLE32(ip) >> bitCount;
  86. } else {
  87. bitStream >>= 16;
  88. bitCount += 16;
  89. }
  90. }
  91. while ((bitStream & 3) == 3) {
  92. n0 += 3;
  93. bitStream >>= 2;
  94. bitCount += 2;
  95. }
  96. n0 += bitStream & 3;
  97. bitCount += 2;
  98. if (n0 > *maxSVPtr)
  99. return ERROR(maxSymbolValue_tooSmall);
  100. while (charnum < n0)
  101. normalizedCounter[charnum++] = 0;
  102. if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
  103. ip += bitCount >> 3;
  104. bitCount &= 7;
  105. bitStream = ZSTD_readLE32(ip) >> bitCount;
  106. } else {
  107. bitStream >>= 2;
  108. }
  109. }
  110. {
  111. int const max = (2 * threshold - 1) - remaining;
  112. int count;
  113. if ((bitStream & (threshold - 1)) < (U32)max) {
  114. count = bitStream & (threshold - 1);
  115. bitCount += nbBits - 1;
  116. } else {
  117. count = bitStream & (2 * threshold - 1);
  118. if (count >= threshold)
  119. count -= max;
  120. bitCount += nbBits;
  121. }
  122. count--; /* extra accuracy */
  123. remaining -= count < 0 ? -count : count; /* -1 means +1 */
  124. normalizedCounter[charnum++] = (short)count;
  125. previous0 = !count;
  126. while (remaining < threshold) {
  127. nbBits--;
  128. threshold >>= 1;
  129. }
  130. if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
  131. ip += bitCount >> 3;
  132. bitCount &= 7;
  133. } else {
  134. bitCount -= (int)(8 * (iend - 4 - ip));
  135. ip = iend - 4;
  136. }
  137. bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
  138. }
  139. } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
  140. if (remaining != 1)
  141. return ERROR(corruption_detected);
  142. if (bitCount > 32)
  143. return ERROR(corruption_detected);
  144. *maxSVPtr = charnum - 1;
  145. ip += (bitCount + 7) >> 3;
  146. return ip - istart;
  147. }
  148. /*! HUF_readStats() :
  149. Read compact Huffman tree, saved by HUF_writeCTable().
  150. `huffWeight` is destination buffer.
  151. `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
  152. @return : size read from `src` , or an error Code .
  153. Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
  154. */
  155. size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
  156. {
  157. U32 weightTotal;
  158. const BYTE *ip = (const BYTE *)src;
  159. size_t iSize;
  160. size_t oSize;
  161. if (!srcSize)
  162. return ERROR(srcSize_wrong);
  163. iSize = ip[0];
  164. /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
  165. if (iSize >= 128) { /* special header */
  166. oSize = iSize - 127;
  167. iSize = ((oSize + 1) / 2);
  168. if (iSize + 1 > srcSize)
  169. return ERROR(srcSize_wrong);
  170. if (oSize >= hwSize)
  171. return ERROR(corruption_detected);
  172. ip += 1;
  173. {
  174. U32 n;
  175. for (n = 0; n < oSize; n += 2) {
  176. huffWeight[n] = ip[n / 2] >> 4;
  177. huffWeight[n + 1] = ip[n / 2] & 15;
  178. }
  179. }
  180. } else { /* header compressed with FSE (normal case) */
  181. if (iSize + 1 > srcSize)
  182. return ERROR(srcSize_wrong);
  183. oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
  184. if (FSE_isError(oSize))
  185. return oSize;
  186. }
  187. /* collect weight stats */
  188. memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
  189. weightTotal = 0;
  190. {
  191. U32 n;
  192. for (n = 0; n < oSize; n++) {
  193. if (huffWeight[n] >= HUF_TABLELOG_MAX)
  194. return ERROR(corruption_detected);
  195. rankStats[huffWeight[n]]++;
  196. weightTotal += (1 << huffWeight[n]) >> 1;
  197. }
  198. }
  199. if (weightTotal == 0)
  200. return ERROR(corruption_detected);
  201. /* get last non-null symbol weight (implied, total must be 2^n) */
  202. {
  203. U32 const tableLog = BIT_highbit32(weightTotal) + 1;
  204. if (tableLog > HUF_TABLELOG_MAX)
  205. return ERROR(corruption_detected);
  206. *tableLogPtr = tableLog;
  207. /* determine last weight */
  208. {
  209. U32 const total = 1 << tableLog;
  210. U32 const rest = total - weightTotal;
  211. U32 const verif = 1 << BIT_highbit32(rest);
  212. U32 const lastWeight = BIT_highbit32(rest) + 1;
  213. if (verif != rest)
  214. return ERROR(corruption_detected); /* last value must be a clean power of 2 */
  215. huffWeight[oSize] = (BYTE)lastWeight;
  216. rankStats[lastWeight]++;
  217. }
  218. }
  219. /* check tree construction validity */
  220. if ((rankStats[1] < 2) || (rankStats[1] & 1))
  221. return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
  222. /* results */
  223. *nbSymbolsPtr = (U32)(oSize + 1);
  224. return iSize + 1;
  225. }