/* zstd_decompress_block.c */
  1. /*
  2. * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* zstd_decompress_block :
  11. * this module takes care of decompressing _compressed_ block */
  12. /*-*******************************************************
  13. * Dependencies
  14. *********************************************************/
  15. #include <string.h> /* memcpy, memmove, memset */
  16. #include "compiler.h" /* prefetch */
  17. #include "cpu.h" /* bmi2 */
  18. #include "mem.h" /* low level memory routines */
  19. #define FSE_STATIC_LINKING_ONLY
  20. #include "fse.h"
  21. #define HUF_STATIC_LINKING_ONLY
  22. #include "huf.h"
  23. #include "zstd_internal.h"
  24. #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
  25. #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
  26. #include "zstd_decompress_block.h"
  27. /*_*******************************************************
  28. * Macros
  29. **********************************************************/
  30. /* These two optional macros force the use one way or another of the two
  31. * ZSTD_decompressSequences implementations. You can't force in both directions
  32. * at the same time.
  33. */
  34. #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
  35. defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
  36. #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
  37. #endif
  38. /*_*******************************************************
  39. * Memory operations
  40. **********************************************************/
  41. static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
  42. /*-*************************************************************
  43. * Block decoding
  44. ***************************************************************/
  45. /*! ZSTD_getcBlockSize() :
  46. * Provides the size of compressed block from block header `src` */
  47. size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
  48. blockProperties_t* bpPtr)
  49. {
  50. if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
  51. { U32 const cBlockHeader = MEM_readLE24(src);
  52. U32 const cSize = cBlockHeader >> 3;
  53. bpPtr->lastBlock = cBlockHeader & 1;
  54. bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
  55. bpPtr->origSize = cSize; /* only useful for RLE */
  56. if (bpPtr->blockType == bt_rle) return 1;
  57. if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
  58. return cSize;
  59. }
  60. }
  61. /* Hidden declaration for fullbench */
  62. size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  63. const void* src, size_t srcSize);
  64. /*! ZSTD_decodeLiteralsBlock() :
  65. * @return : nb of bytes read from src (< srcSize )
  66. * note : symbol not declared but exposed for fullbench */
  67. size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  68. const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
  69. {
  70. if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
  71. { const BYTE* const istart = (const BYTE*) src;
  72. symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
  73. switch(litEncType)
  74. {
  75. case set_repeat:
  76. if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
  77. /* fall-through */
  78. case set_compressed:
  79. if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
  80. { size_t lhSize, litSize, litCSize;
  81. U32 singleStream=0;
  82. U32 const lhlCode = (istart[0] >> 2) & 3;
  83. U32 const lhc = MEM_readLE32(istart);
  84. size_t hufSuccess;
  85. switch(lhlCode)
  86. {
  87. case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
  88. /* 2 - 2 - 10 - 10 */
  89. singleStream = !lhlCode;
  90. lhSize = 3;
  91. litSize = (lhc >> 4) & 0x3FF;
  92. litCSize = (lhc >> 14) & 0x3FF;
  93. break;
  94. case 2:
  95. /* 2 - 2 - 14 - 14 */
  96. lhSize = 4;
  97. litSize = (lhc >> 4) & 0x3FFF;
  98. litCSize = lhc >> 18;
  99. break;
  100. case 3:
  101. /* 2 - 2 - 18 - 18 */
  102. lhSize = 5;
  103. litSize = (lhc >> 4) & 0x3FFFF;
  104. litCSize = (lhc >> 22) + (istart[4] << 10);
  105. break;
  106. }
  107. if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
  108. if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
  109. /* prefetch huffman table if cold */
  110. if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
  111. PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
  112. }
  113. if (litEncType==set_repeat) {
  114. if (singleStream) {
  115. hufSuccess = HUF_decompress1X_usingDTable_bmi2(
  116. dctx->litBuffer, litSize, istart+lhSize, litCSize,
  117. dctx->HUFptr, dctx->bmi2);
  118. } else {
  119. hufSuccess = HUF_decompress4X_usingDTable_bmi2(
  120. dctx->litBuffer, litSize, istart+lhSize, litCSize,
  121. dctx->HUFptr, dctx->bmi2);
  122. }
  123. } else {
  124. if (singleStream) {
  125. #if defined(HUF_FORCE_DECOMPRESS_X2)
  126. hufSuccess = HUF_decompress1X_DCtx_wksp(
  127. dctx->entropy.hufTable, dctx->litBuffer, litSize,
  128. istart+lhSize, litCSize, dctx->workspace,
  129. sizeof(dctx->workspace));
  130. #else
  131. hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
  132. dctx->entropy.hufTable, dctx->litBuffer, litSize,
  133. istart+lhSize, litCSize, dctx->workspace,
  134. sizeof(dctx->workspace), dctx->bmi2);
  135. #endif
  136. } else {
  137. hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
  138. dctx->entropy.hufTable, dctx->litBuffer, litSize,
  139. istart+lhSize, litCSize, dctx->workspace,
  140. sizeof(dctx->workspace), dctx->bmi2);
  141. }
  142. }
  143. if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
  144. dctx->litPtr = dctx->litBuffer;
  145. dctx->litSize = litSize;
  146. dctx->litEntropy = 1;
  147. if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
  148. memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
  149. return litCSize + lhSize;
  150. }
  151. case set_basic:
  152. { size_t litSize, lhSize;
  153. U32 const lhlCode = ((istart[0]) >> 2) & 3;
  154. switch(lhlCode)
  155. {
  156. case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
  157. lhSize = 1;
  158. litSize = istart[0] >> 3;
  159. break;
  160. case 1:
  161. lhSize = 2;
  162. litSize = MEM_readLE16(istart) >> 4;
  163. break;
  164. case 3:
  165. lhSize = 3;
  166. litSize = MEM_readLE24(istart) >> 4;
  167. break;
  168. }
  169. if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
  170. if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
  171. memcpy(dctx->litBuffer, istart+lhSize, litSize);
  172. dctx->litPtr = dctx->litBuffer;
  173. dctx->litSize = litSize;
  174. memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
  175. return lhSize+litSize;
  176. }
  177. /* direct reference into compressed stream */
  178. dctx->litPtr = istart+lhSize;
  179. dctx->litSize = litSize;
  180. return lhSize+litSize;
  181. }
  182. case set_rle:
  183. { U32 const lhlCode = ((istart[0]) >> 2) & 3;
  184. size_t litSize, lhSize;
  185. switch(lhlCode)
  186. {
  187. case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
  188. lhSize = 1;
  189. litSize = istart[0] >> 3;
  190. break;
  191. case 1:
  192. lhSize = 2;
  193. litSize = MEM_readLE16(istart) >> 4;
  194. break;
  195. case 3:
  196. lhSize = 3;
  197. litSize = MEM_readLE24(istart) >> 4;
  198. if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
  199. break;
  200. }
  201. if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
  202. memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
  203. dctx->litPtr = dctx->litBuffer;
  204. dctx->litSize = litSize;
  205. return lhSize+1;
  206. }
  207. default:
  208. return ERROR(corruption_detected); /* impossible */
  209. }
  210. }
  211. }
  212. /* Default FSE distribution tables.
  213. * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
  214. * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
  215. * They were generated programmatically with following method :
  216. * - start from default distributions, present in /lib/common/zstd_internal.h
  217. * - generate tables normally, using ZSTD_buildFSETable()
  218. * - printout the content of tables
  219. * - pretify output, report below, test with fuzzer to ensure it's correct */
  220. /* Default FSE distribution table for Literal Lengths */
  221. static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
  222. { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  223. /* nextState, nbAddBits, nbBits, baseVal */
  224. { 0, 0, 4, 0}, { 16, 0, 4, 0},
  225. { 32, 0, 5, 1}, { 0, 0, 5, 3},
  226. { 0, 0, 5, 4}, { 0, 0, 5, 6},
  227. { 0, 0, 5, 7}, { 0, 0, 5, 9},
  228. { 0, 0, 5, 10}, { 0, 0, 5, 12},
  229. { 0, 0, 6, 14}, { 0, 1, 5, 16},
  230. { 0, 1, 5, 20}, { 0, 1, 5, 22},
  231. { 0, 2, 5, 28}, { 0, 3, 5, 32},
  232. { 0, 4, 5, 48}, { 32, 6, 5, 64},
  233. { 0, 7, 5, 128}, { 0, 8, 6, 256},
  234. { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
  235. { 32, 0, 4, 0}, { 0, 0, 4, 1},
  236. { 0, 0, 5, 2}, { 32, 0, 5, 4},
  237. { 0, 0, 5, 5}, { 32, 0, 5, 7},
  238. { 0, 0, 5, 8}, { 32, 0, 5, 10},
  239. { 0, 0, 5, 11}, { 0, 0, 6, 13},
  240. { 32, 1, 5, 16}, { 0, 1, 5, 18},
  241. { 32, 1, 5, 22}, { 0, 2, 5, 24},
  242. { 32, 3, 5, 32}, { 0, 3, 5, 40},
  243. { 0, 6, 4, 64}, { 16, 6, 4, 64},
  244. { 32, 7, 5, 128}, { 0, 9, 6, 512},
  245. { 0, 11, 6, 2048}, { 48, 0, 4, 0},
  246. { 16, 0, 4, 1}, { 32, 0, 5, 2},
  247. { 32, 0, 5, 3}, { 32, 0, 5, 5},
  248. { 32, 0, 5, 6}, { 32, 0, 5, 8},
  249. { 32, 0, 5, 9}, { 32, 0, 5, 11},
  250. { 32, 0, 5, 12}, { 0, 0, 6, 15},
  251. { 32, 1, 5, 18}, { 32, 1, 5, 20},
  252. { 32, 2, 5, 24}, { 32, 2, 5, 28},
  253. { 32, 3, 5, 40}, { 32, 4, 5, 48},
  254. { 0, 16, 6,65536}, { 0, 15, 6,32768},
  255. { 0, 14, 6,16384}, { 0, 13, 6, 8192},
  256. }; /* LL_defaultDTable */
  257. /* Default FSE distribution table for Offset Codes */
  258. static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
  259. { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  260. /* nextState, nbAddBits, nbBits, baseVal */
  261. { 0, 0, 5, 0}, { 0, 6, 4, 61},
  262. { 0, 9, 5, 509}, { 0, 15, 5,32765},
  263. { 0, 21, 5,2097149}, { 0, 3, 5, 5},
  264. { 0, 7, 4, 125}, { 0, 12, 5, 4093},
  265. { 0, 18, 5,262141}, { 0, 23, 5,8388605},
  266. { 0, 5, 5, 29}, { 0, 8, 4, 253},
  267. { 0, 14, 5,16381}, { 0, 20, 5,1048573},
  268. { 0, 2, 5, 1}, { 16, 7, 4, 125},
  269. { 0, 11, 5, 2045}, { 0, 17, 5,131069},
  270. { 0, 22, 5,4194301}, { 0, 4, 5, 13},
  271. { 16, 8, 4, 253}, { 0, 13, 5, 8189},
  272. { 0, 19, 5,524285}, { 0, 1, 5, 1},
  273. { 16, 6, 4, 61}, { 0, 10, 5, 1021},
  274. { 0, 16, 5,65533}, { 0, 28, 5,268435453},
  275. { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
  276. { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
  277. }; /* OF_defaultDTable */
  278. /* Default FSE distribution table for Match Lengths */
  279. static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
  280. { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  281. /* nextState, nbAddBits, nbBits, baseVal */
  282. { 0, 0, 6, 3}, { 0, 0, 4, 4},
  283. { 32, 0, 5, 5}, { 0, 0, 5, 6},
  284. { 0, 0, 5, 8}, { 0, 0, 5, 9},
  285. { 0, 0, 5, 11}, { 0, 0, 6, 13},
  286. { 0, 0, 6, 16}, { 0, 0, 6, 19},
  287. { 0, 0, 6, 22}, { 0, 0, 6, 25},
  288. { 0, 0, 6, 28}, { 0, 0, 6, 31},
  289. { 0, 0, 6, 34}, { 0, 1, 6, 37},
  290. { 0, 1, 6, 41}, { 0, 2, 6, 47},
  291. { 0, 3, 6, 59}, { 0, 4, 6, 83},
  292. { 0, 7, 6, 131}, { 0, 9, 6, 515},
  293. { 16, 0, 4, 4}, { 0, 0, 4, 5},
  294. { 32, 0, 5, 6}, { 0, 0, 5, 7},
  295. { 32, 0, 5, 9}, { 0, 0, 5, 10},
  296. { 0, 0, 6, 12}, { 0, 0, 6, 15},
  297. { 0, 0, 6, 18}, { 0, 0, 6, 21},
  298. { 0, 0, 6, 24}, { 0, 0, 6, 27},
  299. { 0, 0, 6, 30}, { 0, 0, 6, 33},
  300. { 0, 1, 6, 35}, { 0, 1, 6, 39},
  301. { 0, 2, 6, 43}, { 0, 3, 6, 51},
  302. { 0, 4, 6, 67}, { 0, 5, 6, 99},
  303. { 0, 8, 6, 259}, { 32, 0, 4, 4},
  304. { 48, 0, 4, 4}, { 16, 0, 4, 5},
  305. { 32, 0, 5, 7}, { 32, 0, 5, 8},
  306. { 32, 0, 5, 10}, { 32, 0, 5, 11},
  307. { 0, 0, 6, 14}, { 0, 0, 6, 17},
  308. { 0, 0, 6, 20}, { 0, 0, 6, 23},
  309. { 0, 0, 6, 26}, { 0, 0, 6, 29},
  310. { 0, 0, 6, 32}, { 0, 16, 6,65539},
  311. { 0, 15, 6,32771}, { 0, 14, 6,16387},
  312. { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
  313. { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
  314. }; /* ML_defaultDTable */
  315. static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
  316. {
  317. void* ptr = dt;
  318. ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
  319. ZSTD_seqSymbol* const cell = dt + 1;
  320. DTableH->tableLog = 0;
  321. DTableH->fastMode = 0;
  322. cell->nbBits = 0;
  323. cell->nextState = 0;
  324. assert(nbAddBits < 255);
  325. cell->nbAdditionalBits = (BYTE)nbAddBits;
  326. cell->baseValue = baseValue;
  327. }
  328. /* ZSTD_buildFSETable() :
  329. * generate FSE decoding table for one symbol (ll, ml or off)
  330. * cannot fail if input is valid =>
  331. * all inputs are presumed validated at this stage */
  332. void
  333. ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
  334. const short* normalizedCounter, unsigned maxSymbolValue,
  335. const U32* baseValue, const U32* nbAdditionalBits,
  336. unsigned tableLog)
  337. {
  338. ZSTD_seqSymbol* const tableDecode = dt+1;
  339. U16 symbolNext[MaxSeq+1];
  340. U32 const maxSV1 = maxSymbolValue + 1;
  341. U32 const tableSize = 1 << tableLog;
  342. U32 highThreshold = tableSize-1;
  343. /* Sanity Checks */
  344. assert(maxSymbolValue <= MaxSeq);
  345. assert(tableLog <= MaxFSELog);
  346. /* Init, lay down lowprob symbols */
  347. { ZSTD_seqSymbol_header DTableH;
  348. DTableH.tableLog = tableLog;
  349. DTableH.fastMode = 1;
  350. { S16 const largeLimit= (S16)(1 << (tableLog-1));
  351. U32 s;
  352. for (s=0; s<maxSV1; s++) {
  353. if (normalizedCounter[s]==-1) {
  354. tableDecode[highThreshold--].baseValue = s;
  355. symbolNext[s] = 1;
  356. } else {
  357. if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
  358. symbolNext[s] = normalizedCounter[s];
  359. } } }
  360. memcpy(dt, &DTableH, sizeof(DTableH));
  361. }
  362. /* Spread symbols */
  363. { U32 const tableMask = tableSize-1;
  364. U32 const step = FSE_TABLESTEP(tableSize);
  365. U32 s, position = 0;
  366. for (s=0; s<maxSV1; s++) {
  367. int i;
  368. for (i=0; i<normalizedCounter[s]; i++) {
  369. tableDecode[position].baseValue = s;
  370. position = (position + step) & tableMask;
  371. while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
  372. } }
  373. assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
  374. }
  375. /* Build Decoding table */
  376. { U32 u;
  377. for (u=0; u<tableSize; u++) {
  378. U32 const symbol = tableDecode[u].baseValue;
  379. U32 const nextState = symbolNext[symbol]++;
  380. tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
  381. tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
  382. assert(nbAdditionalBits[symbol] < 255);
  383. tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
  384. tableDecode[u].baseValue = baseValue[symbol];
  385. } }
  386. }
  387. /*! ZSTD_buildSeqTable() :
  388. * @return : nb bytes read from src,
  389. * or an error code if it fails */
  390. static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
  391. symbolEncodingType_e type, unsigned max, U32 maxLog,
  392. const void* src, size_t srcSize,
  393. const U32* baseValue, const U32* nbAdditionalBits,
  394. const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
  395. int ddictIsCold, int nbSeq)
  396. {
  397. switch(type)
  398. {
  399. case set_rle :
  400. if (!srcSize) return ERROR(srcSize_wrong);
  401. if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
  402. { U32 const symbol = *(const BYTE*)src;
  403. U32 const baseline = baseValue[symbol];
  404. U32 const nbBits = nbAdditionalBits[symbol];
  405. ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
  406. }
  407. *DTablePtr = DTableSpace;
  408. return 1;
  409. case set_basic :
  410. *DTablePtr = defaultTable;
  411. return 0;
  412. case set_repeat:
  413. if (!flagRepeatTable) return ERROR(corruption_detected);
  414. /* prefetch FSE table if used */
  415. if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
  416. const void* const pStart = *DTablePtr;
  417. size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
  418. PREFETCH_AREA(pStart, pSize);
  419. }
  420. return 0;
  421. case set_compressed :
  422. { unsigned tableLog;
  423. S16 norm[MaxSeq+1];
  424. size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
  425. if (FSE_isError(headerSize)) return ERROR(corruption_detected);
  426. if (tableLog > maxLog) return ERROR(corruption_detected);
  427. ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
  428. *DTablePtr = DTableSpace;
  429. return headerSize;
  430. }
  431. default : /* impossible */
  432. assert(0);
  433. return ERROR(GENERIC);
  434. }
  435. }
  436. size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  437. const void* src, size_t srcSize)
  438. {
  439. const BYTE* const istart = (const BYTE* const)src;
  440. const BYTE* const iend = istart + srcSize;
  441. const BYTE* ip = istart;
  442. int nbSeq;
  443. DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
  444. /* check */
  445. if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
  446. /* SeqHead */
  447. nbSeq = *ip++;
  448. if (!nbSeq) {
  449. *nbSeqPtr=0;
  450. if (srcSize != 1) return ERROR(srcSize_wrong);
  451. return 1;
  452. }
  453. if (nbSeq > 0x7F) {
  454. if (nbSeq == 0xFF) {
  455. if (ip+2 > iend) return ERROR(srcSize_wrong);
  456. nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
  457. } else {
  458. if (ip >= iend) return ERROR(srcSize_wrong);
  459. nbSeq = ((nbSeq-0x80)<<8) + *ip++;
  460. }
  461. }
  462. *nbSeqPtr = nbSeq;
  463. /* FSE table descriptors */
  464. if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
  465. { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
  466. symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
  467. symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
  468. ip++;
  469. /* Build DTables */
  470. { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
  471. LLtype, MaxLL, LLFSELog,
  472. ip, iend-ip,
  473. LL_base, LL_bits,
  474. LL_defaultDTable, dctx->fseEntropy,
  475. dctx->ddictIsCold, nbSeq);
  476. if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
  477. ip += llhSize;
  478. }
  479. { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
  480. OFtype, MaxOff, OffFSELog,
  481. ip, iend-ip,
  482. OF_base, OF_bits,
  483. OF_defaultDTable, dctx->fseEntropy,
  484. dctx->ddictIsCold, nbSeq);
  485. if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
  486. ip += ofhSize;
  487. }
  488. { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
  489. MLtype, MaxML, MLFSELog,
  490. ip, iend-ip,
  491. ML_base, ML_bits,
  492. ML_defaultDTable, dctx->fseEntropy,
  493. dctx->ddictIsCold, nbSeq);
  494. if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
  495. ip += mlhSize;
  496. }
  497. }
  498. return ip-istart;
  499. }
  500. typedef struct {
  501. size_t litLength;
  502. size_t matchLength;
  503. size_t offset;
  504. const BYTE* match;
  505. } seq_t;
  506. typedef struct {
  507. size_t state;
  508. const ZSTD_seqSymbol* table;
  509. } ZSTD_fseState;
  510. typedef struct {
  511. BIT_DStream_t DStream;
  512. ZSTD_fseState stateLL;
  513. ZSTD_fseState stateOffb;
  514. ZSTD_fseState stateML;
  515. size_t prevOffset[ZSTD_REP_NUM];
  516. const BYTE* prefixStart;
  517. const BYTE* dictEnd;
  518. size_t pos;
  519. } seqState_t;
  520. /* ZSTD_execSequenceLast7():
  521. * exceptional case : decompress a match starting within last 7 bytes of output buffer.
  522. * requires more careful checks, to ensure there is no overflow.
  523. * performance does not matter though.
  524. * note : this case is supposed to be never generated "naturally" by reference encoder,
  525. * since in most cases it needs at least 8 bytes to look for a match.
  526. * but it's allowed by the specification. */
  527. FORCE_NOINLINE
  528. size_t ZSTD_execSequenceLast7(BYTE* op,
  529. BYTE* const oend, seq_t sequence,
  530. const BYTE** litPtr, const BYTE* const litLimit,
  531. const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
  532. {
  533. BYTE* const oLitEnd = op + sequence.litLength;
  534. size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  535. BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  536. const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  537. const BYTE* match = oLitEnd - sequence.offset;
  538. /* check */
  539. if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must fit within dstBuffer */
  540. if (iLitEnd > litLimit) return ERROR(corruption_detected); /* try to read beyond literal buffer */
  541. /* copy literals */
  542. while (op < oLitEnd) *op++ = *(*litPtr)++;
  543. /* copy Match */
  544. if (sequence.offset > (size_t)(oLitEnd - base)) {
  545. /* offset beyond prefix */
  546. if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
  547. match = dictEnd - (base-match);
  548. if (match + sequence.matchLength <= dictEnd) {
  549. memmove(oLitEnd, match, sequence.matchLength);
  550. return sequenceLength;
  551. }
  552. /* span extDict & currentPrefixSegment */
  553. { size_t const length1 = dictEnd - match;
  554. memmove(oLitEnd, match, length1);
  555. op = oLitEnd + length1;
  556. sequence.matchLength -= length1;
  557. match = base;
  558. } }
  559. while (op < oMatchEnd) *op++ = *match++;
  560. return sequenceLength;
  561. }
  562. HINT_INLINE
  563. size_t ZSTD_execSequence(BYTE* op,
  564. BYTE* const oend, seq_t sequence,
  565. const BYTE** litPtr, const BYTE* const litLimit,
  566. const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
  567. {
  568. BYTE* const oLitEnd = op + sequence.litLength;
  569. size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  570. BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  571. BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
  572. const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  573. const BYTE* match = oLitEnd - sequence.offset;
  574. /* check */
  575. if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
  576. if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
  577. if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
  578. /* copy Literals */
  579. ZSTD_copy8(op, *litPtr);
  580. if (sequence.litLength > 8)
  581. ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
  582. op = oLitEnd;
  583. *litPtr = iLitEnd; /* update for next sequence */
  584. /* copy Match */
  585. if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
  586. /* offset beyond prefix -> go into extDict */
  587. if (sequence.offset > (size_t)(oLitEnd - virtualStart))
  588. return ERROR(corruption_detected);
  589. match = dictEnd + (match - prefixStart);
  590. if (match + sequence.matchLength <= dictEnd) {
  591. memmove(oLitEnd, match, sequence.matchLength);
  592. return sequenceLength;
  593. }
  594. /* span extDict & currentPrefixSegment */
  595. { size_t const length1 = dictEnd - match;
  596. memmove(oLitEnd, match, length1);
  597. op = oLitEnd + length1;
  598. sequence.matchLength -= length1;
  599. match = prefixStart;
  600. if (op > oend_w || sequence.matchLength < MINMATCH) {
  601. U32 i;
  602. for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
  603. return sequenceLength;
  604. }
  605. } }
  606. /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
  607. /* match within prefix */
  608. if (sequence.offset < 8) {
  609. /* close range match, overlap */
  610. static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
  611. static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
  612. int const sub2 = dec64table[sequence.offset];
  613. op[0] = match[0];
  614. op[1] = match[1];
  615. op[2] = match[2];
  616. op[3] = match[3];
  617. match += dec32table[sequence.offset];
  618. ZSTD_copy4(op+4, match);
  619. match -= sub2;
  620. } else {
  621. ZSTD_copy8(op, match);
  622. }
  623. op += 8; match += 8;
  624. if (oMatchEnd > oend-(16-MINMATCH)) {
  625. if (op < oend_w) {
  626. ZSTD_wildcopy(op, match, oend_w - op);
  627. match += oend_w - op;
  628. op = oend_w;
  629. }
  630. while (op < oMatchEnd) *op++ = *match++;
  631. } else {
  632. ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
  633. }
  634. return sequenceLength;
  635. }
/* ZSTD_execSequenceLong() :
 * Executes one sequence : copies the literals, then copies the match.
 * Same job as ZSTD_execSequence(), but the match source address has been
 * precomputed into sequence.match (by ZSTD_decodeSequenceLong()), which is
 * what makes prefetching of the match possible.
 * @return : nb of bytes regenerated (litLength + matchLength), or an error code */
HINT_INLINE
size_t ZSTD_execSequenceLong(BYTE* op,
                             BYTE* const oend, seq_t sequence,
                             const BYTE** litPtr, const BYTE* const litLimit,
                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = sequence.match;

    /* check */
    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
    if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
    /* too close to the end of output for wildcopy : use the careful byte-exact path */
    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);

    /* copy Literals */
    ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
    if (sequence.litLength > 8)
        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix : match starts inside the external dictionary */
        if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
        if (match + sequence.matchLength <= dictEnd) {
            /* match entirely within extDict : single memmove */
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment : copy the extDict part, then continue from prefixStart */
        {   size_t const length1 = dictEnd - match;
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
            if (op > oend_w || sequence.matchLength < MINMATCH) {
                /* remainder too close to oend (or too short) for wildcopy : byte-by-byte */
                U32 i;
                for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
                return sequenceLength;
            }
    }   }
    assert(op <= oend_w);
    assert(sequence.matchLength >= MINMATCH);

    /* match within prefix */
    if (sequence.offset < 8) {
        /* close range match, overlap : an 8-byte copy would read bytes not yet written,
         * so copy 4 bytes one by one, then a shifted 4-byte copy */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[sequence.offset];
        op[0] = match[0];
        op[1] = match[1];
        op[2] = match[2];
        op[3] = match[3];
        match += dec32table[sequence.offset];
        ZSTD_copy4(op+4, match);
        match -= sub2;
    } else {
        ZSTD_copy8(op, match);
    }
    op += 8; match += 8;

    if (oMatchEnd > oend-(16-MINMATCH)) {
        /* tail close to oend : wildcopy up to oend_w only, finish byte-by-byte */
        if (op < oend_w) {
            ZSTD_wildcopy(op, match, oend_w - op);
            match += oend_w - op;   /* note : uses the pre-update value of op */
            op = oend_w;
        }
        while (op < oMatchEnd) *op++ = *match++;
    } else {
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
    }
    return sequenceLength;
}
  709. static void
  710. ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
  711. {
  712. const void* ptr = dt;
  713. const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
  714. DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
  715. DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
  716. (U32)DStatePtr->state, DTableH->tableLog);
  717. BIT_reloadDStream(bitD);
  718. DStatePtr->table = dt + 1;
  719. }
  720. FORCE_INLINE_TEMPLATE void
  721. ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
  722. {
  723. ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
  724. U32 const nbBits = DInfo.nbBits;
  725. size_t const lowBits = BIT_readBits(bitD, nbBits);
  726. DStatePtr->state = DInfo.nextState + lowBits;
  727. }
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
  733. #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
  734. (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
  735. ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
  736. : 0)
  737. typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
  738. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* ZSTD_decodeSequence() :
 * Decodes one (litLength, matchLength, offset) triplet from the bitstream,
 * resolves repeat offsets, then updates the three FSE states.
 * The BIT_reloadDStream() calls are placed so the bit accumulator never runs
 * dry on either 32-bit or 64-bit targets (see the static asserts below). */
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
    seq_t seq;
    /* nb of additional bits and base values for the current LL / ML / OF states */
    U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
    U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
    U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
    U32 const totalBits = llBits+mlBits+ofBits;
    U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
    U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
    U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;

    /* sequence */
    {   size_t offset;
        if (!ofBits)
            offset = 0;
        else {
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                /* offset field wider than the 32-bit accumulator : read what fits,
                 * reload, then read the remaining extraBits */
                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
            } else {
                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
            }
        }

        if (ofBits <= 1) {
            /* small offset codes are repeat codes, resolved against the
             * recent-offsets history. Note : llBase==0 stands in for the
             * "litLength == 0" case, which shifts the repcode meaning by one. */
            offset += (llBase==0);
            if (offset) {
                /* repcode 3 means "prevOffset[0] - 1" */
                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset = temp;
            } else {   /* offset == 0 : repeat most recent offset */
                offset = seqState->prevOffset[0];
            }
        } else {
            /* genuine new offset : push it onto the repeat-offset history */
            seqState->prevOffset[2] = seqState->prevOffset[1];
            seqState->prevOffset[1] = seqState->prevOffset[0];
            seqState->prevOffset[0] = offset;
        }
        seq.offset = offset;
    }

    seq.matchLength = mlBase
                    + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0);   /* <= 16 bits */
    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

    seq.litLength = llBase
                  + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0);   /* <= 16 bits */
    if (MEM_32bits())
        BIT_reloadDStream(&seqState->DStream);

    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

    /* ANS state update */
    ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <= 9 bits */
    ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <= 9 bits */
    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);        /* <= 18 bits */
    ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <= 8 bits */

    return seq;
}
/* ZSTD_decompressSequences_body() :
 * Regenerates block content from the sequences section : decodes each
 * sequence and executes it immediately, then copies the last literals.
 * @return : nb of bytes regenerated into dst, or an error code */
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                               const ZSTD_longOffset_e isLongOffset)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_body");

    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        /* seed repeat offsets with the values saved after the previous block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

        /* decode and execute one sequence at a time until the bitstream or nbSeq is exhausted */
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
            nbSeq--;
            {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                op += oneSeqSize;
        }   }

        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
        if (nbSeq) return ERROR(corruption_detected);   /* bitstream exhausted before all sequences were decoded */
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
    }

    return op-ostart;
}
/* ZSTD_decompressSequences_default() :
 * plain instantiation of the template body (no special target attribute) */
static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                                 const ZSTD_longOffset_e isLongOffset)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  864. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
  865. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decodeSequenceLong() :
 * Same decoding logic as ZSTD_decodeSequence(), but additionally computes
 * seq.match (the source address of the match) and tracks seqState->pos,
 * so that the match can be prefetched ahead of execution by
 * ZSTD_decompressSequencesLong_body(). */
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
{
    seq_t seq;
    /* nb of additional bits and base values for the current LL / ML / OF states */
    U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
    U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
    U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
    U32 const totalBits = llBits+mlBits+ofBits;
    U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
    U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
    U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;

    /* sequence */
    {   size_t offset;
        if (!ofBits)
            offset = 0;
        else {
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets) {
                /* offset may not fit the 32-bit accumulator : split the read */
                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
            } else {
                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
            }
        }

        if (ofBits <= 1) {
            /* small offset codes are repeat codes, resolved against the
             * recent-offsets history. Note : llBase==0 stands in for the
             * "litLength == 0" case, which shifts the repcode meaning by one. */
            offset += (llBase==0);
            if (offset) {
                /* repcode 3 means "prevOffset[0] - 1" */
                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset = temp;
            } else {   /* offset == 0 : repeat most recent offset */
                offset = seqState->prevOffset[0];
            }
        } else {
            /* genuine new offset : push it onto the repeat-offset history */
            seqState->prevOffset[2] = seqState->prevOffset[1];
            seqState->prevOffset[1] = seqState->prevOffset[0];
            seqState->prevOffset[0] = offset;
        }
        seq.offset = offset;
    }

    seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);   /* <= 16 bits */
    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

    seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);   /* <= 16 bits */
    if (MEM_32bits())
        BIT_reloadDStream(&seqState->DStream);

    /* compute match source address, for prefetching */
    {   size_t const pos = seqState->pos + seq.litLength;
        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
                                                    * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
        seqState->pos = pos + seq.matchLength;
    }

    /* ANS state update */
    ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <= 9 bits */
    ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <= 9 bits */
    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);        /* <= 18 bits */
    ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <= 8 bits */

    return seq;
}
/* ZSTD_decompressSequencesLong_body() :
 * Variant of ZSTD_decompressSequences_body() which decodes sequences a few
 * steps (ADVANCED_SEQS) ahead of executing them, prefetching each decoded
 * match position to hide memory latency on long (out of cache) offsets.
 * @return : nb of bytes regenerated into dst, or an error code */
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
                               ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                               const ZSTD_longOffset_e isLongOffset)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);

    /* Regen sequences */
    if (nbSeq) {
#define STORED_SEQS 4
#define STORED_SEQS_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS 4
        seq_t sequences[STORED_SEQS];       /* small ring buffer of decoded, not-yet-executed sequences */
        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
        seqState_t seqState;
        int seqNb;
        dctx->fseEntropy = 1;
        /* seed repeat offsets with the values saved after the previous block */
        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        seqState.prefixStart = prefixStart;
        seqState.pos = (size_t)(op-prefixStart);   /* current position, needed to resolve match addresses */
        seqState.dictEnd = dictEnd;
        assert(iend >= ip);
        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

        /* prepare in advance : fill the ring buffer, prefetching each match */
        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
            sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
        }
        if (seqNb<seqAdvance) return ERROR(corruption_detected);

        /* decode and decompress : execute the sequence decoded ADVANCED_SEQS steps ago,
         * while decoding + prefetching the current one */
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
            sequences[seqNb & STORED_SEQS_MASK] = sequence;
            op += oneSeqSize;
        }
        if (seqNb<nbSeq) return ERROR(corruption_detected);   /* bitstream exhausted before all sequences were decoded */

        /* finish queue : execute the sequences still pending in the ring buffer */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }

        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
    }

    return op-ostart;
}
/* ZSTD_decompressSequencesLong_default() :
 * plain instantiation of the template body (no special target attribute) */
static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                     void* dst, size_t maxDstSize,
                               const void* seqStart, size_t seqSize, int nbSeq,
                                     const ZSTD_longOffset_e isLongOffset)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  1014. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
  1015. #if DYNAMIC_BMI2
  1016. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* ZSTD_decompressSequences_bmi2() :
 * same template body, compiled with the "bmi2" target attribute so the
 * compiler may emit BMI2 instructions for the bit-stream operations */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                              void* dst, size_t maxDstSize,
                        const void* seqStart, size_t seqSize, int nbSeq,
                              const ZSTD_longOffset_e isLongOffset)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  1025. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
  1026. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decompressSequencesLong_bmi2() :
 * same template body, compiled with the "bmi2" target attribute so the
 * compiler may emit BMI2 instructions for the bit-stream operations */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
                                  const ZSTD_longOffset_e isLongOffset)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  1035. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
  1036. #endif /* DYNAMIC_BMI2 */
/* function pointer type shared by all ZSTD_decompressSequences* variants */
typedef size_t (*ZSTD_decompressSequences_t)(
                            ZSTD_DCtx* dctx,
                            void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
                            const ZSTD_longOffset_e isLongOffset);
  1042. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* ZSTD_decompressSequences() :
 * entry point for the regular (non-prefetching) decoder;
 * dispatches to the bmi2-compiled variant when the context flags bmi2 support */
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                   const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset)
{
    DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2
    if (dctx->bmi2) {
        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
    }
#endif
    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  1056. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
  1057. #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decompressSequencesLong() :
 * decompression function triggered when a minimum share of offsets is considered "long",
 * aka out of cache.
 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                       const void* seqStart, size_t seqSize, int nbSeq,
                             const ZSTD_longOffset_e isLongOffset)
{
    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2
    if (dctx->bmi2) {   /* dispatch to the bmi2-compiled variant when the context flags bmi2 support */
        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
    }
#endif
    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
}
  1077. #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
  1078. #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
  1079. !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
  1080. /* ZSTD_getLongOffsetsShare() :
  1081. * condition : offTable must be valid
  1082. * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
  1083. * compared to maximum possible of (1<<OffFSELog) */
  1084. static unsigned
  1085. ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
  1086. {
  1087. const void* ptr = offTable;
  1088. U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
  1089. const ZSTD_seqSymbol* table = offTable + 1;
  1090. U32 const max = 1 << tableLog;
  1091. U32 u, total = 0;
  1092. DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
  1093. assert(max <= (1 << OffFSELog)); /* max not too large */
  1094. for (u=0; u<max; u++) {
  1095. if (table[u].nbAdditionalBits > 22) total += 1;
  1096. }
  1097. assert(tableLog <= OffFSELog);
  1098. total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
  1099. return total;
  1100. }
  1101. #endif
/* ZSTD_decompressBlock_internal() :
 * Decompresses one compressed block :
 * 1) decode the literals section, 2) decode the sequences header and build
 * the FSE tables, 3) decode+execute the sequences using either the regular
 * decoder or the prefetching ("long") decoder, chosen at build time or by a
 * runtime heuristic.
 * @return : nb of bytes regenerated into dst, or an error code */
size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize, const int frame)
{   /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
    /* isLongOffset must be true if there are long offsets.
     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
     * We don't expect that to be the case in 64-bit mode.
     * In block mode, window size is not known, so we have to be conservative.
     * (note: but it could be evaluated from current-lowLimit)
     */
    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);

    /* Decode literals section */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
        if (ZSTD_isError(litCSize)) return litCSize;
        ip += litCSize;
        srcSize -= litCSize;
    }

    /* Build Decoding Tables */
    {
        /* These macros control at build-time which decompressor implementation
         * we use. If neither is defined, we do some inspection and dispatch at
         * runtime.
         */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        int usePrefetchDecoder = dctx->ddictIsCold;   /* a cold dictionary starts out favoring the prefetching decoder */
#endif
        int nbSeq;
        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
        if (ZSTD_isError(seqHSize)) return seqHSize;
        ip += seqHSize;
        srcSize -= seqHSize;

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        /* runtime heuristic : switch to the prefetching decoder when enough offsets are "long" */
        if ( !usePrefetchDecoder
          && (!frame || (dctx->fParams.windowSize > (1<<24)))
          && (nbSeq>ADVANCED_SEQS) ) {   /* could probably use a larger nbSeq limit */
            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
            U32 const minShare = MEM_64bits() ? 7 : 20;   /* heuristic values, correspond to 2.73% and 7.81% */
            usePrefetchDecoder = (shareLongOffsets >= minShare);
        }
#endif

        dctx->ddictIsCold = 0;

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if (usePrefetchDecoder)
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
        /* else */
        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
#endif
    }
}
  1163. size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
  1164. void* dst, size_t dstCapacity,
  1165. const void* src, size_t srcSize)
  1166. {
  1167. size_t dSize;
  1168. ZSTD_checkContinuity(dctx, dst);
  1169. dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
  1170. dctx->previousDstEnd = (char*)dst + dSize;
  1171. return dSize;
  1172. }