zstd_decompress.c 128 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109
  1. /*
  2. * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* ***************************************************************
  11. * Tuning parameters
  12. *****************************************************************/
  /*!
   * ZSTD_HEAPMODE :
   * Chooses where the default entry point ZSTD_decompress() puts its context :
   * 0 = on the stack, 1 = on the heap (default; needs malloc()).
   * Entry points that receive an explicit context, such as ZSTD_decompressDCtx(),
   * are not affected by this setting.
   */
  #if !defined(ZSTD_HEAPMODE)
  #  define ZSTD_HEAPMODE 1
  #endif
  /*!
   * ZSTD_LEGACY_SUPPORT :
   * When set to 1 or more, ZSTD_decompress() is also able to decode
   * older formats (v0.1+). Disabled (0) unless defined by the build.
   */
  #if !defined(ZSTD_LEGACY_SUPPORT)
  #  define ZSTD_LEGACY_SUPPORT 0
  #endif
  /*!
   * ZSTD_MAXWINDOWSIZE_DEFAULT :
   * Largest window size a DStream accepts __by default__.
   * A frame that needs more memory than this is rejected.
   * The limit can be changed at run time with ZSTD_DCtx_setMaxWindowSize().
   */
  #if !defined(ZSTD_MAXWINDOWSIZE_DEFAULT)
  #  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
  #endif
  /*!
   * ZSTD_NO_FORWARD_PROGRESS_MAX :
   * Number of consecutive calls to ZSTD_decompressStream() and
   * ZSTD_decompress_generic() that are tolerated without forward progress
   * before an error is raised.
   * "No forward progress" means : no byte read from input,
   * and no byte flushed to output.
   */
  #if !defined(ZSTD_NO_FORWARD_PROGRESS_MAX)
  #  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
  #endif
  48. /*-*******************************************************
  49. * Dependencies
  50. *********************************************************/
  51. #include <string.h> /* memcpy, memmove, memset */
  52. #include "compiler.h" /* prefetch */
  53. #include "cpu.h" /* bmi2 */
  54. #include "mem.h" /* low level memory routines */
  55. #define FSE_STATIC_LINKING_ONLY
  56. #include "fse.h"
  57. #define HUF_STATIC_LINKING_ONLY
  58. #include "huf.h"
  59. #include "zstd_internal.h"
  60. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
  61. # include "zstd_legacy.h"
  62. #endif
  63. static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
  64. static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
  65. /*-*************************************
  66. * Errors
  67. ***************************************/
  /* All three error predicates are routed to ERR_isError (for inlining). */
  #define ZSTD_isError  ERR_isError
  #define FSE_isError   ERR_isError
  #define HUF_isError   ERR_isError
  71. /*_*******************************************************
  72. * Memory operations
  73. **********************************************************/
  /* ZSTD_copy4() :
   * Copies exactly 4 bytes from `src` to `dst` using memcpy(). */
  static void ZSTD_copy4(void* dst, const void* src)
  {
      memcpy(dst, src, 4);
  }
  75. /*-*************************************************************
  76. * Context management
  77. ***************************************************************/
  /* ZSTD_dStage :
   * Values taken by the `stage` field of ZSTD_DCtx_s.
   * Enumerators are numbered implicitly, starting from 0. */
  typedef enum {
      ZSTDds_getFrameHeaderSize,
      ZSTDds_decodeFrameHeader,
      ZSTDds_decodeBlockHeader,
      ZSTDds_decompressBlock,
      ZSTDds_decompressLastBlock,
      ZSTDds_checkChecksum,
      ZSTDds_decodeSkippableHeader,
      ZSTDds_skipFrame
  } ZSTD_dStage;
  /* ZSTD_dStreamStage :
   * Values taken by the `streamStage` field of ZSTD_DCtx_s.
   * zdss_init is explicitly 0. */
  typedef enum {
      zdss_init = 0,
      zdss_loadHeader,
      zdss_read,
      zdss_load,
      zdss_flush
  } ZSTD_dStreamStage;
  84. typedef struct {
  85. U32 fastMode;
  86. U32 tableLog;
  87. } ZSTD_seqSymbol_header;
  88. typedef struct {
  89. U16 nextState;
  90. BYTE nbAdditionalBits;
  91. BYTE nbBits;
  92. U32 baseValue;
  93. } ZSTD_seqSymbol;
  /* SEQSYMBOL_TABLE_SIZE() :
   * Number of cells of a sequence-symbol table of log size `log` :
   * 2^log cells, plus one extra cell. */
  #define SEQSYMBOL_TABLE_SIZE(log)   ((1 << (log)) + 1)
  95. typedef struct {
  96. ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
  97. ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
  98. ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
  99. HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
  100. U32 rep[ZSTD_REP_NUM];
  101. } ZSTD_entropyDTables_t;
  102. struct ZSTD_DCtx_s
  103. {
  104. const ZSTD_seqSymbol* LLTptr;
  105. const ZSTD_seqSymbol* MLTptr;
  106. const ZSTD_seqSymbol* OFTptr;
  107. const HUF_DTable* HUFptr;
  108. ZSTD_entropyDTables_t entropy;
  109. U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
  110. const void* previousDstEnd; /* detect continuity */
  111. const void* prefixStart; /* start of current segment */
  112. const void* virtualStart; /* virtual start of previous segment if it was just before current one */
  113. const void* dictEnd; /* end of previous segment */
  114. size_t expected;
  115. ZSTD_frameHeader fParams;
  116. U64 decodedSize;
  117. blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
  118. ZSTD_dStage stage;
  119. U32 litEntropy;
  120. U32 fseEntropy;
  121. XXH64_state_t xxhState;
  122. size_t headerSize;
  123. ZSTD_format_e format;
  124. const BYTE* litPtr;
  125. ZSTD_customMem customMem;
  126. size_t litSize;
  127. size_t rleSize;
  128. size_t staticSize;
  129. int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
  130. /* dictionary */
  131. ZSTD_DDict* ddictLocal;
  132. const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
  133. U32 dictID;
  134. int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
  135. /* streaming */
  136. ZSTD_dStreamStage streamStage;
  137. char* inBuff;
  138. size_t inBuffSize;
  139. size_t inPos;
  140. size_t maxWindowSize;
  141. char* outBuff;
  142. size_t outBuffSize;
  143. size_t outStart;
  144. size_t outEnd;
  145. size_t lhSize;
  146. void* legacyContext;
  147. U32 previousLegacyVersion;
  148. U32 legacyVersion;
  149. U32 hostageByte;
  150. int noForwardProgress;
  151. /* workspace */
  152. BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
  153. BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
  154. }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
  155. size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
  156. {
  157. if (dctx==NULL) return 0; /* support sizeof NULL */
  158. return sizeof(*dctx)
  159. + ZSTD_sizeof_DDict(dctx->ddictLocal)
  160. + dctx->inBuffSize + dctx->outBuffSize;
  161. }
  162. size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
  163. static size_t ZSTD_startingInputLength(ZSTD_format_e format)
  164. {
  165. size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
  166. ZSTD_frameHeaderSize_prefix - ZSTD_FRAMEIDSIZE :
  167. ZSTD_frameHeaderSize_prefix;
  168. ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
  169. /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
  170. assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
  171. return startingInputLength;
  172. }
  173. static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
  174. {
  175. dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
  176. dctx->staticSize = 0;
  177. dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
  178. dctx->ddict = NULL;
  179. dctx->ddictLocal = NULL;
  180. dctx->dictEnd = NULL;
  181. dctx->ddictIsCold = 0;
  182. dctx->inBuff = NULL;
  183. dctx->inBuffSize = 0;
  184. dctx->outBuffSize = 0;
  185. dctx->streamStage = zdss_init;
  186. dctx->legacyContext = NULL;
  187. dctx->previousLegacyVersion = 0;
  188. dctx->noForwardProgress = 0;
  189. dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
  190. }
  191. ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
  192. {
  193. ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
  194. if ((size_t)workspace & 7) return NULL; /* 8-aligned */
  195. if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */
  196. ZSTD_initDCtx_internal(dctx);
  197. dctx->staticSize = workspaceSize;
  198. dctx->inBuff = (char*)(dctx+1);
  199. return dctx;
  200. }
  201. ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
  202. {
  203. if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
  204. { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
  205. if (!dctx) return NULL;
  206. dctx->customMem = customMem;
  207. ZSTD_initDCtx_internal(dctx);
  208. return dctx;
  209. }
  210. }
  211. ZSTD_DCtx* ZSTD_createDCtx(void)
  212. {
  213. DEBUGLOG(3, "ZSTD_createDCtx");
  214. return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
  215. }
  216. size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
  217. {
  218. if (dctx==NULL) return 0; /* support free on NULL */
  219. if (dctx->staticSize) return ERROR(memory_allocation); /* not compatible with static DCtx */
  220. { ZSTD_customMem const cMem = dctx->customMem;
  221. ZSTD_freeDDict(dctx->ddictLocal);
  222. dctx->ddictLocal = NULL;
  223. ZSTD_free(dctx->inBuff, cMem);
  224. dctx->inBuff = NULL;
  225. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
  226. if (dctx->legacyContext)
  227. ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
  228. #endif
  229. ZSTD_free(dctx, cMem);
  230. return 0;
  231. }
  232. }
  233. /* no longer useful */
  234. void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
  235. {
  236. size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
  237. memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
  238. }
  239. /*-*************************************************************
  240. * Frame header decoding
  241. ***************************************************************/
  242. /*! ZSTD_isFrame() :
  243. * Tells if the content of `buffer` starts with a valid Frame Identifier.
  244. * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
  245. * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
  246. * Note 3 : Skippable Frame Identifiers are considered valid. */
  247. unsigned ZSTD_isFrame(const void* buffer, size_t size)
  248. {
  249. if (size < ZSTD_FRAMEIDSIZE) return 0;
  250. { U32 const magic = MEM_readLE32(buffer);
  251. if (magic == ZSTD_MAGICNUMBER) return 1;
  252. if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
  253. }
  254. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
  255. if (ZSTD_isLegacy(buffer, size)) return 1;
  256. #endif
  257. return 0;
  258. }
  259. /** ZSTD_frameHeaderSize_internal() :
  260. * srcSize must be large enough to reach header size fields.
  261. * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
  262. * @return : size of the Frame Header
  263. * or an error code, which can be tested with ZSTD_isError() */
  264. static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
  265. {
  266. size_t const minInputSize = ZSTD_startingInputLength(format);
  267. if (srcSize < minInputSize) return ERROR(srcSize_wrong);
  268. { BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
  269. U32 const dictID= fhd & 3;
  270. U32 const singleSegment = (fhd >> 5) & 1;
  271. U32 const fcsId = fhd >> 6;
  272. return minInputSize + !singleSegment
  273. + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
  274. + (singleSegment && !fcsId);
  275. }
  276. }
  277. /** ZSTD_frameHeaderSize() :
  278. * srcSize must be >= ZSTD_frameHeaderSize_prefix.
  279. * @return : size of the Frame Header,
  280. * or an error code (if srcSize is too small) */
  281. size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
  282. {
  283. return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
  284. }
  285. /** ZSTD_getFrameHeader_advanced() :
  286. * decode Frame Header, or require larger `srcSize`.
  287. * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
  288. * @return : 0, `zfhPtr` is correctly filled,
  289. * >0, `srcSize` is too small, value is wanted `srcSize` amount,
  290. * or an error code, which can be tested using ZSTD_isError() */
  291. size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
  292. {
  293. const BYTE* ip = (const BYTE*)src;
  294. size_t const minInputSize = ZSTD_startingInputLength(format);
  295. memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
  296. if (srcSize < minInputSize) return minInputSize;
  297. if (src==NULL) return ERROR(GENERIC); /* invalid parameter */
  298. if ( (format != ZSTD_f_zstd1_magicless)
  299. && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
  300. if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
  301. /* skippable frame */
  302. if (srcSize < ZSTD_skippableHeaderSize)
  303. return ZSTD_skippableHeaderSize; /* magic number + frame length */
  304. memset(zfhPtr, 0, sizeof(*zfhPtr));
  305. zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
  306. zfhPtr->frameType = ZSTD_skippableFrame;
  307. return 0;
  308. }
  309. return ERROR(prefix_unknown);
  310. }
  311. /* ensure there is enough `srcSize` to fully read/decode frame header */
  312. { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
  313. if (srcSize < fhsize) return fhsize;
  314. zfhPtr->headerSize = (U32)fhsize;
  315. }
  316. { BYTE const fhdByte = ip[minInputSize-1];
  317. size_t pos = minInputSize;
  318. U32 const dictIDSizeCode = fhdByte&3;
  319. U32 const checksumFlag = (fhdByte>>2)&1;
  320. U32 const singleSegment = (fhdByte>>5)&1;
  321. U32 const fcsID = fhdByte>>6;
  322. U64 windowSize = 0;
  323. U32 dictID = 0;
  324. U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
  325. if ((fhdByte & 0x08) != 0)
  326. return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */
  327. if (!singleSegment) {
  328. BYTE const wlByte = ip[pos++];
  329. U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
  330. if (windowLog > ZSTD_WINDOWLOG_MAX)
  331. return ERROR(frameParameter_windowTooLarge);
  332. windowSize = (1ULL << windowLog);
  333. windowSize += (windowSize >> 3) * (wlByte&7);
  334. }
  335. switch(dictIDSizeCode)
  336. {
  337. default: assert(0); /* impossible */
  338. case 0 : break;
  339. case 1 : dictID = ip[pos]; pos++; break;
  340. case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
  341. case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
  342. }
  343. switch(fcsID)
  344. {
  345. default: assert(0); /* impossible */
  346. case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
  347. case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
  348. case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
  349. case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
  350. }
  351. if (singleSegment) windowSize = frameContentSize;
  352. zfhPtr->frameType = ZSTD_frame;
  353. zfhPtr->frameContentSize = frameContentSize;
  354. zfhPtr->windowSize = windowSize;
  355. zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
  356. zfhPtr->dictID = dictID;
  357. zfhPtr->checksumFlag = checksumFlag;
  358. }
  359. return 0;
  360. }
  361. /** ZSTD_getFrameHeader() :
  362. * decode Frame Header, or require larger `srcSize`.
  363. * note : this function does not consume input, it only reads it.
  364. * @return : 0, `zfhPtr` is correctly filled,
  365. * >0, `srcSize` is too small, value is wanted `srcSize` amount,
  366. * or an error code, which can be tested using ZSTD_isError() */
  367. size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
  368. {
  369. return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
  370. }
  371. /** ZSTD_getFrameContentSize() :
  372. * compatible with legacy mode
  373. * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
  374. * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
  375. * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
  376. unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
  377. {
  378. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
  379. if (ZSTD_isLegacy(src, srcSize)) {
  380. unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
  381. return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
  382. }
  383. #endif
  384. { ZSTD_frameHeader zfh;
  385. if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
  386. return ZSTD_CONTENTSIZE_ERROR;
  387. if (zfh.frameType == ZSTD_skippableFrame) {
  388. return 0;
  389. } else {
  390. return zfh.frameContentSize;
  391. } }
  392. }
  393. /** ZSTD_findDecompressedSize() :
  394. * compatible with legacy mode
  395. * `srcSize` must be the exact length of some number of ZSTD compressed and/or
  396. * skippable frames
  397. * @return : decompressed size of the frames contained */
  398. unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
  399. {
  400. unsigned long long totalDstSize = 0;
  401. while (srcSize >= ZSTD_frameHeaderSize_prefix) {
  402. U32 const magicNumber = MEM_readLE32(src);
  403. if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
  404. size_t skippableSize;
  405. if (srcSize < ZSTD_skippableHeaderSize)
  406. return ERROR(srcSize_wrong);
  407. skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_FRAMEIDSIZE)
  408. + ZSTD_skippableHeaderSize;
  409. if (srcSize < skippableSize) {
  410. return ZSTD_CONTENTSIZE_ERROR;
  411. }
  412. src = (const BYTE *)src + skippableSize;
  413. srcSize -= skippableSize;
  414. continue;
  415. }
  416. { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
  417. if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
  418. /* check for overflow */
  419. if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
  420. totalDstSize += ret;
  421. }
  422. { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
  423. if (ZSTD_isError(frameSrcSize)) {
  424. return ZSTD_CONTENTSIZE_ERROR;
  425. }
  426. src = (const BYTE *)src + frameSrcSize;
  427. srcSize -= frameSrcSize;
  428. }
  429. } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
  430. if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
  431. return totalDstSize;
  432. }
  433. /** ZSTD_getDecompressedSize() :
  434. * compatible with legacy mode
  435. * @return : decompressed size if known, 0 otherwise
  436. note : 0 can mean any of the following :
  437. - frame content is empty
  438. - decompressed size field is not present in frame header
  439. - frame header unknown / not supported
  440. - frame header not complete (`srcSize` too small) */
  441. unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
  442. {
  443. unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
  444. ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
  445. return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
  446. }
  447. /** ZSTD_decodeFrameHeader() :
  448. * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
  449. * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
  450. static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
  451. {
  452. size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
  453. if (ZSTD_isError(result)) return result; /* invalid header */
  454. if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
  455. if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
  456. return ERROR(dictionary_wrong);
  457. if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
  458. return 0;
  459. }
  460. /*-*************************************************************
  461. * Block decoding
  462. ***************************************************************/
  463. /*! ZSTD_getcBlockSize() :
  464. * Provides the size of compressed block from block header `src` */
  465. size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
  466. blockProperties_t* bpPtr)
  467. {
  468. if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
  469. { U32 const cBlockHeader = MEM_readLE24(src);
  470. U32 const cSize = cBlockHeader >> 3;
  471. bpPtr->lastBlock = cBlockHeader & 1;
  472. bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
  473. bpPtr->origSize = cSize; /* only useful for RLE */
  474. if (bpPtr->blockType == bt_rle) return 1;
  475. if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
  476. return cSize;
  477. }
  478. }
  479. static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
  480. const void* src, size_t srcSize)
  481. {
  482. if (dst==NULL) return ERROR(dstSize_tooSmall);
  483. if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
  484. memcpy(dst, src, srcSize);
  485. return srcSize;
  486. }
  487. static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
  488. const void* src, size_t srcSize,
  489. size_t regenSize)
  490. {
  491. if (srcSize != 1) return ERROR(srcSize_wrong);
  492. if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
  493. memset(dst, *(const BYTE*)src, regenSize);
  494. return regenSize;
  495. }
  496. /* Hidden declaration for fullbench */
  497. size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  498. const void* src, size_t srcSize);
  499. /*! ZSTD_decodeLiteralsBlock() :
  500. * @return : nb of bytes read from src (< srcSize )
  501. * note : symbol not declared but exposed for fullbench */
  502. size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  503. const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
  504. {
  505. if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
  506. { const BYTE* const istart = (const BYTE*) src;
  507. symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
  508. switch(litEncType)
  509. {
  510. case set_repeat:
  511. if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
  512. /* fall-through */
  513. case set_compressed:
  514. if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
  515. { size_t lhSize, litSize, litCSize;
  516. U32 singleStream=0;
  517. U32 const lhlCode = (istart[0] >> 2) & 3;
  518. U32 const lhc = MEM_readLE32(istart);
  519. switch(lhlCode)
  520. {
  521. case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
  522. /* 2 - 2 - 10 - 10 */
  523. singleStream = !lhlCode;
  524. lhSize = 3;
  525. litSize = (lhc >> 4) & 0x3FF;
  526. litCSize = (lhc >> 14) & 0x3FF;
  527. break;
  528. case 2:
  529. /* 2 - 2 - 14 - 14 */
  530. lhSize = 4;
  531. litSize = (lhc >> 4) & 0x3FFF;
  532. litCSize = lhc >> 18;
  533. break;
  534. case 3:
  535. /* 2 - 2 - 18 - 18 */
  536. lhSize = 5;
  537. litSize = (lhc >> 4) & 0x3FFFF;
  538. litCSize = (lhc >> 22) + (istart[4] << 10);
  539. break;
  540. }
  541. if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
  542. if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
  543. /* prefetch huffman table if cold */
  544. if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
  545. PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
  546. }
  547. if (HUF_isError((litEncType==set_repeat) ?
  548. ( singleStream ?
  549. HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
  550. HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
  551. ( singleStream ?
  552. HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
  553. dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) :
  554. HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
  555. dctx->workspace, sizeof(dctx->workspace), dctx->bmi2))))
  556. return ERROR(corruption_detected);
  557. dctx->litPtr = dctx->litBuffer;
  558. dctx->litSize = litSize;
  559. dctx->litEntropy = 1;
  560. if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
  561. memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
  562. return litCSize + lhSize;
  563. }
  564. case set_basic:
  565. { size_t litSize, lhSize;
  566. U32 const lhlCode = ((istart[0]) >> 2) & 3;
  567. switch(lhlCode)
  568. {
  569. case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
  570. lhSize = 1;
  571. litSize = istart[0] >> 3;
  572. break;
  573. case 1:
  574. lhSize = 2;
  575. litSize = MEM_readLE16(istart) >> 4;
  576. break;
  577. case 3:
  578. lhSize = 3;
  579. litSize = MEM_readLE24(istart) >> 4;
  580. break;
  581. }
  582. if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
  583. if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
  584. memcpy(dctx->litBuffer, istart+lhSize, litSize);
  585. dctx->litPtr = dctx->litBuffer;
  586. dctx->litSize = litSize;
  587. memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
  588. return lhSize+litSize;
  589. }
  590. /* direct reference into compressed stream */
  591. dctx->litPtr = istart+lhSize;
  592. dctx->litSize = litSize;
  593. return lhSize+litSize;
  594. }
  595. case set_rle:
  596. { U32 const lhlCode = ((istart[0]) >> 2) & 3;
  597. size_t litSize, lhSize;
  598. switch(lhlCode)
  599. {
  600. case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
  601. lhSize = 1;
  602. litSize = istart[0] >> 3;
  603. break;
  604. case 1:
  605. lhSize = 2;
  606. litSize = MEM_readLE16(istart) >> 4;
  607. break;
  608. case 3:
  609. lhSize = 3;
  610. litSize = MEM_readLE24(istart) >> 4;
  611. if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
  612. break;
  613. }
  614. if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
  615. memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
  616. dctx->litPtr = dctx->litBuffer;
  617. dctx->litSize = litSize;
  618. return lhSize+1;
  619. }
  620. default:
  621. return ERROR(corruption_detected); /* impossible */
  622. }
  623. }
  624. }
  625. /* Default FSE distribution tables.
  626. * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
  627. * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
  628. * They were generated programmatically with following method :
  629. * - start from default distributions, present in /lib/common/zstd_internal.h
  630. * - generate tables normally, using ZSTD_buildFSETable()
  631. * - printout the content of tables
  632. * - pretify output, report below, test with fuzzer to ensure it's correct */
  633. /* Default FSE distribution table for Literal Lengths */
  634. static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
  635. { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  636. /* nextState, nbAddBits, nbBits, baseVal */
  637. { 0, 0, 4, 0}, { 16, 0, 4, 0},
  638. { 32, 0, 5, 1}, { 0, 0, 5, 3},
  639. { 0, 0, 5, 4}, { 0, 0, 5, 6},
  640. { 0, 0, 5, 7}, { 0, 0, 5, 9},
  641. { 0, 0, 5, 10}, { 0, 0, 5, 12},
  642. { 0, 0, 6, 14}, { 0, 1, 5, 16},
  643. { 0, 1, 5, 20}, { 0, 1, 5, 22},
  644. { 0, 2, 5, 28}, { 0, 3, 5, 32},
  645. { 0, 4, 5, 48}, { 32, 6, 5, 64},
  646. { 0, 7, 5, 128}, { 0, 8, 6, 256},
  647. { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
  648. { 32, 0, 4, 0}, { 0, 0, 4, 1},
  649. { 0, 0, 5, 2}, { 32, 0, 5, 4},
  650. { 0, 0, 5, 5}, { 32, 0, 5, 7},
  651. { 0, 0, 5, 8}, { 32, 0, 5, 10},
  652. { 0, 0, 5, 11}, { 0, 0, 6, 13},
  653. { 32, 1, 5, 16}, { 0, 1, 5, 18},
  654. { 32, 1, 5, 22}, { 0, 2, 5, 24},
  655. { 32, 3, 5, 32}, { 0, 3, 5, 40},
  656. { 0, 6, 4, 64}, { 16, 6, 4, 64},
  657. { 32, 7, 5, 128}, { 0, 9, 6, 512},
  658. { 0, 11, 6, 2048}, { 48, 0, 4, 0},
  659. { 16, 0, 4, 1}, { 32, 0, 5, 2},
  660. { 32, 0, 5, 3}, { 32, 0, 5, 5},
  661. { 32, 0, 5, 6}, { 32, 0, 5, 8},
  662. { 32, 0, 5, 9}, { 32, 0, 5, 11},
  663. { 32, 0, 5, 12}, { 0, 0, 6, 15},
  664. { 32, 1, 5, 18}, { 32, 1, 5, 20},
  665. { 32, 2, 5, 24}, { 32, 2, 5, 28},
  666. { 32, 3, 5, 40}, { 32, 4, 5, 48},
  667. { 0, 16, 6,65536}, { 0, 15, 6,32768},
  668. { 0, 14, 6,16384}, { 0, 13, 6, 8192},
  669. }; /* LL_defaultDTable */
  670. /* Default FSE distribution table for Offset Codes */
  671. static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
  672. { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  673. /* nextState, nbAddBits, nbBits, baseVal */
  674. { 0, 0, 5, 0}, { 0, 6, 4, 61},
  675. { 0, 9, 5, 509}, { 0, 15, 5,32765},
  676. { 0, 21, 5,2097149}, { 0, 3, 5, 5},
  677. { 0, 7, 4, 125}, { 0, 12, 5, 4093},
  678. { 0, 18, 5,262141}, { 0, 23, 5,8388605},
  679. { 0, 5, 5, 29}, { 0, 8, 4, 253},
  680. { 0, 14, 5,16381}, { 0, 20, 5,1048573},
  681. { 0, 2, 5, 1}, { 16, 7, 4, 125},
  682. { 0, 11, 5, 2045}, { 0, 17, 5,131069},
  683. { 0, 22, 5,4194301}, { 0, 4, 5, 13},
  684. { 16, 8, 4, 253}, { 0, 13, 5, 8189},
  685. { 0, 19, 5,524285}, { 0, 1, 5, 1},
  686. { 16, 6, 4, 61}, { 0, 10, 5, 1021},
  687. { 0, 16, 5,65533}, { 0, 28, 5,268435453},
  688. { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
  689. { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
  690. }; /* OF_defaultDTable */
  691. /* Default FSE distribution table for Match Lengths */
  692. static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
  693. { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
  694. /* nextState, nbAddBits, nbBits, baseVal */
  695. { 0, 0, 6, 3}, { 0, 0, 4, 4},
  696. { 32, 0, 5, 5}, { 0, 0, 5, 6},
  697. { 0, 0, 5, 8}, { 0, 0, 5, 9},
  698. { 0, 0, 5, 11}, { 0, 0, 6, 13},
  699. { 0, 0, 6, 16}, { 0, 0, 6, 19},
  700. { 0, 0, 6, 22}, { 0, 0, 6, 25},
  701. { 0, 0, 6, 28}, { 0, 0, 6, 31},
  702. { 0, 0, 6, 34}, { 0, 1, 6, 37},
  703. { 0, 1, 6, 41}, { 0, 2, 6, 47},
  704. { 0, 3, 6, 59}, { 0, 4, 6, 83},
  705. { 0, 7, 6, 131}, { 0, 9, 6, 515},
  706. { 16, 0, 4, 4}, { 0, 0, 4, 5},
  707. { 32, 0, 5, 6}, { 0, 0, 5, 7},
  708. { 32, 0, 5, 9}, { 0, 0, 5, 10},
  709. { 0, 0, 6, 12}, { 0, 0, 6, 15},
  710. { 0, 0, 6, 18}, { 0, 0, 6, 21},
  711. { 0, 0, 6, 24}, { 0, 0, 6, 27},
  712. { 0, 0, 6, 30}, { 0, 0, 6, 33},
  713. { 0, 1, 6, 35}, { 0, 1, 6, 39},
  714. { 0, 2, 6, 43}, { 0, 3, 6, 51},
  715. { 0, 4, 6, 67}, { 0, 5, 6, 99},
  716. { 0, 8, 6, 259}, { 32, 0, 4, 4},
  717. { 48, 0, 4, 4}, { 16, 0, 4, 5},
  718. { 32, 0, 5, 7}, { 32, 0, 5, 8},
  719. { 32, 0, 5, 10}, { 32, 0, 5, 11},
  720. { 0, 0, 6, 14}, { 0, 0, 6, 17},
  721. { 0, 0, 6, 20}, { 0, 0, 6, 23},
  722. { 0, 0, 6, 26}, { 0, 0, 6, 29},
  723. { 0, 0, 6, 32}, { 0, 16, 6,65539},
  724. { 0, 15, 6,32771}, { 0, 14, 6,16387},
  725. { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
  726. { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
  727. }; /* ML_defaultDTable */
  728. static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
  729. {
  730. void* ptr = dt;
  731. ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
  732. ZSTD_seqSymbol* const cell = dt + 1;
  733. DTableH->tableLog = 0;
  734. DTableH->fastMode = 0;
  735. cell->nbBits = 0;
  736. cell->nextState = 0;
  737. assert(nbAddBits < 255);
  738. cell->nbAdditionalBits = (BYTE)nbAddBits;
  739. cell->baseValue = baseValue;
  740. }
  741. /* ZSTD_buildFSETable() :
  742. * generate FSE decoding table for one symbol (ll, ml or off) */
  743. static void
  744. ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
  745. const short* normalizedCounter, unsigned maxSymbolValue,
  746. const U32* baseValue, const U32* nbAdditionalBits,
  747. unsigned tableLog)
  748. {
  749. ZSTD_seqSymbol* const tableDecode = dt+1;
  750. U16 symbolNext[MaxSeq+1];
  751. U32 const maxSV1 = maxSymbolValue + 1;
  752. U32 const tableSize = 1 << tableLog;
  753. U32 highThreshold = tableSize-1;
  754. /* Sanity Checks */
  755. assert(maxSymbolValue <= MaxSeq);
  756. assert(tableLog <= MaxFSELog);
  757. /* Init, lay down lowprob symbols */
  758. { ZSTD_seqSymbol_header DTableH;
  759. DTableH.tableLog = tableLog;
  760. DTableH.fastMode = 1;
  761. { S16 const largeLimit= (S16)(1 << (tableLog-1));
  762. U32 s;
  763. for (s=0; s<maxSV1; s++) {
  764. if (normalizedCounter[s]==-1) {
  765. tableDecode[highThreshold--].baseValue = s;
  766. symbolNext[s] = 1;
  767. } else {
  768. if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
  769. symbolNext[s] = normalizedCounter[s];
  770. } } }
  771. memcpy(dt, &DTableH, sizeof(DTableH));
  772. }
  773. /* Spread symbols */
  774. { U32 const tableMask = tableSize-1;
  775. U32 const step = FSE_TABLESTEP(tableSize);
  776. U32 s, position = 0;
  777. for (s=0; s<maxSV1; s++) {
  778. int i;
  779. for (i=0; i<normalizedCounter[s]; i++) {
  780. tableDecode[position].baseValue = s;
  781. position = (position + step) & tableMask;
  782. while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
  783. } }
  784. assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
  785. }
  786. /* Build Decoding table */
  787. { U32 u;
  788. for (u=0; u<tableSize; u++) {
  789. U32 const symbol = tableDecode[u].baseValue;
  790. U32 const nextState = symbolNext[symbol]++;
  791. tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
  792. tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
  793. assert(nbAdditionalBits[symbol] < 255);
  794. tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
  795. tableDecode[u].baseValue = baseValue[symbol];
  796. } }
  797. }
  798. /*! ZSTD_buildSeqTable() :
  799. * @return : nb bytes read from src,
  800. * or an error code if it fails */
  801. static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
  802. symbolEncodingType_e type, U32 max, U32 maxLog,
  803. const void* src, size_t srcSize,
  804. const U32* baseValue, const U32* nbAdditionalBits,
  805. const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
  806. int ddictIsCold, int nbSeq)
  807. {
  808. switch(type)
  809. {
  810. case set_rle :
  811. if (!srcSize) return ERROR(srcSize_wrong);
  812. if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
  813. { U32 const symbol = *(const BYTE*)src;
  814. U32 const baseline = baseValue[symbol];
  815. U32 const nbBits = nbAdditionalBits[symbol];
  816. ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
  817. }
  818. *DTablePtr = DTableSpace;
  819. return 1;
  820. case set_basic :
  821. *DTablePtr = defaultTable;
  822. return 0;
  823. case set_repeat:
  824. if (!flagRepeatTable) return ERROR(corruption_detected);
  825. /* prefetch FSE table if used */
  826. if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
  827. const void* const pStart = *DTablePtr;
  828. size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
  829. PREFETCH_AREA(pStart, pSize);
  830. }
  831. return 0;
  832. case set_compressed :
  833. { U32 tableLog;
  834. S16 norm[MaxSeq+1];
  835. size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
  836. if (FSE_isError(headerSize)) return ERROR(corruption_detected);
  837. if (tableLog > maxLog) return ERROR(corruption_detected);
  838. ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
  839. *DTablePtr = DTableSpace;
  840. return headerSize;
  841. }
  842. default : /* impossible */
  843. assert(0);
  844. return ERROR(GENERIC);
  845. }
  846. }
  847. static const U32 LL_base[MaxLL+1] = {
  848. 0, 1, 2, 3, 4, 5, 6, 7,
  849. 8, 9, 10, 11, 12, 13, 14, 15,
  850. 16, 18, 20, 22, 24, 28, 32, 40,
  851. 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
  852. 0x2000, 0x4000, 0x8000, 0x10000 };
  853. static const U32 OF_base[MaxOff+1] = {
  854. 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
  855. 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
  856. 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
  857. 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
  858. static const U32 OF_bits[MaxOff+1] = {
  859. 0, 1, 2, 3, 4, 5, 6, 7,
  860. 8, 9, 10, 11, 12, 13, 14, 15,
  861. 16, 17, 18, 19, 20, 21, 22, 23,
  862. 24, 25, 26, 27, 28, 29, 30, 31 };
  863. static const U32 ML_base[MaxML+1] = {
  864. 3, 4, 5, 6, 7, 8, 9, 10,
  865. 11, 12, 13, 14, 15, 16, 17, 18,
  866. 19, 20, 21, 22, 23, 24, 25, 26,
  867. 27, 28, 29, 30, 31, 32, 33, 34,
  868. 35, 37, 39, 41, 43, 47, 51, 59,
  869. 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
  870. 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
  871. /* Hidden delcaration for fullbench */
  872. size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  873. const void* src, size_t srcSize);
  874. size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
  875. const void* src, size_t srcSize)
  876. {
  877. const BYTE* const istart = (const BYTE* const)src;
  878. const BYTE* const iend = istart + srcSize;
  879. const BYTE* ip = istart;
  880. int nbSeq;
  881. DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
  882. /* check */
  883. if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
  884. /* SeqHead */
  885. nbSeq = *ip++;
  886. if (!nbSeq) { *nbSeqPtr=0; return 1; }
  887. if (nbSeq > 0x7F) {
  888. if (nbSeq == 0xFF) {
  889. if (ip+2 > iend) return ERROR(srcSize_wrong);
  890. nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
  891. } else {
  892. if (ip >= iend) return ERROR(srcSize_wrong);
  893. nbSeq = ((nbSeq-0x80)<<8) + *ip++;
  894. }
  895. }
  896. *nbSeqPtr = nbSeq;
  897. /* FSE table descriptors */
  898. if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
  899. { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
  900. symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
  901. symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
  902. ip++;
  903. /* Build DTables */
  904. { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
  905. LLtype, MaxLL, LLFSELog,
  906. ip, iend-ip,
  907. LL_base, LL_bits,
  908. LL_defaultDTable, dctx->fseEntropy,
  909. dctx->ddictIsCold, nbSeq);
  910. if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
  911. ip += llhSize;
  912. }
  913. { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
  914. OFtype, MaxOff, OffFSELog,
  915. ip, iend-ip,
  916. OF_base, OF_bits,
  917. OF_defaultDTable, dctx->fseEntropy,
  918. dctx->ddictIsCold, nbSeq);
  919. if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
  920. ip += ofhSize;
  921. }
  922. { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
  923. MLtype, MaxML, MLFSELog,
  924. ip, iend-ip,
  925. ML_base, ML_bits,
  926. ML_defaultDTable, dctx->fseEntropy,
  927. dctx->ddictIsCold, nbSeq);
  928. if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
  929. ip += mlhSize;
  930. }
  931. }
  932. /* prefetch dictionary content */
  933. if (dctx->ddictIsCold) {
  934. size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
  935. size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
  936. size_t const pSize = MIN(psmin, 128 KB /* protection */ );
  937. const void* const pStart = (const char*)dctx->dictEnd - pSize;
  938. PREFETCH_AREA(pStart, pSize);
  939. dctx->ddictIsCold = 0;
  940. }
  941. return ip-istart;
  942. }
  943. typedef struct {
  944. size_t litLength;
  945. size_t matchLength;
  946. size_t offset;
  947. const BYTE* match;
  948. } seq_t;
  949. typedef struct {
  950. size_t state;
  951. const ZSTD_seqSymbol* table;
  952. } ZSTD_fseState;
  953. typedef struct {
  954. BIT_DStream_t DStream;
  955. ZSTD_fseState stateLL;
  956. ZSTD_fseState stateOffb;
  957. ZSTD_fseState stateML;
  958. size_t prevOffset[ZSTD_REP_NUM];
  959. const BYTE* prefixStart;
  960. const BYTE* dictEnd;
  961. size_t pos;
  962. } seqState_t;
  963. FORCE_NOINLINE
  964. size_t ZSTD_execSequenceLast7(BYTE* op,
  965. BYTE* const oend, seq_t sequence,
  966. const BYTE** litPtr, const BYTE* const litLimit,
  967. const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
  968. {
  969. BYTE* const oLitEnd = op + sequence.litLength;
  970. size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  971. BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  972. BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
  973. const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  974. const BYTE* match = oLitEnd - sequence.offset;
  975. /* check */
  976. if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
  977. if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
  978. if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */
  979. /* copy literals */
  980. if (op < oend_w) {
  981. ZSTD_wildcopy(op, *litPtr, oend_w - op);
  982. *litPtr += oend_w - op;
  983. op = oend_w;
  984. }
  985. while (op < oLitEnd) *op++ = *(*litPtr)++;
  986. /* copy Match */
  987. if (sequence.offset > (size_t)(oLitEnd - base)) {
  988. /* offset beyond prefix */
  989. if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
  990. match = dictEnd - (base-match);
  991. if (match + sequence.matchLength <= dictEnd) {
  992. memmove(oLitEnd, match, sequence.matchLength);
  993. return sequenceLength;
  994. }
  995. /* span extDict & currentPrefixSegment */
  996. { size_t const length1 = dictEnd - match;
  997. memmove(oLitEnd, match, length1);
  998. op = oLitEnd + length1;
  999. sequence.matchLength -= length1;
  1000. match = base;
  1001. } }
  1002. while (op < oMatchEnd) *op++ = *match++;
  1003. return sequenceLength;
  1004. }
  1005. HINT_INLINE
  1006. size_t ZSTD_execSequence(BYTE* op,
  1007. BYTE* const oend, seq_t sequence,
  1008. const BYTE** litPtr, const BYTE* const litLimit,
  1009. const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
  1010. {
  1011. BYTE* const oLitEnd = op + sequence.litLength;
  1012. size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  1013. BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  1014. BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
  1015. const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  1016. const BYTE* match = oLitEnd - sequence.offset;
  1017. /* check */
  1018. if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
  1019. if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
  1020. if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
  1021. /* copy Literals */
  1022. ZSTD_copy8(op, *litPtr);
  1023. if (sequence.litLength > 8)
  1024. ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
  1025. op = oLitEnd;
  1026. *litPtr = iLitEnd; /* update for next sequence */
  1027. /* copy Match */
  1028. if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
  1029. /* offset beyond prefix -> go into extDict */
  1030. if (sequence.offset > (size_t)(oLitEnd - virtualStart))
  1031. return ERROR(corruption_detected);
  1032. match = dictEnd + (match - prefixStart);
  1033. if (match + sequence.matchLength <= dictEnd) {
  1034. memmove(oLitEnd, match, sequence.matchLength);
  1035. return sequenceLength;
  1036. }
  1037. /* span extDict & currentPrefixSegment */
  1038. { size_t const length1 = dictEnd - match;
  1039. memmove(oLitEnd, match, length1);
  1040. op = oLitEnd + length1;
  1041. sequence.matchLength -= length1;
  1042. match = prefixStart;
  1043. if (op > oend_w || sequence.matchLength < MINMATCH) {
  1044. U32 i;
  1045. for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
  1046. return sequenceLength;
  1047. }
  1048. } }
  1049. /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
  1050. /* match within prefix */
  1051. if (sequence.offset < 8) {
  1052. /* close range match, overlap */
  1053. static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
  1054. static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
  1055. int const sub2 = dec64table[sequence.offset];
  1056. op[0] = match[0];
  1057. op[1] = match[1];
  1058. op[2] = match[2];
  1059. op[3] = match[3];
  1060. match += dec32table[sequence.offset];
  1061. ZSTD_copy4(op+4, match);
  1062. match -= sub2;
  1063. } else {
  1064. ZSTD_copy8(op, match);
  1065. }
  1066. op += 8; match += 8;
  1067. if (oMatchEnd > oend-(16-MINMATCH)) {
  1068. if (op < oend_w) {
  1069. ZSTD_wildcopy(op, match, oend_w - op);
  1070. match += oend_w - op;
  1071. op = oend_w;
  1072. }
  1073. while (op < oMatchEnd) *op++ = *match++;
  1074. } else {
  1075. ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
  1076. }
  1077. return sequenceLength;
  1078. }
  1079. HINT_INLINE
  1080. size_t ZSTD_execSequenceLong(BYTE* op,
  1081. BYTE* const oend, seq_t sequence,
  1082. const BYTE** litPtr, const BYTE* const litLimit,
  1083. const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
  1084. {
  1085. BYTE* const oLitEnd = op + sequence.litLength;
  1086. size_t const sequenceLength = sequence.litLength + sequence.matchLength;
  1087. BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
  1088. BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
  1089. const BYTE* const iLitEnd = *litPtr + sequence.litLength;
  1090. const BYTE* match = sequence.match;
  1091. /* check */
  1092. if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
  1093. if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
  1094. if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
  1095. /* copy Literals */
  1096. ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
  1097. if (sequence.litLength > 8)
  1098. ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
  1099. op = oLitEnd;
  1100. *litPtr = iLitEnd; /* update for next sequence */
  1101. /* copy Match */
  1102. if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
  1103. /* offset beyond prefix */
  1104. if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
  1105. if (match + sequence.matchLength <= dictEnd) {
  1106. memmove(oLitEnd, match, sequence.matchLength);
  1107. return sequenceLength;
  1108. }
  1109. /* span extDict & currentPrefixSegment */
  1110. { size_t const length1 = dictEnd - match;
  1111. memmove(oLitEnd, match, length1);
  1112. op = oLitEnd + length1;
  1113. sequence.matchLength -= length1;
  1114. match = prefixStart;
  1115. if (op > oend_w || sequence.matchLength < MINMATCH) {
  1116. U32 i;
  1117. for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
  1118. return sequenceLength;
  1119. }
  1120. } }
  1121. assert(op <= oend_w);
  1122. assert(sequence.matchLength >= MINMATCH);
  1123. /* match within prefix */
  1124. if (sequence.offset < 8) {
  1125. /* close range match, overlap */
  1126. static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
  1127. static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
  1128. int const sub2 = dec64table[sequence.offset];
  1129. op[0] = match[0];
  1130. op[1] = match[1];
  1131. op[2] = match[2];
  1132. op[3] = match[3];
  1133. match += dec32table[sequence.offset];
  1134. ZSTD_copy4(op+4, match);
  1135. match -= sub2;
  1136. } else {
  1137. ZSTD_copy8(op, match);
  1138. }
  1139. op += 8; match += 8;
  1140. if (oMatchEnd > oend-(16-MINMATCH)) {
  1141. if (op < oend_w) {
  1142. ZSTD_wildcopy(op, match, oend_w - op);
  1143. match += oend_w - op;
  1144. op = oend_w;
  1145. }
  1146. while (op < oMatchEnd) *op++ = *match++;
  1147. } else {
  1148. ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
  1149. }
  1150. return sequenceLength;
  1151. }
  1152. static void
  1153. ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
  1154. {
  1155. const void* ptr = dt;
  1156. const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
  1157. DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
  1158. DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
  1159. (U32)DStatePtr->state, DTableH->tableLog);
  1160. BIT_reloadDStream(bitD);
  1161. DStatePtr->table = dt + 1;
  1162. }
  1163. FORCE_INLINE_TEMPLATE void
  1164. ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
  1165. {
  1166. ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
  1167. U32 const nbBits = DInfo.nbBits;
  1168. size_t const lowBits = BIT_readBits(bitD, nbBits);
  1169. DStatePtr->state = DInfo.nextState + lowBits;
  1170. }
  /* LONG_OFFSETS_MAX_EXTRA_BITS_32 :
   * We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
   * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
   * bits before reloading. This value is the maximum number of extra bits we read
   * after reloading when we are decoding long offsets.
   */
  #define LONG_OFFSETS_MAX_EXTRA_BITS_32                              \
      ( (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32)           \
          ? (ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32)       \
          : 0 )

  /* ZSTD_longOffset_e : tells ZSTD_decodeSequence() whether the long-offset reading path may be needed.
   * ZSTD_lo_isLongOffset must be 1 (checked by a static assert in ZSTD_decodeSequence()). */
  typedef enum {
      ZSTD_lo_isRegularOffset = 0,
      ZSTD_lo_isLongOffset = 1
  } ZSTD_longOffset_e;
  1181. FORCE_INLINE_TEMPLATE seq_t
  1182. ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
  1183. {
  1184. seq_t seq = {0};
  1185. U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
  1186. U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
  1187. U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
  1188. U32 const totalBits = llBits+mlBits+ofBits;
  1189. U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
  1190. U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
  1191. U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
  1192. /* sequence */
  1193. { size_t offset;
  1194. if (!ofBits)
  1195. offset = 0;
  1196. else {
  1197. ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
  1198. ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
  1199. assert(ofBits <= MaxOff);
  1200. if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
  1201. U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
  1202. offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
  1203. BIT_reloadDStream(&seqState->DStream);
  1204. if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
  1205. assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
  1206. } else {
  1207. offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
  1208. if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
  1209. }
  1210. }
  1211. if (ofBits <= 1) {
  1212. offset += (llBase==0);
  1213. if (offset) {
  1214. size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
  1215. temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
  1216. if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
  1217. seqState->prevOffset[1] = seqState->prevOffset[0];
  1218. seqState->prevOffset[0] = offset = temp;
  1219. } else { /* offset == 0 */
  1220. offset = seqState->prevOffset[0];
  1221. }
  1222. } else {
  1223. seqState->prevOffset[2] = seqState->prevOffset[1];
  1224. seqState->prevOffset[1] = seqState->prevOffset[0];
  1225. seqState->prevOffset[0] = offset;
  1226. }
  1227. seq.offset = offset;
  1228. }
  1229. seq.matchLength = mlBase
  1230. + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
  1231. if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
  1232. BIT_reloadDStream(&seqState->DStream);
  1233. if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
  1234. BIT_reloadDStream(&seqState->DStream);
  1235. /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
  1236. ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
  1237. seq.litLength = llBase
  1238. + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
  1239. if (MEM_32bits())
  1240. BIT_reloadDStream(&seqState->DStream);
  1241. DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
  1242. (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
  1243. /* ANS state update */
  1244. ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
  1245. ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
  1246. if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
  1247. ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
  1248. return seq;
  1249. }
  1250. FORCE_INLINE_TEMPLATE size_t
  1251. ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
  1252. void* dst, size_t maxDstSize,
  1253. const void* seqStart, size_t seqSize, int nbSeq,
  1254. const ZSTD_longOffset_e isLongOffset)
  1255. {
  1256. const BYTE* ip = (const BYTE*)seqStart;
  1257. const BYTE* const iend = ip + seqSize;
  1258. BYTE* const ostart = (BYTE* const)dst;
  1259. BYTE* const oend = ostart + maxDstSize;
  1260. BYTE* op = ostart;
  1261. const BYTE* litPtr = dctx->litPtr;
  1262. const BYTE* const litEnd = litPtr + dctx->litSize;
  1263. const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
  1264. const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
  1265. const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
  1266. DEBUGLOG(5, "ZSTD_decompressSequences_body");
  1267. /* Regen sequences */
  1268. if (nbSeq) {
  1269. seqState_t seqState;
  1270. dctx->fseEntropy = 1;
  1271. { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
  1272. CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
  1273. ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
  1274. ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
  1275. ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
  1276. for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
  1277. nbSeq--;
  1278. { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
  1279. size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
  1280. DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
  1281. if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  1282. op += oneSeqSize;
  1283. } }
  1284. /* check if reached exact end */
  1285. DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
  1286. if (nbSeq) return ERROR(corruption_detected);
  1287. /* save reps for next block */
  1288. { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
  1289. }
  1290. /* last literal segment */
  1291. { size_t const lastLLSize = litEnd - litPtr;
  1292. if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
  1293. memcpy(op, litPtr, lastLLSize);
  1294. op += lastLLSize;
  1295. }
  1296. return op-ostart;
  1297. }
  1298. static size_t
  1299. ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
  1300. void* dst, size_t maxDstSize,
  1301. const void* seqStart, size_t seqSize, int nbSeq,
  1302. const ZSTD_longOffset_e isLongOffset)
  1303. {
  1304. return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1305. }
  1306. FORCE_INLINE_TEMPLATE seq_t
  1307. ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
  1308. {
  1309. seq_t seq;
  1310. U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
  1311. U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
  1312. U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
  1313. U32 const totalBits = llBits+mlBits+ofBits;
  1314. U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
  1315. U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
  1316. U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
  1317. /* sequence */
  1318. { size_t offset;
  1319. if (!ofBits)
  1320. offset = 0;
  1321. else {
  1322. ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
  1323. ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
  1324. assert(ofBits <= MaxOff);
  1325. if (MEM_32bits() && longOffsets) {
  1326. U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
  1327. offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
  1328. if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
  1329. if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
  1330. } else {
  1331. offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
  1332. if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
  1333. }
  1334. }
  1335. if (ofBits <= 1) {
  1336. offset += (llBase==0);
  1337. if (offset) {
  1338. size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
  1339. temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
  1340. if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
  1341. seqState->prevOffset[1] = seqState->prevOffset[0];
  1342. seqState->prevOffset[0] = offset = temp;
  1343. } else {
  1344. offset = seqState->prevOffset[0];
  1345. }
  1346. } else {
  1347. seqState->prevOffset[2] = seqState->prevOffset[1];
  1348. seqState->prevOffset[1] = seqState->prevOffset[0];
  1349. seqState->prevOffset[0] = offset;
  1350. }
  1351. seq.offset = offset;
  1352. }
  1353. seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
  1354. if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
  1355. BIT_reloadDStream(&seqState->DStream);
  1356. if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
  1357. BIT_reloadDStream(&seqState->DStream);
  1358. /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
  1359. ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
  1360. seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
  1361. if (MEM_32bits())
  1362. BIT_reloadDStream(&seqState->DStream);
  1363. { size_t const pos = seqState->pos + seq.litLength;
  1364. const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
  1365. seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
  1366. * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
  1367. seqState->pos = pos + seq.matchLength;
  1368. }
  1369. /* ANS state update */
  1370. ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
  1371. ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
  1372. if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
  1373. ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
  1374. return seq;
  1375. }
  1376. FORCE_INLINE_TEMPLATE size_t
  1377. ZSTD_decompressSequencesLong_body(
  1378. ZSTD_DCtx* dctx,
  1379. void* dst, size_t maxDstSize,
  1380. const void* seqStart, size_t seqSize, int nbSeq,
  1381. const ZSTD_longOffset_e isLongOffset)
  1382. {
  1383. const BYTE* ip = (const BYTE*)seqStart;
  1384. const BYTE* const iend = ip + seqSize;
  1385. BYTE* const ostart = (BYTE* const)dst;
  1386. BYTE* const oend = ostart + maxDstSize;
  1387. BYTE* op = ostart;
  1388. const BYTE* litPtr = dctx->litPtr;
  1389. const BYTE* const litEnd = litPtr + dctx->litSize;
  1390. const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
  1391. const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
  1392. const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
  1393. /* Regen sequences */
  1394. if (nbSeq) {
  1395. #define STORED_SEQS 4
  1396. #define STOSEQ_MASK (STORED_SEQS-1)
  1397. #define ADVANCED_SEQS 4
  1398. seq_t sequences[STORED_SEQS];
  1399. int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
  1400. seqState_t seqState;
  1401. int seqNb;
  1402. dctx->fseEntropy = 1;
  1403. { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
  1404. seqState.prefixStart = prefixStart;
  1405. seqState.pos = (size_t)(op-prefixStart);
  1406. seqState.dictEnd = dictEnd;
  1407. CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
  1408. ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
  1409. ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
  1410. ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
  1411. /* prepare in advance */
  1412. for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
  1413. sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
  1414. }
  1415. if (seqNb<seqAdvance) return ERROR(corruption_detected);
  1416. /* decode and decompress */
  1417. for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
  1418. seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
  1419. size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
  1420. if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  1421. PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
  1422. sequences[seqNb&STOSEQ_MASK] = sequence;
  1423. op += oneSeqSize;
  1424. }
  1425. if (seqNb<nbSeq) return ERROR(corruption_detected);
  1426. /* finish queue */
  1427. seqNb -= seqAdvance;
  1428. for ( ; seqNb<nbSeq ; seqNb++) {
  1429. size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
  1430. if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
  1431. op += oneSeqSize;
  1432. }
  1433. /* save reps for next block */
  1434. { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
  1435. #undef STORED_SEQS
  1436. #undef STOSEQ_MASK
  1437. #undef ADVANCED_SEQS
  1438. }
  1439. /* last literal segment */
  1440. { size_t const lastLLSize = litEnd - litPtr;
  1441. if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
  1442. memcpy(op, litPtr, lastLLSize);
  1443. op += lastLLSize;
  1444. }
  1445. return op-ostart;
  1446. }
  1447. static size_t
  1448. ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
  1449. void* dst, size_t maxDstSize,
  1450. const void* seqStart, size_t seqSize, int nbSeq,
  1451. const ZSTD_longOffset_e isLongOffset)
  1452. {
  1453. return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1454. }
  1455. #if DYNAMIC_BMI2
  1456. static TARGET_ATTRIBUTE("bmi2") size_t
  1457. ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
  1458. void* dst, size_t maxDstSize,
  1459. const void* seqStart, size_t seqSize, int nbSeq,
  1460. const ZSTD_longOffset_e isLongOffset)
  1461. {
  1462. return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1463. }
  1464. static TARGET_ATTRIBUTE("bmi2") size_t
  1465. ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
  1466. void* dst, size_t maxDstSize,
  1467. const void* seqStart, size_t seqSize, int nbSeq,
  1468. const ZSTD_longOffset_e isLongOffset)
  1469. {
  1470. return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1471. }
  1472. #endif
  1473. typedef size_t (*ZSTD_decompressSequences_t)(
  1474. ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
  1475. const void *seqStart, size_t seqSize, int nbSeq,
  1476. const ZSTD_longOffset_e isLongOffset);
  1477. static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
  1478. const void* seqStart, size_t seqSize, int nbSeq,
  1479. const ZSTD_longOffset_e isLongOffset)
  1480. {
  1481. DEBUGLOG(5, "ZSTD_decompressSequences");
  1482. #if DYNAMIC_BMI2
  1483. if (dctx->bmi2) {
  1484. return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1485. }
  1486. #endif
  1487. return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1488. }
  1489. static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
  1490. void* dst, size_t maxDstSize,
  1491. const void* seqStart, size_t seqSize, int nbSeq,
  1492. const ZSTD_longOffset_e isLongOffset)
  1493. {
  1494. DEBUGLOG(5, "ZSTD_decompressSequencesLong");
  1495. #if DYNAMIC_BMI2
  1496. if (dctx->bmi2) {
  1497. return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1498. }
  1499. #endif
  1500. return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
  1501. }
  1502. /* ZSTD_getLongOffsetsShare() :
  1503. * condition : offTable must be valid
  1504. * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
  1505. * compared to maximum possible of (1<<OffFSELog) */
  1506. static unsigned
  1507. ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
  1508. {
  1509. const void* ptr = offTable;
  1510. U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
  1511. const ZSTD_seqSymbol* table = offTable + 1;
  1512. U32 const max = 1 << tableLog;
  1513. U32 u, total = 0;
  1514. DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
  1515. assert(max <= (1 << OffFSELog)); /* max not too large */
  1516. for (u=0; u<max; u++) {
  1517. if (table[u].nbAdditionalBits > 22) total += 1;
  1518. }
  1519. assert(tableLog <= OffFSELog);
  1520. total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
  1521. return total;
  1522. }
  1523. static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  1524. void* dst, size_t dstCapacity,
  1525. const void* src, size_t srcSize, const int frame)
  1526. { /* blockType == blockCompressed */
  1527. const BYTE* ip = (const BYTE*)src;
  1528. /* isLongOffset must be true if there are long offsets.
  1529. * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
  1530. * We don't expect that to be the case in 64-bit mode.
  1531. * In block mode, window size is not known, so we have to be conservative.
  1532. * (note: but it could be evaluated from current-lowLimit)
  1533. */
  1534. ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
  1535. DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
  1536. if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
  1537. /* Decode literals section */
  1538. { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
  1539. DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
  1540. if (ZSTD_isError(litCSize)) return litCSize;
  1541. ip += litCSize;
  1542. srcSize -= litCSize;
  1543. }
  1544. /* Build Decoding Tables */
  1545. { int nbSeq;
  1546. size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
  1547. if (ZSTD_isError(seqHSize)) return seqHSize;
  1548. ip += seqHSize;
  1549. srcSize -= seqHSize;
  1550. if ( (!frame || dctx->fParams.windowSize > (1<<24))
  1551. && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
  1552. U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
  1553. U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
  1554. if (shareLongOffsets >= minShare)
  1555. return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
  1556. }
  1557. return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
  1558. }
  1559. }
  1560. static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
  1561. {
  1562. if (dst != dctx->previousDstEnd) { /* not contiguous */
  1563. dctx->dictEnd = dctx->previousDstEnd;
  1564. dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
  1565. dctx->prefixStart = dst;
  1566. dctx->previousDstEnd = dst;
  1567. }
  1568. }
  1569. size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
  1570. void* dst, size_t dstCapacity,
  1571. const void* src, size_t srcSize)
  1572. {
  1573. size_t dSize;
  1574. ZSTD_checkContinuity(dctx, dst);
  1575. dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
  1576. dctx->previousDstEnd = (char*)dst + dSize;
  1577. return dSize;
  1578. }
  1579. /** ZSTD_insertBlock() :
  1580. insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
  1581. ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
  1582. {
  1583. ZSTD_checkContinuity(dctx, blockStart);
  1584. dctx->previousDstEnd = (const char*)blockStart + blockSize;
  1585. return blockSize;
  1586. }
  1587. static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE value, size_t length)
  1588. {
  1589. if (length > dstCapacity) return ERROR(dstSize_tooSmall);
  1590. memset(dst, value, length);
  1591. return length;
  1592. }
  1593. /** ZSTD_findFrameCompressedSize() :
  1594. * compatible with legacy mode
  1595. * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
  1596. * `srcSize` must be at least as large as the frame contained
  1597. * @return : the compressed size of the frame starting at `src` */
  1598. size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
  1599. {
  1600. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
  1601. if (ZSTD_isLegacy(src, srcSize))
  1602. return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
  1603. #endif
  1604. if ( (srcSize >= ZSTD_skippableHeaderSize)
  1605. && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
  1606. return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE);
  1607. } else {
  1608. const BYTE* ip = (const BYTE*)src;
  1609. const BYTE* const ipstart = ip;
  1610. size_t remainingSize = srcSize;
  1611. ZSTD_frameHeader zfh;
  1612. /* Extract Frame Header */
  1613. { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
  1614. if (ZSTD_isError(ret)) return ret;
  1615. if (ret > 0) return ERROR(srcSize_wrong);
  1616. }
  1617. ip += zfh.headerSize;
  1618. remainingSize -= zfh.headerSize;
  1619. /* Loop on each block */
  1620. while (1) {
  1621. blockProperties_t blockProperties;
  1622. size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
  1623. if (ZSTD_isError(cBlockSize)) return cBlockSize;
  1624. if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
  1625. return ERROR(srcSize_wrong);
  1626. ip += ZSTD_blockHeaderSize + cBlockSize;
  1627. remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
  1628. if (blockProperties.lastBlock) break;
  1629. }
  1630. if (zfh.checksumFlag) { /* Final frame content checksum */
  1631. if (remainingSize < 4) return ERROR(srcSize_wrong);
  1632. ip += 4;
  1633. }
  1634. return ip - ipstart;
  1635. }
  1636. }
  1637. /*! ZSTD_decompressFrame() :
  1638. * @dctx must be properly initialized */
  1639. static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
  1640. void* dst, size_t dstCapacity,
  1641. const void** srcPtr, size_t *srcSizePtr)
  1642. {
  1643. const BYTE* ip = (const BYTE*)(*srcPtr);
  1644. BYTE* const ostart = (BYTE* const)dst;
  1645. BYTE* const oend = ostart + dstCapacity;
  1646. BYTE* op = ostart;
  1647. size_t remainingSize = *srcSizePtr;
  1648. /* check */
  1649. if (remainingSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize)
  1650. return ERROR(srcSize_wrong);
  1651. /* Frame Header */
  1652. { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
  1653. if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
  1654. if (remainingSize < frameHeaderSize+ZSTD_blockHeaderSize)
  1655. return ERROR(srcSize_wrong);
  1656. CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
  1657. ip += frameHeaderSize; remainingSize -= frameHeaderSize;
  1658. }
  1659. /* Loop on each block */
  1660. while (1) {
  1661. size_t decodedSize;
  1662. blockProperties_t blockProperties;
  1663. size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
  1664. if (ZSTD_isError(cBlockSize)) return cBlockSize;
  1665. ip += ZSTD_blockHeaderSize;
  1666. remainingSize -= ZSTD_blockHeaderSize;
  1667. if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
  1668. switch(blockProperties.blockType)
  1669. {
  1670. case bt_compressed:
  1671. decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
  1672. break;
  1673. case bt_raw :
  1674. decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
  1675. break;
  1676. case bt_rle :
  1677. decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
  1678. break;
  1679. case bt_reserved :
  1680. default:
  1681. return ERROR(corruption_detected);
  1682. }
  1683. if (ZSTD_isError(decodedSize)) return decodedSize;
  1684. if (dctx->fParams.checksumFlag)
  1685. XXH64_update(&dctx->xxhState, op, decodedSize);
  1686. op += decodedSize;
  1687. ip += cBlockSize;
  1688. remainingSize -= cBlockSize;
  1689. if (blockProperties.lastBlock) break;
  1690. }
  1691. if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
  1692. if ((U64)(op-ostart) != dctx->fParams.frameContentSize) {
  1693. return ERROR(corruption_detected);
  1694. } }
  1695. if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
  1696. U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
  1697. U32 checkRead;
  1698. if (remainingSize<4) return ERROR(checksum_wrong);
  1699. checkRead = MEM_readLE32(ip);
  1700. if (checkRead != checkCalc) return ERROR(checksum_wrong);
  1701. ip += 4;
  1702. remainingSize -= 4;
  1703. }
  1704. /* Allow caller to get size read */
  1705. *srcPtr = ip;
  1706. *srcSizePtr = remainingSize;
  1707. return op-ostart;
  1708. }
  1709. static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
  1710. void* dst, size_t dstCapacity,
  1711. const void* src, size_t srcSize,
  1712. const void* dict, size_t dictSize,
  1713. const ZSTD_DDict* ddict)
  1714. {
  1715. void* const dststart = dst;
  1716. int moreThan1Frame = 0;
  1717. DEBUGLOG(5, "ZSTD_decompressMultiFrame");
  1718. assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
  1719. if (ddict) {
  1720. dict = ZSTD_DDictDictContent(ddict);
  1721. dictSize = ZSTD_DDictDictSize(ddict);
  1722. }
  1723. while (srcSize >= ZSTD_frameHeaderSize_prefix) {
  1724. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
  1725. if (ZSTD_isLegacy(src, srcSize)) {
  1726. size_t decodedSize;
  1727. size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
  1728. if (ZSTD_isError(frameSize)) return frameSize;
  1729. /* legacy support is not compatible with static dctx */
  1730. if (dctx->staticSize) return ERROR(memory_allocation);
  1731. decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
  1732. dst = (BYTE*)dst + decodedSize;
  1733. dstCapacity -= decodedSize;
  1734. src = (const BYTE*)src + frameSize;
  1735. srcSize -= frameSize;
  1736. continue;
  1737. }
  1738. #endif
  1739. { U32 const magicNumber = MEM_readLE32(src);
  1740. DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
  1741. (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
  1742. if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
  1743. size_t skippableSize;
  1744. if (srcSize < ZSTD_skippableHeaderSize)
  1745. return ERROR(srcSize_wrong);
  1746. skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE)
  1747. + ZSTD_skippableHeaderSize;
  1748. if (srcSize < skippableSize) return ERROR(srcSize_wrong);
  1749. src = (const BYTE *)src + skippableSize;
  1750. srcSize -= skippableSize;
  1751. continue;
  1752. } }
  1753. if (ddict) {
  1754. /* we were called from ZSTD_decompress_usingDDict */
  1755. CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
  1756. } else {
  1757. /* this will initialize correctly with no dict if dict == NULL, so
  1758. * use this in all cases but ddict */
  1759. CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
  1760. }
  1761. ZSTD_checkContinuity(dctx, dst);
  1762. { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
  1763. &src, &srcSize);
  1764. if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
  1765. && (moreThan1Frame==1) ) {
  1766. /* at least one frame successfully completed,
  1767. * but following bytes are garbage :
  1768. * it's more likely to be a srcSize error,
  1769. * specifying more bytes than compressed size of frame(s).
  1770. * This error message replaces ERROR(prefix_unknown),
  1771. * which would be confusing, as the first header is actually correct.
  1772. * Note that one could be unlucky, it might be a corruption error instead,
  1773. * happening right at the place where we expect zstd magic bytes.
  1774. * But this is _much_ less likely than a srcSize field error. */
  1775. return ERROR(srcSize_wrong);
  1776. }
  1777. if (ZSTD_isError(res)) return res;
  1778. /* no need to bound check, ZSTD_decompressFrame already has */
  1779. dst = (BYTE*)dst + res;
  1780. dstCapacity -= res;
  1781. }
  1782. moreThan1Frame = 1;
  1783. } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
  1784. if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
  1785. return (BYTE*)dst - (BYTE*)dststart;
  1786. }
  1787. size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
  1788. void* dst, size_t dstCapacity,
  1789. const void* src, size_t srcSize,
  1790. const void* dict, size_t dictSize)
  1791. {
  1792. return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
  1793. }
  1794. size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  1795. {
  1796. return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
  1797. }
  1798. size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  1799. {
  1800. #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
  1801. size_t regenSize;
  1802. ZSTD_DCtx* const dctx = ZSTD_createDCtx();
  1803. if (dctx==NULL) return ERROR(memory_allocation);
  1804. regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
  1805. ZSTD_freeDCtx(dctx);
  1806. return regenSize;
  1807. #else /* stack mode */
  1808. ZSTD_DCtx dctx;
  1809. ZSTD_initDCtx_internal(&dctx);
  1810. return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
  1811. #endif
  1812. }
  1813. /*-**************************************
  1814. * Advanced Streaming Decompression API
  1815. * Bufferless and synchronous
  1816. ****************************************/
  1817. size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
  1818. ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
  1819. switch(dctx->stage)
  1820. {
  1821. default: /* should not happen */
  1822. assert(0);
  1823. case ZSTDds_getFrameHeaderSize:
  1824. case ZSTDds_decodeFrameHeader:
  1825. return ZSTDnit_frameHeader;
  1826. case ZSTDds_decodeBlockHeader:
  1827. return ZSTDnit_blockHeader;
  1828. case ZSTDds_decompressBlock:
  1829. return ZSTDnit_block;
  1830. case ZSTDds_decompressLastBlock:
  1831. return ZSTDnit_lastBlock;
  1832. case ZSTDds_checkChecksum:
  1833. return ZSTDnit_checksum;
  1834. case ZSTDds_decodeSkippableHeader:
  1835. case ZSTDds_skipFrame:
  1836. return ZSTDnit_skippableFrame;
  1837. }
  1838. }
  1839. static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
  1840. /** ZSTD_decompressContinue() :
  * Runs one step of the bufferless decoding state machine selected by dctx->stage,
  * then updates dctx->stage and dctx->expected for the following call.
  1841. * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
  1842. * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
  1843. * or an error code, which can be tested using ZSTD_isError() */
  1844. size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  1845. {
  1846. DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
  1847. /* Sanity check */
  1848. if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
  1849. if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
  1850. switch (dctx->stage)
  1851. {
  /* Stage 1 : first bytes of a frame. Either a skippable frame is recognized,
   * or the full header size is computed; in both cases the bytes received so far
   * are saved into dctx->headerBuffer and the remainder is requested. */
  1852. case ZSTDds_getFrameHeaderSize :
  1853. assert(src != NULL);
  1854. if (dctx->format == ZSTD_f_zstd1) { /* allows header */
  1855. assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
  1856. if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
  1857. memcpy(dctx->headerBuffer, src, srcSize);
  1858. dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */
  1859. dctx->stage = ZSTDds_decodeSkippableHeader;
  1860. return 0;
  1861. } }
  1862. dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
  1863. if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
  1864. memcpy(dctx->headerBuffer, src, srcSize);
  1865. dctx->expected = dctx->headerSize - srcSize;
  1866. dctx->stage = ZSTDds_decodeFrameHeader;
  1867. return 0;
  /* Stage 2 : the rest of the frame header is appended at the tail of headerBuffer,
   * then the complete header is decoded. Next input is a block header. */
  1868. case ZSTDds_decodeFrameHeader:
  1869. assert(src != NULL);
  1870. memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
  1871. CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
  1872. dctx->expected = ZSTD_blockHeaderSize;
  1873. dctx->stage = ZSTDds_decodeBlockHeader;
  1874. return 0;
  /* Stage 3 : block header. Block type and origSize are remembered for the next call.
   * A block with cBlockSize==0 produces no decompress stage : the state jumps directly
   * to the checksum, the end of frame, or the next block header. */
  1875. case ZSTDds_decodeBlockHeader:
  1876. { blockProperties_t bp;
  1877. size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
  1878. if (ZSTD_isError(cBlockSize)) return cBlockSize;
  1879. dctx->expected = cBlockSize;
  1880. dctx->bType = bp.blockType;
  1881. dctx->rleSize = bp.origSize;
  1882. if (cBlockSize) {
  1883. dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
  1884. return 0;
  1885. }
  1886. /* empty block */
  1887. if (bp.lastBlock) {
  1888. if (dctx->fParams.checksumFlag) {
  1889. dctx->expected = 4;
  1890. dctx->stage = ZSTDds_checkChecksum;
  1891. } else {
  1892. dctx->expected = 0; /* end of frame */
  1893. dctx->stage = ZSTDds_getFrameHeaderSize;
  1894. }
  1895. } else {
  1896. dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */
  1897. dctx->stage = ZSTDds_decodeBlockHeader;
  1898. }
  1899. return 0;
  1900. }
  /* Stage 4 : block content. The decoder is selected from the block type saved
   * by the block-header stage; this is the only stage returning a non-zero size. */
  1901. case ZSTDds_decompressLastBlock:
  1902. case ZSTDds_decompressBlock:
  1903. DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
  1904. { size_t rSize;
  1905. switch(dctx->bType)
  1906. {
  1907. case bt_compressed:
  1908. DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
  1909. rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
  1910. break;
  1911. case bt_raw :
  1912. rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
  1913. break;
  1914. case bt_rle :
  1915. rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize);
  1916. break;
  1917. case bt_reserved : /* should never happen */
  1918. default:
  1919. return ERROR(corruption_detected);
  1920. }
  1921. if (ZSTD_isError(rSize)) return rSize;
  1922. DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
  /* running total for the frame, and running hash when the frame carries a checksum */
  1923. dctx->decodedSize += rSize;
  1924. if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
  1925. if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
  1926. DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
  /* when the header announced a content size, the decoded total must match it exactly */
  1927. if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
  1928. if (dctx->decodedSize != dctx->fParams.frameContentSize) {
  1929. return ERROR(corruption_detected);
  1930. } }
  1931. if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
  1932. dctx->expected = 4;
  1933. dctx->stage = ZSTDds_checkChecksum;
  1934. } else {
  1935. dctx->expected = 0; /* ends here */
  1936. dctx->stage = ZSTDds_getFrameHeaderSize;
  1937. }
  1938. } else {
  /* more blocks follow : remember where this block's output ended */
  1939. dctx->stage = ZSTDds_decodeBlockHeader;
  1940. dctx->expected = ZSTD_blockHeaderSize;
  1941. dctx->previousDstEnd = (char*)dst + rSize;
  1942. }
  1943. return rSize;
  1944. }
  /* Stage 5 : frame checksum. The 32-bit truncation of the XXH64 digest is
   * compared with the 4 bytes read as little-endian from the input. */
  1945. case ZSTDds_checkChecksum:
  1946. assert(srcSize == 4); /* guaranteed by dctx->expected */
  1947. { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
  1948. U32 const check32 = MEM_readLE32(src);
  1949. DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
  1950. if (check32 != h32) return ERROR(checksum_wrong);
  1951. dctx->expected = 0;
  1952. dctx->stage = ZSTDds_getFrameHeaderSize;
  1953. return 0;
  1954. }
  /* Skippable frame : complete its header, read the 32-bit value stored after
   * the magic number as the amount of input to expect, then (next stage) drop it. */
  1955. case ZSTDds_decodeSkippableHeader:
  1956. assert(src != NULL);
  1957. assert(srcSize <= ZSTD_skippableHeaderSize);
  1958. memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */
  1959. dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
  1960. dctx->stage = ZSTDds_skipFrame;
  1961. return 0;
  /* the skipped content is not read here : only the state is reset */
  1962. case ZSTDds_skipFrame:
  1963. dctx->expected = 0;
  1964. dctx->stage = ZSTDds_getFrameHeaderSize;
  1965. return 0;
  1966. default:
  1967. return ERROR(GENERIC); /* impossible */
  1968. }
  1969. }
  1970. static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
  1971. {
  1972. dctx->dictEnd = dctx->previousDstEnd;
  1973. dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
  1974. dctx->prefixStart = dict;
  1975. dctx->previousDstEnd = (const char*)dict + dictSize;
  1976. return 0;
  1977. }
  1978. /*! ZSTD_loadEntropy() :
  1979. * dict : must point at beginning of a valid zstd dictionary.
  1980. * @return : size of entropy tables read */
  1981. static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
  1982. const void* const dict, size_t const dictSize)
  1983. {
  1984. const BYTE* dictPtr = (const BYTE*)dict;
  1985. const BYTE* const dictEnd = dictPtr + dictSize;
  1986. if (dictSize <= 8) return ERROR(dictionary_corrupted);
  1987. assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
  1988. dictPtr += 8; /* skip header = magic + dictID */
  1989. ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
  1990. ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
  1991. ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
  1992. { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
  1993. size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
  1994. size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
  1995. dictPtr, dictEnd - dictPtr,
  1996. workspace, workspaceSize);
  1997. if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
  1998. dictPtr += hSize;
  1999. }
  2000. { short offcodeNCount[MaxOff+1];
  2001. U32 offcodeMaxValue = MaxOff, offcodeLog;
  2002. size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
  2003. if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
  2004. if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
  2005. if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
  2006. ZSTD_buildFSETable( entropy->OFTable,
  2007. offcodeNCount, offcodeMaxValue,
  2008. OF_base, OF_bits,
  2009. offcodeLog);
  2010. dictPtr += offcodeHeaderSize;
  2011. }
  2012. { short matchlengthNCount[MaxML+1];
  2013. unsigned matchlengthMaxValue = MaxML, matchlengthLog;
  2014. size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
  2015. if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
  2016. if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
  2017. if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
  2018. ZSTD_buildFSETable( entropy->MLTable,
  2019. matchlengthNCount, matchlengthMaxValue,
  2020. ML_base, ML_bits,
  2021. matchlengthLog);
  2022. dictPtr += matchlengthHeaderSize;
  2023. }
  2024. { short litlengthNCount[MaxLL+1];
  2025. unsigned litlengthMaxValue = MaxLL, litlengthLog;
  2026. size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
  2027. if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
  2028. if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
  2029. if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
  2030. ZSTD_buildFSETable( entropy->LLTable,
  2031. litlengthNCount, litlengthMaxValue,
  2032. LL_base, LL_bits,
  2033. litlengthLog);
  2034. dictPtr += litlengthHeaderSize;
  2035. }
  2036. if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
  2037. { int i;
  2038. size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
  2039. for (i=0; i<3; i++) {
  2040. U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
  2041. if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
  2042. entropy->rep[i] = rep;
  2043. } }
  2044. return dictPtr - (const BYTE*)dict;
  2045. }
  2046. static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
  2047. {
  2048. if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
  2049. { U32 const magic = MEM_readLE32(dict);
  2050. if (magic != ZSTD_MAGIC_DICTIONARY) {
  2051. return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
  2052. } }
  2053. dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
  2054. /* load entropy tables */
  2055. { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
  2056. if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
  2057. dict = (const char*)dict + eSize;
  2058. dictSize -= eSize;
  2059. }
  2060. dctx->litEntropy = dctx->fseEntropy = 1;
  2061. /* reference dictionary content */
  2062. return ZSTD_refDictContent(dctx, dict, dictSize);
  2063. }
  2064. size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
  2065. {
  2066. assert(dctx != NULL);
  2067. dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
  2068. dctx->stage = ZSTDds_getFrameHeaderSize;
  2069. dctx->decodedSize = 0;
  2070. dctx->previousDstEnd = NULL;
  2071. dctx->prefixStart = NULL;
  2072. dctx->virtualStart = NULL;
  2073. dctx->dictEnd = NULL;
  2074. dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
  2075. dctx->litEntropy = dctx->fseEntropy = 0;
  2076. dctx->dictID = 0;
  2077. ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
  2078. memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
  2079. dctx->LLTptr = dctx->entropy.LLTable;
  2080. dctx->MLTptr = dctx->entropy.MLTable;
  2081. dctx->OFTptr = dctx->entropy.OFTable;
  2082. dctx->HUFptr = dctx->entropy.hufTable;
  2083. return 0;
  2084. }
  2085. size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
  2086. {
  2087. CHECK_F( ZSTD_decompressBegin(dctx) );
  2088. if (dict && dictSize)
  2089. CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
  2090. return 0;
  2091. }
  2092. /* ====== ZSTD_DDict ====== */
  /* Digested dictionary. Fields are filled by ZSTD_initDDict_internal()
   * and ZSTD_loadEntropy_inDDict(). */
  2093. struct ZSTD_DDict_s {
  2094. void* dictBuffer; /* internal copy of the dictionary, or NULL when the content is only referenced; released by ZSTD_freeDDict() */
  2095. const void* dictContent; /* start of the dictionary : either dictBuffer or the caller's buffer */
  2096. size_t dictSize; /* size of dictContent, in bytes */
  2097. ZSTD_entropyDTables_t entropy; /* tables and repcodes loaded by ZSTD_loadEntropy() */
  2098. U32 dictID; /* read from the dictionary header; 0 when no zstd dictionary header was parsed */
  2099. U32 entropyPresent; /* 1 once entropy tables were successfully loaded, 0 otherwise */
  2100. ZSTD_customMem cMem; /* allocator used for dictBuffer and for the structure itself */
  2101. }; /* typedef'd to ZSTD_DDict within "zstd.h" */
  2102. static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
  2103. {
  2104. assert(ddict != NULL);
  2105. return ddict->dictContent;
  2106. }
  2107. static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
  2108. {
  2109. assert(ddict != NULL);
  2110. return ddict->dictSize;
  2111. }
  2112. size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
  2113. {
  2114. DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
  2115. assert(dctx != NULL);
  2116. if (ddict) {
  2117. dctx->ddictIsCold = (dctx->dictEnd != (const char*)ddict->dictContent + ddict->dictSize);
  2118. DEBUGLOG(4, "DDict is %s",
  2119. dctx->ddictIsCold ? "~cold~" : "hot!");
  2120. }
  2121. CHECK_F( ZSTD_decompressBegin(dctx) );
  2122. if (ddict) { /* NULL ddict is equivalent to no dictionary */
  2123. dctx->dictID = ddict->dictID;
  2124. dctx->prefixStart = ddict->dictContent;
  2125. dctx->virtualStart = ddict->dictContent;
  2126. dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
  2127. dctx->previousDstEnd = dctx->dictEnd;
  2128. if (ddict->entropyPresent) {
  2129. dctx->litEntropy = 1;
  2130. dctx->fseEntropy = 1;
  2131. dctx->LLTptr = ddict->entropy.LLTable;
  2132. dctx->MLTptr = ddict->entropy.MLTable;
  2133. dctx->OFTptr = ddict->entropy.OFTable;
  2134. dctx->HUFptr = ddict->entropy.hufTable;
  2135. dctx->entropy.rep[0] = ddict->entropy.rep[0];
  2136. dctx->entropy.rep[1] = ddict->entropy.rep[1];
  2137. dctx->entropy.rep[2] = ddict->entropy.rep[2];
  2138. } else {
  2139. dctx->litEntropy = 0;
  2140. dctx->fseEntropy = 0;
  2141. }
  2142. }
  2143. return 0;
  2144. }
  2145. static size_t
  2146. ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict,
  2147. ZSTD_dictContentType_e dictContentType)
  2148. {
  2149. ddict->dictID = 0;
  2150. ddict->entropyPresent = 0;
  2151. if (dictContentType == ZSTD_dct_rawContent) return 0;
  2152. if (ddict->dictSize < 8) {
  2153. if (dictContentType == ZSTD_dct_fullDict)
  2154. return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
  2155. return 0; /* pure content mode */
  2156. }
  2157. { U32 const magic = MEM_readLE32(ddict->dictContent);
  2158. if (magic != ZSTD_MAGIC_DICTIONARY) {
  2159. if (dictContentType == ZSTD_dct_fullDict)
  2160. return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
  2161. return 0; /* pure content mode */
  2162. }
  2163. }
  2164. ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
  2165. /* load entropy tables */
  2166. CHECK_E( ZSTD_loadEntropy(&ddict->entropy,
  2167. ddict->dictContent, ddict->dictSize),
  2168. dictionary_corrupted );
  2169. ddict->entropyPresent = 1;
  2170. return 0;
  2171. }
  2172. static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
  2173. const void* dict, size_t dictSize,
  2174. ZSTD_dictLoadMethod_e dictLoadMethod,
  2175. ZSTD_dictContentType_e dictContentType)
  2176. {
  2177. if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
  2178. ddict->dictBuffer = NULL;
  2179. ddict->dictContent = dict;
  2180. if (!dict) dictSize = 0;
  2181. } else {
  2182. void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
  2183. ddict->dictBuffer = internalBuffer;
  2184. ddict->dictContent = internalBuffer;
  2185. if (!internalBuffer) return ERROR(memory_allocation);
  2186. memcpy(internalBuffer, dict, dictSize);
  2187. }
  2188. ddict->dictSize = dictSize;
  2189. ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
  2190. /* parse dictionary content */
  2191. CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
  2192. return 0;
  2193. }
  2194. ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
  2195. ZSTD_dictLoadMethod_e dictLoadMethod,
  2196. ZSTD_dictContentType_e dictContentType,
  2197. ZSTD_customMem customMem)
  2198. {
  2199. if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
  2200. { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
  2201. if (ddict == NULL) return NULL;
  2202. ddict->cMem = customMem;
  2203. { size_t const initResult = ZSTD_initDDict_internal(ddict,
  2204. dict, dictSize,
  2205. dictLoadMethod, dictContentType);
  2206. if (ZSTD_isError(initResult)) {
  2207. ZSTD_freeDDict(ddict);
  2208. return NULL;
  2209. } }
  2210. return ddict;
  2211. }
  2212. }
  2213. /*! ZSTD_createDDict() :
  2214. * Create a digested dictionary, to start decompression without startup delay.
  2215. * `dict` content is copied inside DDict.
  2216. * Consequently, `dict` can be released after `ZSTD_DDict` creation */
  2217. ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
  2218. {
  2219. ZSTD_customMem const allocator = { NULL, NULL, NULL };
  2220. return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
  2221. }
  2222. /*! ZSTD_createDDict_byReference() :
  2223. * Create a digested dictionary, to start decompression without startup delay.
  2224. * Dictionary content is simply referenced, it will be accessed during decompression.
  2225. * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
  2226. ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
  2227. {
  2228. ZSTD_customMem const allocator = { NULL, NULL, NULL };
  2229. return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
  2230. }
  2231. const ZSTD_DDict* ZSTD_initStaticDDict(
  2232. void* sBuffer, size_t sBufferSize,
  2233. const void* dict, size_t dictSize,
  2234. ZSTD_dictLoadMethod_e dictLoadMethod,
  2235. ZSTD_dictContentType_e dictContentType)
  2236. {
  2237. size_t const neededSpace = sizeof(ZSTD_DDict)
  2238. + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
  2239. ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
  2240. assert(sBuffer != NULL);
  2241. assert(dict != NULL);
  2242. if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
  2243. if (sBufferSize < neededSpace) return NULL;
  2244. if (dictLoadMethod == ZSTD_dlm_byCopy) {
  2245. memcpy(ddict+1, dict, dictSize); /* local copy */
  2246. dict = ddict+1;
  2247. }
  2248. if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
  2249. dict, dictSize,
  2250. ZSTD_dlm_byRef, dictContentType) ))
  2251. return NULL;
  2252. return ddict;
  2253. }
  2254. size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
  2255. {
  2256. if (ddict==NULL) return 0; /* support free on NULL */
  2257. { ZSTD_customMem const cMem = ddict->cMem;
  2258. ZSTD_free(ddict->dictBuffer, cMem);
  2259. ZSTD_free(ddict, cMem);
  2260. return 0;
  2261. }
  2262. }
  2263. /*! ZSTD_estimateDDictSize() :
  2264. * Estimate amount of memory that will be needed to create a dictionary for decompression.
  2265. * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
  2266. size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
  2267. {
  2268. return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
  2269. }
  2270. size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
  2271. {
  2272. if (ddict==NULL) return 0; /* support sizeof on NULL */
  2273. return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
  2274. }
  2275. /*! ZSTD_getDictID_fromDict() :
  2276. * Provides the dictID stored within dictionary.
  2277. * if @return == 0, the dictionary is not conformant with Zstandard specification.
  2278. * It can still be loaded, but as a content-only dictionary. */
  2279. unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
  2280. {
  2281. if (dictSize < 8) return 0;
  2282. if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
  2283. return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
  2284. }
  2285. /*! ZSTD_getDictID_fromDDict() :
  2286. * Provides the dictID of the dictionary loaded into `ddict`.
  2287. * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  2288. * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
  2289. unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
  2290. {
  2291. if (ddict==NULL) return 0;
  2292. return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
  2293. }
  2294. /*! ZSTD_getDictID_fromFrame() :
  2295. * Provides the dictID required to decompresse frame stored within `src`.
  2296. * If @return == 0, the dictID could not be decoded.
  2297. * This could for one of the following reasons :
  2298. * - The frame does not require a dictionary (most common case).
  2299. * - The frame was built with dictID intentionally removed.
  2300. * Needed dictionary is a hidden information.
  2301. * Note : this use case also happens when using a non-conformant dictionary.
  2302. * - `srcSize` is too small, and as a result, frame header could not be decoded.
  2303. * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
  2304. * - This is not a Zstandard frame.
  2305. * When identifying the exact failure cause, it's possible to use
  2306. * ZSTD_getFrameHeader(), which will provide a more precise error code. */
  2307. unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
  2308. {
  2309. ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
  2310. size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
  2311. if (ZSTD_isError(hError)) return 0;
  2312. return zfp.dictID;
  2313. }
  2314. /*! ZSTD_decompress_usingDDict() :
  2315. * Decompression using a pre-digested Dictionary
  2316. * Use dictionary without significant overhead. */
  2317. size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
  2318. void* dst, size_t dstCapacity,
  2319. const void* src, size_t srcSize,
  2320. const ZSTD_DDict* ddict)
  2321. {
  2322. /* pass content and size in case legacy frames are encountered */
  2323. return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
  2324. NULL, 0,
  2325. ddict);
  2326. }
  2327. /*=====================================
  2328. * Streaming decompression
  2329. *====================================*/
  2330. ZSTD_DStream* ZSTD_createDStream(void)
  2331. {
  2332. DEBUGLOG(3, "ZSTD_createDStream");
  2333. return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
  2334. }
  2335. ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
  2336. {
  2337. return ZSTD_initStaticDCtx(workspace, workspaceSize);
  2338. }
  2339. ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
  2340. {
  2341. return ZSTD_createDCtx_advanced(customMem);
  2342. }
  2343. size_t ZSTD_freeDStream(ZSTD_DStream* zds)
  2344. {
  2345. return ZSTD_freeDCtx(zds);
  2346. }
  2347. /* *** Initialization *** */
  2348. size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
  2349. size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
  2350. size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
  2351. const void* dict, size_t dictSize,
  2352. ZSTD_dictLoadMethod_e dictLoadMethod,
  2353. ZSTD_dictContentType_e dictContentType)
  2354. {
  2355. if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
  2356. ZSTD_freeDDict(dctx->ddictLocal);
  2357. if (dict && dictSize >= 8) {
  2358. dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
  2359. if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
  2360. } else {
  2361. dctx->ddictLocal = NULL;
  2362. }
  2363. dctx->ddict = dctx->ddictLocal;
  2364. return 0;
  2365. }
  2366. size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
  2367. {
  2368. return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
  2369. }
  2370. size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
  2371. {
  2372. return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
  2373. }
  2374. size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
  2375. {
  2376. return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
  2377. }
  2378. size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
  2379. {
  2380. return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
  2381. }
  2382. /* ZSTD_initDStream_usingDict() :
  2383. * return : expected size, aka ZSTD_frameHeaderSize_prefix.
  2384. * this function cannot fail */
  2385. size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
  2386. {
  2387. DEBUGLOG(4, "ZSTD_initDStream_usingDict");
  2388. zds->streamStage = zdss_init;
  2389. zds->noForwardProgress = 0;
  2390. CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
  2391. return ZSTD_frameHeaderSize_prefix;
  2392. }
  2393. /* note : this variant can't fail */
  2394. size_t ZSTD_initDStream(ZSTD_DStream* zds)
  2395. {
  2396. DEBUGLOG(4, "ZSTD_initDStream");
  2397. return ZSTD_initDStream_usingDict(zds, NULL, 0);
  2398. }
  2399. /* ZSTD_initDStream_usingDDict() :
  2400. * ddict will just be referenced, and must outlive decompression session
  2401. * this function cannot fail */
  2402. size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
  2403. {
  2404. size_t const initResult = ZSTD_initDStream(dctx);
  2405. dctx->ddict = ddict;
  2406. return initResult;
  2407. }
  2408. /* ZSTD_resetDStream() :
  2409. * return : expected size, aka ZSTD_frameHeaderSize_prefix.
  2410. * this function cannot fail */
  2411. size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
  2412. {
  2413. DEBUGLOG(4, "ZSTD_resetDStream");
  2414. dctx->streamStage = zdss_loadHeader;
  2415. dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
  2416. dctx->legacyVersion = 0;
  2417. dctx->hostageByte = 0;
  2418. return ZSTD_frameHeaderSize_prefix;
  2419. }
  2420. size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
  2421. ZSTD_DStreamParameter_e paramType, unsigned paramValue)
  2422. {
  2423. if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
  2424. switch(paramType)
  2425. {
  2426. default : return ERROR(parameter_unsupported);
  2427. case DStream_p_maxWindowSize :
  2428. DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
  2429. dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
  2430. break;
  2431. }
  2432. return 0;
  2433. }
  2434. size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
  2435. {
  2436. if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
  2437. dctx->ddict = ddict;
  2438. return 0;
  2439. }
  2440. size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
  2441. {
  2442. if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
  2443. dctx->maxWindowSize = maxWindowSize;
  2444. return 0;
  2445. }
  2446. size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
  2447. {
  2448. DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
  2449. if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
  2450. dctx->format = format;
  2451. return 0;
  2452. }
  2453. size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
  2454. {
  2455. return ZSTD_sizeof_DCtx(dctx);
  2456. }
  2457. size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
  2458. {
  2459. size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
  2460. unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
  2461. unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
  2462. size_t const minRBSize = (size_t) neededSize;
  2463. if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge);
  2464. return minRBSize;
  2465. }
  2466. size_t ZSTD_estimateDStreamSize(size_t windowSize)
  2467. {
  2468. size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
  2469. size_t const inBuffSize = blockSize; /* no block can be larger */
  2470. size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
  2471. return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
  2472. }
  2473. size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
  2474. {
  2475. U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable */
  2476. ZSTD_frameHeader zfh;
  2477. size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
  2478. if (ZSTD_isError(err)) return err;
  2479. if (err>0) return ERROR(srcSize_wrong);
  2480. if (zfh.windowSize > windowSizeMax)
  2481. return ERROR(frameParameter_windowTooLarge);
  2482. return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
  2483. }
  2484. /* ***** Decompression ***** */
  /* ZSTD_limitCopy() :
   * Copies as many bytes from `src` as fit into `dst`.
   * Nothing is copied, and neither pointer is touched, when that amount is 0 :
   * memcpy() requires valid pointers even for a zero length, so an empty
   * buffer described by a NULL pointer must not reach it.
   * @return : nb of bytes copied, i.e. the smaller of `dstCapacity` and `srcSize` */
  MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
  {
      size_t const length = (dstCapacity < srcSize) ? dstCapacity : srcSize;
      if (length > 0) {
          memcpy(dst, src, length);
      }
      return length;
  }
  2491. size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
  2492. {
  2493. const char* const istart = (const char*)(input->src) + input->pos;
  2494. const char* const iend = (const char*)(input->src) + input->size;
  2495. const char* ip = istart;
  2496. char* const ostart = (char*)(output->dst) + output->pos;
  2497. char* const oend = (char*)(output->dst) + output->size;
  2498. char* op = ostart;
  2499. U32 someMoreWork = 1;
  2500. DEBUGLOG(5, "ZSTD_decompressStream");
  2501. if (input->pos > input->size) { /* forbidden */
  2502. DEBUGLOG(5, "in: pos: %u vs size: %u",
  2503. (U32)input->pos, (U32)input->size);
  2504. return ERROR(srcSize_wrong);
  2505. }
  2506. if (output->pos > output->size) { /* forbidden */
  2507. DEBUGLOG(5, "out: pos: %u vs size: %u",
  2508. (U32)output->pos, (U32)output->size);
  2509. return ERROR(dstSize_tooSmall);
  2510. }
  2511. DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
  2512. while (someMoreWork) {
  2513. switch(zds->streamStage)
  2514. {
  2515. case zdss_init :
  2516. DEBUGLOG(5, "stage zdss_init => transparent reset ");
  2517. ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
  2518. /* fall-through */
  2519. case zdss_loadHeader :
  2520. DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
  2521. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
  2522. if (zds->legacyVersion) {
  2523. /* legacy support is incompatible with static dctx */
  2524. if (zds->staticSize) return ERROR(memory_allocation);
  2525. { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
  2526. if (hint==0) zds->streamStage = zdss_init;
  2527. return hint;
  2528. } }
  2529. #endif
  2530. { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
  2531. DEBUGLOG(5, "header size : %u", (U32)hSize);
  2532. if (ZSTD_isError(hSize)) {
  2533. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
  2534. U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
  2535. if (legacyVersion) {
  2536. const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
  2537. size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
  2538. DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
  2539. /* legacy support is incompatible with static dctx */
  2540. if (zds->staticSize) return ERROR(memory_allocation);
  2541. CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
  2542. zds->previousLegacyVersion, legacyVersion,
  2543. dict, dictSize));
  2544. zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
  2545. { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
  2546. if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
  2547. return hint;
  2548. } }
  2549. #endif
  2550. return hSize; /* error */
  2551. }
  2552. if (hSize != 0) { /* need more input */
  2553. size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
  2554. size_t const remainingInput = (size_t)(iend-ip);
  2555. assert(iend >= ip);
  2556. if (toLoad > remainingInput) { /* not enough input to load full header */
  2557. if (remainingInput > 0) {
  2558. memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
  2559. zds->lhSize += remainingInput;
  2560. }
  2561. input->pos = input->size;
  2562. return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
  2563. }
  2564. assert(ip != NULL);
  2565. memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
  2566. break;
  2567. } }
  2568. /* check for single-pass mode opportunity */
  2569. if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
  2570. && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
  2571. size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
  2572. if (cSize <= (size_t)(iend-istart)) {
  2573. /* shortcut : using single-pass mode */
  2574. size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
  2575. if (ZSTD_isError(decompressedSize)) return decompressedSize;
  2576. DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
  2577. ip = istart + cSize;
  2578. op += decompressedSize;
  2579. zds->expected = 0;
  2580. zds->streamStage = zdss_init;
  2581. someMoreWork = 0;
  2582. break;
  2583. } }
  2584. /* Consume header (see ZSTDds_decodeFrameHeader) */
  2585. DEBUGLOG(4, "Consume header");
  2586. CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
  2587. if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
  2588. zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
  2589. zds->stage = ZSTDds_skipFrame;
  2590. } else {
  2591. CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
  2592. zds->expected = ZSTD_blockHeaderSize;
  2593. zds->stage = ZSTDds_decodeBlockHeader;
  2594. }
  2595. /* control buffer memory usage */
  2596. DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
  2597. (U32)(zds->fParams.windowSize >>10),
  2598. (U32)(zds->maxWindowSize >> 10) );
  2599. zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
  2600. if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
  2601. /* Adapt buffer sizes to frame header instructions */
  2602. { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
  2603. size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize);
  2604. if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) {
  2605. size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
  2606. DEBUGLOG(4, "inBuff : from %u to %u",
  2607. (U32)zds->inBuffSize, (U32)neededInBuffSize);
  2608. DEBUGLOG(4, "outBuff : from %u to %u",
  2609. (U32)zds->outBuffSize, (U32)neededOutBuffSize);
  2610. if (zds->staticSize) { /* static DCtx */
  2611. DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
  2612. assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
  2613. if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
  2614. return ERROR(memory_allocation);
  2615. } else {
  2616. ZSTD_free(zds->inBuff, zds->customMem);
  2617. zds->inBuffSize = 0;
  2618. zds->outBuffSize = 0;
  2619. zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
  2620. if (zds->inBuff == NULL) return ERROR(memory_allocation);
  2621. }
  2622. zds->inBuffSize = neededInBuffSize;
  2623. zds->outBuff = zds->inBuff + zds->inBuffSize;
  2624. zds->outBuffSize = neededOutBuffSize;
  2625. } }
  2626. zds->streamStage = zdss_read;
  2627. /* fall-through */
  2628. case zdss_read:
  2629. DEBUGLOG(5, "stage zdss_read");
  2630. { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
  2631. DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
  2632. if (neededInSize==0) { /* end of frame */
  2633. zds->streamStage = zdss_init;
  2634. someMoreWork = 0;
  2635. break;
  2636. }
  2637. if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
  2638. int const isSkipFrame = ZSTD_isSkipFrame(zds);
  2639. size_t const decodedSize = ZSTD_decompressContinue(zds,
  2640. zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart),
  2641. ip, neededInSize);
  2642. if (ZSTD_isError(decodedSize)) return decodedSize;
  2643. ip += neededInSize;
  2644. if (!decodedSize && !isSkipFrame) break; /* this was just a header */
  2645. zds->outEnd = zds->outStart + decodedSize;
  2646. zds->streamStage = zdss_flush;
  2647. break;
  2648. } }
  2649. if (ip==iend) { someMoreWork = 0; break; } /* no more input */
  2650. zds->streamStage = zdss_load;
  2651. /* fall-through */
  2652. case zdss_load:
  2653. { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
  2654. size_t const toLoad = neededInSize - zds->inPos;
  2655. int const isSkipFrame = ZSTD_isSkipFrame(zds);
  2656. size_t loadedSize;
  2657. if (isSkipFrame) {
  2658. loadedSize = MIN(toLoad, (size_t)(iend-ip));
  2659. } else {
  2660. if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */
  2661. loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
  2662. }
  2663. ip += loadedSize;
  2664. zds->inPos += loadedSize;
  2665. if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
  2666. /* decode loaded input */
  2667. { size_t const decodedSize = ZSTD_decompressContinue(zds,
  2668. zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
  2669. zds->inBuff, neededInSize);
  2670. if (ZSTD_isError(decodedSize)) return decodedSize;
  2671. zds->inPos = 0; /* input is consumed */
  2672. if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */
  2673. zds->outEnd = zds->outStart + decodedSize;
  2674. } }
  2675. zds->streamStage = zdss_flush;
  2676. /* fall-through */
  2677. case zdss_flush:
  2678. { size_t const toFlushSize = zds->outEnd - zds->outStart;
  2679. size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
  2680. op += flushedSize;
  2681. zds->outStart += flushedSize;
  2682. if (flushedSize == toFlushSize) { /* flush completed */
  2683. zds->streamStage = zdss_read;
  2684. if ( (zds->outBuffSize < zds->fParams.frameContentSize)
  2685. && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
  2686. DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
  2687. (int)(zds->outBuffSize - zds->outStart),
  2688. (U32)zds->fParams.blockSizeMax);
  2689. zds->outStart = zds->outEnd = 0;
  2690. }
  2691. break;
  2692. } }
  2693. /* cannot complete flush */
  2694. someMoreWork = 0;
  2695. break;
  2696. default: return ERROR(GENERIC); /* impossible */
  2697. } }
  2698. /* result */
  2699. input->pos = (size_t)(ip - (const char*)(input->src));
  2700. output->pos = (size_t)(op - (char*)(output->dst));
  2701. if ((ip==istart) && (op==ostart)) { /* no forward progress */
  2702. zds->noForwardProgress ++;
  2703. if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
  2704. if (op==oend) return ERROR(dstSize_tooSmall);
  2705. if (ip==iend) return ERROR(srcSize_wrong);
  2706. assert(0);
  2707. }
  2708. } else {
  2709. zds->noForwardProgress = 0;
  2710. }
  2711. { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
  2712. if (!nextSrcSizeHint) { /* frame fully decoded */
  2713. if (zds->outEnd == zds->outStart) { /* output fully flushed */
  2714. if (zds->hostageByte) {
  2715. if (input->pos >= input->size) {
  2716. /* can't release hostage (not present) */
  2717. zds->streamStage = zdss_read;
  2718. return 1;
  2719. }
  2720. input->pos++; /* release hostage */
  2721. } /* zds->hostageByte */
  2722. return 0;
  2723. } /* zds->outEnd == zds->outStart */
  2724. if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
  2725. input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */
  2726. zds->hostageByte=1;
  2727. }
  2728. return 1;
  2729. } /* nextSrcSizeHint==0 */
  2730. nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
  2731. assert(zds->inPos <= nextSrcSizeHint);
  2732. nextSrcSizeHint -= zds->inPos; /* part already loaded*/
  2733. return nextSrcSizeHint;
  2734. }
  2735. }
  2736. size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
  2737. {
  2738. return ZSTD_decompressStream(dctx, output, input);
  2739. }
  2740. size_t ZSTD_decompress_generic_simpleArgs (
  2741. ZSTD_DCtx* dctx,
  2742. void* dst, size_t dstCapacity, size_t* dstPos,
  2743. const void* src, size_t srcSize, size_t* srcPos)
  2744. {
  2745. ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
  2746. ZSTD_inBuffer input = { src, srcSize, *srcPos };
  2747. /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
  2748. size_t const cErr = ZSTD_decompress_generic(dctx, &output, &input);
  2749. *dstPos = output.pos;
  2750. *srcPos = input.pos;
  2751. return cErr;
  2752. }
  2753. void ZSTD_DCtx_reset(ZSTD_DCtx* dctx)
  2754. {
  2755. (void)ZSTD_initDStream(dctx);
  2756. dctx->format = ZSTD_f_zstd1;
  2757. dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
  2758. }