zstd_lazy.c 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750
  1. /*
  2. * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #include "zstd_lazy.h"
  11. /*-*************************************
  12. * Binary Tree search
  13. ***************************************/
  14. /** ZSTD_insertBt1() : add one or multiple positions to tree.
  15. * ip : assumed <= iend-8 .
  16. * @return : nb of positions added */
  17. static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
  18. U32 extDict)
  19. {
  20. U32* const hashTable = zc->hashTable;
  21. U32 const hashLog = zc->appliedParams.cParams.hashLog;
  22. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  23. U32* const bt = zc->chainTable;
  24. U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
  25. U32 const btMask = (1 << btLog) - 1;
  26. U32 matchIndex = hashTable[h];
  27. size_t commonLengthSmaller=0, commonLengthLarger=0;
  28. const BYTE* const base = zc->base;
  29. const BYTE* const dictBase = zc->dictBase;
  30. const U32 dictLimit = zc->dictLimit;
  31. const BYTE* const dictEnd = dictBase + dictLimit;
  32. const BYTE* const prefixStart = base + dictLimit;
  33. const BYTE* match;
  34. const U32 current = (U32)(ip-base);
  35. const U32 btLow = btMask >= current ? 0 : current - btMask;
  36. U32* smallerPtr = bt + 2*(current&btMask);
  37. U32* largerPtr = smallerPtr + 1;
  38. U32 dummy32; /* to be nullified at the end */
  39. U32 const windowLow = zc->lowLimit;
  40. U32 matchEndIdx = current+8;
  41. size_t bestLength = 8;
  42. #ifdef ZSTD_C_PREDICT
  43. U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
  44. U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
  45. predictedSmall += (predictedSmall>0);
  46. predictedLarge += (predictedLarge>0);
  47. #endif /* ZSTD_C_PREDICT */
  48. assert(ip <= iend-8); /* required for h calculation */
  49. hashTable[h] = current; /* Update Hash Table */
  50. while (nbCompares-- && (matchIndex > windowLow)) {
  51. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  52. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  53. #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
  54. const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
  55. if (matchIndex == predictedSmall) {
  56. /* no need to check length, result known */
  57. *smallerPtr = matchIndex;
  58. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  59. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  60. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  61. predictedSmall = predictPtr[1] + (predictPtr[1]>0);
  62. continue;
  63. }
  64. if (matchIndex == predictedLarge) {
  65. *largerPtr = matchIndex;
  66. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  67. largerPtr = nextPtr;
  68. matchIndex = nextPtr[0];
  69. predictedLarge = predictPtr[0] + (predictPtr[0]>0);
  70. continue;
  71. }
  72. #endif
  73. if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
  74. match = base + matchIndex;
  75. if (match[matchLength] == ip[matchLength])
  76. matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
  77. } else {
  78. match = dictBase + matchIndex;
  79. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  80. if (matchIndex+matchLength >= dictLimit)
  81. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  82. }
  83. if (matchLength > bestLength) {
  84. bestLength = matchLength;
  85. if (matchLength > matchEndIdx - matchIndex)
  86. matchEndIdx = matchIndex + (U32)matchLength;
  87. }
  88. if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
  89. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  90. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  91. /* match+1 is smaller than current */
  92. *smallerPtr = matchIndex; /* update smaller idx */
  93. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  94. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  95. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  96. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  97. } else {
  98. /* match is larger than current */
  99. *largerPtr = matchIndex;
  100. commonLengthLarger = matchLength;
  101. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  102. largerPtr = nextPtr;
  103. matchIndex = nextPtr[0];
  104. } }
  105. *smallerPtr = *largerPtr = 0;
  106. if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
  107. if (matchEndIdx > current + 8) return matchEndIdx - (current + 8);
  108. return 1;
  109. }
  110. static size_t ZSTD_insertBtAndFindBestMatch (
  111. ZSTD_CCtx* zc,
  112. const BYTE* const ip, const BYTE* const iend,
  113. size_t* offsetPtr,
  114. U32 nbCompares, const U32 mls,
  115. U32 extDict)
  116. {
  117. U32* const hashTable = zc->hashTable;
  118. U32 const hashLog = zc->appliedParams.cParams.hashLog;
  119. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  120. U32* const bt = zc->chainTable;
  121. U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
  122. U32 const btMask = (1 << btLog) - 1;
  123. U32 matchIndex = hashTable[h];
  124. size_t commonLengthSmaller=0, commonLengthLarger=0;
  125. const BYTE* const base = zc->base;
  126. const BYTE* const dictBase = zc->dictBase;
  127. const U32 dictLimit = zc->dictLimit;
  128. const BYTE* const dictEnd = dictBase + dictLimit;
  129. const BYTE* const prefixStart = base + dictLimit;
  130. const U32 current = (U32)(ip-base);
  131. const U32 btLow = btMask >= current ? 0 : current - btMask;
  132. const U32 windowLow = zc->lowLimit;
  133. U32* smallerPtr = bt + 2*(current&btMask);
  134. U32* largerPtr = bt + 2*(current&btMask) + 1;
  135. U32 matchEndIdx = current+8;
  136. U32 dummy32; /* to be nullified at the end */
  137. size_t bestLength = 0;
  138. assert(ip <= iend-8); /* required for h calculation */
  139. hashTable[h] = current; /* Update Hash Table */
  140. while (nbCompares-- && (matchIndex > windowLow)) {
  141. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  142. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  143. const BYTE* match;
  144. if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
  145. match = base + matchIndex;
  146. if (match[matchLength] == ip[matchLength])
  147. matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
  148. } else {
  149. match = dictBase + matchIndex;
  150. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  151. if (matchIndex+matchLength >= dictLimit)
  152. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  153. }
  154. if (matchLength > bestLength) {
  155. if (matchLength > matchEndIdx - matchIndex)
  156. matchEndIdx = matchIndex + (U32)matchLength;
  157. if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
  158. bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
  159. if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
  160. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  161. }
  162. if (match[matchLength] < ip[matchLength]) {
  163. /* match is smaller than current */
  164. *smallerPtr = matchIndex; /* update smaller idx */
  165. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  166. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  167. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  168. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  169. } else {
  170. /* match is larger than current */
  171. *largerPtr = matchIndex;
  172. commonLengthLarger = matchLength;
  173. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  174. largerPtr = nextPtr;
  175. matchIndex = nextPtr[0];
  176. } }
  177. *smallerPtr = *largerPtr = 0;
  178. zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
  179. return bestLength;
  180. }
  181. void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
  182. {
  183. const BYTE* const base = zc->base;
  184. const U32 target = (U32)(ip - base);
  185. U32 idx = zc->nextToUpdate;
  186. while(idx < target)
  187. idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
  188. }
  189. /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
  190. static size_t ZSTD_BtFindBestMatch (
  191. ZSTD_CCtx* zc,
  192. const BYTE* const ip, const BYTE* const iLimit,
  193. size_t* offsetPtr,
  194. const U32 maxNbAttempts, const U32 mls)
  195. {
  196. if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
  197. ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
  198. return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
  199. }
  200. static size_t ZSTD_BtFindBestMatch_selectMLS (
  201. ZSTD_CCtx* zc, /* Index table will be updated */
  202. const BYTE* ip, const BYTE* const iLimit,
  203. size_t* offsetPtr,
  204. const U32 maxNbAttempts, const U32 matchLengthSearch)
  205. {
  206. switch(matchLengthSearch)
  207. {
  208. default : /* includes case 3 */
  209. case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
  210. case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
  211. case 7 :
  212. case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
  213. }
  214. }
  215. void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
  216. {
  217. const BYTE* const base = zc->base;
  218. const U32 target = (U32)(ip - base);
  219. U32 idx = zc->nextToUpdate;
  220. while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
  221. }
  222. /** Tree updater, providing best match */
  223. static size_t ZSTD_BtFindBestMatch_extDict (
  224. ZSTD_CCtx* zc,
  225. const BYTE* const ip, const BYTE* const iLimit,
  226. size_t* offsetPtr,
  227. const U32 maxNbAttempts, const U32 mls)
  228. {
  229. if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
  230. ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
  231. return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
  232. }
  233. static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
  234. ZSTD_CCtx* zc, /* Index table will be updated */
  235. const BYTE* ip, const BYTE* const iLimit,
  236. size_t* offsetPtr,
  237. const U32 maxNbAttempts, const U32 matchLengthSearch)
  238. {
  239. switch(matchLengthSearch)
  240. {
  241. default : /* includes case 3 */
  242. case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
  243. case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
  244. case 7 :
  245. case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
  246. }
  247. }
  248. /* *********************************
  249. * Hash Chain
  250. ***********************************/
  251. #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
  252. /* Update chains up to ip (excluded)
  253. Assumption : always within prefix (i.e. not within extDict) */
  254. U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
  255. {
  256. U32* const hashTable = zc->hashTable;
  257. const U32 hashLog = zc->appliedParams.cParams.hashLog;
  258. U32* const chainTable = zc->chainTable;
  259. const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
  260. const BYTE* const base = zc->base;
  261. const U32 target = (U32)(ip - base);
  262. U32 idx = zc->nextToUpdate;
  263. while(idx < target) { /* catch up */
  264. size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
  265. NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
  266. hashTable[h] = idx;
  267. idx++;
  268. }
  269. zc->nextToUpdate = target;
  270. return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
  271. }
  272. /* inlining is important to hardwire a hot branch (template emulation) */
  273. FORCE_INLINE_TEMPLATE
  274. size_t ZSTD_HcFindBestMatch_generic (
  275. ZSTD_CCtx* zc, /* Index table will be updated */
  276. const BYTE* const ip, const BYTE* const iLimit,
  277. size_t* offsetPtr,
  278. const U32 maxNbAttempts, const U32 mls, const U32 extDict)
  279. {
  280. U32* const chainTable = zc->chainTable;
  281. const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
  282. const U32 chainMask = chainSize-1;
  283. const BYTE* const base = zc->base;
  284. const BYTE* const dictBase = zc->dictBase;
  285. const U32 dictLimit = zc->dictLimit;
  286. const BYTE* const prefixStart = base + dictLimit;
  287. const BYTE* const dictEnd = dictBase + dictLimit;
  288. const U32 lowLimit = zc->lowLimit;
  289. const U32 current = (U32)(ip-base);
  290. const U32 minChain = current > chainSize ? current - chainSize : 0;
  291. int nbAttempts=maxNbAttempts;
  292. size_t ml=4-1;
  293. /* HC4 match finder */
  294. U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
  295. for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
  296. const BYTE* match;
  297. size_t currentMl=0;
  298. if ((!extDict) || matchIndex >= dictLimit) {
  299. match = base + matchIndex;
  300. if (match[ml] == ip[ml]) /* potentially better */
  301. currentMl = ZSTD_count(ip, match, iLimit);
  302. } else {
  303. match = dictBase + matchIndex;
  304. if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
  305. currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
  306. }
  307. /* save best solution */
  308. if (currentMl > ml) {
  309. ml = currentMl;
  310. *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
  311. if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
  312. }
  313. if (matchIndex <= minChain) break;
  314. matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
  315. }
  316. return ml;
  317. }
  318. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
  319. ZSTD_CCtx* zc,
  320. const BYTE* ip, const BYTE* const iLimit,
  321. size_t* offsetPtr,
  322. const U32 maxNbAttempts, const U32 matchLengthSearch)
  323. {
  324. switch(matchLengthSearch)
  325. {
  326. default : /* includes case 3 */
  327. case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
  328. case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
  329. case 7 :
  330. case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
  331. }
  332. }
  333. FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  334. ZSTD_CCtx* zc,
  335. const BYTE* ip, const BYTE* const iLimit,
  336. size_t* offsetPtr,
  337. const U32 maxNbAttempts, const U32 matchLengthSearch)
  338. {
  339. switch(matchLengthSearch)
  340. {
  341. default : /* includes case 3 */
  342. case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
  343. case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
  344. case 7 :
  345. case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
  346. }
  347. }
  348. /* *******************************
  349. * Common parser - lazy strategy
  350. *********************************/
  351. FORCE_INLINE_TEMPLATE
  352. size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
  353. const void* src, size_t srcSize,
  354. const U32 searchMethod, const U32 depth)
  355. {
  356. seqStore_t* seqStorePtr = &(ctx->seqStore);
  357. const BYTE* const istart = (const BYTE*)src;
  358. const BYTE* ip = istart;
  359. const BYTE* anchor = istart;
  360. const BYTE* const iend = istart + srcSize;
  361. const BYTE* const ilimit = iend - 8;
  362. const BYTE* const base = ctx->base + ctx->dictLimit;
  363. U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
  364. U32 const mls = ctx->appliedParams.cParams.searchLength;
  365. typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
  366. size_t* offsetPtr,
  367. U32 maxNbAttempts, U32 matchLengthSearch);
  368. searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
  369. U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;
  370. /* init */
  371. ip += (ip==base);
  372. ctx->nextToUpdate3 = ctx->nextToUpdate;
  373. { U32 const maxRep = (U32)(ip-base);
  374. if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  375. if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  376. }
  377. /* Match Loop */
  378. while (ip < ilimit) {
  379. size_t matchLength=0;
  380. size_t offset=0;
  381. const BYTE* start=ip+1;
  382. /* check repCode */
  383. if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
  384. /* repcode : we take it */
  385. matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
  386. if (depth==0) goto _storeSequence;
  387. }
  388. /* first search (depth 0) */
  389. { size_t offsetFound = 99999999;
  390. size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
  391. if (ml2 > matchLength)
  392. matchLength = ml2, start = ip, offset=offsetFound;
  393. }
  394. if (matchLength < 4) {
  395. ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
  396. continue;
  397. }
  398. /* let's try to find a better solution */
  399. if (depth>=1)
  400. while (ip<ilimit) {
  401. ip ++;
  402. if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  403. size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  404. int const gain2 = (int)(mlRep * 3);
  405. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  406. if ((mlRep >= 4) && (gain2 > gain1))
  407. matchLength = mlRep, offset = 0, start = ip;
  408. }
  409. { size_t offset2=99999999;
  410. size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
  411. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  412. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  413. if ((ml2 >= 4) && (gain2 > gain1)) {
  414. matchLength = ml2, offset = offset2, start = ip;
  415. continue; /* search a better one */
  416. } }
  417. /* let's find an even better one */
  418. if ((depth==2) && (ip<ilimit)) {
  419. ip ++;
  420. if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
  421. size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
  422. int const gain2 = (int)(ml2 * 4);
  423. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  424. if ((ml2 >= 4) && (gain2 > gain1))
  425. matchLength = ml2, offset = 0, start = ip;
  426. }
  427. { size_t offset2=99999999;
  428. size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
  429. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  430. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  431. if ((ml2 >= 4) && (gain2 > gain1)) {
  432. matchLength = ml2, offset = offset2, start = ip;
  433. continue;
  434. } } }
  435. break; /* nothing found : store previous solution */
  436. }
  437. /* NOTE:
  438. * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
  439. * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
  440. * overflows the pointer, which is undefined behavior.
  441. */
  442. /* catch up */
  443. if (offset) {
  444. while ( (start > anchor)
  445. && (start > base+offset-ZSTD_REP_MOVE)
  446. && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */
  447. { start--; matchLength++; }
  448. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  449. }
  450. /* store sequence */
  451. _storeSequence:
  452. { size_t const litLength = start - anchor;
  453. ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
  454. anchor = ip = start + matchLength;
  455. }
  456. /* check immediate repcode */
  457. while ( (ip <= ilimit)
  458. && ((offset_2>0)
  459. & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
  460. /* store sequence */
  461. matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  462. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
  463. ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
  464. ip += matchLength;
  465. anchor = ip;
  466. continue; /* faster when present ... (?) */
  467. } }
  468. /* Save reps for next block */
  469. seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
  470. seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
  471. /* Return the last literals size */
  472. return iend - anchor;
  473. }
  474. size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  475. {
  476. return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
  477. }
  478. size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  479. {
  480. return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
  481. }
  482. size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  483. {
  484. return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
  485. }
  486. size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  487. {
  488. return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
  489. }
  490. FORCE_INLINE_TEMPLATE
  491. size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
  492. const void* src, size_t srcSize,
  493. const U32 searchMethod, const U32 depth)
  494. {
  495. seqStore_t* seqStorePtr = &(ctx->seqStore);
  496. const BYTE* const istart = (const BYTE*)src;
  497. const BYTE* ip = istart;
  498. const BYTE* anchor = istart;
  499. const BYTE* const iend = istart + srcSize;
  500. const BYTE* const ilimit = iend - 8;
  501. const BYTE* const base = ctx->base;
  502. const U32 dictLimit = ctx->dictLimit;
  503. const U32 lowestIndex = ctx->lowLimit;
  504. const BYTE* const prefixStart = base + dictLimit;
  505. const BYTE* const dictBase = ctx->dictBase;
  506. const BYTE* const dictEnd = dictBase + dictLimit;
  507. const BYTE* const dictStart = dictBase + ctx->lowLimit;
  508. const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
  509. const U32 mls = ctx->appliedParams.cParams.searchLength;
  510. typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
  511. size_t* offsetPtr,
  512. U32 maxNbAttempts, U32 matchLengthSearch);
  513. searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
  514. U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];
  515. /* init */
  516. ctx->nextToUpdate3 = ctx->nextToUpdate;
  517. ip += (ip == prefixStart);
  518. /* Match Loop */
  519. while (ip < ilimit) {
  520. size_t matchLength=0;
  521. size_t offset=0;
  522. const BYTE* start=ip+1;
  523. U32 current = (U32)(ip-base);
  524. /* check repCode */
  525. { const U32 repIndex = (U32)(current+1 - offset_1);
  526. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  527. const BYTE* const repMatch = repBase + repIndex;
  528. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  529. if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  530. /* repcode detected we should take it */
  531. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  532. matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  533. if (depth==0) goto _storeSequence;
  534. } }
  535. /* first search (depth 0) */
  536. { size_t offsetFound = 99999999;
  537. size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
  538. if (ml2 > matchLength)
  539. matchLength = ml2, start = ip, offset=offsetFound;
  540. }
  541. if (matchLength < 4) {
  542. ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
  543. continue;
  544. }
  545. /* let's try to find a better solution */
  546. if (depth>=1)
  547. while (ip<ilimit) {
  548. ip ++;
  549. current++;
  550. /* check repCode */
  551. if (offset) {
  552. const U32 repIndex = (U32)(current - offset_1);
  553. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  554. const BYTE* const repMatch = repBase + repIndex;
  555. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  556. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  557. /* repcode detected */
  558. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  559. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  560. int const gain2 = (int)(repLength * 3);
  561. int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
  562. if ((repLength >= 4) && (gain2 > gain1))
  563. matchLength = repLength, offset = 0, start = ip;
  564. } }
  565. /* search match, depth 1 */
  566. { size_t offset2=99999999;
  567. size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
  568. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  569. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
  570. if ((ml2 >= 4) && (gain2 > gain1)) {
  571. matchLength = ml2, offset = offset2, start = ip;
  572. continue; /* search a better one */
  573. } }
  574. /* let's find an even better one */
  575. if ((depth==2) && (ip<ilimit)) {
  576. ip ++;
  577. current++;
  578. /* check repCode */
  579. if (offset) {
  580. const U32 repIndex = (U32)(current - offset_1);
  581. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  582. const BYTE* const repMatch = repBase + repIndex;
  583. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  584. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  585. /* repcode detected */
  586. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  587. size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  588. int const gain2 = (int)(repLength * 4);
  589. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
  590. if ((repLength >= 4) && (gain2 > gain1))
  591. matchLength = repLength, offset = 0, start = ip;
  592. } }
  593. /* search match, depth 2 */
  594. { size_t offset2=99999999;
  595. size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
  596. int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
  597. int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  598. if ((ml2 >= 4) && (gain2 > gain1)) {
  599. matchLength = ml2, offset = offset2, start = ip;
  600. continue;
  601. } } }
  602. break; /* nothing found : store previous solution */
  603. }
  604. /* catch up */
  605. if (offset) {
  606. U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
  607. const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
  608. const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
  609. while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
  610. offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
  611. }
  612. /* store sequence */
  613. _storeSequence:
  614. { size_t const litLength = start - anchor;
  615. ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
  616. anchor = ip = start + matchLength;
  617. }
  618. /* check immediate repcode */
  619. while (ip <= ilimit) {
  620. const U32 repIndex = (U32)((ip-base) - offset_2);
  621. const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  622. const BYTE* const repMatch = repBase + repIndex;
  623. if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
  624. if (MEM_read32(ip) == MEM_read32(repMatch)) {
  625. /* repcode detected we should take it */
  626. const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  627. matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  628. offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
  629. ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
  630. ip += matchLength;
  631. anchor = ip;
  632. continue; /* faster when present ... (?) */
  633. }
  634. break;
  635. } }
  636. /* Save reps for next block */
  637. seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
  638. /* Return the last literals size */
  639. return iend - anchor;
  640. }
  641. size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  642. {
  643. return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
  644. }
  645. size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  646. {
  647. return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
  648. }
  649. size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  650. {
  651. return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
  652. }
  653. size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
  654. {
  655. return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
  656. }