sequence_compression_api.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. * Copyright (c) 2016-2021, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /**
  11. * This fuzz target performs a zstd round-trip test by generating an arbitrary
  12. * array of sequences, generating the associated source buffer, calling
  13. * ZSTD_compressSequences(), and then decompresses and compares the result with
  14. * the original generated source buffer.
  15. */
  16. #define ZSTD_STATIC_LINKING_ONLY
  17. #include <stddef.h>
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include <string.h>
  21. #include <time.h>
  22. #include "fuzz_helpers.h"
  23. #include "zstd_helpers.h"
  24. #include "fuzz_data_producer.h"
  25. static ZSTD_CCtx *cctx = NULL;
  26. static ZSTD_DCtx *dctx = NULL;
  27. static void* literalsBuffer = NULL;
  28. static void* generatedSrc = NULL;
  29. static ZSTD_Sequence* generatedSequences = NULL;
  30. #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
  31. #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
  32. #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */
  33. #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */
  34. #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
  35. /* Deterministic random number generator */
  36. #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
  37. static uint32_t FUZZ_RDG_rand(uint32_t* src)
  38. {
  39. static const uint32_t prime1 = 2654435761U;
  40. static const uint32_t prime2 = 2246822519U;
  41. uint32_t rand32 = *src;
  42. rand32 *= prime1;
  43. rand32 ^= prime2;
  44. rand32 = FUZZ_RDG_rotl32(rand32, 13);
  45. *src = rand32;
  46. return rand32 >> 5;
  47. }
  48. /* Make a pseudorandom string - this simple function exists to avoid
  49. * taking a dependency on datagen.h to have RDG_genBuffer().
  50. */
  51. static char *generatePseudoRandomString(char *str, size_t size) {
  52. const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
  53. uint32_t seed = 0;
  54. if (size) {
  55. for (size_t n = 0; n < size; n++) {
  56. int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
  57. str[n] = charset[key];
  58. }
  59. }
  60. return str;
  61. }
  62. /* Returns size of source buffer */
  63. static size_t decodeSequences(void* dst, size_t nbSequences,
  64. size_t literalsSize, const void* dict, size_t dictSize) {
  65. const uint8_t* litPtr = literalsBuffer;
  66. const uint8_t* const litBegin = literalsBuffer;
  67. const uint8_t* const litEnd = literalsBuffer + literalsSize;
  68. const uint8_t* dictPtr = dict;
  69. uint8_t* op = dst;
  70. const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
  71. size_t generatedSrcBufferSize = 0;
  72. size_t bytesWritten = 0;
  73. uint32_t lastLLSize;
  74. for (size_t i = 0; i < nbSequences; ++i) {
  75. FUZZ_ASSERT(generatedSequences[i].matchLength != 0);
  76. FUZZ_ASSERT(generatedSequences[i].offset != 0);
  77. if (litPtr + generatedSequences[i].litLength > litEnd) {
  78. litPtr = litBegin;
  79. }
  80. ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength);
  81. bytesWritten += generatedSequences[i].litLength;
  82. op += generatedSequences[i].litLength;
  83. litPtr += generatedSequences[i].litLength;
  84. FUZZ_ASSERT(generatedSequences[i].offset != 0);
  85. /* Copy over the match */
  86. { size_t matchLength = generatedSequences[i].matchLength;
  87. size_t j = 0;
  88. size_t k = 0;
  89. if (dictSize != 0) {
  90. if (generatedSequences[i].offset > bytesWritten) {
  91. /* Offset goes into the dictionary */
  92. size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten;
  93. for (; k < offsetFromEndOfDict && k < matchLength; ++k) {
  94. op[k] = dictPtr[dictSize - offsetFromEndOfDict + k];
  95. }
  96. matchLength -= k;
  97. op += k;
  98. }
  99. }
  100. for (; j < matchLength; ++j) {
  101. op[j] = op[j-(int)generatedSequences[i].offset];
  102. }
  103. op += j;
  104. FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
  105. bytesWritten += generatedSequences[i].matchLength;
  106. }
  107. }
  108. generatedSrcBufferSize = bytesWritten;
  109. FUZZ_ASSERT(litPtr <= litEnd);
  110. lastLLSize = (uint32_t)(litEnd - litPtr);
  111. if (lastLLSize <= oend - op) {
  112. ZSTD_memcpy(op, litPtr, lastLLSize);
  113. generatedSrcBufferSize += lastLLSize;
  114. }
  115. return generatedSrcBufferSize;
  116. }
  117. /* Returns nb sequences generated
  118. * TODO: Add repcode fuzzing once we support repcode match splits
  119. */
  120. static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
  121. size_t literalsSizeLimit, size_t dictSize,
  122. size_t windowLog) {
  123. uint32_t bytesGenerated = 0;
  124. uint32_t nbSeqGenerated = 0;
  125. uint32_t litLength;
  126. uint32_t matchLength;
  127. uint32_t matchBound;
  128. uint32_t offset;
  129. uint32_t offsetBound;
  130. uint32_t repCode = 0;
  131. uint32_t isFirstSequence = 1;
  132. uint32_t windowSize = 1 << windowLog;
  133. while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ
  134. && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
  135. && !FUZZ_dataProducer_empty(producer)) {
  136. matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
  137. litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit)
  138. : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit);
  139. bytesGenerated += litLength;
  140. if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
  141. break;
  142. }
  143. offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize;
  144. offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
  145. if (dictSize > 0 && bytesGenerated <= windowSize) {
  146. /* Prevent match length from being such that it would be associated with an offset too large
  147. * from the decoder's perspective. If not possible (match would be too small),
  148. * then reduce the offset if necessary.
  149. */
  150. size_t bytesToReachWindowSize = windowSize - bytesGenerated;
  151. if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
  152. uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
  153. offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
  154. } else {
  155. matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ?
  156. ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize;
  157. }
  158. }
  159. matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
  160. bytesGenerated += matchLength;
  161. if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
  162. break;
  163. }
  164. ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
  165. generatedSequences[nbSeqGenerated++] = seq;
  166. isFirstSequence = 0;
  167. }
  168. return nbSeqGenerated;
  169. }
  170. static size_t roundTripTest(void *result, size_t resultCapacity,
  171. void *compressed, size_t compressedCapacity,
  172. size_t srcSize,
  173. const void *dict, size_t dictSize,
  174. size_t generatedSequencesSize,
  175. size_t wLog, unsigned cLevel, unsigned hasDict)
  176. {
  177. size_t cSize;
  178. size_t dSize;
  179. ZSTD_CDict* cdict = NULL;
  180. ZSTD_DDict* ddict = NULL;
  181. ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
  182. ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
  183. ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
  184. ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
  185. ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
  186. ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
  187. /* TODO: Add block delim mode fuzzing */
  188. ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
  189. if (hasDict) {
  190. FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
  191. FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
  192. }
  193. cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
  194. generatedSequences, generatedSequencesSize,
  195. generatedSrc, srcSize);
  196. FUZZ_ZASSERT(cSize);
  197. dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
  198. FUZZ_ZASSERT(dSize);
  199. if (cdict) {
  200. ZSTD_freeCDict(cdict);
  201. }
  202. if (ddict) {
  203. ZSTD_freeDDict(ddict);
  204. }
  205. return dSize;
  206. }
  207. int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
  208. {
  209. void* rBuf;
  210. size_t rBufSize;
  211. void* cBuf;
  212. size_t cBufSize;
  213. size_t generatedSrcSize;
  214. size_t nbSequences;
  215. void* dictBuffer;
  216. size_t dictSize = 0;
  217. unsigned hasDict;
  218. unsigned wLog;
  219. int cLevel;
  220. FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
  221. if (literalsBuffer == NULL) {
  222. literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
  223. literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
  224. }
  225. hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
  226. if (hasDict) {
  227. dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
  228. dictBuffer = FUZZ_malloc(dictSize);
  229. dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
  230. }
  231. /* Generate window log first so we dont generate offsets too large */
  232. wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
  233. cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
  234. if (!generatedSequences) {
  235. generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
  236. }
  237. if (!generatedSrc) {
  238. generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
  239. }
  240. nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog);
  241. generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize);
  242. cBufSize = ZSTD_compressBound(generatedSrcSize);
  243. cBuf = FUZZ_malloc(cBufSize);
  244. rBufSize = generatedSrcSize;
  245. rBuf = FUZZ_malloc(rBufSize);
  246. if (!cctx) {
  247. cctx = ZSTD_createCCtx();
  248. FUZZ_ASSERT(cctx);
  249. }
  250. if (!dctx) {
  251. dctx = ZSTD_createDCtx();
  252. FUZZ_ASSERT(dctx);
  253. }
  254. size_t const result = roundTripTest(rBuf, rBufSize,
  255. cBuf, cBufSize,
  256. generatedSrcSize,
  257. dictBuffer, dictSize,
  258. nbSequences,
  259. wLog, cLevel, hasDict);
  260. FUZZ_ZASSERT(result);
  261. FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size");
  262. FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!");
  263. free(rBuf);
  264. free(cBuf);
  265. FUZZ_dataProducer_free(producer);
  266. if (hasDict) {
  267. free(dictBuffer);
  268. }
  269. #ifndef STATEFUL_FUZZING
  270. ZSTD_freeCCtx(cctx); cctx = NULL;
  271. ZSTD_freeDCtx(dctx); dctx = NULL;
  272. free(generatedSequences); generatedSequences = NULL;
  273. free(generatedSrc); generatedSrc = NULL;
  274. free(literalsBuffer); literalsBuffer = NULL;
  275. #endif
  276. return 0;
  277. }