123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- /*
- * Copyright (c) 2017-2021, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
- #include "seqgen.h"
- #include "mem.h"
- #include <string.h>
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
- static const size_t kMatchBytes = 128;
- #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
- static BYTE SEQ_randByte(unsigned* src)
- {
- static const U32 prime1 = 2654435761U;
- static const U32 prime2 = 2246822519U;
- U32 rand32 = *src;
- rand32 *= prime1;
- rand32 ^= prime2;
- rand32 = SEQ_rotl32(rand32, 13);
- *src = rand32;
- return (BYTE)(rand32 >> 5);
- }
- SEQ_stream SEQ_initStream(unsigned seed)
- {
- SEQ_stream stream;
- stream.state = 0;
- XXH64_reset(&stream.xxh, 0);
- stream.seed = seed;
- return stream;
- }
- /* Generates a single guard byte, then match length + 1 of a different byte,
- * then another guard byte.
- */
- static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
- SEQ_outBuffer* out)
- {
- typedef enum {
- ml_first_byte = 0,
- ml_match_bytes,
- ml_last_byte,
- } ml_state;
- BYTE* const ostart = (BYTE*)out->dst;
- BYTE* const oend = ostart + out->size;
- BYTE* op = ostart + out->pos;
- switch ((ml_state)stream->state) {
- case ml_first_byte:
- /* Generate a single byte and pick a different byte for the match */
- if (op >= oend) {
- stream->bytesLeft = 1;
- break;
- }
- *op = SEQ_randByte(&stream->seed) & 0xFF;
- do {
- stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
- } while (*op == stream->saved);
- ++op;
- /* State transition */
- stream->state = ml_match_bytes;
- stream->bytesLeft = value + 1;
- /* fall-through */
- case ml_match_bytes: {
- /* Copy matchLength + 1 bytes to the output buffer */
- size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
- if (setLength > 0) {
- memset(op, stream->saved, setLength);
- op += setLength;
- stream->bytesLeft -= setLength;
- }
- if (stream->bytesLeft > 0)
- break;
- /* State transition */
- stream->state = ml_last_byte;
- }
- /* fall-through */
- case ml_last_byte:
- /* Generate a single byte and pick a different byte for the match */
- if (op >= oend) {
- stream->bytesLeft = 1;
- break;
- }
- do {
- *op = SEQ_randByte(&stream->seed) & 0xFF;
- } while (*op == stream->saved);
- ++op;
- /* State transition */
- /* fall-through */
- default:
- stream->state = 0;
- stream->bytesLeft = 0;
- break;
- }
- XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
- out->pos = op - ostart;
- return stream->bytesLeft;
- }
- /* Saves the current seed then generates kMatchBytes random bytes >= 128.
- * Generates literal length - kMatchBytes random bytes < 128.
- * Generates another kMatchBytes using the saved seed to generate a match.
- * This way the match is easy to find for the compressors.
- */
- static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
- {
- typedef enum {
- ll_start = 0,
- ll_run_bytes,
- ll_literals,
- ll_run_match,
- } ll_state;
- BYTE* const ostart = (BYTE*)out->dst;
- BYTE* const oend = ostart + out->size;
- BYTE* op = ostart + out->pos;
- switch ((ll_state)stream->state) {
- case ll_start:
- stream->state = ll_run_bytes;
- stream->saved = stream->seed;
- stream->bytesLeft = MIN(kMatchBytes, value);
- /* fall-through */
- case ll_run_bytes:
- while (stream->bytesLeft > 0 && op < oend) {
- *op++ = SEQ_randByte(&stream->seed) | 0x80;
- --stream->bytesLeft;
- }
- if (stream->bytesLeft > 0)
- break;
- /* State transition */
- stream->state = ll_literals;
- stream->bytesLeft = value - MIN(kMatchBytes, value);
- /* fall-through */
- case ll_literals:
- while (stream->bytesLeft > 0 && op < oend) {
- *op++ = SEQ_randByte(&stream->seed) & 0x7F;
- --stream->bytesLeft;
- }
- if (stream->bytesLeft > 0)
- break;
- /* State transition */
- stream->state = ll_run_match;
- stream->bytesLeft = MIN(kMatchBytes, value);
- /* fall-through */
- case ll_run_match: {
- while (stream->bytesLeft > 0 && op < oend) {
- *op++ = SEQ_randByte(&stream->saved) | 0x80;
- --stream->bytesLeft;
- }
- if (stream->bytesLeft > 0)
- break;
- }
- /* fall-through */
- default:
- stream->state = 0;
- stream->bytesLeft = 0;
- break;
- }
- XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
- out->pos = op - ostart;
- return stream->bytesLeft;
- }
- /* Saves the current seed then generates kMatchBytes random bytes >= 128.
- * Generates offset - kMatchBytes of zeros to get a large offset without
- * polluting the hash tables.
- * Generates another kMatchBytes using the saved seed to generate a with the
- * required offset.
- */
- static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
- {
- typedef enum {
- of_start = 0,
- of_run_bytes,
- of_offset,
- of_run_match,
- } of_state;
- BYTE* const ostart = (BYTE*)out->dst;
- BYTE* const oend = ostart + out->size;
- BYTE* op = ostart + out->pos;
- switch ((of_state)stream->state) {
- case of_start:
- stream->state = of_run_bytes;
- stream->saved = stream->seed;
- stream->bytesLeft = MIN(value, kMatchBytes);
- /* fall-through */
- case of_run_bytes: {
- while (stream->bytesLeft > 0 && op < oend) {
- *op++ = SEQ_randByte(&stream->seed) | 0x80;
- --stream->bytesLeft;
- }
- if (stream->bytesLeft > 0)
- break;
- /* State transition */
- stream->state = of_offset;
- stream->bytesLeft = value - MIN(value, kMatchBytes);
- }
- /* fall-through */
- case of_offset: {
- /* Copy matchLength + 1 bytes to the output buffer */
- size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
- if (setLength > 0) {
- memset(op, 0, setLength);
- op += setLength;
- stream->bytesLeft -= setLength;
- }
- if (stream->bytesLeft > 0)
- break;
- /* State transition */
- stream->state = of_run_match;
- stream->bytesLeft = MIN(value, kMatchBytes);
- }
- /* fall-through */
- case of_run_match: {
- while (stream->bytesLeft > 0 && op < oend) {
- *op++ = SEQ_randByte(&stream->saved) | 0x80;
- --stream->bytesLeft;
- }
- if (stream->bytesLeft > 0)
- break;
- }
- /* fall-through */
- default:
- stream->state = 0;
- stream->bytesLeft = 0;
- break;
- }
- XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
- out->pos = op - ostart;
- return stream->bytesLeft;
- }
- /* Returns the number of bytes left to generate.
- * Must pass the same type/value until it returns 0.
- */
- size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
- {
- switch (type) {
- case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
- case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
- case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
- case SEQ_gen_max: /* fall-through */
- default: return 0;
- }
- }
- /* Returns the xxhash of the data produced so far */
- XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
- {
- return XXH64_digest(&stream->xxh);
- }
|