123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- #ifndef BLOOM_H
- #define BLOOM_H
/*
 * Forward declarations: this header only refers to these types
 * through pointers, so their full definitions are not needed here.
 */
struct commit;
struct repository;
/*
 * Parameters that control how Bloom filters are hashed and sized.
 */
struct bloom_filter_settings {
	/*
	 * The version of the hashing technique being used.
	 * We currently only support version = 1 which is
	 * the seeded murmur3 hashing technique implemented
	 * in bloom.c.
	 */
	uint32_t hash_version;

	/*
	 * The number of times a path is hashed, i.e. the
	 * number of bit positions that cumulatively
	 * determine whether a path is present in the
	 * Bloom filter.
	 */
	uint32_t num_hashes;

	/*
	 * The minimum number of bits per entry in the Bloom
	 * filter. If the filter contains 'n' entries, then
	 * filter size is the minimum number of 8-bit words
	 * that contain n*b bits.
	 */
	uint32_t bits_per_entry;

	/*
	 * The maximum number of changed paths per commit
	 * before declaring a Bloom filter to be too-large.
	 *
	 * Not written to the commit-graph file.
	 */
	uint32_t max_changed_paths;
};
/* Default value for bloom_filter_settings.max_changed_paths. */
#define DEFAULT_BLOOM_MAX_CHANGES 512

/*
 * Positional initializer for struct bloom_filter_settings, in field
 * order: hash_version = 1, num_hashes = 7, bits_per_entry = 10,
 * max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES.
 */
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }

/* Filter sizes are counted in 8-bit words. */
#define BITS_PER_WORD 8

/*
 * Size in bytes of three uint32_t values.
 *
 * The replacement list is parenthesized so that the macro behaves as a
 * single operand inside larger expressions, e.g.
 * `n / BLOOMDATA_CHUNK_HEADER_SIZE` or `n % BLOOMDATA_CHUNK_HEADER_SIZE`.
 */
#define BLOOMDATA_CHUNK_HEADER_SIZE (3 * sizeof(uint32_t))
/*
 * A bloom_filter struct represents a data segment to
 * use when testing hash values. The 'len' member
 * dictates how many entries are stored in 'data';
 * since 'data' is an array of unsigned char, each
 * entry is one byte.
 */
struct bloom_filter {
	unsigned char *data;
	size_t len;
};
/*
 * A bloom_key holds the k hash values computed for one
 * string. The values can be computed once, stored here,
 * and re-used for every test against a bloom_filter.
 *
 * The number of hashes (k) is not stored in the key: it
 * is given by the Bloom filter settings and is the same
 * for all Bloom filters and keys interacting with the
 * loaded version of the commit graph file and the Bloom
 * data chunks.
 */
struct bloom_key {
	uint32_t *hashes;
};
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 *
 *   seed - seed value for the hash
 *   data - start of the bytes to hash
 *   len  - number of bytes at 'data'
 *
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
/*
 * Fill 'key' for the 'len' bytes starting at 'data', using 'settings'.
 *
 * NOTE(review): the implementation is not visible from this header;
 * presumably it stores settings->num_hashes hash values in
 * key->hashes -- confirm against bloom.c.
 */
void fill_bloom_key(const char *data,
		    size_t len,
		    struct bloom_key *key,
		    const struct bloom_filter_settings *settings);

/*
 * Clear 'key'.
 *
 * NOTE(review): presumably releases whatever fill_bloom_key() stored
 * in the key -- confirm against bloom.c.
 */
void clear_bloom_key(struct bloom_key *key);
/*
 * Add 'key' to 'filter' according to 'settings'. The key is taken
 * by const pointer; the filter is the object being modified.
 *
 * NOTE(review): presumably sets one bit in filter->data per hash in
 * the key -- confirm against bloom.c.
 */
void add_key_to_filter(const struct bloom_key *key,
		       struct bloom_filter *filter,
		       const struct bloom_filter_settings *settings);

/*
 * One-time setup for Bloom filter storage; takes no arguments.
 *
 * NOTE(review): what exactly is initialized is not visible from this
 * header -- see bloom.c.
 */
void init_bloom_filters(void);
/*
 * Status values reported through the 'computed' out-parameter of
 * get_or_compute_bloom_filter(). Each constant occupies its own bit.
 *
 * NOTE(review): the distinct bits suggest several values may be
 * OR-ed together -- confirm against the implementation.
 */
enum bloom_filter_computed {
	BLOOM_NOT_COMPUTED = (1 << 0),
	BLOOM_COMPUTED     = (1 << 1),
	BLOOM_TRUNC_LARGE  = (1 << 2),
	BLOOM_TRUNC_EMPTY  = (1 << 3),
};
/*
 * Look up the Bloom filter for commit 'c' in repository 'r'.
 *
 *   compute_if_not_present - flag selecting whether a missing filter
 *                            should be computed
 *   settings               - filter settings; get_bloom_filter()
 *                            passes NULL
 *   computed               - optional out-parameter of type
 *                            enum bloom_filter_computed;
 *                            get_bloom_filter() passes NULL
 *
 * NOTE(review): when NULL is returned and which 'computed' values are
 * reported is decided by the implementation -- confirm against bloom.c.
 */
struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
						 struct commit *c,
						 int compute_if_not_present,
						 const struct bloom_filter_settings *settings,
						 enum bloom_filter_computed *computed);

/*
 * Lookup-only shorthand: never asks for computation
 * (compute_if_not_present = 0) and passes NULL for both 'settings'
 * and 'computed'.
 */
#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
	(r), (c), 0, NULL, NULL)
/*
 * Test 'key' against 'filter' using 'settings'. All three arguments
 * are taken by const pointer.
 *
 * NOTE(review): the meaning of the int result (presumably non-zero
 * for "maybe present", zero for "definitely absent", possibly a
 * negative value when no answer can be given) is not visible from
 * this header -- confirm against bloom.c.
 */
int bloom_filter_contains(const struct bloom_filter *filter,
			  const struct bloom_key *key,
			  const struct bloom_filter_settings *settings);
- #endif
|