123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440 |
- //-----------------------------------------------------------------------------
- // Keyset tests generate various sorts of difficult-to-hash keysets and compare
- // the distribution and collision frequency of the hash results against an
- // ideal random distribution
- // The sanity checks are also in this cpp/h
- #pragma once
- #include "Types.h"
- #include "Stats.h"
- #include "Random.h" // for rand_p
- #include <algorithm> // for std::swap
- #include <assert.h>
- //-----------------------------------------------------------------------------
- // Sanity tests
- bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
- bool SanityTest ( pfHash hash, const int hashbits );
- void AppendedZeroesTest ( pfHash hash, const int hashbits );
- //-----------------------------------------------------------------------------
- // Keyset 'Combination' - all possible combinations of input blocks
- template< typename hashtype >
- void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
- uint32_t * blocks, int blockcount,
- pfHash hash, std::vector<hashtype> & hashes )
- {
- if(len == maxlen) return;
- for(int i = 0; i < blockcount; i++)
- {
- key[len] = blocks[i];
-
- //if(len == maxlen-1)
- {
- hashtype h;
- hash(key,(len+1) * sizeof(uint32_t),0,&h);
- hashes.push_back(h);
- }
- //else
- {
- CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
- }
- }
- }
- template< typename hashtype >
- bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
- {
- printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
- //----------
- std::vector<hashtype> hashes;
- uint32_t * key = new uint32_t[maxlen];
- CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
- delete [] key;
- printf("%d keys\n",(int)hashes.size());
- //----------
- bool result = true;
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
- return result;
- }
- //----------------------------------------------------------------------------
- // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
- // consisting of all possible permutations of those blocks
- template< typename hashtype >
- void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
- {
- if(k == blockcount-1)
- {
- hashtype h;
- hash(blocks,blockcount * sizeof(uint32_t),0,&h);
- hashes.push_back(h);
- return;
- }
- for(int i = k; i < blockcount; i++)
- {
- std::swap(blocks[k],blocks[i]);
- PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
- std::swap(blocks[k],blocks[i]);
- }
- }
- template< typename hashtype >
- bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
- {
- printf("Keyset 'Permutation' - %d blocks - ",blockcount);
- //----------
- std::vector<hashtype> hashes;
- PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
- printf("%d keys\n",(int)hashes.size());
- //----------
- bool result = true;
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
- template < typename keytype, typename hashtype >
- void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
- {
- const int nbytes = sizeof(keytype);
- const int nbits = nbytes * 8;
- hashtype h;
- for(int i = start; i < nbits; i++)
- {
- flipbit(&k,nbytes,i);
- if(inclusive || (bitsleft == 1))
- {
- hash(&k,sizeof(keytype),0,&h);
- hashes.push_back(h);
- }
- if(bitsleft > 1)
- {
- SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
- }
- flipbit(&k,nbytes,i);
- }
- }
- //----------
- template < int keybits, typename hashtype >
- bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram )
- {
- printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
- typedef Blob<keybits> keytype;
- std::vector<hashtype> hashes;
- keytype k;
- memset(&k,0,sizeof(k));
- if(inclusive)
- {
- hashtype h;
- hash(&k,sizeof(keytype),0,&h);
- hashes.push_back(h);
- }
- SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
- printf("%d keys\n",(int)hashes.size());
- bool result = true;
-
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
- printf("\n");
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
- // all possible keys with bits set in that window
- template < typename keytype, typename hashtype >
- bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
- {
- const int keybits = sizeof(keytype) * 8;
- const int keycount = 1 << windowbits;
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
- bool result = true;
- int testcount = keybits;
- printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
- for(int j = 0; j <= testcount; j++)
- {
- int minbit = j;
- keytype key;
- for(int i = 0; i < keycount; i++)
- {
- key = i;
- //key = key << minbit;
- lrot(&key,sizeof(keytype),minbit);
- hash(&key,sizeof(keytype),0,&hashes[i]);
- }
- printf("Window at %3d - ",j);
- result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
- //printf("\n");
- }
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
- // bytes.
- // (This keyset type is designed to make MurmurHash2 fail)
- template < typename hashtype >
- bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
- {
- printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
- Rand r(483723);
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
- int keyLen = cycleLen * cycleReps;
- uint8_t * cycle = new uint8_t[cycleLen + 16];
- uint8_t * key = new uint8_t[keyLen];
- //----------
- for(int i = 0; i < keycount; i++)
- {
- r.rand_p(cycle,cycleLen);
- *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
- for(int j = 0; j < keyLen; j++)
- {
- key[j] = cycle[j % cycleLen];
- }
- hash(key,keyLen,0,&hashes[i]);
- }
- //----------
-
- bool result = true;
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
- delete [] cycle;
- delete [] key;
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
- void TwoBytesKeygen ( int maxlen, KeyCallback & c );
- template < typename hashtype >
- bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
- {
- std::vector<hashtype> hashes;
- HashCallback<hashtype> c(hash,hashes);
- TwoBytesKeygen(maxlen,c);
- bool result = true;
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
- // where "core" consists of all possible combinations of the given character
- // set of length N.
- template < typename hashtype >
- bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
- {
- const int prefixlen = (int)strlen(prefix);
- const int suffixlen = (int)strlen(suffix);
- const int corecount = (int)strlen(coreset);
- const int keybytes = prefixlen + corelen + suffixlen;
- const int keycount = (int)pow(double(corecount),double(corelen));
- printf("Keyset 'Text' - keys of form \"%s[",prefix);
- for(int i = 0; i < corelen; i++) printf("X");
- printf("]%s\" - %d keys\n",suffix,keycount);
- uint8_t * key = new uint8_t[keybytes+1];
- key[keybytes] = 0;
- memcpy(key,prefix,prefixlen);
- memcpy(key+prefixlen+corelen,suffix,suffixlen);
- //----------
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
- for(int i = 0; i < keycount; i++)
- {
- int t = i;
- for(int j = 0; j < corelen; j++)
- {
- key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
- }
- hash(key,keybytes,0,&hashes[i]);
- }
- //----------
- bool result = true;
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
- delete [] key;
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
- // We reuse one block of empty bytes, otherwise the RAM cost is enormous.
- template < typename hashtype >
- bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
- {
- int keycount = 64*1024;
- printf("Keyset 'Zeroes' - %d keys\n",keycount);
- unsigned char * nullblock = new unsigned char[keycount];
- memset(nullblock,0,keycount);
- //----------
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
- for(int i = 0; i < keycount; i++)
- {
- hash(nullblock,i,0,&hashes[i]);
- }
- bool result = true;
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
- delete [] nullblock;
- return result;
- }
- //-----------------------------------------------------------------------------
- // Keyset 'Seed' - hash "the quick brown fox..." using different seeds
- template < typename hashtype >
- bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
- {
- printf("Keyset 'Seed' - %d keys\n",keycount);
- const char * text = "The quick brown fox jumps over the lazy dog";
- const int len = (int)strlen(text);
- //----------
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
- for(int i = 0; i < keycount; i++)
- {
- hash(text,len,i,&hashes[i]);
- }
- bool result = true;
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
- return result;
- }
- //-----------------------------------------------------------------------------
|