123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- #include "KeysetTest.h"
- #include "Platform.h"
- #include "Random.h"
- #include <map>
- #include <set>
- //-----------------------------------------------------------------------------
- // This should hopefully be a thorough and uambiguous test of whether a hash
- // is correctly implemented on a given platform
- bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
- {
- const int hashbytes = hashbits / 8;
- uint8_t * key = new uint8_t[256];
- uint8_t * hashes = new uint8_t[hashbytes * 256];
- uint8_t * final = new uint8_t[hashbytes];
- memset(key,0,256);
- memset(hashes,0,hashbytes*256);
- memset(final,0,hashbytes);
- // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
- // the seed
- for(int i = 0; i < 256; i++)
- {
- key[i] = (uint8_t)i;
- hash(key,i,256-i,&hashes[i*hashbytes]);
- }
- // Then hash the result array
- hash(hashes,hashbytes*256,0,final);
- // The first four bytes of that hash, interpreted as a little-endian integer, is our
- // verification value
- uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
- delete [] key;
- delete [] hashes;
- delete [] final;
- //----------
- if(expected != verification)
- {
- if(verbose) printf("Verification value 0x%08X : FAIL! (Expected 0x%08x)\n",verification,expected);
- return false;
- }
- else
- {
- if(verbose) printf("Verification value 0x%08X : PASS\n",verification);
- return true;
- }
- }
- //----------------------------------------------------------------------------
- // Basic sanity checks -
- // A hash function should not be reading outside the bounds of the key.
- // Flipping a bit of a key should, with overwhelmingly high probability,
- // result in a different hash.
- // Hashing the same key twice should always produce the same result.
- // The memory alignment of the key should not affect the hash result.
- bool SanityTest ( pfHash hash, const int hashbits )
- {
- printf("Running sanity check 1 ");
-
- Rand r(883741);
- bool result = true;
- const int hashbytes = hashbits/8;
- const int reps = 10;
- const int keymax = 256;
- const int pad = 16;
- const int buflen = keymax + pad*3;
-
- uint8_t * buffer1 = new uint8_t[buflen];
- uint8_t * buffer2 = new uint8_t[buflen];
- uint8_t * hash1 = new uint8_t[hashbytes];
- uint8_t * hash2 = new uint8_t[hashbytes];
- //----------
-
- for(int irep = 0; irep < reps; irep++)
- {
- if(irep % (reps/10) == 0) printf(".");
- for(int len = 4; len <= keymax; len++)
- {
- for(int offset = pad; offset < pad*2; offset++)
- {
- uint8_t * key1 = &buffer1[pad];
- uint8_t * key2 = &buffer2[pad+offset];
- r.rand_p(buffer1,buflen);
- r.rand_p(buffer2,buflen);
- memcpy(key2,key1,len);
- hash(key1,len,0,hash1);
- for(int bit = 0; bit < (len * 8); bit++)
- {
- // Flip a bit, hash the key -> we should get a different result.
- flipbit(key2,len,bit);
- hash(key2,len,0,hash2);
- if(memcmp(hash1,hash2,hashbytes) == 0)
- {
- result = false;
- }
- // Flip it back, hash again -> we should get the original result.
- flipbit(key2,len,bit);
- hash(key2,len,0,hash2);
- if(memcmp(hash1,hash2,hashbytes) != 0)
- {
- result = false;
- }
- }
- }
- }
- }
- if(result == false)
- {
- printf("FAIL !!!!!\n");
- }
- else
- {
- printf("PASS\n");
- }
- delete [] buffer1;
- delete [] buffer2;
- delete [] hash1;
- delete [] hash2;
- return result;
- }
- //----------------------------------------------------------------------------
- // Appending zero bytes to a key should always cause it to produce a different
- // hash value
- void AppendedZeroesTest ( pfHash hash, const int hashbits )
- {
- printf("Running AppendedZeroesTest");
-
- Rand r(173994);
- const int hashbytes = hashbits/8;
- for(int rep = 0; rep < 100; rep++)
- {
- if(rep % 10 == 0) printf(".");
- unsigned char key[256];
- memset(key,0,sizeof(key));
- r.rand_p(key,32);
- uint32_t h1[16];
- uint32_t h2[16];
- memset(h1,0,hashbytes);
- memset(h2,0,hashbytes);
- for(int i = 0; i < 32; i++)
- {
- hash(key,32+i,0,h1);
- if(memcmp(h1,h2,hashbytes) == 0)
- {
- printf("FAIL !!!!!\n");
- return;
- }
- memcpy(h2,h1,hashbytes);
- }
- }
- printf("PASS\n");
- }
- //-----------------------------------------------------------------------------
- // Generate all keys of up to N bytes containing two non-zero bytes
- void TwoBytesKeygen ( int maxlen, KeyCallback & c )
- {
- //----------
- // Compute # of keys
- int keycount = 0;
- for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
- keycount *= 255*255;
- for(int i = 2; i <= maxlen; i++) keycount += i*255;
- printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
- c.reserve(keycount);
- //----------
- // Add all keys with one non-zero byte
- uint8_t key[256];
- memset(key,0,256);
- for(int keylen = 2; keylen <= maxlen; keylen++)
- for(int byteA = 0; byteA < keylen; byteA++)
- {
- for(int valA = 1; valA <= 255; valA++)
- {
- key[byteA] = (uint8_t)valA;
- c(key,keylen);
- }
- key[byteA] = 0;
- }
- //----------
- // Add all keys with two non-zero bytes
- for(int keylen = 2; keylen <= maxlen; keylen++)
- for(int byteA = 0; byteA < keylen-1; byteA++)
- for(int byteB = byteA+1; byteB < keylen; byteB++)
- {
- for(int valA = 1; valA <= 255; valA++)
- {
- key[byteA] = (uint8_t)valA;
- for(int valB = 1; valB <= 255; valB++)
- {
- key[byteB] = (uint8_t)valB;
- c(key,keylen);
- }
- key[byteB] = 0;
- }
- key[byteA] = 0;
- }
- }
- //-----------------------------------------------------------------------------
- template< typename hashtype >
- void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
- {
- typedef CollisionMap<hashtype,ByteVec> cmap_t;
- for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
- {
- const hashtype & hash = (*it).first;
- printf("Hash - ");
- printbytes(&hash,sizeof(hashtype));
- printf("\n");
- std::vector<ByteVec> & keys = (*it).second;
- for(int i = 0; i < (int)keys.size(); i++)
- {
- ByteVec & key = keys[i];
- printf("Key - ");
- printbytes(&key[0],(int)key.size());
- printf("\n");
- }
- printf("\n");
- }
- }
- // test code
- void ReportCollisions ( pfHash hash )
- {
- printf("Hashing keyset\n");
- std::vector<uint128_t> hashes;
- HashCallback<uint128_t> c(hash,hashes);
- TwoBytesKeygen(20,c);
- printf("%d hashes\n",(int)hashes.size());
- printf("Finding collisions\n");
- HashSet<uint128_t> collisions;
- FindCollisions(hashes,collisions,1000);
- printf("%d collisions\n",(int)collisions.size());
- printf("Mapping collisions\n");
- CollisionMap<uint128_t,ByteVec> cmap;
- CollisionCallback<uint128_t> c2(hash,collisions,cmap);
- TwoBytesKeygen(20,c2);
- printf("Dumping collisions\n");
- DumpCollisionMap(cmap);
- }
|