KeysetTest.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. #include "KeysetTest.h"
  2. #include "Platform.h"
  3. #include "Random.h"
  4. #include <map>
  5. #include <set>
  6. //-----------------------------------------------------------------------------
  7. // This should hopefully be a thorough and uambiguous test of whether a hash
  8. // is correctly implemented on a given platform
  9. bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
  10. {
  11. const int hashbytes = hashbits / 8;
  12. uint8_t * key = new uint8_t[256];
  13. uint8_t * hashes = new uint8_t[hashbytes * 256];
  14. uint8_t * final = new uint8_t[hashbytes];
  15. memset(key,0,256);
  16. memset(hashes,0,hashbytes*256);
  17. memset(final,0,hashbytes);
  18. // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
  19. // the seed
  20. for(int i = 0; i < 256; i++)
  21. {
  22. key[i] = (uint8_t)i;
  23. hash(key,i,256-i,&hashes[i*hashbytes]);
  24. }
  25. // Then hash the result array
  26. hash(hashes,hashbytes*256,0,final);
  27. // The first four bytes of that hash, interpreted as a little-endian integer, is our
  28. // verification value
  29. uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
  30. delete [] key;
  31. delete [] hashes;
  32. delete [] final;
  33. //----------
  34. if(expected != verification)
  35. {
  36. if(verbose) printf("Verification value 0x%08X : FAIL! (Expected 0x%08x)\n",verification,expected);
  37. return false;
  38. }
  39. else
  40. {
  41. if(verbose) printf("Verification value 0x%08X : PASS\n",verification);
  42. return true;
  43. }
  44. }
  45. //----------------------------------------------------------------------------
  46. // Basic sanity checks -
  47. // A hash function should not be reading outside the bounds of the key.
  48. // Flipping a bit of a key should, with overwhelmingly high probability,
  49. // result in a different hash.
  50. // Hashing the same key twice should always produce the same result.
  51. // The memory alignment of the key should not affect the hash result.
  52. bool SanityTest ( pfHash hash, const int hashbits )
  53. {
  54. printf("Running sanity check 1 ");
  55. Rand r(883741);
  56. bool result = true;
  57. const int hashbytes = hashbits/8;
  58. const int reps = 10;
  59. const int keymax = 256;
  60. const int pad = 16;
  61. const int buflen = keymax + pad*3;
  62. uint8_t * buffer1 = new uint8_t[buflen];
  63. uint8_t * buffer2 = new uint8_t[buflen];
  64. uint8_t * hash1 = new uint8_t[hashbytes];
  65. uint8_t * hash2 = new uint8_t[hashbytes];
  66. //----------
  67. for(int irep = 0; irep < reps; irep++)
  68. {
  69. if(irep % (reps/10) == 0) printf(".");
  70. for(int len = 4; len <= keymax; len++)
  71. {
  72. for(int offset = pad; offset < pad*2; offset++)
  73. {
  74. uint8_t * key1 = &buffer1[pad];
  75. uint8_t * key2 = &buffer2[pad+offset];
  76. r.rand_p(buffer1,buflen);
  77. r.rand_p(buffer2,buflen);
  78. memcpy(key2,key1,len);
  79. hash(key1,len,0,hash1);
  80. for(int bit = 0; bit < (len * 8); bit++)
  81. {
  82. // Flip a bit, hash the key -> we should get a different result.
  83. flipbit(key2,len,bit);
  84. hash(key2,len,0,hash2);
  85. if(memcmp(hash1,hash2,hashbytes) == 0)
  86. {
  87. result = false;
  88. }
  89. // Flip it back, hash again -> we should get the original result.
  90. flipbit(key2,len,bit);
  91. hash(key2,len,0,hash2);
  92. if(memcmp(hash1,hash2,hashbytes) != 0)
  93. {
  94. result = false;
  95. }
  96. }
  97. }
  98. }
  99. }
  100. if(result == false)
  101. {
  102. printf("FAIL !!!!!\n");
  103. }
  104. else
  105. {
  106. printf("PASS\n");
  107. }
  108. delete [] buffer1;
  109. delete [] buffer2;
  110. delete [] hash1;
  111. delete [] hash2;
  112. return result;
  113. }
  114. //----------------------------------------------------------------------------
  115. // Appending zero bytes to a key should always cause it to produce a different
  116. // hash value
  117. void AppendedZeroesTest ( pfHash hash, const int hashbits )
  118. {
  119. printf("Running AppendedZeroesTest");
  120. Rand r(173994);
  121. const int hashbytes = hashbits/8;
  122. for(int rep = 0; rep < 100; rep++)
  123. {
  124. if(rep % 10 == 0) printf(".");
  125. unsigned char key[256];
  126. memset(key,0,sizeof(key));
  127. r.rand_p(key,32);
  128. uint32_t h1[16];
  129. uint32_t h2[16];
  130. memset(h1,0,hashbytes);
  131. memset(h2,0,hashbytes);
  132. for(int i = 0; i < 32; i++)
  133. {
  134. hash(key,32+i,0,h1);
  135. if(memcmp(h1,h2,hashbytes) == 0)
  136. {
  137. printf("FAIL !!!!!\n");
  138. return;
  139. }
  140. memcpy(h2,h1,hashbytes);
  141. }
  142. }
  143. printf("PASS\n");
  144. }
  145. //-----------------------------------------------------------------------------
  146. // Generate all keys of up to N bytes containing two non-zero bytes
  147. void TwoBytesKeygen ( int maxlen, KeyCallback & c )
  148. {
  149. //----------
  150. // Compute # of keys
  151. int keycount = 0;
  152. for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
  153. keycount *= 255*255;
  154. for(int i = 2; i <= maxlen; i++) keycount += i*255;
  155. printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
  156. c.reserve(keycount);
  157. //----------
  158. // Add all keys with one non-zero byte
  159. uint8_t key[256];
  160. memset(key,0,256);
  161. for(int keylen = 2; keylen <= maxlen; keylen++)
  162. for(int byteA = 0; byteA < keylen; byteA++)
  163. {
  164. for(int valA = 1; valA <= 255; valA++)
  165. {
  166. key[byteA] = (uint8_t)valA;
  167. c(key,keylen);
  168. }
  169. key[byteA] = 0;
  170. }
  171. //----------
  172. // Add all keys with two non-zero bytes
  173. for(int keylen = 2; keylen <= maxlen; keylen++)
  174. for(int byteA = 0; byteA < keylen-1; byteA++)
  175. for(int byteB = byteA+1; byteB < keylen; byteB++)
  176. {
  177. for(int valA = 1; valA <= 255; valA++)
  178. {
  179. key[byteA] = (uint8_t)valA;
  180. for(int valB = 1; valB <= 255; valB++)
  181. {
  182. key[byteB] = (uint8_t)valB;
  183. c(key,keylen);
  184. }
  185. key[byteB] = 0;
  186. }
  187. key[byteA] = 0;
  188. }
  189. }
  190. //-----------------------------------------------------------------------------
  191. template< typename hashtype >
  192. void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
  193. {
  194. typedef CollisionMap<hashtype,ByteVec> cmap_t;
  195. for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
  196. {
  197. const hashtype & hash = (*it).first;
  198. printf("Hash - ");
  199. printbytes(&hash,sizeof(hashtype));
  200. printf("\n");
  201. std::vector<ByteVec> & keys = (*it).second;
  202. for(int i = 0; i < (int)keys.size(); i++)
  203. {
  204. ByteVec & key = keys[i];
  205. printf("Key - ");
  206. printbytes(&key[0],(int)key.size());
  207. printf("\n");
  208. }
  209. printf("\n");
  210. }
  211. }
  212. // test code
  213. void ReportCollisions ( pfHash hash )
  214. {
  215. printf("Hashing keyset\n");
  216. std::vector<uint128_t> hashes;
  217. HashCallback<uint128_t> c(hash,hashes);
  218. TwoBytesKeygen(20,c);
  219. printf("%d hashes\n",(int)hashes.size());
  220. printf("Finding collisions\n");
  221. HashSet<uint128_t> collisions;
  222. FindCollisions(hashes,collisions,1000);
  223. printf("%d collisions\n",(int)collisions.size());
  224. printf("Mapping collisions\n");
  225. CollisionMap<uint128_t,ByteVec> cmap;
  226. CollisionCallback<uint128_t> c2(hash,collisions,cmap);
  227. TwoBytesKeygen(20,c2);
  228. printf("Dumping collisions\n");
  229. DumpCollisionMap(cmap);
  230. }