KeysetTest.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. //-----------------------------------------------------------------------------
  2. // Keyset tests generate various sorts of difficult-to-hash keysets and compare
  3. // the distribution and collision frequency of the hash results against an
  4. // ideal random distribution
  5. // The sanity checks are also in this cpp/h
  6. #pragma once
  7. #include "Types.h"
  8. #include "Stats.h"
  9. #include "Random.h" // for rand_p
  10. #include <algorithm> // for std::swap
  11. #include <assert.h>
  12. //-----------------------------------------------------------------------------
  13. // Sanity tests
  14. bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
  15. bool SanityTest ( pfHash hash, const int hashbits );
  16. void AppendedZeroesTest ( pfHash hash, const int hashbits );
  17. //-----------------------------------------------------------------------------
  18. // Keyset 'Combination' - all possible combinations of input blocks
  19. template< typename hashtype >
  20. void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
  21. uint32_t * blocks, int blockcount,
  22. pfHash hash, std::vector<hashtype> & hashes )
  23. {
  24. if(len == maxlen) return;
  25. for(int i = 0; i < blockcount; i++)
  26. {
  27. key[len] = blocks[i];
  28. //if(len == maxlen-1)
  29. {
  30. hashtype h;
  31. hash(key,(len+1) * sizeof(uint32_t),0,&h);
  32. hashes.push_back(h);
  33. }
  34. //else
  35. {
  36. CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
  37. }
  38. }
  39. }
  40. template< typename hashtype >
  41. bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
  42. {
  43. printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
  44. //----------
  45. std::vector<hashtype> hashes;
  46. uint32_t * key = new uint32_t[maxlen];
  47. CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
  48. delete [] key;
  49. printf("%d keys\n",(int)hashes.size());
  50. //----------
  51. bool result = true;
  52. result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
  53. printf("\n");
  54. return result;
  55. }
  56. //----------------------------------------------------------------------------
  57. // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
  58. // consisting of all possible permutations of those blocks
  59. template< typename hashtype >
  60. void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
  61. {
  62. if(k == blockcount-1)
  63. {
  64. hashtype h;
  65. hash(blocks,blockcount * sizeof(uint32_t),0,&h);
  66. hashes.push_back(h);
  67. return;
  68. }
  69. for(int i = k; i < blockcount; i++)
  70. {
  71. std::swap(blocks[k],blocks[i]);
  72. PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
  73. std::swap(blocks[k],blocks[i]);
  74. }
  75. }
  76. template< typename hashtype >
  77. bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
  78. {
  79. printf("Keyset 'Permutation' - %d blocks - ",blockcount);
  80. //----------
  81. std::vector<hashtype> hashes;
  82. PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
  83. printf("%d keys\n",(int)hashes.size());
  84. //----------
  85. bool result = true;
  86. result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
  87. printf("\n");
  88. return result;
  89. }
  90. //-----------------------------------------------------------------------------
  91. // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
  92. template < typename keytype, typename hashtype >
  93. void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
  94. {
  95. const int nbytes = sizeof(keytype);
  96. const int nbits = nbytes * 8;
  97. hashtype h;
  98. for(int i = start; i < nbits; i++)
  99. {
  100. flipbit(&k,nbytes,i);
  101. if(inclusive || (bitsleft == 1))
  102. {
  103. hash(&k,sizeof(keytype),0,&h);
  104. hashes.push_back(h);
  105. }
  106. if(bitsleft > 1)
  107. {
  108. SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
  109. }
  110. flipbit(&k,nbytes,i);
  111. }
  112. }
  113. //----------
  114. template < int keybits, typename hashtype >
  115. bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram )
  116. {
  117. printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
  118. typedef Blob<keybits> keytype;
  119. std::vector<hashtype> hashes;
  120. keytype k;
  121. memset(&k,0,sizeof(k));
  122. if(inclusive)
  123. {
  124. hashtype h;
  125. hash(&k,sizeof(keytype),0,&h);
  126. hashes.push_back(h);
  127. }
  128. SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
  129. printf("%d keys\n",(int)hashes.size());
  130. bool result = true;
  131. result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
  132. printf("\n");
  133. return result;
  134. }
  135. //-----------------------------------------------------------------------------
  136. // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
  137. // all possible keys with bits set in that window
  138. template < typename keytype, typename hashtype >
  139. bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
  140. {
  141. const int keybits = sizeof(keytype) * 8;
  142. const int keycount = 1 << windowbits;
  143. std::vector<hashtype> hashes;
  144. hashes.resize(keycount);
  145. bool result = true;
  146. int testcount = keybits;
  147. printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
  148. for(int j = 0; j <= testcount; j++)
  149. {
  150. int minbit = j;
  151. keytype key;
  152. for(int i = 0; i < keycount; i++)
  153. {
  154. key = i;
  155. //key = key << minbit;
  156. lrot(&key,sizeof(keytype),minbit);
  157. hash(&key,sizeof(keytype),0,&hashes[i]);
  158. }
  159. printf("Window at %3d - ",j);
  160. result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
  161. //printf("\n");
  162. }
  163. return result;
  164. }
  165. //-----------------------------------------------------------------------------
  166. // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
  167. // bytes.
  168. // (This keyset type is designed to make MurmurHash2 fail)
  169. template < typename hashtype >
  170. bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
  171. {
  172. printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
  173. Rand r(483723);
  174. std::vector<hashtype> hashes;
  175. hashes.resize(keycount);
  176. int keyLen = cycleLen * cycleReps;
  177. uint8_t * cycle = new uint8_t[cycleLen + 16];
  178. uint8_t * key = new uint8_t[keyLen];
  179. //----------
  180. for(int i = 0; i < keycount; i++)
  181. {
  182. r.rand_p(cycle,cycleLen);
  183. *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
  184. for(int j = 0; j < keyLen; j++)
  185. {
  186. key[j] = cycle[j % cycleLen];
  187. }
  188. hash(key,keyLen,0,&hashes[i]);
  189. }
  190. //----------
  191. bool result = true;
  192. result &= TestHashList(hashes,true,true,drawDiagram);
  193. printf("\n");
  194. delete [] cycle;
  195. delete [] key;
  196. return result;
  197. }
  198. //-----------------------------------------------------------------------------
  199. // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
  200. void TwoBytesKeygen ( int maxlen, KeyCallback & c );
  201. template < typename hashtype >
  202. bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
  203. {
  204. std::vector<hashtype> hashes;
  205. HashCallback<hashtype> c(hash,hashes);
  206. TwoBytesKeygen(maxlen,c);
  207. bool result = true;
  208. result &= TestHashList(hashes,true,true,drawDiagram);
  209. printf("\n");
  210. return result;
  211. }
  212. //-----------------------------------------------------------------------------
  213. // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
  214. // where "core" consists of all possible combinations of the given character
  215. // set of length N.
  216. template < typename hashtype >
  217. bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
  218. {
  219. const int prefixlen = (int)strlen(prefix);
  220. const int suffixlen = (int)strlen(suffix);
  221. const int corecount = (int)strlen(coreset);
  222. const int keybytes = prefixlen + corelen + suffixlen;
  223. const int keycount = (int)pow(double(corecount),double(corelen));
  224. printf("Keyset 'Text' - keys of form \"%s[",prefix);
  225. for(int i = 0; i < corelen; i++) printf("X");
  226. printf("]%s\" - %d keys\n",suffix,keycount);
  227. uint8_t * key = new uint8_t[keybytes+1];
  228. key[keybytes] = 0;
  229. memcpy(key,prefix,prefixlen);
  230. memcpy(key+prefixlen+corelen,suffix,suffixlen);
  231. //----------
  232. std::vector<hashtype> hashes;
  233. hashes.resize(keycount);
  234. for(int i = 0; i < keycount; i++)
  235. {
  236. int t = i;
  237. for(int j = 0; j < corelen; j++)
  238. {
  239. key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
  240. }
  241. hash(key,keybytes,0,&hashes[i]);
  242. }
  243. //----------
  244. bool result = true;
  245. result &= TestHashList(hashes,true,true,drawDiagram);
  246. printf("\n");
  247. delete [] key;
  248. return result;
  249. }
  250. //-----------------------------------------------------------------------------
  251. // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
  252. // We reuse one block of empty bytes, otherwise the RAM cost is enormous.
  253. template < typename hashtype >
  254. bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
  255. {
  256. int keycount = 64*1024;
  257. printf("Keyset 'Zeroes' - %d keys\n",keycount);
  258. unsigned char * nullblock = new unsigned char[keycount];
  259. memset(nullblock,0,keycount);
  260. //----------
  261. std::vector<hashtype> hashes;
  262. hashes.resize(keycount);
  263. for(int i = 0; i < keycount; i++)
  264. {
  265. hash(nullblock,i,0,&hashes[i]);
  266. }
  267. bool result = true;
  268. result &= TestHashList(hashes,true,true,drawDiagram);
  269. printf("\n");
  270. delete [] nullblock;
  271. return result;
  272. }
  273. //-----------------------------------------------------------------------------
  274. // Keyset 'Seed' - hash "the quick brown fox..." using different seeds
  275. template < typename hashtype >
  276. bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
  277. {
  278. printf("Keyset 'Seed' - %d keys\n",keycount);
  279. const char * text = "The quick brown fox jumps over the lazy dog";
  280. const int len = (int)strlen(text);
  281. //----------
  282. std::vector<hashtype> hashes;
  283. hashes.resize(keycount);
  284. for(int i = 0; i < keycount; i++)
  285. {
  286. hash(text,len,i,&hashes[i]);
  287. }
  288. bool result = true;
  289. result &= TestHashList(hashes,true,true,drawDiagram);
  290. printf("\n");
  291. return result;
  292. }
  293. //-----------------------------------------------------------------------------