AvalancheTest.h 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. //-----------------------------------------------------------------------------
  2. // Flipping a single bit of a key should cause an "avalanche" of changes in
  3. // the hash function's output. Ideally, each output bits should flip 50% of
  4. // the time - if the probability of an output bit flipping is not 50%, that bit
  5. // is "biased". Too much bias means that patterns applied to the input will
  6. // cause "echoes" of the patterns in the output, which in turn can cause the
  7. // hash function to fail to create an even, random distribution of hash values.
  8. #pragma once
  9. #include "Types.h"
  10. #include "Random.h"
  11. #include <vector>
  12. #include <stdio.h>
  13. #include <math.h>
  14. // Avalanche fails if a bit is biased by more than 1%
  15. #define AVALANCHE_FAIL 0.01
  16. double maxBias ( std::vector<int> & counts, int reps );
  17. //-----------------------------------------------------------------------------
  18. template < typename keytype, typename hashtype >
  19. void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
  20. {
  21. const int keybytes = sizeof(keytype);
  22. const int hashbytes = sizeof(hashtype);
  23. const int keybits = keybytes * 8;
  24. const int hashbits = hashbytes * 8;
  25. keytype K;
  26. hashtype A,B;
  27. for(int irep = 0; irep < reps; irep++)
  28. {
  29. if(irep % (reps/10) == 0) printf(".");
  30. r.rand_p(&K,keybytes);
  31. hash(&K,keybytes,0,&A);
  32. int * cursor = &counts[0];
  33. for(int iBit = 0; iBit < keybits; iBit++)
  34. {
  35. flipbit(&K,keybytes,iBit);
  36. hash(&K,keybytes,0,&B);
  37. flipbit(&K,keybytes,iBit);
  38. for(int iOut = 0; iOut < hashbits; iOut++)
  39. {
  40. int bitA = getbit(&A,hashbytes,iOut);
  41. int bitB = getbit(&B,hashbytes,iOut);
  42. (*cursor++) += (bitA ^ bitB);
  43. }
  44. }
  45. }
  46. }
  47. //-----------------------------------------------------------------------------
  48. template < typename keytype, typename hashtype >
  49. bool AvalancheTest ( pfHash hash, const int reps )
  50. {
  51. Rand r(48273);
  52. const int keybytes = sizeof(keytype);
  53. const int hashbytes = sizeof(hashtype);
  54. const int keybits = keybytes * 8;
  55. const int hashbits = hashbytes * 8;
  56. printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
  57. //----------
  58. std::vector<int> bins(keybits*hashbits,0);
  59. calcBias<keytype,hashtype>(hash,bins,reps,r);
  60. //----------
  61. bool result = true;
  62. double b = maxBias(bins,reps);
  63. printf(" worst bias is %f%%",b * 100.0);
  64. if(b > AVALANCHE_FAIL)
  65. {
  66. printf(" !!!!! ");
  67. result = false;
  68. }
  69. printf("\n");
  70. return result;
  71. }
  72. //----------------------------------------------------------------------------
  73. // Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
  74. // not really all that useful.
  75. template< typename keytype, typename hashtype >
  76. void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
  77. {
  78. Rand r(11938);
  79. const int keybytes = sizeof(keytype);
  80. const int hashbytes = sizeof(hashtype);
  81. const int hashbits = hashbytes * 8;
  82. std::vector<int> bins(hashbits*hashbits*4,0);
  83. keytype key;
  84. hashtype h1,h2;
  85. for(int irep = 0; irep < reps; irep++)
  86. {
  87. if(verbose)
  88. {
  89. if(irep % (reps/10) == 0) printf(".");
  90. }
  91. r.rand_p(&key,keybytes);
  92. hash(&key,keybytes,0,&h1);
  93. flipbit(key,keybit);
  94. hash(&key,keybytes,0,&h2);
  95. hashtype d = h1 ^ h2;
  96. for(int out1 = 0; out1 < hashbits; out1++)
  97. for(int out2 = 0; out2 < hashbits; out2++)
  98. {
  99. if(out1 == out2) continue;
  100. uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
  101. bins[(out1 * hashbits + out2) * 4 + b]++;
  102. }
  103. }
  104. if(verbose) printf("\n");
  105. maxBias = 0;
  106. for(int out1 = 0; out1 < hashbits; out1++)
  107. {
  108. for(int out2 = 0; out2 < hashbits; out2++)
  109. {
  110. if(out1 == out2)
  111. {
  112. if(verbose) printf("\\");
  113. continue;
  114. }
  115. double bias = 0;
  116. for(int b = 0; b < 4; b++)
  117. {
  118. double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
  119. b2 = fabs(b2 * 2 - 1);
  120. if(b2 > bias) bias = b2;
  121. }
  122. if(bias > maxBias)
  123. {
  124. maxBias = bias;
  125. maxA = out1;
  126. maxB = out2;
  127. }
  128. if(verbose)
  129. {
  130. if (bias < 0.01) printf(".");
  131. else if(bias < 0.05) printf("o");
  132. else if(bias < 0.33) printf("O");
  133. else printf("X");
  134. }
  135. }
  136. if(verbose) printf("\n");
  137. }
  138. }
  139. //----------
  140. template< typename keytype, typename hashtype >
  141. bool BicTest ( pfHash hash, const int reps )
  142. {
  143. const int keybytes = sizeof(keytype);
  144. const int keybits = keybytes * 8;
  145. double maxBias = 0;
  146. int maxK = 0;
  147. int maxA = 0;
  148. int maxB = 0;
  149. for(int i = 0; i < keybits; i++)
  150. {
  151. if(i % (keybits/10) == 0) printf(".");
  152. double bias;
  153. int a,b;
  154. BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
  155. if(bias > maxBias)
  156. {
  157. maxBias = bias;
  158. maxK = i;
  159. maxA = a;
  160. maxB = b;
  161. }
  162. }
  163. printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
  164. // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
  165. bool result = (maxBias < 0.05);
  166. return result;
  167. }
  168. //-----------------------------------------------------------------------------
  169. // BIC test variant - store all intermediate data in a table, draw diagram
  170. // afterwards (much faster)
  171. template< typename keytype, typename hashtype >
  172. void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
  173. {
  174. const int keybytes = sizeof(keytype);
  175. const int keybits = keybytes * 8;
  176. const int hashbytes = sizeof(hashtype);
  177. const int hashbits = hashbytes * 8;
  178. const int pagesize = hashbits*hashbits*4;
  179. Rand r(11938);
  180. double maxBias = 0;
  181. int maxK = 0;
  182. int maxA = 0;
  183. int maxB = 0;
  184. keytype key;
  185. hashtype h1,h2;
  186. std::vector<int> bins(keybits*pagesize,0);
  187. for(int keybit = 0; keybit < keybits; keybit++)
  188. {
  189. if(keybit % (keybits/10) == 0) printf(".");
  190. int * page = &bins[keybit*pagesize];
  191. for(int irep = 0; irep < reps; irep++)
  192. {
  193. r.rand_p(&key,keybytes);
  194. hash(&key,keybytes,0,&h1);
  195. flipbit(key,keybit);
  196. hash(&key,keybytes,0,&h2);
  197. hashtype d = h1 ^ h2;
  198. for(int out1 = 0; out1 < hashbits-1; out1++)
  199. for(int out2 = out1+1; out2 < hashbits; out2++)
  200. {
  201. int * b = &page[(out1*hashbits+out2)*4];
  202. uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
  203. b[x]++;
  204. }
  205. }
  206. }
  207. printf("\n");
  208. for(int out1 = 0; out1 < hashbits-1; out1++)
  209. {
  210. for(int out2 = out1+1; out2 < hashbits; out2++)
  211. {
  212. if(verbose) printf("(%3d,%3d) - ",out1,out2);
  213. for(int keybit = 0; keybit < keybits; keybit++)
  214. {
  215. int * page = &bins[keybit*pagesize];
  216. int * bins = &page[(out1*hashbits+out2)*4];
  217. double bias = 0;
  218. for(int b = 0; b < 4; b++)
  219. {
  220. double b2 = double(bins[b]) / double(reps / 2);
  221. b2 = fabs(b2 * 2 - 1);
  222. if(b2 > bias) bias = b2;
  223. }
  224. if(bias > maxBias)
  225. {
  226. maxBias = bias;
  227. maxK = keybit;
  228. maxA = out1;
  229. maxB = out2;
  230. }
  231. if(verbose)
  232. {
  233. if (bias < 0.01) printf(".");
  234. else if(bias < 0.05) printf("o");
  235. else if(bias < 0.33) printf("O");
  236. else printf("X");
  237. }
  238. }
  239. // Finished keybit
  240. if(verbose) printf("\n");
  241. }
  242. if(verbose)
  243. {
  244. for(int i = 0; i < keybits+12; i++) printf("-");
  245. printf("\n");
  246. }
  247. }
  248. printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
  249. }
  250. //-----------------------------------------------------------------------------
  251. // BIC test variant - iterate over output bits, then key bits. No temp storage,
  252. // but slooooow
  253. template< typename keytype, typename hashtype >
  254. void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
  255. {
  256. const int keybytes = sizeof(keytype);
  257. const int keybits = keybytes * 8;
  258. const int hashbytes = sizeof(hashtype);
  259. const int hashbits = hashbytes * 8;
  260. Rand r(11938);
  261. double maxBias = 0;
  262. int maxK = 0;
  263. int maxA = 0;
  264. int maxB = 0;
  265. keytype key;
  266. hashtype h1,h2;
  267. for(int out1 = 0; out1 < hashbits-1; out1++)
  268. for(int out2 = out1+1; out2 < hashbits; out2++)
  269. {
  270. if(verbose) printf("(%3d,%3d) - ",out1,out2);
  271. for(int keybit = 0; keybit < keybits; keybit++)
  272. {
  273. int bins[4] = { 0, 0, 0, 0 };
  274. for(int irep = 0; irep < reps; irep++)
  275. {
  276. r.rand_p(&key,keybytes);
  277. hash(&key,keybytes,0,&h1);
  278. flipbit(key,keybit);
  279. hash(&key,keybytes,0,&h2);
  280. hashtype d = h1 ^ h2;
  281. uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
  282. bins[b]++;
  283. }
  284. double bias = 0;
  285. for(int b = 0; b < 4; b++)
  286. {
  287. double b2 = double(bins[b]) / double(reps / 2);
  288. b2 = fabs(b2 * 2 - 1);
  289. if(b2 > bias) bias = b2;
  290. }
  291. if(bias > maxBias)
  292. {
  293. maxBias = bias;
  294. maxK = keybit;
  295. maxA = out1;
  296. maxB = out2;
  297. }
  298. if(verbose)
  299. {
  300. if (bias < 0.05) printf(".");
  301. else if(bias < 0.10) printf("o");
  302. else if(bias < 0.50) printf("O");
  303. else printf("X");
  304. }
  305. }
  306. // Finished keybit
  307. if(verbose) printf("\n");
  308. }
  309. printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
  310. }
  311. //-----------------------------------------------------------------------------