Hashes.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. #include "Hashes.h"
  2. #include "Random.h"
  3. #include <stdlib.h>
  4. //#include <stdint.h>
  5. #include <assert.h>
  6. //#include <emmintrin.h>
  7. //#include <xmmintrin.h>
  8. // ----------------------------------------------------------------------------
  9. //jodyhash
  10. void
  11. jodyhash(const void *key, int len, uint32_t seed, void *out)
  12. {
  13. uint32_t h = seed;
  14. const uint8_t *data = (const uint8_t *)key;
  15. for (int i = 0; i < len; i++) {
  16. h ^= h >> 3;
  17. h ^= h << 5;
  18. h ^= data[i];
  19. }
  20. *(uint32_t *) out = h;
  21. }
  22. // ----------------------------------------------------------------------------
  23. //fake / bad hashes
  24. void
  25. BadHash(const void *key, int len, uint32_t seed, void *out)
  26. {
  27. uint32_t h = seed;
  28. const uint8_t *data = (const uint8_t *)key;
  29. for (int i = 0; i < len; i++) {
  30. h ^= h >> 3;
  31. h ^= h << 5;
  32. h ^= data[i];
  33. }
  34. *(uint32_t *) out = h;
  35. }
  36. void
  37. sumhash(const void *key, int len, uint32_t seed, void *out)
  38. {
  39. uint32_t h = seed;
  40. const uint8_t *data = (const uint8_t *)key;
  41. for (int i = 0; i < len; i++) {
  42. h += data[i];
  43. }
  44. *(uint32_t *) out = h;
  45. }
  46. void
  47. sumhash32(const void *key, int len, uint32_t seed, void *out)
  48. {
  49. uint32_t h = seed;
  50. const uint32_t *data = (const uint32_t *)key;
  51. for (int i = 0; i < len / 4; i++) {
  52. h += data[i];
  53. }
  54. *(uint32_t *) out = h;
  55. }
  56. void
  57. DoNothingHash(const void *, int, uint32_t, void *)
  58. {
  59. }
  60. void
  61. NoopOAATReadHash(const void *key, int len, uint32_t seed, void *out)
  62. {
  63. volatile uint8_t c;
  64. const uint8_t *ptr = (uint8_t *)key;
  65. for(int i=0; i < len; i++)
  66. {
  67. c=ptr[i];
  68. }
  69. }
  70. //-----------------------------------------------------------------------------
  71. //One - byte - at - a - time hash based on Murmur 's mix
  72. uint32_t MurmurOAAT(const void *key, int len, uint32_t seed)
  73. {
  74. const uint8_t *data = (const uint8_t *)key;
  75. uint32_t h = seed;
  76. for (int i = 0; i < len; i++) {
  77. h ^= data[i];
  78. h *= 0x5bd1e995;
  79. h ^= h >> 15;
  80. }
  81. return h;
  82. }
  83. void
  84. MurmurOAAT_test(const void *key, int len, uint32_t seed, void *out)
  85. {
  86. *(uint32_t *) out = MurmurOAAT(key, len, seed);
  87. }
  88. //----------------------------------------------------------------------------
  89. void
  90. FNV32a(const void *key, int len, uint32_t seed, void *out)
  91. {
  92. unsigned int h = seed;
  93. const uint8_t *data = (const uint8_t *)key;
  94. h ^= BIG_CONSTANT(2166136261);
  95. for (int i = 0; i < len; i++) {
  96. h ^= data[i];
  97. h *= 16777619;
  98. }
  99. *(uint32_t *) out = h;
  100. }
  101. void
  102. FNV32a_YoshimitsuTRIAD(const void *key, int len, uint32_t seed, void *out)
  103. {
  104. const uint8_t *p = (const uint8_t *)key;
  105. const uint32_t PRIME = 709607;
  106. uint32_t hash32A = seed ^ BIG_CONSTANT(2166136261);
  107. uint32_t hash32B = BIG_CONSTANT(2166136261) + len;
  108. uint32_t hash32C = BIG_CONSTANT(2166136261);
  109. for (; len >= 3 * 2 * sizeof(uint32_t); len -= 3 * 2 * sizeof(uint32_t), p += 3 * 2 * sizeof(uint32_t)) {
  110. hash32A = (hash32A ^ (ROTL32(*(uint32_t *) (p + 0), 5) ^ *(uint32_t *) (p + 4))) * PRIME;
  111. hash32B = (hash32B ^ (ROTL32(*(uint32_t *) (p + 8), 5) ^ *(uint32_t *) (p + 12))) * PRIME;
  112. hash32C = (hash32C ^ (ROTL32(*(uint32_t *) (p + 16), 5) ^ *(uint32_t *) (p + 20))) * PRIME;
  113. }
  114. if (p != key) {
  115. hash32A = (hash32A ^ ROTL32(hash32C, 5)) * PRIME;
  116. }
  117. //Cases 0. .31
  118. if (len & 4 * sizeof(uint32_t)) {
  119. hash32A = (hash32A ^ (ROTL32(*(uint32_t *) (p + 0), 5) ^ *(uint32_t *) (p + 4))) * PRIME;
  120. hash32B = (hash32B ^ (ROTL32(*(uint32_t *) (p + 8), 5) ^ *(uint32_t *) (p + 12))) * PRIME;
  121. p += 8 * sizeof(uint16_t);
  122. }
  123. //Cases 0. .15
  124. if (len & 2 * sizeof(uint32_t)) {
  125. hash32A = (hash32A ^ *(uint32_t *) (p + 0)) * PRIME;
  126. hash32B = (hash32B ^ *(uint32_t *) (p + 4)) * PRIME;
  127. p += 4 * sizeof(uint16_t);
  128. }
  129. //Cases:0. .7
  130. if (len & sizeof(uint32_t)) {
  131. hash32A = (hash32A ^ *(uint16_t *) (p + 0)) * PRIME;
  132. hash32B = (hash32B ^ *(uint16_t *) (p + 2)) * PRIME;
  133. p += 2 * sizeof(uint16_t);
  134. }
  135. //Cases:0. .3
  136. if (len & sizeof(uint16_t)) {
  137. hash32A = (hash32A ^ *(uint16_t *) p) * PRIME;
  138. p += sizeof(uint16_t);
  139. }
  140. if (len & 1)
  141. hash32A = (hash32A ^ *p) * PRIME;
  142. hash32A = (hash32A ^ ROTL32(hash32B, 5)) * PRIME;
  143. *(uint32_t *) out = hash32A ^ (hash32A >> 16);
  144. }
  145. void
  146. FNV64a(const void *key, int len, uint32_t seed, void *out)
  147. {
  148. uint64_t h = (uint64_t) seed;
  149. const uint8_t *data = (const uint8_t *)key;
  150. h ^= BIG_CONSTANT(0xcbf29ce484222325);
  151. for (int i = 0; i < len; i++) {
  152. h ^= data[i];
  153. h *= 0x100000001b3ULL;
  154. }
  155. *(uint64_t *) out = h;
  156. }
  157. //-----------------------------------------------------------------------------
  158. uint32_t x17(const void *key, int len, uint32_t h)
  159. {
  160. const uint8_t *data = (const uint8_t *)key;
  161. for (int i = 0; i < len; ++i) {
  162. h = 17 * h + (data[i] - ' ');
  163. }
  164. return h ^ (h >> 16);
  165. }
  166. void
  167. x17_test(const void *key, int len, uint32_t seed, void *out)
  168. {
  169. *(uint32_t *) out = x17(key, len, seed);
  170. }
  171. //-----------------------------------------------------------------------------
  172. //also used in perl5 as djb2
  173. void
  174. Bernstein(const void *key, int len, uint32_t seed, void *out)
  175. {
  176. const uint8_t *data = (const uint8_t *)key;
  177. for (int i = 0; i < len; ++i) {
  178. //seed = ((seed << 5) + seed) + data[i];
  179. seed = 33 * seed + data[i];
  180. }
  181. *(uint32_t *) out = seed;
  182. }
  183. //as used in perl5
  184. void
  185. sdbm(const void *key, int len, uint32_t hash, void *out)
  186. {
  187. unsigned char *str = (unsigned char *)key;
  188. const unsigned char *const end = (const unsigned char *)str + len;
  189. //note that perl5 adds the seed to the end of key, which looks like cargo cult
  190. while (str < end) {
  191. hash = (hash << 6) + (hash << 16) - hash + *str++;
  192. }
  193. *(uint32_t *) out = hash;
  194. }
  195. //as used in perl5 as one_at_a_time_hard
  196. void
  197. JenkinsOOAT(const void *key, int len, uint32_t hash, void *out)
  198. {
  199. unsigned char *str = (unsigned char *)key;
  200. const unsigned char *const end = (const unsigned char *)str + len;
  201. uint64_t s = (uint64_t) hash;
  202. unsigned char *seed = (unsigned char *)&s;
  203. //unsigned char seed[8];
  204. //note that perl5 adds the seed to the end of key, which looks like cargo cult
  205. while (str < end) {
  206. hash += (hash << 10);
  207. hash ^= (hash >> 6);
  208. hash += *str++;
  209. }
  210. hash += (hash << 10);
  211. hash ^= (hash >> 6);
  212. hash += seed[4];
  213. hash += (hash << 10);
  214. hash ^= (hash >> 6);
  215. hash += seed[5];
  216. hash += (hash << 10);
  217. hash ^= (hash >> 6);
  218. hash += seed[6];
  219. hash += (hash << 10);
  220. hash ^= (hash >> 6);
  221. hash += seed[7];
  222. hash += (hash << 10);
  223. hash ^= (hash >> 6);
  224. hash += (hash << 3);
  225. hash ^= (hash >> 11);
  226. hash = hash + (hash << 15);
  227. *(uint32_t *) out = hash;
  228. }
  229. //as used in perl5 until 5.17(one_at_a_time_old)
  230. void JenkinsOOAT_perl(const void *key, int len, uint32_t hash, void *out)
  231. {
  232. unsigned char *str = (unsigned char *)key;
  233. const unsigned char *const end = (const unsigned char *)str + len;
  234. while (str < end) {
  235. hash += *str++;
  236. hash += (hash << 10);
  237. hash ^= (hash >> 6);
  238. }
  239. hash += (hash << 3);
  240. hash ^= (hash >> 11);
  241. hash = hash + (hash << 15);
  242. *(uint32_t *) out = hash;
  243. }
  244. //------------------------------------------------
  245. // One of a smallest non-multiplicative One-At-a-Time function
  246. // that passes whole SMHasher.
  247. // Author: Sokolov Yura aka funny-falcon <funny.falcon@gmail.com>
  248. void GoodOAAT(const void *key, int len, uint32_t seed, void *out) {
  249. #define grol(x,n) (((x)<<(n))|((x)>>(32-(n))))
  250. #define gror(x,n) (((x)>>(n))|((x)<<(32-(n))))
  251. unsigned char *str = (unsigned char *)key;
  252. const unsigned char *const end = (const unsigned char *)str + len;
  253. uint32_t h1 = seed ^ 0x3b00;
  254. uint32_t h2 = grol(seed, 15);
  255. for (;str != end; str++) {
  256. h1 += str[0];
  257. h1 += h1 << 3; // h1 *= 9
  258. h2 += h1;
  259. // the rest could be as in MicroOAAT: h1 = grol(h1, 7)
  260. // but clang doesn't generate ROTL instruction then.
  261. h2 = grol(h2, 7);
  262. h2 += h2 << 2; // h2 *= 5
  263. }
  264. h1 ^= h2;
  265. /* now h1 passes all collision checks,
  266. * so it is suitable for hash-tables with prime numbers. */
  267. h1 += grol(h2, 14);
  268. h2 ^= h1; h2 += gror(h1, 6);
  269. h1 ^= h2; h1 += grol(h2, 5);
  270. h2 ^= h1; h2 += gror(h1, 8);
  271. *(uint32_t *) out = h2;
  272. #undef grol
  273. #undef gror
  274. }
  275. // MicroOAAT suitable for hash-tables using prime numbers.
  276. // It passes all collision checks.
  277. // Author: Sokolov Yura aka funny-falcon <funny.falcon@gmail.com>
  278. void MicroOAAT(const void *key, int len, uint32_t seed, void *out) {
  279. #define grol(x,n) (((x)<<(n))|((x)>>(32-(n))))
  280. #define gror(x,n) (((x)>>(n))|((x)<<(32-(n))))
  281. unsigned char *str = (unsigned char *)key;
  282. const unsigned char *const end = (const unsigned char *)str + len;
  283. uint32_t h1 = seed ^ 0x3b00;
  284. uint32_t h2 = grol(seed, 15);
  285. for (;str != end; str++) {
  286. h1 += str[0];
  287. h1 += h1 << 3; // h1 *= 9
  288. h2 -= h1;
  289. // unfortunately, clang produces bad code here,
  290. // cause it doesn't generate rotl instruction.
  291. h1 = grol(h1, 7);
  292. }
  293. *(uint32_t *) out = h1 ^ h2;
  294. #undef grol
  295. #undef gror
  296. }
  297. //-----------------------------------------------------------------------------
  298. //Crap8 hash from http://www.team5150.com / ~andrew / noncryptohashzoo / Crap8.html
  299. uint32_t Crap8(const uint8_t * key, uint32_t len, uint32_t seed)
  300. {
  301. #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
  302. #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
  303. const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
  304. uint32_t h = len + seed, k = n + len;
  305. uint64_t p;
  306. while (len >= 8) {
  307. c8mix(key4[0]) c8mix(key4[1]) key4 += 2;
  308. len -= 8;
  309. }
  310. if (len >= 4) {
  311. c8mix(key4[0]) key4 += 1;
  312. len -= 4;
  313. }
  314. if (len) {
  315. c8mix(key4[0] & ((1 << (len * 8)) - 1))
  316. }
  317. c8fold(h ^ k, n, k, k)
  318. return k;
  319. }
  320. void
  321. Crap8_test(const void *key, int len, uint32_t seed, void *out)
  322. {
  323. *(uint32_t *) out = Crap8((const uint8_t *)key, len, seed);
  324. }
  325. extern "C" {
  326. #ifdef __SSE2__
  327. void hasshe2 (const void *input, int len, uint32_t seed, void *out);
  328. #endif
  329. #if defined(__SSE4_2__) && defined(__x86_64__)
  330. uint32_t crc32c_hw(const void *input, int len, uint32_t seed);
  331. uint32_t crc32c(const void *input, int len, uint32_t seed);
  332. uint64_t crc64c_hw(const void *input, int len, uint32_t seed);
  333. #endif
  334. }
  335. #ifdef __SSE2__
  336. void
  337. hasshe2_test(const void *input, int len, uint32_t seed, void *out)
  338. {
  339. if (!len) {
  340. *(uint32_t *) out = 0;
  341. return;
  342. }
  343. if (len % 16) {
  344. //add pad NUL
  345. len += 16 - (len % 16);
  346. }
  347. hasshe2(input, len, seed, out);
  348. }
  349. #endif
  350. #if defined(__SSE4_2__) && (defined(__i686__) || defined(_M_IX86) || defined(__x86_64__))
  351. /* Compute CRC-32C using the Intel hardware instruction.
  352. TODO: arm8
  353. */
  354. void
  355. crc32c_hw_test(const void *input, int len, uint32_t seed, void *out)
  356. {
  357. if (!len) {
  358. *(uint32_t *) out = 0;
  359. return;
  360. }
  361. *(uint32_t *) out = crc32c_hw(input, len, seed);
  362. }
  363. /* Faster Adler SSE4.2 crc32 in HW */
  364. void
  365. crc32c_hw1_test(const void *input, int len, uint32_t seed, void *out)
  366. {
  367. if (!len) {
  368. *(uint32_t *) out = 0;
  369. return;
  370. }
  371. *(uint32_t *) out = crc32c(input, len, seed);
  372. }
  373. #if defined(__SSE4_2__) && defined(__x86_64__)
  374. /* Compute CRC-64C using the Intel hardware instruction. */
  375. void
  376. crc64c_hw_test(const void *input, int len, uint32_t seed, void *out)
  377. {
  378. if (!len) {
  379. *(uint64_t *) out = 0;
  380. return;
  381. }
  382. *(uint64_t *) out = crc64c_hw(input, len, seed);
  383. }
  384. #endif
  385. #endif
  386. /* Cloudflare optimized zlib crc32 with PCLMUL */
  387. #if 0
  388. void
  389. zlib_crc32_test(const void *input, int len, uint32_t seed, void *out)
  390. {
  391. if (!len) {
  392. *(uint32_t *) out = 0;
  393. return;
  394. }
  395. *(uint32_t *) out = crc32(seed, input, (unsigned)len);
  396. }
  397. #endif
  398. #if 0 && defined(__x86_64__) && (defined(__linux__) || defined(__APPLE__))
  399. extern "C" {
  400. uint64_t fhtw_test(const unsigned char key[16], const unsigned char *m, size_t len);
  401. int fhtw_hash(void* key, int key_len);
  402. }
  403. /* asm */
  404. inline void
  405. fhtw_test(const void *input, int len, uint32_t seed, void *out)
  406. {
  407. *(uint32_t *) out = fhtw_hash(input, len);
  408. }
  409. #endif
  410. #include "siphash.h"
  411. /* https://github.com/floodyberry/siphash */
  412. void
  413. siphash_test(const void *input, int len, uint32_t seed, void *out)
  414. {
  415. /* 128bit state, filled with a 32bit seed */
  416. unsigned char key[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  417. if (!len) {
  418. *(uint32_t *) out = 0;
  419. return;
  420. }
  421. memcpy(key, &seed, sizeof(seed));
  422. *(uint64_t *) out = siphash(key, (const unsigned char *)input, (size_t) len);
  423. }
  424. void
  425. siphash13_test(const void *input, int len, uint32_t seed, void *out)
  426. {
  427. unsigned char key[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  428. if (!len) {
  429. *(uint32_t *) out = 0;
  430. return;
  431. }
  432. memcpy(key, &seed, sizeof(seed));
  433. *(uint64_t *) out = siphash13(key, (const unsigned char *)input, (size_t) len);
  434. }
  435. void
  436. halfsiphash_test(const void *input, int len, uint32_t seed, void *out)
  437. {
  438. unsigned char key[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  439. if (!len) {
  440. *(uint32_t *) out = 0;
  441. return;
  442. }
  443. memcpy(key, &seed, sizeof(seed));
  444. *(uint32_t *) out = halfsiphash(key, (const unsigned char *)input, (size_t) len);
  445. }
  446. /* https://github.com/gamozolabs/falkhash */
  447. #if defined(__SSE4_2__) && defined(__x86_64__)
  448. extern "C" {
  449. uint64_t falkhash_test(uint8_t *data, uint64_t len, uint32_t seed, void *out);
  450. }
  451. void
  452. falkhash_test_cxx(const void *input, int len, uint32_t seed, void *out)
  453. {
  454. uint64_t hash[2] = {0ULL, 0ULL};
  455. if (!len) {
  456. *(uint32_t *) out = 0;
  457. return;
  458. }
  459. falkhash_test((uint8_t *)input, (uint64_t)len, seed, hash);
  460. *(uint64_t *) out = hash[0];
  461. }
  462. #endif