Spooky.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // Spooky Hash
  2. // A 128-bit noncryptographic hash, for checksums and table lookup
  3. // By Bob Jenkins. Public domain.
  4. // Oct 31 2010: published framework, disclaimer ShortHash isn't right
  5. // Nov 7 2010: disabled ShortHash
  6. // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
  7. #include <memory.h>
  8. #include "Spooky.h"
  9. #define ALLOW_UNALIGNED_READS 1
  10. //
  11. // short hash ... it could be used on any message,
  12. // but it's used by Spooky just for short messages.
  13. //
  14. void SpookyHash::Short(
  15. const void *message,
  16. size_t length,
  17. uint64 *hash1,
  18. uint64 *hash2)
  19. {
  20. uint64 buf[sc_numVars];
  21. union
  22. {
  23. const uint8 *p8;
  24. uint32 *p32;
  25. uint64 *p64;
  26. size_t i;
  27. } u;
  28. u.p8 = (const uint8 *)message;
  29. if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
  30. {
  31. memcpy(buf, message, length);
  32. u.p64 = buf;
  33. }
  34. size_t remainder = length%32;
  35. uint64 a=*hash1;
  36. uint64 b=*hash2;
  37. uint64 c=sc_const;
  38. uint64 d=sc_const;
  39. if (length > 15)
  40. {
  41. const uint64 *end = u.p64 + (length/32)*4;
  42. // handle all complete sets of 32 bytes
  43. for (; u.p64 < end; u.p64 += 4)
  44. {
  45. c += u.p64[0];
  46. d += u.p64[1];
  47. ShortMix(a,b,c,d);
  48. a += u.p64[2];
  49. b += u.p64[3];
  50. }
  51. //Handle the case of 16+ remaining bytes.
  52. if (remainder >= 16)
  53. {
  54. c += u.p64[0];
  55. d += u.p64[1];
  56. ShortMix(a,b,c,d);
  57. u.p64 += 2;
  58. remainder -= 16;
  59. }
  60. }
  61. // Handle the last 0..15 bytes, and its length
  62. d = ((uint64)length) << 56;
  63. switch (remainder)
  64. {
  65. case 15:
  66. d += ((uint64)u.p8[14]) << 48;
  67. case 14:
  68. d += ((uint64)u.p8[13]) << 40;
  69. case 13:
  70. d += ((uint64)u.p8[12]) << 32;
  71. case 12:
  72. d += u.p32[2];
  73. c += u.p64[0];
  74. break;
  75. case 11:
  76. d += ((uint64)u.p8[10]) << 16;
  77. case 10:
  78. d += ((uint64)u.p8[9]) << 8;
  79. case 9:
  80. d += (uint64)u.p8[8];
  81. case 8:
  82. c += u.p64[0];
  83. break;
  84. case 7:
  85. c += ((uint64)u.p8[6]) << 48;
  86. case 6:
  87. c += ((uint64)u.p8[5]) << 40;
  88. case 5:
  89. c += ((uint64)u.p8[4]) << 32;
  90. case 4:
  91. c += u.p32[0];
  92. break;
  93. case 3:
  94. c += ((uint64)u.p8[2]) << 16;
  95. case 2:
  96. c += ((uint64)u.p8[1]) << 8;
  97. case 1:
  98. c += (uint64)u.p8[0];
  99. break;
  100. case 0:
  101. c += sc_const;
  102. d += sc_const;
  103. }
  104. ShortEnd(a,b,c,d);
  105. *hash1 = a;
  106. *hash2 = b;
  107. }
  108. // do the whole hash in one call
  109. void SpookyHash::Hash128(
  110. const void *message,
  111. size_t length,
  112. uint64 *hash1,
  113. uint64 *hash2)
  114. {
  115. if (length < sc_bufSize)
  116. {
  117. Short(message, length, hash1, hash2);
  118. return;
  119. }
  120. uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
  121. uint64 buf[sc_numVars];
  122. uint64 *end;
  123. union
  124. {
  125. const uint8 *p8;
  126. uint64 *p64;
  127. size_t i;
  128. } u;
  129. size_t remainder;
  130. h0=h3=h6=h9 = *hash1;
  131. h1=h4=h7=h10 = *hash2;
  132. h2=h5=h8=h11 = sc_const;
  133. u.p8 = (const uint8 *)message;
  134. end = u.p64 + (length/sc_blockSize)*sc_numVars;
  135. // handle all whole sc_blockSize blocks of bytes
  136. if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
  137. {
  138. while (u.p64 < end)
  139. {
  140. Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  141. u.p64 += sc_numVars;
  142. }
  143. }
  144. else
  145. {
  146. while (u.p64 < end)
  147. {
  148. memcpy(buf, u.p64, sc_blockSize);
  149. Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  150. u.p64 += sc_numVars;
  151. }
  152. }
  153. // handle the last partial block of sc_blockSize bytes
  154. remainder = (length - ((const uint8 *)end-(const uint8 *)message));
  155. memcpy(buf, end, remainder);
  156. memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
  157. ((uint8 *)buf)[sc_blockSize-1] = remainder;
  158. Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  159. // do some final mixing
  160. End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  161. *hash1 = h0;
  162. *hash2 = h1;
  163. }
  164. // init spooky state
  165. void SpookyHash::Init(uint64 seed1, uint64 seed2)
  166. {
  167. m_length = 0;
  168. m_remainder = 0;
  169. m_state[0] = seed1;
  170. m_state[1] = seed2;
  171. }
  172. // add a message fragment to the state
  173. void SpookyHash::Update(const void *message, size_t length)
  174. {
  175. uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
  176. size_t newLength = length + m_remainder;
  177. uint8 remainder;
  178. union
  179. {
  180. const uint8 *p8;
  181. uint64 *p64;
  182. size_t i;
  183. } u;
  184. const uint64 *end;
  185. // Is this message fragment too short? If it is, stuff it away.
  186. if (newLength < sc_bufSize)
  187. {
  188. memcpy(&((uint8 *)m_data)[m_remainder], message, length);
  189. m_length = length + m_length;
  190. m_remainder = (uint8)newLength;
  191. return;
  192. }
  193. // init the variables
  194. if (m_length < sc_bufSize)
  195. {
  196. h0=h3=h6=h9 = m_state[0];
  197. h1=h4=h7=h10 = m_state[1];
  198. h2=h5=h8=h11 = sc_const;
  199. }
  200. else
  201. {
  202. h0 = m_state[0];
  203. h1 = m_state[1];
  204. h2 = m_state[2];
  205. h3 = m_state[3];
  206. h4 = m_state[4];
  207. h5 = m_state[5];
  208. h6 = m_state[6];
  209. h7 = m_state[7];
  210. h8 = m_state[8];
  211. h9 = m_state[9];
  212. h10 = m_state[10];
  213. h11 = m_state[11];
  214. }
  215. m_length = length + m_length;
  216. // if we've got anything stuffed away, use it now
  217. if (m_remainder)
  218. {
  219. uint8 prefix = sc_bufSize-m_remainder;
  220. memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
  221. u.p64 = m_data;
  222. Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  223. Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  224. u.p8 = ((const uint8 *)message) + prefix;
  225. length -= prefix;
  226. }
  227. else
  228. {
  229. u.p8 = (const uint8 *)message;
  230. }
  231. // handle all whole blocks of sc_blockSize bytes
  232. end = u.p64 + (length/sc_blockSize)*sc_numVars;
  233. remainder = (uint8)(length-((const uint8 *)end-u.p8));
  234. if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
  235. {
  236. while (u.p64 < end)
  237. {
  238. Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  239. u.p64 += sc_numVars;
  240. }
  241. }
  242. else
  243. {
  244. while (u.p64 < end)
  245. {
  246. memcpy(m_data, u.p8, sc_blockSize);
  247. Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  248. u.p64 += sc_numVars;
  249. }
  250. }
  251. // stuff away the last few bytes
  252. m_remainder = remainder;
  253. memcpy(m_data, end, remainder);
  254. // stuff away the variables
  255. m_state[0] = h0;
  256. m_state[1] = h1;
  257. m_state[2] = h2;
  258. m_state[3] = h3;
  259. m_state[4] = h4;
  260. m_state[5] = h5;
  261. m_state[6] = h6;
  262. m_state[7] = h7;
  263. m_state[8] = h8;
  264. m_state[9] = h9;
  265. m_state[10] = h10;
  266. m_state[11] = h11;
  267. }
  268. // report the hash for the concatenation of all message fragments so far
  269. void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
  270. {
  271. // init the variables
  272. if (m_length < sc_bufSize)
  273. {
  274. Short( m_data, m_length, hash1, hash2);
  275. return;
  276. }
  277. const uint64 *data = (const uint64 *)m_data;
  278. uint8 remainder = m_remainder;
  279. uint64 h0 = m_state[0];
  280. uint64 h1 = m_state[1];
  281. uint64 h2 = m_state[2];
  282. uint64 h3 = m_state[3];
  283. uint64 h4 = m_state[4];
  284. uint64 h5 = m_state[5];
  285. uint64 h6 = m_state[6];
  286. uint64 h7 = m_state[7];
  287. uint64 h8 = m_state[8];
  288. uint64 h9 = m_state[9];
  289. uint64 h10 = m_state[10];
  290. uint64 h11 = m_state[11];
  291. if (remainder >= sc_blockSize)
  292. {
  293. // m_data can contain two blocks; handle any whole first block
  294. Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  295. data += sc_numVars;
  296. remainder -= sc_blockSize;
  297. }
  298. // mix in the last partial block, and the length mod sc_blockSize
  299. memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder));
  300. ((uint8 *)data)[sc_blockSize-1] = remainder;
  301. Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  302. // do some final mixing
  303. End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
  304. *hash1 = h0;
  305. *hash2 = h1;
  306. }