hasshe2.c 4.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /* http://cessu.blogspot.com/2008/11/hashing-with-sse2-revisited-or-my-hash.html */
  2. /* Compile with gcc -O3 -msse2 ... */
  3. #include <stdlib.h>
  4. #include <stdint.h>
  5. #include <assert.h>
  6. #ifdef __SSE2__
  7. #include <xmmintrin.h>
  8. static uint32_t coeffs[12] __attribute__((aligned(16))) = {
  9. /* Four carefully selected coefficients and interleaving zeros. */
  10. 2561893793UL, 0, 1388747947UL, 0,
  11. 3077216833UL, 0, 3427609723UL, 0,
  12. /* 128 bits of random data. */
  13. 0x564A4447, 0xC7265595, 0xE20C241D, 0x128FA608,
  14. };
  15. #define COMBINE_AND_MIX(c_1, c_2, s_1, s_2, in) \
  16. /* Phase 1: Perform four 32x32->64 bit multiplication with the \
  17. input block and words 1 and 3 coeffs, respectively. This \
  18. effectively propagates a bit change in input to 32 more \
  19. significant bit positions. Combine into internal state by \
  20. subtracting the result of multiplications from the internal \
  21. state. */ \
  22. s_1 = _mm_sub_epi64(s_1, _mm_mul_epu32(c_1, _mm_unpackhi_epi32(in, in))); \
  23. s_2 = _mm_sub_epi64(s_2, _mm_mul_epu32(c_2, _mm_unpacklo_epi32(in, in))); \
  24. \
  25. /* Phase 2: Perform shifts and xors to propagate the 32-bit \
  26. changes produced above into 64-bit (and even a little larger) \
  27. changes in the internal state. */ \
  28. /* state ^= state >64> 29; */ \
  29. s_1 = _mm_xor_si128(s_1, _mm_srli_epi64(s_1, 29)); \
  30. s_2 = _mm_xor_si128(s_2, _mm_srli_epi64(s_2, 29)); \
  31. /* state +64= state <64< 16; */ \
  32. s_1 = _mm_add_epi64(s_1, _mm_slli_epi64(s_1, 16)); \
  33. s_2 = _mm_add_epi64(s_2, _mm_slli_epi64(s_2, 16)); \
  34. /* state ^= state >64> 21; */ \
  35. s_1 = _mm_xor_si128(s_1, _mm_srli_epi64(s_1, 21)); \
  36. s_2 = _mm_xor_si128(s_2, _mm_srli_epi64(s_2, 21)); \
  37. /* state +64= state <128< 32; */ \
  38. s_1 = _mm_add_epi64(s_1, _mm_slli_si128(s_1, 4)); \
  39. s_2 = _mm_add_epi64(s_2, _mm_slli_si128(s_2, 4)); \
  40. \
  41. /* Phase 3: Propagate the changes among the four 64-bit words by \
  42. performing 64-bit subtractions and 32-bit word shuffling. */ \
  43. s_1 = _mm_sub_epi64(s_1, s_2); \
  44. s_2 = _mm_sub_epi64(_mm_shuffle_epi32(s_2, _MM_SHUFFLE(0, 3, 2, 1)), s_1); \
  45. s_1 = _mm_sub_epi64(_mm_shuffle_epi32(s_1, _MM_SHUFFLE(0, 1, 3, 2)), s_2); \
  46. s_2 = _mm_sub_epi64(_mm_shuffle_epi32(s_2, _MM_SHUFFLE(2, 1, 0, 3)), s_1); \
  47. s_1 = _mm_sub_epi64(_mm_shuffle_epi32(s_1, _MM_SHUFFLE(2, 1, 0, 3)), s_2); \
  48. \
  49. /* With good coefficients any one-bit flip in the input has now \
  50. changed all bits in the internal state with a probability \
  51. between 45% to 55%. */
  52. void hasshe2(const void *input_buf, int n_bytes, uint32_t seed, void *output_state)
  53. {
  54. __m128i coeffs_1, coeffs_2, rnd_data, input, state_1, state_2;
  55. assert(n_bytes % 16 == 0);
  56. coeffs_1 = _mm_load_si128((void *) coeffs);
  57. coeffs_2 = _mm_load_si128((void *) (coeffs + 4));
  58. rnd_data = _mm_load_si128((void *) (coeffs + 8));
  59. /* Initialize internal state to something random. (Alternatively,
  60. if hashing a chain of data, read in the previous hash result from
  61. somewhere.) */
  62. state_1 = state_2 = rnd_data;
  63. while (n_bytes >= 16) {
  64. /* Read in 16 bytes, or 128 bits, from buf. Advance buf and
  65. decrement n_bytes accordingly. */
  66. input = _mm_loadu_si128((void *) input_buf);
  67. input_buf += 16;
  68. n_bytes -= 16;
  69. COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input);
  70. }
  71. /* Postprocessing. Copy half of the internal state into fake input,
  72. replace it with the constant rnd_data, and do one combine and mix
  73. phase more. */
  74. input = state_1;
  75. state_1 = rnd_data;
  76. COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input);
  77. _mm_storeu_si128((void *) output_state, state_1);
  78. _mm_storeu_si128((void *) (output_state + 16), state_2);
  79. }
  80. #endif