// farmhash-c.c
  1. // Copyright (c) 2014 Google, Inc.
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to deal
  5. // in the Software without restriction, including without limitation the rights
  6. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. // copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. // THE SOFTWARE.
  20. //
  21. // FarmHash, by Geoff Pike
  22. #include "farmhash-c.h"
  23. #include <string.h>
  24. #include <assert.h>
// PLATFORM-SPECIFIC CONFIGURATION
// Detect x86-64 first; 32-bit x86 falls back to the 64-bit result so that
// `x86` is 1 on either flavor of the architecture.
#if defined (__x86_64) || defined (__x86_64__)
#define x86_64 1
#else
#define x86_64 0
#endif
#if defined(__i386__) || defined(__i386) || defined(__X86__)
#define x86 1
#else
#define x86 x86_64
#endif
// Each CAN_USE_* flag is 1 only when the compiler advertises the matching
// instruction set, and the corresponding intrinsics header is pulled in.
#if defined(__SSSE3__) || defined(__SSE4_1__) || defined(__SSE4_2__)
#include <tmmintrin.h>
#define CAN_USE_SSSE3 1 // Now we can use _mm_hsub_epi16 and so on.
#else
#define CAN_USE_SSSE3 0
#endif
#if defined(__SSE4_1__) || defined(__SSE4_2__)
#include <immintrin.h>
#define CAN_USE_SSE41 1 // Now we can use _mm_insert_epi64 and so on.
#else
#define CAN_USE_SSE41 0
#endif
#if defined(__SSE4_2__)
#include <nmmintrin.h>
#define CAN_USE_SSE42 1 // Now we can use _mm_crc32_u{32,16,8}. And on 64-bit platforms, _mm_crc32_u64.
#else
#define CAN_USE_SSE42 0
#endif
#if defined(__AES__)
#include <wmmintrin.h>
#define CAN_USE_AESNI 1 // Now we can use _mm_aesimc_si128 and so on.
#else
#define CAN_USE_AESNI 0
#endif
#if defined(__AVX__)
#include <immintrin.h>
#define CAN_USE_AVX 1
#else
#define CAN_USE_AVX 0
#endif
#ifdef __GNUC__
#define likely(x) (__builtin_expect(!!(x), 1))
#else
#define likely(x) (x)
#endif
// NOTE(review): this test relies on LITTLE_ENDIAN being defined only on
// little-endian targets. On glibc, <endian.h> defines both LITTLE_ENDIAN and
// BIG_ENDIAN unconditionally, so if that header is (transitively) included
// this branch is always taken — verify against the build configuration.
#ifdef LITTLE_ENDIAN
#define uint32_t_in_expected_order(x) (x)
#define uint64_t_in_expected_order(x) (x)
#else
#define uint32_t_in_expected_order(x) (bswap32(x))
#define uint64_t_in_expected_order(x) (bswap64(x))
#endif
// Cyclic permutation of three uint32_t* arguments: after the two swaps,
// *a holds the old *c, *b holds the old *a, and *c holds the old *b.
#define PERMUTE3(a, b, c) \
do { \
swap32(a, b); \
swap32(a, c); \
} while (0)
  83. static inline uint32_t bswap32(const uint32_t x) {
  84. uint32_t y = x;
  85. size_t i;
  86. for (i = 0; i < sizeof(uint32_t) >> 1; i++) {
  87. uint32_t d = sizeof(uint32_t) - i - 1;
  88. uint32_t mh = ((uint32_t)0xff) << (d << 3);
  89. uint32_t ml = ((uint32_t)0xff) << (i << 3);
  90. uint32_t h = x & mh;
  91. uint32_t l = x & ml;
  92. uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
  93. y = t | (y & ~(mh | ml));
  94. }
  95. return y;
  96. }
  97. static inline uint64_t bswap64(const uint64_t x) {
  98. uint64_t y = x;
  99. size_t i;
  100. for (i = 0; i < sizeof(uint64_t) >> 1; i++) {
  101. uint64_t d = sizeof(uint64_t) - i - 1;
  102. uint64_t mh = ((uint64_t)0xff) << (d << 3);
  103. uint64_t ml = ((uint64_t)0xff) << (i << 3);
  104. uint64_t h = x & mh;
  105. uint64_t l = x & ml;
  106. uint64_t t = (l << ((d - i) << 3)) | (h >> ((d - i) << 3));
  107. y = t | (y & ~(mh | ml));
  108. }
  109. return y;
  110. }
  111. static inline uint64_t fetch64(const char* p) {
  112. uint64_t result;
  113. memcpy(&result, p, sizeof(result));
  114. return uint64_t_in_expected_order(result);
  115. }
  116. static inline uint32_t fetch32(const char* p) {
  117. uint32_t result;
  118. memcpy(&result, p, sizeof(result));
  119. return uint32_t_in_expected_order(result);
  120. }
#if CAN_USE_SSSE3 || CAN_USE_SSE41 || CAN_USE_SSE42 || CAN_USE_AESNI || CAN_USE_AVX
// Unaligned 16-byte load from s into an SSE register.
static inline __m128i fetch128(const char* s) {
  return _mm_loadu_si128((const __m128i*) s);
}
#endif
  126. static inline void swap32(uint32_t* a, uint32_t* b) {
  127. uint32_t t;
  128. t = *a;
  129. *a = *b;
  130. *b = t;
  131. }
  132. static inline void swap64(uint64_t* a, uint64_t* b) {
  133. uint64_t t;
  134. t = *a;
  135. *a = *b;
  136. *b = t;
  137. }
#if CAN_USE_SSSE3 || CAN_USE_SSE41 || CAN_USE_SSE42 || CAN_USE_AESNI || CAN_USE_AVX
// Exchange two 128-bit register values through a temporary.
static inline void swap128(__m128i* a, __m128i* b) {
  __m128i t;
  t = *a;
  *a = *b;
  *b = t;
}
#endif
  146. static inline uint32_t ror32(uint32_t val, size_t shift) {
  147. // Avoid shifting by 32: doing so yields an undefined result.
  148. return shift == 0 ? val : (val >> shift) | (val << (32 - shift));
  149. }
  150. static inline uint64_t ror64(uint64_t val, size_t shift) {
  151. // Avoid shifting by 64: doing so yields an undefined result.
  152. return shift == 0 ? val : (val >> shift) | (val << (64 - shift));
  153. }
// Helpers for data-parallel operations (1x 128 bits or 2x64 or 4x32 or 8x16).
#if CAN_USE_SSSE3 || CAN_USE_SSE41 || CAN_USE_SSE42 || CAN_USE_AESNI || CAN_USE_AVX
// Lane-wise 64-bit addition.
static inline __m128i add64x2(__m128i x, __m128i y) { return _mm_add_epi64(x, y); }
// Lane-wise 32-bit addition.
static inline __m128i add32x4(__m128i x, __m128i y) { return _mm_add_epi32(x, y); }
static inline __m128i xor128(__m128i x, __m128i y) { return _mm_xor_si128(x, y); }
static inline __m128i or128(__m128i x, __m128i y) { return _mm_or_si128(x, y); }
// Multiply each 32-bit lane by 5, computed as x + (x << 2) to avoid a
// full multiply (which requires SSE4.1).
static inline __m128i mul32x4_5(__m128i x) { return add32x4(x, _mm_slli_epi32(x, 2)); }
// Rotate each 32-bit lane left by c bits; callers only pass c in 1..31,
// so the (32 - c) shift is well-defined.
static inline __m128i rol32x4(__m128i x, int c) {
  return or128(_mm_slli_epi32(x, c),
               _mm_srli_epi32(x, 32 - c));
}
static inline __m128i rol32x4_17(__m128i x) { return rol32x4(x, 17); }
static inline __m128i rol32x4_19(__m128i x) { return rol32x4(x, 19); }
// Lane rotation: result lane i receives source lane (i + 1) mod 4.
static inline __m128i shuf32x4_0_3_2_1(__m128i x) {
  return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0));
}
#endif
#if CAN_USE_SSSE3
// Byte shuffle: x is the control mask, y supplies the data bytes
// (note the swapped argument order relative to _mm_shuffle_epi8).
static inline __m128i shuf8x16(__m128i x, __m128i y) { return _mm_shuffle_epi8(y, x); }
#endif
#if CAN_USE_SSE41
// Lane-wise 32-bit multiply keeping the low 32 bits of each product.
static inline __m128i mul32x4(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }
// Murmur-like SIMD mixing step: e + 5 * rol19((d * rol17(c * a)) ^ b),
// applied independently to each 32-bit lane.
static inline __m128i murk(__m128i a, __m128i b, __m128i c, __m128i d, __m128i e) {
  return add32x4(e,
                 mul32x4_5(
                     rol32x4_19(
                         xor128(
                             mul32x4(d,
                                     rol32x4_17(
                                         mul32x4(c, a))),
                             (b)))));
}
#endif
// Building blocks for hash functions
// Some primes between 2^63 and 2^64 for various uses.
static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
static const uint64_t k1 = 0xb492b66fbe98f273ULL;
// k2 also serves as the fixed first seed in farmhash64_na_with_seed and as
// the hash of the empty string in farmhash_na_len_0_to_16.
static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
// Magic numbers for 32-bit hashing. Copied from Murmur3.
static const uint32_t c1 = 0xcc9e2d51;
static const uint32_t c2 = 0x1b873593;
  195. // A 32-bit to 32-bit integer hash copied from Murmur3.
  196. static inline uint32_t fmix(uint32_t h) {
  197. h ^= h >> 16;
  198. h *= 0x85ebca6b;
  199. h ^= h >> 13;
  200. h *= 0xc2b2ae35;
  201. h ^= h >> 16;
  202. return h;
  203. }
  204. static inline uint64_t smix(uint64_t val) {
  205. return val ^ (val >> 47);
  206. }
  207. static inline uint32_t mur(uint32_t a, uint32_t h) {
  208. // Helper from Murmur3 for combining two 32-bit values.
  209. a *= c1;
  210. a = ror32(a, 17);
  211. a *= c2;
  212. h ^= a;
  213. h = ror32(h, 19);
  214. return h * 5 + 0xe6546b64;
  215. }
// Identity in release builds (NDEBUG); in debug builds returns a scrambled
// value so hash outputs differ between the two build modes.
static inline uint32_t debug_tweak32(uint32_t x) {
#ifndef NDEBUG
  x = ~bswap32(x * c1);
#endif
  return x;
}
// 64-bit counterpart of debug_tweak32: identity under NDEBUG, scrambled
// otherwise.
static inline uint64_t debug_tweak64(uint64_t x) {
#ifndef NDEBUG
  x = ~bswap64(x * k1);
#endif
  return x;
}
// Debug-only scramble of a 128-bit value, built by tweaking and cross-mixing
// the two 64-bit halves. Identity under NDEBUG.
uint128_c_t debug_tweak128(uint128_c_t x) {
#ifndef NDEBUG
  uint64_t y = debug_tweak64(uint128_c_t_low64(x));
  uint64_t z = debug_tweak64(uint128_c_t_high64(x));
  y += z;
  z += y;
  x = make_uint128_c_t(y, z * k1);
#endif
  return x;
}
// Combine two 64-bit values into one via the 128-to-64-bit finalizer.
static inline uint64_t farmhash_len_16(uint64_t u, uint64_t v) {
  return farmhash128_to_64(make_uint128_c_t(u, v));
}
  241. static inline uint64_t farmhash_len_16_mul(uint64_t u, uint64_t v, uint64_t mul) {
  242. // Murmur-inspired hashing.
  243. uint64_t a = (u ^ v) * mul;
  244. a ^= (a >> 47);
  245. uint64_t b = (v ^ a) * mul;
  246. b ^= (b >> 47);
  247. b *= mul;
  248. return b;
  249. }
// farmhash na
// Hash 0..16 input bytes. Three regimes: 8..16 bytes use two (possibly
// overlapping) 64-bit fetches, 4..7 use two 32-bit fetches, 1..3 use
// individual bytes. Empty input hashes to the constant k2.
static inline uint64_t farmhash_na_len_0_to_16(const char *s, size_t len) {
  if (len >= 8) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = fetch64(s) + k2;
    uint64_t b = fetch64(s + len - 8);  // overlaps `a` when len < 16
    uint64_t c = ror64(b, 37) * mul + a;
    uint64_t d = (ror64(a, 25) + b) * mul;
    return farmhash_len_16_mul(c, d, mul);
  }
  if (len >= 4) {
    uint64_t mul = k2 + len * 2;
    uint64_t a = fetch32(s);
    return farmhash_len_16_mul(len + (a << 3), fetch32(s + len - 4), mul);
  }
  if (len > 0) {
    uint8_t a = s[0];
    uint8_t b = s[len >> 1];
    uint8_t c = s[len - 1];
    uint32_t y = (uint32_t) a + ((uint32_t) b << 8);
    // len <= 3 here, so narrowing len to 32 bits loses nothing.
    uint32_t z = len + ((uint32_t) c << 2);
    return smix(y * k2 ^ z * k0) * k2;
  }
  return k2;
}
// This probably works well for 16-byte strings as well, but it may be overkill
// in that case.
// Hash 17..32 bytes using four 64-bit fetches: two from the front of the
// buffer and two (overlapping) from the back.
static inline uint64_t farmhash_na_len_17_to_32(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = fetch64(s) * k1;
  uint64_t b = fetch64(s + 8);
  uint64_t c = fetch64(s + len - 8) * mul;
  uint64_t d = fetch64(s + len - 16) * k2;
  return farmhash_len_16_mul(ror64(a + b, 43) + ror64(c, 30) + d,
                             a + ror64(b + k2, 18) + c, mul);
}
// Return a 16-byte hash for 48 bytes. Quick and dirty.
// Callers do best to use "random-looking" values for a and b.
// w..z are four 64-bit data words; a and b act as running seeds.
static inline uint128_c_t weak_farmhash_na_len_32_with_seeds_vals(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = ror64(b + a + z, 21);
  uint64_t c = a;  // snapshot of a before the x/y additions
  a += x;
  a += y;
  b += ror64(a, 44);
  return make_uint128_c_t(a + z, b + c);
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
// Convenience wrapper: fetch the four 64-bit words from s and forward them
// to the _vals variant above.
static inline uint128_c_t weak_farmhash_na_len_32_with_seeds(
    const char* s, uint64_t a, uint64_t b) {
  return weak_farmhash_na_len_32_with_seeds_vals(fetch64(s),
                                                 fetch64(s + 8),
                                                 fetch64(s + 16),
                                                 fetch64(s + 24),
                                                 a,
                                                 b);
}
// Return an 8-byte hash for 33 to 64 bytes.
// First computes a 17-to-32-style mix (y, z) over the outer words, then
// folds in the middle words with a second 16-byte combine.
static inline uint64_t farmhash_na_len_33_to_64(const char *s, size_t len) {
  uint64_t mul = k2 + len * 2;
  uint64_t a = fetch64(s) * k2;
  uint64_t b = fetch64(s + 8);
  uint64_t c = fetch64(s + len - 8) * mul;
  uint64_t d = fetch64(s + len - 16) * k2;
  uint64_t y = ror64(a + b, 43) + ror64(c, 30) + d;
  uint64_t z = farmhash_len_16_mul(y, a + ror64(b + k2, 18) + c, mul);
  uint64_t e = fetch64(s + 16) * mul;
  uint64_t f = fetch64(s + 24);
  uint64_t g = (y + fetch64(s + len - 32)) * mul;
  uint64_t h = (z + fetch64(s + len - 24)) * mul;
  return farmhash_len_16_mul(ror64(e + f, 43) + ror64(g, 30) + h,
                             e + ror64(f + a, 18) + g, mul);
}
// 64-bit FarmHash, "na" variant. Inputs up to 64 bytes dispatch to the
// fixed-size helpers above; longer inputs run a 64-bytes-per-iteration
// mixing loop followed by a pass over the final 64 bytes.
uint64_t farmhash64_na(const char *s, size_t len) {
  const uint64_t seed = 81;
  if (len <= 32) {
    if (len <= 16) {
      return farmhash_na_len_0_to_16(s, len);
    } else {
      return farmhash_na_len_17_to_32(s, len);
    }
  } else if (len <= 64) {
    return farmhash_na_len_33_to_64(s, len);
  }
  // For strings over 64 bytes we loop. Internal state consists of
  // 56 bytes: v, w, x, y, and z.
  uint64_t x = seed;
  uint64_t y = seed * k1 + 113;
  uint64_t z = smix(y * k2 + 113) * k2;
  uint128_c_t v = make_uint128_c_t(0, 0);
  uint128_c_t w = make_uint128_c_t(0, 0);
  x = x * k2 + fetch64(s);
  // Set end so that after the loop we have 1 to 64 bytes left to process.
  const char* end = s + ((len - 1) / 64) * 64;
  const char* last64 = end + ((len - 1) & 63) - 63;
  assert(s + len - 64 == last64);
  do {
    x = ror64(x + y + v.a + fetch64(s + 8), 37) * k1;
    y = ror64(y + v.b + fetch64(s + 48), 42) * k1;
    x ^= w.b;
    y += v.a + fetch64(s + 40);
    z = ror64(z + w.a, 33) * k1;
    v = weak_farmhash_na_len_32_with_seeds(s, v.b * k1, x + w.a);
    w = weak_farmhash_na_len_32_with_seeds(s + 32, z + w.b, y + fetch64(s + 16));
    swap64(&z, &x);
    s += 64;
  } while (s != end);
  // The final-round multiplier depends on the accumulated state.
  uint64_t mul = k1 + ((z & 0xff) << 1);
  // Make s point to the last 64 bytes of input.
  s = last64;
  w.a += ((len - 1) & 63);
  v.a += w.a;
  w.a += v.a;
  x = ror64(x + y + v.a + fetch64(s + 8), 37) * mul;
  y = ror64(y + v.b + fetch64(s + 48), 42) * mul;
  x ^= w.b * 9;
  y += v.a * 9 + fetch64(s + 40);
  z = ror64(z + w.a, 33) * mul;
  v = weak_farmhash_na_len_32_with_seeds(s, v.b * mul, x + w.a);
  w = weak_farmhash_na_len_32_with_seeds(s + 32, z + w.b, y + fetch64(s + 16));
  swap64(&z, &x);
  return farmhash_len_16_mul(farmhash_len_16_mul(v.a, w.a, mul) + smix(y) * k0 + z,
                             farmhash_len_16_mul(v.b, w.b, mul) + x,
                             mul);
}
  376. uint64_t farmhash64_na_with_seeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) {
  377. return farmhash_len_16(farmhash64_na(s, len) - seed0, seed1);
  378. }
// Single-seed variant: k2 serves as the fixed first seed.
uint64_t farmhash64_na_with_seed(const char *s, size_t len, uint64_t seed) {
  return farmhash64_na_with_seeds(s, len, k2, seed);
}
  382. // farmhash uo
  383. static inline uint64_t farmhash_uo_h(uint64_t x, uint64_t y, uint64_t mul, int r) {
  384. uint64_t a = (x ^ y) * mul;
  385. a ^= (a >> 47);
  386. uint64_t b = (y ^ a) * mul;
  387. return ror64(b, r) * mul;
  388. }
// 64-bit FarmHash, "uo" variant, with two seeds. Inputs up to 64 bytes fall
// back to the na path; longer inputs run a 64-bytes-per-iteration loop over
// eight fetched words, then a final pass over the last 64 bytes.
uint64_t farmhash64_uo_with_seeds(const char *s, size_t len,
                                  uint64_t seed0, uint64_t seed1) {
  if (len <= 64) {
    return farmhash64_na_with_seeds(s, len, seed0, seed1);
  }
  // For strings over 64 bytes we loop. Internal state consists of
  // 64 bytes: u, v, w, x, y, and z.
  uint64_t x = seed0;
  uint64_t y = seed1 * k2 + 113;
  uint64_t z = smix(y * k2) * k2;
  uint128_c_t v = make_uint128_c_t(seed0, seed1);
  uint128_c_t w = make_uint128_c_t(0, 0);
  uint64_t u = x - z;
  x *= k2;
  // Data-dependent multiplier; only bits 1 and 7 of u influence it.
  uint64_t mul = k2 + (u & 0x82);
  // Set end so that after the loop we have 1 to 64 bytes left to process.
  const char* end = s + ((len - 1) / 64) * 64;
  const char* last64 = end + ((len - 1) & 63) - 63;
  assert(s + len - 64 == last64);
  do {
    // Eight 64-bit words per iteration.
    uint64_t a0 = fetch64(s);
    uint64_t a1 = fetch64(s + 8);
    uint64_t a2 = fetch64(s + 16);
    uint64_t a3 = fetch64(s + 24);
    uint64_t a4 = fetch64(s + 32);
    uint64_t a5 = fetch64(s + 40);
    uint64_t a6 = fetch64(s + 48);
    uint64_t a7 = fetch64(s + 56);
    x += a0 + a1;
    y += a2;
    z += a3;
    v.a += a4;
    v.b += a5 + a1;
    w.a += a6;
    w.b += a7;
    x = ror64(x, 26);
    x *= 9;
    y = ror64(y, 29);
    z *= mul;
    v.a = ror64(v.a, 33);
    v.b = ror64(v.b, 30);
    w.a ^= x;
    w.a *= 9;
    z = ror64(z, 32);
    z += w.b;
    w.b += z;
    z *= 9;
    swap64(&u, &y);
    z += a0 + a6;
    v.a += a2;
    v.b += a3;
    w.a += a4;
    w.b += a5 + a6;
    x += a1;
    y += a7;
    y += v.a;
    v.a += x - y;
    v.b += w.a;
    w.a += v.b;
    w.b += x - y;
    x += w.b;
    w.b = ror64(w.b, 34);
    swap64(&u, &z);
    s += 64;
  } while (s != end);
  // Make s point to the last 64 bytes of input.
  s = last64;
  u *= 9;
  v.b = ror64(v.b, 28);
  v.a = ror64(v.a, 20);
  w.a += ((len - 1) & 63);
  u += y;
  y += u;
  x = ror64(y - x + v.a + fetch64(s + 8), 37) * mul;
  y = ror64(y ^ v.b ^ fetch64(s + 48), 42) * mul;
  x ^= w.b * 9;
  y += v.a + fetch64(s + 40);
  z = ror64(z + w.a, 33) * mul;
  v = weak_farmhash_na_len_32_with_seeds(s, v.b * mul, x + w.a);
  w = weak_farmhash_na_len_32_with_seeds(s + 32, z + w.b, y + fetch64(s + 16));
  return farmhash_uo_h(farmhash_len_16_mul(v.a + x, w.a ^ y, mul) + z - u,
                       farmhash_uo_h(v.b + y, w.b + z, k2, 30) ^ x,
                       k2,
                       31);
}
  474. uint64_t farmhash64_uo_with_seed(const char *s, size_t len, uint64_t seed) {
  475. return len <= 64 ? farmhash64_na_with_seed(s, len, seed) :
  476. farmhash64_uo_with_seeds(s, len, 0, seed);
  477. }
  478. uint64_t farmhash64_uo(const char *s, size_t len) {
  479. return len <= 64 ? farmhash64_na(s, len) :
  480. farmhash64_uo_with_seeds(s, len, 81, 0);
  481. }
// farmhash xo
// Hash a 32-byte window starting at s (callers pass len == 32 or the window
// length), folding the two seeds into the intermediate values before the
// final two xor-shift/multiply rounds.
static inline uint64_t farmhash_xo_h32(const char *s, size_t len, uint64_t mul,
                                       uint64_t seed0, uint64_t seed1) {
  uint64_t a = fetch64(s) * k1;
  uint64_t b = fetch64(s + 8);
  uint64_t c = fetch64(s + len - 8) * mul;
  uint64_t d = fetch64(s + len - 16) * k2;
  uint64_t u = ror64(a + b, 43) + ror64(c, 30) + d + seed0;
  uint64_t v = a + ror64(b + k2, 18) + c + seed1;
  a = smix((u ^ v) * mul);
  b = smix((v ^ a) * mul);
  return b;
}
// Return an 8-byte hash for 33 to 64 bytes.
// Hashes the first and last 32-byte windows (which overlap for len < 64)
// with length-dependent multipliers, then combines the two results.
static inline uint64_t farmhash_xo_len_33_to_64(const char *s, size_t len) {
  uint64_t mul0 = k2 - 30;
  uint64_t mul1 = k2 - 30 + 2 * len;
  uint64_t h0 = farmhash_xo_h32(s, 32, mul0, 0, 0);
  uint64_t h1 = farmhash_xo_h32(s + len - 32, 32, mul1, 0, 0);
  return ((h1 * mul1) + h0) * mul1;
}
// Return an 8-byte hash for 65 to 96 bytes.
// Hashes three 32-byte windows (start, middle, end); the final window is
// seeded with the first two results before everything is combined.
static inline uint64_t farmhash_xo_len_65_to_96(const char *s, size_t len) {
  uint64_t mul0 = k2 - 114;
  uint64_t mul1 = k2 - 114 + 2 * len;
  uint64_t h0 = farmhash_xo_h32(s, 32, mul0, 0, 0);
  uint64_t h1 = farmhash_xo_h32(s + 32, 32, mul1, 0, 0);
  uint64_t h2 = farmhash_xo_h32(s + len - 32, 32, mul1, h0, h1);
  return (h2 * 9 + (h0 >> 17) + (h1 >> 21)) * mul1;
}
  512. uint64_t farmhash64_xo(const char *s, size_t len) {
  513. if (len <= 32) {
  514. if (len <= 16) {
  515. return farmhash_na_len_0_to_16(s, len);
  516. } else {
  517. return farmhash_na_len_17_to_32(s, len);
  518. }
  519. } else if (len <= 64) {
  520. return farmhash_xo_len_33_to_64(s, len);
  521. } else if (len <= 96) {
  522. return farmhash_xo_len_65_to_96(s, len);
  523. } else if (len <= 256) {
  524. return farmhash64_na(s, len);
  525. } else {
  526. return farmhash64_uo(s, len);
  527. }
  528. }
// The seeded xo entry points simply defer to the uo implementation.
uint64_t farmhash64_xo_with_seeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) {
  return farmhash64_uo_with_seeds(s, len, seed0, seed1);
}
// Single-seed xo entry point; defers to the uo implementation.
uint64_t farmhash64_xo_with_seed(const char *s, size_t len, uint64_t seed) {
  return farmhash64_uo_with_seed(s, len, seed);
}
  535. // farmhash te
  536. #if x86_64 && CAN_USE_SSE41 && CAN_USE_SSSE3
  537. // Requires n >= 256. Requires SSE4.1. Should be slightly faster if the
  538. // compiler uses AVX instructions (e.g., use the -mavx flag with GCC).
// Requires n >= 256. Requires SSE4.1. Should be slightly faster if the
// compiler uses AVX instructions (e.g., use the -mavx flag with GCC).
// Processes the input in 256-byte chunks, maintaining twelve 128-bit
// accumulators (d0..d11) that are added/xored/shuffled in a fixed schedule;
// any tail shorter than 256 bytes is hashed with farmhash64_xo and folded in,
// and the accumulators are finalized through farmhash64_xo as raw bytes.
static inline uint64_t farmhash64_te_long(const char* s, size_t n,
                                          uint64_t seed0, uint64_t seed1) {
  const __m128i k_shuf =
      _mm_set_epi8(4, 11, 10, 5, 8, 15, 6, 9, 12, 2, 14, 13, 0, 7, 3, 1);
  const __m128i k_mult =
      _mm_set_epi8(0xbd, 0xd6, 0x33, 0x39, 0x45, 0x54, 0xfa, 0x03,
                   0x34, 0x3e, 0x33, 0xed, 0xcc, 0x9e, 0x2d, 0x51);
  uint64_t seed2 = (seed0 + 113) * (seed1 + 9);
  uint64_t seed3 = (ror64(seed0, 23) + 27) * (ror64(seed1, 30) + 111);
  // Initialize the twelve accumulators from the two seeds.
  __m128i d0 = _mm_cvtsi64_si128(seed0);
  __m128i d1 = _mm_cvtsi64_si128(seed1);
  __m128i d2 = shuf8x16(k_shuf, d0);
  __m128i d3 = shuf8x16(k_shuf, d1);
  __m128i d4 = xor128(d0, d1);
  __m128i d5 = xor128(d1, d2);
  __m128i d6 = xor128(d2, d4);
  __m128i d7 = _mm_set1_epi32(seed2 >> 32);
  __m128i d8 = mul32x4(k_mult, d2);
  __m128i d9 = _mm_set1_epi32(seed3 >> 32);
  __m128i d10 = _mm_set1_epi32(seed3);
  __m128i d11 = add64x2(d2, _mm_set1_epi32(seed2));
  const char* end = s + (n & ~((size_t) 255));
  // Main loop: sixteen 16-byte loads per 256-byte chunk, each followed by a
  // fixed add/shuffle/xor/swap schedule over the accumulators.
  do {
    __m128i z;
    z = fetch128(s);
    d0 = add64x2(d0, z);
    d1 = shuf8x16(k_shuf, d1);
    d2 = xor128(d2, d0);
    d4 = xor128(d4, z);
    d4 = xor128(d4, d1);
    swap128(&d0, &d6);
    z = fetch128(s + 16);
    d5 = add64x2(d5, z);
    d6 = shuf8x16(k_shuf, d6);
    d8 = shuf8x16(k_shuf, d8);
    d7 = xor128(d7, d5);
    d0 = xor128(d0, z);
    d0 = xor128(d0, d6);
    swap128(&d5, &d11);
    z = fetch128(s + 32);
    d1 = add64x2(d1, z);
    d2 = shuf8x16(k_shuf, d2);
    d4 = shuf8x16(k_shuf, d4);
    d5 = xor128(d5, z);
    d5 = xor128(d5, d2);
    swap128(&d10, &d4);
    z = fetch128(s + 48);
    d6 = add64x2(d6, z);
    d7 = shuf8x16(k_shuf, d7);
    d0 = shuf8x16(k_shuf, d0);
    d8 = xor128(d8, d6);
    d1 = xor128(d1, z);
    d1 = add64x2(d1, d7);
    z = fetch128(s + 64);
    d2 = add64x2(d2, z);
    d5 = shuf8x16(k_shuf, d5);
    d4 = add64x2(d4, d2);
    d6 = xor128(d6, z);
    d6 = xor128(d6, d11);
    swap128(&d8, &d2);
    z = fetch128(s + 80);
    d7 = xor128(d7, z);
    d8 = shuf8x16(k_shuf, d8);
    d1 = shuf8x16(k_shuf, d1);
    d0 = add64x2(d0, d7);
    d2 = add64x2(d2, z);
    d2 = add64x2(d2, d8);
    swap128(&d1, &d7);
    z = fetch128(s + 96);
    d4 = shuf8x16(k_shuf, d4);
    d6 = shuf8x16(k_shuf, d6);
    d8 = mul32x4(k_mult, d8);
    d5 = xor128(d5, d11);
    d7 = xor128(d7, z);
    d7 = add64x2(d7, d4);
    swap128(&d6, &d0);
    z = fetch128(s + 112);
    d8 = add64x2(d8, z);
    d0 = shuf8x16(k_shuf, d0);
    d2 = shuf8x16(k_shuf, d2);
    d1 = xor128(d1, d8);
    d10 = xor128(d10, z);
    d10 = xor128(d10, d0);
    swap128(&d11, &d5);
    z = fetch128(s + 128);
    d4 = add64x2(d4, z);
    d5 = shuf8x16(k_shuf, d5);
    d7 = shuf8x16(k_shuf, d7);
    d6 = add64x2(d6, d4);
    d8 = xor128(d8, z);
    d8 = xor128(d8, d5);
    swap128(&d4, &d10);
    z = fetch128(s + 144);
    d0 = add64x2(d0, z);
    d1 = shuf8x16(k_shuf, d1);
    d2 = add64x2(d2, d0);
    d4 = xor128(d4, z);
    d4 = xor128(d4, d1);
    z = fetch128(s + 160);
    d5 = add64x2(d5, z);
    d6 = shuf8x16(k_shuf, d6);
    d8 = shuf8x16(k_shuf, d8);
    d7 = xor128(d7, d5);
    d0 = xor128(d0, z);
    d0 = xor128(d0, d6);
    swap128(&d2, &d8);
    z = fetch128(s + 176);
    d1 = add64x2(d1, z);
    d2 = shuf8x16(k_shuf, d2);
    d4 = shuf8x16(k_shuf, d4);
    d5 = mul32x4(k_mult, d5);
    d5 = xor128(d5, z);
    d5 = xor128(d5, d2);
    swap128(&d7, &d1);
    z = fetch128(s + 192);
    d6 = add64x2(d6, z);
    d7 = shuf8x16(k_shuf, d7);
    d0 = shuf8x16(k_shuf, d0);
    d8 = add64x2(d8, d6);
    d1 = xor128(d1, z);
    d1 = xor128(d1, d7);
    swap128(&d0, &d6);
    z = fetch128(s + 208);
    d2 = add64x2(d2, z);
    d5 = shuf8x16(k_shuf, d5);
    d4 = xor128(d4, d2);
    d6 = xor128(d6, z);
    d6 = xor128(d6, d9);
    swap128(&d5, &d11);
    z = fetch128(s + 224);
    d7 = add64x2(d7, z);
    d8 = shuf8x16(k_shuf, d8);
    d1 = shuf8x16(k_shuf, d1);
    d0 = xor128(d0, d7);
    d2 = xor128(d2, z);
    d2 = xor128(d2, d8);
    swap128(&d10, &d4);
    z = fetch128(s + 240);
    d3 = add64x2(d3, z);
    d4 = shuf8x16(k_shuf, d4);
    d6 = shuf8x16(k_shuf, d6);
    d7 = mul32x4(k_mult, d7);
    d5 = add64x2(d5, d3);
    d7 = xor128(d7, z);
    d7 = xor128(d7, d4);
    swap128(&d3, &d9);
    s += 256;
  } while (s != end);
  // Fold in the total length, and hash any sub-256-byte tail via xo.
  d6 = add64x2(mul32x4(k_mult, d6), _mm_cvtsi64_si128(n));
  if (n % 256 != 0) {
    d7 = add64x2(_mm_shuffle_epi32(d8, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)), d7);
    d8 = add64x2(mul32x4(k_mult, d8), _mm_cvtsi64_si128(farmhash64_xo(s, n % 256)));
  }
  // Finalize: scramble four accumulators, cross-combine, and hash the raw
  // 128 bytes of state with farmhash64_xo.
  __m128i t[8];
  d0 = mul32x4(k_mult, shuf8x16(k_shuf, mul32x4(k_mult, d0)));
  d3 = mul32x4(k_mult, shuf8x16(k_shuf, mul32x4(k_mult, d3)));
  d9 = mul32x4(k_mult, shuf8x16(k_shuf, mul32x4(k_mult, d9)));
  d1 = mul32x4(k_mult, shuf8x16(k_shuf, mul32x4(k_mult, d1)));
  d0 = add64x2(d11, d0);
  d3 = xor128(d7, d3);
  d9 = add64x2(d8, d9);
  d1 = add64x2(d10, d1);
  d4 = add64x2(d3, d4);
  d5 = add64x2(d9, d5);
  d6 = xor128(d1, d6);
  d2 = add64x2(d0, d2);
  t[0] = d0;
  t[1] = d3;
  t[2] = d9;
  t[3] = d1;
  t[4] = d4;
  t[5] = d5;
  t[6] = d6;
  t[7] = d2;
  return farmhash64_xo((const char*) t, sizeof(t));
}
  715. uint64_t farmhash64_te(const char *s, size_t len) {
  716. // Empirically, farmhash xo seems faster until length 512.
  717. return len >= 512 ? farmhash64_te_long(s, len, k2, k1) : farmhash64_xo(s, len);
  718. }
  719. uint64_t farmhash64_te_with_seed(const char *s, size_t len, uint64_t seed) {
  720. return len >= 512 ? farmhash64_te_long(s, len, k1, seed) :
  721. farmhash64_xo_with_seed(s, len, seed);
  722. }
  723. uint64_t farmhash64_te_with_seeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) {
  724. return len >= 512 ? farmhash64_te_long(s, len, seed0, seed1) :
  725. farmhash64_xo_with_seeds(s, len, seed0, seed1);
  726. }
  727. #endif
  728. // farmhash nt
  729. #if x86_64 && CAN_USE_SSE41 && CAN_USE_SSSE3
  730. uint32_t farmhash32_nt(const char *s, size_t len) {
  731. return (uint32_t) farmhash64_te(s, len);
  732. }
  733. uint32_t farmhash32_nt_with_seed(const char *s, size_t len, uint32_t seed) {
  734. return (uint32_t) farmhash64_te_with_seed(s, len, seed);
  735. }
  736. #endif
  737. // farmhash mk
// farmhash mk, 13..24 bytes: six overlapping 32-bit loads cover the whole
// buffer; they are folded with mur() rounds interleaved with
// rotate-and-accumulate steps on 'a', then finalized with fmix().
// The seed enters via the initial h and is xored into the last mur round.
static inline uint32_t farmhash32_mk_len_13_to_24(const char *s, size_t len, uint32_t seed) {
  uint32_t a = fetch32(s - 4 + (len >> 1));  // word ending at the midpoint
  uint32_t b = fetch32(s + 4);
  uint32_t c = fetch32(s + len - 8);
  uint32_t d = fetch32(s + (len >> 1));      // word starting at the midpoint
  uint32_t e = fetch32(s);
  uint32_t f = fetch32(s + len - 4);
  uint32_t h = d * c1 + len + seed;
  a = ror32(a, 12) + f;
  h = mur(c, h) + a;
  a = ror32(a, 3) + c;
  h = mur(e, h) + a;
  a = ror32(a + f, 12) + d;
  h = mur(b ^ seed, h) + a;
  return fmix(h);
}
  754. static inline uint32_t farmhash32_mk_len_0_to_4(const char *s, size_t len, uint32_t seed) {
  755. uint32_t b = seed;
  756. uint32_t c = 9;
  757. size_t i;
  758. for (i = 0; i < len; i++) {
  759. signed char v = s[i];
  760. b = b * c1 + v;
  761. c ^= b;
  762. }
  763. return fmix(mur(b, mur(len, c)));
  764. }
  765. static inline uint32_t farmhash32_mk_len_5_to_12(const char *s, size_t len, uint32_t seed) {
  766. uint32_t a = len, b = len * 5, c = 9, d = b + seed;
  767. a += fetch32(s);
  768. b += fetch32(s + len - 4);
  769. c += fetch32(s + ((len >> 1) & 4));
  770. return fmix(seed ^ mur(c, mur(b, mur(a, d))));
  771. }
// Portable 32-bit farmhash, "mk" flavor.
// Short inputs (<= 24 bytes) dispatch to the fixed-length helpers; longer
// inputs mix five tail words into three lanes (h, g, f), then consume the
// body 20 bytes per iteration, and finally fold the lanes together.
uint32_t farmhash32_mk(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? farmhash32_mk_len_0_to_4(s, len, 0) : farmhash32_mk_len_5_to_12(s, len, 0)) :
        farmhash32_mk_len_13_to_24(s, len, 0);
  }
  // len > 24
  uint32_t h = len, g = c1 * len, f = g;
  // Five murmur-style-scrambled words from the last 20 bytes.
  uint32_t a0 = ror32(fetch32(s + len - 4) * c1, 17) * c2;
  uint32_t a1 = ror32(fetch32(s + len - 8) * c1, 17) * c2;
  uint32_t a2 = ror32(fetch32(s + len - 16) * c1, 17) * c2;
  uint32_t a3 = ror32(fetch32(s + len - 12) * c1, 17) * c2;
  uint32_t a4 = ror32(fetch32(s + len - 20) * c1, 17) * c2;
  // Absorb the tail words into the three lanes.
  h ^= a0;
  h = ror32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = ror32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = ror32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = ror32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = ror32(f, 19) + 113;
  // Main loop: 20 bytes per iteration; at least one pass since len > 24.
  size_t iters = (len - 1) / 20;
  do {
    uint32_t a = fetch32(s);
    uint32_t b = fetch32(s + 4);
    uint32_t c = fetch32(s + 8);
    uint32_t d = fetch32(s + 12);
    uint32_t e = fetch32(s + 16);
    h += a;
    g += b;
    f += c;
    h = mur(d, h) + e;
    g = mur(c, g) + a;
    f = mur(b + e * c1, f) + d;
    f += g;
    g += f;
    s += 20;
  } while (--iters != 0);
  // Final avalanche: scramble each lane, then fold g and f into h.
  g = ror32(g, 11) * c1;
  g = ror32(g, 17) * c1;
  f = ror32(f, 11) * c1;
  f = ror32(f, 17) * c1;
  h = ror32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = ror32(h, 17) * c1;
  h = ror32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = ror32(h, 17) * c1;
  return h;
}
  828. uint32_t farmhash32_mk_with_seed(const char *s, size_t len, uint32_t seed) {
  829. if (len <= 24) {
  830. if (len >= 13) return farmhash32_mk_len_13_to_24(s, len, seed * c1);
  831. else if (len >= 5) return farmhash32_mk_len_5_to_12(s, len, seed);
  832. else return farmhash32_mk_len_0_to_4(s, len, seed);
  833. }
  834. uint32_t h = farmhash32_mk_len_13_to_24(s, 24, seed ^ len);
  835. return mur(farmhash32_mk(s + 24, len - 24) + seed, h);
  836. }
  837. // farmhash su
  838. #if CAN_USE_AESNI && CAN_USE_SSE42
// 32-bit hash, "su" flavor: SSE4.2 CRC32 plus AES-NI mixing.
// Output is NOT compatible with the other 32-bit variants.
uint32_t farmhash32_su(const char *s, size_t len) {
  const uint32_t seed = 81;
  if (len <= 24) {
    // Short inputs share the scalar mk helpers.
    return len <= 12 ?
        (len <= 4 ?
         farmhash32_mk_len_0_to_4(s, len, 0) :
         farmhash32_mk_len_5_to_12(s, len, 0)) :
        farmhash32_mk_len_13_to_24(s, len, 0);
  }
  if (len < 40) {
    // 25..39 bytes: CRC-assisted scalar mixing of five tail words, then
    // rehash the first half of the buffer seeded with the result.
    uint32_t a = len, b = seed * c2, c = a + b;
    a += fetch32(s + len - 4);
    b += fetch32(s + len - 20);
    c += fetch32(s + len - 16);
    uint32_t d = a;
    a = ror32(a, 21);
    a = mur(a, mur(b, _mm_crc32_u32(c, d)));
    a += fetch32(s + len - 12);
    b += fetch32(s + len - 8);
    d += a;
    a += d;
    b = mur(b, d) * c2;
    a = _mm_crc32_u32(a, b + c);
    return farmhash32_mk_len_13_to_24(s, (len + 1) / 2, a) + b;
  }
  // len >= 40: vectorized path with four 128-bit lanes h, g, f, k (+ q).
  const __m128i cc1 = _mm_set1_epi32(c1);
  const __m128i cc2 = _mm_set1_epi32(c2);
  __m128i h = _mm_set1_epi32(seed);
  __m128i g = _mm_set1_epi32(c1 * seed);
  __m128i f = g;
  __m128i k = _mm_set1_epi32(0xe6546b64);
  __m128i q;
  if (len < 80) {
    // One-shot mixing of five (overlapping) 16-byte loads.
    __m128i a = fetch128(s);
    __m128i b = fetch128(s + 16);
    __m128i c = fetch128(s + (len - 15) / 2);
    __m128i d = fetch128(s + len - 32);
    __m128i e = fetch128(s + len - 16);
    h = add32x4(h, a);
    g = add32x4(g, b);
    q = g;
    g = shuf32x4_0_3_2_1(g);
    f = add32x4(f, c);
    __m128i be = add32x4(b, mul32x4(e, cc1));
    h = add32x4(h, f);
    f = add32x4(f, h);
    h = add32x4(murk(d, h, cc1, cc2, k), e);
    k = xor128(k, _mm_shuffle_epi8(g, f));
    g = add32x4(xor128(c, g), a);
    f = add32x4(xor128(be, f), d);
    k = add32x4(k, be);
    k = add32x4(k, _mm_shuffle_epi8(f, h));
    f = add32x4(f, g);
    g = add32x4(g, f);
    g = add32x4(_mm_set1_epi32(len), mul32x4(g, cc1));
  } else {
    // len >= 80
    // The following is loosely modelled after farmhash32_mk.
    // Consume 80 bytes per CHUNK_AES() invocation; a final overlapping
    // chunk covers any remainder.
    size_t iters = (len - 1) / 80;
    len -= iters * 80;
    // NOTE: macro (not a function) so it can mutate h/g/f/k/q and s in scope.
#define CHUNK_AES() do { \
__m128i a = fetch128(s); \
__m128i b = fetch128(s + 16); \
__m128i c = fetch128(s + 32); \
__m128i d = fetch128(s + 48); \
__m128i e = fetch128(s + 64); \
h = add32x4(h, a); \
g = add32x4(g, b); \
g = shuf32x4_0_3_2_1(g); \
f = add32x4(f, c); \
__m128i be = add32x4(b, mul32x4(e, cc1)); \
h = add32x4(h, f); \
f = add32x4(f, h); \
h = add32x4(h, d); \
q = add32x4(q, e); \
h = rol32x4_17(h); \
h = mul32x4(h, cc1); \
k = xor128(k, _mm_shuffle_epi8(g, f)); \
g = add32x4(xor128(c, g), a); \
f = add32x4(xor128(be, f), d); \
swap128(&f, &q); \
q = _mm_aesimc_si128(q); \
k = add32x4(k, be); \
k = add32x4(k, _mm_shuffle_epi8(f, h)); \
f = add32x4(f, g); \
g = add32x4(g, f); \
f = mul32x4(f, cc1); \
} while (0)
    q = g;
    while (iters-- != 0) {
      CHUNK_AES();
      s += 80;
    }
    if (len != 0) {
      // Final chunk overlaps the previous one: back up so exactly the last
      // 80 bytes are hashed.
      h = add32x4(h, _mm_set1_epi32(len));
      s = s + len - 80;
      CHUNK_AES();
    }
  }
  // Cross-mix the lanes.
  g = shuf32x4_0_3_2_1(g);
  k = xor128(k, g);
  k = xor128(k, q);
  h = xor128(h, q);
  f = mul32x4(f, cc1);
  k = mul32x4(k, cc2);
  g = mul32x4(g, cc1);
  h = mul32x4(h, cc2);
  k = add32x4(k, _mm_shuffle_epi8(g, f));
  h = add32x4(h, f);
  f = add32x4(f, h);
  g = add32x4(g, k);
  k = add32x4(k, g);
  k = xor128(k, _mm_shuffle_epi8(f, h));
  // Spill the four lanes to memory and CRC-reduce the 64 bytes to 32 bits.
  __m128i buf[4];
  buf[0] = f;
  buf[1] = g;
  buf[2] = k;
  buf[3] = h;
  s = (char*) buf;
  uint32_t x = fetch32(s);
  uint32_t y = fetch32(s+4);
  uint32_t z = fetch32(s+8);
  x = _mm_crc32_u32(x, fetch32(s+12));
  y = _mm_crc32_u32(y, fetch32(s+16));
  z = _mm_crc32_u32(z * c1, fetch32(s+20));
  x = _mm_crc32_u32(x, fetch32(s+24));
  y = _mm_crc32_u32(y * c1, fetch32(s+28));
  uint32_t o = y;
  z = _mm_crc32_u32(z, fetch32(s+32));
  x = _mm_crc32_u32(x * c1, fetch32(s+36));
  y = _mm_crc32_u32(y, fetch32(s+40));
  z = _mm_crc32_u32(z * c1, fetch32(s+44));
  x = _mm_crc32_u32(x, fetch32(s+48));
  y = _mm_crc32_u32(y * c1, fetch32(s+52));
  z = _mm_crc32_u32(z, fetch32(s+56));
  x = _mm_crc32_u32(x, fetch32(s+60));
  return (o - x + y - z) * c1;
}
  977. uint32_t farmhash32_su_with_seed(const char *s, size_t len, uint32_t seed) {
  978. if (len <= 24) {
  979. if (len >= 13) return farmhash32_mk_len_13_to_24(s, len, seed * c1);
  980. else if (len >= 5) return farmhash32_mk_len_5_to_12(s, len, seed);
  981. else return farmhash32_mk_len_0_to_4(s, len, seed);
  982. }
  983. uint32_t h = farmhash32_mk_len_13_to_24(s, 24, seed ^ len);
  984. return _mm_crc32_u32(farmhash32_su(s + 24, len - 24) + seed, h);
  985. }
  986. #endif
  987. // farmhash sa
  988. #if CAN_USE_SSE41 && CAN_USE_SSE42
// 32-bit hash, "sa" flavor: SSE4.1/SSE4.2 (CRC32) without AES-NI.
// Structure parallels farmhash32_su minus the q lane and AES steps.
uint32_t farmhash32_sa(const char *s, size_t len) {
  const uint32_t seed = 81;
  if (len <= 24) {
    // Short inputs share the scalar mk helpers.
    return len <= 12 ?
        (len <= 4 ?
         farmhash32_mk_len_0_to_4(s, len, 0) :
         farmhash32_mk_len_5_to_12(s, len, 0)) :
        farmhash32_mk_len_13_to_24(s, len, 0);
  }
  if (len < 40) {
    // 25..39 bytes: scalar mixing of five tail words, then rehash the
    // first half of the buffer seeded with the result.
    uint32_t a = len, b = seed * c2, c = a + b;
    a += fetch32(s + len - 4);
    b += fetch32(s + len - 20);
    c += fetch32(s + len - 16);
    uint32_t d = a;
    a = ror32(a, 21);
    a = mur(a, mur(b, mur(c, d)));
    a += fetch32(s + len - 12);
    b += fetch32(s + len - 8);
    d += a;
    a += d;
    b = mur(b, d) * c2;
    a = _mm_crc32_u32(a, b + c);
    return farmhash32_mk_len_13_to_24(s, (len + 1) / 2, a) + b;
  }
  // len >= 40: vectorized path with four 128-bit lanes h, g, f, k.
  const __m128i cc1 = _mm_set1_epi32(c1);
  const __m128i cc2 = _mm_set1_epi32(c2);
  __m128i h = _mm_set1_epi32(seed);
  __m128i g = _mm_set1_epi32(c1 * seed);
  __m128i f = g;
  __m128i k = _mm_set1_epi32(0xe6546b64);
  if (len < 80) {
    // One-shot mixing of five (overlapping) 16-byte loads.
    __m128i a = fetch128(s);
    __m128i b = fetch128(s + 16);
    __m128i c = fetch128(s + (len - 15) / 2);
    __m128i d = fetch128(s + len - 32);
    __m128i e = fetch128(s + len - 16);
    h = add32x4(h, a);
    g = add32x4(g, b);
    g = shuf32x4_0_3_2_1(g);
    f = add32x4(f, c);
    __m128i be = add32x4(b, mul32x4(e, cc1));
    h = add32x4(h, f);
    f = add32x4(f, h);
    h = add32x4(murk(d, h, cc1, cc2, k), e);
    k = xor128(k, _mm_shuffle_epi8(g, f));
    g = add32x4(xor128(c, g), a);
    f = add32x4(xor128(be, f), d);
    k = add32x4(k, be);
    k = add32x4(k, _mm_shuffle_epi8(f, h));
    f = add32x4(f, g);
    g = add32x4(g, f);
    g = add32x4(_mm_set1_epi32(len), mul32x4(g, cc1));
  } else {
    // len >= 80
    // The following is loosely modelled after farmhash32_mk.
    // Consume 80 bytes per CHUNK() invocation; a final overlapping chunk
    // covers any remainder.
    size_t iters = (len - 1) / 80;
    len -= iters * 80;
    // NOTE: macro (not a function) so it can mutate h/g/f/k and s in scope.
#define CHUNK() do { \
__m128i a = fetch128(s); \
__m128i b = fetch128(s + 16); \
__m128i c = fetch128(s + 32); \
__m128i d = fetch128(s + 48); \
__m128i e = fetch128(s + 64); \
h = add32x4(h, a); \
g = add32x4(g, b); \
g = shuf32x4_0_3_2_1(g); \
f = add32x4(f, c); \
__m128i be = add32x4(b, mul32x4(e, cc1)); \
h = add32x4(h, f); \
f = add32x4(f, h); \
h = add32x4(murk(d, h, cc1, cc2, k), e); \
k = xor128(k, _mm_shuffle_epi8(g, f)); \
g = add32x4(xor128(c, g), a); \
f = add32x4(xor128(be, f), d); \
k = add32x4(k, be); \
k = add32x4(k, _mm_shuffle_epi8(f, h)); \
f = add32x4(f, g); \
g = add32x4(g, f); \
f = mul32x4(f, cc1); \
} while (0)
    while (iters-- != 0) {
      CHUNK();
      s += 80;
    }
    if (len != 0) {
      // Final chunk overlaps the previous one: back up so exactly the last
      // 80 bytes are hashed.
      h = add32x4(h, _mm_set1_epi32(len));
      s = s + len - 80;
      CHUNK();
    }
  }
  // Cross-mix the lanes.
  g = shuf32x4_0_3_2_1(g);
  k = xor128(k, g);
  f = mul32x4(f, cc1);
  k = mul32x4(k, cc2);
  g = mul32x4(g, cc1);
  h = mul32x4(h, cc2);
  k = add32x4(k, _mm_shuffle_epi8(g, f));
  h = add32x4(h, f);
  f = add32x4(f, h);
  g = add32x4(g, k);
  k = add32x4(k, g);
  k = xor128(k, _mm_shuffle_epi8(f, h));
  // Spill the four lanes to memory and CRC-reduce the 64 bytes to 32 bits.
  __m128i buf[4];
  buf[0] = f;
  buf[1] = g;
  buf[2] = k;
  buf[3] = h;
  s = (char*) buf;
  uint32_t x = fetch32(s);
  uint32_t y = fetch32(s+4);
  uint32_t z = fetch32(s+8);
  x = _mm_crc32_u32(x, fetch32(s+12));
  y = _mm_crc32_u32(y, fetch32(s+16));
  z = _mm_crc32_u32(z * c1, fetch32(s+20));
  x = _mm_crc32_u32(x, fetch32(s+24));
  y = _mm_crc32_u32(y * c1, fetch32(s+28));
  uint32_t o = y;
  z = _mm_crc32_u32(z, fetch32(s+32));
  x = _mm_crc32_u32(x * c1, fetch32(s+36));
  y = _mm_crc32_u32(y, fetch32(s+40));
  z = _mm_crc32_u32(z * c1, fetch32(s+44));
  x = _mm_crc32_u32(x, fetch32(s+48));
  y = _mm_crc32_u32(y * c1, fetch32(s+52));
  z = _mm_crc32_u32(z, fetch32(s+56));
  x = _mm_crc32_u32(x, fetch32(s+60));
  return (o - x + y - z) * c1;
}
  1117. uint32_t farmhash32_sa_with_seed(const char *s, size_t len, uint32_t seed) {
  1118. if (len <= 24) {
  1119. if (len >= 13) return farmhash32_mk_len_13_to_24(s, len, seed * c1);
  1120. else if (len >= 5) return farmhash32_mk_len_5_to_12(s, len, seed);
  1121. else return farmhash32_mk_len_0_to_4(s, len, seed);
  1122. }
  1123. uint32_t h = farmhash32_mk_len_13_to_24(s, 24, seed ^ len);
  1124. return _mm_crc32_u32(farmhash32_sa(s + 24, len - 24) + seed, h);
  1125. }
  1126. #endif
  1127. // farmhash cc
  1128. // This file provides a 32-bit hash equivalent to cityhash32 (v1.1.1)
  1129. // and a 128-bit hash equivalent to cityhash128 (v1.1.1). It also provides
  1130. // a seeded 32-bit hash function similar to cityhash32.
  1131. static inline uint32_t farmhash32_cc_len_13_to_24(const char *s, size_t len) {
  1132. uint32_t a = fetch32(s - 4 + (len >> 1));
  1133. uint32_t b = fetch32(s + 4);
  1134. uint32_t c = fetch32(s + len - 8);
  1135. uint32_t d = fetch32(s + (len >> 1));
  1136. uint32_t e = fetch32(s);
  1137. uint32_t f = fetch32(s + len - 4);
  1138. uint32_t h = len;
  1139. return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, h)))))));
  1140. }
  1141. static inline uint32_t farmhash32_cc_len_0_to_4(const char *s, size_t len) {
  1142. uint32_t b = 0;
  1143. uint32_t c = 9;
  1144. size_t i;
  1145. for (i = 0; i < len; i++) {
  1146. signed char v = s[i];
  1147. b = b * c1 + v;
  1148. c ^= b;
  1149. }
  1150. return fmix(mur(b, mur(len, c)));
  1151. }
  1152. static inline uint32_t farmhash32_cc_len_5_to_12(const char *s, size_t len) {
  1153. uint32_t a = len, b = len * 5, c = 9, d = b;
  1154. a += fetch32(s);
  1155. b += fetch32(s + len - 4);
  1156. c += fetch32(s + ((len >> 1) & 4));
  1157. return fmix(mur(c, mur(b, mur(a, d))));
  1158. }
// 32-bit hash equivalent to cityhash32 (v1.1.1).
// Short inputs (<= 24 bytes) dispatch to fixed-length helpers; longer
// inputs seed three lanes (h, g, f) from five tail words, consume the body
// 20 bytes per iteration with a PERMUTE3 lane rotation, then fold.
uint32_t farmhash32_cc(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? farmhash32_cc_len_0_to_4(s, len) : farmhash32_cc_len_5_to_12(s, len)) :
        farmhash32_cc_len_13_to_24(s, len);
  }
  // len > 24
  uint32_t h = len, g = c1 * len, f = g;
  // Five murmur-style-scrambled words from the last 20 bytes.
  uint32_t a0 = ror32(fetch32(s + len - 4) * c1, 17) * c2;
  uint32_t a1 = ror32(fetch32(s + len - 8) * c1, 17) * c2;
  uint32_t a2 = ror32(fetch32(s + len - 16) * c1, 17) * c2;
  uint32_t a3 = ror32(fetch32(s + len - 12) * c1, 17) * c2;
  uint32_t a4 = ror32(fetch32(s + len - 20) * c1, 17) * c2;
  // Absorb the tail words into the three lanes.
  h ^= a0;
  h = ror32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = ror32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = ror32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = ror32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = ror32(f, 19);
  f = f * 5 + 0xe6546b64;
  // Main loop: 20 bytes per iteration; at least one pass since len > 24.
  size_t iters = (len - 1) / 20;
  do {
    uint32_t a0 = ror32(fetch32(s) * c1, 17) * c2;
    uint32_t a1 = fetch32(s + 4);
    uint32_t a2 = ror32(fetch32(s + 8) * c1, 17) * c2;
    uint32_t a3 = ror32(fetch32(s + 12) * c1, 17) * c2;
    uint32_t a4 = fetch32(s + 16);
    h ^= a0;
    h = ror32(h, 18);
    h = h * 5 + 0xe6546b64;
    f += a1;
    f = ror32(f, 19);
    f = f * c1;
    g += a2;
    g = ror32(g, 18);
    g = g * 5 + 0xe6546b64;
    h ^= a3 + a1;
    h = ror32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a4;
    g = bswap32(g) * 5;
    h += a4 * 5;
    h = bswap32(h);
    f += a0;
    PERMUTE3(&f, &h, &g);  // rotate the three lanes each round
    s += 20;
  } while (--iters != 0);
  // Final avalanche: scramble each lane, then fold g and f into h.
  g = ror32(g, 11) * c1;
  g = ror32(g, 17) * c1;
  f = ror32(f, 11) * c1;
  f = ror32(f, 17) * c1;
  h = ror32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = ror32(h, 17) * c1;
  h = ror32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = ror32(h, 17) * c1;
  return h;
}
  1226. uint32_t farmhash32_cc_with_seed(const char *s, size_t len, uint32_t seed) {
  1227. if (len <= 24) {
  1228. if (len >= 13) return farmhash32_mk_len_13_to_24(s, len, seed * c1);
  1229. else if (len >= 5) return farmhash32_mk_len_5_to_12(s, len, seed);
  1230. else return farmhash32_mk_len_0_to_4(s, len, seed);
  1231. }
  1232. uint32_t h = farmhash32_mk_len_13_to_24(s, 24, seed ^ len);
  1233. return mur(farmhash32_cc(s + 24, len - 24) + seed, h);
  1234. }
// 64-bit hash (cc flavor) for 0..16 byte inputs, split by length class.
static inline uint64_t farmhash_cc_len_0_to_16(const char *s, size_t len) {
  if (len >= 8) {
    // Two (possibly overlapping) 64-bit loads cover the whole buffer.
    uint64_t mul = k2 + len * 2;
    uint64_t a = fetch64(s) + k2;
    uint64_t b = fetch64(s + len - 8);
    uint64_t c = ror64(b, 37) * mul + a;
    uint64_t d = (ror64(a, 25) + b) * mul;
    return farmhash_len_16_mul(c, d, mul);
  }
  if (len >= 4) {
    // Two (possibly overlapping) 32-bit loads.
    uint64_t mul = k2 + len * 2;
    uint64_t a = fetch32(s);
    return farmhash_len_16_mul(len + (a << 3), fetch32(s + len - 4), mul);
  }
  if (len > 0) {
    // 1..3 bytes: first, middle, and last byte.
    uint8_t a = s[0];
    uint8_t b = s[len >> 1];
    uint8_t c = s[len - 1];
    uint32_t y = ((uint32_t) a) + (((uint32_t) b) << 8);
    uint32_t z = len + (((uint32_t) c) << 2);
    return smix(y * k2 ^ z * k0) * k2;
  }
  // Empty input hashes to the constant k2.
  return k2;
}
  1259. // Return a 16-byte hash for 48 bytes. Quick and dirty.
  1260. // Callers do best to use "random-looking" values for a and b.
static inline uint128_c_t weak_farmhash_cc_len_32_with_seeds_vals(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = ror64(b + a + z, 21);
  uint64_t c = a;  // snapshot of a after absorbing w, re-added at the end
  a += x;
  a += y;
  b += ror64(a, 44);
  return make_uint128_c_t(a + z, b + c);
}
  1271. // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
  1272. static inline uint128_c_t weak_farmhash_cc_len_32_with_seeds(
  1273. const char* s, uint64_t a, uint64_t b) {
  1274. return weak_farmhash_cc_len_32_with_seeds_vals(fetch64(s),
  1275. fetch64(s + 8),
  1276. fetch64(s + 16),
  1277. fetch64(s + 24),
  1278. a,
  1279. b);
  1280. }
  1281. // A subroutine for cityhash128(). Returns a decent 128-bit hash for strings
  1282. // of any length representable in signed long. Based on City and Murmur.
static inline uint128_c_t farmhash_cc_city_murmur(const char *s, size_t len, uint128_c_t seed) {
  uint64_t a = uint128_c_t_low64(seed);
  uint64_t b = uint128_c_t_high64(seed);
  uint64_t c = 0;
  uint64_t d = 0;
  // NOTE(review): len is assumed to fit in signed long (documented above).
  signed long l = len - 16;
  if (l <= 0) { // len <= 16
    a = smix(a * k1) * k1;
    c = b * k1 + farmhash_cc_len_0_to_16(s, len);
    d = smix(a + (len >= 8 ? fetch64(s) : c));
  } else { // len > 16
    c = farmhash_len_16(fetch64(s + len - 8) + k1, a);
    d = farmhash_len_16(b + len, c + fetch64(s + len - 16));
    a += d;
    // Murmur-style loop over the body, 16 bytes per iteration;
    // len > 16 guarantees at least one pass.
    do {
      a ^= smix(fetch64(s) * k1) * k1;
      a *= k1;
      b ^= a;
      c ^= smix(fetch64(s + 8) * k1) * k1;
      c *= k1;
      d ^= c;
      s += 16;
      l -= 16;
    } while (l > 0);
  }
  // Fold the four lanes into a 128-bit result.
  a = farmhash_len_16(a, c);
  b = farmhash_len_16(d, b);
  return make_uint128_c_t(a ^ b, farmhash_len_16(b, a));
}
// Seeded 128-bit hash equivalent to cityhash128 (v1.1.1).
uint128_c_t farmhash128_cc_city_with_seed(const char *s, size_t len, uint128_c_t seed) {
  if (len < 128) {
    return farmhash_cc_city_murmur(s, len, seed);
  }
  // We expect len >= 128 to be the common case. Keep 56 bytes of state:
  // v, w, x, y, and z.
  uint128_c_t v, w;
  uint64_t x = uint128_c_t_low64(seed);
  uint64_t y = uint128_c_t_high64(seed);
  uint64_t z = len * k1;
  size_t tail_done;
  v.a = ror64(y ^ k1, 49) * k1 + fetch64(s);
  v.b = ror64(v.a, 42) * k1 + fetch64(s + 8);
  w.a = ror64(y + z, 35) * k1 + x;
  w.b = ror64(x + fetch64(s + 88), 53) * k1;
  // This is the same inner loop as cityhash64(), manually unrolled:
  // two 64-byte rounds per iteration, consuming 128 bytes.
  do {
    x = ror64(x + y + v.a + fetch64(s + 8), 37) * k1;
    y = ror64(y + v.b + fetch64(s + 48), 42) * k1;
    x ^= w.b;
    y += v.a + fetch64(s + 40);
    z = ror64(z + w.a, 33) * k1;
    v = weak_farmhash_cc_len_32_with_seeds(s, v.b * k1, x + w.a);
    w = weak_farmhash_cc_len_32_with_seeds(s + 32, z + w.b, y + fetch64(s + 16));
    swap64(&z, &x);
    s += 64;
    x = ror64(x + y + v.a + fetch64(s + 8), 37) * k1;
    y = ror64(y + v.b + fetch64(s + 48), 42) * k1;
    x ^= w.b;
    y += v.a + fetch64(s + 40);
    z = ror64(z + w.a, 33) * k1;
    v = weak_farmhash_cc_len_32_with_seeds(s, v.b * k1, x + w.a);
    w = weak_farmhash_cc_len_32_with_seeds(s + 32, z + w.b, y + fetch64(s + 16));
    swap64(&z, &x);
    s += 64;
    len -= 128;
  } while (likely(len >= 128));
  x += ror64(v.a + z, 49) * k0;
  y = y * k0 + ror64(w.b, 37);
  z = z * k0 + ror64(w.a, 27);
  w.a *= 9;
  v.a *= k0;
  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
  for (tail_done = 0; tail_done < len; ) {
    tail_done += 32;
    y = ror64(x + y, 42) * k0 + v.b;
    w.a += fetch64(s + len - tail_done + 16);
    x = x * k0 + w.a;
    z += w.b + fetch64(s + len - tail_done);
    w.b += v.a;
    v = weak_farmhash_cc_len_32_with_seeds(s + len - tail_done, v.a + z, v.b);
    v.a *= k0;
  }
  // At this point our 56 bytes of state should contain more than
  // enough information for a strong 128-bit hash. We use two
  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
  x = farmhash_len_16(x, v.a);
  y = farmhash_len_16(y + z, w.a);
  return make_uint128_c_t(farmhash_len_16(x + v.b, w.b) + y,
                          farmhash_len_16(x + w.b, y + v.b));
}
  1373. static inline uint128_c_t farmhash128_cc_city(const char *s, size_t len) {
  1374. return len >= 16 ?
  1375. farmhash128_cc_city_with_seed(s + 16, len - 16,
  1376. make_uint128_c_t(fetch64(s), fetch64(s + 8) + k0)) :
  1377. farmhash128_cc_city_with_seed(s, len, make_uint128_c_t(k0, k1));
  1378. }
  1379. uint128_c_t farmhash_cc_fingerprint128(const char* s, size_t len) {
  1380. return farmhash128_cc_city(s, len);
  1381. }
  1382. // BASIC STRING HASHING
  1383. // farmhash function for a byte array. See also Hash(), below.
  1384. // May change from time to time, may differ on different platforms, may differ
  1385. // depending on NDEBUG.
  1386. // As this short 32-bit variant leads to different hash values with the x86_64
  1387. // SSE4.1 variant, compared to a 32bit or gcc <4.4 result, it is not portable and
  1388. // not recommended. Disabled in SMHasher
  1389. uint32_t farmhash32(const char* s, size_t len) {
  1390. return debug_tweak32(
  1391. #if CAN_USE_SSE41 && x86_64
  1392. farmhash32_nt(s, len)
  1393. #elif CAN_USE_SSE42 && CAN_USE_AESNI
  1394. farmhash32_su(s, len)
  1395. #elif CAN_USE_SSE41 && CAN_USE_SSE42
  1396. farmhash32_sa(s, len)
  1397. #else
  1398. farmhash32_mk(s, len)
  1399. #endif
  1400. );
  1401. }
  1402. // Hash function for a byte array. For convenience, a 32-bit seed is also
  1403. // hashed into the result.
  1404. // May change from time to time, may differ on different platforms, may differ
  1405. // depending on NDEBUG.
  1406. // As this short 32-bit variant leads to different hash values with the x86_64
  1407. // SSE4.1 variant, compared to a 32bit or gcc <4.4 result, it is not portable and
  1408. // not recommended. Disabled in SMHasher
  1409. uint32_t farmhash32_with_seed(const char* s, size_t len, uint32_t seed) {
  1410. return debug_tweak32(
  1411. #if CAN_USE_SSE41 && x86_64
  1412. farmhash32_nt_with_seed(s, len, seed)
  1413. #elif CAN_USE_SSE42 && CAN_USE_AESNI
  1414. farmhash32_su_with_seed(s, len, seed)
  1415. #elif CAN_USE_SSE41 && CAN_USE_SSE42
  1416. farmhash32_sa_with_seed(s, len, seed)
  1417. #else
  1418. farmhash32_mk_with_seed(s, len, seed)
  1419. #endif
  1420. );
  1421. }
  1422. // Hash function for a byte array. For convenience, a 64-bit seed is also
  1423. // hashed into the result. See also farmhash(), below.
  1424. // May change from time to time, may differ on different platforms, may differ
  1425. // depending on NDEBUG.
  1426. uint64_t farmhash64(const char* s, size_t len) {
  1427. return debug_tweak64(
  1428. #if CAN_USE_SSE41 && CAN_USE_SSE42 && x86_64
  1429. farmhash64_te(s, len)
  1430. #else
  1431. farmhash64_xo(s, len)
  1432. #endif
  1433. );
  1434. }
  1435. // Hash function for a byte array.
  1436. // May change from time to time, may differ on different platforms, may differ
  1437. // depending on NDEBUG.
  1438. size_t farmhash(const char* s, size_t len) {
  1439. return sizeof(size_t) == 8 ? farmhash64(s, len) : farmhash32(s, len);
  1440. }
  1441. // Hash function for a byte array. For convenience, a 64-bit seed is also
  1442. // hashed into the result.
  1443. // May change from time to time, may differ on different platforms, may differ
  1444. // depending on NDEBUG.
  1445. uint64_t farmhash64_with_seed(const char* s, size_t len, uint64_t seed) {
  1446. return debug_tweak64(farmhash64_na_with_seed(s, len, seed));
  1447. }
  1448. // Hash function for a byte array. For convenience, two seeds are also
  1449. // hashed into the result.
  1450. // May change from time to time, may differ on different platforms, may differ
  1451. // depending on NDEBUG.
  1452. uint64_t farmhash64_with_seeds(const char* s, size_t len, uint64_t seed0, uint64_t seed1) {
  1453. return debug_tweak64(farmhash64_na_with_seeds(s, len, seed0, seed1));
  1454. }
  1455. // Hash function for a byte array.
  1456. // May change from time to time, may differ on different platforms, may differ
  1457. // depending on NDEBUG.
  1458. uint128_c_t farmhash128(const char* s, size_t len) {
  1459. return debug_tweak128(farmhash_cc_fingerprint128(s, len));
  1460. }
  1461. // Hash function for a byte array. For convenience, a 128-bit seed is also
  1462. // hashed into the result.
  1463. // May change from time to time, may differ on different platforms, may differ
  1464. // depending on NDEBUG.
  1465. uint128_c_t farmhash128_with_seed(const char* s, size_t len, uint128_c_t seed) {
  1466. return debug_tweak128(farmhash128_cc_city_with_seed(s, len, seed));
  1467. }
  1468. // BASIC NON-STRING HASHING
  1469. // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
  1470. // Fingerprint function for a byte array. Most useful in 32-bit binaries.
  1471. uint32_t farmhash_fingerprint32(const char* s, size_t len) {
  1472. return farmhash32_mk(s, len);
  1473. }
  1474. // Fingerprint function for a byte array.
  1475. uint64_t farmhash_fingerprint64(const char* s, size_t len) {
  1476. return farmhash64_na(s, len);
  1477. }
  1478. // Fingerprint function for a byte array.
  1479. uint128_c_t farmhash_fingerprint128(const char* s, size_t len) {
  1480. return farmhash_cc_fingerprint128(s, len);
  1481. }