/*
 *  Copyright (c) 2016-2017 Positive Technologies, https://www.ptsecurity.com,
 *  Fast Positive Hash.
 *
 *  Portions Copyright (c) 2010-2017 Leonid Yuriev <leo@yuriev.ru>,
 *  The 1Hippeus project (t1h).
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty. In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1. The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgement in the product documentation would be
 *     appreciated but is not required.
 *  2. Altered source versions must be plainly marked as such, and must not be
 *     misrepresented as being the original software.
 *  3. This notice may not be removed or altered from any source distribution.
 */
/*
 * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
 * by [Positive Technologies](https://www.ptsecurity.ru)
 *
 * Briefly, it is a 64-bit hash function:
 *  1. Designed for 64-bit little-endian platforms, predominantly x86_64,
 *     but it runs without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and the others that do not rely on specific hardware tricks.
 *  3. Not suitable for cryptography.
 *
 * ACKNOWLEDGEMENT:
 * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
 * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
 */
#include "t1ha.h"
#include <string.h> /* for memcpy() */

#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) ||           \
    !defined(__ORDER_BIG_ENDIAN__)
#ifndef _MSC_VER
#include <sys/param.h> /* for endianness */
#endif
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER
#else
#define __ORDER_LITTLE_ENDIAN__ 1234
#define __ORDER_BIG_ENDIAN__ 4321
#if defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) ||                   \
    defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) ||    \
    defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) ||            \
    defined(__i386) || defined(__x86_64__) || defined(_M_IX86) ||              \
    defined(_M_X64) || defined(i386) || defined(_X86_) || defined(__i386__) || \
    defined(_X86_64_) || defined(_M_ARM) || defined(__e2k__)
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(__ARMEB__) || \
    defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(__MIPSEB__) ||   \
    defined(_MIPSEB) || defined(__MIPSEB)
#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__
#else
#error __BYTE_ORDER__ should be defined.
#endif
#endif
#endif

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ &&                               \
    __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
#error Unsupported byte order.
#endif

#if !defined(UNALIGNED_OK)
#if defined(__i386) || defined(__x86_64__) || defined(_M_IX86) ||              \
    defined(_M_X64) || defined(i386) || defined(_X86_) || defined(__i386__) || \
    defined(_X86_64_)
#define UNALIGNED_OK 1
#else
#define UNALIGNED_OK 0
#endif
#endif

#ifndef __has_builtin
#define __has_builtin(x) (0)
#endif

#if __GNUC_PREREQ(4, 4) || defined(__clang__)

#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>
#include <x86intrin.h>
#endif
#define likely(cond) __builtin_expect(!!(cond), 1)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#if __GNUC_PREREQ(4, 5) || defined(__clang__)
#define unreachable() __builtin_unreachable()
#endif
#define bswap64(v) __builtin_bswap64(v)
#define bswap32(v) __builtin_bswap32(v)
#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
#define bswap16(v) __builtin_bswap16(v)
#endif
#if __GNUC_PREREQ(4, 3) || __has_attribute(unused)
#define maybe_unused __attribute__((unused))
#endif
#elif defined(_MSC_VER)

#if _MSC_FULL_VER < 190024215
#if _MSC_FULL_VER < 180040629 && defined(_M_IX86)
#error Please use Visual Studio 2015 (MSC 19.0) or newer for 32-bit target.
#else
#pragma message(                                                               \
    "It is recommended to use Visual Studio 2015 (MSC 19.0) or newer.")
#endif
#endif

#pragma warning(disable : 4710) /* C4710: 'mux64': function not inlined */
#pragma warning(disable : 4711) /* C4711: function 'x86_cpu_features'          \
                                   selected for automatic inline expansion */

#include <intrin.h>
#include <stdlib.h>
#define likely(cond) (cond)
#define unlikely(cond) (cond)
#define unreachable() __assume(0)
#define bswap64(v) _byteswap_uint64(v)
#define bswap32(v) _byteswap_ulong(v)
#define bswap16(v) _byteswap_ushort(v)
#define rot64(v, s) _rotr64(v, s)
#define rot32(v, s) _rotr(v, s)
#define __inline __forceinline

#if defined(_M_ARM64) || defined(_M_X64)
#pragma intrinsic(_umul128)
#define mul_64x64_128(a, b, ph) _umul128(a, b, ph)
#pragma intrinsic(__umulh)
#define mul_64x64_high(a, b) __umulh(a, b)
#endif

#if defined(_M_IX86)
#pragma intrinsic(__emulu)
#define mul_32x32_64(a, b) __emulu(a, b)
#elif defined(_M_ARM)
#define mul_32x32_64(a, b) _arm_umull(a, b)
#endif

#endif /* Compiler */
#ifndef likely
#define likely(cond) (cond)
#endif
#ifndef unlikely
#define unlikely(cond) (cond)
#endif
#ifndef maybe_unused
#define maybe_unused
#endif
#ifndef unreachable
#define unreachable()                                                          \
  do {                                                                         \
  } while (1)
#endif

#ifndef bswap64
static __inline uint64_t bswap64(uint64_t v) {
  return v << 56 | v >> 56 | ((v << 40) & 0x00ff000000000000ull) |
         ((v << 24) & 0x0000ff0000000000ull) |
         ((v << 8) & 0x000000ff00000000ull) |
         ((v >> 8) & 0x00000000ff000000ull) |
         ((v >> 24) & 0x0000000000ff0000ull) |
         ((v >> 40) & 0x000000000000ff00ull);
}
#endif /* bswap64 */

#ifndef bswap32
static __inline uint32_t bswap32(uint32_t v) {
  return v << 24 | v >> 24 | ((v << 8) & 0x00ff0000) | ((v >> 8) & 0x0000ff00);
}
#endif /* bswap32 */

#ifndef bswap16
static __inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; }
#endif /* bswap16 */

#ifndef rot64
static __inline uint64_t rot64(uint64_t v, unsigned s) {
  return (v >> s) | (v << (64 - s));
}
#endif /* rot64 */

#ifndef rot32
static __inline uint32_t rot32(uint32_t v, unsigned s) {
  return (v >> s) | (v << (32 - s));
}
#endif /* rot32 */

#ifndef mul_32x32_64
static __inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) {
  return a * (uint64_t)b;
}
#endif /* mul_32x32_64 */
/***************************************************************************/

static __inline uint64_t fetch64_le(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint64_t *)v;
#else
  return bswap64(*(const uint64_t *)v);
#endif
}

static __inline uint32_t fetch32_le(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint32_t *)v;
#else
  return bswap32(*(const uint32_t *)v);
#endif
}

static __inline uint16_t fetch16_le(const void *v) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return *(const uint16_t *)v;
#else
  return bswap16(*(const uint16_t *)v);
#endif
}
static __inline uint64_t tail64_le(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  uint64_t r = 0;
  switch (tail & 7) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is better when neither copying for
   * alignment nor byte reordering is needed. */
  case 0:
    return fetch64_le(p);
  case 7:
    r = (uint64_t)p[6] << 8;
  case 6:
    r += p[5];
    r <<= 8;
  case 5:
    r += p[4];
    r <<= 32;
  case 4:
    return r + fetch32_le(p);
  case 3:
    r = (uint64_t)p[2] << 16;
  case 2:
    return r + fetch16_le(p);
  case 1:
    return p[0];
#else
  /* For most CPUs this code is better than copying
   * for alignment and/or byte reordering. */
  case 0:
    r = p[7] << 8;
  case 7:
    r += p[6];
    r <<= 8;
  case 6:
    r += p[5];
    r <<= 8;
  case 5:
    r += p[4];
    r <<= 8;
  case 4:
    r += p[3];
    r <<= 8;
  case 3:
    r += p[2];
    r <<= 8;
  case 2:
    r += p[1];
    r <<= 8;
  case 1:
    return r + p[0];
#endif
  }
  unreachable();
}
/* 'magic' primes */
static const uint64_t p0 = 17048867929148541611ull;
static const uint64_t p1 = 9386433910765580089ull;
static const uint64_t p2 = 15343884574428479051ull;
static const uint64_t p3 = 13662985319504319857ull;
static const uint64_t p4 = 11242949449147999147ull;
static const uint64_t p5 = 13862205317416547141ull;
static const uint64_t p6 = 14653293970879851569ull;

/* rotations */
static const unsigned s0 = 41;
static const unsigned s1 = 17;
static const unsigned s2 = 31;

/* xor-mul-xor mixer */
static __inline uint64_t mix(uint64_t v, uint64_t p) {
  v *= p;
  return v ^ rot64(v, s0);
}

static maybe_unused __inline unsigned add_with_carry(uint64_t *sum,
                                                     uint64_t addend) {
  *sum += addend;
  return *sum < addend;
}
/* xor the high and low parts of the full 128-bit product */
static __inline uint64_t mux64(uint64_t v, uint64_t p) {
#ifdef __SIZEOF_INT128__
  __uint128_t r = (__uint128_t)v * (__uint128_t)p;
  /* modern GCC could nicely optimize this */
  return r ^ (r >> 64);
#elif defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128
  __uint128 r = (__uint128)v * (__uint128)p;
  return r ^ (r >> 64);
#elif defined(mul_64x64_128)
  uint64_t l, h;
  l = mul_64x64_128(v, p, &h);
  return l ^ h;
#elif defined(mul_64x64_high)
  uint64_t l, h;
  l = v * p;
  h = mul_64x64_high(v, p);
  return l ^ h;
#else
  /* performs 64x64 to 128 bit multiplication */
  uint64_t ll = mul_32x32_64((uint32_t)v, (uint32_t)p);
  uint64_t lh = mul_32x32_64(v >> 32, (uint32_t)p);
  uint64_t hl = mul_32x32_64(p >> 32, (uint32_t)v);
  uint64_t hh =
      mul_32x32_64(v >> 32, p >> 32) + (lh >> 32) + (hl >> 32) +
      /* A few simplifications are possible here for 32-bit architectures,
       * but they would break compatibility with the original 64-bit version.
       * That seems a very bad idea, because the 32-bit t1ha would then still
       * be (relatively) slow and yet no longer compatible. */
      add_with_carry(&ll, lh << 32) + add_with_carry(&ll, hl << 32);
  return hh ^ ll;
#endif
}
uint64_t t1ha(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;

  const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  uint64_t align[4];

  if (unlikely(len > 32)) {
    uint64_t c = rot64(len, s1) + seed;
    uint64_t d = len ^ rot64(seed, s1);
    const void *detent = (const uint8_t *)data + len - 31;
    do {
      const uint64_t *v = (const uint64_t *)data;
      if (unlikely(need_align))
        v = (const uint64_t *)memcpy(&align, v, 32);

      uint64_t w0 = fetch64_le(v + 0);
      uint64_t w1 = fetch64_le(v + 1);
      uint64_t w2 = fetch64_le(v + 2);
      uint64_t w3 = fetch64_le(v + 3);

      uint64_t d02 = w0 ^ rot64(w2 + d, s1);
      uint64_t c13 = w1 ^ rot64(w3 + c, s1);
      c += a ^ rot64(w0, s0);
      d -= b ^ rot64(w1, s2);
      a ^= p1 * (d02 + w3);
      b ^= p0 * (c13 + w2);
      data = (const uint64_t *)data + 4;
    } while (likely(data < detent));

    a ^= p6 * (rot64(c, s1) + d);
    b ^= p5 * (c + rot64(d, s1));
    len &= 31;
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_align) && len > 8)
    v = (const uint64_t *)memcpy(&align, v, len);

  switch (len) {
  default:
    b += mux64(fetch64_le(v++), p4);
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(fetch64_le(v++), p3);
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(fetch64_le(v++), p2);
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_le(v, len), p1);
  case 0:
    return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0);
  }
}
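
/* A minimal, self-contained usage sketch (not part of the original source):
 * it hashes a sample buffer with the portable 64-bit little-endian function
 * above, once per seed. The guard macro T1HA_EXAMPLE_MAIN is hypothetical and
 * only exists so this illustration stays out of normal library builds. */
#ifdef T1HA_EXAMPLE_MAIN
#include <stdio.h>

int main(void) {
  static const char sample[] = "The quick brown fox jumps over the lazy dog";
  /* Hash the buffer without the trailing '\0'; the seed lets callers derive
   * independent hash streams from the same data. */
  uint64_t h0 = t1ha(sample, sizeof(sample) - 1, 0);
  uint64_t h1 = t1ha(sample, sizeof(sample) - 1, 42);
  printf("seed=0  -> %016llx\n", (unsigned long long)h0);
  printf("seed=42 -> %016llx\n", (unsigned long long)h1);
  return 0;
}
#endif /* T1HA_EXAMPLE_MAIN */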
/***************************************************************************/

static maybe_unused __inline uint64_t fetch64_be(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint64_t *)v;
#else
  return bswap64(*(const uint64_t *)v);
#endif
}

static maybe_unused __inline uint32_t fetch32_be(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint32_t *)v;
#else
  return bswap32(*(const uint32_t *)v);
#endif
}

static maybe_unused __inline uint16_t fetch16_be(const void *v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return *(const uint16_t *)v;
#else
  return bswap16(*(const uint16_t *)v);
#endif
}
static maybe_unused __inline uint64_t tail64_be(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  switch (tail & 7) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* For most CPUs this code is better when neither copying for
   * alignment nor byte reordering is needed. */
  case 1:
    return p[0];
  case 2:
    return fetch16_be(p);
  case 3:
    return (uint32_t)fetch16_be(p) << 8 | p[2];
  case 4:
    return fetch32_be(p);
  case 5:
    return (uint64_t)fetch32_be(p) << 8 | p[4];
  case 6:
    return (uint64_t)fetch32_be(p) << 16 | fetch16_be(p + 4);
  case 7:
    return (uint64_t)fetch32_be(p) << 24 | (uint32_t)fetch16_be(p + 4) << 8 |
           p[6];
  case 0:
    return fetch64_be(p);
#else
  /* For most CPUs this code is better than copying
   * for alignment and/or byte reordering. */
  case 1:
    return p[0];
  case 2:
    return p[1] | (uint32_t)p[0] << 8;
  case 3:
    return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
  case 4:
    return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
           (uint32_t)p[0] << 24;
  case 5:
    return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 |
           (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32;
  case 6:
    return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 |
           (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40;
  case 7:
    return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 |
           (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 |
           (uint64_t)p[0] << 48;
  case 0:
    return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 |
           (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 |
           (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
#endif
  }
  unreachable();
}
uint64_t t1ha_64be(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;

  const int need_align = (((uintptr_t)data) & 7) != 0 && !UNALIGNED_OK;
  uint64_t align[4];

  if (unlikely(len > 32)) {
    uint64_t c = rot64(len, s1) + seed;
    uint64_t d = len ^ rot64(seed, s1);
    const void *detent = (const uint8_t *)data + len - 31;
    do {
      const uint64_t *v = (const uint64_t *)data;
      if (unlikely(need_align))
        v = (const uint64_t *)memcpy(&align, v, 32);

      uint64_t w0 = fetch64_be(v + 0);
      uint64_t w1 = fetch64_be(v + 1);
      uint64_t w2 = fetch64_be(v + 2);
      uint64_t w3 = fetch64_be(v + 3);

      uint64_t d02 = w0 ^ rot64(w2 + d, s1);
      uint64_t c13 = w1 ^ rot64(w3 + c, s1);
      c += a ^ rot64(w0, s0);
      d -= b ^ rot64(w1, s2);
      a ^= p1 * (d02 + w3);
      b ^= p0 * (c13 + w2);
      data = (const uint64_t *)data + 4;
    } while (likely(data < detent));

    a ^= p6 * (rot64(c, s1) + d);
    b ^= p5 * (c + rot64(d, s1));
    len &= 31;
  }

  const uint64_t *v = (const uint64_t *)data;
  if (unlikely(need_align) && len > 8)
    v = (const uint64_t *)memcpy(&align, v, len);

  switch (len) {
  default:
    b += mux64(fetch64_be(v++), p4);
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(fetch64_be(v++), p3);
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(fetch64_be(v++), p2);
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_be(v, len), p1);
  case 0:
    return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0);
  }
}
/***************************************************************************/

static __inline uint32_t tail32_le(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  uint32_t r = 0;
  switch (tail & 3) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* For most CPUs this code is better when neither copying for
   * alignment nor byte reordering is needed. */
  case 0:
    return fetch32_le(p);
  case 3:
    r = (uint32_t)p[2] << 16;
  case 2:
    return r + fetch16_le(p);
  case 1:
    return p[0];
#else
  /* For most CPUs this code is better than copying
   * for alignment and/or byte reordering. */
  case 0:
    r += p[3];
    r <<= 8;
  case 3:
    r += p[2];
    r <<= 8;
  case 2:
    r += p[1];
    r <<= 8;
  case 1:
    return r + p[0];
#endif
  }
  unreachable();
}

static __inline uint32_t tail32_be(const void *v, size_t tail) {
  const uint8_t *p = (const uint8_t *)v;
  switch (tail & 3) {
#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  /* For most CPUs this code is better when neither copying for
   * alignment nor byte reordering is needed. */
  case 1:
    return p[0];
  case 2:
    return fetch16_be(p);
  case 3:
    return fetch16_be(p) << 8 | p[2];
  case 0:
    return fetch32_be(p);
#else
  /* For most CPUs this code is better than copying
   * for alignment and/or byte reordering. */
  case 1:
    return p[0];
  case 2:
    return p[1] | (uint32_t)p[0] << 8;
  case 3:
    return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16;
  case 0:
    return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 |
           (uint32_t)p[0] << 24;
#endif
  }
  unreachable();
}
static __inline uint64_t remix32(uint32_t a, uint32_t b) {
  a ^= rot32(b, 13);
  uint64_t l = a | (uint64_t)b << 32;
  l *= p0;
  l ^= l >> 41;
  return l;
}

static __inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, uint32_t p) {
  uint64_t l = mul_32x32_64(*b + v, p);
  *a ^= (uint32_t)l;
  *b += (uint32_t)(l >> 32);
}

/* 32-bit 'magic' primes */
static const uint32_t q0 = 0x92D78269;
static const uint32_t q1 = 0xCA9B4735;
static const uint32_t q2 = 0xA4ABA1C3;
static const uint32_t q3 = 0xF6499843;
static const uint32_t q4 = 0x86F0FD61;
static const uint32_t q5 = 0xCA2DA6FB;
static const uint32_t q6 = 0xC4BB3575;
uint64_t t1ha_32le(const void *data, size_t len, uint64_t seed) {
  uint32_t a = rot32((uint32_t)len, s1) + (uint32_t)seed;
  uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32);

  const int need_align = (((uintptr_t)data) & 3) != 0 && !UNALIGNED_OK;
  uint32_t align[4];

  if (unlikely(len > 16)) {
    uint32_t c = ~a;
    uint32_t d = rot32(b, 5);
    const void *detent = (const uint8_t *)data + len - 15;
    do {
      const uint32_t *v = (const uint32_t *)data;
      if (unlikely(need_align))
        v = (const uint32_t *)memcpy(&align, v, 16);

      uint32_t w0 = fetch32_le(v + 0);
      uint32_t w1 = fetch32_le(v + 1);
      uint32_t w2 = fetch32_le(v + 2);
      uint32_t w3 = fetch32_le(v + 3);

      uint32_t c02 = w0 ^ rot32(w2 + c, 11);
      uint32_t d13 = w1 + rot32(w3 + d, s1);
      c ^= rot32(b + w1, 7);
      d ^= rot32(a + w0, 3);
      b = q1 * (c02 + w3);
      a = q0 * (d13 ^ w2);

      data = (const uint32_t *)data + 4;
    } while (likely(data < detent));

    c += a;
    d += b;
    a ^= q6 * (rot32(c, 16) + d);
    b ^= q5 * (c + rot32(d, 16));
    len &= 15;
  }

  const uint8_t *v = (const uint8_t *)data;
  if (unlikely(need_align) && len > 4)
    v = (const uint8_t *)memcpy(&align, v, len);

  switch (len) {
  default:
    mixup32(&a, &b, fetch32_le(v), q4);
    v += 4;
  case 12:
  case 11:
  case 10:
  case 9:
    mixup32(&b, &a, fetch32_le(v), q3);
    v += 4;
  case 8:
  case 7:
  case 6:
  case 5:
    mixup32(&a, &b, fetch32_le(v), q2);
    v += 4;
  case 4:
  case 3:
  case 2:
  case 1:
    mixup32(&b, &a, tail32_le(v, len), q1);
  case 0:
    return remix32(a, b);
  }
}
uint64_t t1ha_32be(const void *data, size_t len, uint64_t seed) {
  uint32_t a = rot32((uint32_t)len, s1) + (uint32_t)seed;
  uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32);

  const int need_align = (((uintptr_t)data) & 3) != 0 && !UNALIGNED_OK;
  uint32_t align[4];

  if (unlikely(len > 16)) {
    uint32_t c = ~a;
    uint32_t d = rot32(b, 5);
    const void *detent = (const uint8_t *)data + len - 15;
    do {
      const uint32_t *v = (const uint32_t *)data;
      if (unlikely(need_align))
        v = (const uint32_t *)memcpy(&align, v, 16);

      uint32_t w0 = fetch32_be(v + 0);
      uint32_t w1 = fetch32_be(v + 1);
      uint32_t w2 = fetch32_be(v + 2);
      uint32_t w3 = fetch32_be(v + 3);

      uint32_t c02 = w0 ^ rot32(w2 + c, 11);
      uint32_t d13 = w1 + rot32(w3 + d, s1);
      c ^= rot32(b + w1, 7);
      d ^= rot32(a + w0, 3);
      b = q1 * (c02 + w3);
      a = q0 * (d13 ^ w2);

      data = (const uint32_t *)data + 4;
    } while (likely(data < detent));

    c += a;
    d += b;
    a ^= q6 * (rot32(c, 16) + d);
    b ^= q5 * (c + rot32(d, 16));
    len &= 15;
  }

  const uint8_t *v = (const uint8_t *)data;
  if (unlikely(need_align) && len > 4)
    v = (const uint8_t *)memcpy(&align, v, len);

  switch (len) {
  default:
    mixup32(&a, &b, fetch32_be(v), q4);
    v += 4;
  case 12:
  case 11:
  case 10:
  case 9:
    mixup32(&b, &a, fetch32_be(v), q3);
    v += 4;
  case 8:
  case 7:
  case 6:
  case 5:
    mixup32(&a, &b, fetch32_be(v), q2);
    v += 4;
  case 4:
  case 3:
  case 2:
  case 1:
    mixup32(&b, &a, tail32_be(v, len), q1);
  case 0:
    return remix32(a, b);
  }
}
/***************************************************************************/

#if (defined(__x86_64__) && (defined(__SSE4_2__) || __GNUC_PREREQ(4, 4) ||     \
                             __has_attribute(target))) ||                     \
    defined(_M_X64) || defined(_X86_64_)

#include <nmmintrin.h>

uint64_t
#if __GNUC_PREREQ(4, 4) || __has_attribute(target)
    __attribute__((target("sse4.2")))
#endif
    t1ha_ia32crc(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;
  const uint64_t *v = (const uint64_t *)data;

  if (unlikely(len > 32)) {
    const void *detent = (const uint8_t *)data + len - 31;

    uint32_t x = (uint32_t)b;
    uint32_t y = (uint32_t)a;
    uint32_t z = (uint32_t)(~a ^ b);
    do {
      uint32_t t = (uint32_t)a + x;
      a = rot64(a, 17) + p0 * v[0];
      x += (uint32_t)_mm_crc32_u64(y, v[1]);
      y += (uint32_t)_mm_crc32_u64(z, v[2]);
      z += (uint32_t)_mm_crc32_u64(t, v[3]);
      v += 4;
    } while (likely(detent > (const void *)v));

    a ^= x * p5 + y * p6 + z;
    b = x + y * p5 + z * p6;
    len &= 31;
  }

  switch (len) {
  default:
    b += mux64(*v++, p4);
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(*v++, p3);
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(*v++, p2);
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_le(v, len), p1);
  case 0:
    return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0);
  }
}
#endif /* __x86_64__ */
/***************************************************************************/

#if defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) ||              \
    defined(i386) || defined(_X86_) || defined(__i386__) || defined(_X86_64_)

static uint32_t x86_cpu_features(void) {
#ifdef __GNUC__
  uint32_t eax, ebx, ecx, edx;
  if (__get_cpuid_max(0, NULL) < 1)
    return 0;
  __cpuid_count(1, 0, eax, ebx, ecx, edx);
  return ecx;
#elif defined(_MSC_VER)
  int info[4];
  __cpuid(info, 0);
  if (info[0] < 1)
    return 0;
  __cpuidex(info, 1, 0);
  return info[2];
#else
  return 0;
#endif
}
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||            \
    defined(_M_X64) || defined(i386) || defined(_X86_) || defined(_X86_64_)

#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>

#if defined(__x86_64__) && defined(__ELF__) &&                                 \
    (__GNUC_PREREQ(4, 6) || __has_attribute(ifunc)) && __has_attribute(target)

uint64_t t1ha_ia32aes(const void *data, size_t len, uint64_t seed)
    __attribute__((ifunc("t1ha_aes_resolve")));

static uint64_t t1ha_ia32aes_avx(const void *data, size_t len, uint64_t seed);
static uint64_t t1ha_ia32aes_noavx(const void *data, size_t len, uint64_t seed);

static uint64_t (*t1ha_aes_resolve(void))(const void *, size_t, uint64_t) {
  uint32_t features = x86_cpu_features();
  /* require the CPUID.1:ECX bits for AES-NI, OSXSAVE and AVX */
  if ((features & 0x01A000000) == 0x01A000000)
    return t1ha_ia32aes_avx;
  return t1ha_ia32aes_noavx;
}

static uint64_t __attribute__((target("aes,avx")))
t1ha_ia32aes_avx(const void *data, size_t len, uint64_t seed) {
  uint64_t a = seed;
  uint64_t b = len;

  if (unlikely(len > 32)) {
    __m128i x = _mm_set_epi64x(a, b);
    __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(p0, p1));

    const __m128i *v = (const __m128i *)data;
    const __m128i *const detent =
        (const __m128i *)((const uint8_t *)data + (len & ~15ul));
    data = detent;

    if (len & 16) {
      x = _mm_add_epi64(x, _mm_loadu_si128(v++));
      y = _mm_aesenc_si128(x, y);
    }
    len &= 15;

    if (v + 7 < detent) {
      __m128i salt = y;
      do {
        __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt);
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));

        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));

        salt = _mm_add_epi64(salt, _mm_set_epi64x(p2, p3));
        t = _mm_aesenc_si128(x, t);
        x = _mm_add_epi64(y, x);
        y = t;
      } while (v + 7 < detent);
    }

    while (v < detent) {
      __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++));
      __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++));
      x = _mm_aesdec_si128(x, v0y);
      y = _mm_aesdec_si128(y, v1x);
    }

    x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y);
#if defined(__x86_64__) || defined(_M_X64)
    a = _mm_cvtsi128_si64(x);
#if defined(__SSE4_1__)
    b = _mm_extract_epi64(x, 1);
#else
    b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x));
#endif
#else
    a = (uint32_t)_mm_cvtsi128_si32(x);
#if defined(__SSE4_1__)
    a |= (uint64_t)_mm_extract_epi32(x, 1) << 32;
    b = (uint32_t)_mm_extract_epi32(x, 2) |
        (uint64_t)_mm_extract_epi32(x, 3) << 32;
#else
    a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32;
    x = _mm_unpackhi_epi64(x, x);
    b = (uint32_t)_mm_cvtsi128_si32(x);
    b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32;
#endif
#endif
  }

  const uint64_t *v = (const uint64_t *)data;
  switch (len) {
  default:
    b += mux64(*v++, p4);
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(*v++, p3);
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(*v++, p2);
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_le(v, len), p1);
  case 0:
    return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0);
  }
}
static uint64_t
#if __GNUC_PREREQ(4, 4) || __has_attribute(target)
    __attribute__((target("aes")))
#endif
    t1ha_ia32aes_noavx(const void *data, size_t len, uint64_t seed) {

#else /* ELF && ifunc */

uint64_t
#if __GNUC_PREREQ(4, 4) || __has_attribute(target)
    __attribute__((target("aes")))
#endif
    t1ha_ia32aes(const void *data, size_t len, uint64_t seed) {

#endif
  uint64_t a = seed;
  uint64_t b = len;

  if (unlikely(len > 32)) {
    __m128i x = _mm_set_epi64x(a, b);
    __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(p0, p1));

    const __m128i *v = (const __m128i *)data;
    const __m128i *const detent =
        (const __m128i *)((const uint8_t *)data + (len & ~15ul));
    data = detent;

    if (len & 16) {
      x = _mm_add_epi64(x, _mm_loadu_si128(v++));
      y = _mm_aesenc_si128(x, y);
    }
    len &= 15;

    if (v + 7 < detent) {
      __m128i salt = y;
      do {
        __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt);
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));

        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));
        t = _mm_aesdec_si128(t, _mm_loadu_si128(v++));

        salt = _mm_add_epi64(salt, _mm_set_epi64x(p2, p3));
        t = _mm_aesenc_si128(x, t);
        x = _mm_add_epi64(y, x);
        y = t;
      } while (v + 7 < detent);
    }

    while (v < detent) {
      __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++));
      __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++));
      x = _mm_aesdec_si128(x, v0y);
      y = _mm_aesdec_si128(y, v1x);
    }

    x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y);
#if defined(__x86_64__) || defined(_M_X64)
    a = _mm_cvtsi128_si64(x);
#if defined(__SSE4_1__)
    b = _mm_extract_epi64(x, 1);
#else
    b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x));
#endif
#else
    a = (uint32_t)_mm_cvtsi128_si32(x);
#if defined(__SSE4_1__)
    a |= (uint64_t)_mm_extract_epi32(x, 1) << 32;
    b = (uint32_t)_mm_extract_epi32(x, 2) |
        (uint64_t)_mm_extract_epi32(x, 3) << 32;
#else
    a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32;
    x = _mm_unpackhi_epi64(x, x);
    b = (uint32_t)_mm_cvtsi128_si32(x);
    b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32;
#endif
#endif
  }

  const uint64_t *v = (const uint64_t *)data;
  switch (len) {
  default:
    b += mux64(*v++, p4);
  case 24:
  case 23:
  case 22:
  case 21:
  case 20:
  case 19:
  case 18:
  case 17:
    a += mux64(*v++, p3);
  case 16:
  case 15:
  case 14:
  case 13:
  case 12:
  case 11:
  case 10:
  case 9:
    b += mux64(*v++, p2);
  case 8:
  case 7:
  case 6:
  case 5:
  case 4:
  case 3:
  case 2:
  case 1:
    a += mux64(tail64_le(v, len), p1);
  case 0:
    return mux64(rot64(a + b, s1), p4) + mix(a ^ b, p0);
  }
}

#endif /* __i386__ || __x86_64__ */
/***************************************************************************/

static uint64_t (*t1ha_local_resolve(void))(const void *, size_t, uint64_t) {
#if defined(__x86_64) || defined(_M_IX86) || defined(_M_X64) ||                \
    defined(i386) || defined(_X86_) || defined(__i386__) || defined(_X86_64_)
  uint32_t features = x86_cpu_features();
  if (features & (1l << 25)) /* CPUID.1:ECX bit 25 = AES-NI */
    return t1ha_ia32aes;
#if defined(__x86_64) || defined(_M_X64) || defined(_X86_64_)
  if (features & (1l << 20)) /* CPUID.1:ECX bit 20 = SSE4.2 (CRC32) */
    return t1ha_ia32crc;
#endif
#endif /* x86 */

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return (sizeof(long) >= 8) ? t1ha_64be : t1ha_32be;
#else
  return (sizeof(long) >= 8) ? t1ha_64le : t1ha_32le;
#endif
}

#if defined(__ELF__) && (__GNUC_PREREQ(4, 6) || __has_attribute(ifunc))

uint64_t t1ha_local(const void *data, size_t len, uint64_t seed)
    __attribute__((ifunc("t1ha_local_resolve")));

#elif __GNUC_PREREQ(4, 0) || __has_attribute(constructor)

uint64_t (*t1ha_local_ptr)(const void *, size_t, uint64_t);

static void __attribute__((constructor)) t1ha_local_init(void) {
  t1ha_local_ptr = t1ha_local_resolve();
}

#else /* ELF && ifunc */

static uint64_t t1ha_local_proxy(const void *data, size_t len, uint64_t seed) {
  t1ha_local_ptr = t1ha_local_resolve();
  return t1ha_local_ptr(data, len, seed);
}

uint64_t (*t1ha_local_ptr)(const void *, size_t, uint64_t) = t1ha_local_proxy;

#endif
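
/* A minimal sketch (not part of the original source) of how the
 * runtime-dispatched entry point above is meant to be called. On ELF targets
 * with ifunc support the call goes straight through t1ha_local(); otherwise it
 * goes through the t1ha_local_ptr function pointer filled in by the
 * constructor or by the lazy proxy. The guard macro T1HA_DISPATCH_EXAMPLE is
 * hypothetical and keeps this illustration out of normal library builds. */
#ifdef T1HA_DISPATCH_EXAMPLE

uint64_t t1ha_dispatch_example(const void *data, size_t len, uint64_t seed) {
#if defined(__ELF__) && (__GNUC_PREREQ(4, 6) || __has_attribute(ifunc))
  /* The dynamic loader has already bound t1ha_local to the best variant
   * reported by x86_cpu_features() (or to the portable fallback). */
  return t1ha_local(data, len, seed);
#else
  /* Indirect call through the pointer selected at startup or on first use. */
  return t1ha_local_ptr(data, len, seed);
#endif
}

#endif /* T1HA_DISPATCH_EXAMPLE */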