farmhash.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. // Copyright (c) 2014 Google, Inc.
  2. //
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy
  4. // of this software and associated documentation files (the "Software"), to deal
  5. // in the Software without restriction, including without limitation the rights
  6. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. // copies of the Software, and to permit persons to whom the Software is
  8. // furnished to do so, subject to the following conditions:
  9. //
  10. // The above copyright notice and this permission notice shall be included in
  11. // all copies or substantial portions of the Software.
  12. //
  13. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. // THE SOFTWARE.
  20. //
  21. // FarmHash, by Geoff Pike
  22. //
  23. // http://code.google.com/p/farmhash/
  24. //
  25. // This file provides a few functions for hashing strings and other
  26. // data. All of them are high-quality functions in the sense that
  27. // they do well on standard tests such as Austin Appleby's SMHasher.
  28. // They're also fast. FarmHash is the successor to CityHash.
  29. //
  30. // Functions in the FarmHash family are not suitable for cryptography.
  31. //
  32. // WARNING: This code has been only lightly tested on big-endian platforms!
  33. // It is known to work well on little-endian platforms that have a small penalty
  34. // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
  35. // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
  36. // bug reports are welcome.
  37. //
  38. // By the way, for some hash functions, given strings a and b, the hash
  39. // of a+b is easily derived from the hashes of a and b. This property
  40. // doesn't hold for any hash functions in this file.
  41. #ifndef FARM_HASH_H_
  42. #define FARM_HASH_H_
  43. #include <assert.h>
  44. #include <stdint.h>
  45. #include <stdlib.h>
  46. #include <string.h> // for memcpy and memset
  47. #include <utility>
  48. #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
  49. #define NAMESPACE_FOR_HASH_FUNCTIONS util
  50. #endif
  51. namespace NAMESPACE_FOR_HASH_FUNCTIONS {
  52. #if defined(FARMHASH_UINT128_T_DEFINED)
  53. inline uint64_t Uint128Low64(const uint128_t x) {
  54. return static_cast<uint64_t>(x);
  55. }
  56. inline uint64_t Uint128High64(const uint128_t x) {
  57. return static_cast<uint64_t>(x >> 64);
  58. }
  59. inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
  60. return lo + (((uint128_t)hi) << 64);
  61. }
  62. #else
  63. typedef std::pair<uint64_t, uint64_t> uint128_t;
  64. inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
  65. inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
  66. inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
  67. #endif
  68. // BASIC STRING HASHING
  69. // Hash function for a byte array.
  70. // May change from time to time, may differ on different platforms, may differ
  71. // depending on NDEBUG.
  72. size_t Hash(const char* s, size_t len);
  73. // Hash function for a byte array. Most useful in 32-bit binaries.
  74. // May change from time to time, may differ on different platforms, may differ
  75. // depending on NDEBUG.
  76. uint32_t Hash32(const char* s, size_t len);
  77. // Hash function for a byte array. For convenience, a 32-bit seed is also
  78. // hashed into the result.
  79. // May change from time to time, may differ on different platforms, may differ
  80. // depending on NDEBUG.
  81. uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
  82. // Hash 128 input bits down to 64 bits of output.
  83. // Hash function for a byte array.
  84. // May change from time to time, may differ on different platforms, may differ
  85. // depending on NDEBUG.
  86. uint64_t Hash64(const char* s, size_t len);
  87. // Hash function for a byte array. For convenience, a 64-bit seed is also
  88. // hashed into the result.
  89. // May change from time to time, may differ on different platforms, may differ
  90. // depending on NDEBUG.
  91. uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
  92. // Hash function for a byte array. For convenience, two seeds are also
  93. // hashed into the result.
  94. // May change from time to time, may differ on different platforms, may differ
  95. // depending on NDEBUG.
  96. uint64_t Hash64WithSeeds(const char* s, size_t len,
  97. uint64_t seed0, uint64_t seed1);
  98. // Hash function for a byte array.
  99. // May change from time to time, may differ on different platforms, may differ
  100. // depending on NDEBUG.
  101. uint128_t Hash128(const char* s, size_t len);
  102. // Hash function for a byte array. For convenience, a 128-bit seed is also
  103. // hashed into the result.
  104. // May change from time to time, may differ on different platforms, may differ
  105. // depending on NDEBUG.
  106. uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
  107. // BASIC NON-STRING HASHING
  108. // This is intended to be a reasonably good hash function.
  109. // May change from time to time, may differ on different platforms, may differ
  110. // depending on NDEBUG.
  111. inline uint64_t Hash128to64(uint128_t x) {
  112. // Murmur-inspired hashing.
  113. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  114. uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  115. a ^= (a >> 47);
  116. uint64_t b = (Uint128High64(x) ^ a) * kMul;
  117. b ^= (b >> 47);
  118. b *= kMul;
  119. return b;
  120. }
  121. // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
  122. // Fingerprint function for a byte array. Most useful in 32-bit binaries.
  123. uint32_t Fingerprint32(const char* s, size_t len);
  124. // Fingerprint function for a byte array.
  125. uint64_t Fingerprint64(const char* s, size_t len);
  126. // Fingerprint function for a byte array.
  127. uint128_t Fingerprint128(const char* s, size_t len);
  128. // This is intended to be a good fingerprinting primitive.
  129. // See below for more overloads.
  130. inline uint64_t Fingerprint(uint128_t x) {
  131. // Murmur-inspired hashing.
  132. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  133. uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  134. a ^= (a >> 47);
  135. uint64_t b = (Uint128High64(x) ^ a) * kMul;
  136. b ^= (b >> 44);
  137. b *= kMul;
  138. b ^= (b >> 41);
  139. b *= kMul;
  140. return b;
  141. }
  142. // This is intended to be a good fingerprinting primitive.
  143. inline uint64_t Fingerprint(uint64_t x) {
  144. // Murmur-inspired hashing.
  145. const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  146. uint64_t b = x * kMul;
  147. b ^= (b >> 44);
  148. b *= kMul;
  149. b ^= (b >> 41);
  150. b *= kMul;
  151. return b;
  152. }
  153. #ifndef FARMHASH_NO_CXX_STRING
  154. // Convenience functions to hash or fingerprint C++ strings.
  155. // These require that Str::data() return a pointer to the first char
  156. // (as a const char*) and that Str::length() return the string's length;
  157. // they work with std::string, for example.
  158. // Hash function for a byte array.
  159. // May change from time to time, may differ on different platforms, may differ
  160. // depending on NDEBUG.
  161. template <typename Str>
  162. inline size_t Hash(const Str& s) {
  163. assert(sizeof(s[0]) == 1);
  164. return Hash(s.data(), s.length());
  165. }
  166. // Hash function for a byte array. Most useful in 32-bit binaries.
  167. // May change from time to time, may differ on different platforms, may differ
  168. // depending on NDEBUG.
  169. template <typename Str>
  170. inline uint32_t Hash32(const Str& s) {
  171. assert(sizeof(s[0]) == 1);
  172. return Hash32(s.data(), s.length());
  173. }
  174. // Hash function for a byte array. For convenience, a 32-bit seed is also
  175. // hashed into the result.
  176. // May change from time to time, may differ on different platforms, may differ
  177. // depending on NDEBUG.
  178. template <typename Str>
  179. inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
  180. assert(sizeof(s[0]) == 1);
  181. return Hash32WithSeed(s.data(), s.length(), seed);
  182. }
  183. // Hash 128 input bits down to 64 bits of output.
  184. // Hash function for a byte array.
  185. // May change from time to time, may differ on different platforms, may differ
  186. // depending on NDEBUG.
  187. template <typename Str>
  188. inline uint64_t Hash64(const Str& s) {
  189. assert(sizeof(s[0]) == 1);
  190. return Hash64(s.data(), s.length());
  191. }
  192. // Hash function for a byte array. For convenience, a 64-bit seed is also
  193. // hashed into the result.
  194. // May change from time to time, may differ on different platforms, may differ
  195. // depending on NDEBUG.
  196. template <typename Str>
  197. inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
  198. assert(sizeof(s[0]) == 1);
  199. return Hash64WithSeed(s.data(), s.length(), seed);
  200. }
  201. // Hash function for a byte array. For convenience, two seeds are also
  202. // hashed into the result.
  203. // May change from time to time, may differ on different platforms, may differ
  204. // depending on NDEBUG.
  205. template <typename Str>
  206. inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
  207. assert(sizeof(s[0]) == 1);
  208. return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
  209. }
  210. // Hash function for a byte array.
  211. // May change from time to time, may differ on different platforms, may differ
  212. // depending on NDEBUG.
  213. template <typename Str>
  214. inline uint128_t Hash128(const Str& s) {
  215. assert(sizeof(s[0]) == 1);
  216. return Hash128(s.data(), s.length());
  217. }
  218. // Hash function for a byte array. For convenience, a 128-bit seed is also
  219. // hashed into the result.
  220. // May change from time to time, may differ on different platforms, may differ
  221. // depending on NDEBUG.
  222. template <typename Str>
  223. inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
  224. assert(sizeof(s[0]) == 1);
  225. return Hash128(s.data(), s.length(), seed);
  226. }
  227. // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
  228. // Fingerprint function for a byte array. Most useful in 32-bit binaries.
  229. template <typename Str>
  230. inline uint32_t Fingerprint32(const Str& s) {
  231. assert(sizeof(s[0]) == 1);
  232. return Fingerprint32(s.data(), s.length());
  233. }
  234. // Fingerprint 128 input bits down to 64 bits of output.
  235. // Fingerprint function for a byte array.
  236. template <typename Str>
  237. inline uint64_t Fingerprint64(const Str& s) {
  238. assert(sizeof(s[0]) == 1);
  239. return Fingerprint64(s.data(), s.length());
  240. }
  241. // Fingerprint function for a byte array.
  242. template <typename Str>
  243. inline uint128_t Fingerprint128(const Str& s) {
  244. assert(sizeof(s[0]) == 1);
  245. return Fingerprint128(s.data(), s.length());
  246. }
  247. #endif
  248. } // namespace NAMESPACE_FOR_HASH_FUNCTIONS
  249. #endif // FARM_HASH_H_