set.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. /* This file is part of nit.
  2. *
  3. * Nit is free software: you can redistribute it and/or modify
  4. * it under the terms of the GNU Lesser General Public License as published by
  5. * the Free Software Foundation, either version 3 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * Nit is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Lesser General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Lesser General Public License
  14. * along with nit. If not, see <http://www.gnu.org/licenses/>.
  15. */
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include "set.h"
  19. #define BIN_MAX_DENSITY 5
  20. #define H_SEED 37
  21. const int bin_num[31] = {
  22. 1, 3,
  23. 8 + 3, 16 + 3, 32 + 5, 64 + 3, 128 + 3, 256 + 27, 512 + 9,
  24. 1024 + 9, 2048 + 5, 4096 + 3, 8192 + 27, 16384 + 43, 32768 + 3,
  25. 65536 + 45, 131072 + 29, 262144 + 3, 524288 + 21, 1048576 + 7,
  26. 2097152 + 17, 4194304 + 15, 8388608 + 9, 16777216 + 43, 33554432 + 35,
  27. 67108864 + 15, 134217728 + 29, 268435456 + 3, 536870912 + 11,
  28. 1073741824 + 85, 0
  29. };
  30. #define ROT32(x, y) ((x << y) | (x >> (32 - y)))
  31. static uint32_t
  32. murmur3_32(const char *key, uint32_t len, uint32_t seed)
  33. {
  34. static const uint32_t c1 = 0xcc9e2d51;
  35. static const uint32_t c2 = 0x1b873593;
  36. static const uint32_t r1 = 15;
  37. static const uint32_t r2 = 13;
  38. static const uint32_t m = 5;
  39. static const uint32_t n = 0xe6546b64;
  40. uint32_t h = seed;
  41. const int nblocks = len / 4;
  42. const uint32_t *blocks = (const uint32_t *) key;
  43. int i;
  44. uint32_t k;
  45. for (i = 0; i < nblocks; i++) {
  46. k = blocks[i];
  47. k *= c1;
  48. k = ROT32(k, r1);
  49. k *= c2;
  50. h ^= k;
  51. h = ROT32(h, r2) * m + n;
  52. }
  53. const uint8_t *tail = (const uint8_t *) (key + nblocks * 4);
  54. uint32_t k1 = 0;
  55. switch (len & 3) {
  56. case 3:
  57. k1 ^= tail[2] << 16;
  58. /* fallthru */
  59. case 2:
  60. k1 ^= tail[1] << 8;
  61. /* fallthru */
  62. case 1:
  63. k1 ^= tail[0];
  64. k1 *= c1;
  65. k1 = ROT32(k1, r1);
  66. k1 *= c2;
  67. h ^= k1;
  68. }
  69. h ^= len;
  70. h ^= (h >> 16);
  71. h *= 0x85ebca6b;
  72. h ^= (h >> 13);
  73. h *= 0xc2b2ae35;
  74. h ^= (h >> 16);
  75. return h;
  76. }
  77. static int
  78. key_same(const Set_entry *entry, uint32_t key_len, const void *dat)
  79. {
  80. if (key_len != entry->key_len)
  81. return 0;
  82. return !memcmp(entry->dat, dat, key_len);
  83. }
  84. Nit_error
  85. set_init(Set *set, Disposer *disposer, int bin_pos)
  86. {
  87. set->disposer = disposer;
  88. if (!(set->bins = calloc(bin_num[bin_pos], sizeof(*set->bins))))
  89. return NIT_ERROR_MEMORY;
  90. set->bin_pos = bin_pos;
  91. set->size = 0;
  92. return NIT_ERROR_FINE;
  93. }
  94. void
  95. set_dispose(Set *set, Set_dispose set_dispose)
  96. {
  97. for (int i = 0; i < bin_num[set->bin_pos]; ++i) {
  98. Set_entry *entry = set->bins[i];
  99. while (entry) {
  100. Set_entry *tmp = entry->next;
  101. if (set_dispose)
  102. set_dispose(set->disposer, entry->key_len,
  103. entry->dat);
  104. else if (set->disposer)
  105. set->disposer->dispose(entry->dat,
  106. set->disposer->extra);
  107. free(entry);
  108. entry = tmp;
  109. }
  110. }
  111. free(set->bins);
  112. }
  113. static int
  114. row_num(const void *key, uint32_t key_len, int num)
  115. {
  116. return murmur3_32(key, key_len, H_SEED) % num;
  117. }
  118. static Set_entry *
  119. set_entry(uint32_t key_len, const void *dat, uint32_t hash, Set_entry *next)
  120. {
  121. Set_entry *entry = malloc(sizeof(*entry));
  122. if (!entry)
  123. return NULL;
  124. entry->dat = (void *) dat;
  125. entry->key_len = key_len;
  126. entry->hash = hash;
  127. entry->next = next;
  128. return entry;
  129. }
  130. static int
  131. rehash(Set *set)
  132. {
  133. int new_bin_num = bin_num[set->bin_pos + 1];
  134. Set_entry **new_bins = calloc(new_bin_num, sizeof(*set->bins));
  135. if (!new_bins)
  136. return -1;
  137. for (int i = 0; i < bin_num[set->bin_pos]; ++i) {
  138. for (Set_entry *entry = set->bins[i]; entry;) {
  139. Set_entry *tmp = entry->next;
  140. uint32_t row = entry->hash % new_bin_num;
  141. entry->next = new_bins[row];
  142. new_bins[row] = entry;
  143. entry = tmp;
  144. }
  145. }
  146. free(set->bins);
  147. set->bins = new_bins;
  148. ++set->bin_pos;
  149. return 0;
  150. }
  151. Nit_error
  152. set_add(Set *set, uint32_t key_len, const void *dat)
  153. {
  154. uint32_t hash = murmur3_32(dat, key_len, H_SEED);
  155. Set_entry **bin;
  156. Set_entry *new_entry;
  157. if ((set->size + 1) / bin_num[set->bin_pos] >= BIN_MAX_DENSITY &&
  158. rehash(set) < 0)
  159. return NIT_ERROR_MEMORY;
  160. bin = &set->bins[hash % bin_num[set->bin_pos]];
  161. for (Set_entry *entry = *bin; entry; entry = entry->next)
  162. if (key_same(entry, key_len, dat))
  163. return NIT_ERROR_ALREADY;
  164. if (!(new_entry = set_entry(key_len, dat, hash, *bin)))
  165. return NIT_ERROR_MEMORY;
  166. *bin = new_entry;
  167. ++set->size;
  168. return NIT_ERROR_FINE;
  169. }
  170. Nit_error
  171. set_remove(Set *set, uint32_t key_len, void **dat)
  172. {
  173. uint32_t row = row_num(*dat, key_len, bin_num[set->bin_pos]);
  174. for (Set_entry **entry = &set->bins[row]; *entry;
  175. entry = &(*entry)->next)
  176. if (key_same(*entry, key_len, *dat)) {
  177. Set_entry *tmp = *entry;
  178. *dat = tmp->dat;
  179. *entry = tmp->next;
  180. free(tmp);
  181. return NIT_ERROR_FINE;
  182. }
  183. return NIT_ERROR_NOT_FOUND;
  184. }
  185. int
  186. set_get(const Set *set, uint32_t key_len, void **dat)
  187. {
  188. uint32_t row = row_num(*dat, key_len, bin_num[set->bin_pos]);
  189. for (Set_entry *entry = set->bins[row]; entry; entry = entry->next)
  190. if (key_same(entry, key_len, *dat)) {
  191. *dat = entry->dat;
  192. return 1;
  193. }
  194. return 0;
  195. }
  196. int
  197. set_contains(const Set *set, uint32_t key_len, const void *dat)
  198. {
  199. void **dat_ref = (void **) &dat;
  200. return !!set_get(set, key_len, dat_ref);
  201. }
  202. int
  203. set_subset(const Set *super, const Set *sub)
  204. {
  205. int i;
  206. if (sub->size > super->size)
  207. return 0;
  208. for (i = 0; i <= bin_num[sub->bin_pos]; ++i)
  209. for (Set_entry *entry = sub->bins[i]; entry;
  210. entry = entry->next)
  211. if (!set_contains(super, entry->key_len, entry->dat))
  212. return 0;
  213. return 1;
  214. }
  215. int
  216. set_equal(const Set *set1, const Set *set2)
  217. {
  218. return set1->size != set2->size ? 0 : set_subset(set1, set2);
  219. }
  220. Nit_error
  221. set_fill(Set *des, const Set *src)
  222. {
  223. int bin;
  224. Set_entry *entry;
  225. set_foreach (src, bin, entry) {
  226. Nit_error error;
  227. if (set_contains(des, entry->key_len, entry->dat))
  228. continue;
  229. if ((error = set_add(des, entry->key_len, entry->dat)))
  230. return error;
  231. }
  232. return NIT_ERROR_FINE;
  233. }
  234. Nit_error
  235. set_compl(Set *des, const Set *src, const Set *dif)
  236. {
  237. int bin;
  238. Set_entry *entry;
  239. set_foreach (src, bin, entry) {
  240. Nit_error error;
  241. if (set_contains(des, entry->key_len, entry->dat) ||
  242. set_contains(dif, entry->key_len, entry->dat))
  243. continue;
  244. if ((error = set_add(des, entry->key_len, entry->dat)))
  245. return error;
  246. }
  247. return NIT_ERROR_FINE;
  248. }
  249. Nit_error
  250. set_inter(Set *des, const Set *src1, const Set *src2)
  251. {
  252. const Set *large;
  253. const Set *small;
  254. int bin;
  255. Set_entry *entry;
  256. if (src1->size >= src2->size) {
  257. large = src1;
  258. small = src2;
  259. } else {
  260. large = src2;
  261. small = src1;
  262. }
  263. set_foreach (small, bin, entry) {
  264. Nit_error error;
  265. if (set_contains(des, entry->key_len, entry->dat) ||
  266. !set_contains(large, entry->key_len, entry->dat))
  267. continue;
  268. if ((error = set_add(des, entry->key_len, entry->dat)))
  269. return error;
  270. }
  271. return NIT_ERROR_FINE;
  272. }
  273. static void
  274. key_dsp(void *dat, void *extra)
  275. {
  276. (void) extra;
  277. free(dat);
  278. }
  279. Disposer key_dspr = {
  280. .extra = NULL,
  281. .dispose = key_dsp
  282. };
  283. Nit_error
  284. set_list(const Set *set, List *list, int init, int copy)
  285. {
  286. int bin;
  287. Set_entry *sentry;
  288. Set_key *new_key;
  289. Nit_error error;
  290. if (!set->size)
  291. return NIT_ERROR_EMPTY;
  292. if (init)
  293. list_init(list, copy ? &key_dspr : NULL);
  294. set_foreach(set, bin, sentry) {
  295. List_entry *prev = NULL;
  296. List_entry *lentry;
  297. list_foreach (lentry, list) {
  298. Set_key *key = lentry->data;
  299. if (sentry->key_len < key->key_len)
  300. break;
  301. if (sentry->key_len == key->key_len) {
  302. int cmp = memcmp(sentry->dat, key->dat,
  303. key->key_len);
  304. if (!cmp)
  305. goto next;
  306. if (cmp < 0)
  307. break;
  308. }
  309. prev = lentry;
  310. }
  311. if (!copy) {
  312. new_key = (Set_key *) sentry;
  313. } else {
  314. if (!(new_key = malloc(sizeof(*new_key))))
  315. return NIT_ERROR_MEMORY;
  316. new_key->key_len = sentry->key_len;
  317. if (!(new_key->dat = malloc(new_key->key_len))) {
  318. error = NIT_ERROR_MEMORY;
  319. goto err_dat;
  320. }
  321. memcpy(new_key->dat, sentry->dat, new_key->key_len);
  322. }
  323. if ((error = list_add(list, prev, new_key)))
  324. goto err_add;
  325. next:
  326. continue;
  327. }
  328. return NIT_ERROR_FINE;
  329. err_add:
  330. free(new_key->dat);
  331. err_dat:
  332. free(new_key);
  333. return error;
  334. }