patricia.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. #include <stddef.h> /* size_t */
  2. #include <stdint.h> /* uint8_t */
  3. #include <stdlib.h> /* malloc, free */
  4. #include <string.h> /* memcpy */
  5. #include "patricia.h"
  6. /**
  7. * Our Patricia tree structure can be thought of as operating on strings of
  8. * 9-bit bytes, where 0x00 -- 0xFF are mapped to 0x100 -- 0x1FF and 0x00
  9. * represents the end-of-string character (note that NUL can occur inside
  10. * keys). The field (struct pnode).mask is either 0 or a power of 2; if 0,
  11. * the left child, if non-NULL, is a pointer to the record associated with
  12. * the key thus far. For example, the strings "hello", "hello colin",
  13. * "hello world", and "wednesday", each associated with pointers to
  14. * themselves, are stored in the following tree:
  15. *
  16. * [0x10, 0x60, 0, ""]
  17. * | |
  18. * [0x00, 0x00, 5, "hello"] [0x00, 0x00, 9, "wednesday"]
  19. * | | | |
  20. * "hello" [0x10, 0x60, 1, " "] "wednesday" NULL
  21. * | |
  22. * [0x00, 0x00, 5, "colin"] [0x00, 0x00, 5, "world"]
  23. * | | | |
  24. * "hello colin" NULL "hello world" NULL
  25. *
  26. */
  27. /* Structure used to store a Patricia tree node. */
  28. struct pnode {
  29. struct pnode * left; /* Left child. */
  30. struct pnode * right; /* Right child. */
  31. uint8_t mask; /* Critical bit mask. */
  32. uint8_t high; /* High bits of this node. */
  33. uint8_t slen; /* Length of s[]. */
  34. uint8_t s[]; /* Bytes since parent's s[]. */
  35. };
  36. /**
  37. * The use of a uint8_t to store the number of bytes in (struct pnode).s[]
  38. * means that we cannot store more than 255 bytes in each node; since many
  39. * memory allocators can perform allocations of 256 bytes far more
  40. * efficiently than they can perform allocations of slightly more than 256
  41. * bytes, we artificially limit the number of bytes stored in each node so
  42. * that a node is never larger than 256 bytes in total.
  43. *
  44. * In some applications it may be preferable to eliminate variable-length
  45. * memory allocations entirely and provide the same size of memory allocation
  46. * to each node; if this is done, it would almost certainly be desirable to
  47. * reduce MAXSLEN further, e.g., to (2 * sizeof(void *) - 3) so that each
  48. * node would be of size (4 * sizeof(void *)).
  49. */
  50. /* Maximum number of key bytes stored in a node. */
  51. #ifndef MAXSLEN
  52. #define MAXSLEN (256 - sizeof(struct pnode))
  53. #endif
  54. /*
  55. * Structure used to store Patricia tree. The maximum key length is stored
  56. * in order to simplify buffer handling in the tree traversal code.
  57. */
  58. struct patricia_internal {
  59. struct pnode * root; /* Root node of tree. */
  60. size_t maxkey; /* Longest key length. */
  61. };
  62. static struct pnode * node_alloc(uint8_t, const uint8_t *);
  63. static struct pnode * node_dup(const struct pnode *, uint8_t, const uint8_t *);
  64. static int compare(const struct pnode *, const uint8_t *, size_t, uint8_t *,
  65. uint8_t *);
  66. static int foreach_internal(struct pnode *,
  67. int(void *, uint8_t *, size_t, void *), void *, uint8_t *, size_t);
  68. static void free_internal(struct pnode *);
  69. /*
  70. * Create a node with no children, mask = high = 0, and the provided slen
  71. * and s[].
  72. */
  73. static struct pnode *
  74. node_alloc(uint8_t slen, const uint8_t * s)
  75. {
  76. struct pnode * n;
  77. /* Allocate. */
  78. if ((n = malloc(sizeof(struct pnode) + slen)) == NULL)
  79. return (NULL);
  80. /* No children, mask, or high bits. */
  81. n->left = n->right = NULL;
  82. n->mask = n->high = 0;
  83. /* Insert provided bytes of key. */
  84. n->slen = slen;
  85. memcpy(n->s, s, slen);
  86. /* Success! */
  87. return (n);
  88. }
  89. /*
  90. * Create a duplicate of a node but with different slen and s[].
  91. */
  92. static struct pnode *
  93. node_dup(const struct pnode * n0, uint8_t slen, const uint8_t * s)
  94. {
  95. struct pnode * n;
  96. /* Allocate. */
  97. if ((n = malloc(sizeof(struct pnode) + slen)) == NULL)
  98. return (NULL);
  99. /* Copy children, mask, and high bits. */
  100. n->left = n0->left;
  101. n->right = n0->right;
  102. n->mask = n0->mask;
  103. n->high = n0->high;
  104. /* Insert provided bytes of key. */
  105. n->slen = slen;
  106. memcpy(n->s, s, slen);
  107. /* Success! */
  108. return (n);
  109. }
  110. /*
  111. * Compare the given key to the given node. If they match (i.e., the node is
  112. * a prefix of the key), return zero; otherwise, return non-zero and set the
  113. * values mlen and mask to the number of matching bytes and the bitmask where
  114. * there is a mismatch (where mask == 0 means that the key is a prefix of the
  115. * node).
  116. */
  117. static int
  118. compare(const struct pnode * n, const uint8_t * key, size_t keylen,
  119. uint8_t * mlen, uint8_t * mask)
  120. {
  121. uint8_t i;
  122. uint8_t mm;
  123. /* Scan through the complete bytes in the node. */
  124. for (i = 0; i < n->slen; i++) {
  125. /* Is the key a prefix of the node? */
  126. if (keylen == i) {
  127. *mlen = i;
  128. *mask = 0;
  129. return (1);
  130. }
  131. /* Compute how the bytes differ. */
  132. mm = n->s[i] ^ key[i];
  133. /* If the ith bytes match, move on to the next position. */
  134. if (mm == 0)
  135. continue;
  136. /* Figure out which bit they mismatch at. */
  137. for (*mask = 0x80; *mask != 0; *mask >>= 1) {
  138. if (mm & *mask)
  139. break;
  140. }
  141. /* There are i matching bytes. */
  142. *mlen = i;
  143. /* The key doesn't match the node. */
  144. return (1);
  145. }
  146. /* If the node splits on the 9th bit, it is a prefix of the key. */
  147. if (n->mask == 0)
  148. return (0);
  149. /* Otherwise, consider the high bits stored in the node. */
  150. /* Is the key a prefix of the node? */
  151. if (keylen == n->slen) {
  152. *mlen = n->slen;
  153. *mask = 0;
  154. return (1);
  155. }
  156. /* Compute how the top bits differ. */
  157. mm = (n->high ^ key[i]) & ((- n->mask) << 1);
  158. /* If the top bits match, the node is a prefix of the key. */
  159. if (mm == 0)
  160. return (0);
  161. /* Figure out which bit they mismatch at. */
  162. for (*mask = 0x80; *mask != 0; *mask >>= 1) {
  163. if (mm & *mask)
  164. break;
  165. }
  166. /* There are n->slen matching bytes. */
  167. *mlen = n->slen;
  168. /* The key doesn't match this node. */
  169. return (1);
  170. }
  171. /*
  172. * Recursively call func(cookie, key, keylen, rec) on all records under the
  173. * node n; the first keypos bytes of keybuf hold the key prefix generated
  174. * from ancestor nodes.
  175. */
  176. static int
  177. foreach_internal(struct pnode * n,
  178. int func(void *, uint8_t *, size_t, void *),
  179. void * cookie, uint8_t * keybuf, size_t keypos)
  180. {
  181. int rc = 0;
  182. /* Add bytes to the key buffer. */
  183. memcpy(keybuf + keypos, n->s, n->slen);
  184. keypos += n->slen;
  185. /* Left child. */
  186. if (n->left != NULL) {
  187. if (n->mask == 0) {
  188. rc = func(cookie, keybuf, keypos, n->left);
  189. } else {
  190. rc = foreach_internal(n->left, func, cookie,
  191. keybuf, keypos);
  192. }
  193. }
  194. /* Return non-zero status if necessary. */
  195. if (rc)
  196. return (rc);
  197. /* Right child. */
  198. if (n->right != NULL)
  199. rc = foreach_internal(n->right, func, cookie, keybuf, keypos);
  200. /* Return status. */
  201. return (rc);
  202. }
  203. /*
  204. * Recursively free the tree.
  205. */
  206. static void
  207. free_internal(struct pnode * n)
  208. {
  209. /* Left child. */
  210. if ((n->mask != 0) && (n->left != NULL))
  211. free_internal(n->left);
  212. /* Right child. */
  213. if (n->right != NULL)
  214. free_internal(n->right);
  215. /* Free this node. */
  216. free(n);
  217. }
  218. /**
  219. * patricia_init(void):
  220. * Create a Patricia tree to be used for mapping arbitrary-length keys to
  221. * records. Return NULL on failure.
  222. */
  223. PATRICIA *
  224. patricia_init(void)
  225. {
  226. PATRICIA * P;
  227. /* Allocate memory, or return failure. */
  228. if ((P = malloc(sizeof(PATRICIA))) == NULL)
  229. return (NULL);
  230. /* All keys so far have zero length, and we have no nodes. */
  231. P->root = NULL;
  232. P->maxkey = 0;
  233. /* Success! */
  234. return (P);
  235. }
  236. /**
  237. * patricia_insert(tree, key, keylen, rec):
  238. * Associate the provided ${key} of length ${keylen} bytes with the pointer
  239. * ${rec}, which must be non-NULL. Return (-1) on error, 0 on success, and 1
  240. * if the key already exists.
  241. */
  242. int
  243. patricia_insert(PATRICIA * P, const uint8_t * key, size_t keylen, void * rec)
  244. {
  245. struct pnode ** np = &P->root;
  246. struct pnode * pnew;
  247. struct pnode * pnew2;
  248. size_t slen;
  249. uint8_t mlen;
  250. uint8_t mask;
  251. /* Update maximum key length. */
  252. if (P->maxkey < keylen)
  253. P->maxkey = keylen;
  254. /*
  255. * To understand this code, first read the code for patricia_lookup.
  256. * This follows essentially the same approach, except that we keep
  257. * an extra level of indirection so that we can insert a new node
  258. * into the tree _above_ the node which we are considering at any
  259. * particular point.
  260. */
  261. do {
  262. /* Have we fallen off the bottom of the tree? */
  263. if (*np == NULL) {
  264. /*
  265. * Create a new node with up to MAXSLEN bytes of the
  266. * key, and add it at the current point. Then keep
  267. * on going (and move down into the newly added node).
  268. */
  269. /* Figure out how much key goes into this node. */
  270. slen = keylen;
  271. if (slen > MAXSLEN)
  272. slen = MAXSLEN;
  273. /* Create the node or error out. */
  274. if ((pnew = node_alloc((uint8_t)slen, key)) == NULL)
  275. return (-1);
  276. /* Add the new node into the tree. */
  277. *np = pnew;
  278. }
  279. /* Is the node not a prefix of the key? */
  280. if (compare(*np, key, keylen, &mlen, &mask)) {
  281. /*
  282. * Split the node *np after mlen bytes and a number
  283. * of bits based on mask. Leave *np pointing to the
  284. * upper of the two nodes (because we will continue
  285. * by traversing into the so-far-nonexistent child
  286. * of the new node).
  287. */
  288. /* Create the lower of the new nodes. */
  289. slen = (*np)->slen - mlen;
  290. pnew2 = node_dup(*np, (uint8_t)slen, (*np)->s + mlen);
  291. if (pnew2 == NULL)
  292. return (-1);
  293. /* Create the upper of the new nodes. */
  294. if ((pnew = node_alloc(mlen, key)) == NULL) {
  295. free(pnew2);
  296. return (-1);
  297. }
  298. pnew->mask = mask;
  299. /* Handle splitting on bit 9 differently. */
  300. if (mask == 0) {
  301. pnew->high = 0;
  302. pnew->right = pnew2;
  303. } else {
  304. pnew->high = key[mlen] & ((- mask) << 1);
  305. /*
  306. * This looks wrong, but it actually works:
  307. * mask is the bit where key[mlen] and
  308. * (*np)->s[mlen] differ, so if key[mlen]
  309. * has a 1 bit, (*np)->s[mlen] has a 0 bit
  310. * and belongs on the left (and vice versa).
  311. */
  312. if (key[mlen] & mask)
  313. pnew->left = pnew2;
  314. else
  315. pnew->right = pnew2;
  316. }
  317. /* Free the node which we are replacing. */
  318. free(*np);
  319. /* Reattach this branch to the tree. */
  320. *np = pnew;
  321. }
  322. /* Strip off the matching part of the key. */
  323. key += (*np)->slen;
  324. keylen -= (*np)->slen;
  325. /* Handle splitting on the 9th bit specially. */
  326. if ((*np)->mask == 0) {
  327. /* Have we found the key? */
  328. if (keylen == 0) {
  329. /* Add the record or return 1. */
  330. if ((*np)->left == NULL) {
  331. (*np)->left = rec;
  332. return (0);
  333. } else {
  334. return (1);
  335. }
  336. }
  337. /* The key continues; traverse to right child. */
  338. np = &(*np)->right;
  339. continue;
  340. }
  341. /* Take left or right child depending upon critical bit. */
  342. if (key[0] & (*np)->mask)
  343. np = &(*np)->right;
  344. else
  345. np = &(*np)->left;
  346. } while (1);
  347. }
  348. /**
  349. * patricia_lookup(tree, key, keylen):
  350. * Look up the provided ${key} of length ${keylen} bytes. Return a pointer to
  351. * the associated _record pointer_ if the key is present in the tree (this can
  352. * be used to change the record pointer associated with the key); or NULL
  353. * otherwise.
  354. *
  355. * Note that a record can be deleted from a Patricia tree as follows:
  356. * void ** recp = patricia_lookup(tree, key, keylen);
  357. * if (recp != NULL)
  358. * *recp = NULL;
  359. * but this does not reduce the memory used by the tree as one might expect
  360. * from reducing its size.
  361. */
  362. void **
  363. patricia_lookup(PATRICIA * P, const uint8_t * key, size_t keylen)
  364. {
  365. struct pnode * n = P->root;
  366. uint8_t t0, t1; /* Garbage variables. */
  367. /* Traverse the tree until we find the key or give up. */
  368. do {
  369. /* Have we fallen off the bottom of the tree? */
  370. if (n == NULL)
  371. return (NULL);
  372. /* Is the node not a prefix of the key? */
  373. if (compare(n, key, keylen, &t0, &t1))
  374. return (NULL);
  375. /* Strip off the matching part of the key. */
  376. key += n->slen;
  377. keylen -= n->slen;
  378. /* Handle splitting on the 9th bit specially. */
  379. if (n->mask == 0) {
  380. /* Have we found the key? */
  381. if (keylen == 0) {
  382. /* Is there a record here? */
  383. if (n->left != NULL)
  384. return ((void **)(void *)&n->left);
  385. else
  386. return (NULL);
  387. }
  388. /* The key continues; traverse to right child. */
  389. n = n->right;
  390. continue;
  391. }
  392. /* Take left or right child depending upon critical bit. */
  393. if (key[0] & n->mask)
  394. n = n->right;
  395. else
  396. n = n->left;
  397. } while (1);
  398. }
  399. /**
  400. * patricia_foreach(tree, func, cookie):
  401. * Traverse the ${tree} in lexicographical order of stored keys, and call
  402. * ${func(cookie, key, keylen, rec)} for each (key, record) pair. Stop the
  403. * traversal early if ${func} returns a non-zero value; return zero, the
  404. * non-zero value returned by ${func}, or (-1) if an error occurs in the
  405. * tree traversal.
  406. */
  407. int
  408. patricia_foreach(PATRICIA * P, int func(void *, uint8_t *, size_t, void *),
  409. void * cookie)
  410. {
  411. uint8_t * keybuf;
  412. int rc;
  413. /* Allocate buffer to store keys generated during traversal. */
  414. keybuf = malloc(P->maxkey);
  415. if ((keybuf == NULL) && (P->maxkey > 0))
  416. return (-1);
  417. /* Call a recursive function to do all the work. */
  418. if (P->root != NULL)
  419. rc = foreach_internal(P->root, func, cookie, keybuf, 0);
  420. else
  421. rc = 0;
  422. /* Free temporary buffer. */
  423. free(keybuf);
  424. /* Return status from func calls. */
  425. return (rc);
  426. }
  427. /**
  428. * patricia_free(tree):
  429. * Free the ${tree}.
  430. */
  431. void
  432. patricia_free(PATRICIA * P)
  433. {
  434. /* Behave consistently with free(NULL). */
  435. if (P == NULL)
  436. return;
  437. /* Call a recursive function to free all the nodes. */
  438. if (P->root != NULL)
  439. free_internal(P->root);
  440. /* Free the tree structure. */
  441. free(P);
  442. }