cordbscs.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920
  1. /*
  2. * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
  3. *
  4. * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
  5. * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
  6. *
  7. * Permission is hereby granted to use or copy this program
  8. * for any purpose, provided the above notices are retained on all copies.
  9. * Permission to modify the code and to distribute modified code is granted,
  10. * provided the above notices are retained, and a notice that the code was
  11. * modified is included with the above copyright notice.
  12. *
  13. * Author: Hans-J. Boehm (boehm@parc.xerox.com)
  14. */
  15. /* Boehm, October 3, 1994 5:19 pm PDT */
  16. # include "gc.h"
  17. # include "cord.h"
  18. # include <stdlib.h>
  19. # include <stdio.h>
  20. # include <string.h>
  21. /* An implementation of the cord primitives. These are the only */
  22. /* Functions that understand the representation. We perform only */
  23. /* minimal checks on arguments to these functions. Out of bounds */
  24. /* arguments to the iteration functions may result in client functions */
  25. /* invoked on garbage data. In most cases, client functions should be */
  26. /* programmed defensively enough that this does not result in memory */
  27. /* smashes. */
  28. typedef void (* oom_fn)(void);
  29. oom_fn CORD_oom_fn = (oom_fn) 0;
  30. # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
  31. ABORT("Out of memory\n"); }
  32. # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
  33. typedef unsigned long word;
  34. typedef union {
  35. struct Concatenation {
  36. char null;
  37. char header;
  38. char depth; /* concatenation nesting depth. */
  39. unsigned char left_len;
  40. /* Length of left child if it is sufficiently */
  41. /* short; 0 otherwise. */
  42. # define MAX_LEFT_LEN 255
  43. word len;
  44. CORD left; /* length(left) > 0 */
  45. CORD right; /* length(right) > 0 */
  46. } concatenation;
  47. struct Function {
  48. char null;
  49. char header;
  50. char depth; /* always 0 */
  51. char left_len; /* always 0 */
  52. word len;
  53. CORD_fn fn;
  54. void * client_data;
  55. } function;
  56. struct Generic {
  57. char null;
  58. char header;
  59. char depth;
  60. char left_len;
  61. word len;
  62. } generic;
  63. char string[1];
  64. } CordRep;
  65. # define CONCAT_HDR 1
  66. # define FN_HDR 4
  67. # define SUBSTR_HDR 6
  68. /* Substring nodes are a special case of function nodes. */
  69. /* The client_data field is known to point to a substr_args */
  70. /* structure, and the function is either CORD_apply_access_fn */
  71. /* or CORD_index_access_fn. */
  72. /* The following may be applied only to function and concatenation nodes: */
  73. #define IS_CONCATENATION(s) (((CordRep *)s)->generic.header == CONCAT_HDR)
  74. #define IS_FUNCTION(s) ((((CordRep *)s)->generic.header & FN_HDR) != 0)
  75. #define IS_SUBSTR(s) (((CordRep *)s)->generic.header == SUBSTR_HDR)
  76. #define LEN(s) (((CordRep *)s) -> generic.len)
  77. #define DEPTH(s) (((CordRep *)s) -> generic.depth)
  78. #define GEN_LEN(s) (CORD_IS_STRING(s) ? strlen(s) : LEN(s))
  79. #define LEFT_LEN(c) ((c) -> left_len != 0? \
  80. (c) -> left_len \
  81. : (CORD_IS_STRING((c) -> left) ? \
  82. (c) -> len - GEN_LEN((c) -> right) \
  83. : LEN((c) -> left)))
  84. #define SHORT_LIMIT (sizeof(CordRep) - 1)
  85. /* Cords shorter than this are C strings */
  86. /* Dump the internal representation of x to stdout, with initial */
  87. /* indentation level n. */
  88. void CORD_dump_inner(CORD x, unsigned n)
  89. {
  90. register size_t i;
  91. for (i = 0; i < (size_t)n; i++) {
  92. fputs(" ", stdout);
  93. }
  94. if (x == 0) {
  95. fputs("NIL\n", stdout);
  96. } else if (CORD_IS_STRING(x)) {
  97. for (i = 0; i <= SHORT_LIMIT; i++) {
  98. if (x[i] == '\0') break;
  99. putchar(x[i]);
  100. }
  101. if (x[i] != '\0') fputs("...", stdout);
  102. putchar('\n');
  103. } else if (IS_CONCATENATION(x)) {
  104. register struct Concatenation * conc =
  105. &(((CordRep *)x) -> concatenation);
  106. printf("Concatenation: %p (len: %d, depth: %d)\n",
  107. x, (int)(conc -> len), (int)(conc -> depth));
  108. CORD_dump_inner(conc -> left, n+1);
  109. CORD_dump_inner(conc -> right, n+1);
  110. } else /* function */{
  111. register struct Function * func =
  112. &(((CordRep *)x) -> function);
  113. if (IS_SUBSTR(x)) printf("(Substring) ");
  114. printf("Function: %p (len: %d): ", x, (int)(func -> len));
  115. for (i = 0; i < 20 && i < func -> len; i++) {
  116. putchar((*(func -> fn))(i, func -> client_data));
  117. }
  118. if (i < func -> len) fputs("...", stdout);
  119. putchar('\n');
  120. }
  121. }
  122. /* Dump the internal representation of x to stdout */
  123. void CORD_dump(CORD x)
  124. {
  125. CORD_dump_inner(x, 0);
  126. fflush(stdout);
  127. }
  128. CORD CORD_cat_char_star(CORD x, const char * y, size_t leny)
  129. {
  130. register size_t result_len;
  131. register size_t lenx;
  132. register int depth;
  133. if (x == CORD_EMPTY) return(y);
  134. if (leny == 0) return(x);
  135. if (CORD_IS_STRING(x)) {
  136. lenx = strlen(x);
  137. result_len = lenx + leny;
  138. if (result_len <= SHORT_LIMIT) {
  139. register char * result = GC_MALLOC_ATOMIC(result_len+1);
  140. if (result == 0) OUT_OF_MEMORY;
  141. memcpy(result, x, lenx);
  142. memcpy(result + lenx, y, leny);
  143. result[result_len] = '\0';
  144. return((CORD) result);
  145. } else {
  146. depth = 1;
  147. }
  148. } else {
  149. register CORD right;
  150. register CORD left;
  151. register char * new_right;
  152. register size_t right_len;
  153. lenx = LEN(x);
  154. if (leny <= SHORT_LIMIT/2
  155. && IS_CONCATENATION(x)
  156. && CORD_IS_STRING(right = ((CordRep *)x) -> concatenation.right)) {
  157. /* Merge y into right part of x. */
  158. if (!CORD_IS_STRING(left = ((CordRep *)x) -> concatenation.left)) {
  159. right_len = lenx - LEN(left);
  160. } else if (((CordRep *)x) -> concatenation.left_len != 0) {
  161. right_len = lenx - ((CordRep *)x) -> concatenation.left_len;
  162. } else {
  163. right_len = strlen(right);
  164. }
  165. result_len = right_len + leny; /* length of new_right */
  166. if (result_len <= SHORT_LIMIT) {
  167. new_right = GC_MALLOC_ATOMIC(result_len + 1);
  168. memcpy(new_right, right, right_len);
  169. memcpy(new_right + right_len, y, leny);
  170. new_right[result_len] = '\0';
  171. y = new_right;
  172. leny = result_len;
  173. x = left;
  174. lenx -= right_len;
  175. /* Now fall through to concatenate the two pieces: */
  176. }
  177. if (CORD_IS_STRING(x)) {
  178. depth = 1;
  179. } else {
  180. depth = DEPTH(x) + 1;
  181. }
  182. } else {
  183. depth = DEPTH(x) + 1;
  184. }
  185. result_len = lenx + leny;
  186. }
  187. {
  188. /* The general case; lenx, result_len is known: */
  189. register struct Concatenation * result;
  190. result = GC_NEW(struct Concatenation);
  191. if (result == 0) OUT_OF_MEMORY;
  192. result->header = CONCAT_HDR;
  193. result->depth = depth;
  194. if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
  195. result->len = result_len;
  196. result->left = x;
  197. result->right = y;
  198. if (depth >= MAX_DEPTH) {
  199. return(CORD_balance((CORD)result));
  200. } else {
  201. return((CORD) result);
  202. }
  203. }
  204. }
  205. CORD CORD_cat(CORD x, CORD y)
  206. {
  207. register size_t result_len;
  208. register int depth;
  209. register size_t lenx;
  210. if (x == CORD_EMPTY) return(y);
  211. if (y == CORD_EMPTY) return(x);
  212. if (CORD_IS_STRING(y)) {
  213. return(CORD_cat_char_star(x, y, strlen(y)));
  214. } else if (CORD_IS_STRING(x)) {
  215. lenx = strlen(x);
  216. depth = DEPTH(y) + 1;
  217. } else {
  218. register int depthy = DEPTH(y);
  219. lenx = LEN(x);
  220. depth = DEPTH(x) + 1;
  221. if (depthy >= depth) depth = depthy + 1;
  222. }
  223. result_len = lenx + LEN(y);
  224. {
  225. register struct Concatenation * result;
  226. result = GC_NEW(struct Concatenation);
  227. if (result == 0) OUT_OF_MEMORY;
  228. result->header = CONCAT_HDR;
  229. result->depth = depth;
  230. if (lenx <= MAX_LEFT_LEN) result->left_len = lenx;
  231. result->len = result_len;
  232. result->left = x;
  233. result->right = y;
  234. if (depth >= MAX_DEPTH) {
  235. return(CORD_balance((CORD)result));
  236. } else {
  237. return((CORD) result);
  238. }
  239. }
  240. }
  241. CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len)
  242. {
  243. if (len <= 0) return(0);
  244. if (len <= SHORT_LIMIT) {
  245. register char * result;
  246. register size_t i;
  247. char buf[SHORT_LIMIT+1];
  248. register char c;
  249. for (i = 0; i < len; i++) {
  250. c = (*fn)(i, client_data);
  251. if (c == '\0') goto gen_case;
  252. buf[i] = c;
  253. }
  254. buf[i] = '\0';
  255. result = GC_MALLOC_ATOMIC(len+1);
  256. if (result == 0) OUT_OF_MEMORY;
  257. strcpy(result, buf);
  258. result[len] = '\0';
  259. return((CORD) result);
  260. }
  261. gen_case:
  262. {
  263. register struct Function * result;
  264. result = GC_NEW(struct Function);
  265. if (result == 0) OUT_OF_MEMORY;
  266. result->header = FN_HDR;
  267. /* depth is already 0 */
  268. result->len = len;
  269. result->fn = fn;
  270. result->client_data = client_data;
  271. return((CORD) result);
  272. }
  273. }
  274. size_t CORD_len(CORD x)
  275. {
  276. if (x == 0) {
  277. return(0);
  278. } else {
  279. return(GEN_LEN(x));
  280. }
  281. }
  282. struct substr_args {
  283. CordRep * sa_cord;
  284. size_t sa_index;
  285. };
  286. char CORD_index_access_fn(size_t i, void * client_data)
  287. {
  288. register struct substr_args *descr = (struct substr_args *)client_data;
  289. return(((char *)(descr->sa_cord))[i + descr->sa_index]);
  290. }
  291. char CORD_apply_access_fn(size_t i, void * client_data)
  292. {
  293. register struct substr_args *descr = (struct substr_args *)client_data;
  294. register struct Function * fn_cord = &(descr->sa_cord->function);
  295. return((*(fn_cord->fn))(i + descr->sa_index, fn_cord->client_data));
  296. }
  297. /* A version of CORD_substr that simply returns a function node, thus */
  298. /* postponing its work. The fourth argument is a function that may */
  299. /* be used for efficient access to the ith character. */
  300. /* Assumes i >= 0 and i + n < length(x). */
  301. CORD CORD_substr_closure(CORD x, size_t i, size_t n, CORD_fn f)
  302. {
  303. register struct substr_args * sa = GC_NEW(struct substr_args);
  304. CORD result;
  305. if (sa == 0) OUT_OF_MEMORY;
  306. sa->sa_cord = (CordRep *)x;
  307. sa->sa_index = i;
  308. result = CORD_from_fn(f, (void *)sa, n);
  309. ((CordRep *)result) -> function.header = SUBSTR_HDR;
  310. return (result);
  311. }
  312. # define SUBSTR_LIMIT (10 * SHORT_LIMIT)
  313. /* Substrings of function nodes and flat strings shorter than */
  314. /* this are flat strings. Othewise we use a functional */
  315. /* representation, which is significantly slower to access. */
  316. /* A version of CORD_substr that assumes i >= 0, n > 0, and i + n < length(x).*/
  317. CORD CORD_substr_checked(CORD x, size_t i, size_t n)
  318. {
  319. if (CORD_IS_STRING(x)) {
  320. if (n > SUBSTR_LIMIT) {
  321. return(CORD_substr_closure(x, i, n, CORD_index_access_fn));
  322. } else {
  323. register char * result = GC_MALLOC_ATOMIC(n+1);
  324. if (result == 0) OUT_OF_MEMORY;
  325. strncpy(result, x+i, n);
  326. result[n] = '\0';
  327. return(result);
  328. }
  329. } else if (IS_CONCATENATION(x)) {
  330. register struct Concatenation * conc
  331. = &(((CordRep *)x) -> concatenation);
  332. register size_t left_len;
  333. register size_t right_len;
  334. left_len = LEFT_LEN(conc);
  335. right_len = conc -> len - left_len;
  336. if (i >= left_len) {
  337. if (n == right_len) return(conc -> right);
  338. return(CORD_substr_checked(conc -> right, i - left_len, n));
  339. } else if (i+n <= left_len) {
  340. if (n == left_len) return(conc -> left);
  341. return(CORD_substr_checked(conc -> left, i, n));
  342. } else {
  343. /* Need at least one character from each side. */
  344. register CORD left_part;
  345. register CORD right_part;
  346. register size_t left_part_len = left_len - i;
  347. if (i == 0) {
  348. left_part = conc -> left;
  349. } else {
  350. left_part = CORD_substr_checked(conc -> left, i, left_part_len);
  351. }
  352. if (i + n == right_len + left_len) {
  353. right_part = conc -> right;
  354. } else {
  355. right_part = CORD_substr_checked(conc -> right, 0,
  356. n - left_part_len);
  357. }
  358. return(CORD_cat(left_part, right_part));
  359. }
  360. } else /* function */ {
  361. if (n > SUBSTR_LIMIT) {
  362. if (IS_SUBSTR(x)) {
  363. /* Avoid nesting substring nodes. */
  364. register struct Function * f = &(((CordRep *)x) -> function);
  365. register struct substr_args *descr =
  366. (struct substr_args *)(f -> client_data);
  367. return(CORD_substr_closure((CORD)descr->sa_cord,
  368. i + descr->sa_index,
  369. n, f -> fn));
  370. } else {
  371. return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
  372. }
  373. } else {
  374. char * result;
  375. register struct Function * f = &(((CordRep *)x) -> function);
  376. char buf[SUBSTR_LIMIT+1];
  377. register char * p = buf;
  378. register char c;
  379. register int j;
  380. register int lim = i + n;
  381. for (j = i; j < lim; j++) {
  382. c = (*(f -> fn))(j, f -> client_data);
  383. if (c == '\0') {
  384. return(CORD_substr_closure(x, i, n, CORD_apply_access_fn));
  385. }
  386. *p++ = c;
  387. }
  388. *p = '\0';
  389. result = GC_MALLOC_ATOMIC(n+1);
  390. if (result == 0) OUT_OF_MEMORY;
  391. strcpy(result, buf);
  392. return(result);
  393. }
  394. }
  395. }
  396. CORD CORD_substr(CORD x, size_t i, size_t n)
  397. {
  398. register size_t len = CORD_len(x);
  399. if (i >= len || n <= 0) return(0);
  400. /* n < 0 is impossible in a correct C implementation, but */
  401. /* quite possible under SunOS 4.X. */
  402. if (i + n > len) n = len - i;
  403. # ifndef __STDC__
  404. if (i < 0) ABORT("CORD_substr: second arg. negative");
  405. /* Possible only if both client and C implementation are buggy. */
  406. /* But empirically this happens frequently. */
  407. # endif
  408. return(CORD_substr_checked(x, i, n));
  409. }
  410. /* See cord.h for definition. We assume i is in range. */
  411. int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
  412. CORD_batched_iter_fn f2, void * client_data)
  413. {
  414. if (x == 0) return(0);
  415. if (CORD_IS_STRING(x)) {
  416. register const char *p = x+i;
  417. if (*p == '\0') ABORT("2nd arg to CORD_iter5 too big");
  418. if (f2 != CORD_NO_FN) {
  419. return((*f2)(p, client_data));
  420. } else {
  421. while (*p) {
  422. if ((*f1)(*p, client_data)) return(1);
  423. p++;
  424. }
  425. return(0);
  426. }
  427. } else if (IS_CONCATENATION(x)) {
  428. register struct Concatenation * conc
  429. = &(((CordRep *)x) -> concatenation);
  430. if (i > 0) {
  431. register size_t left_len = LEFT_LEN(conc);
  432. if (i >= left_len) {
  433. return(CORD_iter5(conc -> right, i - left_len, f1, f2,
  434. client_data));
  435. }
  436. }
  437. if (CORD_iter5(conc -> left, i, f1, f2, client_data)) {
  438. return(1);
  439. }
  440. return(CORD_iter5(conc -> right, 0, f1, f2, client_data));
  441. } else /* function */ {
  442. register struct Function * f = &(((CordRep *)x) -> function);
  443. register size_t j;
  444. register size_t lim = f -> len;
  445. for (j = i; j < lim; j++) {
  446. if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
  447. return(1);
  448. }
  449. }
  450. return(0);
  451. }
  452. }
  453. #undef CORD_iter
  454. int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data)
  455. {
  456. return(CORD_iter5(x, 0, f1, CORD_NO_FN, client_data));
  457. }
  458. int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data)
  459. {
  460. if (x == 0) return(0);
  461. if (CORD_IS_STRING(x)) {
  462. register const char *p = x + i;
  463. register char c;
  464. for(;;) {
  465. c = *p;
  466. if (c == '\0') ABORT("2nd arg to CORD_riter4 too big");
  467. if ((*f1)(c, client_data)) return(1);
  468. if (p == x) break;
  469. p--;
  470. }
  471. return(0);
  472. } else if (IS_CONCATENATION(x)) {
  473. register struct Concatenation * conc
  474. = &(((CordRep *)x) -> concatenation);
  475. register CORD left_part = conc -> left;
  476. register size_t left_len;
  477. left_len = LEFT_LEN(conc);
  478. if (i >= left_len) {
  479. if (CORD_riter4(conc -> right, i - left_len, f1, client_data)) {
  480. return(1);
  481. }
  482. return(CORD_riter4(left_part, left_len - 1, f1, client_data));
  483. } else {
  484. return(CORD_riter4(left_part, i, f1, client_data));
  485. }
  486. } else /* function */ {
  487. register struct Function * f = &(((CordRep *)x) -> function);
  488. register size_t j;
  489. for (j = i; ; j--) {
  490. if ((*f1)((*(f -> fn))(j, f -> client_data), client_data)) {
  491. return(1);
  492. }
  493. if (j == 0) return(0);
  494. }
  495. }
  496. }
  497. int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data)
  498. {
  499. return(CORD_riter4(x, CORD_len(x) - 1, f1, client_data));
  500. }
  501. /*
  502. * The following functions are concerned with balancing cords.
  503. * Strategy:
  504. * Scan the cord from left to right, keeping the cord scanned so far
  505. * as a forest of balanced trees of exponentialy decreasing length.
  506. * When a new subtree needs to be added to the forest, we concatenate all
  507. * shorter ones to the new tree in the appropriate order, and then insert
  508. * the result into the forest.
  509. * Crucial invariants:
  510. * 1. The concatenation of the forest (in decreasing order) with the
  511. * unscanned part of the rope is equal to the rope being balanced.
  512. * 2. All trees in the forest are balanced.
  513. * 3. forest[i] has depth at most i.
  514. */
  515. typedef struct {
  516. CORD c;
  517. size_t len; /* Actual length of c */
  518. } ForestElement;
  519. static size_t min_len [ MAX_DEPTH ];
  520. static int min_len_init = 0;
  521. int CORD_max_len;
  522. typedef ForestElement Forest [ MAX_DEPTH ];
  523. /* forest[i].len >= fib(i+1) */
  524. /* The string is the concatenation */
  525. /* of the forest in order of DECREASING */
  526. /* indices. */
  527. void CORD_init_min_len()
  528. {
  529. register int i;
  530. register size_t last, previous, current;
  531. min_len[0] = previous = 1;
  532. min_len[1] = last = 2;
  533. for (i = 2; i < MAX_DEPTH; i++) {
  534. current = last + previous;
  535. if (current < last) /* overflow */ current = last;
  536. min_len[i] = current;
  537. previous = last;
  538. last = current;
  539. }
  540. CORD_max_len = last - 1;
  541. min_len_init = 1;
  542. }
  543. void CORD_init_forest(ForestElement * forest, size_t max_len)
  544. {
  545. register int i;
  546. for (i = 0; i < MAX_DEPTH; i++) {
  547. forest[i].c = 0;
  548. if (min_len[i] > max_len) return;
  549. }
  550. ABORT("Cord too long");
  551. }
  552. /* Add a leaf to the appropriate level in the forest, cleaning */
  553. /* out lower levels as necessary. */
  554. /* Also works if x is a balanced tree of concatenations; however */
  555. /* in this case an extra concatenation node may be inserted above x; */
  556. /* This node should not be counted in the statement of the invariants. */
  557. void CORD_add_forest(ForestElement * forest, CORD x, size_t len)
  558. {
  559. register int i = 0;
  560. register CORD sum = CORD_EMPTY;
  561. register size_t sum_len = 0;
  562. while (len > min_len[i + 1]) {
  563. if (forest[i].c != 0) {
  564. sum = CORD_cat(forest[i].c, sum);
  565. sum_len += forest[i].len;
  566. forest[i].c = 0;
  567. }
  568. i++;
  569. }
  570. /* Sum has depth at most 1 greter than what would be required */
  571. /* for balance. */
  572. sum = CORD_cat(sum, x);
  573. sum_len += len;
  574. /* If x was a leaf, then sum is now balanced. To see this */
  575. /* consider the two cases in which forest[i-1] either is or is */
  576. /* not empty. */
  577. while (sum_len >= min_len[i]) {
  578. if (forest[i].c != 0) {
  579. sum = CORD_cat(forest[i].c, sum);
  580. sum_len += forest[i].len;
  581. /* This is again balanced, since sum was balanced, and has */
  582. /* allowable depth that differs from i by at most 1. */
  583. forest[i].c = 0;
  584. }
  585. i++;
  586. }
  587. i--;
  588. forest[i].c = sum;
  589. forest[i].len = sum_len;
  590. }
  591. CORD CORD_concat_forest(ForestElement * forest, size_t expected_len)
  592. {
  593. register int i = 0;
  594. CORD sum = 0;
  595. size_t sum_len = 0;
  596. while (sum_len != expected_len) {
  597. if (forest[i].c != 0) {
  598. sum = CORD_cat(forest[i].c, sum);
  599. sum_len += forest[i].len;
  600. }
  601. i++;
  602. }
  603. return(sum);
  604. }
  605. /* Insert the frontier of x into forest. Balanced subtrees are */
  606. /* treated as leaves. This potentially adds one to the depth */
  607. /* of the final tree. */
  608. void CORD_balance_insert(CORD x, size_t len, ForestElement * forest)
  609. {
  610. register int depth;
  611. if (CORD_IS_STRING(x)) {
  612. CORD_add_forest(forest, x, len);
  613. } else if (IS_CONCATENATION(x)
  614. && ((depth = DEPTH(x)) >= MAX_DEPTH
  615. || len < min_len[depth])) {
  616. register struct Concatenation * conc
  617. = &(((CordRep *)x) -> concatenation);
  618. size_t left_len = LEFT_LEN(conc);
  619. CORD_balance_insert(conc -> left, left_len, forest);
  620. CORD_balance_insert(conc -> right, len - left_len, forest);
  621. } else /* function or balanced */ {
  622. CORD_add_forest(forest, x, len);
  623. }
  624. }
  625. CORD CORD_balance(CORD x)
  626. {
  627. Forest forest;
  628. register size_t len;
  629. if (x == 0) return(0);
  630. if (CORD_IS_STRING(x)) return(x);
  631. if (!min_len_init) CORD_init_min_len();
  632. len = LEN(x);
  633. CORD_init_forest(forest, len);
  634. CORD_balance_insert(x, len, forest);
  635. return(CORD_concat_forest(forest, len));
  636. }
  637. /* Position primitives */
  638. /* Private routines to deal with the hard cases only: */
  639. /* P contains a prefix of the path to cur_pos. Extend it to a full */
  640. /* path and set up leaf info. */
  641. /* Return 0 if past the end of cord, 1 o.w. */
  642. void CORD__extend_path(register CORD_pos p)
  643. {
  644. register struct CORD_pe * current_pe = &(p[0].path[p[0].path_len]);
  645. register CORD top = current_pe -> pe_cord;
  646. register size_t pos = p[0].cur_pos;
  647. register size_t top_pos = current_pe -> pe_start_pos;
  648. register size_t top_len = GEN_LEN(top);
  649. /* Fill in the rest of the path. */
  650. while(!CORD_IS_STRING(top) && IS_CONCATENATION(top)) {
  651. register struct Concatenation * conc =
  652. &(((CordRep *)top) -> concatenation);
  653. register size_t left_len;
  654. left_len = LEFT_LEN(conc);
  655. current_pe++;
  656. if (pos >= top_pos + left_len) {
  657. current_pe -> pe_cord = top = conc -> right;
  658. current_pe -> pe_start_pos = top_pos = top_pos + left_len;
  659. top_len -= left_len;
  660. } else {
  661. current_pe -> pe_cord = top = conc -> left;
  662. current_pe -> pe_start_pos = top_pos;
  663. top_len = left_len;
  664. }
  665. p[0].path_len++;
  666. }
  667. /* Fill in leaf description for fast access. */
  668. if (CORD_IS_STRING(top)) {
  669. p[0].cur_leaf = top;
  670. p[0].cur_start = top_pos;
  671. p[0].cur_end = top_pos + top_len;
  672. } else {
  673. p[0].cur_end = 0;
  674. }
  675. if (pos >= top_pos + top_len) p[0].path_len = CORD_POS_INVALID;
  676. }
  677. char CORD__pos_fetch(register CORD_pos p)
  678. {
  679. /* Leaf is a function node */
  680. struct CORD_pe * pe = &((p)[0].path[(p)[0].path_len]);
  681. CORD leaf = pe -> pe_cord;
  682. register struct Function * f = &(((CordRep *)leaf) -> function);
  683. if (!IS_FUNCTION(leaf)) ABORT("CORD_pos_fetch: bad leaf");
  684. return ((*(f -> fn))(p[0].cur_pos - pe -> pe_start_pos, f -> client_data));
  685. }
  686. void CORD__next(register CORD_pos p)
  687. {
  688. register size_t cur_pos = p[0].cur_pos + 1;
  689. register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
  690. register CORD leaf = current_pe -> pe_cord;
  691. /* Leaf is not a string or we're at end of leaf */
  692. p[0].cur_pos = cur_pos;
  693. if (!CORD_IS_STRING(leaf)) {
  694. /* Function leaf */
  695. register struct Function * f = &(((CordRep *)leaf) -> function);
  696. register size_t start_pos = current_pe -> pe_start_pos;
  697. register size_t end_pos = start_pos + f -> len;
  698. if (cur_pos < end_pos) {
  699. /* Fill cache and return. */
  700. register size_t i;
  701. register size_t limit = cur_pos + FUNCTION_BUF_SZ;
  702. register CORD_fn fn = f -> fn;
  703. register void * client_data = f -> client_data;
  704. if (limit > end_pos) {
  705. limit = end_pos;
  706. }
  707. for (i = cur_pos; i < limit; i++) {
  708. p[0].function_buf[i - cur_pos] =
  709. (*fn)(i - start_pos, client_data);
  710. }
  711. p[0].cur_start = cur_pos;
  712. p[0].cur_leaf = p[0].function_buf;
  713. p[0].cur_end = limit;
  714. return;
  715. }
  716. }
  717. /* End of leaf */
  718. /* Pop the stack until we find two concatenation nodes with the */
  719. /* same start position: this implies we were in left part. */
  720. {
  721. while (p[0].path_len > 0
  722. && current_pe[0].pe_start_pos != current_pe[-1].pe_start_pos) {
  723. p[0].path_len--;
  724. current_pe--;
  725. }
  726. if (p[0].path_len == 0) {
  727. p[0].path_len = CORD_POS_INVALID;
  728. return;
  729. }
  730. }
  731. p[0].path_len--;
  732. CORD__extend_path(p);
  733. }
  734. void CORD__prev(register CORD_pos p)
  735. {
  736. register struct CORD_pe * pe = &(p[0].path[p[0].path_len]);
  737. if (p[0].cur_pos == 0) {
  738. p[0].path_len = CORD_POS_INVALID;
  739. return;
  740. }
  741. p[0].cur_pos--;
  742. if (p[0].cur_pos >= pe -> pe_start_pos) return;
  743. /* Beginning of leaf */
  744. /* Pop the stack until we find two concatenation nodes with the */
  745. /* different start position: this implies we were in right part. */
  746. {
  747. register struct CORD_pe * current_pe = &((p)[0].path[(p)[0].path_len]);
  748. while (p[0].path_len > 0
  749. && current_pe[0].pe_start_pos == current_pe[-1].pe_start_pos) {
  750. p[0].path_len--;
  751. current_pe--;
  752. }
  753. }
  754. p[0].path_len--;
  755. CORD__extend_path(p);
  756. }
  757. #undef CORD_pos_fetch
  758. #undef CORD_next
  759. #undef CORD_prev
  760. #undef CORD_pos_to_index
  761. #undef CORD_pos_to_cord
  762. #undef CORD_pos_valid
  763. char CORD_pos_fetch(register CORD_pos p)
  764. {
  765. if (p[0].cur_start <= p[0].cur_pos && p[0].cur_pos < p[0].cur_end) {
  766. return(p[0].cur_leaf[p[0].cur_pos - p[0].cur_start]);
  767. } else {
  768. return(CORD__pos_fetch(p));
  769. }
  770. }
  771. void CORD_next(CORD_pos p)
  772. {
  773. if (p[0].cur_pos < p[0].cur_end - 1) {
  774. p[0].cur_pos++;
  775. } else {
  776. CORD__next(p);
  777. }
  778. }
  779. void CORD_prev(CORD_pos p)
  780. {
  781. if (p[0].cur_end != 0 && p[0].cur_pos > p[0].cur_start) {
  782. p[0].cur_pos--;
  783. } else {
  784. CORD__prev(p);
  785. }
  786. }
  787. size_t CORD_pos_to_index(CORD_pos p)
  788. {
  789. return(p[0].cur_pos);
  790. }
  791. CORD CORD_pos_to_cord(CORD_pos p)
  792. {
  793. return(p[0].path[0].pe_cord);
  794. }
  795. int CORD_pos_valid(CORD_pos p)
  796. {
  797. return(p[0].path_len != CORD_POS_INVALID);
  798. }
  799. void CORD_set_pos(CORD_pos p, CORD x, size_t i)
  800. {
  801. if (x == CORD_EMPTY) {
  802. p[0].path_len = CORD_POS_INVALID;
  803. return;
  804. }
  805. p[0].path[0].pe_cord = x;
  806. p[0].path[0].pe_start_pos = 0;
  807. p[0].path_len = 0;
  808. p[0].cur_pos = i;
  809. CORD__extend_path(p);
  810. }