look.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. /* Find the lines in a sorted file that start with a given string.
  2. Copyright (C) 1986 Richard Stallman
  3. NO WARRANTY
  4. BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
  5. NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
  6. WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
  7. RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
  8. WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
  9. BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  10. FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
  11. AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
  12. DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
  13. CORRECTION.
  14. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
  15. STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
  16. WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
  17. LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
  18. OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
  19. USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
  20. DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
  21. A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
  22. PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
  23. DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
  24. GENERAL PUBLIC LICENSE TO COPY
  25. 1. You may copy and distribute verbatim copies of this source file
  26. as you receive it, in any medium, provided that you conspicuously
  27. and appropriately publish on each copy a valid copyright notice
  28. "Copyright (C) 1986 Richard M. Stallman"; and include following the
  29. copyright notice a verbatim copy of the above disclaimer of warranty
  30. and of this License.
  31. 2. You may modify your copy or copies of this source file or
  32. any portion of it, and copy and distribute such modifications under
  33. the terms of Paragraph 1 above, provided that you also do the following:
  34. a) cause the modified files to carry prominent notices stating
  35. that you changed the files and the date of any change; and
  36. b) cause the whole of any work that you distribute or publish,
  37. that in whole or in part contains or is a derivative of this
  38. program or any part thereof, to be freely distributed
  39. and licensed to all third parties on terms identical to those
  40. contained in this License Agreement (except that you may choose
  41. to grant more extensive warranty protection to third parties,
  42. at your option).
  43. 3. You may copy and distribute this program or any portion of it in
  44. compiled, executable or object code form under the terms of Paragraphs
  45. 1 and 2 above provided that you do the following:
  46. a) cause each such copy to be accompanied by the
  47. corresponding machine-readable source code, which must
  48. be distributed under the terms of Paragraphs 1 and 2 above; or,
  49. b) cause each such copy to be accompanied by a
  50. written offer, with no time limit, to give any third party
  51. free (except for a nominal shipping charge) a machine readable
  52. copy of the corresponding source code, to be distributed
  53. under the terms of Paragraphs 1 and 2 above; or,
  54. c) in the case of a recipient of this program in compiled, executable
  55. or object code form (without the corresponding source code) you
  56. shall cause copies you distribute to be accompanied by a copy
  57. of the written offer of source code which you received along
  58. with the copy you received.
  59. 4. You may not copy, sublicense, distribute or transfer this program
  60. except as expressly provided under this License Agreement. Any attempt
  61. otherwise to copy, sublicense, distribute or transfer this program is void and
  62. your rights to use the program under this License agreement shall be
  63. automatically terminated. However, parties who have received computer
  64. software programs from you with this License Agreement will not have
  65. their licenses terminated so long as such parties remain in full compliance.
  66. In other words, you are welcome to use, share and improve this program.
  67. You are forbidden to forbid anyone else to use, share and improve
  68. what you give them. Help stamp out software-hoarding! */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
  72. /* Nonzero means ignore characters other than letters, digits,
  73. tabs and spaces in both comparison and sorting */
  74. int dictionary_order;
  75. /* Nonzero means ignore case in comparing against `key'
  76. and also assume the file is sorted ignoring case. */
  77. int ignore_case;
  78. /* The string to be searched for (an argument) */
  79. char *key;
  80. /* Name of file to search, or 0 to use standard input. */
  81. char *default_file = "/usr/dict/words"; /* File used if none specd */
  82. char *filename;
  83. #define BUFSIZE 512
  84. char buffer[BUFSIZE];
  85. /* File address of the data now in buffer */
  86. long bufpos;
  87. /* Number of characters, starting at bufpos, now in buffer */
  88. int bufchars;
  89. /* Forward declarations */
  90. char *concat ();
  91. void fillbuf ();
  92. void read_before ();
  93. long trypos ();
  94. long findline ();
  95. void copylines ();
  96. main (argc, argv)
  97. int argc;
  98. char **argv;
  99. {
  100. FILE *stream;
  101. int desc;
  102. long pos;
  103. if (argc < 2)
  104. {
  105. fatal ("usage is look [-df] string [file]", 0);
  106. }
  107. if (argv[1][0] == '-')
  108. {
  109. char *p = argv[1] + 1;
  110. char c;
  111. if (argc < 3)
  112. fatal ("usage is look [-df] string [file]", 0);
  113. while (c = *p++)
  114. switch (c)
  115. {
  116. case 'd':
  117. dictionary_order = 1;
  118. break;
  119. case 'f':
  120. ignore_case = 1;
  121. break;
  122. default:
  123. fatal ("unknown switch %c", c);
  124. }
  125. key = argv[2];
  126. filename = argv[3]; /* Which is zero if there is no file */
  127. }
  128. else
  129. {
  130. key = argv[1];
  131. filename = argv[2]; /* which is zero if there is no file */
  132. }
  133. if (!filename)
  134. {
  135. filename = default_file;
  136. ignore_case = 1;
  137. dictionary_order = 1;
  138. }
  139. desc = open (filename, 0, 0);
  140. bufpos = -1; /* Indicate nothing buffered in core currently */
  141. /* Find position in file of first line that matches. */
  142. pos = findline (desc, key);
  143. if (pos < 0) exit (0); /* Give up if didn't find key */
  144. /* Now switch to ordinary stream input to read the matching lines */
  145. stream = fdopen (desc, "r");
  146. fseek (stream, pos, 0);
  147. /* Now copy out lines as long as they continue to match */
  148. copylines (stream, key);
  149. fclose (stream);
  150. }
  151. /* Binary search in the file for a line matching `key'. */
  152. long
  153. findline (desc, key)
  154. int desc;
  155. char *key;
  156. {
  157. long start;
  158. long end;
  159. long searchpoint;
  160. struct stat status;
  161. fstat (desc, &status); /* Read file status; in particular, the length */
  162. start = 0; /* Initialize binary search endpoints */
  163. end = status.st_size;
  164. searchpoint = 0; /* The meaning of searchpoint is this: */
  165. /* We already know no newline exists */
  166. /* between start and searchpoint. */
  167. do
  168. {
  169. long middle_line_start;
  170. long middle_line_searchpoint;
  171. int flag;
  172. int order;
  173. /* try to find a line starting between start and end.
  174. Since we know no line starts between start and searchpoint,
  175. it is ok to look for one between searchpoint and end. */
  176. flag = trybetween (desc, searchpoint, end,
  177. &middle_line_start, &middle_line_searchpoint);
  178. /* If region remaining is only one line, that's the one. */
  179. if (flag < 0) break;
  180. /* Compare this line with the key and decide following action */
  181. order = compare (desc, middle_line_start, key);
  182. if (order >= 0)
  183. {
  184. /* middle_line is beyond where we want */
  185. end = middle_line_start;
  186. }
  187. else
  188. {
  189. /* Where we want is beyond middle_line */
  190. start = middle_line_start;
  191. searchpoint = middle_line_searchpoint;
  192. }
  193. }
  194. while (searchpoint != end);
  195. /* It can happen that the line at `start' matches the key.
  196. This happens if the file's first line matches. */
  197. if (!compare (desc, start, key))
  198. return start;
  199. /* More often, the line at `end' is the one that we wanted .
  200. This is because, when we hit an exact match above,
  201. we set `end' to point at the matching line. */
  202. if (!compare (desc, end, key))
  203. return end;
  204. /* File has no match for `key'. */
  205. return -1;
  206. }
  207. /* Refill the buffer with data starting at `pos'. */
  208. void
  209. fillbuf (desc, pos)
  210. int desc;
  211. long pos;
  212. {
  213. bufpos = pos;
  214. lseek (desc, pos, 0);
  215. bufchars = read (desc, buffer, BUFSIZE);
  216. }
  217. /* Fill buffer with data up to position `pos'. */
  218. void
  219. read_before (desc, pos)
  220. int desc;
  221. long pos;
  222. {
  223. if (pos >= BUFSIZE)
  224. fillbuf (desc, pos - BUFSIZE);
  225. else
  226. fillbuf (desc, 0L);
  227. }
  228. /* Take a stab in the file at position `pos', and find
  229. the beginning of the line containing that position,
  230. or else return -1 if no start-of-line is found later than `start'.
  231. `start' should be less than `pos'. */
  232. long
  233. trypos (desc, start, pos)
  234. long pos;
  235. long start;
  236. int desc;
  237. {
  238. long try = pos;
  239. while (try > start)
  240. {
  241. char *p, *pstart;
  242. if (try <= bufpos || try > bufpos + bufchars)
  243. if (start > try - BUFSIZE)
  244. fillbuf (desc, start);
  245. else
  246. read_before (desc, try);
  247. if (start > bufpos)
  248. pstart = buffer + start - bufpos;
  249. else
  250. pstart = buffer;
  251. p = buffer + try - bufpos;
  252. while (p != pstart)
  253. if (*--p == '\n')
  254. return bufpos + (p - buffer) + 1;
  255. if (start >= bufpos)
  256. return -1;
  257. }
  258. return -1;
  259. }
  260. /* Find a line in the file starting between start and end, if possible.
  261. Try to find one midway between, but any line whose start is between is ok.
  262. If one is found, the value returned is positive
  263. and *lineposptr is set to the line's starting index in the file
  264. and *searchposptr is set to an index in the file at which we
  265. started searching back for the line's start.
  266. If no line is found, -1 is returned.
  267. This implies that there is no newline from start to end.
  268. The utility of *searchposptr is that the caller knows that
  269. no newlines exist between *lineposptr and there.
  270. This can sometimes be used to prevent searching that region
  271. over again on the next iteration of the binary search. */
  272. int
  273. trybetween (desc, start, end, lineposptr, searchposptr)
  274. int desc;
  275. long start;
  276. long end;
  277. long *lineposptr;
  278. long *searchposptr;
  279. {
  280. long guess = start;
  281. while (1)
  282. {
  283. long searchpoint = guess;
  284. long tem;
  285. guess = (guess + end + 1) / 2;
  286. if (guess == end) return -1;
  287. tem = trypos (desc, searchpoint, guess);
  288. if (tem >= 0)
  289. {
  290. *lineposptr = tem;
  291. *searchposptr = guess;
  292. return 0;
  293. }
  294. }
  295. }
  296. /* Compare the line starting at `pos' in the file with the string `key'.
  297. Assumes that the beginning of the line is in the buffer.
  298. Return 0 if they match, 1 if line is later, -1 if key is later. */
  299. int
  300. compare (desc, pos, key)
  301. int desc;
  302. long pos;
  303. char *key;
  304. {
  305. char *p = key;
  306. char *p1 = buffer + pos - bufpos;
  307. char *pe = buffer + bufchars;
  308. char ck = 1; /* Next char of key to compare */
  309. char cf; /* Next char of file to compare */
  310. while (ck)
  311. {
  312. int c;
  313. ck = *p++;
  314. if (dictionary_order)
  315. {
  316. while (ck && ck != ' ' && ck != '\t'
  317. && (ck < '0' || ck > '9')
  318. && (ck < 'a' || ck > 'z')
  319. && (ck < 'A' || ck > 'Z'))
  320. ck = *p++;
  321. }
  322. /* If beginning of line exactly matches the key, return "match".
  323. The binary search does not need to distinguish this case from an
  324. exact match, since it treats an exact match just like
  325. a line that is greater than the key.
  326. The final test for having found the key regards a zero value
  327. as meaning it was found. */
  328. if (!ck) return 0;
  329. if (p1 == pe)
  330. {
  331. fillbuf (desc, bufpos + bufchars);
  332. p1 = buffer;
  333. pe = buffer + bufchars;
  334. }
  335. if (p1 == pe) cf = 0;
  336. else cf = *p1++;
  337. if (dictionary_order)
  338. {
  339. while (cf && cf != '\n' && cf != ' ' && cf != '\t'
  340. && (cf < '0' || cf > '9')
  341. && (cf < 'a' || cf > 'z')
  342. && (cf < 'A' || cf > 'Z'))
  343. {
  344. if (p1 == pe)
  345. {
  346. fillbuf (desc, bufpos + bufchars);
  347. p1 = buffer;
  348. pe = buffer + bufchars;
  349. }
  350. if (p1 == pe) cf = 0;
  351. else cf = *p1++;
  352. }
  353. }
  354. if (ignore_case)
  355. {
  356. if (ck >= 'A' && ck <= 'Z')
  357. ck += 'a' - 'A';
  358. if (cf >= 'A' && cf <= 'Z')
  359. cf += 'a' - 'A';
  360. }
  361. if (cf == '\n') cf = 0; /* Treat newline as 0 for comparison */
  362. if (c = cf - ck)
  363. {
  364. if (c > 0) return 1;
  365. return -1;
  366. }
  367. }
  368. return 0;
  369. }
  370. /* Copy lines out of the stdio stream `stream'
  371. as long as they initially match `key'. */
  372. char *xbuffer; /* As a line is being tested, it is saved here */
  373. long xbufsize; /* xbuffer is enlarged as needed. Current size here. */
  374. void
  375. copylines (stream, key)
  376. FILE *stream;
  377. char *key;
  378. {
  379. int keylen = strlen (key);
  380. xbufsize = keylen;
  381. xbuffer = (char *) malloc (keylen);
  382. while (1)
  383. {
  384. long len;
  385. int c;
  386. /* Read and test one line of the file. */
  387. /* Read beginning of line into `xbuffer', comparing with `key'. */
  388. len = compare1 (key, stream);
  389. if (!len) break;
  390. fwrite (xbuffer, 1, len, stdout);
  391. do
  392. {
  393. c = getc (stream);
  394. putchar (c);
  395. }
  396. while (c != '\n');
  397. }
  398. }
  399. #define GROWBUF \
  400. { char *nbuf = (char *) realloc (xbuffer, xbufsize *= 2); \
  401. pb += (nbuf - xbuffer); \
  402. pbe += (nbuf - xbuffer); \
  403. xbuffer = nbuf; }
  404. /* Compare the string `key' against a line being read from `stream'
  405. and stored into `buffer' as it is read.
  406. The command switches control the kind of comparison used.
  407. If they are unequal, return 0.
  408. If they are equal, return the number of characters read. */
  409. int
  410. compare1 (key, stream)
  411. char *key;
  412. FILE *stream;
  413. {
  414. char *pk = key;
  415. char *pb = xbuffer;
  416. char *pbe = xbuffer + xbufsize;
  417. char c1 = 1, c2;
  418. while (c1)
  419. {
  420. /* Fetch one char from key and one from stream. */
  421. c1 = *pk++;
  422. if (!c1) return pb - xbuffer;
  423. c2 = getc (stream);
  424. *pb++ = c2;
  425. if (pb == pbe)
  426. GROWBUF;
  427. /* If desired, skip chars in each one until a letter, digit, space or tab */
  428. if (dictionary_order)
  429. {
  430. while (c1 != ' ' && c1 != '\t'
  431. && (c1 < '0' || c1 > '9')
  432. && (c1 < 'A' || c1 > 'Z')
  433. && (c1 < 'a' || c1 > 'z'))
  434. {
  435. c1 = *pk++;
  436. if (!c1) return pb - xbuffer;
  437. }
  438. while (c2 >= 0 && c2 != '\n' && c2 != ' ' && c2 != '\t'
  439. && (c2 < '0' || c2 > '9')
  440. && (c2 < 'A' || c2 > 'Z')
  441. && (c2 < 'a' || c2 > 'z'))
  442. {
  443. c2 = getc (stream);
  444. *pb++ = c2;
  445. if (pb == pbe)
  446. GROWBUF;
  447. }
  448. }
  449. if (ignore_case)
  450. {
  451. if (c1 >= 'A' && c1 <= 'Z')
  452. c1 += 'a' - 'A';
  453. if (c2 >= 'A' && c2 <= 'Z')
  454. c2 += 'a' - 'A';
  455. }
  456. if (c1 != c2) return 0;
  457. }
  458. return pb - xbuffer;
  459. }
  460. /* Print error message and exit. */
  461. fatal (s1, s2)
  462. char *s1, *s2;
  463. {
  464. error (s1, s2);
  465. exit (1);
  466. }
  467. /* Print error message. `s1' is printf control string, `s2' is arg for it. */
  468. error (s1, s2)
  469. char *s1, *s2;
  470. {
  471. printf ("look: ");
  472. printf (s1, s2);
  473. printf ("\n");
  474. }
  475. perror_with_name (name)
  476. char *name;
  477. {
  478. extern int errno, sys_nerr;
  479. extern char *sys_errlist[];
  480. char *s;
  481. if (errno < sys_nerr)
  482. s = concat ("", sys_errlist[errno], " for %s");
  483. else
  484. s = "cannot open %s";
  485. error (s, name);
  486. }
  487. /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
  488. char *
  489. concat (s1, s2, s3)
  490. char *s1, *s2, *s3;
  491. {
  492. int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
  493. char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
  494. strcpy (result, s1);
  495. strcpy (result + len1, s2);
  496. strcpy (result + len1 + len2, s3);
  497. *(result + len1 + len2 + len3) = 0;
  498. return result;
  499. }
  500. /* Like malloc but get fatal error if memory is exhausted. */
  501. int
  502. xmalloc (size)
  503. int size;
  504. {
  505. int result = malloc (size);
  506. if (!result)
  507. fatal ("virtual memory exhausted", 0);
  508. return result;
  509. }
  510. int
  511. xrealloc (ptr, size)
  512. char *ptr;
  513. int size;
  514. {
  515. int result = realloc (ptr, size);
  516. if (!result)
  517. fatal ("virtual memory exhausted");
  518. return result;
  519. }