grep.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. /* grep - search files for regular expression matches.
  2. Copyright (C) 1985 Dave Curry
  3. This program is distributed in the hope that it will be useful,
  4. but without any warranty. No author or distributor
  5. accepts responsibility to anyone for the consequences of using it
  6. or for whether it serves any particular purpose or works at all,
  7. unless he says so in writing.
  8. Permission is granted to anyone to distribute verbatim copies
  9. of this program's source code as received, in any medium, provided that
  10. the copyright notice, the nonwarranty notice above
  11. and this permission notice are preserved,
  12. and that the distributor grants the recipient all rights
  13. for further redistribution as permitted by this notice,
  14. and informs him of these rights.
  15. Permission is granted to distribute modified versions of this
  16. program's source code, or of portions of it, under the above
  17. conditions, plus the conditions that all changed files carry
  18. prominent notices stating who last changed them and that the
  19. derived material, including anything packaged together with it and
  20. conceptually functioning as a modification of it rather than an
  21. application of it, is in its entirety subject to a permission
  22. notice identical to this one.
  23. Permission is granted to distribute this program (verbatim or
  24. as modified) in compiled or executable form, provided verbatim
  25. redistribution is permitted as stated above for source code, and
  26. A. it is accompanied by the corresponding machine-readable
  27. source code, under the above conditions, or
  28. B. it is accompanied by a written offer, with no time limit,
  29. to distribute the corresponding machine-readable source code,
  30. under the above conditions, to any one, in return for reimbursement
  31. of the cost of distribution. Verbatim redistribution of the
  32. written offer must be permitted. Or,
  33. C. it is distributed by someone who received only the
  34. compiled or executable form, and is accompanied by a copy of the
  35. written offer of source code which he received along with it.
  36. In other words, you are welcome to use, share and improve this program.
  37. You are forbidden to forbid anyone else to use, share and improve
  38. what you give them. Help stamp out software-hoarding! */
  39. /*
  40. * cgrep - context grep
  41. *
  42. * This program is very similar to grep, except that it prints lines
  43. * of context around matching lines. The default is one line before
  44. * and one line after, but this may be changed.
  45. *
  46. * Flags understood are:
  47. *
  48. * -A # print # lines of context after matching lines
  49. * -B # print # lines of context before matching lines
  50. * -C turn on context grep (above flags do it too)
  51. * -E print in modified -n format for EMACS (JOVE) such that
  52. * only the matching lines (and not the context lines) are
  53. * parsed by the parse-some-errors command.
  54. * -F print the file name at the beginning of each matching line
  55. * -L do not separate matches with '-----' or blank lines
  56. * -b print block numbers in front of lines
  57. * -c count matching lines only
  58. * -e next argument is expression (for exprs starting with '-')
  59. * -f read expression from file
  60. * -i ignore case
  61. * -l print names of files containing matching lines only
  62. * -n number lines on output
  63. * -s silent mode - error messages only
  64. * -v all lines but those matching are printed (grep only)
  65. * -w match expression as if surrounded by \<...\>
  66. * -? print usage message and special characters
  67. *
  68. * David A. Curry, 7/9/84
  69. * davy@purdue-ecn
  70. * decvax!pur-ee!davy
  71. *
  72. * 4/20/85 - added -C flag, made -# and +# turn on cgrep. Cleaned up
  73. * and converted to use GNU regex routines.
  74. */
  75. #include <sys/types.h>
  76. #include <sys/stat.h>
  77. #include <sys/file.h>
  78. #include <stdio.h>
  79. #include <ctype.h>
  80. #include "regex.h"
  /* Initial capacity, in bytes, of each saved-line buffer.  */
  #define LBSIZE 256

  /* Starting regexp buffer size.  */
  #define EXPSIZE 256

  /* Lines of context used on each side of a match by default.  */
  #define NCONTEXT 1

  /* Bits per byte, unless the build has already supplied a value.  */
  #if !defined (BYTEWIDTH)
  #define BYTEWIDTH 8
  #endif
  /*
   * One saved input line.  The program keeps a window of
   * (precontext + aftcontext + 1) of these, so that the lines
   * surrounding a match are still at hand when it is printed.
   */
  struct line
  {
    long len;       /* characters held in linebuf, newline not counted */
    long size;      /* bytes currently allocated for linebuf */
    long blkno;     /* block number of the line (bytes read so far / block size) */
    long lineno;    /* line number in the file; -1 marks an empty slot */
    char *linebuf;  /* the text of the line */
  };
  98. char print_context;
  99. char print_for_emacs;
  100. char print_line_file_names;
  101. char dont_print_separators;
  102. char print_block_numbers;
  103. char print_count_only;
  104. char ignore_case;
  105. char only_mention_file_if_match;
  106. char print_line_numbers;
  107. char silent;
  108. char negate_condition;
  109. char match_as_word;
  110. int nfile; /* number of files to do */
  111. int nsucc; /* 1 if found anything */
  112. long nmatch; /* number of matching lines */
  113. long nbytes; /* number of bytes read */
  114. long lineno; /* line number in the file */
  115. long nlines; /* number of lines saved */
  116. int retcode; /* exit code */
  117. int input_fd; /* input file descriptor */
  118. int blocksize; /* block size of input file */
  119. char *readbufp; /* ptr to next char to fetch */
  120. int readbufcount; /* number of characters of */
  121. /* buffer actually filled with */
  122. /* data. -1 means have yet to */
  123. /* fill it; 0 = end of file. */
  124. char readbuf[8192]; /* buffer for reading from file */
  125. int precontext;
  126. int aftcontext;
  127. char *expression; /* the compiled expression */
  128. struct line **lines; /* the array of lines we save */
  129. struct re_pattern_buffer re_comp_buf = {0,};
  130. char re_comp_fastmap[256];
  131. char downcase_table[256];
  132. char *malloc (), *xmalloc (), *calloc (), *xcalloc (), *realloc (), *xrealloc ();
  133. char *progname; /* For error messages */
  134. main (argc, argv)
  135. int argc;
  136. char **argv;
  137. {
  138. register int i;
  139. struct stat sbuf;
  140. register char *s;
  141. extern char _sobuf[];
  142. char *rindex ();
  143. register int argn;
  144. int fd;
  145. retcode = 0;
  146. expression = NULL;
  147. precontext = aftcontext = NCONTEXT;
  148. print_context = print_for_emacs = 0;
  149. print_line_file_names = dont_print_separators = 0;
  150. print_block_numbers = print_count_only = 0;
  151. ignore_case = only_mention_file_if_match = print_line_numbers = 0;
  152. silent = negate_condition = match_as_word = 0;
  153. progname = argv[0];
  154. /*
  155. * Figure out what mode we're in.
  156. */
  157. if ((s = rindex (argv[0], '/')) == NULL)
  158. s = argv[0];
  159. else
  160. s++;
  161. if (*s == 'c')
  162. print_context = 1;
  163. /*
  164. * Process the arguments.
  165. */
  166. for (argn = 1; argn < argc && argv[argn][0] == '-'; argn++)
  167. {
  168. for (s = argv[argn]+1; *s; s++)
  169. {
  170. switch (*s)
  171. {
  172. case 'A': /* number of post-context lines */
  173. print_context = 1;
  174. if (isdigit (*(s+1)))
  175. {
  176. aftcontext = atoi (++s);
  177. while (isdigit (*(s+1)))
  178. s++;
  179. }
  180. else
  181. {
  182. if (argn + 1 >= argc)
  183. {
  184. error ("must give number after -A.\n", 0);
  185. exit (2);
  186. }
  187. aftcontext = atoi (argv[++argn]);
  188. }
  189. continue;
  190. case 'B': /* number of pre-context lines */
  191. print_context = 1;
  192. if (isdigit (*(s+1)))
  193. {
  194. precontext = atoi (++s);
  195. while (isdigit (*(s+1)))
  196. s++;
  197. }
  198. else
  199. {
  200. if (argn + 1 >= argc)
  201. {
  202. error ("must give number after -B.\n", 0);
  203. exit (2);
  204. }
  205. precontext = atoi (argv[++argn]);
  206. }
  207. continue;
  208. case 'C': /* do context */
  209. print_context++;
  210. continue;
  211. case 'E': /* EMACS mode */
  212. print_for_emacs++;
  213. continue;
  214. case 'F': /* print filenames */
  215. print_line_file_names++;
  216. continue;
  217. case 'L': /* no separators */
  218. dont_print_separators++;
  219. continue;
  220. case 'b': /* block numbers */
  221. print_block_numbers++;
  222. continue;
  223. case 'c': /* count matching lines */
  224. print_count_only++;
  225. continue;
  226. case 'e': /* next arg is expr */
  227. if (argn + 1 >= argc)
  228. {
  229. error ("-e with no following expression.\n", 0);
  230. exit (2);
  231. }
  232. expression = argv[++argn];
  233. continue;
  234. case 'f': /* read expression from file */
  235. if (argn + 1 >= argc)
  236. {
  237. error ("-f with no following filename.\n", 0);
  238. exit (2);
  239. }
  240. if ((fd = open (argv[++argn], 0)) < 0)
  241. {
  242. error ("", 0);
  243. perror (argv[argn]);
  244. exit (2);
  245. }
  246. fstat (fd, &sbuf);
  247. expression = xmalloc (sbuf.st_size + 1);
  248. read (fd, expression, sbuf.st_size);
  249. expression[sbuf.st_size] = NULL;
  250. close (fd);
  251. continue;
  252. case '?': /* usage */
  253. usage ();
  254. exit (0);
  255. case 'i': /* ignore case */
  256. ignore_case++;
  257. continue;
  258. case 'l': /* file names only */
  259. only_mention_file_if_match++;
  260. continue;
  261. case 'n': /* print line numbers */
  262. print_line_numbers++;
  263. continue;
  264. case 's': /* silent mode */
  265. silent++;
  266. continue;
  267. case 'v': /* non-matching lines */
  268. negate_condition++;
  269. continue;
  270. case 'w': /* match as \<...\> */
  271. match_as_word++;
  272. continue;
  273. default:
  274. error ("unknown option \"%c\".\n", *s);
  275. exit (2);
  276. }
  277. }
  278. }
  279. if (!print_context)
  280. {
  281. precontext = 0;
  282. aftcontext = 0;
  283. }
  284. if (expression == NULL)
  285. {
  286. if (argn >= argc)
  287. {
  288. error ("no regexp to match specified.\n", 0);
  289. exit (2);
  290. }
  291. expression = argv[argn++];
  292. }
  293. /*
  294. * Buffer the output. Changed this to only do it if printing context.
  295. * so that otherwise you can interrupt out
  296. * of grep as soon as you see what you want instead
  297. * of waiting for the buffer to flush.
  298. */
  299. if (print_context)
  300. setbuf (stdout, _sobuf);
  301. if (match_as_word)
  302. {
  303. s = xmalloc (strlen (expression) + 5);
  304. sprintf (s, "\\<%s\\>", expression);
  305. expression = s;
  306. }
  307. nlines = precontext + aftcontext + 1;
  308. /*
  309. * Get the pointers to the line buffers.
  310. */
  311. lines = (struct line **) xcalloc (nlines, sizeof (struct line *));
  312. /*
  313. * Get the line buffer structures.
  314. */
  315. for (i=0; i < nlines; i++)
  316. {
  317. lines[i] = (struct line *) xcalloc (1, sizeof (struct line));
  318. }
  319. /*
  320. * Get the line buffers.
  321. */
  322. for (i=0; i < nlines; i++)
  323. {
  324. lines[i]->linebuf = xmalloc (LBSIZE);
  325. lines[i]->size = LBSIZE;
  326. }
  327. /*
  328. * Compile the regular expression.
  329. */
  330. /*
  331. * We do "ignore case" by using the translate table.
  332. */
  333. if (ignore_case)
  334. {
  335. for (i=0; i <= 256; i++)
  336. downcase_table[i] = i;
  337. for (i='A'; i <= 'Z'; i++)
  338. downcase_table[i] = i - 'A' + 'a';
  339. re_comp_buf.translate = downcase_table;
  340. }
  341. re_comp_buf.fastmap = re_comp_fastmap;
  342. expression = re_compile_pattern (expression, strlen (expression),
  343. &re_comp_buf);
  344. if (expression)
  345. {
  346. error ("regular expression error: ", expression);
  347. exit (2);
  348. }
  349. nfile = argc - argn;
  350. /*
  351. * Process the files.
  352. */
  353. if (argn == argc)
  354. {
  355. execute (NULL);
  356. }
  357. else
  358. {
  359. while (argn < argc)
  360. execute (argv[argn++]);
  361. }
  362. exit (retcode != 0 ? retcode : nsucc == 0);
  363. }
  364. /*
  365. * execute - look for expression in file
  366. */
  367. execute (file)
  368. char *file;
  369. {
  370. register int gotone;
  371. struct stat statbuf;
  372. /*
  373. * Get file as standard input.
  374. */
  375. if (file)
  376. {
  377. if ((input_fd = open (file, O_RDONLY)) < 0)
  378. {
  379. error ("", 0);
  380. perror (file );
  381. retcode = 2;
  382. return;
  383. }
  384. }
  385. else
  386. {
  387. input_fd = fileno (stdin);
  388. }
  389. fstat (input_fd, &statbuf);
  390. blocksize = statbuf.st_blksize;
  391. if (blocksize == 0) blocksize = 512;
  392. gotone = 0;
  393. nmatch = 0;
  394. nbytes = 0;
  395. lineno = 1;
  396. readbufcount = -1;
  397. readbufp = readbuf;
  398. /*
  399. * Load in the initial set of lines.
  400. */
  401. loadup ();
  402. /*
  403. * lines[precontext]->lineno will be -1 when
  404. * we've hit the end of the file.
  405. */
  406. while (lines[precontext]->lineno > 0)
  407. {
  408. /*
  409. * Look for a match on this line.
  410. */
  411. if ((0 <= re_search (&re_comp_buf, lines[precontext]->linebuf,
  412. lines[precontext]->len, 0,
  413. lines[precontext]->len, 0))
  414. == !negate_condition)
  415. {
  416. /*
  417. * If we need to, print a separator.
  418. */
  419. if (((precontext > 0) || (aftcontext > 0))
  420. && gotone && !dont_print_separators
  421. && !print_count_only && !only_mention_file_if_match && !silent)
  422. {
  423. if (!print_line_numbers && !print_for_emacs)
  424. printf ("-----\n");
  425. else
  426. printf ("\n");
  427. gotone = 0;
  428. }
  429. output (file);
  430. gotone = 1;
  431. }
  432. /*
  433. * Get the next line.
  434. */
  435. nextline ();
  436. }
  437. if (print_count_only)
  438. {
  439. if ((nfile > 1) || print_line_file_names)
  440. printf ("%s:", file);
  441. printf ("%ld\n", nmatch);
  442. }
  443. if (file)
  444. close (input_fd);
  445. }
  446. /*
  447. * loadup - loads nlines lines, starting at lines[precontext].
  448. */
  449. loadup ()
  450. {
  451. register int i;
  452. /*
  453. * Initialize.
  454. */
  455. for (i = 0; i < nlines; i++)
  456. {
  457. lines[i]->len = 0;
  458. lines[i]->blkno = 0;
  459. lines[i]->lineno = -1;
  460. lines[i]->linebuf[0] = NULL;
  461. }
  462. /*
  463. * Load lines.
  464. */
  465. for (i = precontext; i < nlines; i++)
  466. getline (lines[i]);
  467. }
  468. /*
  469. * nextline - diddle the pointers to "shift" the buffers up, and
  470. * get a new line in last buffer.
  471. */
  472. nextline ()
  473. {
  474. register int i;
  475. register struct line *tmp;
  476. /*
  477. * Save first line.
  478. */
  479. tmp = lines[0];
  480. /*
  481. * Copy lines.
  482. */
  483. for (i=1; i < nlines; i++)
  484. lines[i-1] = lines[i];
  485. /*
  486. * Get last line.
  487. */
  488. lines[--i] = tmp;
  489. getline (lines[i]);
  490. }
  491. getline (l)
  492. register struct line *l;
  493. {
  494. register int c;
  495. int maxread, spaceleft;
  496. register char *p, *q, *endp;
  497. l->len = 0;
  498. l->blkno = nbytes / blocksize;
  499. /*
  500. * Already at end of file, nothing to read.
  501. */
  502. if (readbufcount == 0)
  503. {
  504. l->lineno = -1;
  505. return;
  506. }
  507. l->lineno = lineno++;
  508. for (;;)
  509. {
  510. maxread = readbuf + readbufcount - readbufp;
  511. if (maxread <= 0)
  512. {
  513. readbufcount = read (input_fd, readbuf, sizeof (readbuf));
  514. if (readbufcount == 0)
  515. break;
  516. readbufp = readbuf;
  517. maxread = readbufcount;
  518. }
  519. spaceleft = l->size - l->len - 1;
  520. if (spaceleft <= 0)
  521. {
  522. p = xrealloc (l->linebuf, l->size *= 2);
  523. l->linebuf = p;
  524. spaceleft = l->size - l->len - 1;
  525. }
  526. if (maxread > spaceleft)
  527. maxread = spaceleft;
  528. p = readbufp;
  529. q = l->linebuf + l->len;
  530. endp = readbufp + maxread;
  531. while (p != endp)
  532. {
  533. c = *p++;
  534. *q++ = c;
  535. if (c == '\n')
  536. break;
  537. }
  538. l->len += p - readbufp;
  539. nbytes += p - readbufp;
  540. readbufp = p;
  541. if (c == '\n')
  542. {
  543. l->len--;
  544. break;
  545. }
  546. }
  547. l->linebuf[l->len] = NULL;
  548. }
  549. /*
  550. * output - prints the window of context, and the line itself
  551. */
  552. output (file)
  553. char *file;
  554. {
  555. register int i;
  556. nsucc = 1;
  557. if (silent)
  558. return;
  559. if (print_count_only)
  560. {
  561. nmatch++;
  562. return;
  563. }
  564. if (only_mention_file_if_match)
  565. {
  566. printf ("%s\n", file);
  567. fflush (stdout);
  568. lseek (input_fd, 0L, 2);
  569. return;
  570. }
  571. for (i=0; i < nlines; i++)
  572. {
  573. if (lines[i]->lineno == -1)
  574. continue;
  575. if ((nfile > 1) || print_line_file_names || print_for_emacs)
  576. {
  577. printf ("%s", file);
  578. if (print_for_emacs)
  579. {
  580. if (i < precontext)
  581. putchar ('-');
  582. else if (i == precontext)
  583. putchar (':');
  584. else
  585. putchar ('+');
  586. }
  587. else
  588. {
  589. putchar (':');
  590. }
  591. }
  592. if (print_block_numbers)
  593. printf ("%ld:", lines[i]->blkno);
  594. if (print_line_numbers || print_for_emacs)
  595. {
  596. printf ("%ld", lines[i]->lineno);
  597. if (print_for_emacs)
  598. {
  599. if (i < precontext)
  600. putchar ('-');
  601. else if (i == precontext)
  602. putchar (':');
  603. else
  604. putchar ('+');
  605. }
  606. else
  607. {
  608. putchar (':');
  609. }
  610. }
  611. fwrite (lines[i]->linebuf, 1, lines[i]->len, stdout);
  612. putchar ('\n');
  613. }
  614. }
  615. error (fmt, arg)
  616. char *fmt, *arg;
  617. {
  618. fprintf (stderr, "%s: ", progname);
  619. fprintf (stderr, fmt, arg);
  620. }
  /*
   * usage - print the usage message and the list of special
   * characters on the standard output.  The option list names exactly
   * the letters that main () accepts.
   */
  usage ()
  {
    printf ("Usage: grep [-A #] [-B #] [-CEFLbcefilnsvw] expression file (s)\n");
    printf ("Special characters: . $ + * ^ \\| [ - ] \\( \\) \\b \\B \\s \\S \\w \\W\n");
  }
  /*
   * out_of_memory - report that an allocation failed and exit with
   * status 2.  Does not return.
   */
  out_of_memory ()
  {
    static char message[] = "out of memory.\n";

    error (message, 0);
    exit (2);
  }
  /*
   * xmalloc - malloc that never returns failure: if `size' bytes
   * cannot be allocated, out_of_memory () is called, which exits.
   */
  char *
  xmalloc (size)
       int size;
  {
    register char *block = malloc (size);

    if (block == 0)
      out_of_memory ();
    return (block);
  }
  /*
   * xrealloc - realloc that never returns failure: if `ptr' cannot be
   * resized to `size' bytes, out_of_memory () is called, which exits.
   */
  char *
  xrealloc (ptr, size)
       char *ptr;
       int size;
  {
    register char *block = realloc (ptr, size);

    if (block == 0)
      out_of_memory ();
    return (block);
  }
  /*
   * xcalloc - calloc that never returns failure: if `num' objects of
   * `size' bytes cannot be allocated, out_of_memory () is called,
   * which exits.
   */
  char *
  xcalloc (num, size)
       int num, size;
  {
    register char *block = calloc (num, size);

    if (block == 0)
      out_of_memory ();
    return (block);
  }
  655. }