123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688 |
- /* sedexec.c -- execute compiled form of stream editor commands
- The single entry point of this module is the function execute(). It
- may take a string argument (the name of a file to be used as text) or
- the argument NULL which tells it to filter standard input. It executes
- the compiled commands in cmds[] on each line in turn.
- The function command() does most of the work. Match() and advance()
- are used for matching text against precompiled regular expressions and
- dosub() does right-hand-side substitution. Getline() does text input;
- readout() and memcmp() are output and string-comparison utilities.
- If your environment includes a memcmp() in the standard libraries,
- define the symbol MEMCMP to suppress the version given here, as yours
- is probably hacked to use whatever special string-compare instructions
- are available on your hardware and accordingly faster.
- ==== Written for the GNU programming environment by Eric S. Raymond ==== */
- #include <stdio.h> /* {f}puts, {f}printf, getc/putc, f{re}open, fclose */
- #include <ctype.h> /* for isprint(), isdigit(), toascii() macros */
- #include "sed.h" /* compiled-command structure and various constants */
- /* shared variables imported from sedcomp.c */
- extern char linebuf[]; /* current-line buffer */
- extern sedcmd cmds[]; /* hold compiled commands */
- extern long linenum[]; /* numeric-addresses table */
- extern int dflag; /* -d option flag */
- extern int nflag; /* -n option flag */
- extern int eargc; /* scratch copy of argument count */
- extern char bits[]; /* the bits table */
- /***** end of imported stuff *****/
- #define MAXHOLD MAXBUF /* size of the hold space */
- #define GENSIZ 71 /* maximum genbuf size */
- #define TRUE 1
- #define FALSE 0
- /* error messages */
- static char TLITL[] = "sed: line too long\n";
- static char COSTF[] = "sed: can't open text file %s\n";
- static char REBAD[] = "sed: regular expression (internal) error, %o\n";
- static char TMAPP[] = "sed: too many appends after line %ld\n";
- static char TMRDS[] = "sed: too many reads after line %ld\n";
- /* pointers and data areas used in RE interpretation */
- static char *tagend[MAXTAGS]; /* tagged pattern start pointers */
- static char *tagstart[MAXTAGS]; /* tagged pattern end pointers */
- static char genbuf[GENSIZ]; /* right-hand-sides expanded here */
- static char *loc1, *loc2, *locs; /* pointers to pattern-match locs */
- /* miscellaneous internals */
- static sedcmd *appends[MAXAPPENDS]; /* array of ptrs to a,i,c commands */
- static sedcmd **aptr = appends; /* ptr to current append */
- static long lnum = 0L; /* current source line number */
- static char *spend = linebuf; /* current end-of-line pointer */
- /* command-logic flags */
- static int lastline; /* do-line flag */
- static int jump; /* jump to cmd's link address if set */
- static int delete; /* delete command flag */
- void execute(file)
- /* execute the compiled commands in cmds[] on a file */
- char *file; /* name of text source file to be filtered */
- {
- register sedcmd *ipc; /* ptr to current command */
- static sedcmd *pending = NULL; /* next cmd to be executed */
- sedcmd *command(); /* main command executive */
- char *newspend; /* ptr to source, linebuf */
- char *getline(); /* input-getting functions */
- if (file != NULL) /* filter text from a named file */
- if (freopen(file, "r", stdin) == NULL)
- fprintf(stderr, COSTF, file);
- if (pending) /* there's a command waiting */
- {
- ipc = pending; /* it will be first executed */
- pending = NULL; /* clear the waiting ptr */
- goto doit; /* and go to execute it immediately */
- }
- /* here's the main command-execution loop */
- while ((newspend = getline(linebuf)) != BAD)
- {
- spend = newspend; /* update buffer-end ptr */
- /* loop through compiled commands, executing them */
- for(ipc = cmds; ipc->command; ipc++)
- {
- if (!selected(ipc))
- continue;
- /* execute the command pointed at */
- doit: pending = command(ipc);
- if (delete) /* if delete flag is set */
- break; /* don't exec rest of compiled cmds */
- if (jump) /* if jump set, follow cmd's link */
- {
- jump = FALSE;
- if ((ipc = ipc->u.link) == NULL)
- {
- ipc = cmds; /* restart commands */
- break;
- }
- else
- ipc--; /* so ipc++ won't screw us */
- }
- }
- /* we've now done all modification commands on the line */
- /* here's where the transformed line is output */
- if (!nflag && !delete)
- {
- for(newspend = linebuf; newspend < spend; newspend++)
- putc(*newspend, stdout);
- putc('\n', stdout);
- }
- /* if we've been set up for append, emit the text from it */
- if (aptr > appends)
- readout();
- delete = FALSE; /* clear delete flag; about to get next cmd */
- }
- }
- static int selected(ipc) /* uses lnum, linenum */
- /* return TRUE if the command is currently selected, FALSE otherwise */
- sedcmd *ipc;
- {
- register char *start_at = ipc->addr1; /* point p1 at first address */
- register char *end_at = ipc->addr2; /* and p2 at second */
- int c; /* scratch character holder */
- if (start_at)
- {
- if (ipc->flags.active) /* command is selected */
- {
- if (*end_at == CEND)
- start_at = NULL;
- else if (*end_at == CLNUM)
- {
- if (lnum > linenum[end_at[1]])
- {
- ipc->flags.active = FALSE;
- return(ipc->flags.allbut);
- }
- if (lnum == linenum[c])
- ipc->flags.active = FALSE;
- }
- else if (match(linebuf, end_at, FALSE))
- ipc->flags.active = FALSE;
- }
- else if (*start_at == CEND)
- {
- if (!lastline)
- return(ipc->flags.allbut);
- }
- else if (*start_at == CLNUM)
- {
- if (lnum != linenum[start_at[1]])
- return(ipc->flags.allbut);
- ipc->flags.active = (end_at != NULL);
- }
- else if (match(linebuf, start_at, FALSE))
- ipc->flags.active = (end_at != NULL);
- else
- return(ipc->flags.allbut);
- }
- /* skip selected command if flags.allbut is on */
- return (!ipc->flags.allbut);
- }
- static int match(lp, ep, gf) /* uses genbuf */
- /* match RE at ep... against lp...; if gf set, copy lp... from genbuf first */
- register char *lp, *ep;
- int gf;
- {
- char *scp;
- char c;
- if (gf)
- {
- if (*ep)
- return(FALSE);
- strcpy(lp, genbuf);
- locs = lp = loc2;
- }
- else
- locs = NULL;
- if (*ep++)
- {
- loc1 = lp;
- if(*ep == CCHR && ep[1] != *lp) /* 1st char is wrong */
- return(FALSE); /* so fail */
- return(advance(lp, ep)); /* else try to match rest */
- }
- /* quick check for 1st character if it's literal */
- if (*ep == CCHR)
- {
- c = ep[1]; /* pull out character to search for */
- do {
- if (*lp != c)
- continue; /* scan the source string */
- if (advance(lp, ep)) /* found it, match the rest */
- return(loc1 = lp, 1);
- } while
- (*lp++);
- return(FALSE); /* didn't find that first char */
- }
- /* else try for unanchored match of the pattern */
- do {
- if (advance(lp, ep))
- return(loc1 = lp, TRUE);
- } while
- (*lp++);
- /* if got here, didn't match either way */
- return(FALSE);
- }
- static int advance(lp, ep)
- /* attempt to advance match pointer by one pattern element */
- register char *lp; /* source (linebuf) ptr */
- register char *ep; /* regular expression element ptr */
- {
- register char *curlp; /* save ptr for closures */
- char c; /* scratch character holder */
- char *bbeg;
- int ct;
- for (;;)
- switch (*ep++)
- {
- case CCHR: /* literal character */
- if (*ep++ == *lp++) /* if chars are equal */
- continue; /* matched */
- return(FALSE); /* else return false */
- case CDOT: /* anything but newline */
- if (*lp++) /* first NUL is at EOL */
- continue; /* keep going if didn't find */
- return(FALSE); /* else return false */
- case CNL: /* start-of-line */
- case CDOL: /* end-of-line */
- if (*lp == 0) /* found that first NUL? */
- continue; /* yes, keep going */
- return(FALSE); /* else return false */
- case CEOF: /* end-of-address mark */
- loc2 = lp; /* set second loc */
- return(TRUE); /* return true */
- case CCL: /* a character class */
- c = *lp++ & 0177;
- if (ep[c>>3] & bits[c & 07]) /* is char in set? */
- {
- ep += 16; /* then skip rest of bitmask */
- continue; /* and keep going */
- }
- return(FALSE); /* else return false */
- case CBRA: /* start of tagged pattern */
- tagstart[*ep++] = lp; /* mark it */
- continue; /* and go */
- case CKET: /* end of tagged pattern */
- tagend[*ep++] = lp; /* mark it */
- continue; /* and go */
- case CBACK: /* a pattern tag reference */
- bbeg = tagstart[*ep]; /* find the start */
- ct = tagend[*ep++] - bbeg; /* and the length */
- if (memcmp(bbeg, lp, ct)) /* matching text */
- {
- lp += ct; /* yes, look past end */
- continue; /* and keep going */
- }
- return(FALSE); /* else match failed */
- case CBACK|STAR: /* pattern tag with Kleene star */
- bbeg = tagstart[*ep]; /* get start */
- ct = tagend[*ep++] - bbeg; /* and length */
- curlp = lp; /* save the location */
- while(memcmp(bbeg, lp, ct)) /* while we match */
- lp += ct; /* go to next one */
- while(lp >= curlp) /* while 1 match left */
- {
- if (advance(lp, ep)) /* try rest of RE */
- return(TRUE); /* if matched, O.K. */
- lp -= ct; /* else backtrack */
- }
- return(FALSE); /* no matches left, fail */
- case CDOT|STAR: /* match .* */
- curlp = lp; /* save closure start loc */
- while (*lp++); /* match anything */
- goto star; /* now look for followers */
- case CCHR|STAR: /* match <literal char>* */
- curlp = lp; /* save closure start loc */
- while (*lp++ == *ep); /* match many of that char */
- ep++; /* to start of next element */
- goto star; /* match it and followers */
- case CCL|STAR: /* match [...]* */
- curlp = lp; /* save closure start loc */
- do {
- c = *lp++ & 0x7F; /* match any in set */
- } while
- (ep[c>>3] & bits[c & 07]);
- ep += 16; /* skip past the set */
- goto star; /* match followers */
- star: /* the recursion part of a * or + match */
- if (--lp == curlp) /* 0 matches */
- continue;
- if (*ep == CCHR)
- {
- c = ep[1];
- do {
- if (*lp != c)
- continue;
- if (advance(lp, ep))
- return(TRUE);
- } while
- (lp-- > curlp);
- return(FALSE);
- }
- if (*ep == CBACK)
- {
- c = *(tagstart[ep[1]]);
- do {
- if (*lp != c)
- continue;
- if (advance(lp, ep))
- return(TRUE);
- } while
- (lp-- > curlp);
- return(FALSE);
- }
-
- do {
- if (lp == locs)
- break;
- if (advance(lp, ep))
- return(TRUE);
- } while
- (lp-- > curlp);
- return(FALSE);
- default:
- fprintf(stderr, REBAD, *--ep);
- }
- }
- static int substitute(lhs, rhs, gf) /* uses linebuf, spend */
- /* perform s command */
- char *lhs, *rhs; /* left and right sides of command */
- int gf; /* the global-substitute flag */
- {
- char *dosub(); /* for if we find a match */
- if (match(linebuf, lhs, FALSE)) /* if 1 match */
- spend = dosub(linebuf, rhs); /* perform substitution once */
- else
- return(FALSE); /* command fails */
- while(gf && match(linebuf, lhs, TRUE)) /* cycle through possibles */
- spend = dosub(linebuf, rhs); /* substitute each one */
- return(TRUE); /* we succeeded */
- }
- static char *dosub(linep, rhsbuf) /* uses genbuf, loc1, loc2 */
- /* generate substituted right-hand side (of s command) */
- char *linep; /* line buffer to substitute in */
- char *rhsbuf; /* where to put the result */
- {
- register char *lp, *sp, *rp;
- int c;
- char *place();
- /* copy linebuf to genbuf up to location 1 */
- sp = genbuf; lp = linep; while (lp < loc1) *sp++ = *lp++;
- /* insert substitute right-hand-side with tags expanded in genbuf */
- for (rp = rhsbuf; c = *rp++; )
- {
- if (c == '&')
- sp = place(sp, loc1, loc2);
- else if (c & TAGMARK && (c &= 0x7F) >= '1' && c < '1'+MAXTAGS)
- sp = place(sp, tagstart[c - '1'], tagend[c - '1']);
- else if (c & TAGMARK)
- fprintf(stderr, "sed: bad tag value %x\n", c);
- else
- *sp++ = toascii(c);
- if (sp >= genbuf + GENSIZ)
- fprintf(stderr, TLITL);
- }
- /* copy the part of the line after the substituted area */
- lp = loc2;
- loc2 = sp - genbuf + linep;
- while (*sp++ = *lp++)
- if (sp >= genbuf + GENSIZ)
- fprintf(stderr, TLITL);
- /* copy the substituted pattern from genbuf to linebuf */
- lp = linep; sp = genbuf; while (*lp++ = *sp++) continue;
- return(lp - 1);
- }
- static char *place(asp, al1, al2) /* uses genbuf */
- /* place chars at *al1...*(al1 - 1) at asp... in genbuf[] */
- register char *asp, *al1, *al2;
- {
- while (al1 < al2 && asp < genbuf + GENSIZ)
- *asp++ = *al1++;
- return(asp);
- }
/*
 * listto -- write an unambiguous listing of the string at p1 to fp.
 *
 * p1: NUL-terminated text to list.
 * fp: output stream.
 *
 * Printable characters are copied as-is.  Anything else is written as a
 * backslash followed by b, t, n, r or e for BS, TAB, NL, CR and ESC, or by
 * two hex digits for any other byte.  A newline ends the listing.
 *
 * The scan starts at p1 itself and stops at the NUL.  The earlier loop read
 * the byte before p1 and listed the terminator as "\00".  TAB is now
 * spelled '\t'; the earlier '\09' was not an octal escape and never matched.
 */
static void listto(p1, fp)
register char *p1;                      /* the source */
FILE *fp;                               /* output stream to write to */
{
    int ch;

    for (; *p1 != '\0'; p1++)
    {
        /* <ctype.h> functions need an unsigned char value */
        ch = (unsigned char)*p1;

        if (isprint(ch))
        {
            putc(ch, fp);               /* pass it through */
            continue;
        }

        putc('\\', fp);                 /* emit a backslash */
        switch (ch)
        {
        case '\b':   putc('b', fp); break;  /* BS */
        case '\t':   putc('t', fp); break;  /* TAB */
        case '\n':   putc('n', fp); break;  /* NL */
        case '\r':   putc('r', fp); break;  /* CR */
        case '\033': putc('e', fp); break;  /* ESC */
        default:     fprintf(fp, "%02x", ch); break;
        }
    }
    putc('\n', fp);
}
- static void dumpline(ipc)
- /* execute p, P, w, W commands and options */
- sedcmd *ipc;
- {
- register char *p;
- if (ipc->flags.firstl)
- for(p = linebuf; *p != '\n' && *p != '\0'; )
- putc(*p++, ipc->fout);
- else
- fputs(linebuf, ipc->fout);
- putc('\n', ipc->fout);
- }
- static sedcmd *command(ipc)
- /* execute compiled command pointed at by ipc */
- sedcmd *ipc;
- {
- register char *p1, *p2; /* scratch pointers */
- static char holdsp[MAXHOLD]; /* the hold space */
- static char *hspend = holdsp; /* hold space end pointer */
- static int sflag; /* true if last s succeeded */
- char c, *newspend;
- if (dflag)
- fprintf(stderr,
- "sed: executing %x on arguments \"%s\" and \"%s\"\n",
- ipc->command, ipc->addr1, ipc->addr2);
- switch(ipc->command)
- {
- case ACMD: /* append */
- *aptr++ = ipc;
- if (aptr >= appends + MAXAPPENDS)
- fprintf(stderr, TMAPP, lnum);
- *aptr = 0;
- break;
- case CCMD: /* change pattern space */
- if (!ipc->flags.active || lastline)
- puts(ipc->u.lhs);
- case DCMD: /* delete pattern space */
- delete = TRUE;
- break;
- case CDCMD: /* delete a line in pattern space */
- p1 = p2 = linebuf;
- while(*p1 != '\n')
- if (delete = (*p1++ == 0))
- return;
- p1++;
- while(*p2++ = *p1++);
- spend = p2-1;
- break;
- case EQCMD: /* show current line number */
- fprintf(stdout, "%ld\n", lnum);
- break;
- case GCMD: /* copy hold space to pattern space */
- p1 = linebuf; p2 = holdsp; while (*p1++ = *p2++);
- spend = p1-1;
- break;
- case CGCMD: /* append hold space to pattern space */
- *spend++ = '\n';
- p1 = spend; p2 = holdsp;
- while(*p1++ = *p2++)
- if (p1 >= linebuf + MAXBUF)
- break;
- spend = p1-1;
- break;
- case HCMD: /* copy pattern space to hold space */
- p1 = holdsp; p2 = linebuf; while(*p1++ = *p2++);
- hspend = p1-1;
- break;
- case CHCMD: /* append pattern space to hold space */
- *hspend++ = '\n';
- p1 = hspend; p2 = linebuf;
- while(*p1++ = *p2++)
- if (p1 >= holdsp + MAXHOLD)
- break;
- hspend = p1-1;
- break;
- case ICMD: /* insert text */
- printf("%s\n", ipc->u.lhs);
- break;
- case LCMD: /* list text */
- listto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout);
- break;
- case NCMD: /* read next line into pattern space */
- if (!nflag)
- puts(linebuf); /* flush out the current line */
- if (aptr > appends)
- readout(); /* do pending a, r commands */
- if ((newspend = getline(linebuf)) == BAD)
- return(delete = 1, ipc);
- spend = newspend;
- break;
- case CNCMD: /* append next line to pattern space */
- if (aptr > appends)
- readout();
- *spend++ = '\n';
- if ((newspend = getline(spend)) == BAD)
- return(delete = 1, ipc);
- spend = newspend;
- break;
- case PCMD: /* print pattern space */
- case CPCMD: /* print one line from pattern space */
- case WCMD: /* write pattern space to file */
- case CWCMD: /* write one line from pattern space */
- dumpline(ipc);
- break;
- case QCMD: /* quit the stream editor */
- if (!nflag)
- puts(linebuf); /* flush out the current line */
- if (aptr > appends)
- readout(); /* do any pending a and r commands */
- exit(0);
- case RCMD: /* read a file into the stream */
- *aptr++ = ipc;
- if (aptr >= appends + MAXAPPENDS)
- fprintf(stderr, TMRDS, lnum);
- *aptr = 0;
- break;
- case SCMD: /* substitute RE */
- sflag = substitute(ipc->u.lhs, ipc->rhs, ipc->flags.global);
- if (sflag)
- dumpline(ipc);
- break;
- case TCMD: /* branch on last s successful */
- case CTCMD: /* branch on last s failed */
- if (sflag == (ipc->command == CTCMD))
- break; /* no branch if last s failed, else */
- sflag = FALSE; /* clear the s condition flag */
- case BCMD: /* branch to label */
- jump = TRUE; /* set up to jump to assoc'd label */
- break;
- case XCMD: /* exchange pattern and hold spaces */
- p1 = linebuf, p2 = holdsp;
- while (p1 <= spend || p2 <= hspend)
- {
- c = *p1; *p1++ = *p2; *p2++ = c;
- }
- newspend = hspend; hspend = spend; spend = newspend;
- break;
- case YCMD: /* translate a line */
- p1 = linebuf; p2 = ipc->u.lhs;
- while(*p1 = p2[*p1])
- p1++;
- break;
- }
- return(NULL);
- }
- static char *getline(buf) /* uses lastline, eargc */
- /* get next line of text to be filtered */
- register char *buf; /* where to send the input */
- {
- if (gets(buf) != NULL)
- {
- lnum++; /* note that we got another line */
- while(*buf++); /* find the end of the input */
- return(--buf); /* return ptr to terminating null */
- }
- else
- {
- if (eargc == 0) /* if there are no more args */
- lastline = TRUE; /* set a flag */
- return(BAD);
- }
- }
#ifndef MEMCMP
/*
 * memcmp -- local region comparison used by the RE matcher.
 *
 * a, b:  the two regions to compare.
 * count: number of characters to compare; zero or negative compares
 *        nothing and reports equality.
 *
 * Returns 1 if the first count characters are equal, 0 otherwise.
 *
 * NOTE: this is the OPPOSITE sense of the standard library memcmp(), which
 * returns 0 for equal regions.  The callers in this file rely on the sense
 * implemented here, so defining MEMCMP to pick up a library version
 * inverts their tests.
 */
static int memcmp(a, b, count)
register char *a, *b;
int count;
{
    while (count-- > 0)                 /* look at count characters */
        if (*a++ != *b++)               /* if any are nonequal */
            return(0);                  /* return 0 for false */
    return(1);                          /* compare succeeded */
}
#endif /* MEMCMP */
- static void readout() /* uses appends, aptr */
- /* write file indicated by r command to output */
- {
- register char *p1; /* character-fetching dummy */
- register int t; /* hold input char or EOF */
- FILE *fi; /* ptr to file to be read */
- aptr = appends - 1; /* arrange for pre-increment to work right */
- while(*++aptr)
- if ((*aptr)->command == ACMD) /* process "a" cmd */
- printf("%s\n", (*aptr)->u.lhs);
- else /* process "r" cmd */
- {
- if ((fi = fopen((*aptr)->u.lhs, "r")) == NULL)
- continue;
- while((t = getc(fi)) != EOF)
- putc((char) t, stdout);
- fclose(fi);
- }
- aptr = appends; /* reset the append ptr */
- *aptr = 0;
- }
- /* sedexec.c ends here */
|