123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- /* for xc:
- % cc -O -o cut cut.c
- */
- /*
- * cut - a recreation of the Unix(Tm) cut(1) command.
- *
- * syntax: cut -cLIST[ file1 file2 ...]
- * cut -fLIST [-dCHAR][ -s][ file1 file2 ...]
- *
- * Copyright (C) 1984 by David M. Ihnat
- *
- * This program is a total rewrite of the Bell Laboratories Unix(Tm)
- * command of the same name, as of System V. It contains no proprietary
- * code, and therefore may be used without violation of any proprietary
- * agreements whatsoever. However, you will notice that the program is
- * copyrighted by me. This is to assure the program does *not* fall
- * into the public domain. Thus, I may specify just what I am now:
- * This program may be freely copied and distributed, provided this notice
- * remains; it may not be sold for profit without express written consent of
- * the author.
- * Please note that I recreated the behavior of the Unix(Tm) 'cut' command
- * as faithfully as possible; however, I haven't run a full set of regression
- * tests. Thus, the user of this program accepts full responsibility for any
- * effects or loss; in particular, the author is not responsible for any losses,
- * explicit or incidental, that may be incurred through use of this program.
- *
- * I ask that any bugs (and, if possible, fixes) be reported to me when
- * possible. -David Ihnat (312) 784-4544 ihuxx!ignatz
- */
#include <stdio.h>
#include <stdlib.h>
extern int errno;

/*
 * When CPM is defined, main() reports errors under the fixed name COMMAND
 * instead of taking the program name from argv[0].
 */
#define CPM

/*
 * Error codes accepted by prerr().
 * I'd love to use enums, but not everyone has them. Portability, y'know.
 */
#define BADLIST 1
#define NODELIM 2
#define NOFIELDS 3
#define USAGE 4
#define BADFILE 5
#define BACKERR 6
#define TOOLONG 7

/* No trailing semicolon: TAB has to be usable inside an expression. */
#define TAB '\t'
#define BACKSP 0x8
#define _MAXSZ 512
#define COMMAND "cut"

/* Values stored in fields[]: drop or keep that position/field. */
#define IGNOREIT 0
#define CUTIT 1

char outbuf[_MAXSZ]; /* Processed output buffer */
char rawbuf[_MAXSZ]; /* Raw holding buffer for field mode */

#define FLDFLAG fields[0] /* Used for EOL processing */
short int fields[_MAXSZ]; /* Max num of fields or line length */

char *cmdnam;                  /* Program name used in error messages */
short int cflag,fflag,sflag;   /* Set by the -c, -f and -s options */
char delim = TAB;              /* Field delimiter; replaced by -d */
- main(argc,argv)
- int argc;
- char **argv;
- {
- FILE *fileptr;
- FILE *fopen();
- int filecnt;
- cflag = fflag = sflag = 0;
- #ifdef CPM
- cmdnam = COMMAND;
- #else
- cmdnam = *argv;
- #endif
- /* Skip invocation name */
- argv++;
- argc--;
- /* Most compilers initialize storage to zero; but don't count on it. */
- for(filecnt = 0;filecnt < _MAXSZ;filecnt++)
- fields[filecnt] = IGNOREIT;
- /* First, parse input options */
- while(argv[0][0] == '-')
- {
- switch(argv[0][1])
- {
- case 'c':
- /* Build the character position list */
- if(fflag || cflag)
- prerr(USAGE,NULL);
- else
- {
- cflag++;
- setflds(&argv[0][2]);
- }
- break;
- case 'f':
- /* Build the field position list */
- if(fflag || cflag)
- prerr(USAGE,NULL);
- else
- {
- fflag++;
- setflds(&argv[0][2]);
- }
- break;
- case 'd':
- /* New delimiter */
- delim = argv[0][2];
- if(delim == '\0')
- prerr(NODELIM,NULL);
- break;
- case 's':
- sflag++;
- break;
- default:
- prerr(USAGE,NULL);
- }
- argv++;
- argc--;
- }
- /* Finished all setup. If no fields selected, tell them and exit. */
- if(!(cflag | fflag))
- prerr(BADLIST,NULL);
- if(!FLDFLAG)
- prerr(NOFIELDS,NULL);
- /*
- * If no files specified, process stdin. Otherwise,
- * process on a file-by-file basis.
- */
- if(argc == 0)
- dofile(stdin);
- else
- for(filecnt = 0;filecnt < argc;filecnt++,argv++)
- if((fileptr = fopen(argv[0],"r")) == (FILE *)NULL)
- prerr(BADFILE,argv);
- else
- {
- dofile(fileptr);
- fclose(fileptr);
- }
- }
- setflds(fldstr)
- char *fldstr;
- {
- /*
- * The string, character or field, must have one of the
- * following formats:
- *
- * n
- * n,m[,...] where n<m
- * a-b where a<b
- * -n,m where n<m; implies 1-n
- * n- where - implies to end of line or last field
- */
- int index,minflag,value,fldset;
- minflag = 0;
- value = 0;
- index = 1;
- FLDFLAG = 0;
- for(;;)
- {
- switch(*fldstr)
- {
- case '-':
- /* Starting a range */
- if(minflag)
- prerr(BADLIST,NULL);
- minflag++;
- fldstr++;
- if(value)
- {
- if(value >= _MAXSZ)
- prerr(BADLIST,NULL);
- index = value;
- }else
- index = 1;
- value = 0;
- break;
-
- case ',':
- case '\0':
- /* Ending the string, or this field/column sublist */
- if(minflag) /* Some damnable range */
- { /* Ranges are nasty. Possibles:
- * -n,a-n,n-. In any case, index
- * contains the start of the range.
- */
- if(!value)
- { /* From index to EOL */
- FLDFLAG = index;
- fldset++;
- value = 0;
- }else
- {
- if(value >= _MAXSZ)
- prerr(BADLIST,NULL);
- if(value < index)
- prerr(BADLIST,NULL);
- /* Already a TOEOL sequence? */
- if(FLDFLAG)
- {
- /*
- * Yes. Now...is the ne w sequence already
- * contained by the old one? If so, no processing
- * is necessary.
- */
- if(FLDFLAG > index)
- {
- /*
- * No, the new s equence starts before the old.
- * Does the rang e extend into the current
- * EOL range? If so, simply move the EOL marker.
- */
- if(FLDFLAG < value)
- {
- FLDFLAG = index;
- }else
- /* Simple range. Fill it. */
- for(; index <= value ;index++)
- fields[index] = CUTIT;
- /* In any case, some fields were selected. */
- fldset++;
- }
- /* Ok, no TOEOL sequence */
- }else
- {
- for(;index <= value;index++)
- {
- fields[index] = CUTIT;
- }
- fldset++;
- }
- value = 0;
- }
- /* Reset the field-in-progress flag. */
- minflag = 0;
- }else
- if(value)
- {
- if(value >= _MAXSZ)
- prerr(BADLIST,NULL);
- fields[value] = CUTIT;
- value = 0;
- fldset++;
- }
- if(*fldstr == '\0')
- {
- /*
- * Last bit of processing. If there was an EOL,
- * fill the array from the EOL point. In any case,
- * if there were any fields selected, leave the FLDFLAG
- * value non-zero on return.
- */
- if(FLDFLAG)
- for(index = FLDFLAG; index < _MAXSZ; index++)
- fields[index] = CUTIT;
- if(fldset)
- FLDFLAG = 1;
- return(0);
- }
- fldstr++;
- break;
- default:
- if((*fldstr < '0' ) || (*fldstr > '9' ))
- prerr(BADLIST,NULL);
- else
- {
- value = 10 * value + *fldstr - '0';
- fldstr++;
- }
- }
- }
- }
- dofile(fno)
- FILE *fno;
- {
- /*
- * This will process the input files according to the rules specified
- * in the fields array.
- */
- int charcnt,poscnt,bflag,doneflag,fldfound;
- register int c;
- char *inbufptr, *rawbufptr;
- do
- {
- inbufptr = outbuf;
- rawbufptr = rawbuf;
- charcnt = bflag = doneflag = fldfound = 0;
- poscnt = 1;
- do
- {
- c = fgetc(fno);
- if(c == EOF)
- {
- /* That's it for this file or stream */
- doneflag++;
- break;
- }
- if(cflag)
- {
- /*
- * In character scan mode. Look to see if
- * it's an NROFF-type underlined character;
- * if so, then don't count the backspace.
- * Notice that this could cause a buffer
- * overflow in the worst case situation...
- * but that's MOST unlikely.
- */
- if(c == BACKSP)
- {
- if(bflag)
- prerr(BACKERR);
- else
- {
- bflag++;
- *inbufptr++ = c;
- }
- }else
- {
- /*
- * Valid character. If it's to be sent,
- * stow it in the outbuffer.
- */
- bflag = 0;
- if(++charcnt == (_MAXSZ - 1))
- prerr(TOOLONG);
- if(fields[charcnt] && (c != '\n'))
- *inbufptr++ = c;
- }
- }else
- {
- /*
- * Field processing. In this case, charcnt
- * does indicate processed characters on the
- * current line, but that is all. Notice that
- * ALL characters are initially stowed in the
- * raw buffer, until at least one field has
- * been found.
- */
- if(fields[poscnt])
- {
- /* Ok, working on a field. It,
- * and its terminating delimiter,
- * go only into the processed buffer.
- */
- fldfound = 1;
- if(c != '\n')
- *inbufptr++ = c;
- }else
- if(!fldfound)
- {
- charcnt++;
- if(c != '\n')
- *rawbufptr++ = c;
- }
- /*
- * In any case, if a delimiter, bump the field
- * indicator.
- */
- if(c == delim)
- poscnt++;
- }
- }while(c != '\n');
- if((cflag && charcnt) || (fflag && fldfound))
- {
- /*
- * No matter what mode, something was found. Print it.
- */
- if(fflag && (*(inbufptr-1) == delim))
- --inbufptr; /* Supress trailing delimiter */
- *inbufptr = '\0'; /* But null-terminate the line. */
- puts(outbuf);
- }else
- if((fflag && (!sflag)) && charcnt)
- {
- /*
- * In this case, a line with some characters,
- * no delimiters, and no supression. Print it.
- */
- *rawbufptr = '\0';
- puts(rawbuf);
- }
- }while(!doneflag);
- }
- prerr(etype, estring)
- int etype;
- char *estring;
- {
- switch(etype)
- {
- case BADLIST:
- fprintf(stderr,"%s : bad list for c/f option\n",cmdnam);
- break;
- case USAGE:
- fprintf(stderr,"Usage: %s [-s] [-d<char>] {-c<list> | -f<list>} file ...\n",cmdnam);
- break;
- case NOFIELDS:
- fprintf(stderr,"%s : no fields\n",cmdnam);
- break;
- case NODELIM:
- fprintf(stderr,"%s : no delimiter\n",cmdnam);
- break;
- case BADFILE:
- fprintf(stderr,"Cannot open: %s : %s\n",cmdnam,estring);
- break;
-
- case BACKERR:
- fprintf(stderr,"%s : cannot handle multiple adjacent backspaces\n",cmdnam);
- break;
- case TOOLONG:
- fprintf(stderr,"%s : line too long\n",cmdnam);
- }
- exit(2);
- }
|