cut.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /* for xc:
  2. % cc -O -o cut cut.c
  3. */
  4. /*
  5. * cut - a recreation of the Unix(Tm) cut(1) command.
  6. *
  7. * syntax: cut -cLIST[ file1 file2 ...]
  8. * cut -fLIST [-d char][ -s][ file1 file2 ...]
  9. *
  10. * Copyright (C) 1984 by David M. Ihnat
  11. *
  12. * This program is a total rewrite of the Bell Laboratories Unix(Tm)
  13. * command of the same name, as of System V. It contains no proprietary
  14. * code, and therefore may be used without violation of any proprietary
  15. * agreements whatsoever. However, you will notice that the program is
  16. * copyrighted by me. This is to assure the program does *not* fall
  17. * into the public domain. Thus, I may specify just what I am now:
  18. * This program may be freely copied and distributed, provided this notice
  19. * remains; it may not be sold for profit without express written consent of
  20. * the author.
  21. * Please note that I recreated the behavior of the Unix(Tm) 'cut' command
  22. * as faithfully as possible; however, I haven't run a full set of regression
  23. * tests. Thus, the user of this program accepts full responsibility for any
  24. * effects or loss; in particular, the author is not responsible for any losses,
  25. * explicit or incidental, that may be incurred through use of this program.
  26. *
  27. * I ask that any bugs (and, if possible, fixes) be reported to me when
  28. * possible. -David Ihnat (312) 784-4544 ihuxx!ignatz
  29. */
  #include <errno.h>    /* declares errno; replaces the hand-written "extern int errno;" */
  #include <stdio.h>
  #include <stdlib.h>   /* exit() */

  /*
   * Old-style declarations for the routines of this file that are called
   * before their definitions appear.
   */
  int setflds();
  int dofile();
  int prerr();

  /* When defined, main() uses the fixed name COMMAND instead of argv[0]. */
  #define CPM

  /* I'd love to use enums, but not everyone has them. Portability, y'know. */
  /* Error codes understood by prerr(). */
  #define BADLIST 1
  #define NODELIM 2
  #define NOFIELDS 3
  #define USAGE 4
  #define BADFILE 5
  #define BACKERR 6
  #define TOOLONG 7

  /*
   * Default field delimiter.  The definition must NOT end in a semicolon:
   * it is used as an initializer below and has to stay usable inside
   * expressions as well.
   */
  #define TAB '\t'
  #define BACKSP 0x8
  #define _MAXSZ 512          /* Size of every buffer and of the fields table */
  #define COMMAND "cut"

  /* Values stored in the fields table */
  #define IGNOREIT 0
  #define CUTIT 1

  char outbuf[_MAXSZ];        /* Processed output buffer */
  char rawbuf[_MAXSZ];        /* Raw holding buffer for field mode */

  #define FLDFLAG fields[0]   /* Used for EOL processing */
  short int fields[_MAXSZ];   /* Max num of fields or line length */

  char *cmdnam;               /* Name used as prefix in error messages */
  short int cflag,fflag,sflag; /* Set by the -c, -f and -s options */
  char delim = TAB;           /* Field delimiter; changed by -d */
  54. main(argc,argv)
  55. int argc;
  56. char **argv;
  57. {
  58. FILE *fileptr;
  59. FILE *fopen();
  60. int filecnt;
  61. cflag = fflag = sflag = 0;
  62. #ifdef CPM
  63. cmdnam = COMMAND;
  64. #else
  65. cmdnam = *argv;
  66. #endif
  67. /* Skip invocation name */
  68. argv++;
  69. argc--;
  70. /* Most compilers initialize storage to zero; but don't count on it. */
  71. for(filecnt = 0;filecnt < _MAXSZ;filecnt++)
  72. fields[filecnt] = IGNOREIT;
  73. /* First, parse input options */
  74. while(argv[0][0] == '-')
  75. {
  76. switch(argv[0][1])
  77. {
  78. case 'c':
  79. /* Build the character position list */
  80. if(fflag || cflag)
  81. prerr(USAGE,NULL);
  82. else
  83. {
  84. cflag++;
  85. setflds(&argv[0][2]);
  86. }
  87. break;
  88. case 'f':
  89. /* Build the field position list */
  90. if(fflag || cflag)
  91. prerr(USAGE,NULL);
  92. else
  93. {
  94. fflag++;
  95. setflds(&argv[0][2]);
  96. }
  97. break;
  98. case 'd':
  99. /* New delimiter */
  100. delim = argv[0][2];
  101. if(delim == '\0')
  102. prerr(NODELIM,NULL);
  103. break;
  104. case 's':
  105. sflag++;
  106. break;
  107. default:
  108. prerr(USAGE,NULL);
  109. }
  110. argv++;
  111. argc--;
  112. }
  113. /* Finished all setup. If no fields selected, tell them and exit. */
  114. if(!(cflag | fflag))
  115. prerr(BADLIST,NULL);
  116. if(!FLDFLAG)
  117. prerr(NOFIELDS,NULL);
  118. /*
  119. * If no files specified, process stdin. Otherwise,
  120. * process on a file-by-file basis.
  121. */
  122. if(argc == 0)
  123. dofile(stdin);
  124. else
  125. for(filecnt = 0;filecnt < argc;filecnt++,argv++)
  126. if((fileptr = fopen(argv[0],"r")) == (FILE *)NULL)
  127. prerr(BADFILE,argv);
  128. else
  129. {
  130. dofile(fileptr);
  131. fclose(fileptr);
  132. }
  133. }
  134. setflds(fldstr)
  135. char *fldstr;
  136. {
  137. /*
  138. * The string, character or field, must have one of the
  139. * following formats:
  140. *
  141. * n
  142. * n,m[,...] where n<m
  143. * a-b where a<b
  144. * -n,m where n<m; implies 1-n
  145. * n- where - implies to end of line or last field
  146. */
  147. int index,minflag,value,fldset;
  148. minflag = 0;
  149. value = 0;
  150. index = 1;
  151. FLDFLAG = 0;
  152. for(;;)
  153. {
  154. switch(*fldstr)
  155. {
  156. case '-':
  157. /* Starting a range */
  158. if(minflag)
  159. prerr(BADLIST,NULL);
  160. minflag++;
  161. fldstr++;
  162. if(value)
  163. {
  164. if(value >= _MAXSZ)
  165. prerr(BADLIST,NULL);
  166. index = value;
  167. }else
  168. index = 1;
  169. value = 0;
  170. break;
  171. case ',':
  172. case '\0':
  173. /* Ending the string, or this field/column sublist */
  174. if(minflag) /* Some damnable range */
  175. { /* Ranges are nasty. Possibles:
  176. * -n,a-n,n-. In any case, index
  177. * contains the start of the range.
  178. */
  179. if(!value)
  180. { /* From index to EOL */
  181. FLDFLAG = index;
  182. fldset++;
  183. value = 0;
  184. }else
  185. {
  186. if(value >= _MAXSZ)
  187. prerr(BADLIST,NULL);
  188. if(value < index)
  189. prerr(BADLIST,NULL);
  190. /* Already a TOEOL sequence? */
  191. if(FLDFLAG)
  192. {
  193. /*
  194. * Yes. Now...is the ne w sequence already
  195. * contained by the old one? If so, no processing
  196. * is necessary.
  197. */
  198. if(FLDFLAG > index)
  199. {
  200. /*
  201. * No, the new s equence starts before the old.
  202. * Does the rang e extend into the current
  203. * EOL range? If so, simply move the EOL marker.
  204. */
  205. if(FLDFLAG < value)
  206. {
  207. FLDFLAG = index;
  208. }else
  209. /* Simple range. Fill it. */
  210. for(; index <= value ;index++)
  211. fields[index] = CUTIT;
  212. /* In any case, some fields were selected. */
  213. fldset++;
  214. }
  215. /* Ok, no TOEOL sequence */
  216. }else
  217. {
  218. for(;index <= value;index++)
  219. {
  220. fields[index] = CUTIT;
  221. }
  222. fldset++;
  223. }
  224. value = 0;
  225. }
  226. /* Reset the field-in-progress flag. */
  227. minflag = 0;
  228. }else
  229. if(value)
  230. {
  231. if(value >= _MAXSZ)
  232. prerr(BADLIST,NULL);
  233. fields[value] = CUTIT;
  234. value = 0;
  235. fldset++;
  236. }
  237. if(*fldstr == '\0')
  238. {
  239. /*
  240. * Last bit of processing. If there was an EOL,
  241. * fill the array from the EOL point. In any case,
  242. * if there were any fields selected, leave the FLDFLAG
  243. * value non-zero on return.
  244. */
  245. if(FLDFLAG)
  246. for(index = FLDFLAG; index < _MAXSZ; index++)
  247. fields[index] = CUTIT;
  248. if(fldset)
  249. FLDFLAG = 1;
  250. return(0);
  251. }
  252. fldstr++;
  253. break;
  254. default:
  255. if((*fldstr < '0' ) || (*fldstr > '9' ))
  256. prerr(BADLIST,NULL);
  257. else
  258. {
  259. value = 10 * value + *fldstr - '0';
  260. fldstr++;
  261. }
  262. }
  263. }
  264. }
  265. dofile(fno)
  266. FILE *fno;
  267. {
  268. /*
  269. * This will process the input files according to the rules specified
  270. * in the fields array.
  271. */
  272. int charcnt,poscnt,bflag,doneflag,fldfound;
  273. register int c;
  274. char *inbufptr, *rawbufptr;
  275. do
  276. {
  277. inbufptr = outbuf;
  278. rawbufptr = rawbuf;
  279. charcnt = bflag = doneflag = fldfound = 0;
  280. poscnt = 1;
  281. do
  282. {
  283. c = fgetc(fno);
  284. if(c == EOF)
  285. {
  286. /* That's it for this file or stream */
  287. doneflag++;
  288. break;
  289. }
  290. if(cflag)
  291. {
  292. /*
  293. * In character scan mode. Look to see if
  294. * it's an NROFF-type underlined character;
  295. * if so, then don't count the backspace.
  296. * Notice that this could cause a buffer
  297. * overflow in the worst case situation...
  298. * but that's MOST unlikely.
  299. */
  300. if(c == BACKSP)
  301. {
  302. if(bflag)
  303. prerr(BACKERR);
  304. else
  305. {
  306. bflag++;
  307. *inbufptr++ = c;
  308. }
  309. }else
  310. {
  311. /*
  312. * Valid character. If it's to be sent,
  313. * stow it in the outbuffer.
  314. */
  315. bflag = 0;
  316. if(++charcnt == (_MAXSZ - 1))
  317. prerr(TOOLONG);
  318. if(fields[charcnt] && (c != '\n'))
  319. *inbufptr++ = c;
  320. }
  321. }else
  322. {
  323. /*
  324. * Field processing. In this case, charcnt
  325. * does indicate processed characters on the
  326. * current line, but that is all. Notice that
  327. * ALL characters are initially stowed in the
  328. * raw buffer, until at least one field has
  329. * been found.
  330. */
  331. if(fields[poscnt])
  332. {
  333. /* Ok, working on a field. It,
  334. * and its terminating delimiter,
  335. * go only into the processed buffer.
  336. */
  337. fldfound = 1;
  338. if(c != '\n')
  339. *inbufptr++ = c;
  340. }else
  341. if(!fldfound)
  342. {
  343. charcnt++;
  344. if(c != '\n')
  345. *rawbufptr++ = c;
  346. }
  347. /*
  348. * In any case, if a delimiter, bump the field
  349. * indicator.
  350. */
  351. if(c == delim)
  352. poscnt++;
  353. }
  354. }while(c != '\n');
  355. if((cflag && charcnt) || (fflag && fldfound))
  356. {
  357. /*
  358. * No matter what mode, something was found. Print it.
  359. */
  360. if(fflag && (*(inbufptr-1) == delim))
  361. --inbufptr; /* Supress trailing delimiter */
  362. *inbufptr = '\0'; /* But null-terminate the line. */
  363. puts(outbuf);
  364. }else
  365. if((fflag && (!sflag)) && charcnt)
  366. {
  367. /*
  368. * In this case, a line with some characters,
  369. * no delimiters, and no supression. Print it.
  370. */
  371. *rawbufptr = '\0';
  372. puts(rawbuf);
  373. }
  374. }while(!doneflag);
  375. }
  376. prerr(etype, estring)
  377. int etype;
  378. char *estring;
  379. {
  380. switch(etype)
  381. {
  382. case BADLIST:
  383. fprintf(stderr,"%s : bad list for c/f option\n",cmdnam);
  384. break;
  385. case USAGE:
  386. fprintf(stderr,"Usage: %s [-s] [-d<char>] {-c<list> | -f<list>} file ...\n",cmdnam);
  387. break;
  388. case NOFIELDS:
  389. fprintf(stderr,"%s : no fields\n",cmdnam);
  390. break;
  391. case NODELIM:
  392. fprintf(stderr,"%s : no delimiter\n",cmdnam);
  393. break;
  394. case BADFILE:
  395. fprintf(stderr,"Cannot open: %s : %s\n",cmdnam,estring);
  396. break;
  397. case BACKERR:
  398. fprintf(stderr,"%s : cannot handle multiple adjacent backspaces\n",cmdnam);
  399. break;
  400. case TOOLONG:
  401. fprintf(stderr,"%s : line too long\n",cmdnam);
  402. }
  403. exit(2);
  404. }