123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- /* -*- Mode: C -*- tr -- translates characters of streams
- Reads from standard-input, outputs to standard-output
- Arguments: [ -cds ] [character-set-1 [ character-set-2 ] ]
- -d means output all characters -not- in character-set-1
- If -d is specified, then character-set-2 may not be specified,
- (as there is no real translation occurring)
- -c means to interpret character-set-1 as its complement
- (ie as if it were the contents of a [^...] regexp rather than [...])
- -s means to output only one character if several successive
- characters are translated into the same character.
- eg "tr -s -c a-z _" on "abc!!ddd ef&$ x" would produce "abc_ddd_ef_x"
- Character-set-1 and character-set-2 are like regexp [...] ranges
- except that "\" is a quoting character, which may be followed either
- by another "\" or by 1 to 3 octal digits. (\n, \f etc are not supported)
- This version of "tr" does not have the unix tr's bug of being unable
- to deal with \000
- */
- /* mly 850320 */
#include <stdio.h>
#include <stdlib.h>
/* Option flags.  Each one is incremented by main when the matching
   letter (-d, -s, -c) is seen on the command line.  */
int dflag = 0;
int sflag = 0;
int cflag = 0;

/* Scratch "previous character" used by main: while the tables are being
   built it remembers the last replacement character read, and while
   copying input it remembers the last character written.  */
int last;

/* Both tables are indexed by character value 0..255 and hold values that
   are themselves used as indexes, so they are unsigned char: with plain
   char a stored value of 0x80 or more reads back negative wherever char
   is signed.  */
unsigned char xlate[256];       /* translation table */
unsigned char nuke[256];        /* if -d, anything in this table is not
                                 * output at all,
                                 * else this is the table of possible
                                 * translated characters, for the use
                                 * of -s
                                 */
/* Values taken by string_range_stream.state.  */
enum
{
  SRS_INIT = 0,                 /* initial */
  SRS_READ = 1,                 /* reading */
  SRS_RANGE = 2,                /* range */
  SRS_EOF = 3                   /* eof */
};

/* Reader state for one character-set argument.  */
struct string_range_stream
{
  int state;                    /* one of the SRS_ values above */
  int lower;                    /* lower bound of range we're in, or -1 */
  int upper;                    /* upper bound of above */
  char *p;                      /* last char looked at */
};

/* Readers for character-set-1 and character-set-2.  */
struct string_range_stream string1;
struct string_range_stream string2;

/* Set by main from argv[0]; barf prefixes its messages with it.  */
char *progname;
- main (argc,argv)
- int argc;
- char **argv;
- {
- register i;
- register c, d;
- progname = argv[0];
- if (--argc > 0)
- /* inhale flags */
- { argv++;
- while (argc > 2 && argv[0][0] == '-' && argv[0][1] != 0)
- { while (*++argv[0])
- switch (argv[0][0])
- { case 'c':
- cflag++;
- break;
- case 'd':
- dflag++;
- break;
- case 's':
- sflag++;
- break;
- default:
- barf ("unknown flag \"-%c\": Expected \"c\" \"d\", or \"s\"",
- argv[0][0]);
- break; }
- argc--;
- argv++; } }
- string1.p = argv[0]; string1.state = SRS_INIT; string2.state = SRS_EOF;
- if (argc == 0)
- string1.state = SRS_EOF;
- if (argc == 2)
- { if (dflag)
- barf ("-d takes only one argument");
- else
- string2.p = argv[1]; string2.state = SRS_INIT; }
- else if (argc > 2)
- barf ("too many arguments (%d)", argc);
- if (cflag)
- { if (argc < 2)
- barf ("-c must have two arguments given");
- for (i = 0 ; i < 256; i++)
- { xlate[i] = 0; nuke[i] = 0; }
- while ((c = snarf (&string1)) != EOF)
- xlate[c] = 1;
- for (i = 0 ; i < 256 ; i++)
- { if ((d = snarf (&string2)) == EOF)
- d = last;
- else {last = d; nuke[d] = 1; }
- xlate[i] = xlate[i] ? i : d; } }
- else
- { for (i = 0 ; i < 256 ; i++)
- { xlate[i] = i; nuke[i] = 0; }
- last = EOF;
- while ((c = snarf (&string1)) != EOF)
- { if (dflag)
- nuke[c] = 1;
- else { if ((d = snarf (&string2)) == EOF)
- d = last;
- else {last = d; nuke[d] = 1; }
- xlate[c] = d; } } }
- last = EOF;
- while ((c = getchar ()) != EOF )
- if (dflag ? !nuke[c]
- : ((c = xlate[c]) != last || !sflag || !nuke[c]))
- putchar (last = c);
- exit (0);
- }
- int
- snarf (s)
- struct string_range_stream *s;
- { register c;
- switch (s->state)
- { case SRS_EOF:
- c = EOF;
- break;
- case SRS_RANGE:
- c = ++s->lower;
- if (c == s->upper) s->state = SRS_INIT;
- break;
- case SRS_INIT:
- case SRS_READ:
- if (!(c = *s->p++))
- { s->state = SRS_EOF;
- c = EOF; }
- else if (c == '-' && s->state == SRS_READ)
- { register d;
- if ((d = snarfchar (s)) == EOF) d = 255;
- s->state = (((c = ++s->lower) == (s->upper = d))
- ? SRS_INIT : SRS_RANGE); }
- else { s->state = SRS_READ;
- if (c == '\\')
- c = snarfquoted (s); }
- s->lower = c; }
- return (c); }
- int
- snarfchar (s)
- struct string_range_stream *s;
- { register c;
- if (!(c = *s->p++))
- { s->state = SRS_EOF;
- return (EOF); }
- else if (c == '\\')
- return (snarfquoted (s));
- else return (c); }
- int
- snarfquoted (s)
- struct string_range_stream *s;
- { register c;
- if (!(c = *s->p++))
- { s->state = SRS_EOF;
- return (EOF); }
- else if ('0' <= c && c <= '7')
- { register d = c - '0';
- register i;
- for (i = 0; i < 2; i++)
- { if (!(c = *s->p++))
- { s->state = SRS_EOF;
- break; }
- else if ('0' <= c && c <= '7')
- d = d * 8 + c - '0';
- else { s->p--;
- break; } }
- return (d); }
- else return (c); }
- /* print error message and die with exit code 1 */
- barf (string, arg)
- char *string, *arg;
- {
- fprintf (stderr,"%s: ", progname);
- fprintf (stderr, string, arg);
- fprintf (stderr,"\n");
- exit (1);
- }
|