tr.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  /* -*- Mode: C -*- tr -- translates characters of streams
     Reads from standard-input, outputs to standard-output
     Arguments: [ -cds ] [character-set-1 [ character-set-2 ] ]
     -d means output all characters -not- in character-set-1
        If -d is specified, then character-set-2 may not be specified,
        (as there is no real translation occurring)
     -c means to interpret character-set-1 as its complement
        (i.e. as if it were the contents of a [^...] regexp rather than [...])
     -s means to only output one character if several successive
        characters translated into the same character.
        e.g. "tr -s -c a-z _" on "abc!!ddd ef&$ x" would produce "abc_ddd_ef_x"
     Character-set-1 and character-set-2 are like regexp [...] ranges
     except that "\" is a quoting character, which may be followed either
     by another "\" or by 1 to 3 octal digits. (\n, \f etc are not supported)
     This version of "tr" does not have the unix tr's bug of being unable
     to deal with \000
  */
  /* mly 850320 */
  19. #include <stdio.h>
  /* Command-line flags; each is incremented once per occurrence of the
     corresponding option letter. */
  int dflag = 0;                  /* -d: delete instead of translate */
  int sflag = 0;                  /* -s: squeeze repeated output characters */
  int cflag = 0;                  /* -c: complement character-set-1 */

  /* Most recent character from character-set-2 while the tables are being
     built; most recent character written while the input is being copied. */
  int last;

  /* Both tables are indexed by byte value (0..255).  They are declared
     unsigned so that a value read back out of xlate[] is itself a valid
     index into nuke[], even where plain char is signed. */
  unsigned char xlate[256];       /* translation table */
  unsigned char nuke[256];        /* if -d, anything in this table is not
                                   * output at all,
                                   * else this is the table of possible
                                   * translated characters, for the use
                                   * of -s
                                   */
  /* States of a string_range_stream. */
  enum
  {
    SRS_INIT  = 0,                /* at the start, or just after a range:
                                     a '-' read now is an ordinary character */
    SRS_READ  = 1,                /* an ordinary character was just read:
                                     a '-' read now introduces a range */
    SRS_RANGE = 2,                /* in the middle of expanding a range */
    SRS_EOF   = 3                 /* specification exhausted */
  };

  /* Scanner state for one character-set argument. */
  struct string_range_stream
  {
    int state;                    /* one of the SRS_* values above */
    int lower;                    /* character most recently produced; it is
                                     incremented to step through a range */
    int upper;                    /* last character of the current range */
    char *p;                      /* next character of the argument to read */
  };

  /* Scanners for character-set-1 and character-set-2. */
  struct string_range_stream string1;
  struct string_range_stream string2;

  /* Name used as the prefix of error messages (set from argv[0]). */
  char *progname;
  44. main (argc,argv)
  45. int argc;
  46. char **argv;
  47. {
  48. register i;
  49. register c, d;
  50. progname = argv[0];
  51. if (--argc > 0)
  52. /* inhale flags */
  53. { argv++;
  54. while (argc > 2 && argv[0][0] == '-' && argv[0][1] != 0)
  55. { while (*++argv[0])
  56. switch (argv[0][0])
  57. { case 'c':
  58. cflag++;
  59. break;
  60. case 'd':
  61. dflag++;
  62. break;
  63. case 's':
  64. sflag++;
  65. break;
  66. default:
  67. barf ("unknown flag \"-%c\": Expected \"c\" \"d\", or \"s\"",
  68. argv[0][0]);
  69. break; }
  70. argc--;
  71. argv++; } }
  72. string1.p = argv[0]; string1.state = SRS_INIT; string2.state = SRS_EOF;
  73. if (argc == 0)
  74. string1.state = SRS_EOF;
  75. if (argc == 2)
  76. { if (dflag)
  77. barf ("-d takes only one argument");
  78. else
  79. string2.p = argv[1]; string2.state = SRS_INIT; }
  80. else if (argc > 2)
  81. barf ("too many arguments (%d)", argc);
  82. if (cflag)
  83. { if (argc < 2)
  84. barf ("-c must have two arguments given");
  85. for (i = 0 ; i < 256; i++)
  86. { xlate[i] = 0; nuke[i] = 0; }
  87. while ((c = snarf (&string1)) != EOF)
  88. xlate[c] = 1;
  89. for (i = 0 ; i < 256 ; i++)
  90. { if ((d = snarf (&string2)) == EOF)
  91. d = last;
  92. else {last = d; nuke[d] = 1; }
  93. xlate[i] = xlate[i] ? i : d; } }
  94. else
  95. { for (i = 0 ; i < 256 ; i++)
  96. { xlate[i] = i; nuke[i] = 0; }
  97. last = EOF;
  98. while ((c = snarf (&string1)) != EOF)
  99. { if (dflag)
  100. nuke[c] = 1;
  101. else { if ((d = snarf (&string2)) == EOF)
  102. d = last;
  103. else {last = d; nuke[d] = 1; }
  104. xlate[c] = d; } } }
  105. last = EOF;
  106. while ((c = getchar ()) != EOF )
  107. if (dflag ? !nuke[c]
  108. : ((c = xlate[c]) != last || !sflag || !nuke[c]))
  109. putchar (last = c);
  110. exit (0);
  111. }
  112. int
  113. snarf (s)
  114. struct string_range_stream *s;
  115. { register c;
  116. switch (s->state)
  117. { case SRS_EOF:
  118. c = EOF;
  119. break;
  120. case SRS_RANGE:
  121. c = ++s->lower;
  122. if (c == s->upper) s->state = SRS_INIT;
  123. break;
  124. case SRS_INIT:
  125. case SRS_READ:
  126. if (!(c = *s->p++))
  127. { s->state = SRS_EOF;
  128. c = EOF; }
  129. else if (c == '-' && s->state == SRS_READ)
  130. { register d;
  131. if ((d = snarfchar (s)) == EOF) d = 255;
  132. s->state = (((c = ++s->lower) == (s->upper = d))
  133. ? SRS_INIT : SRS_RANGE); }
  134. else { s->state = SRS_READ;
  135. if (c == '\\')
  136. c = snarfquoted (s); }
  137. s->lower = c; }
  138. return (c); }
  139. int
  140. snarfchar (s)
  141. struct string_range_stream *s;
  142. { register c;
  143. if (!(c = *s->p++))
  144. { s->state = SRS_EOF;
  145. return (EOF); }
  146. else if (c == '\\')
  147. return (snarfquoted (s));
  148. else return (c); }
  149. int
  150. snarfquoted (s)
  151. struct string_range_stream *s;
  152. { register c;
  153. if (!(c = *s->p++))
  154. { s->state = SRS_EOF;
  155. return (EOF); }
  156. else if ('0' <= c && c <= '7')
  157. { register d = c - '0';
  158. register i;
  159. for (i = 0; i < 2; i++)
  160. { if (!(c = *s->p++))
  161. { s->state = SRS_EOF;
  162. break; }
  163. else if ('0' <= c && c <= '7')
  164. d = d * 8 + c - '0';
  165. else { s->p--;
  166. break; } }
  167. return (d); }
  168. else return (c); }
  169. /* print error message and die with exit code 1 */
  170. barf (string, arg)
  171. char *string, *arg;
  172. {
  173. fprintf (stderr,"%s: ", progname);
  174. fprintf (stderr, string, arg);
  175. fprintf (stderr,"\n");
  176. exit (1);
  177. }