/* awk.c  9.1 KB */
/*
 * gawk -- GNU version of awk
 * Copyright (C) 1986 Free Software Foundation
 * Written by Paul Rubin, August 1986
 *
 * This will be free software eventually, but not until it is finished.
 */
%{
#include <ctype.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>

#define YYDEBUG
static int yylex ();
static yyerror ();
int expression_value;
%}
  14. %union {
  15. long lval;
  16. int voidval;
  17. float fval;
  18. NODE *exp_val;
  19. char *sval;
  20. }
  21. %type <exp_val> exp start
  22. %token <sval> NAME
  23. %token <lval> ERROR
  24. %token <fval> NUMBER
  25. %left OR
  26. %left AND
  27. %left EQUAL NOTEQUAL
  28. %left '<' '>' LEQ GEQ
  29. %left '+' '-'
  30. %left '*' '/' '%'
  31. %right ASSIGNOP
  32. %right UNARY
  33. %%
  34. start : program
  35. { expression_value = $1; }
  36. ;
  37. program :
  38. /* empty */
  39. { $$ = NULL ; }
  40. | rule
  41. { $$ = $1; }
  42. | program rule
  43. { $$ = append_rule ($1, $2); }
  44. ;
  45. rule :
  46. pattern action NEWLINE
  47. { $$ = make_rule ($1, $2); }
  48. ;
  49. pattern : /* empty */
  50. | BEGIN
  51. | END
  52. | selection
  53. | '!' selection
  54. | selection AND selection
  55. | selection OR selection
  56. | '(' selection ')'
  57. | pattern ',' pattern
  58. ;
  59. /* In the next 2 rules, want_regexp tells yylex to expect stuff
  60. enclosed by slashes and return a regexp token. */
  61. selection :
  62. { ++want_regexp; }
  63. REGEXP
  64. { want_regexp = 0;
  65. $$ = make_regexp ($1);
  66. }
  67. | relational_expression
  68. ;
  69. relational_expression :
  70. expression MATCHOP
  71. { ++want_regexp; }
  72. REGEXP
  73. { want_regexp = 0;
  74. $$ = node ($1, $2, make_regexp($3));
  75. }
  76. | expression RELOP expression
  77. { $$ = node ($1, $2, $3); }
  78. ;
  79. action : /* empty */
  80. | '{' statements '}'
  81. ;
  82. /* Expressions, not including the comma operator. */
  83. exp : '(' exp ')'
  84. { $$ = $2; }
  85. | '-' exp %prec UNARY
  86. { $$ = node ($2, UNARY_MINUS, NULL);
  87. | INCDEC variable %prec UNARY
  88. { $$ = node ($2, PRE_INCDEC, $1);
  89. | variable INCDEC %prec UNARY
  90. { $$ = node ($2, POST_INCDEC, $1);
  91. | NUMBER
  92. { $$ = make_number ($1)
  93. | STRING
  94. { $$ = make_string ($1);
  95. ;
  96. variable :
  97. NAME
  98. { $$ = variable ($1); }
  99. | NAME '[' exp ']'
  100. { $$ = node (variable($1), OP_SUBSCRIPT, $3); }
  101. ;
  102. /* Binary operators in order of decreasing precedence. */
  103. exp : exp '*' exp
  104. { $$ = node ($1, $2, $3); }
  105. | exp '/' exp
  106. { $$ = node ($1, $2, $3); }
  107. | exp '%' exp
  108. { $$ = node ($1, $2, $3); }
  109. | exp '+' exp
  110. { $$ = node ($1, $2, $3); }
  111. | exp '-' exp
  112. { $$ = node ($1, $2, $3); }
  113. | exp ASSIGNOP exp
  114. { $$ = node ($1, $2, $3); }
  115. ;
  116. %%
  117. /* During parsing of a gawk program, the pointer to the next character
  118. is in this variable. */
  119. static char *lexptr;
  120. struct token {
  121. char *operator;
  122. int value;
  123. int class;
  124. };
  125. #define NULL 0
  126. static struct token tokentab2[] = {
  127. {"&&", AND, AND},
  128. {"||", OR, OR},
  129. {"==", EQUAL, RELOP},
  130. {"!=", NOTEQUAL, RELOP},
  131. {"<=", LEQ, RELOP},
  132. {">=", GEQ, RELOP},
  133. {"!~", NOMATCH, MATCHOP},
  134. {"++", INCREMENT, INCDEC},
  135. {"--", DECREMENT, INCDEC},
  136. {NULL, ERROR}
  137. };
  138. /* Read one token, getting characters through lexptr. */
  139. static int
  140. yylex ()
  141. {
  142. register int c;
  143. register int namelen;
  144. register char *tokstart;
  145. register struct token *toktab;
  146. retry:
  147. tokstart = lexptr;
  148. c = *tokstart;
  149. /* See if it is a special token of length 2. */
  150. for (toktab = tokentab2; toktab->operator != NULL; toktab++)
  151. if (c == *toktab->operator && tokstart[1] == toktab->operator[1]) {
  152. lexptr += 2;
  153. yylval.lval = toktab->value;
  154. return toktab->class;
  155. }
  156. switch (c) {
  157. case 0:
  158. return 0;
  159. case ' ':
  160. case '\t':
  161. lexptr++;
  162. goto retry;
  163. case '\'':
  164. lexptr++;
  165. c = *lexptr++;
  166. if (c == '\\')
  167. c = parse_escape (&lexptr);
  168. yylval.lval = c;
  169. c = *lexptr++;
  170. if (c != '\'') {
  171. yyerror ("Invalid character constant");
  172. return ERROR;
  173. }
  174. return CHAR;
  175. case '#': /* it's a comment */
  176. while (*lexptr != '\n' && *lexptr != '\0')
  177. lexptr++;
  178. goto retry;
  179. case '*':
  180. case '%':
  181. case '(':
  182. case ')':
  183. case '+':
  184. case '-':
  185. case '[':
  186. case ']':
  187. case '=':
  188. case '!':
  189. yylval.cval = c;
  190. lexptr++;
  191. return c;
  192. case '<':
  193. case '>':
  194. yylval.cval = c;
  195. lexptr++;
  196. return RELOP;
  197. case '~':
  198. yylval.cval = c;
  199. lexptr++;
  200. return MATCHOP;
  201. case '"':
  202. lexptr++;
  203. while (*lexptr != '\0') {
  204. switch (*lexptr++) {
  205. case '\\':
  206. lexptr++;
  207. break;
  208. case '\n':
  209. yyerror ("unterminated string");
  210. return ERROR;
  211. case '\"':
  212. yylval.sval = tokstart;
  213. return STRING;
  214. }
  215. return ERROR;
  216. }
  217. if (c >= '0' && c <= '9') {
  218. /* It's a number */
  219. int seen_e = 0, seen_point = 0;
  220. for (namelen = 0; (c = tokstart[namelen]) != '\0'; namelen++) {
  221. switch (c) {
  222. case '.':
  223. if (seen_point)
  224. goto got_number;
  225. ++seen_point;
  226. break;
  227. case 'e':
  228. case 'E':
  229. if (seen_e)
  230. goto got_number;
  231. ++seen_e;
  232. if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
  233. namelen++;
  234. break;
  235. case '0': case '1': case '2': case '3': case '4':
  236. case '5': case '6': case '7': case '8': case '9':
  237. break;
  238. default:
  239. goto got_number;
  240. }
  241. }
  242. got_number:
  243. lexptr = tokstart + namelen + 1;
  244. yylval.fval = atof(tokstart);
  245. return NUMBER;
  246. }
  247. if (!isalpha[c]) {
  248. yyerror ("Invalid token in expression\n");
  249. return ERROR;
  250. }
  251. /* It's a name. See how long it is. */
  252. for (namelen = 0; isalnum(tokstart[namelen]); namelen++)
  253. ;
  254. lexptr += namelen;
  255. return NAME;
  256. }
  257. /* Parse a C escape sequence. STRING_PTR points to a variable
  258. containing a pointer to the string to parse. That pointer
  259. is updated past the characters we use. The value of the
  260. escape sequence is returned.
  261. A negative value means the sequence \ newline was seen,
  262. which is supposed to be equivalent to nothing at all.
  263. If \ is followed by a null character, we return a negative
  264. value and leave the string pointer pointing at the null character.
  265. If \ is followed by 000, we return 0 and leave the string pointer
  266. after the zeros. A value of 0 does not mean end of string. */
  267. static int
  268. parse_escape (string_ptr)
  269. char **string_ptr;
  270. {
  271. register int c = *(*string_ptr)++;
  272. switch (c)
  273. {
  274. case 'a':
  275. return '\a';
  276. case 'b':
  277. return '\b';
  278. case 'e':
  279. return 033;
  280. case 'f':
  281. return '\f';
  282. case 'n':
  283. return '\n';
  284. case 'r':
  285. return '\r';
  286. case 't':
  287. return '\t';
  288. case 'v':
  289. return '\v';
  290. case '\n':
  291. return -2;
  292. case 0:
  293. (*string_ptr)--;
  294. return 0;
  295. case '^':
  296. c = *(*string_ptr)++;
  297. if (c == '\\')
  298. c = parse_escape (string_ptr);
  299. if (c == '?')
  300. return 0177;
  301. return (c & 0200) | (c & 037);
  302. case '0':
  303. case '1':
  304. case '2':
  305. case '3':
  306. case '4':
  307. case '5':
  308. case '6':
  309. case '7':
  310. {
  311. register int i = c - '0';
  312. register int count = 0;
  313. while (++count < 3)
  314. {
  315. if ((c = *(*string_ptr)++) >= '0' && c <= '7')
  316. {
  317. i *= 8;
  318. i += c - '0';
  319. }
  320. else
  321. {
  322. (*string_ptr)--;
  323. break;
  324. }
  325. }
  326. return i;
  327. }
  328. default:
  329. return c;
  330. }
  331. }
  332. static
  333. yyerror (s)
  334. char *s;
  335. {
  336. error (s);
  337. longjmp (parse_error_return, 1);
  338. }
  339. /* This page contains the entry point to this file. */
  340. /* Parse STRING as an expression, and complain if this fails
  341. to use up all of the contents of STRING. */
  342. int
  343. parse_c_expression (string)
  344. char *string;
  345. {
  346. lexptr = string;
  347. if (lexptr == 0 || *lexptr == 0) {
  348. error ("empty #if expression\n");
  349. return 0; /* don't include the #if group */
  350. }
  351. /* if there is some sort of scanning error, just return 0 and assume
  352. the parsing routine has printed an error message somewhere.
  353. there is surely a better thing to do than this. */
  354. if (setjmp(parse_error_return))
  355. return 0;
  356. if (yyparse ())
  357. return 0; /* actually this is never reached
  358. the way things stand. */
  359. if (*lexptr)
  360. error ("Junk after end of expression.");
  361. return expression_value; /* set by yyparse() */
  362. }
  363. #ifdef TEST_EXP_READER
  364. /* main program, for testing purposes. */
  365. main()
  366. {
  367. int n;
  368. char buf[1024];
  369. extern int yydebug;
  370. /*
  371. yydebug = 1;
  372. */
  373. initialize_random_junk ();
  374. for (;;) {
  375. printf("enter expression: ");
  376. n = 0;
  377. while ((buf[n] = getchar()) != '\n')
  378. n++;
  379. buf[n] = '\0';
  380. printf("parser returned %d\n", parse_c_expression(buf));
  381. }
  382. }
  383. /* table to tell if char can be part of a C identifier. */
  384. char is_identchar[256];
  385. /* table to tell if char can be first char of a c identifier. */
  386. char is_identstart[256];
  387. /* table to tell if c is horizontal space. isspace() thinks that
  388. newline is space; this is not a good idea for this program. */
  389. char is_hor_space[256];
  390. /*
  391. * initialize random junk in the hash table and maybe other places
  392. */
  393. initialize_random_junk()
  394. {
  395. register int i;
  396. /*
  397. * Set up is_identchar and is_identstart tables. These should be
  398. * faster than saying (is_alpha(c) || c == '_'), etc.
  399. * Must do set up these things before calling any routines tthat
  400. * refer to them.
  401. */
  402. for (i = 'a'; i <= 'z'; i++) {
  403. ++is_identchar[i - 'a' + 'A'];
  404. ++is_identchar[i];
  405. ++is_identstart[i - 'a' + 'A'];
  406. ++is_identstart[i];
  407. }
  408. for (i = '0'; i <= '9'; i++)
  409. ++is_identchar[i];
  410. ++is_identchar['_'];
  411. ++is_identstart['_'];
  412. /* horizontal space table */
  413. ++is_hor_space[' '];
  414. ++is_hor_space['\t'];
  415. }
  416. error (msg)
  417. {
  418. printf("error: %s\n", msg);
  419. }
  420. #endif