simplec.l 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. /* $Id: lexer.l,v 1.9 2001/07/13 19:09:56 sandro Exp $ */
  2. /*
  3. * Copyright (c) 1997-2001 Sandro Sigala. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. Digit [0-9]
  26. Literal [a-zA-Z_]
  27. Hex [a-fA-F0-9]
  28. Exp [Ee][+-]?{Digit}+
  29. FS (f|F|l|L)
  30. IS (u|U|l|L)+
  31. %{
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h> /* for getopt */
  37. #define DEFAULT_PREFIX "l_"
  38. static int lookahead = 0;
  39. static FILE *output_file = NULL;
  40. static int opt_prefix = 0; /* Indentifier prefix option. */
  41. static char *opt_prefix_arg = NULL; /* Option argument. */
  42. static void *xmalloc (size_t n)
  43. {
  44. return (calloc (1,n));
  45. }
  46. static void *xrealloc (void *ptr, size_t n)
  47. {
  48. return (realloc (ptr,n));
  49. }
  50. static void xfree (void *ptr)
  51. {
  52. if (ptr) free (ptr);
  53. }
  54. /* codes for lexer tokens */
  55. enum {
  56. IDENTIFIER=256,
  57. CHARACTER,
  58. STRING,
  59. COMMENT,
  60. CONSTANT,
  61. DIRECTIVE,
  62. KEYWORD,
  63. KW_BREAK,
  64. KW_CONTINUE,
  65. KW_DO,
  66. KW_ELSE,
  67. KW_FOR,
  68. KW_IF,
  69. KW_SWITCH,
  70. KW_WHILE,
  71. OPERATOR,
  72. UNTIL_CLOSEPAREN,
  73. UNTIL_CLOSEPAREN_NOECHO,
  74. UNTIL_ENDOFINSTR,
  75. UNTIL_ENDOFINSTR_NOECHO
  76. };
  77. static void string(void);
  78. static void comment(void);
  79. static void cppcomment(void);
  80. static void directive(void);
  81. static int yywrap(void) { return 1; }
  82. %}
  83. %%
  84. "//" { cppcomment(); return COMMENT; }
  85. "/*" { comment(); return COMMENT; }
  86. "#" { directive(); return DIRECTIVE; }
  87. "auto" { return KEYWORD; }
  88. "break" { return KW_BREAK; }
  89. "case" { return KEYWORD; }
  90. "char" { return KEYWORD; }
  91. "const" { return KEYWORD; }
  92. "continue" { return KW_CONTINUE; }
  93. "default" { return KEYWORD; }
  94. "do" { return KW_DO; }
  95. "double" { return KEYWORD; }
  96. "else" { return KW_ELSE; }
  97. "enum" { return KEYWORD; }
  98. "extern" { return KEYWORD; }
  99. "float" { return KEYWORD; }
  100. "for" { return KW_FOR; }
  101. "goto" { return KEYWORD; }
  102. "if" { return KW_IF; }
  103. "int" { return KEYWORD; }
  104. "long" { return KEYWORD; }
  105. "register" { return KEYWORD; }
  106. "return" { return KEYWORD; }
  107. "short" { return KEYWORD; }
  108. "signed" { return KEYWORD; }
  109. "sizeof" { return KEYWORD; }
  110. "static" { return KEYWORD; }
  111. "struct" { return KEYWORD; }
  112. "switch" { return KW_SWITCH; }
  113. "typedef" { return KEYWORD; }
  114. "union" { return KEYWORD; }
  115. "unsigned" { return KEYWORD; }
  116. "void" { return KEYWORD; }
  117. "volatile" { return KEYWORD; }
  118. "while" { return KW_WHILE; }
  119. {Literal}({Literal}|{Digit})* { return IDENTIFIER; }
  120. 0[xX]{Hex}+{IS}? { return CONSTANT; }
  121. 0{Digit}+{IS}? { return CONSTANT; }
  122. {Digit}+{IS} { return CONSTANT; }
  123. {Digit}+ { return CONSTANT; }
  124. '(\\.|[^\\'])+' { return CHARACTER; }
  125. {Digit}+{Exp}{FS}? { return CONSTANT; }
  126. {Digit}*"."{Digit}+({Exp})?{FS}? { return CONSTANT; }
  127. {Digit}+"."{Digit}*({Exp})?{FS}? { return CONSTANT; }
  128. "\"" { string(); return STRING; }
  129. ">>=" { return OPERATOR; }
  130. "<<=" { return OPERATOR; }
  131. "+=" { return OPERATOR; }
  132. "-=" { return OPERATOR; }
  133. "*=" { return OPERATOR; }
  134. "/=" { return OPERATOR; }
  135. "%=" { return OPERATOR; }
  136. "&=" { return OPERATOR; }
  137. "^=" { return OPERATOR; }
  138. "|=" { return OPERATOR; }
  139. ">>" { return OPERATOR; }
  140. "<<" { return OPERATOR; }
  141. "++" { return OPERATOR; }
  142. "--" { return OPERATOR; }
  143. "->" { return OPERATOR; }
  144. "&&" { return OPERATOR; }
  145. "||" { return OPERATOR; }
  146. "<=" { return OPERATOR; }
  147. ">=" { return OPERATOR; }
  148. "==" { return OPERATOR; }
  149. "!=" { return OPERATOR; }
  150. "..." { return OPERATOR; }
  151. ";" { return ';'; }
  152. "{" { return '{'; }
  153. "}" { return '}'; }
  154. "," { return ','; }
  155. ":" { return ':'; }
  156. "=" { return '='; }
  157. "(" { return '('; }
  158. ")" { return ')'; }
  159. "[" { return '['; }
  160. "]" { return ']'; }
  161. "." { return '.'; }
  162. "&" { return '&'; }
  163. "!" { return '!'; }
  164. "~" { return '~'; }
  165. "-" { return '-'; }
  166. "+" { return '+'; }
  167. "*" { return '*'; }
  168. "/" { return '/'; }
  169. "%" { return '%'; }
  170. "<" { return '<'; }
  171. ">" { return '>'; }
  172. "^" { return '^'; }
  173. "|" { return '|'; }
  174. "?" { return '?'; }
  175. [ \t\v\n\f] { return yytext[0]; }
  176. . { return yytext[0]; }
  177. %%
  178. static char *token_buffer = NULL;
  179. static int maxtoken = 0;
  180. static void init_lex(void)
  181. {
  182. /* how long can be a token or comment */
  183. maxtoken = (64*1024);
  184. token_buffer = (char *)xmalloc(maxtoken + 1);
  185. }
  186. void done_lex(void)
  187. {
  188. xfree(token_buffer);
  189. }
  190. static char * extend_token_buffer(char *p)
  191. {
  192. int offset = p - token_buffer;
  193. maxtoken = maxtoken * 2 + 10;
  194. token_buffer = (char *)xrealloc(token_buffer, maxtoken + 2);
  195. return token_buffer + offset;
  196. }
  197. static void string(void)
  198. {
  199. char *p;
  200. int c;
  201. p = token_buffer;
  202. *p++ = '"';
  203. while ((c = input()) != EOF && c != '"') {
  204. if (p >= token_buffer + maxtoken)
  205. p = extend_token_buffer(p);
  206. *p++ = c;
  207. if (c == '\\')
  208. *p++ = input();
  209. }
  210. if (c == EOF)
  211. fprintf (stderr, "unexpected end of file in string\n");
  212. *p++ = '"';
  213. *p = '\0';
  214. }
  215. static void comment(void)
  216. {
  217. char *p;
  218. int c;
  219. p = token_buffer;
  220. *p++ = '/';
  221. *p++ = '*';
  222. while ((c = input()) != EOF) {
  223. resync: if (p >= token_buffer + maxtoken)
  224. p = extend_token_buffer(p);
  225. *p++ = c;
  226. if (c == '*')
  227. if ((c = input()) == '/') {
  228. *p++ = c;
  229. *p = '\0';
  230. return;
  231. } else
  232. goto resync;
  233. }
  234. *p = '\0';
  235. }
  236. static void cppcomment(void)
  237. {
  238. char *p;
  239. int c;
  240. p = token_buffer;
  241. *p++ = '/';
  242. *p++ = '/';
  243. while ((c = input()) != EOF) {
  244. resync: if (p >= token_buffer + maxtoken)
  245. p = extend_token_buffer(p);
  246. *p++ = c;
  247. if (c == '\n') {
  248. *p = '\0';
  249. return;
  250. }
  251. }
  252. *p = '\0';
  253. }
  254. static void directive(void)
  255. {
  256. char *p;
  257. int c;
  258. p = token_buffer;
  259. *p++ = '#';
  260. while ((c = input()) != EOF && c != '\n') {
  261. if (p >= token_buffer + maxtoken)
  262. p = extend_token_buffer(p);
  263. *p++ = c;
  264. if (c == '\\')
  265. *p++ = input();
  266. }
  267. *p++ = c;
  268. *p = '\0';
  269. }
  270. static void outstr (char *s)
  271. {
  272. fprintf (output_file, "%s", s);
  273. }
  274. static void outch (int c)
  275. {
  276. fprintf (output_file, "%c", c);
  277. }
  278. static void next_token (void)
  279. {
  280. lookahead = yylex();
  281. }
  282. static void outtk(int tk)
  283. {
  284. switch (tk) {
  285. case COMMENT:
  286. case STRING:
  287. case DIRECTIVE:
  288. outstr(token_buffer);
  289. break;
  290. case KW_BREAK:
  291. case KW_CONTINUE:
  292. case KW_DO:
  293. case KW_ELSE:
  294. case KW_FOR:
  295. case KW_IF:
  296. case KW_SWITCH:
  297. case KW_WHILE:
  298. case KEYWORD:
  299. case IDENTIFIER:
  300. case CONSTANT:
  301. case CHARACTER:
  302. case OPERATOR:
  303. outstr(yytext);
  304. break;
  305. default:
  306. outch(tk);
  307. }
  308. }
  309. static int label_counter = 0;
  310. static int label_continue = 0;
  311. static int label_break = 0;
  312. static int parse_until(int untiltk);
  313. static void do_while(void)
  314. {
  315. int label_c = ++label_counter;
  316. int label_b = ++label_counter;
  317. int save_label_c = label_continue;
  318. int save_label_b = label_break;
  319. label_continue = label_c;
  320. label_break = label_b;
  321. while (lookahead != '(')
  322. next_token();
  323. next_token();
  324. outstr("{\n");
  325. fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_c);
  326. outstr("if (!(");
  327. parse_until(UNTIL_CLOSEPAREN);
  328. outstr("\n) {\n");
  329. fprintf(output_file, "goto %s%d;\n}\n", opt_prefix_arg, label_b);
  330. parse_until(UNTIL_ENDOFINSTR);
  331. fprintf(output_file, "\ngoto %s%d;\n", opt_prefix_arg, label_c);
  332. fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
  333. outstr("}\n");
  334. label_continue = save_label_c;
  335. label_break = save_label_b;
  336. }
  337. static void do_do(void)
  338. {
  339. int label_c = ++label_counter;
  340. int label_b = ++label_counter;
  341. int save_label_c = label_continue;
  342. int save_label_b = label_break;
  343. label_continue = label_c;
  344. label_break = label_b;
  345. outstr("{\n");
  346. fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_c);
  347. parse_until(UNTIL_ENDOFINSTR);
  348. while (lookahead != '(')
  349. next_token();
  350. next_token();
  351. outstr("\nif (");
  352. parse_until(UNTIL_CLOSEPAREN);
  353. fprintf(output_file, " {\ngoto %s%d;\n}\n", opt_prefix_arg, label_c);
  354. fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
  355. outstr("}\n");
  356. label_continue = save_label_c;
  357. label_break = save_label_b;
  358. }
  359. static void do_for(void)
  360. {
  361. int label_l = ++label_counter;
  362. int label_c = ++label_counter;
  363. int label_b = ++label_counter;
  364. int label_i = ++label_counter;
  365. int save_label_c = label_continue;
  366. int save_label_b = label_break;
  367. label_continue = label_c;
  368. label_break = label_b;
  369. outstr("{\n");
  370. while (lookahead != '(')
  371. next_token();
  372. next_token();
  373. parse_until(UNTIL_ENDOFINSTR);
  374. fprintf(output_file, "\n%s%d:\n", opt_prefix_arg, label_l);
  375. outstr("if (!(");
  376. if (!parse_until(UNTIL_ENDOFINSTR_NOECHO))
  377. fprintf(output_file, "1");
  378. outstr("))\n");
  379. fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_b);
  380. fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_i);
  381. fprintf(output_file, "%s%d:", opt_prefix_arg, label_c);
  382. parse_until(UNTIL_CLOSEPAREN_NOECHO);
  383. outstr(";\n");
  384. fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_l);
  385. fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_i);
  386. parse_until(UNTIL_ENDOFINSTR);
  387. fprintf(output_file, "\ngoto %s%d;\n", opt_prefix_arg, label_c);
  388. fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
  389. outstr("}\n");
  390. label_continue = save_label_c;
  391. label_break = save_label_b;
  392. }
  393. static void do_switch(void)
  394. {
  395. int save_label_b = label_break;
  396. label_break = 0;
  397. outstr("switch");
  398. parse_until(UNTIL_ENDOFINSTR);
  399. label_break = save_label_b;
  400. }
  401. static void do_if(void)
  402. {
  403. int label_l = ++label_counter;
  404. int label_e;
  405. while (lookahead != '(')
  406. next_token();
  407. next_token();
  408. outstr("{\n");
  409. outstr("if (!(");
  410. parse_until(UNTIL_CLOSEPAREN);
  411. outstr(") {\n");
  412. fprintf(output_file, "goto %s%d;\n}\n", opt_prefix_arg, label_l);
  413. parse_until(UNTIL_ENDOFINSTR);
  414. while (isspace(lookahead))
  415. next_token();
  416. if (lookahead == KW_ELSE) {
  417. label_e = ++label_counter;
  418. fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_e);
  419. }
  420. fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_l);
  421. if (lookahead == KW_ELSE) {
  422. next_token();
  423. parse_until(UNTIL_ENDOFINSTR);
  424. fprintf(output_file, "\n%s%d:;\n", opt_prefix_arg, label_e);
  425. }
  426. outstr("}\n");
  427. }
  428. /*
  429. * The main parsing function.
  430. */
  431. static int parse_until(int untiltk)
  432. {
  433. int nparens = 0, nblocks = 0;
  434. int isexpr = 0;
  435. if (untiltk == UNTIL_CLOSEPAREN || untiltk == UNTIL_CLOSEPAREN_NOECHO)
  436. nparens++;
  437. while (lookahead != 0)
  438. switch (lookahead) {
  439. case '(':
  440. next_token();
  441. isexpr = 1;
  442. if (nblocks == 0)
  443. nparens++;
  444. outch('(');
  445. break;
  446. case ')':
  447. next_token();
  448. isexpr = 1;
  449. if (nblocks == 0)
  450. nparens--;
  451. if (untiltk == UNTIL_CLOSEPAREN_NOECHO && nparens == 0)
  452. return isexpr;
  453. outch(')');
  454. if (untiltk == UNTIL_CLOSEPAREN && nparens == 0)
  455. return isexpr;
  456. break;
  457. case '{':
  458. next_token();
  459. isexpr = 1;
  460. nblocks++;
  461. outch('{');
  462. break;
  463. case '}':
  464. next_token();
  465. isexpr = 1;
  466. nblocks--;
  467. outch('}');
  468. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  469. return isexpr;
  470. break;
  471. case ';':
  472. next_token();
  473. if (untiltk == UNTIL_ENDOFINSTR_NOECHO && nblocks == 0)
  474. return isexpr;
  475. outch(';');
  476. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  477. return isexpr;
  478. break;
  479. case KW_DO:
  480. next_token();
  481. do_do();
  482. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  483. return isexpr;
  484. break;
  485. case KW_WHILE:
  486. next_token();
  487. do_while();
  488. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  489. return isexpr;
  490. break;
  491. case KW_FOR:
  492. next_token();
  493. do_for();
  494. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  495. return isexpr;
  496. break;
  497. case KW_SWITCH:
  498. next_token();
  499. do_switch();
  500. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  501. return isexpr;
  502. break;
  503. case KW_IF:
  504. next_token();
  505. do_if();
  506. if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
  507. return isexpr;
  508. break;
  509. case KW_BREAK:
  510. next_token();
  511. if (label_break > 0)
  512. fprintf(output_file, "goto %s%d", opt_prefix_arg, label_break);
  513. else
  514. outstr("break");
  515. break;
  516. case KW_CONTINUE:
  517. next_token();
  518. if (label_continue > 0)
  519. fprintf(output_file, "goto %s%d", opt_prefix_arg, label_continue);
  520. else
  521. outstr("continue");
  522. break;
  523. default:
  524. if (!isspace(lookahead))
  525. isexpr = 1;
  526. outtk(lookahead);
  527. next_token();
  528. }
  529. return isexpr;
  530. }
  531. static void parse(void)
  532. {
  533. next_token();
  534. parse_until(0);
  535. }
  536. static void process_file(char *filename)
  537. {
  538. if (filename != NULL && strcmp(filename, "-") != 0) {
  539. if ((yyin = fopen(filename, "r")) == NULL)
  540. fprintf(stderr, "cannot open input file %s\n", filename);
  541. } else
  542. yyin = stdin;
  543. init_lex();
  544. label_continue = label_break = label_counter = 0;
  545. parse();
  546. done_lex();
  547. if (yyin != stdin)
  548. fclose(yyin);
  549. }
  550. /*
  551. * Output the program syntax then exit.
  552. */
  553. static void usage(void)
  554. {
  555. fprintf(stderr, "usage: simplec [-V] [-o file] [-p prefix] [file ...]\n");
  556. }
  557. /*
  558. * Used by the err() functions.
  559. */
  560. char *progname = NULL;
  561. int main(int argc, char **argv)
  562. {
  563. int c;
  564. progname = argv[0];
  565. output_file = stdout;
  566. while ((c = getopt(argc, argv, "Vo:p:")) != -1)
  567. switch (c) {
  568. case 'o':
  569. if (output_file != stdout)
  570. fclose(output_file);
  571. if ((output_file = fopen(optarg, "w")) == NULL) {
  572. fprintf (stderr, "cannot open output file %s\n", optarg);
  573. return (0);
  574. }
  575. break;
  576. case 'p':
  577. opt_prefix = 1;
  578. opt_prefix_arg = optarg;
  579. break;
  580. case 'V':
  581. fprintf(stderr, "%s\n", "simplec based on cunloop from cutils-1.6");
  582. return (0);
  583. case '?':
  584. default:
  585. usage();
  586. /* NOTREACHED */
  587. return (0);
  588. }
  589. argc -= optind;
  590. argv += optind;
  591. if (!opt_prefix)
  592. opt_prefix_arg = DEFAULT_PREFIX;
  593. if (*argv) {
  594. /* scan the filenames */
  595. while (*argv) {
  596. process_file(*argv++);
  597. }
  598. } else {
  599. /* if no file pecified use stdin */
  600. process_file((char *)0);
  601. }
  602. if (output_file != stdout)
  603. fclose(output_file);
  604. return 0;
  605. }
  606. /* end. */