123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701 |
- /* $Id: lexer.l,v 1.9 2001/07/13 19:09:56 sandro Exp $ */
- /*
- * Copyright (c) 1997-2001 Sandro Sigala. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- Digit [0-9]
- Literal [a-zA-Z_]
- Hex [a-fA-F0-9]
- Exp [Ee][+-]?{Digit}+
- FS (f|F|l|L)
- IS (u|U|l|L)+
- %{
- #include <ctype.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h> /* for getopt */
- #define DEFAULT_PREFIX "l_"
- static int lookahead = 0;
- static FILE *output_file = NULL;
- static int opt_prefix = 0; /* Identifier prefix option. */
- static char *opt_prefix_arg = NULL; /* Option argument. */
/*
 * Allocate n zero-filled bytes.
 *
 * Never returns NULL: if the allocation fails a message is written to
 * stderr and the program exits with EXIT_FAILURE.  A request for zero
 * bytes is rounded up to one byte so the result is always a valid,
 * freeable pointer.
 */
static void *xmalloc (size_t n)
{
	void *ptr = calloc (1, n ? n : 1);

	if (ptr == NULL) {
		fprintf (stderr, "out of memory\n");
		exit (EXIT_FAILURE);
	}
	return ptr;
}
/*
 * Resize the allocation at ptr to n bytes (ptr may be NULL).
 *
 * Never returns NULL: if the reallocation fails a message is written to
 * stderr and the program exits with EXIT_FAILURE.  A request for zero
 * bytes is rounded up to one byte so the call never degenerates into an
 * implementation-defined "free and return NULL".
 */
static void *xrealloc (void *ptr, size_t n)
{
	void *grown = realloc (ptr, n ? n : 1);

	if (grown == NULL) {
		fprintf (stderr, "out of memory\n");
		exit (EXIT_FAILURE);
	}
	return grown;
}
/*
 * Release memory obtained from xmalloc()/xrealloc().
 * Passing NULL is allowed; free() itself treats it as a no-op.
 */
static void xfree (void *ptr)
{
	free (ptr);
}
/*
 * Token codes returned by the scanner.  They start at 256 so they can
 * share an int with the single-character tokens, which the rules return
 * as the character itself.
 */
enum {
	/* lexical classes */
	IDENTIFIER = 256,
	CHARACTER,
	STRING,
	COMMENT,
	CONSTANT,
	DIRECTIVE,
	KEYWORD,		/* any keyword without a dedicated code below */

	/* keywords the parser handles specially */
	KW_BREAK,
	KW_CONTINUE,
	KW_DO,
	KW_ELSE,
	KW_FOR,
	KW_IF,
	KW_SWITCH,
	KW_WHILE,

	OPERATOR,		/* multi-character operators */

	/* stop conditions passed to parse_until(); never produced by yylex() */
	UNTIL_CLOSEPAREN,
	UNTIL_CLOSEPAREN_NOECHO,
	UNTIL_ENDOFINSTR,
	UNTIL_ENDOFINSTR_NOECHO
};
- static void string(void);
- static void comment(void);
- static void cppcomment(void);
- static void directive(void);
/*
 * End-of-input hook called by the generated scanner.  Always returns 1,
 * which tells flex that no further input follows the current file.
 */
static int yywrap(void)
{
	return 1;
}
- %}
- %%
- "//" { cppcomment(); return COMMENT; }
- "/*" { comment(); return COMMENT; }
- "#" { directive(); return DIRECTIVE; }
- "auto" { return KEYWORD; }
- "break" { return KW_BREAK; }
- "case" { return KEYWORD; }
- "char" { return KEYWORD; }
- "const" { return KEYWORD; }
- "continue" { return KW_CONTINUE; }
- "default" { return KEYWORD; }
- "do" { return KW_DO; }
- "double" { return KEYWORD; }
- "else" { return KW_ELSE; }
- "enum" { return KEYWORD; }
- "extern" { return KEYWORD; }
- "float" { return KEYWORD; }
- "for" { return KW_FOR; }
- "goto" { return KEYWORD; }
- "if" { return KW_IF; }
- "int" { return KEYWORD; }
- "long" { return KEYWORD; }
- "register" { return KEYWORD; }
- "return" { return KEYWORD; }
- "short" { return KEYWORD; }
- "signed" { return KEYWORD; }
- "sizeof" { return KEYWORD; }
- "static" { return KEYWORD; }
- "struct" { return KEYWORD; }
- "switch" { return KW_SWITCH; }
- "typedef" { return KEYWORD; }
- "union" { return KEYWORD; }
- "unsigned" { return KEYWORD; }
- "void" { return KEYWORD; }
- "volatile" { return KEYWORD; }
- "while" { return KW_WHILE; }
- {Literal}({Literal}|{Digit})* { return IDENTIFIER; }
- 0[xX]{Hex}+{IS}? { return CONSTANT; }
- 0{Digit}+{IS}? { return CONSTANT; }
- {Digit}+{IS} { return CONSTANT; }
- {Digit}+ { return CONSTANT; }
- '(\\.|[^\\'])+' { return CHARACTER; }
- {Digit}+{Exp}{FS}? { return CONSTANT; }
- {Digit}*"."{Digit}+({Exp})?{FS}? { return CONSTANT; }
- {Digit}+"."{Digit}*({Exp})?{FS}? { return CONSTANT; }
- "\"" { string(); return STRING; }
- ">>=" { return OPERATOR; }
- "<<=" { return OPERATOR; }
- "+=" { return OPERATOR; }
- "-=" { return OPERATOR; }
- "*=" { return OPERATOR; }
- "/=" { return OPERATOR; }
- "%=" { return OPERATOR; }
- "&=" { return OPERATOR; }
- "^=" { return OPERATOR; }
- "|=" { return OPERATOR; }
- ">>" { return OPERATOR; }
- "<<" { return OPERATOR; }
- "++" { return OPERATOR; }
- "--" { return OPERATOR; }
- "->" { return OPERATOR; }
- "&&" { return OPERATOR; }
- "||" { return OPERATOR; }
- "<=" { return OPERATOR; }
- ">=" { return OPERATOR; }
- "==" { return OPERATOR; }
- "!=" { return OPERATOR; }
- "..." { return OPERATOR; }
- ";" { return ';'; }
- "{" { return '{'; }
- "}" { return '}'; }
- "," { return ','; }
- ":" { return ':'; }
- "=" { return '='; }
- "(" { return '('; }
- ")" { return ')'; }
- "[" { return '['; }
- "]" { return ']'; }
- "." { return '.'; }
- "&" { return '&'; }
- "!" { return '!'; }
- "~" { return '~'; }
- "-" { return '-'; }
- "+" { return '+'; }
- "*" { return '*'; }
- "/" { return '/'; }
- "%" { return '%'; }
- "<" { return '<'; }
- ">" { return '>'; }
- "^" { return '^'; }
- "|" { return '|'; }
- "?" { return '?'; }
- [ \t\v\n\f] { return yytext[0]; }
- . { return yytext[0]; }
- %%
- static char *token_buffer = NULL;
- static int maxtoken = 0;
- static void init_lex(void)
- {
- /* how long can be a token or comment */
- maxtoken = (64*1024);
- token_buffer = (char *)xmalloc(maxtoken + 1);
- }
- void done_lex(void)
- {
- xfree(token_buffer);
- }
- static char * extend_token_buffer(char *p)
- {
- int offset = p - token_buffer;
- maxtoken = maxtoken * 2 + 10;
- token_buffer = (char *)xrealloc(token_buffer, maxtoken + 2);
- return token_buffer + offset;
- }
- static void string(void)
- {
- char *p;
- int c;
- p = token_buffer;
- *p++ = '"';
- while ((c = input()) != EOF && c != '"') {
- if (p >= token_buffer + maxtoken)
- p = extend_token_buffer(p);
- *p++ = c;
- if (c == '\\')
- *p++ = input();
- }
- if (c == EOF)
- fprintf (stderr, "unexpected end of file in string\n");
- *p++ = '"';
- *p = '\0';
- }
- static void comment(void)
- {
- char *p;
- int c;
- p = token_buffer;
- *p++ = '/';
- *p++ = '*';
- while ((c = input()) != EOF) {
- resync: if (p >= token_buffer + maxtoken)
- p = extend_token_buffer(p);
- *p++ = c;
- if (c == '*')
- if ((c = input()) == '/') {
- *p++ = c;
- *p = '\0';
- return;
- } else
- goto resync;
- }
- *p = '\0';
- }
- static void cppcomment(void)
- {
- char *p;
- int c;
- p = token_buffer;
- *p++ = '/';
- *p++ = '/';
- while ((c = input()) != EOF) {
- resync: if (p >= token_buffer + maxtoken)
- p = extend_token_buffer(p);
- *p++ = c;
- if (c == '\n') {
- *p = '\0';
- return;
- }
- }
- *p = '\0';
- }
- static void directive(void)
- {
- char *p;
- int c;
- p = token_buffer;
- *p++ = '#';
- while ((c = input()) != EOF && c != '\n') {
- if (p >= token_buffer + maxtoken)
- p = extend_token_buffer(p);
- *p++ = c;
- if (c == '\\')
- *p++ = input();
- }
- *p++ = c;
- *p = '\0';
- }
- static void outstr (char *s)
- {
- fprintf (output_file, "%s", s);
- }
- static void outch (int c)
- {
- fprintf (output_file, "%c", c);
- }
- static void next_token (void)
- {
- lookahead = yylex();
- }
- static void outtk(int tk)
- {
- switch (tk) {
- case COMMENT:
- case STRING:
- case DIRECTIVE:
- outstr(token_buffer);
- break;
- case KW_BREAK:
- case KW_CONTINUE:
- case KW_DO:
- case KW_ELSE:
- case KW_FOR:
- case KW_IF:
- case KW_SWITCH:
- case KW_WHILE:
- case KEYWORD:
- case IDENTIFIER:
- case CONSTANT:
- case CHARACTER:
- case OPERATOR:
- outstr(yytext);
- break;
- default:
- outch(tk);
- }
- }
- static int label_counter = 0;
- static int label_continue = 0;
- static int label_break = 0;
- static int parse_until(int untiltk);
- static void do_while(void)
- {
- int label_c = ++label_counter;
- int label_b = ++label_counter;
- int save_label_c = label_continue;
- int save_label_b = label_break;
- label_continue = label_c;
- label_break = label_b;
- while (lookahead != '(')
- next_token();
- next_token();
- outstr("{\n");
- fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_c);
- outstr("if (!(");
- parse_until(UNTIL_CLOSEPAREN);
- outstr("\n) {\n");
- fprintf(output_file, "goto %s%d;\n}\n", opt_prefix_arg, label_b);
- parse_until(UNTIL_ENDOFINSTR);
- fprintf(output_file, "\ngoto %s%d;\n", opt_prefix_arg, label_c);
- fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
- outstr("}\n");
- label_continue = save_label_c;
- label_break = save_label_b;
- }
- static void do_do(void)
- {
- int label_c = ++label_counter;
- int label_b = ++label_counter;
- int save_label_c = label_continue;
- int save_label_b = label_break;
- label_continue = label_c;
- label_break = label_b;
- outstr("{\n");
- fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_c);
- parse_until(UNTIL_ENDOFINSTR);
- while (lookahead != '(')
- next_token();
- next_token();
- outstr("\nif (");
- parse_until(UNTIL_CLOSEPAREN);
- fprintf(output_file, " {\ngoto %s%d;\n}\n", opt_prefix_arg, label_c);
- fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
- outstr("}\n");
- label_continue = save_label_c;
- label_break = save_label_b;
- }
- static void do_for(void)
- {
- int label_l = ++label_counter;
- int label_c = ++label_counter;
- int label_b = ++label_counter;
- int label_i = ++label_counter;
- int save_label_c = label_continue;
- int save_label_b = label_break;
- label_continue = label_c;
- label_break = label_b;
- outstr("{\n");
- while (lookahead != '(')
- next_token();
- next_token();
- parse_until(UNTIL_ENDOFINSTR);
- fprintf(output_file, "\n%s%d:\n", opt_prefix_arg, label_l);
- outstr("if (!(");
- if (!parse_until(UNTIL_ENDOFINSTR_NOECHO))
- fprintf(output_file, "1");
- outstr("))\n");
- fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_b);
- fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_i);
- fprintf(output_file, "%s%d:", opt_prefix_arg, label_c);
- parse_until(UNTIL_CLOSEPAREN_NOECHO);
- outstr(";\n");
- fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_l);
- fprintf(output_file, "%s%d:\n", opt_prefix_arg, label_i);
- parse_until(UNTIL_ENDOFINSTR);
- fprintf(output_file, "\ngoto %s%d;\n", opt_prefix_arg, label_c);
- fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_b);
- outstr("}\n");
- label_continue = save_label_c;
- label_break = save_label_b;
- }
- static void do_switch(void)
- {
- int save_label_b = label_break;
- label_break = 0;
- outstr("switch");
- parse_until(UNTIL_ENDOFINSTR);
- label_break = save_label_b;
- }
- static void do_if(void)
- {
- int label_l = ++label_counter;
- int label_e;
- while (lookahead != '(')
- next_token();
- next_token();
- outstr("{\n");
- outstr("if (!(");
- parse_until(UNTIL_CLOSEPAREN);
- outstr(") {\n");
- fprintf(output_file, "goto %s%d;\n}\n", opt_prefix_arg, label_l);
- parse_until(UNTIL_ENDOFINSTR);
- while (isspace(lookahead))
- next_token();
- if (lookahead == KW_ELSE) {
- label_e = ++label_counter;
- fprintf(output_file, "goto %s%d;\n", opt_prefix_arg, label_e);
- }
- fprintf(output_file, "%s%d:;\n", opt_prefix_arg, label_l);
- if (lookahead == KW_ELSE) {
- next_token();
- parse_until(UNTIL_ENDOFINSTR);
- fprintf(output_file, "\n%s%d:;\n", opt_prefix_arg, label_e);
- }
- outstr("}\n");
- }
- /*
- * The main parsing function.
- */
- static int parse_until(int untiltk)
- {
- int nparens = 0, nblocks = 0;
- int isexpr = 0;
- if (untiltk == UNTIL_CLOSEPAREN || untiltk == UNTIL_CLOSEPAREN_NOECHO)
- nparens++;
- while (lookahead != 0)
- switch (lookahead) {
- case '(':
- next_token();
- isexpr = 1;
- if (nblocks == 0)
- nparens++;
- outch('(');
- break;
- case ')':
- next_token();
- isexpr = 1;
- if (nblocks == 0)
- nparens--;
- if (untiltk == UNTIL_CLOSEPAREN_NOECHO && nparens == 0)
- return isexpr;
- outch(')');
- if (untiltk == UNTIL_CLOSEPAREN && nparens == 0)
- return isexpr;
- break;
- case '{':
- next_token();
- isexpr = 1;
- nblocks++;
- outch('{');
- break;
- case '}':
- next_token();
- isexpr = 1;
- nblocks--;
- outch('}');
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case ';':
- next_token();
- if (untiltk == UNTIL_ENDOFINSTR_NOECHO && nblocks == 0)
- return isexpr;
- outch(';');
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_DO:
- next_token();
- do_do();
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_WHILE:
- next_token();
- do_while();
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_FOR:
- next_token();
- do_for();
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_SWITCH:
- next_token();
- do_switch();
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_IF:
- next_token();
- do_if();
- if (untiltk == UNTIL_ENDOFINSTR && nblocks == 0)
- return isexpr;
- break;
- case KW_BREAK:
- next_token();
- if (label_break > 0)
- fprintf(output_file, "goto %s%d", opt_prefix_arg, label_break);
- else
- outstr("break");
- break;
- case KW_CONTINUE:
- next_token();
- if (label_continue > 0)
- fprintf(output_file, "goto %s%d", opt_prefix_arg, label_continue);
- else
- outstr("continue");
- break;
- default:
- if (!isspace(lookahead))
- isexpr = 1;
- outtk(lookahead);
- next_token();
- }
- return isexpr;
- }
/*
 * Translate the whole current input: prime the lookahead token, then let
 * parse_until() run with no stop condition, i.e. to end of input.
 */
static void parse(void)
{
	next_token();
	(void)parse_until(0);
}
- static void process_file(char *filename)
- {
- if (filename != NULL && strcmp(filename, "-") != 0) {
- if ((yyin = fopen(filename, "r")) == NULL)
- fprintf(stderr, "cannot open input file %s\n", filename);
- } else
- yyin = stdin;
- init_lex();
- label_continue = label_break = label_counter = 0;
- parse();
- done_lex();
- if (yyin != stdin)
- fclose(yyin);
- }
/*
 * Write the command-line synopsis to stderr.  This function only prints;
 * ending the program is left to the caller.
 */
static void usage(void)
{
	fputs("usage: simplec [-V] [-o file] [-p prefix] [file ...]\n", stderr);
}
- /*
- * Used by the err() functions.
- */
- char *progname = NULL;
- int main(int argc, char **argv)
- {
- int c;
- progname = argv[0];
- output_file = stdout;
- while ((c = getopt(argc, argv, "Vo:p:")) != -1)
- switch (c) {
- case 'o':
- if (output_file != stdout)
- fclose(output_file);
- if ((output_file = fopen(optarg, "w")) == NULL) {
- fprintf (stderr, "cannot open output file %s\n", optarg);
- return (0);
- }
- break;
- case 'p':
- opt_prefix = 1;
- opt_prefix_arg = optarg;
- break;
- case 'V':
- fprintf(stderr, "%s\n", "simplec based on cunloop from cutils-1.6");
- return (0);
- case '?':
- default:
- usage();
- /* NOTREACHED */
- return (0);
- }
- argc -= optind;
- argv += optind;
- if (!opt_prefix)
- opt_prefix_arg = DEFAULT_PREFIX;
- if (*argv) {
- /* scan the filenames */
- while (*argv) {
- process_file(*argv++);
- }
- } else {
- /* if no file pecified use stdin */
- process_file((char *)0);
- }
- if (output_file != stdout)
- fclose(output_file);
- return 0;
- }
- /* end. */
|