123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229 |
- /**
- Strips C- and C++-style comments from stdin, sending the results to
- stdout. It assumes that its input is legal C-like code and performs
- only minimal error handling.
- It treats string literals as anything starting and ending with
- matching double OR single quotes OR backticks (for use with
- scripting languages which use those). It assumes that a quote
- character within a string which uses the same quote type is escaped
- by a backslash. It should not be used on any code which might
- contain C/C++ comments inside heredocs, and similar constructs, as
- it will strip those out.
- Usage: $0 [--keep-first|-k] < input > output
- The --keep-first (-k) flag tells it to retain the first comment in the
- input stream (which is often a license or attribution block). It
- may be given repeatedly, each one incrementing the number of
- retained comments by one.
- License: Public Domain
- Author: Stephan Beal (stephan@wanderinghorse.net)
- */
- #include <stdio.h>
- #include <assert.h>
- #include <string.h>
/*
  MARKER((fmt, ...)) is a debugging aid. Note the doubled parentheses:
  the macro's single argument is a complete, parenthesized printf()
  argument list.

  When enabled (#if 1) it prints "MARKER: file:line:" followed by the
  formatted message. It prints via printf(), i.e. to stdout, which is
  also the stream main() assigns to App.output, so any active MARKER
  call ends up interleaved with the filtered result.

  When disabled (#if 0) it expands to a single statement which never
  prints but still lets the compiler check the arguments. It is wrapped
  in do/while(0) so that it cannot capture a following "else".
*/
#if 1
#define MARKER(pfexp)                                   \
  do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__);     \
    printf pfexp;                                       \
  } while(0)
#else
#define MARKER(pfexp) do{ if(0){ printf pfexp; } }while(0)
#endif
/*
  Global application state shared by main() and do_it_all().
*/
struct {
  FILE * input;    /* stream the source text is read from */
  FILE * output;   /* stream the filtered text is written to */
  int rc;          /* set to non-zero by do_it_all() on error */
  int keepFirst;   /* number of upcoming comments to retain */
} App = {
  0/*input*/,
  0/*output*/,
  0/*rc*/,
  0/*keepFirst*/
};

/* Returns non-zero if ch is one of the characters which delimit a
   quoted literal: single quote, double quote or backtick. */
static int is_quote_char(int ch){
  return '\''==ch || '"'==ch || '`'==ch;
}

/*
  Copies one quoted literal from in to out. The opening quote
  character (quote) must already have been consumed from in; it is
  written to out here. Copying stops after the first unescaped
  occurrence of the same quote character. A backslash escapes the
  character which follows it.

  *line and *col are updated to track the input position.

  Returns 0 on success. If EOF is reached before the closing quote, an
  error message naming the literal's starting position is written to
  stderr and non-zero is returned.
*/
static int copy_literal(FILE *in, FILE *out, int quote, int *line, int *col){
  int const startLine = *line;
  int const startCol = *col;
  int ch;
  int escaped = 0;
  fputc(quote, out);
  for( ++*col; EOF != (ch = fgetc(in)); ++*col ){
    int closes = 0;
    if('\\'==ch){
      escaped = !escaped;
    }else{
      closes = (!escaped && quote==ch);
      escaped = 0;
    }
    if('\n'==ch){
      ++*line;
      *col = 0;
    }
    fputc(ch, out);
    if(closes){
      ++*col;
      return 0;
    }
  }
  fprintf(stderr, "Unexpected EOF while reading %s literal "
          "on line %d column %d.\n",
          ('\''==quote) ? "char" : "string",
          startLine, startCol);
  return 1;
}

/*
  Copies App.input to App.output, removing C-style and C++-style
  comments. The contents of quoted literals (single quotes, double
  quotes, backticks) are copied verbatim, so comment markers inside
  them are left alone.

  Each comment encountered while App.keepFirst is positive is copied
  through unchanged and decrements App.keepFirst by one.

  The newline which terminates a C++-style comment is always kept.

  If the input ends inside a quoted literal, an error is reported on
  stderr, App.rc is set to 1 and the function returns immediately.
*/
void do_it_all(void){
  enum states {
    S_NONE = 0 /* not in comment */,
    S_SLASH1 = 1 /* slash seen - possibly a comment prefix */,
    S_CPP = 2 /* in C++ comment */,
    S_C = 3 /* in C comment */
  };
  int const slash = '/';
  int const star = '*';
  FILE * const in = App.input;
  FILE * const out = App.output;
  enum states state = S_NONE;
  int ch, prev = EOF;
  int line = 1;
  int col = 0;
  int elide = 0 /* true while the current comment is being dropped */;
  /* Column of the slash which opened the current C-style comment, or
     -99 if not applicable. Needed for a comment opened by slash-star
     immediately followed by another slash: that star-slash pair must
     not be mistaken for the end of the comment. */
  int state3Col = -99;

  for( ; EOF != (ch = fgetc(in)); prev = ch, ++col ){
    if(S_SLASH1 == state && star != ch && slash != ch){
      /* The pending slash was not a comment opener after all. Emit it
         and handle ch as ordinary code so that a quote directly after
         a slash still starts a literal. */
      fputc(slash, out);
      state = S_NONE;
    }
    switch(state){
      case S_NONE:
        if(is_quote_char(ch)){
          if(copy_literal(in, out, ch, &line, &col)){
            App.rc = 1;
            return;
          }
        }else if(slash == ch){
          /* Hold the slash back until we know what follows it. */
          state = S_SLASH1;
        }else{
          fputc(ch, out);
        }
        break;
      case S_SLASH1:
        /* ch is a star or a second slash: a comment starts here. */
        if(App.keepFirst > 0){
          elide = 0;
          --App.keepFirst;
        }else{
          elide = 1;
        }
        if(star == ch){
          state = S_C;
          state3Col = col - 1;
        }else{
          state = S_CPP;
        }
        if(!elide){
          fputc(slash, out);
          fputc(ch, out);
        }
        break;
      case S_CPP:
        if('\n' == ch){
          /* End of the comment; the newline itself is always kept. */
          state = S_NONE;
          elide = 0;
        }
        if(!elide){
          fputc(ch, out);
        }
        break;
      case S_C:
        if(!elide){
          fputc(ch, out);
        }
        if(slash == ch && star == prev && col != state3Col + 2){
          /* Genuine star-slash terminator (the column check rules out
             the star which belongs to the comment opener). */
          state = S_NONE;
          elide = 0;
          state3Col = -99;
        }
        break;
      default:
        assert(!"impossible!");
        break;
    }
    if('\n' == ch){
      ++line;
      col = 0;
      state3Col = -99;
    }
  }
  if(S_SLASH1 == state){
    /* The input ended with a lone slash: do not lose it. */
    fputc(slash, out);
  }
}
/*
  Writes a short description of the program and its command-line
  syntax to stderr. zAppName is the program name shown in the
  "Usage:" line.
*/
static void usage(char const *zAppName){
  /* The first line has no format specifiers, so fputs() suffices. */
  fputs("Strips C- and C++-style comments from stdin and sends "
        "the results to stdout.\n",
        stderr);
  fprintf(stderr,
          "Usage: %s [--keep-first|-k] < input > output\n",
          zAppName);
}
- int main( int argc, char const * const * argv ){
- int i;
- for(i = 1; i < argc; ++i){
- char const * zArg = argv[i];
- while( '-'==*zArg ) ++zArg;
- if( 0==strcmp(zArg,"k")
- || 0==strcmp(zArg,"keep-first") ){
- ++App.keepFirst;
- }else{
- usage(argv[0]);
- return 1;
- }
- }
- App.input = stdin;
- App.output = stdout;
- do_it_all();
- return App.rc ? 1 : 0;
- }
|