/* 0000.c (7.5 KB) */
  /*
   * Close the token that is currently being built.
   *
   * Must be expanded in a scope that has two int lvalues in view: `j` (length
   * of the token in progress) and `ntok` (number of finished tokens).  When j
   * is non-zero it is reset to 0 and ntok is advanced; an empty token is not
   * counted.
   *
   * Wrapped in do { } while (0) so that `septok();` is exactly one statement
   * and can safely be followed by `else`.
   */
  #define septok() do { if (j) { j = 0; ntok++; } } while (0)
  #include <ctype.h>
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  /*
   * NOTE(review): neither `r` nor gloob() is referenced anywhere in the visible
   * source.  They look like sample input for the tokenizer (odd declarators,
   * a spread of compound operators) -- confirm before removing them.
   */
  double r = 5.5 * 4;

  /*
   * Runs a fixed chain of compound assignments on a local integer and
   * discards the result.
   *
   * Every statement modifies `k` exactly once, the modulus and shift operands
   * are positive, and the function returns a value, so calling it is well
   * defined.
   *
   * t, ...: accepted but never read.
   * Returns NULL.
   */
  char **gloob(void *t[], ...)
  {
      int k = 3;

      (void)t;

      k += k;                  /*  6 */
      k++;                     /*  7 */
      k *= k;                  /* 49 */
      k %= 10;                 /*  9 */
      k <<= 2;                 /* 36 */
      k--;                     /* 35 */
      k |= 64;                 /* 99 */
      --k;                     /* 98 */
      k -= (k << 1) ? 3 : 2;   /* 95 */

      (void)k;
      return NULL;
  }
  /*
   * We know this isn't, and cannot be, a perfect C tokenizer: there is no
   * guarantee for programs that are not well formed (or that use wild
   * #defines).
   */

  /* Growable buffer holding the token that is currently being assembled. */
  struct form_tok {
      char *text;   /* heap storage, NULL until the first character arrives */
      size_t len;   /* characters collected so far */
      size_t cap;   /* bytes allocated for text */
      int failed;   /* set once an allocation fails */
  };

  /* Append one character to the pending token, growing the buffer on demand. */
  static void form_tok_put(struct form_tok *t, char ch)
  {
      if (t->failed) {
          return;
      }
      /* Always keep one spare byte for the terminating NUL. */
      if (t->len + 1 >= t->cap) {
          size_t grown_cap = t->cap ? t->cap * 2 : 64;
          char *grown = NULL;

          if (t->cap <= (size_t)-1 / 2) {
              grown = realloc(t->text, grown_cap);
          }
          if (grown == NULL) {
              t->failed = 1;
              return;
          }
          t->text = grown;
          t->cap = grown_cap;
      }
      t->text[t->len++] = ch;
  }

  /* Print the pending token on its own line (if non-empty) and start a new one. */
  static void form_tok_end(struct form_tok *t)
  {
      if (t->len == 0) {
          return;
      }
      t->text[t->len] = '\0';
      puts(t->text);
      t->len = 0;
  }

  /* Emit `chars` as a token of its own, closing whatever was pending first. */
  static void form_emit(struct form_tok *t, const char *chars)
  {
      form_tok_end(t);
      while (*chars != '\0') {
          form_tok_put(t, *chars++);
      }
      form_tok_end(t);
  }

  /* Move a pending operator (one char, or a doubled '<' / '>') into the token. */
  static void form_op_append(struct form_tok *t, char *first, char *three)
  {
      if (*three) {
          form_tok_put(t, *three);
      }
      if (*first) {
          form_tok_put(t, *first);
      }
      *first = '\0';
      *three = '\0';
  }

  /* Emit a pending operator, if any, as its own token. */
  static void form_op_flush(struct form_tok *t, char *first, char *three)
  {
      if (*first) {
          form_tok_end(t);
          form_op_append(t, first, three);
          form_tok_end(t);
      }
  }

  /* Blank characters that separate tokens outside preprocessor lines. */
  static int form_is_blank(char c)
  {
      return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v';
  }

  /*
   * Split the NUL-terminated C source text `program` into tokens and print
   * them to stdout, one per line, in source order.
   *
   *  - Comments are dropped.  A backslash followed by a newline joins lines.
   *  - String and character literals are kept whole, quotes included.
   *  - A preprocessor line (from '#' to the end of the line) is one token with
   *    its inner spacing preserved.
   *  - Each of ( ) [ ] ; , ? : is a token of its own.
   *  - Operator characters pair up into the two-character operators listed in
   *    `pairs`; "<<", ">>", "<<=" and ">>=" are recognised as well.
   *  - Everything else accumulates until a separator is met.
   *
   * Tokens are printed as soon as they are complete, so there is no limit on
   * their number or length.  If memory for a token cannot be obtained,
   * tokenizing stops and a message is written to stderr.  A NULL `program`
   * prints nothing.
   *
   * Known limitations: quotes inside preprocessor lines are not tracked,
   * braces and '.' do not separate tokens, and trigraphs are not handled.
   */
  void form(char *program)
  {
      static const char punct[] = "()[];,?:";
      static const char opchars[] = "/^+*%-=<>!&|";
      /* "<<" and ">>" are absent: the doubling logic below owns them. */
      static const char pairs[] =
          "== ^= && || &= |= /= ++ -- += *= %= -= -> <= >= != ";

      struct form_tok tk = {0};
      int dq = 0;         /* inside a "string" literal */
      int sq = 0;         /* inside a 'char' literal */
      int sharp = 0;      /* inside a preprocessor line */
      int bs = 0;         /* previous character was a backslash */
      int eolc = 0;       /* inside a // comment */
      int longc = 0;      /* block comment: 1 = in body, 2 = just saw '*' */
      char first = '\0';  /* operator character waiting for a partner */
      char three = '\0';  /* set when `first` completes "<<" or ">>" */

      if (program == NULL) {
          return;
      }

      for (const char *p = program; *p != '\0' && !tk.failed; p++) {
          const char c = *p;

          /* Character after a backslash: line splice or escaped character. */
          if (bs) {
              bs = 0;
              if (c != '\n' && !eolc && !longc) {
                  form_tok_put(&tk, '\\');
                  form_tok_put(&tk, c);
              }
              continue;
          }
          if (c == '\\') {
              bs = 1;
              continue;
          }

          if (eolc) {
              if (c == '\n') {
                  /* The newline also ends a directive the comment sat on. */
                  eolc = 0;
                  sharp = 0;
                  form_tok_end(&tk);
              }
              continue;
          }

          if (longc) {
              if (c == '/' && longc == 2) {
                  longc = 0;
                  if (!sharp) {
                      form_tok_end(&tk);
                  }
              } else {
                  /* Only a '*' immediately before '/' may close the comment. */
                  longc = (c == '*') ? 2 : 1;
              }
              continue;
          }

          if (sq) {
              form_tok_put(&tk, c);
              if (c == '\'') {
                  sq = 0;
                  form_tok_end(&tk);
              }
              continue;
          }
          if (dq) {
              form_tok_put(&tk, c);
              if (c == '"') {
                  dq = 0;
                  form_tok_end(&tk);
              }
              continue;
          }

          if (c == '\n') {
              if (sharp) {
                  /* A trailing '/' that opened no comment is directive text. */
                  form_op_append(&tk, &first, &three);
                  sharp = 0;
              } else {
                  form_op_flush(&tk, &first, &three);
              }
              form_tok_end(&tk);
              continue;
          }

          if (!sharp && form_is_blank(c)) {
              /* Blank space ends a pending operator: "a / *p", "a - -b". */
              form_op_flush(&tk, &first, &three);
              form_tok_end(&tk);
              continue;
          }

          if (c == '#') {
              sharp = 1;
              form_tok_put(&tk, c);
              continue;
          }

          if (first == '/' && c == '/') {
              first = '\0';
              eolc = 1;
              continue;
          }
          if (first == '/' && c == '*') {
              first = '\0';
              longc = 1;
              continue;
          }

          if (sharp && c != '/') {
              /* A '/' that did not start a comment stays in the directive. */
              form_op_append(&tk, &first, &three);
              form_tok_put(&tk, c);
              continue;
          }

          if (first) {
              if (three) {
                  /* "<<" or ">>" is complete; only '=' can still extend it. */
                  if (c == '=') {
                      const char op[4] = { three, first, '=', '\0' };

                      form_emit(&tk, op);
                      first = '\0';
                      three = '\0';
                      continue;
                  }
                  form_op_flush(&tk, &first, &three);
              } else if ((first == '<' || first == '>') && c == first) {
                  three = c;
                  continue;
              } else {
                  /* The trailing space keeps the search aligned on one entry. */
                  const char probe[4] = { first, c, ' ', '\0' };

                  if (strstr(pairs, probe) != NULL) {
                      const char op[3] = { first, c, '\0' };

                      form_emit(&tk, op);
                      first = '\0';
                      continue;
                  }
                  form_op_flush(&tk, &first, &three);
              }
              /* The operator is out; `c` is handled below like any other. */
          }

          /* TODO how to treat '.'? varargs and decimal point both use it. */
          if (strchr(punct, c) != NULL) {
              const char op[2] = { c, '\0' };

              form_emit(&tk, op);
              continue;
          }

          if (strchr(opchars, c) != NULL) {
              first = c;
              continue;
          }

          if (c == '"' || c == '\'') {
              if (c == '"') {
                  dq = 1;
              } else {
                  sq = 1;
              }
              form_tok_end(&tk);
              form_tok_put(&tk, c);
              continue;
          }

          form_tok_put(&tk, c);
      }

      /* Do not lose whatever was still pending at the end of the input. */
      if (sharp) {
          form_op_append(&tk, &first, &three);
      } else {
          form_op_flush(&tk, &first, &three);
      }
      form_tok_end(&tk);

      if (tk.failed) {
          fputs("form: out of memory while building a token\n", stderr);
      }
      free(tk.text);
  }
  /*
   * Read all of standard input into one NUL-terminated buffer, hand it to
   * form(), and release the buffer.
   *
   * The buffer doubles whenever it fills up.  Returns EXIT_FAILURE with a
   * message on stderr if memory runs out or reading fails, EXIT_SUCCESS
   * otherwise.  Command-line arguments are ignored.
   */
  int main(int argc, char **argv)
  {
      size_t cap = 4096;   /* bytes allocated, terminator included */
      size_t len = 0;      /* bytes of input stored so far */
      char *program = malloc(cap);

      (void)argc;
      (void)argv;

      if (program == NULL) {
          fputs("out of memory\n", stderr);
          return EXIT_FAILURE;
      }

      for (;;) {
          /* Leave one byte free for the terminating NUL. */
          size_t want = cap - len - 1;
          size_t got = fread(program + len, sizeof(char), want, stdin);

          len += got;
          if (got < want) {
              /* A short read means end of input or an error, never "retry". */
              if (ferror(stdin)) {
                  fputs("error while reading standard input\n", stderr);
                  free(program);
                  return EXIT_FAILURE;
              }
              break;
          }

          /* Buffer is full: double it, keeping the old block if that fails. */
          char *grown = NULL;

          if (cap <= (size_t)-1 / 2) {
              grown = realloc(program, cap * 2);
          }
          if (grown == NULL) {
              fputs("out of memory\n", stderr);
              free(program);
              return EXIT_FAILURE;
          }
          program = grown;
          cap *= 2;
      }

      program[len] = '\0';
      form(program);
      free(program);
      return EXIT_SUCCESS;
  }