/* tinflat.c -- reads program text from stdin and prints it one token per line. */
  1. #include<math.h>
  2. #include<stdio.h>
  3. #include<stdlib.h>
  4. #include<string.h>
  5. #include<ctype.h>
  6. #include<stdarg.h>
  7. const static unsigned int tokinc=4096;
  8. // #define septok() Y(&tokens, &j, &ntok, &tokensmax)
  9. void Y(char ***tokens, int *j, int *ntok, int *tokensmax) {
  10. if (*j) {
  11. (*tokens)[*ntok][(*j)++]=0; (*j)=0; (*ntok)++;
  12. }
  13. // fprintf( stderr, "%ld %ld\n", *ntok, *tokensmax );
  14. if (*ntok>=*tokensmax) {
  15. // fprintf( stderr, "%ld >= %ld\n", *ntok, *tokensmax );
  16. *tokensmax += tokinc;
  17. *tokens = realloc(*tokens, *tokensmax * sizeof(char*));
  18. for ( int op = *tokensmax - tokinc ; op < *tokensmax ; ++op ) {
  19. (*tokens)[op] = malloc(256 * sizeof(char)); memset((*tokens)[op], 0, 256);
  20. }
  21. }
  22. }
  23. // NotaBene: c can never be 0 before the end.
  24. void exttoki(char ***tokens, int *ntok, int *j, char c) {
  25. (*tokens)[*ntok][(*j)++] = c;
  26. if ((*j) % 256) {
  27. // yeah I know length should be stored to make this efficient
  28. (*tokens)[*ntok] = realloc((*tokens)[*ntok],
  29. 2*(strlen((*tokens)[*ntok])+1));
  30. }
  31. }
  32. void toko(char* program) {
  33. char **tokens;
  34. // int tokensmax = 8; // more reasonable: 8192;
  35. int tokensmax = 8192;
  36. tokens = malloc(tokensmax * sizeof(char*));
  37. int ntok = 0;
  38. for (int i=0; i < tokensmax; ++i) {
  39. tokens[i] = malloc(256 * sizeof(char));
  40. memset(tokens[i], 0, 256);
  41. }
  42. char fla = 0x0;
  43. int splash= 0;
  44. char first='\0';
  45. char prev ='\0';
  46. char three = 0;
  47. int j=0;
  48. char c = '\0';
  49. for (int i=0; i<strlen(program); i++) {
  50. prev = c;
  51. c = program[i];
  52. if (c == 0) break;
  53. if (fla & 1) {
  54. if (c == '\n') {
  55. fla ^= 1;
  56. continue;
  57. }
  58. else {
  59. fla ^= 1;
  60. if (!(fla & 4) && !(fla & 8)) {
  61. exttoki(&tokens, &ntok, &j, '\\');
  62. exttoki(&tokens, &ntok, &j, c);
  63. }
  64. continue;
  65. }
  66. }
  67. if (c == '\\') {
  68. fla |= 1;
  69. continue;
  70. }
  71. if (fla & 4) {
  72. if (c == '\n') {
  73. fla ^= 4;
  74. Y(&tokens, &j, &ntok, &tokensmax);
  75. continue;
  76. } else {
  77. continue;
  78. }
  79. }
  80. if (fla & 8) {
  81. if (c == '/' && splash) {
  82. fla ^= 8; splash = 0;
  83. if (!(fla & 2)) Y(&tokens, &j, &ntok, &tokensmax);
  84. continue;
  85. }
  86. if (c == '*') {
  87. splash = 1;
  88. continue;
  89. } else {
  90. continue;
  91. }
  92. }
  93. if (fla & 32) {
  94. if (c == '\'') {
  95. exttoki(&tokens, &ntok, &j, '\'');
  96. fla ^= 32; // sq = 0;
  97. Y(&tokens, &j, &ntok, &tokensmax);
  98. continue;
  99. } else {
  100. exttoki(&tokens, &ntok, &j, c);
  101. continue;
  102. }
  103. }
  104. if (fla & 16) {
  105. if (c == '"') {
  106. exttoki(&tokens, &ntok, &j, '"');
  107. fla ^= 16; //dq = 0;
  108. Y(&tokens, &j, &ntok, &tokensmax);
  109. continue;
  110. } else {
  111. exttoki(&tokens, &ntok, &j, c);
  112. continue;
  113. }
  114. }
  115. if (c == ' ' && !(fla & 2)){
  116. // !sharp && !sharp &&! sharp&&!sharp) {
  117. Y(&tokens, &j, &ntok, &tokensmax);
  118. continue;
  119. }
  120. if (c == '\n' && !(fla & 2)) {
  121. Y(&tokens, &j, &ntok, &tokensmax);
  122. continue;
  123. }
  124. if (c == '\n' && (fla & 2)) {
  125. fla ^= 2;
  126. Y(&tokens, &j, &ntok, &tokensmax);
  127. continue;
  128. }
  129. if (c == '#') {
  130. fla |= 2;
  131. exttoki(&tokens, &ntok, &j, c);
  132. continue;
  133. }
  134. if (c == '/' && prev == '/') {
  135. first = '\0';
  136. fla |= 4;
  137. continue;
  138. }
  139. if (c == '*' && prev == '/') {
  140. first = '\0';
  141. fla |= 8;
  142. continue;
  143. }
  144. if (c != '/' && c != '\n' && (fla & 2)) {
  145. exttoki(&tokens, &ntok, &j, c);
  146. continue;
  147. }
  148. if (index("{}()[];,?:", c)) {
  149. if (first) { Y(&tokens, &j, &ntok, &tokensmax); exttoki(&tokens, &ntok, &j, first); first = 0; }
  150. Y(&tokens, &j, &ntok, &tokensmax);
  151. exttoki(&tokens, &ntok, &j, c);
  152. Y(&tokens, &j, &ntok, &tokensmax);
  153. continue;
  154. }
  155. if (first) {
  156. if (first == '<' || first == '>') {
  157. if (three) {
  158. Y(&tokens, &j, &ntok, &tokensmax);
  159. exttoki(&tokens, &ntok, &j, three);
  160. exttoki(&tokens, &ntok, &j, first);
  161. first = 0;
  162. three = 0;
  163. if (c == '=') {
  164. exttoki(&tokens, &ntok, &j, c);
  165. Y(&tokens, &j, &ntok, &tokensmax);
  166. } else {
  167. Y(&tokens, &j, &ntok, &tokensmax);
  168. if (index("/^+*%-=<>!&|", c)) {
  169. first = c;
  170. } else {
  171. first = 0;
  172. exttoki(&tokens, &ntok, &j, c);
  173. }
  174. }
  175. continue;
  176. }
  177. if (!three && first == c) {
  178. three = c;
  179. continue;
  180. }
  181. }
  182. char zee[4] = {first, c, ' ', 0};
  183. char* p = strstr("== ^= && || &= |= /= ++ -- += *= %= -= << >> <= >= != ", zee);
  184. if (p) {
  185. Y(&tokens, &j, &ntok, &tokensmax);
  186. exttoki(&tokens, &ntok, &j, first);
  187. exttoki(&tokens, &ntok, &j, c);
  188. first = 0;
  189. Y(&tokens, &j, &ntok, &tokensmax);
  190. continue;
  191. } else {
  192. Y(&tokens, &j, &ntok, &tokensmax);
  193. exttoki(&tokens, &ntok, &j, first);
  194. Y(&tokens, &j, &ntok, &tokensmax);
  195. if (c == '"') { first =0; goto dqon; }
  196. if (c == '\'') { first =0; goto sqon; }
  197. if (index("/^+*%-=<>!&|", c)) {
  198. first = c;
  199. } else {
  200. first = 0;
  201. exttoki(&tokens, &ntok, &j, c);
  202. }
  203. continue;
  204. }
  205. }
  206. if (index("/^+*%-=<>!&|", c) && !first) {
  207. first = c;
  208. continue;
  209. }
  210. if (c == '"') {
  211. dqon:
  212. fla |= 16; // dq = 1;
  213. Y(&tokens, &j, &ntok, &tokensmax);
  214. exttoki(&tokens, &ntok, &j, c);
  215. continue;
  216. }
  217. if (c == '\'') {
  218. sqon:
  219. fla |= 32; // sq = 1;
  220. Y(&tokens, &j, &ntok, &tokensmax);
  221. exttoki(&tokens, &ntok, &j, c);
  222. continue;
  223. }
  224. // if (prev && ((prev == '_' || isalpha(prev) && (c == '.'))) Y(&tokens, &j, &ntok, &tokensmax);
  225. // TODO this necessitates check for NUMBER before ...
  226. // "digit" is not enough ...
  227. exttoki(&tokens, &ntok, &j, c);
  228. }
  229. // print out flatly ...
  230. for (int i=0; i<=ntok; ++i) {
  231. printf("%s\n", tokens[i]);
  232. }
  233. fprintf(stdout, "\n");
  234. for (int i=0; i < tokensmax; ++i) {
  235. free(tokens[i]);
  236. }
  237. free(tokens);
  238. }
  239. int main(int argc, char ** argv) {
  240. const unsigned int S=23;
  241. char * program = (char*)malloc(S*sizeof(char));
  242. int i=0;
  243. for(;;) {
  244. size_t bytes = fread(program+i, sizeof(char), S, stdin);
  245. if (bytes < S)
  246. if (feof(stdin)) {
  247. program[i+bytes] = 0;
  248. break;
  249. }
  250. i += bytes;
  251. program = (char*)realloc(program, (S+i)*sizeof(char));
  252. }
  253. toko(program);
  254. free(program);
  255. }