avdl_lexer.c 10 KB


  1. #include <errno.h>
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <ctype.h>
  6. #include "avdl_symtable.h"
  7. #include "avdl_lexer.h"
  /*
   * Prefix prepended to the name of every included file when it is not NULL.
   * Only declared here; the definition lives outside this file.
   */
  extern char *includePath;

  /* Scratch buffer: receives the text of the token currently being scanned. */
  static char buffer[500];

  /* Text of the most recent token that was read without peeking. */
  static char lastTokenRead[200];

  /* Type (a LEXER_TOKEN_* value) of the most recent token read without peeking. */
  static int lastToken;
  /*
   * Lexing state of one open source file (the base file or an included one).
   */
  struct file_properties {
      FILE *f;                     /* open stream; reset to 0 after it is closed */
      char filename[200];          /* name the stream was opened with */
      int currentLineNumber;       /* line being lexed, starting at 1 */
      int currentCharacterNumber;  /* character offset inside the current line */
      long lex_pos_previousLine;   /* stream offset where the previous line begins */
      long lex_pos_currentLine;    /* stream offset where the current line begins */
      long lex_pos_current;        /* stream offset right after the last token read */
  };
  21. static char pastFiles[100][100];
  22. static int currentPastFile = -1;
  23. static struct file_properties files[10];
  24. static int currentFile = -1;
  25. static void lexer_includePop(int peek) {
  26. if (currentFile == 0) {
  27. printf("lexer: attempted to pop base file\n");
  28. exit(-1);
  29. }
  30. if (files[currentFile].f && !peek) {
  31. fclose(files[currentFile].f);
  32. files[currentFile].f = 0;
  33. }
  34. currentFile--;
  35. }
  36. void lexer_prepare(const char *filename) {
  37. lastTokenRead[0] = '\0';
  38. lastToken = LEXER_TOKEN_UNKNOWN;
  39. currentFile = 0;
  40. strcpy(files[0].filename, filename);
  41. files[0].currentLineNumber = 1;
  42. files[0].currentCharacterNumber = 0;
  43. files[0].f = fopen(filename, "r");
  44. if (!files[0].f) {
  45. printf("avdl error: Unable to open '%s': %s\n", filename, strerror(errno));
  46. exit(-1);
  47. }
  48. files[0].lex_pos_previousLine =
  49. files[0].lex_pos_currentLine =
  50. files[0].lex_pos_current =
  51. ftell(files[0].f);
  52. }
  53. void lexer_clean() {
  54. for (int i = 0; i < currentFile; i++) {
  55. if (files[i].f) {
  56. fclose(files[i].f);
  57. files[i].f = 0;
  58. }
  59. }
  60. currentFile = -1;
  61. }
  62. static int getNextToken(int peek) {
  63. files[currentFile].currentCharacterNumber += strlen(lastTokenRead);
  64. if (lastToken == LEXER_TOKEN_STRING) {
  65. files[currentFile].currentCharacterNumber += 2;
  66. }
  67. //printf("current char number += %d, %s = %d\n", strlen(buffer), buffer, files[currentFile].currentCharacterNumber);
  68. // clear characters that are not tokens (whitespace / comments)
  69. int allClear;
  70. do {
  71. allClear = 0;
  72. // new line
  73. allClear += fscanf(files[currentFile].f, "%1[\n]", buffer);
  74. if (allClear) {
  75. //printf("ignored new line\n");
  76. files[currentFile].currentLineNumber++;
  77. files[currentFile].currentCharacterNumber = 0;
  78. files[currentFile].lex_pos_previousLine = files[currentFile].lex_pos_currentLine;
  79. files[currentFile].lex_pos_currentLine = ftell(files[currentFile].f);
  80. }
  81. // ignore whitespace
  82. buffer[0] = '\0';
  83. allClear += fscanf(files[currentFile].f, "%100[ \t]*", buffer);
  84. files[currentFile].currentCharacterNumber += strlen(buffer);
  85. //printf("ignored whitespace: %s\n", buffer);
  86. // ignore comments
  87. buffer[0] = '\0';
  88. allClear += fscanf(files[currentFile].f, "%1[#]%*[^\n]*", buffer);
  89. files[currentFile].currentCharacterNumber += strlen(buffer);
  90. //printf("ignored comment: %s\n", buffer);
  91. //printf("clear: %d\n", allClear);
  92. // reached EOF in included file
  93. if (feof(files[currentFile].f) && currentFile > 0) {
  94. lexer_includePop(peek);
  95. return lexer_getNextToken();
  96. }
  97. } while (allClear > 0);
  98. // read a character and find out what token it is
  99. buffer[0] = '\0';
  100. fscanf(files[currentFile].f, "%1c", buffer);
  101. //printf("read character: %c\n", buffer[0]);
  102. buffer[1] = '\0';
  103. int returnToken = LEXER_TOKEN_UNKNOWN;
  104. // start of command
  105. if (buffer[0] == '(') {
  106. //printf("command start: %s\n", buffer);
  107. //return LEXER_TOKEN_COMMANDSTART;
  108. returnToken = LEXER_TOKEN_COMMANDSTART;
  109. }
  110. else
  111. // end of command
  112. if (buffer[0] == ')') {
  113. //printf("command end: %s\n", buffer);
  114. returnToken = LEXER_TOKEN_COMMANDEND;
  115. }
  116. else
  117. // string
  118. if (buffer[0] == '\"') {
  119. fscanf(files[currentFile].f, "%499[^\"]", buffer);
  120. buffer[499] = '\0';
  121. fscanf(files[currentFile].f, "%*1c");
  122. //printf("found string: %s\n", buffer);
  123. returnToken = LEXER_TOKEN_STRING;
  124. }
  125. else
  126. // start of array
  127. if (buffer[0] == '[') {
  128. //printf("arrat start: %s\n", buffer);
  129. returnToken = LEXER_TOKEN_ARRAYSTART;
  130. }
  131. else
  132. // end of array
  133. if (buffer[0] == ']') {
  134. //printf("array end: %s\n", buffer);
  135. returnToken = LEXER_TOKEN_ARRAYEND;
  136. }
  137. else
  138. // period
  139. if (buffer[0] == '.') {
  140. //printf("period: %s\n", buffer);
  141. returnToken = LEXER_TOKEN_PERIOD;
  142. }
  143. else
  144. // identifier
  145. if ((buffer[0] >= 'a' && buffer[0] <= 'z')
  146. || (buffer[0] >= 'A' && buffer[0] <= 'Z')
  147. || buffer[0] == '_') {
  148. char restNumber[500];
  149. if (fscanf(files[currentFile].f, "%500[a-zA-Z0-9_]", restNumber) > 0) {
  150. strcat(buffer, restNumber);
  151. }
  152. //printf("identifier: %s\n", buffer);
  153. returnToken = LEXER_TOKEN_IDENTIFIER;
  154. }
  155. else
  156. // number
  157. if ((buffer[0] >= '0' && buffer[0] <= '9')) {
  158. // get the whole number
  159. char restNumber[500];
  160. restNumber[0] = '\0';
  161. if (fscanf(files[currentFile].f, "%499[0-9.]", restNumber) > 0) {
  162. strcat(buffer, restNumber);
  163. }
  164. // decide if it's a floating number
  165. int isFloat = 0;
  166. char *ptr = buffer;
  167. while (ptr[0] != '\0') {
  168. if (ptr[0] == '.') {
  169. isFloat = 1;
  170. break;
  171. }
  172. ptr++;
  173. }
  174. // parsing float
  175. if (isFloat) {
  176. //printf("float: %s\n", buffer);
  177. returnToken = LEXER_TOKEN_FLOAT;
  178. }
  179. // parsing int
  180. else {
  181. //printf("int: %s\n", buffer);
  182. returnToken = LEXER_TOKEN_INT;
  183. }
  184. }
  185. else
  186. // special characters only meant for native commands
  187. if (buffer[0] == '-'
  188. || buffer[0] == '+'
  189. || buffer[0] == '/'
  190. || buffer[0] == '*'
  191. || buffer[0] == '%'
  192. || buffer[0] == '='
  193. || buffer[0] == '<'
  194. || buffer[0] == '>'
  195. || buffer[0] == '!'
  196. || buffer[0] == '&'
  197. || buffer[0] == '|') {
  198. // check if negative number
  199. if (buffer[0] == '-') {
  200. long pos = ftell(files[currentFile].f);
  201. char restId;
  202. fscanf(files[currentFile].f, "%1c", &restId);
  203. if (restId >= '0' && restId <= '9') {
  204. buffer[1] = restId;
  205. buffer[2] = '\0';
  206. // get the whole number
  207. char restNumber[500];
  208. restNumber[0] = '\0';
  209. if (fscanf(files[currentFile].f, "%499[0-9.]", restNumber) > 0) {
  210. strcat(buffer, restNumber);
  211. }
  212. // decide if it's a floating number
  213. int isFloat = 0;
  214. char *ptr = buffer;
  215. while (ptr[0] != '\0') {
  216. if (ptr[0] == '.') {
  217. isFloat = 1;
  218. break;
  219. }
  220. ptr++;
  221. }
  222. // parsing float
  223. if (isFloat) {
  224. //printf("float: %s\n", buffer);
  225. returnToken = LEXER_TOKEN_FLOAT;
  226. }
  227. // parsing int
  228. else {
  229. //printf("int: %s\n", buffer);
  230. returnToken = LEXER_TOKEN_INT;
  231. }
  232. }
  233. else {
  234. fseek(files[currentFile].f, pos, SEEK_SET);
  235. }
  236. }
  237. else
  238. // check if some symbols come with "="
  239. if (buffer[0] == '='
  240. || buffer[0] == '<'
  241. || buffer[0] == '>'
  242. || buffer[0] == '!') {
  243. long pos = ftell(files[currentFile].f);
  244. char restId;
  245. fscanf(files[currentFile].f, "%1c", &restId);
  246. if (restId != '=') {
  247. fseek(files[currentFile].f, pos, SEEK_SET);
  248. }
  249. else {
  250. buffer[1] = restId;
  251. buffer[2] = '\0';
  252. }
  253. }
  254. else
  255. if (buffer[0] == '&'
  256. || buffer[0] == '|') {
  257. long pos = ftell(files[currentFile].f);
  258. char restId;
  259. fscanf(files[currentFile].f, "%1c", &restId);
  260. if (restId != buffer[0]) {
  261. fseek(files[currentFile].f, pos, SEEK_SET);
  262. }
  263. else {
  264. buffer[1] = restId;
  265. buffer[2] = '\0';
  266. }
  267. }
  268. //printf("identifier special: %s\n", buffer);
  269. if (returnToken == LEXER_TOKEN_UNKNOWN) {
  270. returnToken = LEXER_TOKEN_IDENTIFIER;
  271. }
  272. }
  273. else
  274. // end of file -- nothing left to parse
  275. if (feof(files[currentFile].f)) {
  276. //printf("token done\n");
  277. returnToken = LEXER_TOKEN_DONE;
  278. }
  279. if (returnToken == LEXER_TOKEN_UNKNOWN) {
  280. printf("unknown token: %s\n", buffer);
  281. exit(-1);
  282. }
  283. files[currentFile].lex_pos_current = ftell(files[currentFile].f);
  284. if (!peek) {
  285. strncpy(lastTokenRead, buffer, 199);
  286. lastTokenRead[199] = '\0';
  287. lastToken = returnToken;
  288. }
  289. return returnToken;
  290. }
  /*
   * Reads the next token with the peek flag off and returns its
   * LEXER_TOKEN_* type.
   */
  int lexer_getNextToken() {
      const int peekFlag = 0;
      return getNextToken(peekFlag);
  }
  294. int lexer_peek() {
  295. struct file_properties files_backup[10];
  296. memcpy(files_backup, files, sizeof(files_backup));
  297. int currentFile_backup = currentFile;
  298. int token = getNextToken(1);
  299. memcpy(files, files_backup, sizeof(files_backup));
  300. currentFile = currentFile_backup;
  301. for (int i = 0; i <= currentFile; i++) {
  302. fseek(files[i].f, files[i].lex_pos_current, SEEK_SET);
  303. }
  304. return token;
  305. }
  306. const char *lexer_getLexToken() {
  307. return lastTokenRead;
  308. }
  309. const char *lexer_getCurrentFilename() {
  310. return files[currentFile].filename;
  311. }
  312. int lexer_getCurrentLinenumber() {
  313. return files[currentFile].currentLineNumber;
  314. }
  315. /*
  316. * prints the previous and current source line,
  317. * with an arrow pointing on the first character of
  318. * the last parsed token
  319. */
  320. void lexer_printCurrentLine() {
  321. // go to the line previous to the current one
  322. fseek(files[currentFile].f, files[currentFile].lex_pos_previousLine, SEEK_SET);
  323. // print the previous and current line (on first line only print that)
  324. int extraLine = files[currentFile].currentLineNumber > 1 ? 1 : 0;
  325. for (int i = 1 -extraLine; i < 2; i++) {
  326. char b[500];
  327. fscanf(files[currentFile].f, "%499[^\n]\n", b);
  328. b[499] = '\0';
  329. printf(" %d | %s\n", lexer_getCurrentLinenumber() -1 +i, b);
  330. }
  331. // print an arrow pointing at the current character
  332. char lineNum[20];
  333. sprintf(lineNum, "%d", lexer_getCurrentLinenumber());
  334. lineNum[19] = '\0';
  335. printf(" ");
  336. for (int i = 0; i < files[currentFile].currentCharacterNumber +strlen(lineNum); i++) {
  337. printf(" ");
  338. }
  339. printf("^\n");
  340. // go back to (potentially) resume parsing
  341. fseek(files[currentFile].f, files[currentFile].lex_pos_current, SEEK_SET);
  342. }
  343. void lexer_addIncludedFile(const char *includeFilename) {
  344. // skip files that have been included before
  345. for (int i = 0; i <= currentPastFile; i++) {
  346. if (strcmp(pastFiles[i], includeFilename) == 0) {
  347. return;
  348. }
  349. }
  350. // temp limit of 100 characters per filename
  351. if (strlen(includeFilename) >= 100) {
  352. printf("cannot include %s: too long name\n", includeFilename);
  353. exit(-1);
  354. }
  355. // save included file so it's not included again
  356. currentPastFile++;
  357. strcpy(pastFiles[currentPastFile], includeFilename);
  358. if (includePath) {
  359. strcpy(buffer, includePath);
  360. strcat(buffer, includeFilename);
  361. }
  362. else {
  363. strcpy(buffer, includeFilename);
  364. }
  365. if (currentFile+1 >= 10) {
  366. printf("lexer: reached limit of included files with: '%s'\n", buffer);
  367. exit(-1);
  368. }
  369. currentFile++;
  370. strcpy(files[currentFile].filename, buffer);
  371. files[currentFile].currentLineNumber = 1;
  372. files[currentFile].currentCharacterNumber = 0;
  373. files[currentFile].f = fopen(buffer, "r");
  374. if (!files[currentFile].f) {
  375. printf("avdl error: Unable to open '%s': %s\n", buffer, strerror(errno));
  376. exit(-1);
  377. }
  378. files[currentFile].lex_pos_previousLine =
  379. files[currentFile].lex_pos_currentLine =
  380. files[currentFile].lex_pos_current =
  381. ftell(files[currentFile].f);
  382. }