gml_scanner.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /*
  2. * Copyright 2021
  3. * (C) Universitaet Passau 1986-1991
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. * These are the four essential freedoms with GNU GPL software:
  19. * 1: freedom to run the program, for any purpose
  20. * 2: freedom to study how the program works, and change it to make it do what you wish
  21. * 3: freedom to redistribute copies to help your Free Software friends
  22. * 4: freedom to distribute copies of your modified versions to your Free Software friends
  23. * , ,
  24. * / \
  25. * ((__-^^-,-^^-__))
  26. * `-_---' `---_-'
  27. * `--|o` 'o|--'
  28. * \ ` /
  29. * ): :(
  30. * :o_o:
  31. * "-"
  32. *
  33. * SPDX-License-Identifier: GPL-3.0+
  34. * License-Filename: LICENSE
  35. */
  36. /*
  37. * Scanner for the GML - file format
  38. * this file has 1 memory leak
  39. */
  40. #include "config.h"
  41. #include <stdio.h>
  42. #include <stdlib.h>
  43. #include <ctype.h>
  44. #include <assert.h>
  45. #include <string.h>
  46. #include <zlib.h>
  47. #include "main.h"
  48. #include "gml_scanner.h"
  49. #include "gml_parser.h"
  50. /*
  51. * ISO8859-1 coding of chars >= 160
  52. */
  53. char *GML_table[] = {
  54. "&nbsp;", /* 160 */
  55. "&iexcl;",
  56. "&cent;",
  57. "&pound;",
  58. "&curren;",
  59. "&yen;",
  60. "&brvbar;",
  61. "&sect;",
  62. "&uml;",
  63. "&copy;",
  64. "&ordf;", /* 170 */
  65. "&laquo;",
  66. "&not;",
  67. "&shy;",
  68. "&reg;",
  69. "&macr;",
  70. "&deg;",
  71. "&plusmn;",
  72. "&sup2;",
  73. "&sup3;", /* 180 */
  74. "&acute;",
  75. "&micro;",
  76. "&para;",
  77. "&middot;",
  78. "&cedil;",
  79. "&sup1;",
  80. "&ordm;",
  81. "&raquo;",
  82. "&frac14;",
  83. "&frac12;",
  84. "&frac34;", /* 190 */
  85. "&iquest;",
  86. "&Agrave;",
  87. "&Aacute;",
  88. "&Acirc;",
  89. "&Atilde;",
  90. "&Auml;",
  91. "&Aring;",
  92. "&AElig;",
  93. "&Ccedil;",
  94. "&Egrave;", /* 200 */
  95. "&Eacute;",
  96. "&Ecirc;",
  97. "&Euml;",
  98. "&Igrave;",
  99. "&Iacute;",
  100. "&Icirc;",
  101. "&Iuml;",
  102. "&ETH;",
  103. "&Ntilde;",
  104. "&Ograve;", /* 210 */
  105. "&Oacute;",
  106. "&Ocirc;",
  107. "&Otilde;",
  108. "&Ouml;",
  109. "&times;",
  110. "&Oslash;",
  111. "&Ugrave;",
  112. "&Uacute;",
  113. "&Ucirc;",
  114. "&Uuml;", /* 220 */
  115. "&Yacute;",
  116. "&THORN;",
  117. "&szlig;",
  118. "&agrave;",
  119. "&aacute;",
  120. "&acirc;",
  121. "&atilde;",
  122. "&auml;",
  123. "&aring;",
  124. "&aelig;", /* 230 */
  125. "&ccedil;",
  126. "&egrave;",
  127. "&eacute;",
  128. "&ecirc;",
  129. "&euml;",
  130. "&igrave;",
  131. "&iacute;",
  132. "&icirc;",
  133. "&iuml;",
  134. "&eth;", /* 240 */
  135. "&ntilde;",
  136. "&ograve;",
  137. "&oacute;",
  138. "&ocirc;",
  139. "&otilde;",
  140. "&ouml;",
  141. "&divide;",
  142. "&oslash;",
  143. "&ugrave;",
  144. "&uacute;", /* 250 */
  145. "&ucirc;",
  146. "&uuml;",
  147. "&yacute;",
  148. "&thorn;",
  149. "&yuml;"
  150. };
/*
 * Line counter of the scanner: set to 1 by GML_init() and incremented
 * by GML_scanner() for the newlines it consumes.
 */
unsigned int GML_line = 0;
/*
 * Column counter of the scanner: set to 1 by GML_init(), advanced by
 * GML_scanner() as it reads and reset to 1 after a newline.
 */
unsigned int GML_column = 0;
  153. int GML_search_ISO(char *str, int len)
  154. {
  155. int i = 0;
  156. int ret = '&';
  157. if (strncmp(str, "&quot;", (size_t)len) == 0) {
  158. return 34;
  159. } else if (strncmp(str, "&amp;", (size_t)len) == 0) {
  160. return 38;
  161. } else if (strncmp(str, "&lt;", (size_t)len) == 0) {
  162. return 60;
  163. } else if (strncmp(str, "&gt;", (size_t)len) == 0) {
  164. return 62;
  165. }
  166. for (i = 0; i < 96; i++) {
  167. if (strncmp(str, GML_table[i], (size_t)len) == 0) {
  168. ret = i + 160;
  169. break;
  170. }
  171. }
  172. return ret;
  173. }
  174. void GML_init(void)
  175. {
  176. GML_line = 1;
  177. GML_column = 1;
  178. return;
  179. }
  180. struct GML_token GML_scanner(gzFile source)
  181. {
  182. unsigned int cur_max_size = INITIAL_SIZE;
  183. static char buffer[INITIAL_SIZE];
  184. char *tmp = buffer;
  185. char *ret = tmp;
  186. struct GML_token token;
  187. int is_float = 0;
  188. unsigned int count = 0;
  189. int next;
  190. char ISO_buffer[8];
  191. int ISO_count;
  192. assert(source != NULL);
  193. /*
  194. * eliminate preceeding white spaces
  195. */
  196. do {
  197. next = gzgetc(source);
  198. GML_column++;
  199. if (next == '\n') {
  200. GML_line++;
  201. GML_column = 1;
  202. }
  203. }
  204. while (isspace(next) && next != EOF);
  205. if (next == EOF) {
  206. /*
  207. * reached EOF
  208. */
  209. token.kind = GML_END;
  210. return token;
  211. } else if (isdigit(next) || next == '.' || next == '+' || next == '-') {
  212. /*
  213. * floating point or integer
  214. */
  215. do {
  216. if (count == INITIAL_SIZE - 1) {
  217. token.value.err.err_num = GML_TOO_MANY_DIGITS;
  218. token.value.err.line = GML_line;
  219. token.value.err.column = GML_column + count;
  220. token.kind = GML_ERROR;
  221. return token;
  222. }
  223. if (next == '.' || next == 'E') {
  224. is_float = 1;
  225. }
  226. buffer[count] = next;
  227. count++;
  228. next = gzgetc(source);
  229. }
  230. while (!isspace(next) && next != ']' && next != EOF);
  231. buffer[count] = 0;
  232. if (next == ']') {
  233. gzungetc(next, source);
  234. }
  235. if (next == '\n') {
  236. GML_line++;
  237. GML_column = 1;
  238. } else {
  239. GML_column += count;
  240. }
  241. if (is_float) {
  242. token.value.floating = atof(tmp);
  243. token.kind = GML_DOUBLE;
  244. } else {
  245. token.value.integer = atol(tmp);
  246. token.kind = GML_INT;
  247. }
  248. return token;
  249. } else if (isalpha(next) || next == '_') {
  250. /*
  251. * key
  252. */
  253. do {
  254. if (count == cur_max_size - 1) {
  255. *tmp = 0;
  256. tmp = (char *)gmlparser_calloc((size_t)1, (2 * cur_max_size * sizeof(char)));
  257. strcpy(tmp, ret);
  258. if (cur_max_size > INITIAL_SIZE) {
  259. gmlparser_free(ret);
  260. }
  261. ret = tmp;
  262. tmp += count;
  263. cur_max_size *= 2;
  264. }
  265. *tmp++ = next;
  266. count++;
  267. next = gzgetc(source);
  268. }
  269. while (isalnum(next) || next == '_');
  270. if (next == '\n') {
  271. GML_line++;
  272. GML_column = 1;
  273. } else {
  274. GML_column += count;
  275. }
  276. if (next == '[') {
  277. gzungetc(next, source);
  278. } else if (!isspace(next)) {
  279. token.value.err.err_num = GML_UNEXPECTED;
  280. token.value.err.line = GML_line;
  281. token.value.err.column = GML_column + count;
  282. token.kind = GML_ERROR;
  283. if (cur_max_size > INITIAL_SIZE) {
  284. gmlparser_free(ret);
  285. }
  286. return token;
  287. }
  288. *tmp = 0;
  289. token.kind = GML_KEY;
  290. /* #warning "memleak here" */
  291. token.value.string = (char *)gmlparser_calloc((size_t)1, ((count + 1) * sizeof(char)));
  292. strcpy(token.value.string, ret);
  293. if (cur_max_size > INITIAL_SIZE) {
  294. gmlparser_free(ret);
  295. }
  296. return token;
  297. } else {
  298. /*
  299. * comments, brackets and strings
  300. */
  301. switch (next) {
  302. case '#':
  303. do {
  304. next = gzgetc(source);
  305. }
  306. while (next != '\n' && next != EOF);
  307. GML_line++;
  308. GML_column = 1;
  309. return GML_scanner(source);
  310. case '[':
  311. token.kind = GML_L_BRACKET;
  312. return token;
  313. case ']':
  314. token.kind = GML_R_BRACKET;
  315. return token;
  316. case '"':
  317. next = gzgetc(source);
  318. GML_column++;
  319. while (next != '"') {
  320. if (count >= cur_max_size - 8) {
  321. *tmp = 0;
  322. tmp = (char *)gmlparser_calloc((size_t)1, (2 * cur_max_size * sizeof(char)));
  323. strcpy(tmp, ret);
  324. if (cur_max_size > INITIAL_SIZE) {
  325. gmlparser_free(ret);
  326. }
  327. ret = tmp;
  328. tmp += count;
  329. cur_max_size *= 2;
  330. }
  331. if (next == '&') {
  332. ISO_count = 0;
  333. while (next != ';') {
  334. if (next == '"' || next == EOF) {
  335. gzungetc(next, source);
  336. ISO_count = 0;
  337. break;
  338. }
  339. if (ISO_count < 8) {
  340. ISO_buffer[ISO_count] = next;
  341. ISO_count++;
  342. }
  343. next = gzgetc(source);
  344. }
  345. if (ISO_count == 8) {
  346. ISO_count = 0;
  347. }
  348. if (ISO_count) {
  349. ISO_buffer[ISO_count] = ';';
  350. ISO_count++;
  351. next = GML_search_ISO(ISO_buffer, ISO_count);
  352. ISO_count = 0;
  353. if (ISO_count) {
  354. }
  355. } else {
  356. next = '&';
  357. }
  358. }
  359. *tmp++ = next;
  360. count++;
  361. GML_column++;
  362. next = gzgetc(source);
  363. if (next == EOF) {
  364. token.value.err.err_num = GML_PREMATURE_EOF;
  365. token.value.err.line = GML_line;
  366. token.value.err.column = GML_column + count;
  367. token.kind = GML_ERROR;
  368. if (cur_max_size > INITIAL_SIZE) {
  369. gmlparser_free(ret);
  370. }
  371. return token;
  372. }
  373. if (next == '\n') {
  374. GML_line++;
  375. GML_column = 1;
  376. }
  377. }
  378. *tmp = 0;
  379. token.kind = GML_STRING;
  380. token.value.string = (char *)gmlparser_calloc((size_t)1, ((count + 1) * sizeof(char)));
  381. strcpy(token.value.string, ret);
  382. if (cur_max_size > INITIAL_SIZE) {
  383. gmlparser_free(ret);
  384. }
  385. return token;
  386. default:
  387. token.value.err.err_num = GML_UNEXPECTED;
  388. token.value.err.line = GML_line;
  389. token.value.err.column = GML_column;
  390. token.kind = GML_ERROR;
  391. return token;
  392. }
  393. }
  394. /* shouldnothappen */
  395. token.value.err.err_num = GML_UNEXPECTED;
  396. token.value.err.line = GML_line;
  397. token.value.err.column = GML_column;
  398. token.kind = GML_ERROR;
  399. return token;
  400. }
  401. /* end */