123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479 |
- /*
- * Copyright 2021
- * (C) Universitaet Passau 1986-1991
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * These are the four essential freedoms with GNU GPL software:
- * 1: freedom to run the program, for any purpose
- * 2: freedom to study how the program works, and change it to make it do what you wish
- * 3: freedom to redistribute copies to help your Free Software friends
- * 4: freedom to distribute copies of your modified versions to your Free Software friends
- * , ,
- * / \
- * ((__-^^-,-^^-__))
- * `-_---' `---_-'
- * `--|o` 'o|--'
- * \ ` /
- * ): :(
- * :o_o:
- * "-"
- *
- * SPDX-License-Identifier: GPL-3.0+
- * License-Filename: LICENSE
- */
/*
 * Scanner for the GML file format.
 * Known issue: this file contains one memory leak.
 */
- #include "config.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include <assert.h>
- #include <string.h>
- #include <zlib.h>
- #include "main.h"
- #include "gml_scanner.h"
- #include "gml_parser.h"
/*
 * Named character entities for the ISO 8859-1 characters 160..255.
 * Entry i holds the "&name;" spelling of character code (i + 160), so the
 * table must stay in code order.
 */
char *GML_table[] = {
    "&nbsp;",   /* 160 */
    "&iexcl;",
    "&cent;",
    "&pound;",
    "&curren;",
    "&yen;",
    "&brvbar;",
    "&sect;",
    "&uml;",
    "&copy;",
    "&ordf;",   /* 170 */
    "&laquo;",
    "&not;",
    "&shy;",
    "&reg;",
    "&macr;",
    "&deg;",
    "&plusmn;",
    "&sup2;",
    "&sup3;",
    "&acute;",  /* 180 */
    "&micro;",
    "&para;",
    "&middot;",
    "&cedil;",
    "&sup1;",
    "&ordm;",
    "&raquo;",
    "&frac14;",
    "&frac12;",
    "&frac34;", /* 190 */
    "&iquest;",
    "&Agrave;",
    "&Aacute;",
    "&Acirc;",
    "&Atilde;",
    "&Auml;",
    "&Aring;",
    "&AElig;",
    "&Ccedil;",
    "&Egrave;", /* 200 */
    "&Eacute;",
    "&Ecirc;",
    "&Euml;",
    "&Igrave;",
    "&Iacute;",
    "&Icirc;",
    "&Iuml;",
    "&ETH;",
    "&Ntilde;",
    "&Ograve;", /* 210 */
    "&Oacute;",
    "&Ocirc;",
    "&Otilde;",
    "&Ouml;",
    "&times;",
    "&Oslash;",
    "&Ugrave;",
    "&Uacute;",
    "&Ucirc;",
    "&Uuml;",   /* 220 */
    "&Yacute;",
    "&THORN;",
    "&szlig;",
    "&agrave;",
    "&aacute;",
    "&acirc;",
    "&atilde;",
    "&auml;",
    "&aring;",
    "&aelig;",  /* 230 */
    "&ccedil;",
    "&egrave;",
    "&eacute;",
    "&ecirc;",
    "&euml;",
    "&igrave;",
    "&iacute;",
    "&icirc;",
    "&iuml;",
    "&eth;",    /* 240 */
    "&ntilde;",
    "&ograve;",
    "&oacute;",
    "&ocirc;",
    "&otilde;",
    "&ouml;",
    "&divide;",
    "&oslash;",
    "&ugrave;",
    "&uacute;", /* 250 */
    "&ucirc;",
    "&uuml;",
    "&yacute;",
    "&thorn;",
    "&yuml;"
};

/* Current scanner position; set to 1/1 by GML_init(). */
unsigned int GML_line = 0;
unsigned int GML_column = 0;

/* True when the len bytes at str spell exactly the entity `name`. */
static int GML_entity_matches(const char *name, const char *str, size_t len)
{
    return strlen(name) == len && memcmp(name, str, len) == 0;
}

/*
 * Translate a character entity into its character code.
 *
 * str - the entity text including the leading '&' and the trailing ';';
 *       it does not need to be NUL-terminated
 * len - number of bytes of str that belong to the entity
 *
 * Returns 34, 38, 60 or 62 for &quot; &amp; &lt; &gt;, the ISO 8859-1 code
 * (160..255) for an entity listed in GML_table, and '&' for anything else,
 * including a NULL or empty str.
 *
 * The whole name has to match: a mere prefix of an entity name (or an empty
 * string) is not accepted.
 */
int GML_search_ISO(char *str, int len)
{
    size_t n;
    size_t i;

    if (str == NULL || len <= 0) {
        return '&';
    }
    n = (size_t)len;

    /* the four markup characters are not part of the 160..255 table */
    if (GML_entity_matches("&quot;", str, n)) {
        return 34;
    }
    if (GML_entity_matches("&amp;", str, n)) {
        return 38;
    }
    if (GML_entity_matches("&lt;", str, n)) {
        return 60;
    }
    if (GML_entity_matches("&gt;", str, n)) {
        return 62;
    }

    for (i = 0; i < sizeof GML_table / sizeof GML_table[0]; i++) {
        if (GML_entity_matches(GML_table[i], str, n)) {
            return (int)i + 160;
        }
    }

    return '&';
}
- void GML_init(void)
- {
- GML_line = 1;
- GML_column = 1;
- return;
- }
- struct GML_token GML_scanner(gzFile source)
- {
- unsigned int cur_max_size = INITIAL_SIZE;
- static char buffer[INITIAL_SIZE];
- char *tmp = buffer;
- char *ret = tmp;
- struct GML_token token;
- int is_float = 0;
- unsigned int count = 0;
- int next;
- char ISO_buffer[8];
- int ISO_count;
- assert(source != NULL);
- /*
- * eliminate preceeding white spaces
- */
- do {
- next = gzgetc(source);
- GML_column++;
- if (next == '\n') {
- GML_line++;
- GML_column = 1;
- }
- }
- while (isspace(next) && next != EOF);
- if (next == EOF) {
- /*
- * reached EOF
- */
- token.kind = GML_END;
- return token;
- } else if (isdigit(next) || next == '.' || next == '+' || next == '-') {
- /*
- * floating point or integer
- */
- do {
- if (count == INITIAL_SIZE - 1) {
- token.value.err.err_num = GML_TOO_MANY_DIGITS;
- token.value.err.line = GML_line;
- token.value.err.column = GML_column + count;
- token.kind = GML_ERROR;
- return token;
- }
- if (next == '.' || next == 'E') {
- is_float = 1;
- }
- buffer[count] = next;
- count++;
- next = gzgetc(source);
- }
- while (!isspace(next) && next != ']' && next != EOF);
- buffer[count] = 0;
- if (next == ']') {
- gzungetc(next, source);
- }
- if (next == '\n') {
- GML_line++;
- GML_column = 1;
- } else {
- GML_column += count;
- }
- if (is_float) {
- token.value.floating = atof(tmp);
- token.kind = GML_DOUBLE;
- } else {
- token.value.integer = atol(tmp);
- token.kind = GML_INT;
- }
- return token;
- } else if (isalpha(next) || next == '_') {
- /*
- * key
- */
- do {
- if (count == cur_max_size - 1) {
- *tmp = 0;
- tmp = (char *)gmlparser_calloc((size_t)1, (2 * cur_max_size * sizeof(char)));
- strcpy(tmp, ret);
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- ret = tmp;
- tmp += count;
- cur_max_size *= 2;
- }
- *tmp++ = next;
- count++;
- next = gzgetc(source);
- }
- while (isalnum(next) || next == '_');
- if (next == '\n') {
- GML_line++;
- GML_column = 1;
- } else {
- GML_column += count;
- }
- if (next == '[') {
- gzungetc(next, source);
- } else if (!isspace(next)) {
- token.value.err.err_num = GML_UNEXPECTED;
- token.value.err.line = GML_line;
- token.value.err.column = GML_column + count;
- token.kind = GML_ERROR;
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- return token;
- }
- *tmp = 0;
- token.kind = GML_KEY;
- /* #warning "memleak here" */
- token.value.string = (char *)gmlparser_calloc((size_t)1, ((count + 1) * sizeof(char)));
- strcpy(token.value.string, ret);
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- return token;
- } else {
- /*
- * comments, brackets and strings
- */
- switch (next) {
- case '#':
- do {
- next = gzgetc(source);
- }
- while (next != '\n' && next != EOF);
- GML_line++;
- GML_column = 1;
- return GML_scanner(source);
- case '[':
- token.kind = GML_L_BRACKET;
- return token;
- case ']':
- token.kind = GML_R_BRACKET;
- return token;
- case '"':
- next = gzgetc(source);
- GML_column++;
- while (next != '"') {
- if (count >= cur_max_size - 8) {
- *tmp = 0;
- tmp = (char *)gmlparser_calloc((size_t)1, (2 * cur_max_size * sizeof(char)));
- strcpy(tmp, ret);
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- ret = tmp;
- tmp += count;
- cur_max_size *= 2;
- }
- if (next == '&') {
- ISO_count = 0;
- while (next != ';') {
- if (next == '"' || next == EOF) {
- gzungetc(next, source);
- ISO_count = 0;
- break;
- }
- if (ISO_count < 8) {
- ISO_buffer[ISO_count] = next;
- ISO_count++;
- }
- next = gzgetc(source);
- }
- if (ISO_count == 8) {
- ISO_count = 0;
- }
- if (ISO_count) {
- ISO_buffer[ISO_count] = ';';
- ISO_count++;
- next = GML_search_ISO(ISO_buffer, ISO_count);
- ISO_count = 0;
- if (ISO_count) {
- }
- } else {
- next = '&';
- }
- }
- *tmp++ = next;
- count++;
- GML_column++;
- next = gzgetc(source);
- if (next == EOF) {
- token.value.err.err_num = GML_PREMATURE_EOF;
- token.value.err.line = GML_line;
- token.value.err.column = GML_column + count;
- token.kind = GML_ERROR;
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- return token;
- }
- if (next == '\n') {
- GML_line++;
- GML_column = 1;
- }
- }
- *tmp = 0;
- token.kind = GML_STRING;
- token.value.string = (char *)gmlparser_calloc((size_t)1, ((count + 1) * sizeof(char)));
- strcpy(token.value.string, ret);
- if (cur_max_size > INITIAL_SIZE) {
- gmlparser_free(ret);
- }
- return token;
- default:
- token.value.err.err_num = GML_UNEXPECTED;
- token.value.err.line = GML_line;
- token.value.err.column = GML_column;
- token.kind = GML_ERROR;
- return token;
- }
- }
- /* shouldnothappen */
- token.value.err.err_num = GML_UNEXPECTED;
- token.value.err.line = GML_line;
- token.value.err.column = GML_column;
- token.kind = GML_ERROR;
- return token;
- }
- /* end */
|