12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789 |
- /*
- * Copyright 2021
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * SPDX-License-Identifier: GPL-3.0+
- * License-Filename: LICENSE
- */
- /* dphl.* is about Dot Parsing Html Labels */
/*
 * "<br />" is accepted, but "< br/>" and "<br/ >" are not.
 * "<br/>" is lexed as the token "<br" followed by the token "/>".
 */
- /* gcc-11 analyzer has several problems with the flex-2.4.6 generated lexers, even with own customized skeleton */
- /* at malloc error lexer will leak memory and do exit() */
%{
#include "config.h"

#include <stdio.h>
#include <stdlib.h>
/* strlen(), strcpy() and memset() are used by the actions and the user code
 * below; include their header here instead of relying on the generated
 * scanner preamble to pull it in.
 */
#include <string.h>
/* strcasecmp() */
#include <strings.h>
#include <zlib.h>

#include "splay-tree.h"
#include "lex.yy.h"
#include "dot.tab.h"
#include "dp.h"
#include "dpus.h"
#include "dphlparser.h"
#include "dphl.h"
#include "dpmem.h"

/* use GNU GCC compiler builtin strlen */
#undef YY_NEED_STRLEN

/* scratch pointers used by the quoted-string actions:
 * tmp is the unescaped copy being built, pt reads from yytext, q writes into tmp
 */
static char *tmp = NULL;
static char *pt = NULL;
static char *q = NULL;

/* when non-zero html_lex() skips white space before refilling its buffers,
 * the start-tag action clears it for the text formatting tags
 */
static int skipws = 1;

/* own yyalloc
 * void *yyalloc (size_t n) { return(calloc(1,n)); }
 * void yyfree (void *ptr) { if(ptr) { free (ptr); } return; }
 * void *yyrealloc (void *ptr, size_t n) { return (realloc (ptr,n)); }
 */
%}
/* to use the own yyalloc routines add:
 * %option noyyalloc
 * %option noyyfree
 * %option noyyrealloc
 */

/* no yywrap() at end-of-file */
%option noyywrap
/* do not push back chars in stream function() */
%option nounput
/* no input() function */
%option noinput
/* input does not come from a tty. */
%option never-interactive
/* 8-bits scanner */
%option 8bit
/* use nameprefix for routines */
%option prefix="hl"
/* add debug output */
%option debug
/* use clib to read data */
%option noread
/* line no */
%option yylineno

/* "string" as in var="value", a backslash escapes the next char or newline */
ISTR [^\\\"]|\\.|\\\n
STR \"({ISTR}*)\"
/* 'string' as in var='value', a backslash escapes the next char or newline */
ISTRA [^\\\']|\\.|\\\n
STRA \'({ISTRA}*)\'
/* chars in a <var> */
chars [a-zA-Z]*
/* chars in a tag with a '-' for point-size */
charsattr [a-zA-Z-]*
/* one line end in dos, mac or unix style */
nl (\r\n|\r|\n)
/* tag delimiters with an optional line end glued on */
open {nl}?"<"
close ">"{nl}?
/* html comment, the body may not contain "--" */
comment {open}"!--"([^-]|"-"[^-])*"--"{close}
- %%
{comment}	{ /* html comment is dropped; it cannot hold <> chars or nested comments */ }
[\f ]+		|
[\t]		|
[\n]		|
[\r]		{ /* form feeds, spaces, tabs and line ends are dropped; flex keeps yylineno up to date */ }
"<"{chars} {
	/* start of a tag: "<" directly followed by the tag name.
	 * The name is looked up case-insensitive in the table below.
	 * keepws marks the tags which clear skipws, so that html_lex()
	 * does not skip white space at its next buffer refill.
	 */
	static const struct {
		const char *name;
		int token;
		int keepws;
	} starttags[] = {
		{ "<b", HL_B, 1 },
		{ "<br", HL_BR, 1 },
		{ "<font", HL_FONT, 1 },
		{ "<hr", HL_HR, 0 },
		{ "<html", HL_HTML, 0 },
		{ "<i", HL_I, 1 },
		{ "<img", HL_IMG, 0 },
		{ "<o", HL_O, 1 },
		{ "<s", HL_S, 1 },
		{ "<sub", HL_SUB, 1 },
		{ "<sup", HL_SUP, 1 },
		{ "<table", HL_TABLE, 0 },
		/* undocumented that "th" is same as "td" */
		{ "<th", HL_TD, 0 },
		{ "<td", HL_TD, 0 },
		{ "<tr", HL_TR, 0 },
		{ "<u", HL_U, 1 },
		{ "<vr", HL_VR, 0 }
	};
	size_t k = 0;
	int tok = 0;

	hlylval.string = dp_uniqstr (yytext);

	for (k = 0; k < (sizeof (starttags) / sizeof (starttags[0])); k++) {
		if (strcasecmp (yytext, starttags[k].name) != 0) {
			continue;
		}
		tok = starttags[k].token;
		if (starttags[k].keepws) {
			skipws = 0;
		}
		/* the rule tags have their own dphl hook */
		if (tok == HL_BR) {
			dphl_rbr ();
		} else if (tok == HL_HR) {
			dphl_rhr ();
		} else if (tok == HL_VR) {
			dphl_rvr ();
		}
		return (tok);
	}

	/* not in the table: unknown tag at start, leave a message and stop this buffer */
	memset(dp_errmsg, 0, 256);
	snprintf(dp_errmsg, (256 - 1), "%s(): unknown start tag `%s' in html string at line %d\n", __func__, yytext, hllineno);
	return (EOF);
}
"</"{chars} {
	/* end tag: "</" directly followed by the tag name,
	 * looked up case-insensitive in the table below.
	 */
	static const struct {
		const char *name;
		int token;
	} endtags[] = {
		{ "</b", HL_C_B },
		/* undocumented that <br></br> is same as <br/> */
		{ "</br", HL_C_BR },
		{ "</font", HL_C_FONT },
		{ "</html", HL_C_HTML },
		/* it seems <hr></hr> could be same as <hr/> */
		{ "</hr", HL_C_HR },
		{ "</i", HL_C_I },
		/* it seems <img></img> could be same as <img/> */
		{ "</img", HL_C_IMG },
		{ "</o", HL_C_O },
		{ "</s", HL_C_S },
		{ "</sub", HL_C_SUB },
		{ "</sup", HL_C_SUP },
		{ "</table", HL_C_TABLE },
		/* undocumented that "th" is same as "td" */
		{ "</th", HL_C_TD },
		{ "</td", HL_C_TD },
		{ "</tr", HL_C_TR },
		{ "</u", HL_C_U },
		/* it seems <vr></vr> could be same as <vr/> */
		{ "</vr", HL_C_VR }
	};
	size_t k = 0;

	hlylval.string = dp_uniqstr (yytext);

	for (k = 0; k < (sizeof (endtags) / sizeof (endtags[0])); k++) {
		if (strcasecmp (yytext, endtags[k].name) == 0) {
			return (endtags[k].token);
		}
	}

	/* not in the table: unknown end tag, leave a message and stop this buffer */
	memset(dp_errmsg, 0, 256);
	snprintf(dp_errmsg, (256 - 1), "%s(): unknown end tag `%s' in html string at line %d\n", __func__, yytext, hllineno);
	return (EOF);
}
"/>"	{
		/* self-closing end of a tag, as used with br, hr, vr and img */
		hlylval.string = dp_uniqstr (yytext);
		return (HL_SC);
	}
">"	{
		/* plain end of a tag */
		hlylval.string = dp_uniqstr (yytext);
		return (HL_C);
	}
"="	{
		/* the is-sign between an attribute and its value, as in var=value */
		hlylval.string = dp_uniqstr (yytext);
		return (HL_IS);
	}
{STR} {
	/* "string" as in var="value": return the text between the double
	 * quotes as HL_QSTR with the escapes \" and \\ resolved and a
	 * backslash-newline removed.
	 */
	if (strlen (yytext) == 2) {
		/* only the two quotes */
		hlylval.string = dp_uniqstr ((char * )"");
		return (HL_QSTR);
	}
	/* the unescaped text is never longer than the match */
	tmp = (char *) dp_calloc (1, (yyleng+1));
	/* cut off the closing double quote */
	yytext[yyleng-1] = 0;
	q = tmp;
	/* read from just past the opening double quote */
	for (pt = yytext + 1; *pt != 0; ) {
		if (*pt != '\\') {
			/* regular char */
			*q++ = *pt++;
			continue;
		}
		switch (*(pt+1))
		{
		case 0:
			/* backslash as last char is kept */
			*q++ = '\\';
			pt = pt + 1;
			break;
		case '\n':
			/* backslash-newline is dropped */
			pt = pt + 2;
			break;
		case '"':
			/* \" becomes " */
			*q++ = '"';
			pt = pt + 2;
			break;
		case '\\':
			/* \\ becomes \ */
			*q++ = '\\';
			pt = pt + 2;
			break;
		default:
			/* any other escape is copied as-is */
			*q++ = '\\';
			*q++ = *(pt+1);
			pt = pt + 2;
			break;
		}
	}
	hlylval.string = dp_uniqstr (tmp);
	tmp = (char *) dp_free ((void *) tmp);
	pt = NULL;
	q = NULL;
	return (HL_QSTR);
}
{STRA} {
	/* 'string' as in var='value': return the text between the single
	 * quotes as HL_QSTR with the escapes \' and \\ resolved and a
	 * backslash-newline removed.
	 */
	if (strlen (yytext) == 2) {
		/* only the two quotes */
		hlylval.string = dp_uniqstr ((char * )"");
		return (HL_QSTR);
	}
	/* the unescaped text is never longer than the match */
	tmp = (char *) dp_calloc (1, (yyleng+1));
	/* cut off the closing quote */
	yytext[yyleng-1] = 0;
	q = tmp;
	/* read from just past the opening quote */
	for (pt = yytext + 1; *pt != 0; ) {
		if (*pt != '\\') {
			/* regular char */
			*q++ = *pt++;
			continue;
		}
		switch (*(pt+1))
		{
		case 0:
			/* backslash as last char is kept */
			*q++ = '\\';
			pt = pt + 1;
			break;
		case '\n':
			/* backslash-newline is dropped */
			pt = pt + 2;
			break;
		case '\'':
			/* \' becomes ' */
			*q++ = '\'';
			pt = pt + 2;
			break;
		case '\\':
			/* \\ becomes \ */
			*q++ = '\\';
			pt = pt + 2;
			break;
		default:
			/* any other escape is copied as-is */
			*q++ = '\\';
			*q++ = *(pt+1);
			pt = pt + 2;
			break;
		}
	}
	hlylval.string = dp_uniqstr (tmp);
	tmp = (char *) dp_free ((void *) tmp);
	pt = NULL;
	q = NULL;
	return (HL_QSTR);
}
{charsattr} {
	/* attribute name inside a tag.
	 * A known name, compared case-insensitive, returns its own token,
	 * every other word is returned as HL_STR.
	 */
	static const struct {
		const char *name;
		int token;
	} attrs[] = {
		{ "align", HL_ALIGN },
		{ "balign", HL_BALIGN },
		{ "bgcolor", HL_BGCOLOR },
		{ "border", HL_BORDER },
		{ "cellborder", HL_CELLBORDER },
		{ "cellpadding", HL_CELLPADDING },
		{ "cellspacing", HL_CELLSPACING },
		{ "color", HL_COLOR },
		{ "columns", HL_COLUMNS },
		{ "colspan", HL_COLSPAN },
		{ "face", HL_FACE },
		{ "fixedsize", HL_FIXEDSIZE },
		{ "gradientangle", HL_GRADIENTANGLE },
		{ "height", HL_HEIGHT },
		{ "href", HL_HREF },
		{ "id", HL_ID },
		/* both spellings give the same token */
		{ "point-size", HL_POINTSIZE },
		{ "pointsize", HL_POINTSIZE },
		{ "port", HL_PORT },
		{ "rows", HL_ROWS },
		{ "rowspan", HL_ROWSPAN },
		{ "scale", HL_SCALE },
		{ "sides", HL_SIDES },
		{ "src", HL_SRC },
		{ "style", HL_STYLE },
		{ "target", HL_TARGET },
		{ "title", HL_TITLE },
		{ "tooltip", HL_TOOLTIP },
		{ "valign", HL_VALIGN },
		{ "width", HL_WIDTH }
	};
	size_t k = 0;

	hlylval.string = dp_uniqstr (yytext);

	for (k = 0; k < (sizeof (attrs) / sizeof (attrs[0])); k++) {
		if (strcasecmp (yytext, attrs[k].name) == 0) {
			return (attrs[k].token);
		}
	}

	/* unknown */
	return (HL_STR);
}
<<EOF>> {
	/* end of buffer */
	return (EOF);
}
. {
	/* Something unknown: hand the char itself to the parser, which has
	 * no rule for it and reports a parse error.
	 * The char goes via unsigned char so that a byte above 0x7f stays a
	 * positive token value. As a plain (signed) char it would turn
	 * negative, which the parser takes as end-of-input, and 0xff would
	 * be the same as EOF, which html_lex() takes as end of the tag.
	 */
	return ((int) (unsigned char) yytext[0]);
}
- %%
- /* lex buffer */
- static YY_BUFFER_STATE buffer = NULL;
- /* clear buffer */
- static int clearit = 0;
- /* lex buffer pending */
- static int pending = 0;
- /* lex buffer length */
- static int buflen = 0;
- /* data buffer */
- static char *dbuf = (char *)0;
- /* tag buffer */
- static char *tbuf = (char *)0;
- /* input buffer */
- static char *ibuf = (char *)0;
- /* pointer in ibuf */
- static char *bufptr = (char *)0;
- /* init
- * the string has at least 1 char as in "< >"
- */
- void html_lex_init (int dbg, char *str, int line)
- {
- if (dbg || 0) {
- hl_flex_debug = 1;
- hlydebug = 1;
- } else {
- hl_flex_debug = 0;
- hlydebug = 0;
- }
- /* set lineno to where the node is defind */
- hllineno = line;
- /* extra space for \0\0 */
- buflen = strlen (str) + 2;
- ibuf = (char *) dp_calloc (1, buflen);
- if (ibuf == NULL) { return; }
- /* copy, skip first < */
- strcpy (ibuf, (str+1));
- /* wipe last > */
- ibuf[strlen(ibuf)-1] = 0;
- /* set where to start lexing */
- bufptr = ibuf;
- /* skip leading ws todo XXX is this always correct? */
- while (*bufptr)
- {
- if (*bufptr == ' ') {
- } else if (*bufptr == '\n') {
- hllineno++;
- } else if (*bufptr == '\r') {
- } else if (*bufptr == '\f') {
- } else if (*bufptr == '\t') {
- } else {
- break;
- }
- bufptr++;
- }
- skipws = 1;
- /* data buffer */
- dbuf = (char *) dp_calloc (1, buflen);
- if (dbuf == NULL) { return; }
- /* tag buffer */
- tbuf = (char *) dp_calloc (1, buflen);
- if (tbuf == NULL) { return; }
- /* lex buffer pending */
- pending = 0;
- /* clear buffer */
- clearit = 0;
- return;
- }
- /* de-init */
- void html_lex_deinit (void)
- {
- hl_flex_debug = 0;
- /* clear buffer */
- if (clearit || 1) {
- /* this may fix valgrind issue */
- yylex_destroy();
- /* yy_delete_buffer (buffer); */
- buffer = NULL;
- clearit = 0;
- }
- /* lex buffer pending */
- pending = 0;
- /* lex buffer length */
- buflen = 0;
- /* data buffer */
- if (dbuf) {
- dbuf = (char *) dp_free ((void *) dbuf);
- }
- /* tag buffer */
- if (tbuf) {
- tbuf = (char *) dp_free ((void *) tbuf);
- }
- /* input buffer */
- if( ibuf) {
- ibuf = (char *) dp_free ((void *) ibuf);
- }
- /* pointer in ibuf */
- bufptr = (char *)0;
- return;
- }
- /* lex one token */
- int html_lex (void)
- {
- int token = 0;
- int i = 0;
- int j = 0;
- char *p = NULL;
- for (i=0; i < 100 ; i++)
- {
- /* lex buffer pending */
- if (pending == 1) {
- /* lex from buffer */
- token = hllex ();
- /* at end-of-string in tbuf */
- if (token == EOF) {
- /* this may fix valgrind issue */
- yylex_destroy();
- /* yy_delete_buffer (buffer); */
- buffer = NULL;
- clearit = 0;
- pending = 0;
- continue;
- }
- /* at regular token */
- pending = 1;
- /* hlylval.string is set in the actions */
- return (token);
- } else {
- /* refill buffer */
- if (*bufptr == 0) {
- /* no data anymore */
- return (EOF);
- }
- if (skipws) {
- /* skip leading ws */
- while (*bufptr)
- {
- if (*bufptr == ' ') {
- } else if (*bufptr == '\n') {
- hllineno++;
- } else if (*bufptr == '\r') {
- } else if (*bufptr == '\f') {
- } else if (*bufptr == '\t') {
- } else {
- break;
- }
- bufptr++;
- }
- } else {
- skipws = 1;
- }
- if (*bufptr == 0) {
- /* no data anymore */
- return (EOF);
- }
- if (*bufptr == '<') {
- /* fill tag buffer */
- for (j=0; j < buflen ; j++)
- {
- tbuf[j] = 0;
- }
- j = 0;
- while (*bufptr)
- {
- if (*bufptr == '>') {
- tbuf[j] = *bufptr;
- j++;
- bufptr++;
- break;
- }
- tbuf[j] = *bufptr;
- j++;
- bufptr++;
- }
- if (j == 0) {
- return (EOF);
- }
- /* start lexing from buffer */
- buffer = yy_scan_string (tbuf);
- clearit = 1;
- /* lex tag buffer */
- pending = 1;
- continue;
- } else {
- /* fill data buffer */
- for (j=0; j < buflen ; j++)
- {
- dbuf[j] = 0;
- }
- j = 0;
- p = dbuf;
- while (*bufptr)
- {
- if (*bufptr == '<') {
- break;
- }
- /* update yylineno */
- if (*bufptr == '\n') {
- hllineno++;
- }
- p[j] = *bufptr;
- j++;
- bufptr++;
- }
- if (j == 0) {
- return (EOF);
- }
- pending = 0;
- /* return copy of the data area */
- hlylval.string = dp_uniqstr (dbuf);
- if (hlydebug) {
- printf ("token HL_DATA \"%s\"\n",hlylval.string);
- }
- return (HL_DATA);
- }
- }
- }
- /* shouldnothappen */
- printf ("%s(): huh?\n",__func__);
- return (EOF);
- }
- /* end */
|