dot.l 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /*
  2. * Copyright 2021
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. * SPDX-License-Identifier: GPL-3.0+
  18. * License-Filename: LICENSE
  19. */
  20. %{
  21. #include "config.h"
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <zlib.h>
  25. #include "splay-tree.h"
  26. #include "lex.yy.h"
  27. #include "dot.tab.h"
  28. #include "dp.h"
  29. #include "dpus.h"
  30. #include "dpmem.h"
/*
 * Overrides for the flex skeleton. Each macro is #undef'ed first so the
 * value chosen here replaces whatever the generated scanner would define.
 */
/* use GNU GCC compiler builtin strlen */
#undef YY_NEED_STRLEN
/* increase read buffer: upper bound on the bytes requested per YY_INPUT call */
#undef YY_READ_BUF_SIZE
/* earlier setting, kept for reference: #define YY_READ_BUF_SIZE (16*1024) */
/* author's note: fread() with 128kb at once is fastest on Linux */
#define YY_READ_BUF_SIZE (128*1024)
/* Size of default input buffer. Do not tune. */
#undef YY_BUF_SIZE
#define YY_BUF_SIZE 16384
/*
 * zlib stream the scanner reads from. Set by dp_lex_init(), read by the
 * YY_INPUT macro and reset to 0 by dp_lex_deinit().
 */
static gzFile gvzin = (gzFile)0;
  42. /* gzfread cannot be used because of older zlib in the dll */
  43. #undef YY_INPUT
  44. #define YY_INPUT(buf,result,max_size) do { \
  45. if ( (result = gzread(gvzin, (char*)buf, (sizeof(char) * max_size) )) == 0) { \
  46. int estatus = 0; \
  47. const char *es = gzerror (gvzin, &estatus); \
  48. if (estatus == Z_BUF_ERROR) { \
  49. YY_FATAL_ERROR( "gzread() in dot.l flex scanner failed"); \
  50. } else { \
  51. if (estatus) { \
  52. printf ("%s(): zlib error status %d %s in dot.l\n",__func__,(int)estatus,es); \
  53. } \
  54. } \
  55. } } while (0);
/* scratch buffer of the quoted-string rule; also released by dp_lex_clear() */
static char *tmpp = NULL;
/* read cursor into yytext while a quoted string is filtered */
static char *p = NULL;
/* write cursor into tmpp while a quoted string is filtered */
static char *q = NULL;
/* nesting depth of '<' while scanning in the htmlstr start condition */
static int htmlnest = 0;
/* reports comment starters found inside a C-style comment, defined at the end of this file */
static void dp_check_c_comment (char *s);
  61. /* own yyalloc
  62. * void *yyalloc (size_t n) { return(calloc(1,n)); }
  63. * void yyfree (void *ptr) { if (ptr) { free (ptr); } return; }
  64. * void *yyrealloc (void *ptr, size_t n) { return (realloc(ptr,n)); }
  65. */
  66. %}
/* exclusive start condition used while the text of a <...> html label is collected */
%x htmlstr
/* The empty string "" with length 0 is troublesome in dot: */
/* digraph {} has no name */
/* digraph "" {} is like digraph {}: it has no name but a space is set */
/* digraph " " {} is a digraph named with the string " " */
/* node label="" is two chars in the output drawing */
/* The number patterns fnum1,2,3 also allow numbers such as "23.1", "23." and ".23" */
/* To use own allocation routines enable these options and the yyalloc
* functions that are commented out in the prologue:
* %option noyyalloc
* %option noyyfree
* %option noyyrealloc
*/
/* do not generate the unused input() and unput() helpers */
%option noinput
%option nounput
/* no yywrap() is needed: the scanner stops at the end of the input */
%option noyywrap
/* 8-bit scanner, never interactive */
%option 8bit
%option never-interactive
/* let the scanner maintain the line number in yylineno */
%option yylineno
/* do not select read() based input; the input arrives through YY_INPUT */
%option noread
/* generate the trace code that is switched with yy_flex_debug */
%option debug
  87. ISTR [^\\\"]|\\.|\\\n
  88. STR \"({ISTR}*)\"
  89. CCS \/\*[^\*]*\*+([^\*\/][^\*]*\*+)*\/
  90. CCE \/\/[^\n]*
  91. CCS1 \*\/
  92. ALPHA [A-Za-z]
  93. DIGIT [0-9]
  94. ID ([_]|{ALPHA})([_]|{ALPHA}|{DIGIT})*
  95. INUM [-+]?{DIGIT}+
  96. FNUM1 [-+]?{DIGIT}*\.{DIGIT}+
  97. FNUM2 [-+]?{DIGIT}*\.
  98. FNUM3 [-+]?\.{DIGIT}+
  99. NUM {INUM}|{FNUM1}|{FNUM2}|{FNUM3}
  100. %%
  101. {CCS} { /* c-comment style */ /* lexer does update yylineno */ dp_check_c_comment(yytext+1); }
  102. {CCS1} { /* end of c comment but no start of c comment shouldnothappen */ printf("%s(): end of c-comment without start of c-comment \"*\\\" at line %d skipped\n",__func__,yylineno); }
  103. {CCE} { /* c++ comment style */ /* lexer does update yylineno */ }
  104. "#".* { /* dot comment line */ }
	/* byte order mark, white space and single character tokens */
"\xef\xbb\xbf" { /* utf-8 byte order mark is passed to the parser */ /* this is dot specific */ return (TOKEN_UTF8BOM); }
[\f ]+ { /* skip form feed chars and spaces */ }
[\t] { /* skip tabs */ }
[\n] { /* skip new line */ /* lexer does update yylineno */ }
[\r] { /* skip carriage return */ }
"@" { /* NOTE(review): returns EOF to the parser, presumably an intended end-of-input marker; confirm */ return (EOF); }
"+" { return (TOKEN_PLUS); }
"," { return (TOKEN_COMMA); }
":" { return (TOKEN_COLON); }
";" { return (TOKEN_SC); }
"=" { return (TOKEN_IS); }
"[" { return (TOKEN_BRACKETOPEN); }
"]" { return (TOKEN_BRACKETCLOSE); }
"{" { return (TOKEN_BRACEOPEN); }
"}" { return (TOKEN_BRACECLOSE); }
	/* both edge operators are the same token, the operator text is in yylval.string */
"--" { yylval.string = (char *) dp_uniqstr ((char *) "--"); return (TOKEN_EOP); }
"->" { yylval.string = (char *) dp_uniqstr ((char *) "->"); return (TOKEN_EOP); }
	/* keywords in any mix of upper and lower case; they are listed before */
	/* the identifier rule so that they win a match of equal length */
[Ee][Dd][Gg][Ee] { return(TOKEN_EDGE); }
[Nn][Oo][Dd][Ee] { return(TOKEN_NODE); }
[Dd][Ii][Gg][Rr][Aa][Pp][Hh] { return(TOKEN_DIGRAPH); }
[Gg][Rr][Aa][Pp][Hh] { return(TOKEN_GRAPH); }
[Ss][Uu][Bb][Gg][Rr][Aa][Pp][Hh] { return(TOKEN_SUBGRAPH); }
[Ss][Tt][Rr][Ii][Cc][Tt] { return(TOKEN_STRICT); }
  128. {STR} {
  129. if(yyleng == 2) {
  130. /* string is "" */
  131. yylval.string = (char *)dp_uniqstr((char *)"");
  132. return (TOKEN_QTEXT);
  133. }
  134. /* copy and filter the text, and clear last " */
  135. yytext[yyleng-1] = 0;
  136. tmpp = (char *) dp_calloc (1, yyleng);
  137. p = yytext;
  138. p++; /* skip first " */
  139. q = tmpp;
  140. while (*p)
  141. {
  142. if (*p =='\\') {
  143. if(*(p+1) == 0) {
  144. *q = '\\';
  145. q++;
  146. p++; /* "\" as last char */
  147. } else if (*(p+1) =='\n') {
  148. p = p + 2; /* skip "\\n" sequence */
  149. } else if (*(p+1) == ' ') {
  150. *q = ' '; /* "\ " is " " special in html label */
  151. q++;
  152. p = p + 2;
  153. } else if (*(p+1) == '|') {
  154. *q = '\\'; /* special in record label */
  155. q++;
  156. *q = '|';
  157. q++;
  158. p = p + 2;
  159. } else if (*(p+1) == '{') {
  160. *q = '\\'; /* special in record label */
  161. q++;
  162. *q = '{';
  163. q++;
  164. p = p + 2;
  165. } else if (*(p+1) == '}') {
  166. *q = '\\'; /* special in record label */
  167. q++;
  168. *q = '}';
  169. q++;
  170. p = p + 2;
  171. } else if (*(p+1) == '\\') {
  172. /* \\ is translated into a single \ */
  173. *q = '\\';
  174. q++;
  175. p = p + 2;
  176. } else if (*(p+1) == '"') {
  177. /* \" becomes " */
  178. *q = '"';
  179. q++;
  180. p = p + 2;
  181. } else {
  182. *q = *p; /* copy other esc sequences */
  183. p++;
  184. q++;
  185. }
  186. } else {
  187. *q = *p; /* copy regular chars */
  188. p++;
  189. q++;
  190. }
  191. }
  192. yylval.string = (char *) dp_uniqstr ((char *)tmpp);
  193. tmpp = (char *) dp_free ((void *) tmpp);
  194. p = NULL;
  195. q = NULL;
  196. return (TOKEN_QTEXT);
  197. }
	/* identifier: must stay behind the keyword rules, which win a match of equal length */
{ID} {
/* the identifier text is stored via dp_uniqstr */
yylval.string = (char *) dp_uniqstr ((char *)yytext);
return (TOKEN_TEXT);
}
	/* number: handed to the parser as text, it is not converted here */
{NUM} {
yylval.string =(char *)dp_uniqstr ((char *)yytext);
return (TOKEN_NUM);
}
  206. /* html label, but if it is <> return "" */
  207. [<] {
  208. BEGIN(htmlstr);
  209. htmlnest = 1;
  210. yylval.string = (char *)"<";
  211. }
  212. <htmlstr>[>] {
  213. htmlnest--;
  214. if(htmlnest) {
  215. yylval.string = (char *) dp_ccat((char *) yylval.string,(char *)yytext);
  216. } else {
  217. yylval.string = dp_ccat((char *) yylval.string,(char *) ">");
  218. BEGIN(INITIAL);
  219. if (strlen(yylval.string) == 2) {
  220. yylval.string = (char *) "";
  221. }
  222. return (TOKEN_HTEXT);
  223. }
  224. }
  225. <htmlstr>[<] {
  226. htmlnest++;
  227. yylval.string = (char *) dp_ccat((char *) yylval.string,(char *)yytext);
  228. }
  229. <htmlstr>[\\][<] {
  230. yylval.string = (char *) dp_ccat((char *) yylval.string,(char *)"&lt;");
  231. }
  232. <htmlstr>[\\][>] {
  233. yylval.string = (char *)dp_ccat((char *)yylval.string,(char *)"&gt;");
  234. }
  235. <htmlstr>[\\][\n] {
  236. /* yylineno++ is update by lexer code */ ;
  237. }
  238. <htmlstr>([^><\\]*|[\\].) {
  239. yylval.string = (char *)dp_ccat((char *)yylval.string,(char *)yytext);
  240. }
  241. . { return ((int)yytext[0]); }
  242. %%
  243. /* */
  244. void dp_lex_init (gzFile f, int debugflag)
  245. {
  246. yylineno = 1;
  247. gvzin = f;
  248. yyin = NULL;
  249. /* activate debug in lexer */
  250. yy_flex_debug = debugflag;
  251. /* activate debug in glr parser */
  252. /* yydebug is now set in main() */
  253. /* yydebug = debugflag; */
  254. htmlnest = 0;
  255. /* yyset_debug (debugflag); */
  256. return;
  257. }
  258. /* */
  259. static void dp_lex_clear (void)
  260. {
  261. if (tmpp) {
  262. tmpp = (char *) dp_free ((void *) tmpp);
  263. }
  264. p = NULL;
  265. q = NULL;
  266. htmlnest = 0;
  267. return;
  268. }
  269. void dp_lex_deinit (void)
  270. {
  271. yy_flex_debug = 0;
  272. dp_lex_clear ();
  273. yylex_destroy ();
  274. gvzin = (gzFile)0;
  275. yyin = NULL;
  276. return;
  277. }
  278. void dp_yydebug (int debugflag)
  279. {
  280. /* activate debug in lexer */
  281. yy_flex_debug = debugflag;
  282. /* activate debug in glr parser */
  283. yydebug = debugflag;
  284. return;
  285. }
  286. /* check for comment inside comment */
  287. static void dp_check_c_comment (char *str)
  288. {
  289. char *ptr0 = NULL;
  290. char *ptr = NULL;
  291. if(str == NULL) {
  292. return;
  293. }
  294. if(strlen(str) < 1) {
  295. return;
  296. }
  297. ptr=str;
  298. while (*ptr) {
  299. ptr0 = strchr (ptr, '/');
  300. if (ptr0) {
  301. if ((*(ptr0+1)) == '*') {
  302. printf ("%s(): start of c-comment \"/*\" inside c-comment at line %d\n",__func__,yylineno);
  303. ptr0++;
  304. } else if ((*(ptr0+1)) == '/') {
  305. printf ("%s(): start of c++-comment \"//\" inside c-comment at line %d\n",__func__,yylineno);
  306. ptr0++;
  307. } else {
  308. /* nop */
  309. }
  310. ptr=ptr0;
  311. }
  312. ptr++;
  313. }
  314. return;
  315. }
  316. /* end */