dphl.l 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789
  1. /*
  2. * Copyright 2021
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. * SPDX-License-Identifier: GPL-3.0+
  18. * License-Filename: LICENSE
  19. */
  20. /* dphl.* is about Dot Parsing Html Labels */
  21. /*
  22. * "<br />" is OK, but "< br/>" and "<br/ >" are not OK
  23. * "<br/>" is lexed as the token "<br" followed by the token "/>"
  24. */
  25. /* the gcc-11 analyzer has several problems with the lexers generated by flex-2.4.6, even with our own customized skeleton */
  26. /* on a malloc error the lexer leaks memory and calls exit() */
  /* definitions section: C prologue, scanner options and named patterns */
%{
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
/* strlen(), strcpy() and memset() are used below; do not depend on the skeleton to include this */
#include <string.h>
/* strcasecmp() */
#include <strings.h>
#include <zlib.h>
#include "splay-tree.h"
#include "lex.yy.h"
#include "dot.tab.h"
#include "dp.h"
#include "dpus.h"
#include "dphlparser.h"
#include "dphl.h"
#include "dpmem.h"

/* use GNU GCC compiler builtin strlen */
#undef YY_NEED_STRLEN

/* scratch pointers used by the quoted-string rules while removing escapes:
 * tmp is the copy buffer, pt reads from yytext, q writes into tmp
 */
static char *tmp = NULL;
static char *pt = NULL;
static char *q = NULL;

/* when non-zero html_lex() skips white space in front of the next chunk;
 * the start-tag rule clears it for the text-style tags
 */
static int skipws = 1;

/* own yyalloc
 * void *yyalloc (size_t n) { return(calloc(1,n)); }
 * void yyfree (void *ptr) { if(ptr) { free (ptr); } return; }
 * void *yyrealloc (void *ptr, size_t n) { return (realloc (ptr,n)); }
 */
%}

/* use own yyalloc
 * %option noyyalloc
 * %option noyyfree
 * %option noyyrealloc
 */

/* no yywrap() at end-of-file */
%option noyywrap
/* do not push back chars in stream function() */
%option nounput
/* no input() function */
%option noinput
/* input does not come from a tty. */
%option never-interactive
/* 8-bits scanner */
%option 8bit
/* use nameprefix for routines */
%option prefix="hl"
/* add debug output */
%option debug
/* use clib to read data */
%option noread
/* line no */
%option yylineno

/* "string" as in var="value" */
ISTR [^\\\"]|\\.|\\\n
STR \"({ISTR}*)\"
/* 'string' as in var='value' */
ISTRA [^\\\']|\\.|\\\n
STRA \'({ISTRA}*)\'
/* chars in a <var>; may be empty, so a lone "<" still reaches the start-tag rule */
chars [a-zA-Z]*
/* chars in a tag with a '-' for point-size */
charsattr [a-zA-Z-]*
nl (\r\n|\r|\n)
open {nl}?"<"
close ">"{nl}?
comment {open}"!--"([^-]|"-"[^-])*"--"{close}
  94. %%
  /* input that produces no token: html comments and white space */
{comment} { /* comment is dropped; it may not contain <> chars or nested comments */ }
[\f ]+ { /* run of form feeds and blanks */ }
[\t] { /* a tab */ }
[\n] { /* a new line; the scanner itself keeps yylineno up to date */ }
[\r] { /* a carriage return */ }
  /* "<name": first part of an opening tag, the name is compared without case */
"<"{chars} {
    /* the tag name starts after the '<' that the pattern guarantees */
    const char *name = yytext + 1;

    /* the semantic value is the complete matched text, '<' included */
    hlylval.string = dp_uniqstr (yytext);

    /* text-style tags clear skipws so html_lex() keeps the white space that follows */
    if (strcasecmp (name, "b") == 0) {
        skipws = 0;
        return (HL_B);
    }
    if (strcasecmp (name, "br") == 0) {
        skipws = 0;
        dphl_rbr ();
        return (HL_BR);
    }
    if (strcasecmp (name, "font") == 0) {
        skipws = 0;
        return (HL_FONT);
    }
    if (strcasecmp (name, "hr") == 0) {
        dphl_rhr ();
        return (HL_HR);
    }
    if (strcasecmp (name, "html") == 0) {
        return (HL_HTML);
    }
    if (strcasecmp (name, "i") == 0) {
        skipws = 0;
        return (HL_I);
    }
    if (strcasecmp (name, "img") == 0) {
        return (HL_IMG);
    }
    if (strcasecmp (name, "o") == 0) {
        skipws = 0;
        return (HL_O);
    }
    if (strcasecmp (name, "s") == 0) {
        skipws = 0;
        return (HL_S);
    }
    if (strcasecmp (name, "sub") == 0) {
        skipws = 0;
        return (HL_SUB);
    }
    if (strcasecmp (name, "sup") == 0) {
        skipws = 0;
        return (HL_SUP);
    }
    if (strcasecmp (name, "table") == 0) {
        return (HL_TABLE);
    }
    if (strcasecmp (name, "th") == 0) {
        /* undocumented: "th" is handled the same as "td" */
        return (HL_TD);
    }
    if (strcasecmp (name, "td") == 0) {
        return (HL_TD);
    }
    if (strcasecmp (name, "tr") == 0) {
        return (HL_TR);
    }
    if (strcasecmp (name, "u") == 0) {
        skipws = 0;
        return (HL_U);
    }
    if (strcasecmp (name, "vr") == 0) {
        dphl_rvr ();
        return (HL_VR);
    }

    /* no known tag: leave a message in dp_errmsg and stop lexing this buffer */
    memset (dp_errmsg, 0, 256);
    snprintf (dp_errmsg, (256 - 1), "%s(): unknown start tag `%s' in html string at line %d\n", __func__, yytext, hllineno);
    return (EOF);
}
  /* "</name": first part of a closing tag, the name is compared without case */
"</"{chars} {
    /* closing tag names and the token each one yields */
    static const struct {
        const char *name;
        int token;
    } closers[] = {
        { "b", HL_C_B },
        /* undocumented: <br></br> is the same as <br/> */
        { "br", HL_C_BR },
        { "font", HL_C_FONT },
        { "html", HL_C_HTML },
        /* it seems <hr></hr> could be the same as <hr/> */
        { "hr", HL_C_HR },
        { "i", HL_C_I },
        /* it seems <img></img> could be the same as <img/> */
        { "img", HL_C_IMG },
        { "o", HL_C_O },
        { "s", HL_C_S },
        { "sub", HL_C_SUB },
        { "sup", HL_C_SUP },
        { "table", HL_C_TABLE },
        /* undocumented: "th" is handled the same as "td" */
        { "th", HL_C_TD },
        { "td", HL_C_TD },
        { "tr", HL_C_TR },
        { "u", HL_C_U },
        /* it seems <vr></vr> could be the same as <vr/> */
        { "vr", HL_C_VR }
    };
    /* the tag name starts after the "</" that the pattern guarantees */
    const char *name = yytext + 2;
    size_t k = 0;

    /* the semantic value is the complete matched text, "</" included */
    hlylval.string = dp_uniqstr (yytext);

    for (k = 0; k < (sizeof (closers) / sizeof (closers[0])); k++) {
        if (strcasecmp (name, closers[k].name) == 0) {
            return (closers[k].token);
        }
    }

    /* no known end tag: leave a message in dp_errmsg and stop lexing this buffer */
    memset (dp_errmsg, 0, 256);
    snprintf (dp_errmsg, (256 - 1), "%s(): unknown end tag `%s' in html string at line %d\n", __func__, yytext, hllineno);
    return (EOF);
}
  /* punctuation inside a tag; each token carries its own text as value */
"/>" {
    /* self-closing end, used by br, hr, vr and img */
    hlylval.string = dp_uniqstr (yytext);
    return (HL_SC);
}
">" {
    /* plain end of every other tag */
    hlylval.string = dp_uniqstr (yytext);
    return (HL_C);
}
"=" {
    /* separator in var=value */
    hlylval.string = dp_uniqstr (yytext);
    return (HL_IS);
}
  /* "string" as in var="value": the token value is the text without quotes and with escapes resolved */
{STR} {
    /* only the two quotes were matched: the value is the empty string */
    if (strlen (yytext) == 2) {
        hlylval.string = dp_uniqstr ((char *) "");
        return (HL_QSTR);
    }

    /* zeroed scratch copy; the result is never longer than the match */
    tmp = (char *) dp_calloc (1, (yyleng + 1));

    /* cut off the closing double quote */
    yytext[yyleng - 1] = 0;

    /* read behind the opening double quote, write at the start of the copy */
    q = tmp;
    for (pt = yytext + 1; *pt != 0; ) {
        if (*pt != '\\') {
            /* ordinary char is taken over as is */
            *q++ = *pt++;
            continue;
        }
        switch (pt[1]) {
        case 0:
            /* backslash is the very last char: keep it */
            *q++ = '\\';
            pt = pt + 1;
            break;
        case '\n':
            /* backslash followed by a new line: both are dropped */
            pt = pt + 2;
            break;
        case '"':
            /* \" turns into " */
            *q++ = '"';
            pt = pt + 2;
            break;
        case '\\':
            /* \\ turns into \ */
            *q++ = '\\';
            pt = pt + 2;
            break;
        default:
            /* any other escape stays as written, backslash included */
            *q++ = '\\';
            *q++ = pt[1];
            pt = pt + 2;
            break;
        }
    }

    hlylval.string = dp_uniqstr (tmp);

    /* release the scratch copy and clear the shared pointers */
    tmp = (char *) dp_free ((void *) tmp);
    pt = NULL;
    q = NULL;
    return (HL_QSTR);
}
  /* 'string' as in var='value': the token value is the text without quotes and with escapes resolved */
{STRA} {
    /* only the two quotes were matched: the value is the empty string */
    if (strlen (yytext) == 2) {
        hlylval.string = dp_uniqstr ((char *) "");
        return (HL_QSTR);
    }

    /* zeroed scratch copy; the result is never longer than the match */
    tmp = (char *) dp_calloc (1, (yyleng + 1));

    /* cut off the closing quote */
    yytext[yyleng - 1] = 0;

    /* read behind the opening quote, write at the start of the copy */
    q = tmp;
    for (pt = yytext + 1; *pt != 0; ) {
        if (*pt != '\\') {
            /* ordinary char is taken over as is */
            *q++ = *pt++;
            continue;
        }
        switch (pt[1]) {
        case 0:
            /* backslash is the very last char: keep it */
            *q++ = '\\';
            pt = pt + 1;
            break;
        case '\n':
            /* backslash followed by a new line: both are dropped */
            pt = pt + 2;
            break;
        case '\'':
            /* \' turns into ' */
            *q++ = '\'';
            pt = pt + 2;
            break;
        case '\\':
            /* \\ turns into \ */
            *q++ = '\\';
            pt = pt + 2;
            break;
        default:
            /* any other escape stays as written, backslash included */
            *q++ = '\\';
            *q++ = pt[1];
            pt = pt + 2;
            break;
        }
    }

    hlylval.string = dp_uniqstr (tmp);

    /* release the scratch copy and clear the shared pointers */
    tmp = (char *) dp_free ((void *) tmp);
    pt = NULL;
    q = NULL;
    return (HL_QSTR);
}
  /* attribute name as in var=value; names that are not recognized are returned as HL_STR */
{charsattr} {
    /* known attribute names and the token each one yields */
    static const struct {
        const char *name;
        int token;
    } attrs[] = {
        { "align", HL_ALIGN },
        { "balign", HL_BALIGN },
        { "bgcolor", HL_BGCOLOR },
        { "border", HL_BORDER },
        { "cellborder", HL_CELLBORDER },
        { "cellpadding", HL_CELLPADDING },
        { "cellspacing", HL_CELLSPACING },
        { "color", HL_COLOR },
        { "columns", HL_COLUMNS },
        { "colspan", HL_COLSPAN },
        { "face", HL_FACE },
        { "fixedsize", HL_FIXEDSIZE },
        { "gradientangle", HL_GRADIENTANGLE },
        { "height", HL_HEIGHT },
        { "href", HL_HREF },
        { "id", HL_ID },
        /* both spellings give the same token */
        { "point-size", HL_POINTSIZE },
        { "pointsize", HL_POINTSIZE },
        { "port", HL_PORT },
        { "rows", HL_ROWS },
        { "rowspan", HL_ROWSPAN },
        { "scale", HL_SCALE },
        { "sides", HL_SIDES },
        { "src", HL_SRC },
        { "style", HL_STYLE },
        { "target", HL_TARGET },
        { "title", HL_TITLE },
        { "tooltip", HL_TOOLTIP },
        { "valign", HL_VALIGN },
        { "width", HL_WIDTH }
    };
    size_t k = 0;

    /* the semantic value is the name exactly as written */
    hlylval.string = dp_uniqstr (yytext);

    /* compare without case against every known name */
    for (k = 0; k < (sizeof (attrs) / sizeof (attrs[0])); k++) {
        if (strcasecmp (yytext, attrs[k].name) == 0) {
            return (attrs[k].token);
        }
    }

    /* unknown */
    return (HL_STR);
}
  /* end of the scanned buffer */
<<EOF>> {
    return (EOF);
}
  /* any other single char is handed over as its own char code, which causes a parse error */
. {
    return ((int) yytext[0]);
}
  477. %%
  478. /* lex buffer */
  479. static YY_BUFFER_STATE buffer = NULL;
  480. /* clear buffer */
  481. static int clearit = 0;
  482. /* lex buffer pending */
  483. static int pending = 0;
  484. /* lex buffer length */
  485. static int buflen = 0;
  486. /* data buffer */
  487. static char *dbuf = (char *)0;
  488. /* tag buffer */
  489. static char *tbuf = (char *)0;
  490. /* input buffer */
  491. static char *ibuf = (char *)0;
  492. /* pointer in ibuf */
  493. static char *bufptr = (char *)0;
  494. /* init
  495. * the string has at least 1 char as in "< >"
  496. */
  497. void html_lex_init (int dbg, char *str, int line)
  498. {
  499. if (dbg || 0) {
  500. hl_flex_debug = 1;
  501. hlydebug = 1;
  502. } else {
  503. hl_flex_debug = 0;
  504. hlydebug = 0;
  505. }
  506. /* set lineno to where the node is defind */
  507. hllineno = line;
  508. /* extra space for \0\0 */
  509. buflen = strlen (str) + 2;
  510. ibuf = (char *) dp_calloc (1, buflen);
  511. if (ibuf == NULL) { return; }
  512. /* copy, skip first < */
  513. strcpy (ibuf, (str+1));
  514. /* wipe last > */
  515. ibuf[strlen(ibuf)-1] = 0;
  516. /* set where to start lexing */
  517. bufptr = ibuf;
  518. /* skip leading ws todo XXX is this always correct? */
  519. while (*bufptr)
  520. {
  521. if (*bufptr == ' ') {
  522. } else if (*bufptr == '\n') {
  523. hllineno++;
  524. } else if (*bufptr == '\r') {
  525. } else if (*bufptr == '\f') {
  526. } else if (*bufptr == '\t') {
  527. } else {
  528. break;
  529. }
  530. bufptr++;
  531. }
  532. skipws = 1;
  533. /* data buffer */
  534. dbuf = (char *) dp_calloc (1, buflen);
  535. if (dbuf == NULL) { return; }
  536. /* tag buffer */
  537. tbuf = (char *) dp_calloc (1, buflen);
  538. if (tbuf == NULL) { return; }
  539. /* lex buffer pending */
  540. pending = 0;
  541. /* clear buffer */
  542. clearit = 0;
  543. return;
  544. }
  545. /* de-init */
  546. void html_lex_deinit (void)
  547. {
  548. hl_flex_debug = 0;
  549. /* clear buffer */
  550. if (clearit || 1) {
  551. /* this may fix valgrind issue */
  552. yylex_destroy();
  553. /* yy_delete_buffer (buffer); */
  554. buffer = NULL;
  555. clearit = 0;
  556. }
  557. /* lex buffer pending */
  558. pending = 0;
  559. /* lex buffer length */
  560. buflen = 0;
  561. /* data buffer */
  562. if (dbuf) {
  563. dbuf = (char *) dp_free ((void *) dbuf);
  564. }
  565. /* tag buffer */
  566. if (tbuf) {
  567. tbuf = (char *) dp_free ((void *) tbuf);
  568. }
  569. /* input buffer */
  570. if( ibuf) {
  571. ibuf = (char *) dp_free ((void *) ibuf);
  572. }
  573. /* pointer in ibuf */
  574. bufptr = (char *)0;
  575. return;
  576. }
  577. /* lex one token */
  578. int html_lex (void)
  579. {
  580. int token = 0;
  581. int i = 0;
  582. int j = 0;
  583. char *p = NULL;
  584. for (i=0; i < 100 ; i++)
  585. {
  586. /* lex buffer pending */
  587. if (pending == 1) {
  588. /* lex from buffer */
  589. token = hllex ();
  590. /* at end-of-string in tbuf */
  591. if (token == EOF) {
  592. /* this may fix valgrind issue */
  593. yylex_destroy();
  594. /* yy_delete_buffer (buffer); */
  595. buffer = NULL;
  596. clearit = 0;
  597. pending = 0;
  598. continue;
  599. }
  600. /* at regular token */
  601. pending = 1;
  602. /* hlylval.string is set in the actions */
  603. return (token);
  604. } else {
  605. /* refill buffer */
  606. if (*bufptr == 0) {
  607. /* no data anymore */
  608. return (EOF);
  609. }
  610. if (skipws) {
  611. /* skip leading ws */
  612. while (*bufptr)
  613. {
  614. if (*bufptr == ' ') {
  615. } else if (*bufptr == '\n') {
  616. hllineno++;
  617. } else if (*bufptr == '\r') {
  618. } else if (*bufptr == '\f') {
  619. } else if (*bufptr == '\t') {
  620. } else {
  621. break;
  622. }
  623. bufptr++;
  624. }
  625. } else {
  626. skipws = 1;
  627. }
  628. if (*bufptr == 0) {
  629. /* no data anymore */
  630. return (EOF);
  631. }
  632. if (*bufptr == '<') {
  633. /* fill tag buffer */
  634. for (j=0; j < buflen ; j++)
  635. {
  636. tbuf[j] = 0;
  637. }
  638. j = 0;
  639. while (*bufptr)
  640. {
  641. if (*bufptr == '>') {
  642. tbuf[j] = *bufptr;
  643. j++;
  644. bufptr++;
  645. break;
  646. }
  647. tbuf[j] = *bufptr;
  648. j++;
  649. bufptr++;
  650. }
  651. if (j == 0) {
  652. return (EOF);
  653. }
  654. /* start lexing from buffer */
  655. buffer = yy_scan_string (tbuf);
  656. clearit = 1;
  657. /* lex tag buffer */
  658. pending = 1;
  659. continue;
  660. } else {
  661. /* fill data buffer */
  662. for (j=0; j < buflen ; j++)
  663. {
  664. dbuf[j] = 0;
  665. }
  666. j = 0;
  667. p = dbuf;
  668. while (*bufptr)
  669. {
  670. if (*bufptr == '<') {
  671. break;
  672. }
  673. /* update yylineno */
  674. if (*bufptr == '\n') {
  675. hllineno++;
  676. }
  677. p[j] = *bufptr;
  678. j++;
  679. bufptr++;
  680. }
  681. if (j == 0) {
  682. return (EOF);
  683. }
  684. pending = 0;
  685. /* return copy of the data area */
  686. hlylval.string = dp_uniqstr (dbuf);
  687. if (hlydebug) {
  688. printf ("token HL_DATA \"%s\"\n",hlylval.string);
  689. }
  690. return (HL_DATA);
  691. }
  692. }
  693. }
  694. /* shouldnothappen */
  695. printf ("%s(): huh?\n",__func__);
  696. return (EOF);
  697. }
  698. /* end */