  /* emit.c */
  /*
   * Copyright 2021
   *
   * This program is free software: you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation, either version 3 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program. If not, see <http://www.gnu.org/licenses/>.
   *
   * SPDX-License-Identifier: GPL-3.0+
   * License-Filename: LICENSE
   */
  20. #include <assert.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include "arg.h"
  25. #include "error.h"
  26. #include "syntax.h"
  27. #include "template.h"
  /* Name of the grammar; set from the grammar node's text in grammar_pre(). */
  static char *g_name;

  /* Root node of the grammar; set in grammar_pre(). */
  static struct s_node *g_node;

  /* The rule whose code is currently being emitted; set in rule_pre(). */
  static struct s_node *cur_rule;

  /* One entry of the binding stack: a name and the node associated with it. */
  struct assoc {
      char *name;
      struct s_node *value;
  };

  /* The binding stack itself, the number of entries in use, and the number
   * of entries allocated. */
  static struct assoc *a_stack;
  static int a_ptr;
  static int a_alloc;
  #if 0
  /* Debugging aid (compiled out): print every entry of the binding stack,
   * with its position, to stderr. */
  static void assoc_dump(void) {
      int pos = 0;

      while (pos < a_ptr) {
          fprintf(stderr, "var %s @ pos %d\n", a_stack[pos].name, pos);
          ++pos;
      }
  }
  #endif
  /* Low level routines to output things. */

  /* Number of the output line currently being written (starts at 1);
   * advanced by c_strln() and by c_raw() for every newline written. */
  int nr = 1;

  /* Number of indentation units c_str() writes at the start of a line. */
  static int indent = 2;

  /* Non-zero when the next c_str() begins a new line and must indent. */
  static int need_indent = 0;
  48. static void c_str(char *s) {
  49. assert(!strchr(s, '\n'));
  50. if (*s == '#') need_indent = 0;
  51. if (need_indent) {
  52. int i;
  53. for (i = 0; i < indent; ++i) { fputs(" ", stdout); }
  54. need_indent = 0;
  55. }
  56. fputs(s, stdout);
  57. }
  58. static void c_strln(char *s) {
  59. c_str(s);
  60. putchar('\n');
  61. ++nr;
  62. need_indent = 1;
  63. }
  64. static void c_raw(char *s) {
  65. while (*s) {
  66. if (*s == '\n') ++nr;
  67. putchar(*s);
  68. ++s;
  69. }
  70. }
  /* Terminate the current statement: write ";" and end the line. */
  static void c_semi(void) {
      c_strln(";");
  }
  /* (A c_char() helper that printed a single character with "%c" used to
   * live here; it is currently disabled.) */

  /* Write the decimal representation of i to stdout. */
  static void c_int(int i) {
      printf("%d", i);
  }
  /* Write the decimal representation of l to stdout. */
  static void c_long(long l) {
      printf("%ld", l);
  }
  75. static void c_open(void) { ++indent; c_strln(" {"); }
  76. static void c_close(void) { --indent; c_strln("}"); }
  77. static void c_closet(void) { --indent; c_str("} "); }
  /* Tells c_code() whether the emitted code is the right-hand side of an
   * assignment (assign) or a bare parenthesised expression (no_assign). */
  enum opt_assign {
      no_assign,
      assign
  };
  79. static void c_code(struct s_node *n, enum opt_assign a) {
  80. /* XXX except for a couple of desugared cases, there is always a coords as
  81. * a first child of expr, so it would be better to fix sugar.c and assert
  82. * here */
  83. if (n->first && n->first->type == coords) {
  84. int i;
  85. c_strln("");
  86. c_str("#line "); c_int(n->first->pair[0]);
  87. c_str(" \""); c_str(arg_input()); c_strln("\"");
  88. /* 1-based counting; subtract 1 for upcoming '(' and maybe '=' */
  89. for (i = 1; i < n->first->pair[1] - 1 - assign; ++i) putchar(' ');
  90. }
  91. if (a) putchar('=');
  92. putchar('('); c_raw(n->text); putchar(')');
  93. if (a) putchar(';');
  94. c_raw("\n");
  95. /* now a #line to take us back to the C output; line number of next line */
  96. c_str("#line "); c_long(nr + 1);
  97. c_str(" \""); c_str(arg_output()); c_strln("\"");
  98. }
  99. static void c_defines(void) {
  100. c_str("#define PACC_NAME "); c_strln(g_node->text);
  101. if (arg_feed()) {
  102. c_str("#define PACC_FEED_NAME "); c_str(g_node->text); c_strln("_feed");
  103. }
  104. c_strln("");
  105. c_strln("#define PACC_ASSERT 1");
  106. c_strln("#define PACC_CAN_TRACE 1");
  107. c_strln("");
  108. }
  109. static void associate(char *n, struct s_node *s) {
  110. if (a_ptr == a_alloc) {
  111. int l = 2 * a_alloc + 1;
  112. struct assoc *a = realloc(a_stack, l * sizeof *a_stack);
  113. if (!a) nomem();
  114. a_stack = a;
  115. a_alloc = l;
  116. }
  117. a_stack[a_ptr].name = n;
  118. a_stack[a_ptr++].value = s;
  119. assert(a_ptr > 0);
  120. }
  /* Non-zero between bind_pre() and bind_post(); emit_call() pushes a dummy
   * binding only while this is zero. */
  int associating = 0;

  /* Stack of saved binding-stack heights (see frame_start()/frame_end()),
   * the number of entries in use, and the number allocated. */
  static int *f_stack;
  static int f_ptr;
  static int f_alloc;
  124. static void frame_start() {
  125. if (f_ptr == f_alloc) {
  126. int l = 2 * f_alloc + 1;
  127. int *f = realloc(f_stack, l * sizeof *f_stack);
  128. if (!f) nomem();
  129. f_stack = f;
  130. f_alloc = l;
  131. }
  132. f_stack[f_ptr++] = a_ptr;
  133. }
  134. static void frame_end() {
  135. assert(f_ptr > 0);
  136. a_ptr = f_stack[--f_ptr];
  137. }
  /* List of distinct type names seen so far (see type_list()), the number
   * of entries in use, and the number allocated. */
  static char **t_list;
  static int t_max;
  static int t_alloc;
  140. static int type_list(char *t) {
  141. int i;
  142. if (strcmp(t, "void") == 0) return 0;
  143. for (i = 0; i < t_max; ++i) {
  144. if (strcmp(t, t_list[i]) == 0) return i;
  145. }
  146. if (t_max == t_alloc) {
  147. int l = 2 * t_alloc + 1;
  148. char **t = realloc(t_list, l * sizeof *t_list);
  149. if (!t) nomem();
  150. t_list = t;
  151. t_alloc = l;
  152. }
  153. t_list[t_max] = t;
  154. return t_max++;
  155. }
  156. static int rule_u(struct s_node *r) {
  157. return type_list(r->first->text);
  158. }
  159. static void grammar_pre(struct s_node *n) {
  160. int i, r = 0;
  161. struct s_node *p;
  162. static int cooked = 0;
  163. g_node = n;
  164. if (arg_defines()) {
  165. c_str("#include \"");c_str(arg_defines());c_strln("\"");
  166. if (arg_feed() && cooked) {
  167. c_strln("#undef PACC_NAME");
  168. c_strln("#define PACC_NAME PACC_FEED_NAME");
  169. }
  170. } else
  171. c_defines();
  172. ++cooked;
  173. pre_decl();
  174. /* We slightly simplify both building & walking the tree and insist
  175. * that every grammar starts with a preamble, which may be null.
  176. * It's a bit odd to represent no preamble with an empty preamble
  177. * node. */
  178. p = n->first;
  179. assert(p->type == preamble);
  180. if (!arg_defines() && p->text) c_raw(p->text);
  181. p = p->next;
  182. for ( ; p; p = p->next) {
  183. assert(p->type == rule);
  184. ++r;
  185. }
  186. c_str("static const int n_rules = "); c_int(r); c_semi();
  187. c_str("static const int start_rule_id = "); c_long(n->first->next->id);
  188. c_semi();
  189. g_name = n->text;
  190. /* type of start rule is always u0 */
  191. type_list(n->first->next->first->text);
  192. for (p = n->first; p; p = p->next)
  193. if (p->type == rule) type_list(p->first->text);
  194. c_str("union PACC_SYM(vals)"); c_open();
  195. for (i = 0; i < t_max; ++i) {
  196. c_str(t_list[i]); c_str(" u"); c_int(i); c_semi();
  197. }
  198. c_close(); c_semi();
  199. /* XXX just for debugging */
  200. c_str("#define TYPE_PRINTF ");
  201. if (strcmp(n->first->next->first->text, "int") == 0) c_str("\"%d\"");
  202. else if (strcmp(n->first->next->first->text, "char *") == 0) c_str("\"%s\"");
  203. else c_str("\"%p\"");
  204. c_strln("");
  205. c_str("#define PACC_TYPE "); c_strln(n->first->next->first->text);
  206. pre_engine();
  207. c_str("_st=");
  208. c_long(n->first->type == preamble ? n->first->next->id : n->first->id);
  209. c_semi();
  210. c_strln("goto top;");
  211. c_strln("contin:");
  212. c_strln("_st=_cont;");
  213. c_strln("PACC_TRACE fprintf(stderr, \"continuing in state %d\\n\", _cont);");
  214. c_strln("top:");
  215. c_strln("PACC_TRACE fprintf(stderr, \"switch to state %d\\n\", _st);");
  216. c_str("switch(_st)"); c_open();
  217. }
  /* Finish the grammar: emit the final "case -1", close the state switch
   * opened by grammar_pre(), and append the post_engine() template. */
  static void grammar_post(__attribute__((unused)) struct s_node *n) {
      c_strln("case -1: break;");
      c_close();

      post_engine();
  }
  223. static void debug_pre(char *type, struct s_node *n) {
  224. c_str("PACC_TRACE fprintf(stderr, \""); c_str(type); c_str(" ");
  225. c_long(n->id); c_strln(" @ col %zu?\\n\", _x);");
  226. }
  227. static void debug_post(char *type, struct s_node *n) {
  228. //printf("PACC_TRACE fprintf(stderr, \"%s %ld @ col %%ld => %%s\\n\", _x, status != no_parse ? \"yes\" : \"no\");\n",
  229. //type, n->id);
  230. c_str("PACC_TRACE fprintf(stderr, \""); c_str(type); c_str(" "); c_long(n->id);
  231. c_strln(" %zu => %s\\n\", _x, _status != no_parse ? \"yes\" : \"no\");");
  232. }
  233. /* We recognise a properly-escaped C string in the grammar, and copy
  234. * that verbatim into the generated parser. That means we have to be a
  235. * bit careful in calculating the string's length. */
  236. /* XXX no, not a *bit* careful but *very* careful, as we need to support
  237. * the full C:1999 syntax, including octal, hex, and universal escapes.
  238. * Furthermore, we will need test cases for it all.
  239. */
  240. static void literal(struct s_node *n) {
  241. char *p;
  242. int l; /* length of named string */
  243. l = 0;
  244. for (p = n->text; *p; ++p) {
  245. if (*p == '\\') {
  246. ++p;
  247. assert(*p);
  248. switch (*p) {
  249. case '\'': case '"': case '?': case '\\':
  250. case 'a': case 'b': case 'f': case 'n':
  251. case 'r': case 't': case 'v':
  252. break;
  253. default:
  254. assert(0);
  255. }
  256. }
  257. ++l;
  258. }
  259. debug_pre("lit", n);
  260. c_str("PACC_TRACE fprintf(stderr, \"lit "); c_long(n->id);
  261. c_strln(" @ col %zu => \", _x);");
  262. c_str("if (_x+"); c_int(l); c_str(" <= _pacc->input_length && ");
  263. c_str("memcmp(\""); c_str(n->text); c_str("\", _pacc->string + _x, ");
  264. c_int(l); c_str(") == 0)"); c_open();
  265. c_strln("_status = parsed;");
  266. c_str("_x += "); c_int(l); c_semi();
  267. c_close(); c_str("else"); c_open();
  268. c_str("_pacc_error(_pacc, \".\\\""); c_str(n->text);
  269. c_strln("\\\"\", _x);");
  270. c_strln("_status = no_parse;");
  271. c_close();
  272. debug_post("lit", n);
  273. }
  /* assumes utf-8 encoding */
  /*
   * Emit the matcher for "any character": if input remains, decode one
   * UTF-8 character at _x (panicking on invalid input) and advance _x past
   * it; otherwise set _status to no_parse.
   *
   * n is used for the trace output, so the unused attribute is merely
   * permissive here.
   * NOTE(review): unlike literal() and cclass_post(), the success branch
   * does not set _status = parsed -- presumably it relies on the enclosing
   * sequence having done so; confirm.
   */
  static void any_emit(__attribute__((unused)) struct s_node *n) {
      debug_pre("any", n);

      c_str("if (_x < _pacc->input_length)");
      c_open();
      c_strln("_pacc_any_i = _pacc_utf8_char(_pacc->string+_x, _pacc->input_length - _x, &_pacc_utf_cp);");
      c_strln("if (!_pacc_any_i) pacc_panic(\"invalid UTF-8 input\");");
      c_strln("_x += _pacc_any_i;");
      c_closet();
      c_strln("else _status = no_parse;");

      debug_post("any", n);
  }
  284. static void rule_pre(struct s_node *n) {
  285. cur_rule = n;
  286. c_str("case "); c_long(n->id); c_str(": /* "); c_str(n->text);
  287. c_strln(" */");
  288. c_str("PACC_TRACE fprintf(stderr, \"rule "); c_long(n->id);
  289. c_str(" ("); c_str(n->text); c_str(") col %zu\\n\", _x)"); c_semi();
  290. c_strln("_x_rule = _x;");
  291. c_str("cur = _pacc_result(_pacc, _x, "); c_int(cur_rule->id); c_strln(");");
  292. c_str("if ((cur->rule & 3) == uncomputed)"); c_open(); /* memoization ON */
  293. //c_str("if (1 || (cur->rule & 3) == uncomputed)"); c_open(); /* m9n OFF */
  294. }
  295. static void rule_post(struct s_node *n) {
  296. c_strln("cur->rule = (cur->rule & ~3) | _status;");
  297. c_strln("cur->remainder = _x;");
  298. /* Rule made no progress: over-write error */
  299. /* XXX: See test/pacc/err0.c. This is wrong. What is right? */
  300. c_str("if (_pacc->err_col == _x_rule)"); c_open();
  301. c_strln("_pacc->err_valid = 0;");
  302. c_str("_pacc_error(_pacc, \"."); c_str(n->text); c_strln("\", _x_rule);");
  303. c_close();
  304. c_close(); /* this closes the open in rule_pre() */
  305. c_strln("goto contin;");
  306. }
  /* Emit code that pushes the column registers (_x, then cur->ev_valid). */
  static void savecol(void) {
      c_strln("PACC_TRACE fprintf(stderr, \"save column registers\\n\");");
      c_strln("_pacc_Push(_x); _pacc_Push(cur->ev_valid);");
  }
  /* Emit code that pops the column registers saved by savecol(), in the
   * reverse order (cur->ev_valid, then _x). */
  static void restcol(void) {
      c_strln("PACC_TRACE fprintf(stderr, \"restore column registers\\n\");");
      c_strln("_pacc_Pop(cur->ev_valid); _pacc_Pop(_x);");
  }
  /* Emit code that discards the column registers saved by savecol(),
   * keeping the current values. */
  static void accept_col(void) {
      c_strln("PACC_TRACE fprintf(stderr, \"accept column registers\\n\");");
      c_strln("_pacc_Discard(cur->ev_valid); _pacc_Discard(_x);");
  }
  319. static void seq_pre(struct s_node *n) {
  320. frame_start();
  321. c_str("PACC_TRACE fprintf(stderr, \"seq "); c_long(n->id);
  322. c_strln(" @ col %zu?\\n\", _x);");
  323. c_strln("_pacc_Push(_cont);");
  324. c_str("_cont = "); c_long(n->id); c_semi();
  325. c_strln("_status = parsed;");
  326. }
  /* Between two elements of a sequence: emit code that abandons the
   * sequence (jumps to the continuation) if the previous element failed. */
  static void seq_mid(__attribute__((unused)) struct s_node *n) {
      c_str("if (_status == no_parse)");
      c_open();
      c_strln("goto contin;");
      c_close();
  }
  332. static void seq_post(struct s_node *n) {
  333. c_str("case "); c_long(n->id); c_strln(":");
  334. c_strln("_pacc_Pop(_cont);");
  335. c_str("PACC_TRACE fprintf(stderr, \"seq "); c_long(n->id);
  336. c_strln(" @ col %zu => %s\\n\",_x_rule,_status!=no_parse?\"yes\":\"no\");");
  337. c_strln("PACC_TRACE fprintf(stderr, \"col is %zu\\n\", _x);");
  338. frame_end();
  339. }
  /* Start an and-predicate: trace, then save the column registers so they
   * can be restored afterwards. */
  static void and_pre(struct s_node *n) {
      debug_pre("and", n);

      savecol();
  }
  /* Finish an and-predicate: restore the column registers saved by
   * and_pre() (the status is left as it is), then trace. */
  static void and_post(struct s_node *n) {
      restcol();

      debug_post("and", n);
  }
  /* Start a not-predicate: trace, then save the column registers so they
   * can be restored afterwards. */
  static void not_pre(struct s_node *n) {
      debug_pre("not", n);

      savecol();
  }
  /* Finish a not-predicate: emit code that inverts _status, restore the
   * column registers saved by not_pre(), then trace. */
  static void not_post(struct s_node *n) {
      c_strln("_status = (_status == no_parse) ? parsed : no_parse;");

      restcol();

      debug_post("not", n);
  }
  357. static void bind_pre(struct s_node *n) {
  358. /* A binding may only contain a call... */
  359. assert(n->first && n->first->type == call);
  360. /* ... which itself must refer to a rule. */
  361. assert(n->first->first->type == rule);
  362. c_str("/* bind: "); c_str(n->text); c_strln(" */");
  363. debug_pre("bind", n);
  364. /* Save the name bound, and the rule to which it is bound. */
  365. associate(n->text, n->first->first);
  366. associating = 1;
  367. c_str("PACC_TRACE fprintf(stderr, \"will bind "); c_str(n->text);
  368. c_str(" @ rule "); c_long(n->first->first->id);
  369. c_strln(", col %zd\\n\", _x);");
  370. }
  371. static void bind_post(struct s_node *n) {
  372. associating = 0;
  373. c_str("/* end bind: "); c_str(n->text); c_strln(" */");
  374. }
  375. static void declarations(struct s_node *n) {
  376. int i;
  377. struct s_node *p;
  378. for (p = n->first; p; p = p->next) {
  379. if (p->type == coords) continue;
  380. assert(p->type == ident);
  381. /* Search for the name. Start from the end, so scopes nest. */
  382. for (i = a_ptr - 1; i >= 0; --i)
  383. if (a_stack[i].value && strcmp(a_stack[i].name, p->text) == 0)
  384. break;
  385. /* It is not an error if we have a name without a binding: the
  386. * parser will pick out names like "printf" from the code. */
  387. if (i < 0)
  388. continue;
  389. assert(a_stack[i].value->type == rule);
  390. assert(a_stack[i].value->first->type == type);
  391. c_str("/* i is "); c_int(i); c_str(", type is ");
  392. c_str(a_stack[i].value->first->text); c_strln(" */");
  393. c_str(a_stack[i].value->first->text); c_str(" ");
  394. c_str(a_stack[i].name);c_semi();
  395. }
  396. }
  397. static void bindings(struct s_node *n) {
  398. struct s_node *p;
  399. for (p = n->first; p; p = p->next) {
  400. int i, p0, p1;
  401. if (p->type == coords) continue;
  402. assert(p->type == ident);
  403. /*
  404. for (i = 0; i < a_ptr; ++i)
  405. fprintf(stderr, "var %s @ pos %d\n", a_stack[i].name, i);
  406. */
  407. for (i = a_ptr - 1; i >= 0; --i)
  408. if (a_stack[i].name && strcmp(a_stack[i].name, p->text) == 0)
  409. break;
  410. if (i < 0)
  411. continue;
  412. /* XXX this is ugly. having introduced <frame> markers, we need to
  413. * calculate a separate position on our name stack, and in
  414. * evlis. this makes dummy bindings a bit stupid, since they
  415. * were invented to keep the two in step.
  416. */
  417. p0 = p1 = i;
  418. #if 0
  419. for ( ; i >= 0; --i)
  420. if (!a_stack[i].value && a_stack[i].name &&
  421. strcmp(a_stack[i].name, "<frame>") == 0)
  422. --p1;
  423. #endif
  424. c_str("_pos = "); c_int(p1); c_semi();
  425. c_str("PACC_TRACE fprintf(stderr, \"binding of "); c_str(p->text);
  426. c_strln(": pos %zu holds col %zu\\n\", _pos, _pacc_p->evlis[_pos].col);");
  427. c_str("PACC_TRACE fprintf(stderr, \"bind "); c_str(p->text);
  428. c_str(" to r"); c_long(a_stack[p0].value->id);
  429. c_strln(" @ c%zu\\n\", _pacc_p->evlis[_pos].col);");
  430. c_str("cur = _pacc_result(_pacc, _pacc_p->evlis[_pos].col, ");
  431. c_long(a_stack[p0].value->id); c_strln(");");
  432. c_str("if ((cur->rule & 3) != evaluated)"); c_open();
  433. c_strln("_pacc_Push(_x); _pacc_Push(_cont);");
  434. c_str("_cont = "); c_long(p->id); c_semi();
  435. c_strln("_pacc_ev_i = 0; goto eval_loop;");
  436. c_str("case "); c_long(p->id); c_strln(":");
  437. c_strln("_pacc_Pop(_cont); _pacc_Pop(_x);");
  438. c_close();
  439. c_str(p->text);
  440. c_str(" = cur->value.u"); c_int(rule_u(a_stack[p0].value));
  441. c_semi();
  442. c_str("PACC_TRACE fprintf(stderr, \"bound "); c_str(p->text);
  443. c_str(" to r"); c_long(a_stack[p0].value->id);
  444. c_strln(" @ c%zu ==> \" TYPE_PRINTF \"\\n\", _pacc_p->evlis[_pos].col, cur->value.u0);");
  445. }
  446. }
  447. static void emit_expr(struct s_node *n) {
  448. debug_pre("expr", n);
  449. /* When we encounter an expression whilst parsing, simply record the
  450. * expression's id. This will become the new state when we evaluate.
  451. */
  452. c_strln("assert(cur->expr_id == 0);");
  453. c_str("cur->expr_id = "); c_long(n->id); c_semi();
  454. /* When evaluating, we need to evaluate the expression! */
  455. c_str("case "); c_long(n->id); c_strln(":");
  456. c_str("if (_evaling)"); c_open();
  457. c_strln("struct pacc_mid *_pacc_p;"); /* parent */
  458. declarations(n);
  459. c_str("PACC_TRACE fprintf(stderr, \""); c_long(n->id);
  460. c_strln(": evaluating\\n\");");
  461. c_str("_pacc_p = cur = _pacc_result(_pacc, _x, "); c_int(cur_rule->id);
  462. c_strln(");");
  463. bindings(n);
  464. c_strln("cur = _pacc_p;");
  465. c_str("cur->value.u"); c_int(rule_u(cur_rule));
  466. c_code(n, assign);
  467. c_str("PACC_TRACE fprintf(stderr, \"stash \" TYPE_PRINTF \" to (%zu, ");
  468. c_int(cur_rule->id); c_strln(")\\n\", cur->value.u0, _x);");
  469. c_strln("goto _pacc_expr_done;");
  470. c_close();
  471. }
  472. static void guard_pre(struct s_node *n) {
  473. debug_pre("guard", n);
  474. c_str("/* "); c_long(n->id); c_strln(": guard_pre() */");
  475. c_open();
  476. c_strln("struct pacc_mid *_pacc_p;"); /* parent */
  477. declarations(n);
  478. c_strln("_pacc_p = cur; _evaling = 1;");
  479. bindings(n);
  480. c_strln("cur = _pacc_p; _evaling = 0;");
  481. }
  482. /* obviously, the tricky part of a guard is the bindings! */
  483. static void guard_post(struct s_node *n) {
  484. /* XXX doesn't work, why not?
  485. c_strln("if (!"); c_code(n); c_strln(") status = no_parse;");
  486. */
  487. c_strln("_status = ("); c_code(n, no_assign);
  488. c_strln(") ? parsed : no_parse;");
  489. debug_post("guard", n);
  490. c_close();
  491. }
  492. static void emit_call(struct s_node *n) {
  493. /* The number of bindings is equal to the number of rule calls -
  494. * this is assumed XXX where?, so save dummy "binding" if we're not
  495. * binding.
  496. */
  497. if (!associating) associate(0, 0);
  498. c_str("_pacc_save_core("); c_long(n->first->id); c_strln(", _x);");
  499. c_strln("_pacc_Push(_x_rule);"); /* XXX this is not callee saves */
  500. c_strln("_pacc_Push(_cont);");
  501. c_str("_cont = "); c_long(n->id); c_semi();
  502. c_str("_st = "); c_long(n->first->id); c_semi();
  503. c_str("/* call "); c_str(n->text); c_strln(" */");
  504. c_strln("goto top;");
  505. c_str("case "); c_long(n->id); c_str(": /* return from ");
  506. c_str(n->text); c_strln(" */");
  507. c_strln("_pacc_Pop(_cont);");
  508. c_strln("_status = cur->rule & 3;");
  509. c_strln("_x = cur->remainder;");
  510. c_strln("_pacc_Pop(_x_rule);");
  511. c_str("cur = _pacc_result(_pacc, _x_rule, "); c_int(cur_rule->id);
  512. c_strln(");");
  513. }
  514. static void alt_pre(struct s_node *n) {
  515. debug_pre("alt", n);
  516. c_strln("_pacc_Push(_cont);");
  517. c_str("_cont = "); c_long(n->id); c_semi();
  518. savecol();
  519. }
  520. static void alt_mid(struct s_node *n) {
  521. c_str("PACC_TRACE fprintf(stderr, \"alt "); c_long(n->id);
  522. c_strln(" @ col %zu => %s\\n\", _x, _status!=no_parse?\"yes\":\"no\");");
  523. c_str("if (_status != no_parse)"); c_open();
  524. accept_col();
  525. c_strln("goto contin;");
  526. c_close();
  527. restcol();
  528. savecol();
  529. c_strln("PACC_TRACE fprintf(stderr, \"col restored to %zu\\n\", _x);");
  530. c_str("PACC_TRACE fprintf(stderr, \"alt "); c_long(n->id);
  531. c_strln(" @ col %zu? (next alternative)\\n\", _x);");
  532. }
  533. static void alt_post(struct s_node *n) {
  534. c_str("if (_status == no_parse)"); c_open();
  535. restcol();
  536. c_close(); c_str("else"); c_open();
  537. accept_col();
  538. c_close();
  539. c_str("case "); c_long(n->id); c_strln(":");
  540. c_strln("_pacc_Pop(_cont);");
  541. c_str("PACC_TRACE fprintf(stderr, \"alt "); c_long(n->id);
  542. c_strln(" @ col %zu => %s\\n\", _x, _status!=no_parse?\"yes\":\"no\");");
  543. c_strln("PACC_TRACE fprintf(stderr, \"col is %zu\\n\", _x);");
  544. }
  545. static void cclass_pre(struct s_node *n) {
  546. debug_pre("cclass", n);
  547. c_str("if (_x < _pacc->input_length)"); c_open();
  548. c_strln("_pacc_any_i = _pacc_utf8_char(_pacc->string+_x, _pacc->input_length - _x, &_pacc_utf_cp);");
  549. c_strln("if (!_pacc_any_i) pacc_panic(\"invalid UTF-8 input\");");
  550. c_str("if (");
  551. if (n->text[0] == '^') c_str("!(");
  552. }
  553. static void emit_cceq(struct s_node *n) {
  554. c_str("_pacc_utf_cp=="); c_int(n->number);
  555. if (n->next) c_str(" || ");
  556. }
  557. static void emit_ccge(struct s_node *n) {
  558. assert(n->next && n->next->type == ccle);
  559. c_str("(_pacc_utf_cp>="); c_int(n->number); c_str("&&");
  560. }
  561. static void emit_ccle(struct s_node *n) {
  562. c_str("_pacc_utf_cp<="); c_int(n->number); c_str(")");
  563. if (n->next) c_str(" || ");
  564. }
  565. static void cclass_post(struct s_node *n) {
  566. char *esc, *p, *q;
  567. /* escape " characters in the string */
  568. esc = realloc(0, 2 * strlen(n->text) + 1);
  569. if (!esc) nomem();
  570. for (p = n->text, q = esc; *p; ++p, ++q) {
  571. if (*p == '\"')
  572. *q++ = '\\';
  573. *q = *p;
  574. }
  575. *q = '\0';
  576. if (n->text[0] == '^') c_str(")");
  577. c_str(")"); c_open();
  578. c_strln("_status = parsed;");
  579. c_strln("_x += _pacc_any_i;");
  580. c_close(); c_str("else"); c_open();
  581. //error(n->text, 1);
  582. c_str("_pacc_error(_pacc, \".["); c_str(esc); c_strln("]\", _x);");
  583. c_strln("_status = no_parse;");
  584. c_close(); c_close();
  585. c_str("else"); c_open();
  586. c_str("_pacc_error(_pacc, \".["); c_str(esc); c_strln("]\", _x);");
  587. c_strln("_status = no_parse;");
  588. c_close();
  589. free(esc);
  590. debug_post("cclass", n);
  591. }
  /* Handler for rep nodes.  It always fails its assertion: the variable
   * name suggests rep nodes are expected to have been desugared before
   * emission -- presumably by sugar.c; confirm. */
  static void rep_pre(__attribute__((unused)) struct s_node *n) {
      int sugar = 1;

      assert(!sugar);
  }
  596. static void (*pre[s_type_max])(struct s_node *);
  597. static void (*mid[s_type_max])(struct s_node *);
  598. static void (*post[s_type_max])(struct s_node *);
  599. static void node(struct s_node *n) {
  600. struct s_node *p;
  601. if (pre[n->type]) pre[n->type](n);
  602. /* XXX could optimize with seq_last() etc. I think */
  603. if (s_has_children(n->type))
  604. for (p = n->first; p; p = p->next) {
  605. node(p);
  606. if (p->next)
  607. if (mid[n->type]) mid[n->type](n);
  608. }
  609. if (post[n->type]) post[n->type](n);
  610. }
  611. void emit(struct s_node *g) {
  612. pre[grammar] = grammar_pre; pre[rule] = rule_pre;
  613. pre[alt] = alt_pre; pre[seq] = seq_pre;
  614. pre[and] = and_pre; pre[not] = not_pre;
  615. pre[bind] = bind_pre; pre[expr] = emit_expr;
  616. pre[guard] = guard_pre;
  617. pre[call] = emit_call; pre[lit] = literal; pre[any] = any_emit;
  618. pre[cclass] = cclass_pre; pre[cceq] = emit_cceq;
  619. pre[ccge] = emit_ccge; pre[ccle] = emit_ccle;
  620. pre[rep] = rep_pre;
  621. mid[alt] = alt_mid; mid[seq] = seq_mid;
  622. post[grammar] = grammar_post; post[rule] = rule_post;
  623. post[alt] = alt_post; post[seq] = seq_post;
  624. post[and] = and_post; post[not] = not_post;
  625. post[bind] = bind_post;
  626. post[guard] = guard_post;
  627. post[cclass] = cclass_post;
  628. node(g);
  629. fflush(stdout);
  630. }
  /*
   * Print the extern declarations of a generated parser's public symbols.
   * yy is the prefix of every symbol name; type is the C type used for the
   * result in the _result and _wrap declarations.
   */
  static void h_lines(char *yy, char *type) {
      printf("extern int %s_trace;\n", yy);
      printf("extern struct pacc_parser *%s_new(void);\n", yy);
      printf("extern void %s_input(struct pacc_parser *, char *, size_t l);\n",
             yy);
      printf("extern void %s_destroy(struct pacc_parser *);\n", yy);
      printf("extern int %s_parse(struct pacc_parser *);\n", yy);
      printf("extern %s %s_result(struct pacc_parser *);\n", type, yy);
      printf("extern char *%s_error(struct pacc_parser *);\n", yy);
      printf("extern char *%s_pos(struct pacc_parser *, const char *);\n", yy);
      printf("extern int %s_wrap(const char *, char *, size_t, %s *result);\n",
             yy, type);
  }
  642. void header(struct s_node *g) {
  643. size_t l;
  644. char *newname;
  645. char *yy = g->text;
  646. char *preamble = g->first->text;
  647. char *type = g->first->next->first->text;
  648. c_defines();
  649. if (preamble) puts(preamble);
  650. printf("#include <sys/types.h>\n"); /* for off_t */
  651. printf("struct pacc_parser;\n");
  652. h_lines(yy, type);
  653. if (arg_feed()) {
  654. /* XXX cut'n'paste from cook.c */
  655. l = strlen(yy) + strlen("_feed") + 1;
  656. newname = realloc(0, l);
  657. if (!newname) nomem();
  658. strcpy(newname, g->text);
  659. strcat(newname, "_feed");
  660. h_lines(newname, type);
  661. free(newname);
  662. }
  663. fflush(stdout);
  664. }
  665. /* end. */