sugar.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /*
  2. * Copyright 2021
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. * SPDX-License-Identifier: GPL-3.0+
  18. * License-Filename: LICENSE
  19. */
  20. /* We need _BSD_SOURCE for strdup() */
  21. #define _BSD_SOURCE 1
  22. /* except that now it's called _DEFAULT_SOURCE */
  23. #define _DEFAULT_SOURCE 1
  24. #include <assert.h>
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include "error.h"
  29. #include "syntax.h"
  30. /* We now have a generic "walk" function that handles all current
  31. * transforms. I used to think that at this point I would implement walk
  32. * with an explicit stack, rather than recursion. Now I wonder why? This
  33. * is the compiler; not the emitted code. It's not worth sacrificing
  34. * clarity to imagined performance.
  35. */
  36. /* When a desugaring creates new rules, it first links it into rules. Then we
  37. * patch that onto the tree. */
  38. static struct s_node *rules = 0;
  39. static void patchrules(struct s_node *g) {
  40. struct s_node *p;
  41. for (p = g->first; p->next; p = p->next)
  42. ;
  43. p->next = rules;
  44. rules = 0;
  45. }
  46. /* XXX only find_type needs this; must be a better way? */
  47. static struct s_node *top;
  48. /*
  49. The deblit (desugar bound literal) transform converts this tree:
  50. lit 115: five
  51. call 114: Word
  52. into this tree:
  53. seq 115:
  54. bind 147: _pacc_bl115
  55. call 114: Word
  56. guard 146: ref_streq(_pacc_bl115,"five")
  57. ident 145: _pacc_bl115
  58. */
  59. static void deblit(
  60. struct s_node *n,
  61. __attribute__((unused)) const char *ign0,
  62. __attribute__((unused)) int ign1
  63. ) {
  64. char *gd, *id;
  65. int l;
  66. struct s_node *p;
  67. /* We need a unique identifier name for this bound literal... */
  68. l = snprintf(0, 0, "_pacc_bl%ld", n->id) + 1;
  69. id = malloc(l);
  70. if (!id) nomem();
  71. snprintf(id, l, "_pacc_bl%ld", n->id);
  72. /* ... and a guard string. */
  73. l = snprintf(0, 0, "ref_streq(%s,\"%s\")", id, n->text) + 1;
  74. gd = malloc(l);
  75. if (!gd) nomem();
  76. snprintf(gd, l, "ref_streq(%s,\"%s\")", id, n->text);
  77. p = s_text(ident, id);
  78. p = s_both(guard, gd, p);
  79. s_retype(bind, n);
  80. free(n->text);
  81. n->text = id;
  82. p->next = n->next;
  83. n->next = p;
  84. }
  85. static int isblit(struct s_node *n) {
  86. return n->type == lit && n->first;
  87. }
  88. /* XXX this is pretty grody, and kindof in the wrong place */
  89. char *find_type(char *name) {
  90. struct s_node *p;
  91. for (p = top->first->next; p; p = p->next)
  92. if (strcmp(p->text, name) == 0)
  93. return p->first->text;
  94. fatal3("rule not found: `", name, "'");
  95. return 0; /* not reached */
  96. }
  97. /*
  98. The dequery (desugar query operator) transform changes
  99. R <- x: Thing? { ... }
  100. Thing :: t <- ...
  101. into
  102. R <- x: ThingOpt { ... }
  103. ThingOpt :: t <- a:Thing -> a / epsilon -> 0
  104. except with different names for the virtual rules. It (mostly) works
  105. because "0" is a member of both numeric and pointer types in C.
  106. */
  107. static void dequery(struct s_node *n, const char *name, int c) {
  108. char *xo, *thing, *typ;
  109. int l;
  110. struct s_node *p, *q, *s;
  111. thing = n->first->first->text;
  112. /* XXX to find the type of the called rule, we need to find the
  113. * rule, and we haven't called resolve() yet. At this point, we
  114. * don't even have a pointer that's of any use to find it! Darn,
  115. * this is going to get messy.
  116. */
  117. typ = find_type(thing);
  118. //fprintf(stderr, "thing is %s :: %s\n", thing, typ);
  119. /* Create the name for ThingOpt. */
  120. l = snprintf(0, 0, "%s:?:%d", name, c) + 1;
  121. xo = malloc(l);
  122. if (!xo) nomem();
  123. snprintf(xo, l, "%s:?:%d", name, c);
  124. /* generate the new rule */
  125. p = s_kid(seq, s_text(expr, "0")); s = p;
  126. p = s_both(expr, "a", s_text(ident, "a"));
  127. p = cons(s_both(bind, "a", s_text(call, thing)), p); q = p;
  128. p = s_new(seq); p->first = q; p->next = s; q = p;
  129. p = s_new(alt); p->first = q; q = p;
  130. p = s_text(type, typ); p->next = q; q = p;
  131. p = s_text(rule, xo); p->first = q;
  132. p->next = rules; rules = p;
  133. /* replace "bind / rep / call" with "bind / call" */
  134. n->first->text = xo;
  135. n->first->type = call;
  136. free(n->first->first);
  137. n->first->first = 0;
  138. }
  139. /*
  140. The debind (desugar binding) transform creates a new rule for the right
  141. hand side of a non-rule binding.
  142. S <- r:("y" "z") { use r }
  143. becomes, effectively
  144. S <- r:S_r { use r }
  145. S_r <- "y" "z"
  146. except that the name of the invented rule contains a prohibited
  147. character instead of the underscore.
  148. */
  149. static void debind(struct s_node *n, const char *name, int c) {
  150. char *x;
  151. int l;
  152. struct s_node *p, *r;
  153. if (n->first && n->first->type == rep &&
  154. n->first->text && strcmp(n->first->text, ",1") == 0 &&
  155. n->first->first && n->first->first->type == call) {
  156. dequery(n, name, c);
  157. return;
  158. }
  159. l = snprintf(0, 0, "%s:%s:%d", name, n->text, c) + 1;
  160. x = malloc(l);
  161. if (!x) nomem();
  162. snprintf(x, l, "%s:%s:%d", name, n->text, c);
  163. p = s_text(expr, "ref()");
  164. p = s_kid(seq, cons(n->first, p));
  165. p = cons(s_text(type, "ref_t"), p);
  166. r = s_both(rule, x, p);
  167. n->first = s_text(call, x);
  168. r->next = rules;
  169. rules = r;
  170. }
  171. static int isbind(struct s_node *n) {
  172. return n->type == bind && n->first->type != call;
  173. }
  174. /*
  175. The derep (desugar repetitions) transform changes
  176. Y <- "y" *
  177. into
  178. Y <- Y_star
  179. Y_star <- Y_one Y_star / epsilon
  180. Y_one <- "y"
  181. except with different names for the virtual rules.
  182. */
  183. static void derep(struct s_node *n, const char *name, int c) {
  184. char *x0, *x1, *xx;
  185. int l;
  186. struct s_node *p, *q, *s;
  187. /* Create names for two new rules. User-named rules can never
  188. * contain a colon, so we will use that in rules generated by
  189. * desugaring to ensure uniqueness. */
  190. l = snprintf(0, 0, "%s:.:%d", name, c) + 1;
  191. x0 = malloc(l);
  192. if (!x0) nomem();
  193. snprintf(x0, l, "%s:.:%d", name, c);
  194. //fprintf(stderr, "!!! got a rep - nm is %s !!!\n", x0);
  195. l = strlen(name) + 1;
  196. x0[l] = '1'; x1 = strdup(x0); if (!x1) nomem();
  197. x0[l] = '*'; xx = strdup(x0); if (!xx) nomem();
  198. free(x0);
  199. p = s_text(type, "void"); p->next = n->first; q = p;
  200. p = s_text(rule, x1); p->first = q;
  201. p->next = rules; rules = p;
  202. if (!n->text || *n->text == '\0') {
  203. /* generate a rule that matches zero or more of the rep */
  204. p = s_new(seq); s = p;
  205. p = s_text(call, xx); q = p;
  206. p = s_text(call, x1); p->next = q; q = p;
  207. p = s_new(seq); p->first = q; p->next = s; q = p;
  208. p = s_new(alt); p->first = q; q = p;
  209. p = s_text(type, "void"); p->next = q; q = p;
  210. p = s_text(rule, xx); p->first = q;
  211. p->next = rules; rules = p;
  212. } else if (strcmp(n->text, "1,") == 0) {
  213. /* generate a rule that matches one or more of the rep */
  214. p = s_text(call, x1); q = p;
  215. p = s_new(seq); p->first = q; s = p;
  216. p = s_text(call, xx); q = p;
  217. p = s_text(call, x1); p->next = q; q = p;
  218. p = s_new(seq); p->first = q; p->next = s; q = p;
  219. p = s_new(alt); p->first = q; q = p;
  220. p = s_text(type, "void"); p->next = q; q = p;
  221. p = s_text(rule, xx); p->first = q;
  222. p->next = rules; rules = p;
  223. } else if (strcmp(n->text, ",1") == 0) {
  224. /* generate a rule that matches zero or one of the rep */
  225. p = s_new(seq); s = p;
  226. p = s_text(call, x1); q = p;
  227. p = s_new(seq); p->first = q; p->next = s; q = p;
  228. p = s_new(alt); p->first = q; q = p;
  229. p = s_text(type, "void"); p->next = q; q = p;
  230. p = s_text(rule, xx); p->first = q;
  231. p->next = rules; rules = p;
  232. }
  233. n->text = xx;
  234. n->type = call;
  235. n->first = 0;
  236. }
  237. static int isrep(struct s_node *n) {
  238. return n->type == rep;
  239. }
  240. /* default expression: convert
  241. * R <- f:Five -> f / Six
  242. * into
  243. * R <- f:Five -> f / s:Six -> s
  244. */
  245. static void dedefexpr1(struct s_node *s, char *name, char *type) {
  246. assert(s && s->type == seq);
  247. /* is the only thing in this seq a call? */
  248. if (s->first->type == call && ! s->first->next) {
  249. int l;
  250. char *id;
  251. struct s_node *p, *q;
  252. if (strcmp(type, find_type(s->first->text)) != 0)
  253. fatal9("type mismatch: `", name, " :: ", type, "' cannot call `",
  254. s->first->text, " :: ", find_type(s->first->text), "'");
  255. /* name for default expr identifier */
  256. l = snprintf(0, 0, "_pacc_de%ld", s->id) + 1;
  257. id = malloc(l);
  258. if (!id) nomem();
  259. snprintf(id, l, "_pacc_de%ld", s->id);
  260. /* XXX this expr node has no coords */
  261. p = s_both(expr, id, s_text(ident, id)); q = p;
  262. p = s_both(bind, id, s->first);
  263. p->next = q;
  264. s->first = p;
  265. }
  266. }
  267. /* XXX this would be better done with a loop over just the rules... */
  268. static void dedefexpr(struct s_node *r, __attribute__((unused)) int ign) {
  269. assert(r && r->type == rule);
  270. assert(r->first && r->first->type == type);
  271. if (strcmp(r->first->text, "void") == 0)
  272. return;
  273. if (r->first->next->type == alt) {
  274. struct s_node *s;
  275. /* loop over the seq children of the top-level alt */
  276. for (s = r->first->next->first; s; s = s->next)
  277. dedefexpr1(s, r->text, r->first->text);
  278. } else if (r->first->next->type == seq)
  279. dedefexpr1(r->first->next, r->text, r->first->text);
  280. }
  281. typedef int pred_fn_t(struct s_node *);
  282. typedef void trans_fn_t(struct s_node *, const char *, int);
  283. static void walk(struct s_node *n, pred_fn_t pred, trans_fn_t transform) {
  284. static char *name;
  285. static int c;
  286. struct s_node *p;
  287. if (n->type == rule) {
  288. name = n->text;
  289. c = 0;
  290. }
  291. if (s_has_children(n->type))
  292. for (p = n->first; p; p = p->next)
  293. if (pred(n))
  294. transform(n, name, c++);
  295. else
  296. walk(p, pred, transform);
  297. }
  298. /* call f for each rule */
  299. typedef void rule_fn_t(struct s_node *, int);
  300. static void for_each_rule(struct s_node *top, rule_fn_t f) {
  301. int n;
  302. struct s_node *p;
  303. assert(top && top->type == grammar);
  304. assert(top->first && top->first->type == preamble);
  305. assert(top->first->next && top->first->next->type == rule);
  306. for (n = 0, p = top->first->next; p; p = p->next, ++n)
  307. f(p, n);
  308. }
  309. void desugar(struct s_node *g) {
  310. top = g; /* XXX for find_type */
  311. walk(g, &isblit, &deblit);
  312. walk(g, &isbind, &debind); patchrules(g);
  313. walk(g, &isrep, &derep); patchrules(g);
  314. for_each_rule(g, &dedefexpr);
  315. }
  316. /* end. */