sugar.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. /* We need _BSD_SOURCE for strdup() */
  2. #define _BSD_SOURCE 1
  3. /* except that now it's called _DEFAULT_SOURCE */
  4. #define _DEFAULT_SOURCE 1
  5. #include <assert.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "error.h"
  10. #include "syntax.h"
  11. /* We now have a generic "walk" function that handles all current
  12. * transforms. I used to think that at this point I would implement walk
  13. * with an explicit stack, rather than recursion. Now I wonder why? This
  14. * is the compiler; not the emitted code. It's not worth sacrificing
  15. * clarity to imagined performance.
  16. */
  17. /* When a desugaring creates new rules, it first links it into rules. Then we
  18. * patch that onto the tree. */
  19. static struct s_node *rules = 0;
  20. static void patchrules(struct s_node *g) {
  21. struct s_node *p;
  22. for (p = g->first; p->next; p = p->next)
  23. ;
  24. p->next = rules;
  25. rules = 0;
  26. }
  27. /* XXX only find_type needs this; must be a better way? */
  28. static struct s_node *top;
  29. /*
  30. The deblit (desugar bound literal) transform converts this tree:
  31. lit 115: five
  32. call 114: Word
  33. into this tree:
  34. seq 115:
  35. bind 147: _pacc_bl115
  36. call 114: Word
  37. guard 146: ref_streq(_pacc_bl115,"five")
  38. ident 145: _pacc_bl115
  39. */
  40. static void deblit(
  41. struct s_node *n,
  42. __attribute__((unused)) const char *ign0,
  43. __attribute__((unused)) int ign1
  44. ) {
  45. char *gd, *id;
  46. int l;
  47. struct s_node *p;
  48. /* We need a unique identifier name for this bound literal... */
  49. l = snprintf(0, 0, "_pacc_bl%ld", n->id) + 1;
  50. id = malloc(l);
  51. if (!id) nomem();
  52. snprintf(id, l, "_pacc_bl%ld", n->id);
  53. /* ... and a guard string. */
  54. l = snprintf(0, 0, "ref_streq(%s,\"%s\")", id, n->text) + 1;
  55. gd = malloc(l);
  56. if (!gd) nomem();
  57. snprintf(gd, l, "ref_streq(%s,\"%s\")", id, n->text);
  58. p = s_text(ident, id);
  59. p = s_both(guard, gd, p);
  60. s_retype(bind, n);
  61. free(n->text);
  62. n->text = id;
  63. p->next = n->next;
  64. n->next = p;
  65. }
  66. static int isblit(struct s_node *n) {
  67. return n->type == lit && n->first;
  68. }
  69. /* XXX this is pretty grody, and kindof in the wrong place */
  70. char *find_type(char *name) {
  71. struct s_node *p;
  72. for (p = top->first->next; p; p = p->next)
  73. if (strcmp(p->text, name) == 0)
  74. return p->first->text;
  75. fatal3("rule not found: `", name, "'");
  76. return 0; /* not reached */
  77. }
  78. /*
  79. The dequery (desugar query operator) transform changes
  80. R <- x: Thing? { ... }
  81. Thing :: t <- ...
  82. into
  83. R <- x: ThingOpt { ... }
  84. ThingOpt :: t <- a:Thing -> a / epsilon -> 0
  85. except with different names for the virtual rules. It (mostly) works
  86. because "0" is a member of both numeric and pointer types in C.
  87. */
  88. static void dequery(struct s_node *n, const char *name, int c) {
  89. char *xo, *thing, *typ;
  90. int l;
  91. struct s_node *p, *q, *s;
  92. thing = n->first->first->text;
  93. /* XXX to find the type of the called rule, we need to find the
  94. * rule, and we haven't called resolve() yet. At this point, we
  95. * don't even have a pointer that's of any use to find it! Darn,
  96. * this is going to get messy.
  97. */
  98. typ = find_type(thing);
  99. //fprintf(stderr, "thing is %s :: %s\n", thing, typ);
  100. /* Create the name for ThingOpt. */
  101. l = snprintf(0, 0, "%s:?:%d", name, c) + 1;
  102. xo = malloc(l);
  103. if (!xo) nomem();
  104. snprintf(xo, l, "%s:?:%d", name, c);
  105. /* generate the new rule */
  106. p = s_kid(seq, s_text(expr, "0")); s = p;
  107. p = s_both(expr, "a", s_text(ident, "a"));
  108. p = cons(s_both(bind, "a", s_text(call, thing)), p); q = p;
  109. p = s_new(seq); p->first = q; p->next = s; q = p;
  110. p = s_new(alt); p->first = q; q = p;
  111. p = s_text(type, typ); p->next = q; q = p;
  112. p = s_text(rule, xo); p->first = q;
  113. p->next = rules; rules = p;
  114. /* replace "bind / rep / call" with "bind / call" */
  115. n->first->text = xo;
  116. n->first->type = call;
  117. free(n->first->first);
  118. n->first->first = 0;
  119. }
  120. /*
  121. The debind (desugar binding) transform creates a new rule for the right
  122. hand side of a non-rule binding.
  123. S <- r:("y" "z") { use r }
  124. becomes, effectively
  125. S <- r:S_r { use r }
  126. S_r <- "y" "z"
  127. except that the name of the invented rule contains a prohibited
  128. character instead of the underscore.
  129. */
  130. static void debind(struct s_node *n, const char *name, int c) {
  131. char *x;
  132. int l;
  133. struct s_node *p, *r;
  134. if (n->first && n->first->type == rep &&
  135. n->first->text && strcmp(n->first->text, ",1") == 0 &&
  136. n->first->first && n->first->first->type == call) {
  137. dequery(n, name, c);
  138. return;
  139. }
  140. l = snprintf(0, 0, "%s:%s:%d", name, n->text, c) + 1;
  141. x = malloc(l);
  142. if (!x) nomem();
  143. snprintf(x, l, "%s:%s:%d", name, n->text, c);
  144. p = s_text(expr, "ref()");
  145. p = s_kid(seq, cons(n->first, p));
  146. p = cons(s_text(type, "ref_t"), p);
  147. r = s_both(rule, x, p);
  148. n->first = s_text(call, x);
  149. r->next = rules;
  150. rules = r;
  151. }
  152. static int isbind(struct s_node *n) {
  153. return n->type == bind && n->first->type != call;
  154. }
  155. /*
  156. The derep (desugar repetitions) transform changes
  157. Y <- "y" *
  158. into
  159. Y <- Y_star
  160. Y_star <- Y_one Y_star / epsilon
  161. Y_one <- "y"
  162. except with different names for the virtual rules.
  163. */
  164. static void derep(struct s_node *n, const char *name, int c) {
  165. char *x0, *x1, *xx;
  166. int l;
  167. struct s_node *p, *q, *s;
  168. /* Create names for two new rules. User-named rules can never
  169. * contain a colon, so we will use that in rules generated by
  170. * desugaring to ensure uniqueness. */
  171. l = snprintf(0, 0, "%s:.:%d", name, c) + 1;
  172. x0 = malloc(l);
  173. if (!x0) nomem();
  174. snprintf(x0, l, "%s:.:%d", name, c);
  175. //fprintf(stderr, "!!! got a rep - nm is %s !!!\n", x0);
  176. l = strlen(name) + 1;
  177. x0[l] = '1'; x1 = strdup(x0); if (!x1) nomem();
  178. x0[l] = '*'; xx = strdup(x0); if (!xx) nomem();
  179. free(x0);
  180. p = s_text(type, "void"); p->next = n->first; q = p;
  181. p = s_text(rule, x1); p->first = q;
  182. p->next = rules; rules = p;
  183. if (!n->text || *n->text == '\0') {
  184. /* generate a rule that matches zero or more of the rep */
  185. p = s_new(seq); s = p;
  186. p = s_text(call, xx); q = p;
  187. p = s_text(call, x1); p->next = q; q = p;
  188. p = s_new(seq); p->first = q; p->next = s; q = p;
  189. p = s_new(alt); p->first = q; q = p;
  190. p = s_text(type, "void"); p->next = q; q = p;
  191. p = s_text(rule, xx); p->first = q;
  192. p->next = rules; rules = p;
  193. } else if (strcmp(n->text, "1,") == 0) {
  194. /* generate a rule that matches one or more of the rep */
  195. p = s_text(call, x1); q = p;
  196. p = s_new(seq); p->first = q; s = p;
  197. p = s_text(call, xx); q = p;
  198. p = s_text(call, x1); p->next = q; q = p;
  199. p = s_new(seq); p->first = q; p->next = s; q = p;
  200. p = s_new(alt); p->first = q; q = p;
  201. p = s_text(type, "void"); p->next = q; q = p;
  202. p = s_text(rule, xx); p->first = q;
  203. p->next = rules; rules = p;
  204. } else if (strcmp(n->text, ",1") == 0) {
  205. /* generate a rule that matches zero or one of the rep */
  206. p = s_new(seq); s = p;
  207. p = s_text(call, x1); q = p;
  208. p = s_new(seq); p->first = q; p->next = s; q = p;
  209. p = s_new(alt); p->first = q; q = p;
  210. p = s_text(type, "void"); p->next = q; q = p;
  211. p = s_text(rule, xx); p->first = q;
  212. p->next = rules; rules = p;
  213. }
  214. n->text = xx;
  215. n->type = call;
  216. n->first = 0;
  217. }
  218. static int isrep(struct s_node *n) {
  219. return n->type == rep;
  220. }
  221. /* default expression: convert
  222. * R <- f:Five -> f / Six
  223. * into
  224. * R <- f:Five -> f / s:Six -> s
  225. */
  226. static void dedefexpr1(struct s_node *s, char *name, char *type) {
  227. assert(s && s->type == seq);
  228. /* is the only thing in this seq a call? */
  229. if (s->first->type == call && ! s->first->next) {
  230. int l;
  231. char *id;
  232. struct s_node *p, *q;
  233. if (strcmp(type, find_type(s->first->text)) != 0)
  234. fatal9("type mismatch: `", name, " :: ", type, "' cannot call `",
  235. s->first->text, " :: ", find_type(s->first->text), "'");
  236. /* name for default expr identifier */
  237. l = snprintf(0, 0, "_pacc_de%ld", s->id) + 1;
  238. id = malloc(l);
  239. if (!id) nomem();
  240. snprintf(id, l, "_pacc_de%ld", s->id);
  241. /* XXX this expr node has no coords */
  242. p = s_both(expr, id, s_text(ident, id)); q = p;
  243. p = s_both(bind, id, s->first);
  244. p->next = q;
  245. s->first = p;
  246. }
  247. }
  248. /* XXX this would be better done with a loop over just the rules... */
  249. static void dedefexpr(struct s_node *r, __attribute__((unused)) int ign) {
  250. assert(r && r->type == rule);
  251. assert(r->first && r->first->type == type);
  252. if (strcmp(r->first->text, "void") == 0)
  253. return;
  254. if (r->first->next->type == alt) {
  255. struct s_node *s;
  256. /* loop over the seq children of the top-level alt */
  257. for (s = r->first->next->first; s; s = s->next)
  258. dedefexpr1(s, r->text, r->first->text);
  259. } else if (r->first->next->type == seq)
  260. dedefexpr1(r->first->next, r->text, r->first->text);
  261. }
  262. typedef int pred_fn_t(struct s_node *);
  263. typedef void trans_fn_t(struct s_node *, const char *, int);
  264. static void walk(struct s_node *n, pred_fn_t pred, trans_fn_t transform) {
  265. static char *name;
  266. static int c;
  267. struct s_node *p;
  268. if (n->type == rule) {
  269. name = n->text;
  270. c = 0;
  271. }
  272. if (s_has_children(n->type))
  273. for (p = n->first; p; p = p->next)
  274. if (pred(n))
  275. transform(n, name, c++);
  276. else
  277. walk(p, pred, transform);
  278. }
  279. /* call f for each rule */
  280. typedef void rule_fn_t(struct s_node *, int);
  281. static void for_each_rule(struct s_node *top, rule_fn_t f) {
  282. int n;
  283. struct s_node *p;
  284. assert(top && top->type == grammar);
  285. assert(top->first && top->first->type == preamble);
  286. assert(top->first->next && top->first->next->type == rule);
  287. for (n = 0, p = top->first->next; p; p = p->next, ++n)
  288. f(p, n);
  289. }
  290. void desugar(struct s_node *g) {
  291. top = g; /* XXX for find_type */
  292. walk(g, &isblit, &deblit);
  293. walk(g, &isbind, &debind); patchrules(g);
  294. walk(g, &isrep, &derep); patchrules(g);
  295. for_each_rule(g, &dedefexpr);
  296. }