lalr.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762
  1. /* Compute look-ahead criteria for bison,
  2. Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
  3. This file is part of Bison, the GNU Compiler Compiler.
  4. Bison is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 1, or (at your option)
  7. any later version.
  8. Bison is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with Bison; see the file COPYING. If not, write to
  14. the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
  15. /* Compute how to make the finite state machine deterministic;
  16. find which rules need lookahead in each state, and which lookahead tokens they accept.
  17. lalr(), the entry point, builds these data structures:
  18. goto_map, from_state and to_state
  19. record each shift transition which accepts a variable (a nonterminal).
  20. ngotos is the number of such transitions.
  21. from_state[t] is the state number which a transition leads from
  22. and to_state[t] is the state number it leads to.
  23. All the transitions that accept a particular variable are grouped together and
  24. goto_map[i - ntokens] is the index in from_state and to_state of the first of them.
  25. consistent[s] is nonzero if no lookahead is needed to decide what to do in state s.
  26. LAruleno is a vector which records the rules that need lookahead in various states.
  27. The elements of LAruleno that apply to state s are those from
  28. lookaheads[s] through lookaheads[s+1]-1.
  29. Each element of LAruleno is a rule number.
  30. If lr is the length of LAruleno, then a number from 0 to lr-1
  31. can specify both a rule and a state where the rule might be applied.
  32. LA is a lr by ntokens matrix of bits.
  33. LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state
  34. when the next token is symbol i.
  35. If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
  36. */
  37. #include <stdio.h>
  38. #include "system.h"
  39. #include "machine.h"
  40. #include "types.h"
  41. #include "state.h"
  42. #include "new.h"
  43. #include "gram.h"
  44. extern short **derives;
  45. extern char *nullable;
  46. int tokensetsize;
  47. short *lookaheads;
  48. short *LAruleno;
  49. unsigned *LA;
  50. short *accessing_symbol;
  51. char *consistent;
  52. core **state_table;
  53. shifts **shift_table;
  54. reductions **reduction_table;
  55. short *goto_map;
  56. short *from_state;
  57. short *to_state;
  58. short **transpose();
  59. void set_state_table();
  60. void set_accessing_symbol();
  61. void set_shift_table();
  62. void set_reduction_table();
  63. void set_maxrhs();
  64. void initialize_LA();
  65. void set_goto_map();
  66. void initialize_F();
  67. void build_relations();
  68. void add_lookback_edge();
  69. void compute_FOLLOWS();
  70. void compute_lookaheads();
  71. void digraph();
  72. void traverse();
  73. extern void toomany();
  74. extern void berror();
  75. static int infinity;
  76. static int maxrhs;
  77. static int ngotos;
  78. static unsigned *F;
  79. static short **includes;
  80. static shorts **lookback;
  81. static short **R;
  82. static short *INDEX;
  83. static short *VERTICES;
  84. static int top;
  85. void
  86. lalr()
  87. {
  88. tokensetsize = WORDSIZE(ntokens);
  89. set_state_table();
  90. set_accessing_symbol();
  91. set_shift_table();
  92. set_reduction_table();
  93. set_maxrhs();
  94. initialize_LA();
  95. set_goto_map();
  96. initialize_F();
  97. build_relations();
  98. compute_FOLLOWS();
  99. compute_lookaheads();
  100. }
  101. void
  102. set_state_table()
  103. {
  104. register core *sp;
  105. state_table = NEW2(nstates, core *);
  106. for (sp = first_state; sp; sp = sp->next)
  107. state_table[sp->number] = sp;
  108. }
  109. void
  110. set_accessing_symbol()
  111. {
  112. register core *sp;
  113. accessing_symbol = NEW2(nstates, short);
  114. for (sp = first_state; sp; sp = sp->next)
  115. accessing_symbol[sp->number] = sp->accessing_symbol;
  116. }
  117. void
  118. set_shift_table()
  119. {
  120. register shifts *sp;
  121. shift_table = NEW2(nstates, shifts *);
  122. for (sp = first_shift; sp; sp = sp->next)
  123. shift_table[sp->number] = sp;
  124. }
  125. void
  126. set_reduction_table()
  127. {
  128. register reductions *rp;
  129. reduction_table = NEW2(nstates, reductions *);
  130. for (rp = first_reduction; rp; rp = rp->next)
  131. reduction_table[rp->number] = rp;
  132. }
  133. void
  134. set_maxrhs()
  135. {
  136. register short *itemp;
  137. register int length;
  138. register int max;
  139. length = 0;
  140. max = 0;
  141. for (itemp = ritem; *itemp; itemp++)
  142. {
  143. if (*itemp > 0)
  144. {
  145. length++;
  146. }
  147. else
  148. {
  149. if (length > max) max = length;
  150. length = 0;
  151. }
  152. }
  153. maxrhs = max;
  154. }
  155. void
  156. initialize_LA()
  157. {
  158. register int i;
  159. register int j;
  160. register int count;
  161. register reductions *rp;
  162. register shifts *sp;
  163. register short *np;
  164. consistent = NEW2(nstates, char);
  165. lookaheads = NEW2(nstates + 1, short);
  166. count = 0;
  167. for (i = 0; i < nstates; i++)
  168. {
  169. register int j;
  170. lookaheads[i] = count;
  171. rp = reduction_table[i];
  172. sp = shift_table[i];
  173. if (rp && (rp->nreds > 1
  174. || (sp && ! ISVAR(accessing_symbol[sp->shifts[0]]))))
  175. count += rp->nreds;
  176. else
  177. consistent[i] = 1;
  178. if (sp)
  179. for (j = 0; j < sp->nshifts; j++)
  180. {
  181. if (accessing_symbol[sp->shifts[j]] == error_token_number)
  182. {
  183. consistent[i] = 0;
  184. break;
  185. }
  186. }
  187. }
  188. lookaheads[nstates] = count;
  189. LA = NEW2(count * tokensetsize, unsigned);
  190. LAruleno = NEW2(count, short);
  191. lookback = NEW2(count, shorts *);
  192. np = LAruleno;
  193. for (i = 0; i < nstates; i++)
  194. {
  195. if (!consistent[i])
  196. {
  197. if (rp = reduction_table[i])
  198. for (j = 0; j < rp->nreds; j++)
  199. *np++ = rp->rules[j];
  200. }
  201. }
  202. }
  203. void
  204. set_goto_map()
  205. {
  206. register shifts *sp;
  207. register int i;
  208. register int symbol;
  209. register int k;
  210. register short *temp_map;
  211. register int state2;
  212. register int state1;
  213. goto_map = NEW2(nvars + 1, short) - ntokens;
  214. temp_map = NEW2(nvars + 1, short) - ntokens;
  215. ngotos = 0;
  216. for (sp = first_shift; sp; sp = sp->next)
  217. {
  218. for (i = sp->nshifts - 1; i >= 0; i--)
  219. {
  220. symbol = accessing_symbol[sp->shifts[i]];
  221. if (ISTOKEN(symbol)) break;
  222. if (ngotos == MAXSHORT)
  223. toomany("gotos");
  224. ngotos++;
  225. goto_map[symbol]++;
  226. }
  227. }
  228. k = 0;
  229. for (i = ntokens; i < nsyms; i++)
  230. {
  231. temp_map[i] = k;
  232. k += goto_map[i];
  233. }
  234. for (i = ntokens; i < nsyms; i++)
  235. goto_map[i] = temp_map[i];
  236. goto_map[nsyms] = ngotos;
  237. temp_map[nsyms] = ngotos;
  238. from_state = NEW2(ngotos, short);
  239. to_state = NEW2(ngotos, short);
  240. for (sp = first_shift; sp; sp = sp->next)
  241. {
  242. state1 = sp->number;
  243. for (i = sp->nshifts - 1; i >= 0; i--)
  244. {
  245. state2 = sp->shifts[i];
  246. symbol = accessing_symbol[state2];
  247. if (ISTOKEN(symbol)) break;
  248. k = temp_map[symbol]++;
  249. from_state[k] = state1;
  250. to_state[k] = state2;
  251. }
  252. }
  253. FREE(temp_map + ntokens);
  254. }
  255. /* Map_goto maps a state/symbol pair into its numeric representation. */
  256. int
  257. map_goto(state, symbol)
  258. int state;
  259. int symbol;
  260. {
  261. register int high;
  262. register int low;
  263. register int middle;
  264. register int s;
  265. low = goto_map[symbol];
  266. high = goto_map[symbol + 1] - 1;
  267. while (low <= high)
  268. {
  269. middle = (low + high) / 2;
  270. s = from_state[middle];
  271. if (s == state)
  272. return (middle);
  273. else if (s < state)
  274. low = middle + 1;
  275. else
  276. high = middle - 1;
  277. }
  278. berror("map_goto");
  279. /* NOTREACHED */
  280. return 0;
  281. }
  282. void
  283. initialize_F()
  284. {
  285. register int i;
  286. register int j;
  287. register int k;
  288. register shifts *sp;
  289. register short *edge;
  290. register unsigned *rowp;
  291. register short *rp;
  292. register short **reads;
  293. register int nedges;
  294. register int stateno;
  295. register int symbol;
  296. register int nwords;
  297. nwords = ngotos * tokensetsize;
  298. F = NEW2(nwords, unsigned);
  299. reads = NEW2(ngotos, short *);
  300. edge = NEW2(ngotos + 1, short);
  301. nedges = 0;
  302. rowp = F;
  303. for (i = 0; i < ngotos; i++)
  304. {
  305. stateno = to_state[i];
  306. sp = shift_table[stateno];
  307. if (sp)
  308. {
  309. k = sp->nshifts;
  310. for (j = 0; j < k; j++)
  311. {
  312. symbol = accessing_symbol[sp->shifts[j]];
  313. if (ISVAR(symbol))
  314. break;
  315. SETBIT(rowp, symbol);
  316. }
  317. for (; j < k; j++)
  318. {
  319. symbol = accessing_symbol[sp->shifts[j]];
  320. if (nullable[symbol])
  321. edge[nedges++] = map_goto(stateno, symbol);
  322. }
  323. if (nedges)
  324. {
  325. reads[i] = rp = NEW2(nedges + 1, short);
  326. for (j = 0; j < nedges; j++)
  327. rp[j] = edge[j];
  328. rp[nedges] = -1;
  329. nedges = 0;
  330. }
  331. }
  332. rowp += tokensetsize;
  333. }
  334. digraph(reads);
  335. for (i = 0; i < ngotos; i++)
  336. {
  337. if (reads[i])
  338. FREE(reads[i]);
  339. }
  340. FREE(reads);
  341. FREE(edge);
  342. }
  343. void
  344. build_relations()
  345. {
  346. register int i;
  347. register int j;
  348. register int k;
  349. register short *rulep;
  350. register short *rp;
  351. register shifts *sp;
  352. register int length;
  353. register int nedges;
  354. register int done;
  355. register int state1;
  356. register int stateno;
  357. register int symbol1;
  358. register int symbol2;
  359. register short *shortp;
  360. register short *edge;
  361. register short *states;
  362. register short **new_includes;
  363. includes = NEW2(ngotos, short *);
  364. edge = NEW2(ngotos + 1, short);
  365. states = NEW2(maxrhs + 1, short);
  366. for (i = 0; i < ngotos; i++)
  367. {
  368. nedges = 0;
  369. state1 = from_state[i];
  370. symbol1 = accessing_symbol[to_state[i]];
  371. for (rulep = derives[symbol1]; *rulep > 0; rulep++)
  372. {
  373. length = 1;
  374. states[0] = state1;
  375. stateno = state1;
  376. for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++)
  377. {
  378. symbol2 = *rp;
  379. sp = shift_table[stateno];
  380. k = sp->nshifts;
  381. for (j = 0; j < k; j++)
  382. {
  383. stateno = sp->shifts[j];
  384. if (accessing_symbol[stateno] == symbol2) break;
  385. }
  386. states[length++] = stateno;
  387. }
  388. if (!consistent[stateno])
  389. add_lookback_edge(stateno, *rulep, i);
  390. length--;
  391. done = 0;
  392. while (!done)
  393. {
  394. done = 1;
  395. rp--;
  396. /* JF added rp>=ritem && I hope to god its right! */
  397. if (rp>=ritem && ISVAR(*rp))
  398. {
  399. stateno = states[--length];
  400. edge[nedges++] = map_goto(stateno, *rp);
  401. if (nullable[*rp]) done = 0;
  402. }
  403. }
  404. }
  405. if (nedges)
  406. {
  407. includes[i] = shortp = NEW2(nedges + 1, short);
  408. for (j = 0; j < nedges; j++)
  409. shortp[j] = edge[j];
  410. shortp[nedges] = -1;
  411. }
  412. }
  413. new_includes = transpose(includes, ngotos);
  414. for (i = 0; i < ngotos; i++)
  415. if (includes[i])
  416. FREE(includes[i]);
  417. FREE(includes);
  418. includes = new_includes;
  419. FREE(edge);
  420. FREE(states);
  421. }
  422. void
  423. add_lookback_edge(stateno, ruleno, gotono)
  424. int stateno;
  425. int ruleno;
  426. int gotono;
  427. {
  428. register int i;
  429. register int k;
  430. register int found;
  431. register shorts *sp;
  432. i = lookaheads[stateno];
  433. k = lookaheads[stateno + 1];
  434. found = 0;
  435. while (!found && i < k)
  436. {
  437. if (LAruleno[i] == ruleno)
  438. found = 1;
  439. else
  440. i++;
  441. }
  442. if (found == 0)
  443. berror("add_lookback_edge");
  444. sp = NEW(shorts);
  445. sp->next = lookback[i];
  446. sp->value = gotono;
  447. lookback[i] = sp;
  448. }
  449. short **
  450. transpose(R, n)
  451. short **R;
  452. int n;
  453. {
  454. register short **new_R;
  455. register short **temp_R;
  456. register short *nedges;
  457. register short *sp;
  458. register int i;
  459. register int k;
  460. nedges = NEW2(n, short);
  461. for (i = 0; i < n; i++)
  462. {
  463. sp = R[i];
  464. if (sp)
  465. {
  466. while (*sp >= 0)
  467. nedges[*sp++]++;
  468. }
  469. }
  470. new_R = NEW2(n, short *);
  471. temp_R = NEW2(n, short *);
  472. for (i = 0; i < n; i++)
  473. {
  474. k = nedges[i];
  475. if (k > 0)
  476. {
  477. sp = NEW2(k + 1, short);
  478. new_R[i] = sp;
  479. temp_R[i] = sp;
  480. sp[k] = -1;
  481. }
  482. }
  483. FREE(nedges);
  484. for (i = 0; i < n; i++)
  485. {
  486. sp = R[i];
  487. if (sp)
  488. {
  489. while (*sp >= 0)
  490. *temp_R[*sp++]++ = i;
  491. }
  492. }
  493. FREE(temp_R);
  494. return (new_R);
  495. }
  496. void
  497. compute_FOLLOWS()
  498. {
  499. register int i;
  500. digraph(includes);
  501. for (i = 0; i < ngotos; i++)
  502. {
  503. if (includes[i]) FREE(includes[i]);
  504. }
  505. FREE(includes);
  506. }
  507. void
  508. compute_lookaheads()
  509. {
  510. register int i;
  511. register int n;
  512. register unsigned *fp1;
  513. register unsigned *fp2;
  514. register unsigned *fp3;
  515. register shorts *sp;
  516. register unsigned *rowp;
  517. /* register short *rulep; JF unused */
  518. /* register int count; JF unused */
  519. register shorts *sptmp;/* JF */
  520. rowp = LA;
  521. n = lookaheads[nstates];
  522. for (i = 0; i < n; i++)
  523. {
  524. fp3 = rowp + tokensetsize;
  525. for (sp = lookback[i]; sp; sp = sp->next)
  526. {
  527. fp1 = rowp;
  528. fp2 = F + tokensetsize * sp->value;
  529. while (fp1 < fp3)
  530. *fp1++ |= *fp2++;
  531. }
  532. rowp = fp3;
  533. }
  534. for (i = 0; i < n; i++)
  535. {/* JF removed ref to freed storage */
  536. for (sp = lookback[i]; sp; sp = sptmp) {
  537. sptmp=sp->next;
  538. FREE(sp);
  539. }
  540. }
  541. FREE(lookback);
  542. FREE(F);
  543. }
  544. void
  545. digraph(relation)
  546. short **relation;
  547. {
  548. register int i;
  549. infinity = ngotos + 2;
  550. INDEX = NEW2(ngotos + 1, short);
  551. VERTICES = NEW2(ngotos + 1, short);
  552. top = 0;
  553. R = relation;
  554. for (i = 0; i < ngotos; i++)
  555. INDEX[i] = 0;
  556. for (i = 0; i < ngotos; i++)
  557. {
  558. if (INDEX[i] == 0 && R[i])
  559. traverse(i);
  560. }
  561. FREE(INDEX);
  562. FREE(VERTICES);
  563. }
  564. void
  565. traverse(i)
  566. register int i;
  567. {
  568. register unsigned *fp1;
  569. register unsigned *fp2;
  570. register unsigned *fp3;
  571. register int j;
  572. register short *rp;
  573. int height;
  574. unsigned *base;
  575. VERTICES[++top] = i;
  576. INDEX[i] = height = top;
  577. base = F + i * tokensetsize;
  578. fp3 = base + tokensetsize;
  579. rp = R[i];
  580. if (rp)
  581. {
  582. while ((j = *rp++) >= 0)
  583. {
  584. if (INDEX[j] == 0)
  585. traverse(j);
  586. if (INDEX[i] > INDEX[j])
  587. INDEX[i] = INDEX[j];
  588. fp1 = base;
  589. fp2 = F + j * tokensetsize;
  590. while (fp1 < fp3)
  591. *fp1++ |= *fp2++;
  592. }
  593. }
  594. if (INDEX[i] == height)
  595. {
  596. for (;;)
  597. {
  598. j = VERTICES[top--];
  599. INDEX[j] = infinity;
  600. if (i == j)
  601. break;
  602. fp1 = base;
  603. fp2 = F + j * tokensetsize;
  604. while (fp1 < fp3)
  605. *fp2++ = *fp1++;
  606. }
  607. }
  608. }