script.sed 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. # Purpose: To accept texts like these ($ stands for end of string):
  2. # package a.b.c;$
  3. # package p1.p2.p3;$
  4. # And reject texts which don't follow this form,
  5. # with a sed (Stream EDitor) implementation of the concepts exposed in LR Parsing, from A.V. Aho and S.C. Johnson.
  6. # Texts which span across lines, and/or contain whitespaces between unbreakable elements as well as at string edges, are also accepted:
  7. # package
  8. # a
  9. # .
  10. # b . c
  11. # ; $
  12. # TODO Add comments on how to read this program. Let it be sort of mapping from this program itself to the book LR Parsing.
  13. # 0: _package a.b.c;$
  14. # 1: PACKAGE_a.b.c;$
  15. # 2: PACKAGE ID_.b.c;$
  16. # 3: PACKAGE SEQ_.b.c;$
  17. # 4: PACKAGE SEQ '.' _b.c;$
  18. # 5: PACKAGE SEQ '.' ID_.c;$
  19. # 3: PACKAGE SEQ_.c;$
  20. # 4: PACKAGE SEQ '.' _c;$
  21. # 5: PACKAGE SEQ '.' ID_;$
  22. # 3: PACKAGE SEQ_;$
  23. # 6: PKGSTMT_;$
  24. # 7: PKGSTMT';'_$ => accept (currently this just means the program halt with its last significant operation being pushing one state atop ';' -the 'accept' just means the abscense of error until the program execution can't go ahead).
  25. # Keeping the code correct as more and more table entries are added (both parse action and goto table) usually means to take care of the following:
  26. # - The states that should be checked for acceptance can change due to new states taking the place of acceptance state. This would be modified in the code that executes only at last input line (the one surrounded with ${ and }).
  27. # - Coming to a missmatch after having looked up over all table entries is something indeed not expected. It would mean a serious problem in the code. Still, jump into an error routine (:error label) is commanded (or stated, as in statement) to prevent such a bad code from running away (in other words, it's like a failfast on unexpected conditions). The last table entry lookup is followed by this jump. All others jumps following table entry checks should point to the next one. Thus, adding a new state entiles chaining the former last entry with the new one and that one with the error routine. This is true for goto table.
  28. # - Adding a new state also entiles to re-chain the former with the new last entry and this one with the parse_action_else routine in the parse action table totally likewise to what's done for the error routine and the goto table.
  29. x
  30. :next_cycle
  31. x
  32. s/^\s\+//
  33. t L1
  34. :L1
  35. s/^$//
  36. T stick_with_input_line
  37. # Accept on last input line
  38. ${
  39. x
  40. s/^7/\0/ # check stack top for acceptance states
  41. T error
  42. }
  43. b
  44. :stick_with_input_line
  45. # Parse action table
  46. # State retrieval
  47. x
  48. :0
  49. s/^0/\0/
  50. T 1
  51. x
  52. :0_PACKAGE
  53. s/^package\b//
  54. T error
  55. # shift PACKAGE
  56. x
  57. s/^/PACKAGE\n/
  58. t goto_table
  59. :1
  60. s/^1/\0/
  61. T 2
  62. x
  63. :1_ID
  64. s/^[a-zA-Z_][a-zA-Z0-9_]*//
  65. T error
  66. # shift ID
  67. x
  68. s/^/ID\n/
  69. t goto_table
  70. :2
  71. s/^2/\0/
  72. T 3
  73. x
  74. :2_'.'
  75. s/^\./\0/
  76. T error
  77. # reduce by rule SEQ -> ID
  78. x
  79. s/^2\nID/SEQ/
  80. t goto_table
  81. :3
  82. s/^3/\0/
  83. T 4
  84. x
  85. :3_'.'
  86. s/^\.//
  87. T 3_SEMICOLON
  88. # shift '.'
  89. x
  90. s/^/'.'\n/
  91. t goto_table
  92. :3_SEMICOLON
  93. s/^;/\0/
  94. T error
  95. # reduce by rule PKGSTMT -> PACKAGE SEQ
  96. x
  97. s/^3\nSEQ\n1\nPACKAGE/PKGSTMT/
  98. t goto_table
  99. :4
  100. s/^4/\0/
  101. T 5
  102. x
  103. :4_ID
  104. s/^[a-zA-Z_][a-zA-Z0-9_]*//
  105. T error
  106. # shift ID
  107. x
  108. s/^/ID\n/
  109. t goto_table
  110. :5
  111. s/^5/\0/
  112. T 6
  113. x
  114. :5_'.'
  115. :5_SEMICOLON
  116. s/^[.;]/\0/
  117. T error
  118. # reduce by rule SEQ -> SEQ '.' ID
  119. x
  120. s/^5\nID\n4\n'\.'\n3\nSEQ/SEQ/
  121. t goto_table
  122. :6
  123. s/^6/\0/
  124. T parse_action_else # It should solely follow the last entry in the table lookup (checked for a match).
  125. x
  126. :6_SEMICOLON
  127. s/^;//
  128. T error
  129. # shift ';'
  130. x
  131. s/^/';'\n/
  132. t goto_table
  133. :parse_action_else
  134. # Before jumping to init_stack a check is done that the parser is not in an acceptance state in which more input is remaining but not allowed.
  135. # Having arrived the flow to here implicitly always means more input is remaining.
  136. # A clean and cheap way is to impose for init_stack to be jumped into, that the stack must be totally empty. This check is cheaper (more inexpensive) than checking arround acceptance states.
  137. s/^$//
  138. t init_stack
  139. b error
  140. :goto_table
  141. # It's trivial yet worthnoting, upon walking across goto table's 2nd. dimmension (where stack top is being transited from a symbol to a new state), utmost correctness rules would require to jump to the next entry from each one against which a missmatch has turned out. In practice, belonging these 2 types of elements (symbols and states) each on its separate domain, and especially being they of different form distinguishable one another, guarrantee is that at most one entry will hit while the others not.
  142. # Rightmost state
  143. :0g
  144. s/\`.*\n0/\0/m
  145. T 1g
  146. :0g_PACKAGE
  147. s/^PACKAGE/1\n\0/
  148. t next_cycle
  149. :0g_PKGSTMT
  150. s/^PKGSTMT/6\n\0/
  151. t next_cycle
  152. :1g
  153. s/\`.*\n1/\0/m
  154. T 3g
  155. :1g_ID
  156. s/^ID/2\n\0/
  157. t next_cycle
  158. :1g_SEQ
  159. s/^SEQ/3\n\0/
  160. t next_cycle
  161. :3g
  162. s/\`.*\n3/\0/m
  163. T 4g
  164. :3g_'.'
  165. s/^'\.'/4\n\0/
  166. t next_cycle
  167. :4g
  168. s/\`.*\n4/\0/m
  169. T 6g
  170. :4g_ID
  171. s/^ID/5\n\0/
  172. t next_cycle
  173. :6g
  174. s/\`.*\n6/\0/m
  175. T error # This is a goto table exit condidion. It must go only where every else table entry cannot be provided.
  176. :6g_SEMICOLON
  177. s/^';'/7\n\0/
  178. t next_cycle
  179. :init_stack
  180. s/^$/0/
  181. t next_cycle
  182. a\
  183. Oh oh!
  184. :error
  185. p
  186. a\
  187. Error
  188. x
  189. q 1 # GNU's extension