parser-combinators.sml 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. (* 'a is a parameter specifying what kind of result tree will be returned *)
  2. type pstring = char list;
  3. type 'a parse_data = ('a * pstring);
  4. type 'a parse_result = 'a parse_data list;
  5. datatype 'a result = ParseFailure
  6. | ParseSuccess of 'a parse_result;
  7. type 'a parser = pstring -> 'a result;
  8. (*
  9. result :: a -> Parser a
  10. result v = \inp -> [(v, inp)]
  11. *)
  12. fun result value input = ParseSuccess [(value, input)];
  13. fun zero input = ParseFailure;
  14. fun item [] = ParseFailure
  15. | item (c::cs) =
  16. let
  17. val value = c;
  18. val unconsumed_input = cs;
  19. in
  20. ParseSuccess [(value, unconsumed_input)]
  21. end;
  22. (* The paper makes use of a concat function, which flattens nested lists of
  23. depth 2 to a flat list. *)
  24. fun concat nil = nil
  25. | concat (nil::rest_outer) = concat rest_outer
  26. | concat ((first::rest_inner)::rest_outer) =
  27. first :: concat (rest_inner :: rest_outer);
  28. (* The paper also makes use of a list comprehension, which does not exist in
  29. SML. To emulate it we write a compose function. *)
  30. fun compose inner outer =
  31. fn any => outer (inner any);
  32. (* The parser bind "integrates {sequencing of parsers} with {processing of their
  33. result values}. bind has the following signature:
  34. Take
  35. - a Parser of type 'a,
  36. - a function which maps from type 'a to a Parser of type 'b
  37. Return
  38. - a Parser of type 'b
  39. The Parser of type 'b will be constructed from Parser of type 'a and the mapping
  40. function. *)
  41. fun bind p f (input: pstring) =
  42. ParseSuccess
  43. (concat
  44. (map
  45. (fn (value, unconsumed_input) => f value unconsumed_input)
  46. (p input)));
  47. fun sequence p q =
  48. bind p (fn x =>
  49. (bind q (fn y =>
  50. result x y)));
  51. fun bind (p: 'a parser) (f: 'a -> 'b parser) =
  52. (* bind must return a parser itself, so that we can combine parsers into new
  53. parsers. A parser always takes some input string, here called `input`. *)
  54. (fn input: pstring =>
  55. (ParseSuccess
  56. (concat
  57. (map
  58. (fn ((value: 'a, unconsumed_input): 'a parse_data) =>
  59. ((f value): 'b parser) unconsumed_input)
  60. (p input)))): 'b result): 'b parser;
  61. fun bind (p: 'a parser) (f: 'a -> 'b parser) =
  62. (* bind must return a parser itself, so that we can combine parsers into new
  63. parsers. A parser always takes some input string, here called `input`. *)
  64. (fn (input: pstring) =>
  65. let
  66. val result_of_parser_a = p input;
  67. in
  68. result_of_parser_a
  69. end): 'b parser;
  70. (* =========================================================
  71. Approach from https://invidio.xamh.de/watch?v=RDalzi7mhdY
  72. ========================================================= *)
  73. (* (matched char, remaining chars, failure message) *)
  74. type result2 = (pstring * char list * string);
  75. fun pchar2 (char_to_match: char) (input: pstring) =
  76. if List.null input
  77. then
  78. ([], input, "no characters left to match"): result2
  79. else
  80. let
  81. val (first_char::rest_chars) = input;
  82. in
  83. if first_char = char_to_match
  84. then
  85. ([first_char], rest_chars, ""): result2
  86. else
  87. ([], input, "first character does not match"): result2
  88. end;
  89. (* 3 - introduce Success and Failure datatype *)
  90. type result3 = (char * char list);
  91. datatype 'a result = Success of result3
  92. | Failure of string;
  93. fun pchar3 (char_to_match: char) (input: pstring) =
  94. if List.null input
  95. then
  96. Failure "no characters left to match"
  97. else
  98. let
  99. val (first_char::rest_chars) = input;
  100. in
  101. if first_char = char_to_match
  102. then
  103. Success (first_char, rest_chars)
  104. else
  105. Failure "first character does not match"
  106. end;
  107. (* 4 - wrap function as a type "parser" *)
  108. type 'a parser_outcome_value = ('a list * pstring);
  109. datatype 'a ParseOutcome = Success of 'a parser_outcome_value
  110. | Failure of string;
  111. datatype 'a Parser = Parser of (pstring -> 'a ParseOutcome);
  112. fun pchar (char_to_match: char) =
  113. let
  114. fun parsing_func input =
  115. if List.null input
  116. then
  117. Failure "no characters left to match"
  118. else
  119. let
  120. val (first_char::rest_chars) = input;
  121. in
  122. if first_char = char_to_match
  123. then
  124. Success ([first_char], rest_chars)
  125. else
  126. Failure "first character does not match"
  127. end;
  128. in
  129. Parser parsing_func
  130. end;
  131. fun run parser input =
  132. let
  133. val (Parser parsing_func) = parser;
  134. in
  135. parsing_func input
  136. end;
  137. fun pcompose parser1 parser2 =
  138. let
  139. fun parsing_func input =
  140. let
  141. val result1 = run parser1 input;
  142. in
  143. case result1 of
  144. Failure err => result1
  145. | Success (value1, remaining1) =>
  146. let
  147. val result2 = run parser2 remaining1;
  148. in
  149. case result2 of
  150. Failure err => result2
  151. | Success (value2, remaining2) =>
  152. Success (value1 @ value2, remaining2)
  153. end
  154. end
  155. in
  156. Parser parsing_func
  157. end;