mes_tokenize.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. /* -*-comment-start: "//";comment-end:""-*-
  2. * GNU Mes --- Maxwell Equations of Software
  3. * Copyright © 2016,2017,2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
  4. * Copyright © 2019 Jeremiah Orians
  5. *
  6. * This file is part of GNU Mes.
  7. *
  8. * GNU Mes is free software; you can redistribute it and/or modify it
  9. * under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 3 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * GNU Mes is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with GNU Mes. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include "mes.h"
  22. struct cell* token_stack;
  23. char* copy_string(char* target, char* source);
  24. struct cell* findsym(char *name);
  25. struct cell* make_char(int a);
  26. struct cell* make_string(char* a);
  27. struct cell* make_sym(char* name);
  28. void reset_block(char* a);
  29. /****************************************************************
  30. * "Convert a string into a list of tokens." *
  31. ****************************************************************/
  32. struct cell* tokenize(struct cell* head, char* fullstring, int size)
  33. {
  34. int string_index = 0;
  35. int done = FALSE;
  36. if((0 >= size) || (0 == fullstring[0]))
  37. {
  38. return head;
  39. }
  40. reset_block(memory_block);
  41. int escape;
  42. int c;
  43. do
  44. {
  45. c = fullstring[string_index];
  46. if(string_index > size)
  47. {
  48. done = TRUE;
  49. }
  50. else if('\"' == c)
  51. {
  52. escape = FALSE;
  53. do
  54. {
  55. if(!escape && '\\' == c ) escape = TRUE;
  56. else escape = FALSE;
  57. memory_block[string_index] = c;
  58. string_index = string_index + 1;
  59. c = fullstring[string_index];
  60. } while(escape || ('\"' != fullstring[string_index]));
  61. string_index = string_index + 1;
  62. done = TRUE;
  63. }
  64. else
  65. {
  66. if((' ' == c) || ('\t' == c) || ('\n' == c) | ('\r' == c))
  67. {
  68. string_index = string_index + 1;
  69. done = TRUE;
  70. }
  71. else
  72. {
  73. memory_block[string_index] = c;
  74. string_index = string_index + 1;
  75. }
  76. }
  77. } while(!done);
  78. if(string_index > 1)
  79. {
  80. char* store = calloc(string_index + 1, sizeof(char));
  81. copy_string(store, memory_block);
  82. struct cell* temp = make_sym(store);
  83. temp->cdr = head;
  84. head = temp;
  85. }
  86. head = tokenize(head, (fullstring+string_index), (size - string_index));
  87. return head;
  88. }
  89. int is_integer(char* a)
  90. {
  91. if(('0' <= a[0]) && ('9' >= a[0]))
  92. {
  93. return TRUE;
  94. }
  95. if('-' == a[0])
  96. {
  97. if(('0' <= a[1]) && ('9' >= a[1]))
  98. {
  99. return TRUE;
  100. }
  101. }
  102. return FALSE;
  103. }
  104. char special_lookup(char* s)
  105. {
  106. if (match(s, "\\nul")) return '\0';
  107. else if (match(s, "\\alarm")) return '\a';
  108. else if (match(s, "\\backspace")) return '\b';
  109. else if (match(s, "\\tab")) return '\t';
  110. else if (match(s, "\\newline")) return '\n';
  111. else if (match(s, "\\vtab")) return '\v';
  112. else if (match(s, "\\page")) return '\f';
  113. else if (match(s, "\\return")) return '\r';
  114. else if (match(s, "\\space")) return ' ';
  115. return s[1];
  116. }
  117. struct cell* readlist();
  118. struct cell* readobj();
  119. struct cell* list_to_vector(struct cell* args);
  120. struct cell* reader_read_hash(struct cell* a)
  121. {
  122. if('\\' == a->string[1])
  123. {
  124. return make_char(special_lookup(a->string + 1));
  125. }
  126. if('(' == a->string[1])
  127. {
  128. return list_to_vector(readlist());
  129. }
  130. if('x' == a->string[1])
  131. {
  132. a->string[0] = '0';
  133. a->type = INT;
  134. a->value = numerate_string(a->string);
  135. return a;
  136. }
  137. if('o' == a->string[1])
  138. {
  139. a->string = a->string + 1;
  140. a->string[0] = '0';
  141. a->type = INT;
  142. a->value = numerate_string(a->string);
  143. return a;
  144. }
  145. if(match("#t", a->string)) return cell_t;
  146. if(match("#f", a->string)) return cell_f;
  147. file_print("Unknown hash provided: ", stderr);
  148. file_print(a->string, stderr);
  149. exit(EXIT_FAILURE);
  150. }
  151. /********************************************************************
  152. * Numbers become numbers *
  153. * Strings become strings *
  154. * Functions become functions *
  155. * quoted things become quoted *
  156. * Everything is treated like a symbol *
  157. ********************************************************************/
  158. struct cell* atom(struct cell* a)
  159. {
  160. /* Check for quote */
  161. if(match("'", a->string))
  162. {
  163. return make_cons(quote, make_cons(readobj(), nil));
  164. }
  165. /* Check for quasiquote */
  166. if(match("`", a->string))
  167. {
  168. return make_cons(quasiquote, make_cons(readobj(), nil));
  169. }
  170. /* Check for unquote */
  171. if(match(",", a->string))
  172. {
  173. return make_cons(unquote, make_cons(readobj(), nil));
  174. }
  175. /* Check for unquote-splicing */
  176. if(match(",@", a->string))
  177. {
  178. return make_cons(unquote_splicing, make_cons(readobj(), nil));
  179. }
  180. /* Check for strings */
  181. if('\"' == a->string[0])
  182. {
  183. return make_string(a->string + 1);
  184. }
  185. /* Check for specials*/
  186. if('#' == a->string[0])
  187. {
  188. return reader_read_hash(a);
  189. }
  190. /* Check for integer */
  191. if(is_integer(a->string))
  192. {
  193. a->type = INT;
  194. a->value = numerate_string(a->string);
  195. return a;
  196. }
  197. /* Check for functions */
  198. struct cell* op = findsym(a->string);
  199. if(nil != op)
  200. {
  201. return op->car;
  202. }
  203. /* Assume new symbol */
  204. all_symbols = make_cons(a, all_symbols);
  205. return a;
  206. }
  207. /****************************************************************
  208. * "Read an expression from a sequence of tokens." *
  209. ****************************************************************/
  210. struct cell* readobj()
  211. {
  212. struct cell* head = token_stack;
  213. token_stack = head->cdr;
  214. head->cdr = NULL;
  215. if (match("(", head->string))
  216. {
  217. return readlist();
  218. }
  219. return atom(head);
  220. }
  221. struct cell* readlist()
  222. {
  223. struct cell* head = token_stack;
  224. if (match(")", head->string))
  225. {
  226. token_stack = head->cdr;
  227. return nil;
  228. }
  229. struct cell* tmp = readobj();
  230. return make_cons(tmp,readlist());
  231. }
  232. /****************************************************
  233. * Put list of tokens in correct order *
  234. ****************************************************/
  235. struct cell* reverse_list(struct cell* head)
  236. {
  237. struct cell* root = NULL;
  238. while(NULL != head)
  239. {
  240. struct cell* next = head->cdr;
  241. head->cdr = root;
  242. root = head;
  243. head = next;
  244. }
  245. return root;
  246. }
  247. /****************************************************
  248. * "Read a S-expression from a string." *
  249. ****************************************************/
  250. struct cell* parse(char* program, int size)
  251. {
  252. token_stack = tokenize(NULL, program, size);
  253. if(NULL == token_stack)
  254. {
  255. return nil;
  256. }
  257. token_stack = reverse_list(token_stack);
  258. return readobj();
  259. }