mes_tokenize.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /* -*-comment-start: "//";comment-end:""-*-
  2. * GNU Mes --- Maxwell Equations of Software
  3. * Copyright © 2016,2017,2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
  4. * Copyright © 2019 Jeremiah Orians
  5. *
  6. * This file is part of GNU Mes.
  7. *
  8. * GNU Mes is free software; you can redistribute it and/or modify it
  9. * under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 3 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * GNU Mes is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with GNU Mes. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include "mes.h"
  22. struct cell* token_stack;
  23. /* Imported functions */
  24. char* copy_string(char* target, char* source,int length);
  25. int escape_lookup(char* c);
  26. int in_set(int c, char* s);
  27. int string_size(char* a);
  28. struct cell* findsym(char *name);
  29. struct cell* make_char(int a);
  30. struct cell* make_keyword(char* name);
  31. struct cell* make_string(char* a, int length);
  32. struct cell* make_sym(char* name);
  33. void reset_block(char* a);
  34. /****************************************************************
  35. * "Convert a string into a list of tokens." *
  36. ****************************************************************/
  37. struct cell* tokenize(struct cell* head, char* fullstring, unsigned size)
  38. {
  39. unsigned string_index = 0;
  40. unsigned out_index = 0;
  41. int done = FALSE;
  42. if(0 == fullstring[0])
  43. {
  44. return head;
  45. }
  46. reset_block(memory_block);
  47. int c;
  48. do
  49. {
  50. c = fullstring[string_index];
  51. if(string_index > size)
  52. {
  53. done = TRUE;
  54. }
  55. else if('\\' == c)
  56. {
  57. memory_block[out_index] = c;
  58. string_index = string_index + 1;
  59. out_index = out_index + 1;
  60. c = fullstring[string_index];
  61. memory_block[out_index] = c;
  62. string_index = string_index + 1;
  63. out_index = out_index + 1;
  64. c = fullstring[string_index];
  65. }
  66. else if('\"' == c)
  67. {
  68. do
  69. {
  70. if(c == '\\')
  71. {
  72. c = escape_lookup(fullstring + string_index);
  73. if(fullstring[string_index + 1] == 'x') string_index = string_index + 2;
  74. string_index = string_index + 1;
  75. }
  76. memory_block[out_index] = c;
  77. string_index = string_index + 1;
  78. out_index = out_index + 1;
  79. c = fullstring[string_index];
  80. require(string_index < MAX_TOKEN, "String Token exceeds size limit for token\nExpand MES_MAX_TOKEN value to resolve\n");
  81. } while(('\"' != fullstring[string_index]));
  82. string_index = string_index + 1;
  83. out_index = out_index + 1;
  84. done = TRUE;
  85. }
  86. else
  87. {
  88. if(in_set(c, " \t\n\r\f"))
  89. {
  90. string_index = string_index + 1;
  91. out_index = out_index + 1;
  92. done = TRUE;
  93. }
  94. else
  95. {
  96. memory_block[out_index] = c;
  97. string_index = string_index + 1;
  98. out_index = out_index + 1;
  99. }
  100. }
  101. require(out_index < MAX_TOKEN, "Token exceeds size limit for token\nExpand MES_MAX_TOKEN value to resolve\n");
  102. } while(!done);
  103. if(out_index > 1)
  104. {
  105. char* store = calloc(string_index + 1, sizeof(char));
  106. copy_string(store, memory_block, out_index);
  107. struct cell* temp = make_sym(store);
  108. temp->cdr = head;
  109. head = temp;
  110. }
  111. head = tokenize(head, (fullstring+string_index), (size - string_index));
  112. return head;
  113. }
  114. int is_integer(char* a)
  115. {
  116. int i = numerate_string(a);
  117. if(0 != i) return TRUE;
  118. if(match("0", a)) return TRUE;
  119. if(match("-0", a)) return TRUE;
  120. return FALSE;
  121. }
  122. char special_lookup(char* s)
  123. {
  124. if (match(s, "\\nul")) return '\0';
  125. else if (match(s, "\\alarm")) return '\a';
  126. else if (match(s, "\\backspace")) return '\b';
  127. else if (match(s, "\\tab")) return '\t';
  128. else if (match(s, "\\newline")) return '\n';
  129. else if (match(s, "\\vtab")) return '\v';
  130. else if (match(s, "\\page")) return '\f';
  131. else if (match(s, "\\return")) return '\r';
  132. else if (match(s, "\\space")) return ' ';
  133. return s[1];
  134. }
  135. struct cell* readlist();
  136. struct cell* readobj();
  137. struct cell* list_to_vector(struct cell* args);
  138. struct cell* reader_read_hash(struct cell* a)
  139. {
  140. /* Support #\char*/
  141. if('\\' == a->string[1])
  142. {
  143. return make_char(special_lookup(a->string + 1));
  144. }
  145. /* Support #(1 2 3) vectors */
  146. if('(' == a->string[1])
  147. {
  148. return list_to_vector(readlist());
  149. }
  150. /* Support #x0123456789ABCDEF hex*/
  151. if('x' == a->string[1])
  152. {
  153. a->string[0] = '0';
  154. a->type = INT;
  155. a->value = numerate_string(a->string);
  156. return a;
  157. }
  158. /* Support #o01234567 Octals */
  159. if('o' == a->string[1])
  160. {
  161. a->string = a->string + 1;
  162. a->string[0] = '0';
  163. a->type = INT;
  164. a->value = numerate_string(a->string);
  165. return a;
  166. }
  167. /* Support standard true and false */
  168. if(match("#t", a->string)) return cell_t;
  169. if(match("#f", a->string)) return cell_f;
  170. /* Support #:keywords */
  171. if(':' == a->string[1])
  172. {
  173. return make_keyword(a->string);
  174. }
  175. file_print("Unknown hash provided: ", stderr);
  176. file_print(a->string, stderr);
  177. exit(EXIT_FAILURE);
  178. }
  179. /********************************************************************
  180. * Numbers become numbers *
  181. * Strings become strings *
  182. * Functions become functions *
  183. * quoted things become quoted *
  184. * Everything is treated like a symbol *
  185. ********************************************************************/
  186. struct cell* atom(struct cell* a)
  187. {
  188. /* Check for quote */
  189. if(match("'", a->string))
  190. {
  191. return make_cons(quote, make_cons(readobj(), nil));
  192. }
  193. /* Check for quasiquote */
  194. if(match("`", a->string))
  195. {
  196. return make_cons(quasiquote, make_cons(readobj(), nil));
  197. }
  198. /* Check for unquote */
  199. if(match(",", a->string))
  200. {
  201. return make_cons(unquote, make_cons(readobj(), nil));
  202. }
  203. /* Check for unquote-splicing */
  204. if(match(",@", a->string))
  205. {
  206. return make_cons(unquote_splicing, make_cons(readobj(), nil));
  207. }
  208. /* Check for strings */
  209. if('\"' == a->string[0])
  210. {
  211. return make_string(a->string + 1, string_size(a->string + 1));
  212. }
  213. /* Check for specials*/
  214. if('#' == a->string[0])
  215. {
  216. return reader_read_hash(a);
  217. }
  218. /* Check for integer */
  219. if(is_integer(a->string))
  220. {
  221. a->type = INT;
  222. a->value = numerate_string(a->string);
  223. return a;
  224. }
  225. /* Check for functions */
  226. struct cell* op = findsym(a->string);
  227. if(nil != op)
  228. {
  229. return op->car;
  230. }
  231. /* Assume new symbol */
  232. all_symbols = make_cons(a, all_symbols);
  233. return a;
  234. }
  235. /****************************************************************
  236. * "Read an expression from a sequence of tokens." *
  237. ****************************************************************/
  238. struct cell* readobj()
  239. {
  240. struct cell* head = token_stack;
  241. require(NULL != head, "missing object in readobj token_stack\n");
  242. token_stack = head->cdr;
  243. head->cdr = NULL;
  244. if (match("(", head->string))
  245. {
  246. return readlist();
  247. }
  248. return atom(head);
  249. }
  250. struct cell* readlist()
  251. {
  252. struct cell* head = token_stack;
  253. require(NULL != head, "missing object in readlist token_stack\n");
  254. if (match(")", head->string))
  255. {
  256. token_stack = head->cdr;
  257. return nil;
  258. }
  259. struct cell* tmp = readobj();
  260. return make_cons(tmp,readlist());
  261. }
  262. /****************************************************
  263. * Put list of tokens in correct order *
  264. ****************************************************/
  265. struct cell* reverse_list(struct cell* head)
  266. {
  267. struct cell* root = NULL;
  268. struct cell* next;
  269. while(NULL != head)
  270. {
  271. next = head->cdr;
  272. head->cdr = root;
  273. root = head;
  274. head = next;
  275. }
  276. return root;
  277. }
  278. /****************************************************
  279. * "Read a S-expression from a string." *
  280. ****************************************************/
  281. struct cell* parse(char* program, int size)
  282. {
  283. token_stack = tokenize(NULL, program, size);
  284. if(NULL == token_stack)
  285. {
  286. return nil;
  287. }
  288. token_stack = reverse_list(token_stack);
  289. return readobj();
  290. }