scan.cpp 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. // This scanner uses the recursive descent method.
  2. //
  3. // The char pointers token_str and scan_str are pointers to the input string as
  4. // in the following example.
  5. //
  6. // | g | a | m | m | a | | a | l | p | h | a |
  7. // ^ ^
  8. // token_str scan_str
  9. //
  10. // The char pointer token_buf points to a malloc buffer.
  11. //
  12. // | g | a | m | m | a | \0 |
  13. // ^
  14. // token_buf
  15. #include "stdafx.h"
  16. #include "defs.h"
  17. #define T_INTEGER 1001
  18. #define T_DOUBLE 1002
  19. #define T_SYMBOL 1003
  20. #define T_FUNCTION 1004
  21. #define T_NEWLINE 1006
  22. #define T_STRING 1007
  23. #define T_GTEQ 1008
  24. #define T_LTEQ 1009
  25. #define T_EQ 1010
  26. static int token, newline_flag, meta_mode;
  27. static char *input_str, *scan_str, *token_str, *token_buf;
  28. // Returns number of chars scanned and expr on stack.
  29. // Returns zero when nothing left to scan.
  30. int
  31. scan(char *s)
  32. {
  33. meta_mode = 0;
  34. expanding++;
  35. input_str = s;
  36. scan_str = s;
  37. get_next_token();
  38. if (token == 0) {
  39. push(symbol(NIL));
  40. expanding--;
  41. return 0;
  42. }
  43. scan_stmt();
  44. expanding--;
  45. return (int) (token_str - input_str);
  46. }
  47. int
  48. scan_meta(char *s)
  49. {
  50. meta_mode = 1;
  51. expanding++;
  52. input_str = s;
  53. scan_str = s;
  54. get_next_token();
  55. if (token == 0) {
  56. push(symbol(NIL));
  57. expanding--;
  58. return 0;
  59. }
  60. scan_stmt();
  61. expanding--;
  62. return (int) (token_str - input_str);
  63. }
  64. void
  65. scan_stmt(void)
  66. {
  67. scan_relation();
  68. if (token == '=') {
  69. get_next_token();
  70. push_symbol(SETQ);
  71. swap();
  72. scan_relation();
  73. list(3);
  74. }
  75. }
  76. void
  77. scan_relation(void)
  78. {
  79. scan_expression();
  80. switch (token) {
  81. case T_EQ:
  82. push_symbol(TESTEQ);
  83. swap();
  84. get_next_token();
  85. scan_expression();
  86. list(3);
  87. break;
  88. case T_LTEQ:
  89. push_symbol(TESTLE);
  90. swap();
  91. get_next_token();
  92. scan_expression();
  93. list(3);
  94. break;
  95. case T_GTEQ:
  96. push_symbol(TESTGE);
  97. swap();
  98. get_next_token();
  99. scan_expression();
  100. list(3);
  101. break;
  102. case '<':
  103. push_symbol(TESTLT);
  104. swap();
  105. get_next_token();
  106. scan_expression();
  107. list(3);
  108. break;
  109. case '>':
  110. push_symbol(TESTGT);
  111. swap();
  112. get_next_token();
  113. scan_expression();
  114. list(3);
  115. break;
  116. default:
  117. break;
  118. }
  119. }
  120. void
  121. scan_expression(void)
  122. {
  123. int h = tos;
  124. switch (token) {
  125. case '+':
  126. get_next_token();
  127. scan_term();
  128. break;
  129. case '-':
  130. get_next_token();
  131. scan_term();
  132. negate();
  133. break;
  134. default:
  135. scan_term();
  136. break;
  137. }
  138. while (newline_flag == 0 && (token == '+' || token == '-')) {
  139. if (token == '+') {
  140. get_next_token();
  141. scan_term();
  142. } else {
  143. get_next_token();
  144. scan_term();
  145. negate();
  146. }
  147. }
  148. if (tos - h > 1) {
  149. list(tos - h);
  150. push_symbol(ADD);
  151. swap();
  152. cons();
  153. }
  154. }
  155. int
  156. is_factor(void)
  157. {
  158. switch (token) {
  159. case '*':
  160. case '/':
  161. return 1;
  162. case '(':
  163. case T_SYMBOL:
  164. case T_FUNCTION:
  165. case T_INTEGER:
  166. case T_DOUBLE:
  167. case T_STRING:
  168. if (newline_flag) { // implicit mul can't cross line
  169. scan_str = token_str; // better error display
  170. return 0;
  171. } else
  172. return 1;
  173. default:
  174. break;
  175. }
  176. return 0;
  177. }
  178. void
  179. scan_term(void)
  180. {
  181. int h = tos;
  182. scan_power();
  183. // discard integer 1
  184. if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
  185. pop();
  186. while (is_factor()) {
  187. if (token == '*') {
  188. get_next_token();
  189. scan_power();
  190. } else if (token == '/') {
  191. get_next_token();
  192. scan_power();
  193. inverse();
  194. } else
  195. scan_power();
  196. // fold constants
  197. if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
  198. multiply();
  199. // discard integer 1
  200. if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
  201. pop();
  202. }
  203. if (h == tos)
  204. push_integer(1);
  205. else if (tos - h > 1) {
  206. list(tos - h);
  207. push_symbol(MULTIPLY);
  208. swap();
  209. cons();
  210. }
  211. }
  212. void
  213. scan_power(void)
  214. {
  215. scan_factor();
  216. if (token == '^') {
  217. get_next_token();
  218. push_symbol(POWER);
  219. swap();
  220. scan_power();
  221. list(3);
  222. }
  223. }
  224. void
  225. scan_factor(void)
  226. {
  227. int h;
  228. h = tos;
  229. if (token == '(')
  230. scan_subexpr();
  231. else if (token == T_SYMBOL)
  232. scan_symbol();
  233. else if (token == T_FUNCTION)
  234. scan_function_call();
  235. else if (token == T_INTEGER) {
  236. bignum_scan_integer(token_buf);
  237. get_next_token();
  238. } else if (token == T_DOUBLE) {
  239. bignum_scan_float(token_buf);
  240. get_next_token();
  241. } else if (token == T_STRING)
  242. scan_string();
  243. else
  244. error("syntax error");
  245. // index
  246. if (token == '[') {
  247. get_next_token();
  248. push_symbol(INDEX);
  249. swap();
  250. scan_expression();
  251. while (token == ',') {
  252. get_next_token();
  253. scan_expression();
  254. }
  255. if (token != ']')
  256. error("] expected");
  257. get_next_token();
  258. list(tos - h);
  259. }
  260. while (token == '!') {
  261. get_next_token();
  262. push_symbol(FACTORIAL);
  263. swap();
  264. list(2);
  265. }
  266. }
  267. void
  268. scan_symbol(void)
  269. {
  270. if (token != T_SYMBOL)
  271. error("symbol expected");
  272. if (meta_mode && strlen(token_buf) == 1)
  273. switch (token_buf[0]) {
  274. case 'a':
  275. push(symbol(METAA));
  276. break;
  277. case 'b':
  278. push(symbol(METAB));
  279. break;
  280. case 'x':
  281. push(symbol(METAX));
  282. break;
  283. default:
  284. push(usr_symbol(token_buf));
  285. break;
  286. }
  287. else
  288. push(usr_symbol(token_buf));
  289. get_next_token();
  290. }
  291. void
  292. scan_string(void)
  293. {
  294. new_string(token_buf);
  295. get_next_token();
  296. }
  297. void
  298. scan_function_call(void)
  299. {
  300. int n = 1;
  301. U *p;
  302. p = usr_symbol(token_buf);
  303. push(p);
  304. get_next_token(); // function name
  305. get_next_token(); // left paren
  306. if (token != ')') {
  307. scan_stmt();
  308. n++;
  309. while (token == ',') {
  310. get_next_token();
  311. scan_stmt();
  312. n++;
  313. }
  314. }
  315. if (token != ')')
  316. error(") expected");
  317. get_next_token();
  318. list(n);
  319. }
  320. // scan subexpression
  321. void
  322. scan_subexpr(void)
  323. {
  324. int n;
  325. if (token != '(')
  326. error("( expected");
  327. get_next_token();
  328. scan_stmt();
  329. if (token == ',') {
  330. n = 1;
  331. while (token == ',') {
  332. get_next_token();
  333. scan_stmt();
  334. n++;
  335. }
  336. build_tensor(n);
  337. }
  338. if (token != ')')
  339. error(") expected");
  340. get_next_token();
  341. }
  342. void
  343. error(char *errmsg)
  344. {
  345. printchar(' ');
  346. // try not to put question mark on orphan line
  347. while (input_str != scan_str) {
  348. if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str)
  349. break;
  350. printchar(*input_str++);
  351. }
  352. printstr(" ? ");
  353. while (*input_str && (*input_str != '\n' && *input_str != '\r'))
  354. printchar(*input_str++);
  355. printchar(' ');
  356. stop(errmsg);
  357. }
  358. // There are n expressions on the stack, possibly tensors.
  359. //
  360. // This function assembles the stack expressions into a single tensor.
  361. //
  362. // For example, at the top level of the expression ((a,b),(c,d)), the vectors
  363. // (a,b) and (c,d) would be on the stack.
  364. void
  365. build_tensor(int n)
  366. {
  367. // int i, j, k, ndim, nelem;
  368. int i;
  369. U **s;
  370. save();
  371. s = stack + tos - n;
  372. p2 = alloc_tensor(n);
  373. p2->u.tensor->ndim = 1;
  374. p2->u.tensor->dim[0] = n;
  375. for (i = 0; i < n; i++)
  376. p2->u.tensor->elem[i] = s[i];
  377. tos -= n;
  378. push(p2);
  379. restore();
  380. }
  381. void
  382. get_next_token()
  383. {
  384. newline_flag = 0;
  385. while (1) {
  386. get_token();
  387. if (token != T_NEWLINE)
  388. break;
  389. newline_flag = 1;
  390. }
  391. }
  392. void
  393. get_token(void)
  394. {
  395. // skip spaces
  396. while (isspace(*scan_str)) {
  397. if (*scan_str == '\n' || *scan_str == '\r') {
  398. token = T_NEWLINE;
  399. scan_str++;
  400. return;
  401. }
  402. scan_str++;
  403. }
  404. token_str = scan_str;
  405. // end of string?
  406. if (*scan_str == 0) {
  407. token = 0;
  408. return;
  409. }
  410. // number?
  411. if (isdigit(*scan_str) || *scan_str == '.') {
  412. while (isdigit(*scan_str))
  413. scan_str++;
  414. if (*scan_str == '.') {
  415. scan_str++;
  416. while (isdigit(*scan_str))
  417. scan_str++;
  418. if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) {
  419. scan_str += 2;
  420. while (isdigit(*scan_str))
  421. scan_str++;
  422. }
  423. token = T_DOUBLE;
  424. } else
  425. token = T_INTEGER;
  426. update_token_buf(token_str, scan_str);
  427. return;
  428. }
  429. // symbol?
  430. if (isalpha(*scan_str)) {
  431. while (isalnum(*scan_str))
  432. scan_str++;
  433. if (*scan_str == '(')
  434. token = T_FUNCTION;
  435. else
  436. token = T_SYMBOL;
  437. update_token_buf(token_str, scan_str);
  438. return;
  439. }
  440. // string ?
  441. if (*scan_str == '"') {
  442. scan_str++;
  443. while (*scan_str != '"') {
  444. if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r')
  445. error("runaway string");
  446. scan_str++;
  447. }
  448. scan_str++;
  449. token = T_STRING;
  450. update_token_buf(token_str + 1, scan_str - 1);
  451. return;
  452. }
  453. // comment?
  454. if (*scan_str == '#' || (*scan_str == '-' && scan_str[1] == '-')) {
  455. while (*scan_str && *scan_str != '\n' && *scan_str != '\r')
  456. scan_str++;
  457. if (*scan_str)
  458. scan_str++;
  459. token = T_NEWLINE;
  460. return;
  461. }
  462. // relational operator?
  463. if (*scan_str == '=' && scan_str[1] == '=') {
  464. scan_str += 2;
  465. token = T_EQ;
  466. return;
  467. }
  468. if (*scan_str == '<' && scan_str[1] == '=') {
  469. scan_str += 2;
  470. token = T_LTEQ;
  471. return;
  472. }
  473. if (*scan_str == '>' && scan_str[1] == '=') {
  474. scan_str += 2;
  475. token = T_GTEQ;
  476. return;
  477. }
  478. // single char token
  479. token = *scan_str++;
  480. }
  481. void
  482. update_token_buf(char *a, char *b)
  483. {
  484. int n;
  485. if (token_buf)
  486. free(token_buf);
  487. n = (int) (b - a);
  488. token_buf = (char *) malloc(n + 1);
  489. if (token_buf == 0)
  490. stop("malloc failure");
  491. strncpy(token_buf, a, n);
  492. token_buf[n] = 0;
  493. }
  494. // Notes:
  495. //
  496. // Formerly add() and multiply() were used to construct expressions but
  497. // this preevaluation caused problems.
  498. //
  499. // For example, suppose A has the floating point value inf.
  500. //
  501. // Before, the expression A/A resulted in 1 because the scanner would
  502. // divide the symbols.
  503. //
  504. // After removing add() and multiply(), A/A results in nan which is the
  505. // correct result.
  506. //
  507. // The functions negate() and inverse() are used but they do not cause
  508. // problems with preevaluation of symbols.