123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587 |
- // This scanner uses the recursive descent method.
- //
- // The char pointers token_str and scan_str are pointers to the input string as
- // in the following example.
- //
- // | g | a | m | m | a | | a | l | p | h | a |
- // ^ ^
- // token_str scan_str
- //
- // The char pointer token_buf points to a malloc buffer.
- //
- // | g | a | m | m | a | \0 |
- // ^
- // token_buf
- #include "stdafx.h"
- #include "defs.h"
- #define T_INTEGER 1001
- #define T_DOUBLE 1002
- #define T_SYMBOL 1003
- #define T_FUNCTION 1004
- #define T_NEWLINE 1006
- #define T_STRING 1007
- #define T_GTEQ 1008
- #define T_LTEQ 1009
- #define T_EQ 1010
- static int token, newline_flag, meta_mode;
- static char *input_str, *scan_str, *token_str, *token_buf;
- // Returns number of chars scanned and expr on stack.
- // Returns zero when nothing left to scan.
- int
- scan(char *s)
- {
- meta_mode = 0;
- expanding++;
- input_str = s;
- scan_str = s;
- get_next_token();
- if (token == 0) {
- push(symbol(NIL));
- expanding--;
- return 0;
- }
- scan_stmt();
- expanding--;
- return (int) (token_str - input_str);
- }
- int
- scan_meta(char *s)
- {
- meta_mode = 1;
- expanding++;
- input_str = s;
- scan_str = s;
- get_next_token();
- if (token == 0) {
- push(symbol(NIL));
- expanding--;
- return 0;
- }
- scan_stmt();
- expanding--;
- return (int) (token_str - input_str);
- }
- void
- scan_stmt(void)
- {
- scan_relation();
- if (token == '=') {
- get_next_token();
- push_symbol(SETQ);
- swap();
- scan_relation();
- list(3);
- }
- }
- void
- scan_relation(void)
- {
- scan_expression();
- switch (token) {
- case T_EQ:
- push_symbol(TESTEQ);
- swap();
- get_next_token();
- scan_expression();
- list(3);
- break;
- case T_LTEQ:
- push_symbol(TESTLE);
- swap();
- get_next_token();
- scan_expression();
- list(3);
- break;
- case T_GTEQ:
- push_symbol(TESTGE);
- swap();
- get_next_token();
- scan_expression();
- list(3);
- break;
- case '<':
- push_symbol(TESTLT);
- swap();
- get_next_token();
- scan_expression();
- list(3);
- break;
- case '>':
- push_symbol(TESTGT);
- swap();
- get_next_token();
- scan_expression();
- list(3);
- break;
- default:
- break;
- }
- }
- void
- scan_expression(void)
- {
- int h = tos;
- switch (token) {
- case '+':
- get_next_token();
- scan_term();
- break;
- case '-':
- get_next_token();
- scan_term();
- negate();
- break;
- default:
- scan_term();
- break;
- }
- while (newline_flag == 0 && (token == '+' || token == '-')) {
- if (token == '+') {
- get_next_token();
- scan_term();
- } else {
- get_next_token();
- scan_term();
- negate();
- }
- }
- if (tos - h > 1) {
- list(tos - h);
- push_symbol(ADD);
- swap();
- cons();
- }
- }
- int
- is_factor(void)
- {
- switch (token) {
- case '*':
- case '/':
- return 1;
- case '(':
- case T_SYMBOL:
- case T_FUNCTION:
- case T_INTEGER:
- case T_DOUBLE:
- case T_STRING:
- if (newline_flag) { // implicit mul can't cross line
- scan_str = token_str; // better error display
- return 0;
- } else
- return 1;
- default:
- break;
- }
- return 0;
- }
- void
- scan_term(void)
- {
- int h = tos;
- scan_power();
- // discard integer 1
- if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
- pop();
- while (is_factor()) {
- if (token == '*') {
- get_next_token();
- scan_power();
- } else if (token == '/') {
- get_next_token();
- scan_power();
- inverse();
- } else
- scan_power();
- // fold constants
- if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
- multiply();
- // discard integer 1
- if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
- pop();
- }
- if (h == tos)
- push_integer(1);
- else if (tos - h > 1) {
- list(tos - h);
- push_symbol(MULTIPLY);
- swap();
- cons();
- }
- }
- void
- scan_power(void)
- {
- scan_factor();
- if (token == '^') {
- get_next_token();
- push_symbol(POWER);
- swap();
- scan_power();
- list(3);
- }
- }
- void
- scan_factor(void)
- {
- int h;
- h = tos;
- if (token == '(')
- scan_subexpr();
- else if (token == T_SYMBOL)
- scan_symbol();
- else if (token == T_FUNCTION)
- scan_function_call();
- else if (token == T_INTEGER) {
- bignum_scan_integer(token_buf);
- get_next_token();
- } else if (token == T_DOUBLE) {
- bignum_scan_float(token_buf);
- get_next_token();
- } else if (token == T_STRING)
- scan_string();
- else
- error("syntax error");
- // index
- if (token == '[') {
- get_next_token();
- push_symbol(INDEX);
- swap();
- scan_expression();
- while (token == ',') {
- get_next_token();
- scan_expression();
- }
- if (token != ']')
- error("] expected");
- get_next_token();
- list(tos - h);
- }
- while (token == '!') {
- get_next_token();
- push_symbol(FACTORIAL);
- swap();
- list(2);
- }
- }
- void
- scan_symbol(void)
- {
- if (token != T_SYMBOL)
- error("symbol expected");
- if (meta_mode && strlen(token_buf) == 1)
- switch (token_buf[0]) {
- case 'a':
- push(symbol(METAA));
- break;
- case 'b':
- push(symbol(METAB));
- break;
- case 'x':
- push(symbol(METAX));
- break;
- default:
- push(usr_symbol(token_buf));
- break;
- }
- else
- push(usr_symbol(token_buf));
- get_next_token();
- }
- void
- scan_string(void)
- {
- new_string(token_buf);
- get_next_token();
- }
- void
- scan_function_call(void)
- {
- int n = 1;
- U *p;
- p = usr_symbol(token_buf);
- push(p);
- get_next_token(); // function name
- get_next_token(); // left paren
- if (token != ')') {
- scan_stmt();
- n++;
- while (token == ',') {
- get_next_token();
- scan_stmt();
- n++;
- }
- }
- if (token != ')')
- error(") expected");
- get_next_token();
- list(n);
- }
- // scan subexpression
- void
- scan_subexpr(void)
- {
- int n;
- if (token != '(')
- error("( expected");
- get_next_token();
- scan_stmt();
- if (token == ',') {
- n = 1;
- while (token == ',') {
- get_next_token();
- scan_stmt();
- n++;
- }
- build_tensor(n);
- }
- if (token != ')')
- error(") expected");
- get_next_token();
- }
- void
- error(char *errmsg)
- {
- printchar(' ');
- // try not to put question mark on orphan line
- while (input_str != scan_str) {
- if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str)
- break;
- printchar(*input_str++);
- }
- printstr(" ? ");
- while (*input_str && (*input_str != '\n' && *input_str != '\r'))
- printchar(*input_str++);
- printchar(' ');
- stop(errmsg);
- }
- // There are n expressions on the stack, possibly tensors.
- //
- // This function assembles the stack expressions into a single tensor.
- //
- // For example, at the top level of the expression ((a,b),(c,d)), the vectors
- // (a,b) and (c,d) would be on the stack.
- void
- build_tensor(int n)
- {
- // int i, j, k, ndim, nelem;
- int i;
- U **s;
- save();
- s = stack + tos - n;
- p2 = alloc_tensor(n);
- p2->u.tensor->ndim = 1;
- p2->u.tensor->dim[0] = n;
- for (i = 0; i < n; i++)
- p2->u.tensor->elem[i] = s[i];
- tos -= n;
- push(p2);
- restore();
- }
- void
- get_next_token()
- {
- newline_flag = 0;
- while (1) {
- get_token();
- if (token != T_NEWLINE)
- break;
- newline_flag = 1;
- }
- }
- void
- get_token(void)
- {
- // skip spaces
- while (isspace(*scan_str)) {
- if (*scan_str == '\n' || *scan_str == '\r') {
- token = T_NEWLINE;
- scan_str++;
- return;
- }
- scan_str++;
- }
- token_str = scan_str;
- // end of string?
- if (*scan_str == 0) {
- token = 0;
- return;
- }
- // number?
- if (isdigit(*scan_str) || *scan_str == '.') {
- while (isdigit(*scan_str))
- scan_str++;
- if (*scan_str == '.') {
- scan_str++;
- while (isdigit(*scan_str))
- scan_str++;
- if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) {
- scan_str += 2;
- while (isdigit(*scan_str))
- scan_str++;
- }
- token = T_DOUBLE;
- } else
- token = T_INTEGER;
- update_token_buf(token_str, scan_str);
- return;
- }
- // symbol?
- if (isalpha(*scan_str)) {
- while (isalnum(*scan_str))
- scan_str++;
- if (*scan_str == '(')
- token = T_FUNCTION;
- else
- token = T_SYMBOL;
- update_token_buf(token_str, scan_str);
- return;
- }
- // string ?
- if (*scan_str == '"') {
- scan_str++;
- while (*scan_str != '"') {
- if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r')
- error("runaway string");
- scan_str++;
- }
- scan_str++;
- token = T_STRING;
- update_token_buf(token_str + 1, scan_str - 1);
- return;
- }
- // comment?
- if (*scan_str == '#' || (*scan_str == '-' && scan_str[1] == '-')) {
- while (*scan_str && *scan_str != '\n' && *scan_str != '\r')
- scan_str++;
- if (*scan_str)
- scan_str++;
- token = T_NEWLINE;
- return;
- }
- // relational operator?
- if (*scan_str == '=' && scan_str[1] == '=') {
- scan_str += 2;
- token = T_EQ;
- return;
- }
- if (*scan_str == '<' && scan_str[1] == '=') {
- scan_str += 2;
- token = T_LTEQ;
- return;
- }
- if (*scan_str == '>' && scan_str[1] == '=') {
- scan_str += 2;
- token = T_GTEQ;
- return;
- }
- // single char token
- token = *scan_str++;
- }
- void
- update_token_buf(char *a, char *b)
- {
- int n;
- if (token_buf)
- free(token_buf);
- n = (int) (b - a);
- token_buf = (char *) malloc(n + 1);
- if (token_buf == 0)
- stop("malloc failure");
- strncpy(token_buf, a, n);
- token_buf[n] = 0;
- }
- // Notes:
- //
- // Formerly add() and multiply() were used to construct expressions but
- // this preevaluation caused problems.
- //
- // For example, suppose A has the floating point value inf.
- //
- // Before, the expression A/A resulted in 1 because the scanner would
- // divide the symbols.
- //
- // After removing add() and multiply(), A/A results in nan which is the
- // correct result.
- //
- // The functions negate() and inverse() are used but they do not cause
- // problems with preevaluation of symbols.
|