Lexer.cs 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. // $Id$
  2. using System;
  3. using System.Text;
  4. using System.IO;
  5. namespace Lisp {
  /// <summary>
  /// Lisp lexer. Reads characters from a <see cref="TextReader"/> and groups
  /// them into <see cref="Token"/>s. The text belonging to the most recently
  /// returned token is available through <see cref="TokenString"/>.
  /// </summary>
  /// <seealso cref="Parser"/>
  public sealed class Lexer {
      /// <summary>Maximum number of characters requested from the reader per refill.</summary>
      private const int BufferSize = 1024;

      private readonly TextReader stream;
      private readonly char[] buffer;
      /// <summary>Look-ahead character: already read from the buffer, not yet consumed.</summary>
      private char c;
      private int bufpos;
      private int buflen;
      /// <summary>True once the reader is exhausted and the trailing blank has been queued.</summary>
      private bool endOfInput;

      /// <summary>
      /// Thrown by the internal character reader when no input is left.
      /// <see cref="GetNextToken"/> catches it and reports <see cref="Token.EOF"/>.
      /// </summary>
      public class EOFException : LispException {
      }

      /// <summary>Kinds of token produced by <see cref="GetNextToken"/>.</summary>
      public enum Token {
          EOF,
          OPEN_PAREN,
          CLOSE_PAREN,
          SYMBOL,
          STRING,
          INTEGER,
          REAL,
          TRUE,
          FALSE
      }

      // Initialised up front so that TokenString is usable (and empty) before
      // the first call to GetNextToken() instead of dereferencing null.
      private StringBuilder TokenStringBuilder = new StringBuilder();

      /// <summary>
      /// Text collected for the current token. For strings this is the content
      /// without the surrounding quotes and with <c>\n</c> / <c>\t</c> escapes
      /// resolved; for <c>#</c> constants it is the name without the <c>#</c>;
      /// for parentheses it is empty.
      /// </summary>
      public string TokenString {
          get { return TokenStringBuilder.ToString(); }
      }

      /// <summary>
      /// Number of line feeds consumed so far (starts at 0). Counts newlines in
      /// whitespace, at the end of comments and inside string literals.
      /// </summary>
      public int LineNumber;

      /// <summary>
      /// Creates a lexer over <paramref name="stream"/> and reads the first
      /// look-ahead character. An empty reader is accepted; the first call to
      /// <see cref="GetNextToken"/> then returns <see cref="Token.EOF"/>.
      /// </summary>
      /// <param name="stream">Reader supplying the Lisp source text.</param>
      /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
      public Lexer(TextReader stream) {
          if(stream == null)
              throw new ArgumentNullException("stream");
          this.stream = stream;
          buffer = new char[BufferSize];
          NextChar();
      }

      /// <summary>
      /// Scans and returns the next token, skipping whitespace and
      /// <c>;</c> comments.
      /// </summary>
      /// <returns>
      /// The token kind, or <see cref="Token.EOF"/> once the input is
      /// exhausted. Note that a string literal that is still open at the end
      /// of the input is also reported as <see cref="Token.EOF"/>.
      /// </returns>
      /// <exception cref="LispException">
      /// A <c>#</c> constant other than <c>#t</c> or <c>#f</c> was found.
      /// </exception>
      public Token GetNextToken() {
          try {
              // Comments are skipped in a loop rather than by recursing, so a
              // long run of comment lines cannot exhaust the call stack.
              while(true) {
                  while(Char.IsWhiteSpace(c)) {
                      if(c == '\n')
                          LineNumber++;
                      NextChar();
                  }
                  TokenStringBuilder = new StringBuilder();
                  if(c != ';')
                      break;
                  SkipComment();
              }

              switch(c) {
                  case '(':
                      NextChar();
                      return Token.OPEN_PAREN;
                  case ')':
                      NextChar();
                      return Token.CLOSE_PAREN;
                  case '"':
                      return ReadString();
                  case '#':
                      return ReadConstant();
                  default:
                      return ReadAtom();
              }
          } catch(EOFException) {
              return Token.EOF;
          }
      }

      /// <summary>Consumes a <c>;</c> comment up to and including its line feed.</summary>
      private void SkipComment() {
          do {
              NextChar();
          } while(c != '\n');
          LineNumber++;
          NextChar();
      }

      /// <summary>
      /// Reads a string literal; the look-ahead is the opening quote. A
      /// backslash makes the following character literal, except that
      /// <c>\n</c> and <c>\t</c> become line feed and tab.
      /// </summary>
      private Token ReadString() {
          while(true) {
              NextChar();
              if(c == '"')
                  break;
              char ch = c;
              if(ch == '\n') {
                  // Keep LineNumber accurate for literals spanning several lines.
                  LineNumber++;
              } else if(ch == '\\') {
                  NextChar();
                  ch = c;
                  if(ch == '\n')
                      LineNumber++; // backslash directly followed by a real line feed
                  else if(ch == 'n')
                      ch = '\n';
                  else if(ch == 't')
                      ch = '\t';
              }
              TokenStringBuilder.Append(ch);
          }
          NextChar(); // step over the closing quote
          return Token.STRING;
      }

      /// <summary>Reads a <c>#name</c> constant; only <c>#t</c> and <c>#f</c> are known.</summary>
      private Token ReadConstant() {
          NextChar(); // step over '#'
          while(Char.IsLetterOrDigit(c) || c == '_') {
              TokenStringBuilder.Append(c);
              NextChar();
          }
          string name = TokenString;
          if(name == "t")
              return Token.TRUE;
          if(name == "f")
              return Token.FALSE;
          throw new LispException("Unknown constant '"
                                  + name + "'");
      }

      /// <summary>
      /// Reads a run of characters up to the next delimiter and classifies it
      /// as a symbol, integer or real. Only tokens starting with a digit or
      /// <c>-</c> are considered for a numeric classification.
      /// </summary>
      private Token ReadAtom() {
          bool numberCandidate = Char.IsDigit(c) || c == '-';
          do {
              TokenStringBuilder.Append(c);
              NextChar();
          } while(!IsDelimiter(c));

          if(!numberCandidate)
              return Token.SYMBOL;
          return ClassifyNumber(TokenString);
      }

      /// <summary>True for characters that end a symbol or number.</summary>
      private static bool IsDelimiter(char ch) {
          return Char.IsWhiteSpace(ch) || ch == '"' || ch == '('
              || ch == ')' || ch == ';';
      }

      /// <summary>
      /// Decides whether <paramref name="text"/> is an integer, a real or just
      /// a symbol. A number consists of an optional leading <c>-</c>, at least
      /// one digit and at most one <c>.</c>; anything else (for example
      /// <c>5-3</c> or <c>1+2</c>) is a symbol.
      /// </summary>
      private static Token ClassifyNumber(string text) {
          bool haveDigits = false;
          int dots = 0;
          for(int i = 0; i < text.Length; i++) {
              char ch = text[i];
              if(Char.IsDigit(ch))
                  haveDigits = true;
              else if(ch == '.')
                  dots++;
              else if(ch != '-' || i != 0)
                  return Token.SYMBOL; // stray sign, letter or punctuation
          }
          if(!haveDigits || dots > 1)
              return Token.SYMBOL;
          return dots == 1 ? Token.REAL : Token.INTEGER;
      }

      /// <summary>
      /// Advances the look-ahead character, refilling the buffer from the
      /// reader when it runs dry.
      /// </summary>
      /// <remarks>
      /// When the reader reports end of input, a single blank is delivered
      /// first. It terminates a symbol or number that runs right up to the end
      /// of the input, so no unget function is needed. Only the call after
      /// that blank throws.
      /// </remarks>
      /// <exception cref="EOFException">The input and the trailing blank are used up.</exception>
      private void NextChar() {
          if(bufpos >= buflen) {
              if(endOfInput)
                  throw new EOFException();
              bufpos = 0;
              buflen = stream.Read(buffer, 0, buffer.Length);
              if(buflen <= 0) {
                  // Read() returning 0 is the end-of-input signal; relying on
                  // it avoids Peek(), which also returns -1 for readers that
                  // cannot peek.
                  endOfInput = true;
                  buffer[0] = ' ';
                  buflen = 1;
              }
          }
          c = buffer[bufpos++];
      }
  }
  150. }