Lexer.cs 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. // $Id$
  2. using System;
  3. using System.Text;
  4. using System.IO;
  5. namespace Lisp {
  /// <summary>
  /// Tokenizer for a Lisp-like syntax: parentheses, double-quoted strings,
  /// <c>#t</c>/<c>#f</c> constants, integers, reals, symbols and
  /// <c>;</c> line comments.
  /// </summary>
  /// <remarks>
  /// The lexer always holds one character of lookahead in <c>c</c>. When the
  /// underlying reader is exhausted a single blank is delivered as a sentinel
  /// so that a token ending exactly at end-of-file is terminated normally;
  /// only the read after that sentinel raises <see cref="EOFException"/>.
  /// </remarks>
  public class Lexer {
    /// <summary>Number of characters requested from the reader per refill.</summary>
    private const int BufferSize = 1024;

    private StreamReader stream;
    private char[] buffer;
    /// <summary>Current lookahead character.</summary>
    private char c;
    private int bufpos;
    private int buflen;
    /// <summary>True once the reader returned no more data and the sentinel was queued.</summary>
    private bool endOfInput;

    /// <summary>Raised internally by <c>NextChar</c> when input is exhausted.</summary>
    public class EOFException : Exception {
    }

    /// <summary>Kinds of tokens produced by <see cref="GetNextToken"/>.</summary>
    public enum TokenType {
      EOF,
      OPEN_PAREN,
      CLOSE_PAREN,
      SYMBOL,
      STRING,
      INTEGER,
      REAL,
      TRUE,
      FALSE
    }

    // Initialised eagerly so TokenString is safe to read before the first token.
    private StringBuilder TokenStringBuilder = new StringBuilder();

    /// <summary>
    /// Text of the most recently scanned token (string contents without the
    /// quotes, constant name without the leading '#'). Empty for parentheses.
    /// </summary>
    public string TokenString {
      get { return TokenStringBuilder.ToString(); }
    }

    /// <summary>
    /// Number of newline characters consumed so far (starts at 0).
    /// </summary>
    public int LineNumber;

    /// <summary>
    /// Creates a lexer over <paramref name="stream"/> and primes the first
    /// lookahead character.
    /// </summary>
    /// <param name="stream">Reader supplying the source text.</param>
    public Lexer(StreamReader stream) {
      this.stream = stream;
      buffer = new char[BufferSize];
      NextChar();
    }

    /// <summary>
    /// Scans and returns the next token, skipping whitespace and comments.
    /// </summary>
    /// <returns>
    /// The token type; <see cref="TokenType.EOF"/> once the input is exhausted
    /// (also when the input ends inside a string or comment).
    /// </returns>
    /// <exception cref="Exception">
    /// A '#' constant other than <c>#t</c> or <c>#f</c> was encountered.
    /// </exception>
    public TokenType GetNextToken() {
      try {
        // Loop instead of recursing so long runs of comment lines cannot
        // grow the call stack.
        while (true) {
          SkipWhiteSpace();
          TokenStringBuilder = new StringBuilder();
          switch (c) {
            case ';':
              SkipComment();
              continue;
            case '(':
              NextChar();
              return TokenType.OPEN_PAREN;
            case ')':
              NextChar();
              return TokenType.CLOSE_PAREN;
            case '"':
              return ReadString();
            case '#':
              return ReadConstant();
            default:
              return ReadSymbolOrNumber();
          }
        }
      } catch (EOFException) {
        return TokenType.EOF;
      }
    }

    /// <summary>Advances past whitespace, counting every newline passed.</summary>
    private void SkipWhiteSpace() {
      while (Char.IsWhiteSpace(c)) {
        // Count before advancing: the lookahead may already be the newline
        // that terminated the previous token.
        if (c == '\n')
          LineNumber++;
        NextChar();
      }
    }

    /// <summary>
    /// Advances to the newline ending a ';' comment. The newline itself is
    /// left as lookahead so SkipWhiteSpace counts it exactly once.
    /// </summary>
    private void SkipComment() {
      while (c != '\n')
        NextChar();
    }

    /// <summary>
    /// Reads a double-quoted string (lookahead is on the opening quote).
    /// Recognises the escapes \n and \t; any other escaped character,
    /// including \" and \\, is taken literally.
    /// </summary>
    private TokenType ReadString() {
      while (true) {
        NextChar();
        if (c == '"')
          break;
        char ch = c;
        if (ch == '\n') {
          LineNumber++; // raw newline inside the literal
        } else if (ch == '\\') {
          NextChar();
          ch = c;
          switch (ch) {
            case 'n':
              ch = '\n';
              break;
            case 't':
              ch = '\t';
              break;
            case '\n':
              LineNumber++; // backslash followed by a real newline
              break;
          }
        }
        TokenStringBuilder.Append(ch);
      }
      NextChar();
      return TokenType.STRING;
    }

    /// <summary>Reads a '#'-prefixed constant; only #t and #f are valid.</summary>
    private TokenType ReadConstant() {
      NextChar();
      while (Char.IsLetterOrDigit(c) || c == '_') {
        TokenStringBuilder.Append(c);
        NextChar();
      }
      if (TokenString == "t")
        return TokenType.TRUE;
      if (TokenString == "f")
        return TokenType.FALSE;
      throw new Exception("Unknown constant '"
                          + TokenString + "'");
    }

    /// <summary>
    /// Reads a run of non-delimiter characters and classifies it. A token is
    /// numeric only if it starts with a digit or '-', contains at least one
    /// digit, at most one '.', and nothing else besides an optional leading
    /// '-'. Everything else is a symbol.
    /// </summary>
    private TokenType ReadSymbolOrNumber() {
      bool numericStart = Char.IsDigit(c) || c == '-';
      bool haveDigits = false;
      bool haveNonDigits = false;
      int decimalPoints = 0;
      bool first = true;
      do {
        if (Char.IsDigit(c))
          haveDigits = true;
        else if (c == '.')
          decimalPoints++;
        else if (!(first && c == '-'))
          haveNonDigits = true; // e.g. the '-' in "1-2" or the '+' in "1+2"
        first = false;
        TokenStringBuilder.Append(c);
        NextChar();
      } while (!IsDelimiter(c));

      if (!numericStart || haveNonDigits || !haveDigits || decimalPoints > 1)
        return TokenType.SYMBOL;
      if (decimalPoints == 1)
        return TokenType.REAL;
      return TokenType.INTEGER;
    }

    /// <summary>True for characters that terminate a symbol or number.</summary>
    private static bool IsDelimiter(char ch) {
      return Char.IsWhiteSpace(ch) || ch == '"' || ch == '('
          || ch == ')' || ch == ';';
    }

    /// <summary>
    /// Loads the next input character into <c>c</c>, refilling the buffer
    /// from the reader when necessary.
    /// </summary>
    /// <exception cref="EOFException">
    /// The input, including the trailing sentinel blank, has been consumed,
    /// or the underlying stream is not readable.
    /// </exception>
    private void NextChar() {
      if (bufpos >= buflen) {
        if (endOfInput || !stream.BaseStream.CanRead)
          throw new EOFException();
        buflen = stream.Read(buffer, 0, BufferSize);
        bufpos = 0;
        // A zero-length read is the reader's end-of-input signal (CanRead
        // stays true for an open stream at EOF). Deliver one ' ' so a token
        // that ends exactly at EOF is terminated without needing an unget;
        // the next refill attempt then throws.
        if (buflen <= 0) {
          buffer[0] = ' ';
          buflen = 1;
          endOfInput = true;
        }
      }
      c = buffer[bufpos++];
    }
  }
  147. }