Lexer.cs 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  /**
   * From supertux/tools/tilemanager
   */
  4. using System;
  5. using System.Text;
  6. using System.IO;
  namespace Lisp {

      /// <summary>
      /// Splits Lisp-style text read from a <see cref="StreamReader"/> into tokens:
      /// parentheses, symbols, strings, integers, reals and the boolean constants
      /// <c>#t</c> / <c>#f</c>. Text from <c>;</c> to the end of the line is a comment.
      /// </summary>
      public class Lexer {
          /// <summary>Maximum number of characters requested from the reader per refill.</summary>
          private const int ChunkSize = 1024;

          private readonly StreamReader stream;
          private readonly char[] buffer;

          /// <summary>Current look-ahead character (the next character to be interpreted).</summary>
          private char c;
          private int bufpos;
          private int buflen;

          /// <summary>
          /// Set once the reader reported end of input and the sentinel space was
          /// placed in the buffer; the next refill attempt throws <see cref="EOFException"/>.
          /// </summary>
          private bool reachedEnd;

          /// <summary>Thrown internally by <c>NextChar</c> when no more characters are available.</summary>
          public class EOFException : Exception {
          }

          /// <summary>Kinds of token returned by <see cref="GetNextToken"/>.</summary>
          public enum TokenType {
              EOF,
              OPEN_PAREN,
              CLOSE_PAREN,
              SYMBOL,
              STRING,
              INTEGER,
              REAL,
              TRUE,
              FALSE
          }

          // Initialised eagerly so TokenString is safe to read before the first token.
          private StringBuilder TokenStringBuilder = new StringBuilder();

          /// <summary>
          /// Text of the most recent symbol, string, number or constant token
          /// (empty for parentheses and before the first token was read).
          /// </summary>
          public string TokenString {
              get { return TokenStringBuilder.ToString(); }
          }

          /// <summary>Number of newline characters consumed so far (starts at 0).</summary>
          public int LineNumber;

          /// <summary>
          /// Creates a lexer over <paramref name="stream"/> and reads the first
          /// look-ahead character.
          /// </summary>
          /// <param name="stream">Reader supplying the text to tokenize.</param>
          /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
          /// <exception cref="EOFException">The underlying stream does not support reading.</exception>
          public Lexer(StreamReader stream) {
              if (stream == null) {
                  throw new ArgumentNullException("stream");
              }
              this.stream = stream;
              buffer = new char[ChunkSize];
              NextChar();
          }

          /// <summary>
          /// Reads the next token. For tokens that carry text the text is available
          /// through <see cref="TokenString"/> afterwards.
          /// </summary>
          /// <returns>
          /// The type of the token read, or <see cref="TokenType.EOF"/> once the input
          /// is exhausted (also when the input ends inside a string or comment).
          /// </returns>
          /// <exception cref="Exception">A <c>#</c> constant other than <c>#t</c> / <c>#f</c> was found.</exception>
          public TokenType GetNextToken() {
              try {
                  // Loop instead of recursing so long runs of comment lines
                  // do not grow the call stack.
                  while (true) {
                      SkipWhiteSpace();
                      TokenStringBuilder = new StringBuilder();
                      switch (c) {
                          case ';':
                              SkipComment();
                              continue;
                          case '(':
                              NextChar();
                              return TokenType.OPEN_PAREN;
                          case ')':
                              NextChar();
                              return TokenType.CLOSE_PAREN;
                          case '"':
                              return ReadString();
                          case '#':
                              return ReadConstant();
                          default:
                              return ReadAtom();
                      }
                  }
              } catch (EOFException) {
                  return TokenType.EOF;
              }
          }

          /// <summary>Consumes whitespace, counting every newline that is consumed.</summary>
          private void SkipWhiteSpace() {
              while (Char.IsWhiteSpace(c)) {
                  bool isNewline = c == '\n';
                  NextChar();
                  // Count only after the newline was really consumed, so a repeated
                  // call at end of input can never count the same character twice.
                  if (isNewline) {
                      LineNumber++;
                  }
              }
          }

          /// <summary>Consumes a <c>;</c> comment up to and including its terminating newline.</summary>
          private void SkipComment() {
              do {
                  NextChar();
              } while (c != '\n');
              LineNumber++;
              NextChar();
          }

          /// <summary>
          /// Reads a double-quoted string into the token buffer. <c>\n</c> and <c>\t</c>
          /// are translated; any other escaped character is taken literally.
          /// </summary>
          private TokenType ReadString() {
              while (true) {
                  NextChar();
                  if (c == '"') {
                      break;
                  }
                  if (c == '\\') {
                      NextChar();
                      if (c == '\n') {
                          LineNumber++; // escaped physical line break
                      } else if (c == 'n') {
                          c = '\n';
                      } else if (c == 't') {
                          c = '\t';
                      }
                  } else if (c == '\n') {
                      LineNumber++; // strings may span several lines
                  }
                  TokenStringBuilder.Append(c);
              }
              NextChar(); // step past the closing quote
              return TokenType.STRING;
          }

          /// <summary>Reads a <c>#</c> constant; only <c>#t</c> and <c>#f</c> are accepted.</summary>
          private TokenType ReadConstant() {
              NextChar();
              while (Char.IsLetterOrDigit(c) || c == '_') {
                  TokenStringBuilder.Append(c);
                  NextChar();
              }
              string name = TokenString;
              if (name == "t") {
                  return TokenType.TRUE;
              }
              if (name == "f") {
                  return TokenType.FALSE;
              }
              throw new Exception("Unknown constant '"
                      + name + "'");
          }

          /// <summary>
          /// Reads a run of characters up to the next delimiter and classifies it as
          /// INTEGER, REAL or SYMBOL. A token is numeric only when it starts with a
          /// digit or '-', contains at least one digit, at most one '.', and nothing
          /// else apart from that optional leading '-'.
          /// </summary>
          private TokenType ReadAtom() {
              bool numericStart = Char.IsDigit(c) || c == '-';
              bool haveDigits = false;
              bool haveNonDigits = false;
              int dotCount = 0;

              do {
                  if (Char.IsDigit(c)) {
                      haveDigits = true;
                  } else if (c == '.') {
                      dotCount++;
                  } else if (c != '-' || TokenStringBuilder.Length > 0) {
                      // Anything other than a leading minus sign makes the
                      // token a symbol (e.g. "1-2", "3/4", "12abc").
                      haveNonDigits = true;
                  }
                  TokenStringBuilder.Append(c);
                  NextChar();
              } while (!IsDelimiter(c));

              if (!numericStart || haveNonDigits || !haveDigits || dotCount > 1) {
                  return TokenType.SYMBOL;
              }
              return dotCount == 1 ? TokenType.REAL : TokenType.INTEGER;
          }

          /// <summary>True for characters that end a symbol or number token.</summary>
          private static bool IsDelimiter(char ch) {
              return Char.IsWhiteSpace(ch) || ch == '"' || ch == '('
                  || ch == ')' || ch == ';';
          }

          /// <summary>
          /// Advances the look-ahead character <c>c</c>, refilling the buffer from the
          /// reader when it is exhausted.
          /// </summary>
          /// <exception cref="EOFException">
          /// No characters are left, or the underlying stream cannot be read.
          /// </exception>
          private void NextChar() {
              if (bufpos >= buflen) {
                  if (reachedEnd || !stream.BaseStream.CanRead) {
                      throw new EOFException();
                  }
                  buflen = stream.Read(buffer, 0, ChunkSize);
                  bufpos = 0;
                  // Read returns 0 only at end of input. Hand out one extra ' '
                  // so a symbol or number that touches the end of the file is
                  // still terminated by a delimiter; this avoids the need for
                  // an unget function.
                  if (buflen == 0) {
                      buffer[0] = ' ';
                      buflen = 1;
                      reachedEnd = true;
                  }
              }
              c = buffer[bufpos++];
          }
      }
  }