Lexer.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /*
  2. ===========================================================================
  3. Doom 3 BFG Edition GPL Source Code
  4. Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
  6. Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #ifndef __LEXER_H__
  21. #define __LEXER_H__
  22. /*
  23. ===============================================================================
  24. Lexicographical parser
  25. Does not use memory allocation during parsing. The lexer uses no
  26. memory allocation if a source is loaded with LoadMemory().
  27. However, idToken may still allocate memory for large strings.
  28. A number directly following the escape character '\' in a string is
  29. assumed to be in decimal format instead of octal. Binary numbers of
  30. the form 0b.. or 0B.. can also be used.
  31. ===============================================================================
  32. */
  33. // lexer flags
  34. typedef enum {
  35. LEXFL_NOERRORS = BIT(0), // don't print any errors
  36. LEXFL_NOWARNINGS = BIT(1), // don't print any warnings
  37. LEXFL_NOFATALERRORS = BIT(2), // errors aren't fatal
  38. LEXFL_NOSTRINGCONCAT = BIT(3), // multiple strings seperated by whitespaces are not concatenated
  39. LEXFL_NOSTRINGESCAPECHARS = BIT(4), // no escape characters inside strings
  40. LEXFL_NODOLLARPRECOMPILE = BIT(5), // don't use the $ sign for precompilation
  41. LEXFL_NOBASEINCLUDES = BIT(6), // don't include files embraced with < >
  42. LEXFL_ALLOWPATHNAMES = BIT(7), // allow path seperators in names
  43. LEXFL_ALLOWNUMBERNAMES = BIT(8), // allow names to start with a number
  44. LEXFL_ALLOWIPADDRESSES = BIT(9), // allow ip addresses to be parsed as numbers
  45. LEXFL_ALLOWFLOATEXCEPTIONS = BIT(10), // allow float exceptions like 1.#INF or 1.#IND to be parsed
  46. LEXFL_ALLOWMULTICHARLITERALS = BIT(11), // allow multi character literals
  47. LEXFL_ALLOWBACKSLASHSTRINGCONCAT = BIT(12), // allow multiple strings seperated by '\' to be concatenated
  48. LEXFL_ONLYSTRINGS = BIT(13) // parse as whitespace deliminated strings (quoted strings keep quotes)
  49. } lexerFlags_t;
  // punctuation ids
  //
  // These are the values stored in punctuation_t::n. They are declared as
  // enumerators instead of object-like macros so that they obey normal scoping
  // rules, are visible to a debugger and cannot silently rewrite an unrelated
  // identifier through macro expansion. Every name keeps the exact numeric
  // value it had as a #define, and each one is still an integral constant
  // expression usable in case labels and table initialisers.
  enum {
      P_RSHIFT_ASSIGN     = 1,
      P_LSHIFT_ASSIGN     = 2,
      P_PARMS             = 3,
      P_PRECOMPMERGE      = 4,

      P_LOGIC_AND         = 5,
      P_LOGIC_OR          = 6,
      P_LOGIC_GEQ         = 7,
      P_LOGIC_LEQ         = 8,
      P_LOGIC_EQ          = 9,
      P_LOGIC_UNEQ        = 10,

      P_MUL_ASSIGN        = 11,
      P_DIV_ASSIGN        = 12,
      P_MOD_ASSIGN        = 13,
      P_ADD_ASSIGN        = 14,
      P_SUB_ASSIGN        = 15,
      P_INC               = 16,
      P_DEC               = 17,

      P_BIN_AND_ASSIGN    = 18,
      P_BIN_OR_ASSIGN     = 19,
      P_BIN_XOR_ASSIGN    = 20,
      P_RSHIFT            = 21,
      P_LSHIFT            = 22,

      P_POINTERREF        = 23,
      P_CPP1              = 24,
      P_CPP2              = 25,
      P_MUL               = 26,
      P_DIV               = 27,
      P_MOD               = 28,
      P_ADD               = 29,
      P_SUB               = 30,
      P_ASSIGN            = 31,

      P_BIN_AND           = 32,
      P_BIN_OR            = 33,
      P_BIN_XOR           = 34,
      P_BIN_NOT           = 35,

      P_LOGIC_NOT         = 36,
      P_LOGIC_GREATER     = 37,
      P_LOGIC_LESS        = 38,

      P_REF               = 39,
      P_COMMA             = 40,
      P_SEMICOLON         = 41,
      P_COLON             = 42,
      P_QUESTIONMARK      = 43,

      P_PARENTHESESOPEN   = 44,
      P_PARENTHESESCLOSE  = 45,
      P_BRACEOPEN         = 46,
      P_BRACECLOSE        = 47,
      P_SQBRACKETOPEN     = 48,
      P_SQBRACKETCLOSE    = 49,
      P_BACKSLASH         = 50,

      P_PRECOMP           = 51,
      P_DOLLAR            = 52
  };
  // punctuation
  //
  // One entry of a punctuation table (see idLexer::SetPunctuations): the text
  // of a punctuation mark together with its numeric id.
  typedef struct punctuation_s
  {
      // punctuation character(s). Pointer-to-const so that entries can be
      // initialised from string literals without a const-dropping conversion
      // and the text cannot be modified through the table.
      const char *p;
      // punctuation id
      int n;
  } punctuation_t;
  109. class idLexer {
  110. friend class idParser;
  111. public:
  112. // constructor
  113. idLexer();
  114. idLexer( int flags );
  115. idLexer( const char *filename, int flags = 0, bool OSPath = false );
  116. idLexer( const char *ptr, int length, const char *name, int flags = 0 );
  117. // destructor
  118. ~idLexer();
  119. // load a script from the given file at the given offset with the given length
  120. int LoadFile( const char *filename, bool OSPath = false );
  121. // load a script from the given memory with the given length and a specified line offset,
  122. // so source strings extracted from a file can still refer to proper line numbers in the file
  123. // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0'
  124. int LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 );
  125. // free the script
  126. void FreeSource();
  127. // returns true if a script is loaded
  128. int IsLoaded() { return idLexer::loaded; };
  129. // read a token
  130. int ReadToken( idToken *token );
  131. // expect a certain token, reads the token when available
  132. int ExpectTokenString( const char *string );
  133. // expect a certain token type
  134. int ExpectTokenType( int type, int subtype, idToken *token );
  135. // expect a token
  136. int ExpectAnyToken( idToken *token );
  137. // returns true when the token is available
  138. int CheckTokenString( const char *string );
  139. // returns true an reads the token when a token with the given type is available
  140. int CheckTokenType( int type, int subtype, idToken *token );
  141. // returns true if the next token equals the given string but does not remove the token from the source
  142. int PeekTokenString( const char *string );
  143. // returns true if the next token equals the given type but does not remove the token from the source
  144. int PeekTokenType( int type, int subtype, idToken *token );
  145. // skip tokens until the given token string is read
  146. int SkipUntilString( const char *string );
  147. // skip the rest of the current line
  148. int SkipRestOfLine();
  149. // skip the braced section
  150. int SkipBracedSection( bool parseFirstBrace = true );
  151. // skips spaces, tabs, C-like comments etc. Returns false if there is no token left to read.
  152. bool SkipWhiteSpace( bool currentLine );
  153. // unread the given token
  154. void UnreadToken( const idToken *token );
  155. // read a token only if on the same line
  156. int ReadTokenOnLine( idToken *token );
  157. //Returns the rest of the current line
  158. const char* ReadRestOfLine(idStr& out);
  159. // read a signed integer
  160. int ParseInt();
  161. // read a boolean
  162. bool ParseBool();
  163. // read a floating point number. If errorFlag is NULL, a non-numeric token will
  164. // issue an Error(). If it isn't NULL, it will issue a Warning() and set *errorFlag = true
  165. float ParseFloat( bool *errorFlag = NULL );
  166. // parse matrices with floats
  167. int Parse1DMatrix( int x, float *m );
  168. int Parse2DMatrix( int y, int x, float *m );
  169. int Parse3DMatrix( int z, int y, int x, float *m );
  170. // parse a braced section into a string
  171. const char * ParseBracedSection( idStr &out );
  172. // parse a braced section into a string, maintaining indents and newlines
  173. const char * ParseBracedSectionExact ( idStr &out, int tabs = -1 );
  174. // parse the rest of the line
  175. const char * ParseRestOfLine( idStr &out );
  176. // pulls the entire line, including the \n at the end
  177. const char * ParseCompleteLine( idStr &out );
  178. // retrieves the white space characters before the last read token
  179. int GetLastWhiteSpace( idStr &whiteSpace ) const;
  180. // returns start index into text buffer of last white space
  181. int GetLastWhiteSpaceStart() const;
  182. // returns end index into text buffer of last white space
  183. int GetLastWhiteSpaceEnd() const;
  184. // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example
  185. void SetPunctuations( const punctuation_t *p );
  186. // returns a pointer to the punctuation with the given id
  187. const char * GetPunctuationFromId( int id );
  188. // get the id for the given punctuation
  189. int GetPunctuationId( const char *p );
  190. // set lexer flags
  191. void SetFlags( int flags );
  192. // get lexer flags
  193. int GetFlags();
  194. // reset the lexer
  195. void Reset();
  196. // returns true if at the end of the file
  197. bool EndOfFile();
  198. // returns the current filename
  199. const char * GetFileName();
  200. // get offset in script
  201. const int GetFileOffset();
  202. // get file time
  203. const ID_TIME_T GetFileTime();
  204. // returns the current line number
  205. const int GetLineNum();
  206. // print an error message
  207. void Error( VERIFY_FORMAT_STRING const char *str, ... );
  208. // print a warning message
  209. void Warning( VERIFY_FORMAT_STRING const char *str, ... );
  210. // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set
  211. bool HadError() const;
  212. // set the base folder to load files from
  213. static void SetBaseFolder( const char *path );
  214. private:
  215. int loaded; // set when a script file is loaded from file or memory
  216. idStr filename; // file name of the script
  217. int allocated; // true if buffer memory was allocated
  218. const char * buffer; // buffer containing the script
  219. const char * script_p; // current pointer in the script
  220. const char * end_p; // pointer to the end of the script
  221. const char * lastScript_p; // script pointer before reading token
  222. const char * whiteSpaceStart_p; // start of last white space
  223. const char * whiteSpaceEnd_p; // end of last white space
  224. ID_TIME_T fileTime; // file time
  225. int length; // length of the script in bytes
  226. int line; // current line in script
  227. int lastline; // line before reading token
  228. int tokenavailable; // set by unreadToken
  229. int flags; // several script flags
  230. const punctuation_t *punctuations; // the punctuations used in the script
  231. int * punctuationtable; // ASCII table with punctuations
  232. int * nextpunctuation; // next punctuation in chain
  233. idToken token; // available token
  234. idLexer * next; // next script in a chain
  235. bool hadError; // set by idLexer::Error, even if the error is supressed
  236. static char baseFolder[ 256 ]; // base folder to load files from
  237. private:
  238. void CreatePunctuationTable( const punctuation_t *punctuations );
  239. int ReadWhiteSpace();
  240. int ReadEscapeCharacter( char *ch );
  241. int ReadString( idToken *token, int quote );
  242. int ReadName( idToken *token );
  243. int ReadNumber( idToken *token );
  244. int ReadPunctuation( idToken *token );
  245. int ReadPrimitive( idToken *token );
  246. int CheckString( const char *str ) const;
  247. int NumLinesCrossed();
  248. };
  249. ID_INLINE const char *idLexer::GetFileName() {
  250. return idLexer::filename;
  251. }
  252. ID_INLINE const int idLexer::GetFileOffset() {
  253. return idLexer::script_p - idLexer::buffer;
  254. }
  255. ID_INLINE const ID_TIME_T idLexer::GetFileTime() {
  256. return idLexer::fileTime;
  257. }
  258. ID_INLINE const int idLexer::GetLineNum() {
  259. return idLexer::line;
  260. }
  261. ID_INLINE void idLexer::SetFlags( int flags ) {
  262. idLexer::flags = flags;
  263. }
  264. ID_INLINE int idLexer::GetFlags() {
  265. return idLexer::flags;
  266. }
  267. #endif /* !__LEXER_H__ */