comun.h 100 KB


  1. #ifndef _COMUN_H
  2. #define _COMUN_H
  3. /** Comun programming language implemented as a KISS header only C99 library.
  4. [][][] [][][] [][][] [] [] [][][]
  5. [] [] [] [][][] [] [] [] []
  6. [][][] [][][] [] [] [][][] [] []
  7. The interpreter implemented here is supposed to be small and simple rather
  8. than fast and efficient.
  9. By drummyfish, released under CC0 1.0, public domain. */
  10. #include <stdint.h>
  11. #define CMN_LANG_VERSION_STRING "TODO" ///< implemented language version
  12. #define CMN_LIB_VERSION_STRING "0.2d" ///< implementation (not language) version
  13. #define CMN_NATIVE_UINT unsigned int
  14. #ifndef CMN_TOKEN_MAX_LENGTH
  15. /** Maximum allowed token length. */
  16. #define CMN_TOKEN_MAX_LENGTH 128
  17. #endif
  18. #ifndef CMN_PARSE_STACK_SIZE
  19. /** Size of compile stack, says how many levels of nesting can be processed. */
  20. #define CMN_PARSE_STACK_SIZE 128
  21. #endif
  22. #ifndef CMN_INTERPRETER_CALLSTACK_SIZE
  23. /** Size of the interpreter call stack, says the max depth of function calls. */
  24. #define CMN_INTERPRETER_CALLSTACK_SIZE 64
  25. #endif
  26. #ifndef CMN_STRING_PSEUDOHASH_SIZE
  27. /** Size, in bytes, of string pseudohash that's used in symbol table. This
  28. should be at least 8. Increasing this lowers the probability of string hash
  29. collision (should be reasonably low with the default value). */
  30. #define CMN_STRING_PSEUDOHASH_SIZE 8
  31. #endif
  32. #define CMN_MINIMUM_STACK_SIZE 16 ///< Minimum main stack size by specification.
  33. /** Converts an unsigned value to signed as it would happen in two's complement,
  34. which is not guaranteed in C, so this function exists for portability. */
  35. int32_t CMN_unsignedToSigned32(uint32_t value, uint8_t bits);
  36. int CMN_unsignedToSignedNative(CMN_NATIVE_UINT value);
  37. /** Creates a pseudohash from given string and type char (type char specifies
  38. a group of symbols, e.g. function names, variable names etc.). Pseudohash is
  39. a fixed width string with low probability of collisions with other
  40. pseudohashes. Pseudohash will only consist of such characters that it will be
  41. a valid comun/C identifier, but keep in mind the terminating 0 won't be
  42. present. */
  43. void CMN_pseudohash(char typeChar, const char *str,
  44. char hash[CMN_STRING_PSEUDOHASH_SIZE]);
  45. uint64_t CMN_literalValue(const char *literalString, uint8_t *negative,
  46. uint8_t *ok);
  47. // tokenizer states:
  48. #define CMN_TOKENIZER_NOTHING 0x00
  49. #define CMN_TOKENIZER_TOKEN 0x01
  50. #define CMN_TOKENIZER_ERROR 0x02
  51. #define _CMN_TOKENIZER_BLANK 0x03
  52. #define _CMN_TOKENIZER_COMMENT 0x04
  53. #define _CMN_TOKENIZER_STR 0x05
  54. #define _CMN_TOKENIZER_STR_END 0x06
  55. #define _CMN_TOKENIZER_NAME 0x07
  56. // possible types of tokens returned by tokenizer:
  57. #define CMN_TOKEN_ERROR 0x00
  58. #define CMN_TOKEN_COMMAND 0x01
  59. #define CMN_TOKEN_NAME 0x02
  60. #define CMN_TOKEN_FUNC 0x03
  61. #define CMN_TOKEN_NUMBER 0x04
  62. #define CMN_TOKEN_STRING 0x05
  63. #define CMN_TOKEN_BRANCH 0x06
  64. #define CMN_TOKEN_LOOP 0x07
  65. #define CMN_TOKEN_END 0x08
  66. #define CMN_TOKEN_ELSE 0x09
  67. #define CMN_TOKEN_BREAK 0x0a
  68. #define CMN_TOKEN_LABEL 0x0b
  69. /** Serves to convert a stream of source code characters to a stream of language
  70. tokens. */
  71. typedef struct
  72. {
  73. uint8_t state; ///< presumably one of the CMN_TOKENIZER_* / _CMN_TOKENIZER_* state values -- confirm
  74. char tokenString[CMN_TOKEN_MAX_LENGTH + 1]; ///< token text buffer, one char bigger than max token length
  75. uint16_t tokenStringPos; ///< NOTE(review): presumably current position within tokenString -- confirm
  76. } CMN_Tokenizer;
  77. void CMN_tokenizerInit(CMN_Tokenizer *tokenizer);
  78. uint8_t CMN_tokenizerFeedChar(CMN_Tokenizer *tokenizer, char character);
  79. uint8_t CMN_identifyToken(const char *tokenString);
  80. #define CMN_BYTECODE_HEADER_SIZE 8
  81. #define CMN_BYTECODE_CHECKSUM_BYTE 4
  82. /// instruction parameters
  83. #define CMN_IPARAMS(typeEnv,noPop,constCont,immediateC)\
  84. (((typeEnv) << 6) | ((noPop) << 5) | ((constCont) << 4) | (immediateC))
  85. // values for the DES instruction:
  86. #define CMN_DES_IF 0x01
  87. #define CMN_DES_ELSE 0x02
  88. #define CMN_DES_IF_END 0x03
  89. #define CMN_DES_LOOP 0x04
  90. #define CMN_DES_LOOP_BREAK 0x05
  91. #define CMN_DES_LOOP_END 0x06
  92. #define CMN_DES_FUNC 0x07
  93. #define CMN_DES_EXIT 0x08
  94. #define CMN_DES_GOTO 0x09
  95. #define CMN_DES_LABEL 0x0a
  96. #define CMN_MASK_INSTR_NOPOP 0x20 ///< "no pop" bit
  97. #define CMN_MASK_INSTR_CON 0x10 ///< "constant continue" bit
  98. #define CMN_MASK_INSTR_MODE 0x03 ///< mode of typical instructions
  99. #define CMN_MASK_INSTR_GROUP 0xfc
  100. #define CMN_OPCODE_SPECIALS 0x1f ///< upper bound of special instructions
  101. #define CMN_LAST_SPECIAL_PTR 15 ///< last special pointer index
  102. // instruction modes:
  103. #define CMN_OPCODE_21 0 ///< mode: pop 2, push 1
  104. #define CMN_OPCODE_1C1 1 ///< mode: pop 1, use immediate const., push 1
  105. #define CMN_OPCODE_11 2 ///< mode: pop 1, push 1
  106. #define CMN_OPCODE_01 3 ///< mode: push 1
  107. // instruction groups:
  108. #define CMN_OPCODE_AD 0x20
  109. #define CMN_OPCODE_SU 0x24
  110. #define CMN_OPCODE_MU 0x28
  111. #define CMN_OPCODE_DI 0x2c
  112. #define CMN_OPCODE_DS 0x30
  113. #define CMN_OPCODE_MO 0x34
  114. #define CMN_OPCODE_MS 0x38
  115. #define CMN_OPCODE_GR 0x40
  116. #define CMN_OPCODE_GE 0x44
  117. #define CMN_OPCODE_SM 0x48
  118. #define CMN_OPCODE_SE 0x4c
  119. #define CMN_OPCODE_GS 0x50
  120. #define CMN_OPCODE_BS 0x54
  121. #define CMN_OPCODE_SS 0x58
  122. #define CMN_OPCODE_LS 0x5c
  123. #define CMN_OPCODE_EQ 0x60
  124. #define CMN_OPCODE_NE 0x64
  125. #define CMN_OPCODE_BA 0x68
  126. #define CMN_OPCODE_BO 0x6c
  127. #define CMN_OPCODE_BX 0x70
  128. #define CMN_OPCODE_LA 0x74
  129. #define CMN_OPCODE_LO 0x78
  130. #define CMN_OPCODE_LX 0x7c
  131. // SPECIFIC OPCODES:
  132. // special instructions:
  133. #define CMN_OPCODE_END 0x00
  134. #define CMN_OPCODE_NOP 0x01
  135. #define CMN_OPCODE_DES 0x02
  136. #define CMN_OPCODE_COC 0x03
  137. #define CMN_OPCODE_ERR 0x04
  138. #define CMN_OPCODE_CAL 0x07
  139. #define CMN_OPCODE_CAE 0x08
  140. #define CMN_OPCODE_RET 0x09
  141. #define CMN_OPCODE_JIA 0x0a
  142. #define CMN_OPCODE_JNA 0x0b
  143. #define CMN_OPCODE_JMA 0x0c
  144. #define CMN_OPCODE_INI 0x0f
  145. #define CMN_OPCODE_PSC 0x10
  146. #define CMN_OPCODE_PAC 0x11
  147. #define CMN_OPCODE_PAX 0x12
  148. #define CMN_OPCODE_PCO 0x13
  149. #define CMN_OPCODE_MEX 0x14
  150. #define CMN_OPCODE_MGE 0x15
  151. #define CMN_OPCODE_PUX 0x16
  152. #define CMN_OPCODE_PCM 0x17
  153. #define CMN_OPCODE_CON 0x1a
  154. #define CMN_OPCODE_CND 0x1b
  155. #define CMN_OPCODE_SWP 0x1c
  156. #define CMN_OPCODE_TRA 0x1d
  157. #define CMN_OPCODE_POP 0x1e
  158. #define CMN_OPCODE_OUT 0x1f
  159. // typical stack instructions, fit one of the predefined modes:
  160. #define CMN_OPCODE_ADX (CMN_OPCODE_AD | CMN_OPCODE_21)
  161. #define CMN_OPCODE_ADC (CMN_OPCODE_AD | CMN_OPCODE_1C1)
  162. #define CMN_OPCODE_SUX (CMN_OPCODE_SU | CMN_OPCODE_21)
  163. #define CMN_OPCODE_SUC (CMN_OPCODE_SU | CMN_OPCODE_1C1)
  164. #define CMN_OPCODE_MUX (CMN_OPCODE_MU | CMN_OPCODE_21)
  165. #define CMN_OPCODE_MUC (CMN_OPCODE_MU | CMN_OPCODE_1C1)
  166. #define CMN_OPCODE_DIX (CMN_OPCODE_DI | CMN_OPCODE_21)
  167. #define CMN_OPCODE_DIC (CMN_OPCODE_DI | CMN_OPCODE_1C1)
  168. #define CMN_OPCODE_DSX (CMN_OPCODE_DS | CMN_OPCODE_21)
  169. #define CMN_OPCODE_DSC (CMN_OPCODE_DS | CMN_OPCODE_1C1)
  170. #define CMN_OPCODE_MOX (CMN_OPCODE_MO | CMN_OPCODE_21)
  171. #define CMN_OPCODE_MOC (CMN_OPCODE_MO | CMN_OPCODE_1C1)
  172. #define CMN_OPCODE_MSX (CMN_OPCODE_MS | CMN_OPCODE_21)
  173. #define CMN_OPCODE_MSC (CMN_OPCODE_MS | CMN_OPCODE_1C1)
  174. #define CMN_OPCODE_GRX (CMN_OPCODE_GR | CMN_OPCODE_21)
  175. #define CMN_OPCODE_GRC (CMN_OPCODE_GR | CMN_OPCODE_1C1)
  176. #define CMN_OPCODE_GEX (CMN_OPCODE_GE | CMN_OPCODE_21)
  177. #define CMN_OPCODE_GEC (CMN_OPCODE_GE | CMN_OPCODE_1C1)
  178. #define CMN_OPCODE_SMX (CMN_OPCODE_SM | CMN_OPCODE_21)
  179. #define CMN_OPCODE_SMC (CMN_OPCODE_SM | CMN_OPCODE_1C1)
  180. #define CMN_OPCODE_SEX (CMN_OPCODE_SE | CMN_OPCODE_21)
  181. #define CMN_OPCODE_SEC (CMN_OPCODE_SE | CMN_OPCODE_1C1)
  182. #define CMN_OPCODE_GSX (CMN_OPCODE_GS | CMN_OPCODE_21)
  183. #define CMN_OPCODE_GSC (CMN_OPCODE_GS | CMN_OPCODE_1C1)
  184. #define CMN_OPCODE_BSX (CMN_OPCODE_BS | CMN_OPCODE_21)
  185. #define CMN_OPCODE_BSC (CMN_OPCODE_BS | CMN_OPCODE_1C1)
  186. #define CMN_OPCODE_SSX (CMN_OPCODE_SS | CMN_OPCODE_21)
  187. #define CMN_OPCODE_SSC (CMN_OPCODE_SS | CMN_OPCODE_1C1)
  188. #define CMN_OPCODE_LSX (CMN_OPCODE_LS | CMN_OPCODE_21)
  189. #define CMN_OPCODE_LSC (CMN_OPCODE_LS | CMN_OPCODE_1C1)
  190. #define CMN_OPCODE_EQX (CMN_OPCODE_EQ | CMN_OPCODE_21)
  191. #define CMN_OPCODE_EQC (CMN_OPCODE_EQ | CMN_OPCODE_1C1)
  192. #define CMN_OPCODE_NEX (CMN_OPCODE_NE | CMN_OPCODE_21)
  193. #define CMN_OPCODE_NEC (CMN_OPCODE_NE | CMN_OPCODE_1C1)
  194. #define CMN_OPCODE_BAX (CMN_OPCODE_BA | CMN_OPCODE_21)
  195. #define CMN_OPCODE_BAC (CMN_OPCODE_BA | CMN_OPCODE_1C1)
  196. #define CMN_OPCODE_BOX (CMN_OPCODE_BO | CMN_OPCODE_21)
  197. #define CMN_OPCODE_BOC (CMN_OPCODE_BO | CMN_OPCODE_1C1)
  198. #define CMN_OPCODE_BXX (CMN_OPCODE_BX | CMN_OPCODE_21)
  199. #define CMN_OPCODE_BXC (CMN_OPCODE_BX | CMN_OPCODE_1C1)
  200. #define CMN_OPCODE_LAX (CMN_OPCODE_LA | CMN_OPCODE_21)
  201. #define CMN_OPCODE_LAC (CMN_OPCODE_LA | CMN_OPCODE_1C1)
  202. #define CMN_OPCODE_LOX (CMN_OPCODE_LO | CMN_OPCODE_21)
  203. #define CMN_OPCODE_LOC (CMN_OPCODE_LO | CMN_OPCODE_1C1)
  204. #define CMN_OPCODE_LXX (CMN_OPCODE_LX | CMN_OPCODE_21)
  205. #define CMN_OPCODE_LXC (CMN_OPCODE_LX | CMN_OPCODE_1C1)
  206. #define CMN_OPCODE_BNO (0x80 | CMN_OPCODE_11)
  207. #define CMN_OPCODE_ADR (0xf0 | CMN_OPCODE_01)
  208. #define CMN_OPCODE_INU (0xf8 | CMN_OPCODE_01)
  209. #define CMN_OPCODE_INP (0xfc | CMN_OPCODE_01)
  210. // interpreter status codes:
  211. #define CMN_INTERPRETER_END 0x00 ///< end of program reached
  212. #define CMN_INTERPRETER_OK 0x01 ///< execution continues
  213. #define CMN_INTERPRETER_ERROR 0x10 ///< generic error
  214. #define CMN_INTERPRETER_ERROR_THROW 0x11 ///< error raised by ERR instr.
  215. #define CMN_INTERPRETER_ERROR_OPERATION 0x20 ///< bad operation
  216. #define CMN_INTERPRETER_ERROR_ZERODIV 0x21 ///< division by zero
  217. #define CMN_INTERPRETER_ERROR_BAD_CALL 0x22 ///< call of unknown external func.
  218. #define CMN_INTERPRETER_ERROR_MEMORY 0x30 ///< out of memory
  219. #define CMN_INTERPRETER_ERROR_STACK_OF 0x31 ///< memory stack overflow
  220. #define CMN_INTERPRETER_ERROR_CALLSTACK 0x33 ///< call stack overflow/underflow
  221. #define CMN_INTERPRETER_ERROR_BYTECODE 0x40 ///< malformed bytecode
  222. #define CMN_INTERPRETER_ERROR_BC_HEADER 0x41 ///< bad bytecode header
  223. #define CMN_INTERPRETER_ERROR_BC_OPCODE 0x42 ///< invalid opcode
  224. #define CMN_INTERPRETER_ERROR_BC_INSTR 0x43 ///< nonsense instruction data
  225. #define CMN_INTERPRETER_ERROR_BC_JUMP 0x44 ///< jump to nonexistent location
  226. /** Function used by the interpreter to perform I/O. The argument is value to
  227. print and will be either non-negative and lower than 256 in which case print
  228. of the value should be performed (and return value doesn't matter), or -1 in
  229. which case a 1 byte character should be read from input and returned. If a
  230. character is to be returned from finished input (EOF reached), -1 should be
  231. returned. */
  232. typedef int16_t (*CMN_IOFunction)(int16_t);
  233. /** Function used by compiler to indicate that a file include command has been
  234. encountered and that it should be read from. User of the library is supposed
  235. to implement this function. When the library calls it, it says the specified
  236. file is to be open and subsequent characters fed to compiler should be from
  237. that file. If preprocessing is active, the content of each file has to be
  238. prepended with '[' and appended with ']' (without preprocessor this may or
  239. may not be done). Once the file has been all read, the library user must NOT
  240. feed a 0 terminating character to compiler (which would indicate end of whole
  241. source code), but must simply silently start feeding characters from the
  242. previously opened file. 0 character must be fed at the very end of the source
  243. code. If this function is called asking for opening a file that is already
  244. open above in the include stack (i.e. which would cause a circular include),
  245. it must be ignored. The string passed as argument won't last after the
  246. function ends and has to be copied if needed. */
  247. typedef void (*CMN_FileIncludeFunction)(const char *fileName);
  248. struct _CMN_InterpreterS;
  249. /** Function used by interpreter to signal external function calls. The first
  250. parameter is external function index, the second parameter is pointer to the
  251. interpreter that calls the function. */
  252. typedef void
  253. (*CMN_ExternalCallFunction)(uint16_t, struct _CMN_InterpreterS *);
  254. typedef struct _CMN_InterpreterS
  255. {
  256. const uint8_t *bytecode; ///< bytecode start (header included), instr. addresses count from the end of header
  257. CMN_NATIVE_UINT *memory0; ///< memory cells of type env. 0 (native width)
  258. uint8_t *memory8; ///< memory cells of type env. 1 (8 bit)
  259. uint16_t *memory16; ///< memory cells of type env. 2 (16 bit)
  260. uint32_t *memory32; ///< memory cells of type env. 3 (32 bit)
  261. uint32_t memorySize; ///< number of cells in each type env.
  262. uint32_t *pointers[4]; ///< pointer tables for each type env., index 0 is the stack top
  263. const uint8_t *callstack[CMN_INTERPRETER_CALLSTACK_SIZE]; ///< ret. addresses
  264. uint16_t callStackTop; ///< index of the next free callstack slot
  265. const uint8_t *currentInstruction; ///< instruction the interpreter is currently at
  266. CMN_IOFunction ioFunction; ///< I/O callback, if 0 then input reads as 0
  267. CMN_ExternalCallFunction externalCallFunction; ///< if 0, external call ends with CMN_INTERPRETER_ERROR_BAD_CALL
  268. uint8_t inputEndReached; ///< set to 1 once ioFunction has returned -1 on input
  269. uint32_t step; ///< NOTE(review): presumably counts executed steps -- confirm
  270. uint8_t argc; ///< number of program arguments, see CMN_interpreterInit
  271. const char **argv; ///< program arguments, must stay valid during interpretation
  272. } CMN_Interpreter;
  273. /** Initializes interpreter, returns 1 on success, otherwise 0 (e.g. too little
  274. memory provided), minCells says the minimum number of memory cells to allocate
  275. (in case automatic estimation underestimates it), argc and argv are arguments
  276. passed to the program, argv must not be changed until interpretation ends! */
  277. uint8_t CMN_interpreterInit(CMN_Interpreter *interpreter,
  278. const uint8_t *bytecode, uint8_t *memory, uint32_t memorySize,
  279. uint16_t minCells, CMN_IOFunction ioFunction,
  280. CMN_ExternalCallFunction externalCallFunction, uint8_t argc,
  281. const char **argv);
  282. /** Executes given number of steps of execution of interpreted program
  283. (one step ~= one non-empty instruction), returns the state
  284. (see CMN_INTERPRETER_* ). */
  285. uint8_t CMN_interpreterStep(CMN_Interpreter *interpreter, uint32_t steps);
  286. uint32_t CMN_interpreterGetInstrAddress(const CMN_Interpreter *interpreter);
  287. /** Sets an address in bytecode at which execution will continue next. */
  288. void CMN_interpreterGoto(CMN_Interpreter *interpreter, uint32_t address);
  289. void CMN_interpreterCallFunction(CMN_Interpreter *interpreter,
  290. uint32_t functionID);
  291. /** Gets a value that's currently on stack top of type environment 0 in
  292. the interpreter (with possible negative offset). This function doesn't check
  293. whether such value exists (i.e. if it isn't under address 0) -- in such case
  294. 0 is returned. The value is not popped. */
  295. CMN_NATIVE_UINT CMN_interpretGetValue(CMN_Interpreter *interpreter,
  296. uint8_t stackTopOffset);
  297. /** Pushes value on stack in type environment 0 of given interpreter. */
  298. void CMN_interpreterPush(CMN_Interpreter *interpreter, CMN_NATIVE_UINT value);
  299. /** Pops n values from type environment 0 of given interpreter. This function
  300. doesn't check whether this is possible -- as many values as possible will be
  301. popped. */
  302. void CMN_interpreterPop(CMN_Interpreter *interpreter, uint8_t n);
  303. /** Convenience function which just simply interprets source code passed as
  304. string, memory and memorySize specify memory used for the whole process
  305. (bytecode generation, interpreter memory, ...), statusCallback is an optional
  306. function pointer which if non-zero will be called once with first parameter
  307. either 0 (successful end, interpreter pointer passed), 1 (error during
  308. compilation, second parameter says string position of error) or 2 (error
  309. during run, interpreter pointer passed), maxSymbols says the maximum amount of
  310. symbols in symbol table, maxSteps is the maximum number of interpreter steps
  311. or 0 (infinite). The function returns final value at the stack top in type
  312. environment 0. */
  313. int CMN_interpretStr(const char *source, uint8_t *memory,
  314. uint32_t memorySize, uint16_t minCells, uint32_t maxSymbols,
  315. uint32_t maxSteps, CMN_IOFunction ioFunction,
  316. void (*statusCallback)(uint8_t, uint32_t, CMN_Interpreter *));
  317. // preprocessor states:
  318. #define CMN_PREPROCESSOR_OK 0
  319. #define CMN_PREPROCESSOR_ERROR 1
  320. #define _CMN_PREPPROCESSOR_OUT 2
  321. #define _CMN_PREPPROCESSOR_IN 3
  322. typedef struct
  323. {
  324. uint8_t state; ///< presumably one of the CMN_PREPROCESSOR_* / _CMN_PREPPROCESSOR_* values -- confirm
  325. uint8_t minify; // whether stage 2 output should be minified
  326. CMN_Tokenizer tokenizer; ///< tokenizer instance owned by the preprocessor
  327. void (*outFunction)(char); ///< callback through which output characters are emitted
  328. } CMN_Preprocessor;
  329. void CMN_preprocessorInit(CMN_Preprocessor *preprocessor, uint8_t minify,
  330. void (*outFunction)(char));
  331. /** Feeds an input character to preprocessor. The preprocessor will possibly
  332. output several characters in return (via the callback function). */
  333. uint8_t CMN_preprocessorFeedChar(CMN_Preprocessor *preprocessor, char c);
  334. /** Estimates how much memory will be needed for the execution of bytecode,
  335. returns estimated number of memory cells needed for each type environment
  336. (0 signifies the environment isn't used at all) and an exact number of
  337. pointers (user ones plus stack top) used in each type environment. This
  338. can be useful for preallocation of resources. The minStackSize says the
  339. minimum size of main stack that should be considered, however at least the
  340. minimum value given by specification will be taken into account. */
  341. void CMN_estimateMemory(const uint8_t *bytecode, uint32_t minStackSize,
  342. uint32_t memoryCells[4], uint32_t pointers[4]);
  343. static inline uint8_t CMN_instrTypeEnv(const uint8_t *instruction);
  344. void CMN_instrToStr(const uint8_t *instruction, char string[16]);
  345. uint64_t CMN_instrGetConst(const uint8_t *instr);
  346. void CMN_instrGetConsts(const uint8_t *instr, uint64_t *c1, uint64_t *c2);
  347. uint8_t CMN_instrGetConstBits(const uint8_t *instr);
  348. uint8_t CMN_instrTouchesMem(uint8_t opcode);
  349. uint8_t CMN_instrTouchesPtr(uint8_t opcode);
  350. uint8_t CMN_bytecodeChecksum(const uint8_t *bytecode);
  351. /** Removes a continuous block of instructions from bytecode and modifies rest
  352. of bytecode to keep its semantics (recomputes jump addresses, recomputes
  353. header checksum, ...). */
  354. void CMN_bytecodeRemoveInstrs(uint8_t *bytecode, uint32_t startAddr,
  355. uint16_t instrCount);
  356. typedef struct
  357. {
  358. CMN_Tokenizer tokenizer; ///< tokenizer instance owned by the compiler
  359. uint8_t *bytecode; ///< presumably the output buffer passed to CMN_compilerInit -- confirm
  360. const uint8_t *bytecodeLimit; ///< NOTE(review): presumably end of the output buffer -- confirm
  361. uint8_t *bytecodeEnd; ///< NOTE(review): presumably end of bytecode generated so far -- confirm
  362. uint8_t currentTypeEnv; ///< NOTE(review): presumably type env. (0 to 3) being compiled for -- confirm
  363. uint8_t implicitAddressSize; /**< How many half-bytes are reserved for unknown
  364. addresses, if compilation fails due to
  365. address not fitting, increase this. */
  366. uint32_t parseStack[CMN_PARSE_STACK_SIZE]; ///< its size limits the depth of nesting
  367. uint8_t parseStackTop; ///< NOTE(review): presumably top index of parseStack -- confirm
  368. uint8_t state; ///< presumably a CMN_COMPILER_* status code -- confirm
  369. uint8_t flags; ///< NOTE(review): meaning of bits not visible here -- confirm
  370. char *symbolTable; /**< Symbol table stores pseudohashes of strings
  371. (ptr and func names), each pseudohash has a
  372. type indicated by its first char ('f': func.
  373. def., 'c': func. call, 's': info about
  374. pointer size, 'l': label def., 'j': goto,
  375. 'e': external call, '0', '1', '2' and '3':
  376. ptr. in respective type env., 'n': unused).
  377. Each symbol has an index which is given by
  378. its order among same type symbols. */
  379. uint16_t symbolCount; ///< presumably number of symbols currently stored -- confirm
  380. uint16_t symbolTableSize; ///< Maximum number of items in the table.
  381. CMN_FileIncludeFunction includeFunction; ///< If 0, includes aren't supported.
  382. } CMN_Compiler;
  383. #define CMN_OPTIMIZE_REMOVE_NOPS 0x00000001 ///< Remove NOP instructions.
  384. #define CMN_OPTIMIZE_REMOVE_DEAD 0x00000002 /**< Remove dead parts of code,
  385. e.g. unused functions. */
  386. #define CMN_OPTIMIZE_REPLACE_OPS 0x00000004 /**< Replace operations with more
  387. efficient ones. */
  388. #define CMN_OPTIMIZE_INLINE 0x00000008 ///< Inline functions.
  389. #define CMN_OPTIMIZE_ALL 0xffffffff
  390. /** Applies selected optimizations to bytecode, compiler pointer can optionally
  391. be passed so that its symbol table is adjusted to match the optimized
  392. bytecode. */
  393. void CMN_bytecodeOptimize(uint8_t *bytecode, uint32_t types,
  394. CMN_Compiler *compiler);
  395. #define CMN_BYTECODE_SANITY_OK 0x01 ///< all OK
  396. #define CMN_BYTECODE_SANITY_ERROR 0x02 ///< unspecified error
  397. #define CMN_BYTECODE_SANITY_ERROR_HEADER 0x03 ///< malformed header
  398. #define CMN_BYTECODE_SANITY_ERROR_CHECKSUM 0x04 ///< bad code checksum
  399. #define CMN_BYTECODE_SANITY_ERROR_INSTR 0x05 ///< bad instruction
  400. #define CMN_BYTECODE_SANITY_ERROR_NO_END 0x06 ///< no end instruction
  401. /** Performs a basic sanity check of given bytecode and returns appropriate
  402. status code. Note that passing this test does not guarantee sanity or
  403. safety of the bytecode, it just catches most obvious errors. */
  404. uint8_t CMN_bytecodeCheckSanity(const uint8_t *bytecode, uint32_t maxSize);
  405. // compiler status codes:
  406. #define CMN_COMPILER_OK 0x00
  407. #define CMN_COMPILER_ERROR_BAD_TOKEN 0x01
  408. #define CMN_COMPILER_ERROR_UNEXPECTED_TOKEN 0x02
  409. #define CMN_COMPILER_ERROR_UNEXPECTED_END 0x03
  410. #define CMN_COMPILER_ERROR_BYTECODE_TOO_BIG 0x04
  411. #define CMN_COMPILER_ERROR_UNKNOWN_NAME 0x05
  412. #define CMN_COMPILER_ERROR_REDEFINED 0x06
  413. #define CMN_COMPILER_ERROR_UNSUPPORTED 0x07
  414. #define CMN_COMPILER_ERROR_PARSE_STACK 0x08
  415. #define CMN_COMPILER_ERROR_SYMBOL_TABLE 0x09
  416. #define CMN_COMPILER_ERROR_GENERIC 0xff
  417. void CMN_compilerInit(CMN_Compiler *compiler, uint8_t *bytecode,
  418. uint32_t bytecodeMaxSize, char *symbolTableMemory, uint32_t symbolTableSize,
  419. CMN_FileIncludeFunction includeFunction);
  420. /** Feeds a single source code character to the compiler. Note that compiler
  421. does not perform preprocessing (see the preprocessor struct). The compiler
  422. potentially generates part of the bytecode, sets its status code and returns
  423. it. */
  424. uint8_t CMN_compilerFeedChar(CMN_Compiler *compiler, char character);
  425. /** Adds symbol to compiler's symbol table. */
  426. int32_t CMN_compilerAddSymbol(CMN_Compiler *compiler,
  427. char symbol[CMN_STRING_PSEUDOHASH_SIZE]);
  428. /** Finds symbol by name, returns its index or -1 if not found. */
  429. int32_t CMN_compilerFindSymbol(const CMN_Compiler *compiler,
  430. char symbol[CMN_STRING_PSEUDOHASH_SIZE]);
  431. /** Given a name of function, returns its index according to symbol table, or
  432. -1 if match isn't found. This function may only be used after the code has
  433. been compiled. */
  434. int32_t CMN_compilerFindFunction(const CMN_Compiler *compiler,
  435. const char *funcName, uint8_t isExternal);
  436. /** Gets symbol with given index and type (index is the sequential number within
  437. symbols in the same type group). */
  438. uint8_t CMN_compilerGetSymbol(const CMN_Compiler *compiler, char typeChar,
  439. uint32_t id, char symbol[CMN_STRING_PSEUDOHASH_SIZE]);
  440. //------------------------------------------------------------------------------
  441. // privates:
  /** Maps a number to one character of the 64 symbol pseudohash alphabet ('_',
  digits, lowercase and uppercase letters), i.e. a character that may appear in
  a comun/C identifier. Only n modulo 64 matters, values 0 and 1 both map to
  '_'. */
  char _CMN_numPseudohash(uint8_t n)
  {
    // index 0..1: '_', 2..11: '0'..'9', 12..37: 'a'..'z', 38..63: 'A'..'Z'
    static const char alphabet[] =
      "__0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    return alphabet[n % 64];
  }
  /** Returns the number of characters in a zero terminated string, not
  counting the terminating 0. The string must not be a null pointer. */
  unsigned int _CMN_strLen(const char *s)
  {
    const char *cursor = s;

    while (*cursor != 0) // walk to the terminator
      cursor++;

    return (unsigned int) (cursor - s);
  }
  460. uint8_t _CMN_typeEnvBits(uint8_t typeEnv)
  461. {
  462. typeEnv = typeEnv == 0 ? ((uint8_t) sizeof(CMN_NATIVE_UINT)) :
  463. (typeEnv + (typeEnv == 3));
  464. return typeEnv * 8;
  465. }
  /** Extracts the type environment (0 to 3) of an instruction, which is stored
  in the two highest bits of the instruction's second byte. */
  static inline uint8_t CMN_instrTypeEnv(const uint8_t *instruction)
  {
    const uint8_t paramByte = instruction[1];

    return paramByte >> 6;
  }
  /** Interprets the lowest "bits" bits of value as a two's complement number
  and returns it as a signed value, without relying on implementation-defined
  or undefined conversions. If the sign bit (bit number bits - 1) is not set,
  value is returned unchanged. The bits parameter is expected to be 1 to 32;
  greater values are treated as 32 and with 0 the value is returned as is
  (there is no sign bit to look at). */
  int32_t CMN_unsignedToSigned32(uint32_t value, uint8_t bits)
  {
    if (bits == 0)
      return (int32_t) value;

    if (bits > 32)
      bits = 32; // value can't carry more than 32 bits

    // unsigned constant: shifting a signed 1 into bit 31 would be undefined
    const uint32_t signBit = UINT32_C(1) << (bits - 1);

    if (!(value & signBit))
      return (int32_t) value;

    const uint32_t mask = UINT32_C(0xffffffff) >> (32 - bits);
    const uint32_t magnitude = (((uint32_t) ~value) & mask) + 1; // 1 to 2^31

    /* Negate in two steps: magnitude itself may be 2^31, which doesn't fit
    into int32_t, but magnitude - 1 always does. */
    return -((int32_t) (magnitude - 1)) - 1;
  }
  476. int CMN_unsignedToSignedNative(CMN_NATIVE_UINT value)
  477. {
  478. CMN_NATIVE_UINT tmp = -1;
  479. tmp /= 2;
  480. if (value <= tmp)
  481. return value;
  482. value = (((CMN_NATIVE_UINT) -1) - value) + 1;
  483. return -1 * ((int) value);
  484. }
  /** Compares two unsigned values. The relation is selected by the value
  (greater | (eq << 1)): 0 means v1 < v2, 1 means v1 > v2, 2 means v1 <= v2,
  3 means v1 >= v2. For any other selector value 0 is returned. */
  uint8_t _CMN_compare(uint32_t v1, uint32_t v2, uint8_t greater, uint8_t eq)
  {
    const int selector = greater | (eq << 1);

    if (selector == 0)
      return v1 < v2;

    if (selector == 1)
      return v1 > v2;

    if (selector == 2)
      return v1 <= v2;

    if (selector == 3)
      return v1 >= v2;

    return 0;
  }

  /** Same as _CMN_compare, but the compared values are signed. */
  uint8_t _CMN_compareSigned(int32_t v1, int32_t v2, uint8_t greater, uint8_t eq)
  {
    const int selector = greater | (eq << 1);

    if (selector == 0)
      return v1 < v2;

    if (selector == 1)
      return v1 > v2;

    if (selector == 2)
      return v1 <= v2;

    if (selector == 3)
      return v1 >= v2;

    return 0;
  }
  496. uint64_t _CMN_interpreterGetX(CMN_Interpreter *interpreter, uint8_t typeEnv)
  497. {
  498. switch (typeEnv)
  499. {
  500. case 0: return interpreter->memory0[interpreter->pointers[0][0]]; break;
  501. case 1: return interpreter->memory8[interpreter->pointers[1][0]]; break;
  502. case 2: return interpreter->memory16[interpreter->pointers[2][0]]; break;
  503. case 3: return interpreter->memory32[interpreter->pointers[3][0]]; break;
  504. default: break;
  505. }
  506. return 0;
  507. }
  508. void _CMN_interpreterGetXY(CMN_Interpreter *interpreter, uint8_t typeEnv,
  509. uint64_t *x, uint64_t *y)
  510. {
  511. switch (typeEnv)
  512. {
  513. #define _CASE(n,t,m) \
  514. case n: {\
  515. t *p = interpreter->m + interpreter->pointers[n][0];\
  516. *x = *p; p--; *y = *p;\
  517. break; }
  518. _CASE(0,CMN_NATIVE_UINT,memory0)
  519. _CASE(1,uint8_t,memory8)
  520. _CASE(2,uint16_t,memory16)
  521. _CASE(3,uint32_t,memory32)
  522. default: break;
  523. #undef _CASE
  524. }
  525. }
  526. uint8_t _CMN_interpreterPopPush(CMN_Interpreter *interpreter, uint8_t typeEnv,
  527. int8_t pop, uint64_t r)
  528. {
  529. interpreter->pointers[typeEnv][0] += -1 * pop + 1;
  530. if (interpreter->pointers[typeEnv][0] >= interpreter->memorySize)
  531. return 0;
  532. switch (typeEnv)
  533. {
  534. case 0: interpreter->memory0[interpreter->pointers[0][0]] = r;
  535. break;
  536. case 1: interpreter->memory8[interpreter->pointers[1][0]] = r % 256;
  537. break;
  538. case 2: interpreter->memory16[interpreter->pointers[2][0]] = r % 65536;
  539. break;
  540. case 3: interpreter->memory32[interpreter->pointers[3][0]] = r;
  541. break;
  542. default: break;
  543. }
  544. return 1;
  545. }
  546. void CMN_interpreterGoto(CMN_Interpreter *interpreter, uint32_t address)
  547. {
  548. interpreter->currentInstruction = interpreter->bytecode +
  549. CMN_BYTECODE_HEADER_SIZE + 2 * address;
  550. }
  551. uint32_t CMN_interpreterGetInstrAddress(const CMN_Interpreter *interpreter)
  552. {
  553. return (interpreter->currentInstruction - interpreter->bytecode -
  554. CMN_BYTECODE_HEADER_SIZE) / 2;
  555. }
  556. void CMN_interpreterCallFunction(CMN_Interpreter *interpreter,
  557. uint32_t functionID)
  558. {
  559. if (interpreter->callStackTop >= CMN_INTERPRETER_CALLSTACK_SIZE - 1)
  560. return;
  561. const uint8_t *instr = interpreter->bytecode + CMN_BYTECODE_HEADER_SIZE;
  562. while (*instr != CMN_OPCODE_END)
  563. {
  564. if (instr[0] == CMN_OPCODE_DES && instr[1] == CMN_DES_FUNC)
  565. {
  566. if (functionID == 0)
  567. {
  568. interpreter->callstack[interpreter->callStackTop] =
  569. interpreter->currentInstruction;
  570. interpreter->callStackTop++;
  571. interpreter->currentInstruction = instr + 4;
  572. return;
  573. }
  574. else
  575. functionID--;
  576. }
  577. instr += 2;
  578. }
  579. }
  580. uint32_t _CMN_interpreterGetPtrAddr(CMN_Interpreter *interpreter,
  581. uint8_t typeEnv, uint32_t pointerIndex)
  582. {
  583. return (pointerIndex <= CMN_LAST_SPECIAL_PTR) ?
  584. interpreter->pointers[typeEnv][0] - pointerIndex
  585. : interpreter->pointers[typeEnv][pointerIndex - CMN_LAST_SPECIAL_PTR];
  586. }
  587. uint32_t *_CMN_interpreterGetPtrForWrite(CMN_Interpreter *interpreter,
  588. uint8_t typeEnv, uint32_t pointerIndex)
  589. {
  590. uint8_t userPointer = pointerIndex > CMN_LAST_SPECIAL_PTR;
  591. return (pointerIndex == 0 || userPointer) ?
  592. &(interpreter->pointers[typeEnv][pointerIndex -
  593. CMN_LAST_SPECIAL_PTR * userPointer]) : 0;
  594. }
  595. uint8_t CMN_interpreterStep(CMN_Interpreter *interpreter, uint32_t steps)
  596. {
  597. #define _INSTR interpreter->currentInstruction
  598. #define _POP(n) \
  599. if (!(_INSTR[1] & CMN_MASK_INSTR_NOPOP)) { \
  600. uint32_t *ptrVal = &interpreter->pointers[CMN_instrTypeEnv(_INSTR)][0];\
  601. *ptrVal -= n;\
  602. if (*ptrVal >= interpreter->memorySize)\
  603. return CMN_INTERPRETER_ERROR_STACK_OF;}
  604. uint8_t infiniteSteps = steps == 0;
  605. uint8_t repeat = 0; // for skipping instructions that do nothing
  606. while (infiniteSteps || steps > 0 || repeat)
  607. {
  608. repeat = 0;
  609. uint8_t opcode = _INSTR[0];
  610. if (opcode > CMN_OPCODE_SPECIALS)
  611. { // typical stack instructions
  612. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  613. uint8_t bitsX = _CMN_typeEnvBits(typeEnv);
  614. uint8_t bitsY = bitsX;
  615. uint8_t mode = opcode & CMN_MASK_INSTR_MODE;
  616. uint64_t x = 0, y = 0, r = 0;
  617. uint8_t opcodeGroup = opcode & CMN_MASK_INSTR_GROUP;
  618. int8_t pop =
  619. (_INSTR[1] & CMN_MASK_INSTR_NOPOP) ? 0 :
  620. ((mode != CMN_OPCODE_01) + (mode == CMN_OPCODE_21));
  621. if (mode != CMN_OPCODE_01)
  622. _CMN_interpreterGetXY(interpreter,typeEnv,&x,&y);
  623. if (mode == CMN_OPCODE_1C1)
  624. {
  625. y = x;
  626. x = CMN_instrGetConst(_INSTR);
  627. if (typeEnv == 0)
  628. bitsX = 32;
  629. }
  630. switch (opcodeGroup)
  631. {
  632. case CMN_OPCODE_AD: r = y + x; break;
  633. case CMN_OPCODE_SU: r = y - x; break;
  634. case CMN_OPCODE_MU: r = y * x; break;
  635. case CMN_OPCODE_EQ: r = y == x; break;
  636. case CMN_OPCODE_NE: r = y != x; break;
  637. case CMN_OPCODE_BA: r = y & x; break;
  638. case CMN_OPCODE_BO: r = y | x; break;
  639. case CMN_OPCODE_BX: r = y ^ x; break;
  640. case CMN_OPCODE_LA: r = y && x; break;
  641. case CMN_OPCODE_LO: r = y || x; break;
  642. case CMN_OPCODE_LX: r = (y == 0) != (x == 0); break;
  643. case CMN_OPCODE_GR:
  644. case CMN_OPCODE_GE:
  645. case CMN_OPCODE_SM:
  646. case CMN_OPCODE_SE:
  647. r = _CMN_compare(y,x,!(opcode & 0x08),(opcode & 0x04) != 0);
  648. break;
  649. case CMN_OPCODE_GS:
  650. case CMN_OPCODE_BS:
  651. case CMN_OPCODE_SS:
  652. case CMN_OPCODE_LS:
  653. r = _CMN_compareSigned(CMN_unsignedToSigned32(y,bitsY),
  654. CMN_unsignedToSigned32(x,bitsX),!(opcode & 0x08),(opcode & 0x04) != 0);
  655. break;
  656. case CMN_OPCODE_DI:
  657. case CMN_OPCODE_DS:
  658. case CMN_OPCODE_MO:
  659. case CMN_OPCODE_MS:
  660. if (x == 0)
  661. return CMN_INTERPRETER_ERROR_ZERODIV;
  662. if (opcodeGroup == CMN_OPCODE_DS || opcodeGroup == CMN_OPCODE_MS)
  663. r = (opcodeGroup == CMN_OPCODE_DS) ?
  664. (CMN_unsignedToSigned32(y,bitsY) / CMN_unsignedToSigned32(x,bitsX)) :
  665. (CMN_unsignedToSigned32(y,bitsY) % CMN_unsignedToSigned32(x,bitsX));
  666. else // unsigned
  667. r = (opcodeGroup == CMN_OPCODE_DI) ? (y / x) : (y % x);
  668. break;
  669. case (CMN_OPCODE_BNO & CMN_MASK_INSTR_GROUP):
  670. r = ~x;
  671. break;
  672. case (CMN_OPCODE_ADR & CMN_MASK_INSTR_GROUP):
  673. r = interpreter->pointers[typeEnv][0];
  674. break;
  675. case (CMN_OPCODE_INU & CMN_MASK_INSTR_GROUP):
  676. r = !interpreter->inputEndReached;
  677. break;
  678. case (CMN_OPCODE_INP & CMN_MASK_INSTR_GROUP):
  679. {
  680. int16_t v =
  681. (interpreter->inputEndReached || interpreter->ioFunction == 0)
  682. ? 0 : interpreter->ioFunction(-1);
  683. if (v == -1)
  684. {
  685. interpreter->inputEndReached = 1;
  686. r = 0;
  687. }
  688. else
  689. r = v;
  690. break;
  691. }
  692. default: break;
  693. }
  694. if (!_CMN_interpreterPopPush(interpreter,typeEnv,pop,r))
  695. return CMN_INTERPRETER_ERROR_STACK_OF;
  696. }
  697. else // non-typical instructions
  698. {
  699. switch (opcode)
  700. {
  701. case CMN_OPCODE_CAE:
  702. if (interpreter->externalCallFunction != 0)
  703. interpreter->externalCallFunction(CMN_instrGetConst(_INSTR),
  704. interpreter);
  705. else
  706. return CMN_INTERPRETER_ERROR_BAD_CALL;
  707. break;
  708. case CMN_OPCODE_POP:
  709. _POP(CMN_instrGetConst(_INSTR) + 1)
  710. break;
  711. case CMN_OPCODE_CON:
  712. if (!_CMN_interpreterPopPush(interpreter,CMN_instrTypeEnv(_INSTR),
  713. 0,CMN_instrGetConst(_INSTR)))
  714. return CMN_INTERPRETER_ERROR_STACK_OF;
  715. _POP(1)
  716. break;
  717. case CMN_OPCODE_TRA:
  718. if (!_CMN_interpreterPopPush(interpreter,
  719. CMN_instrGetConst(_INSTR),1,
  720. _CMN_interpreterGetX(interpreter,CMN_instrTypeEnv(_INSTR))))
  721. return CMN_INTERPRETER_ERROR_STACK_OF;
  722. _POP(1)
  723. break;
  724. case CMN_OPCODE_CAL:
  725. if (interpreter->callStackTop >= CMN_INTERPRETER_CALLSTACK_SIZE - 1)
  726. return CMN_INTERPRETER_ERROR_CALLSTACK;
  727. interpreter->callstack[interpreter->callStackTop] = _INSTR;
  728. interpreter->callStackTop++;
  729. CMN_interpreterGoto(interpreter,CMN_instrGetConst(_INSTR) - 1);
  730. break;
  731. case CMN_OPCODE_RET:
  732. if (interpreter->callStackTop == 0)
  733. return CMN_INTERPRETER_ERROR_CALLSTACK;
  734. _INSTR = interpreter->callstack[interpreter->callStackTop - 1];
  735. interpreter->callStackTop--;
  736. break;
  737. case CMN_OPCODE_INI:
  738. {
  739. for (uint8_t i = 0; i < interpreter->argc; ++i)
  740. {
  741. const char *c = interpreter->argv[interpreter->argc - 1 - i];
  742. const char *c2 = c;
  743. while (*c2 != 0)
  744. c2++;
  745. while (c2 >= c)
  746. {
  747. interpreter->pointers[0][0]++;
  748. interpreter->memory0[interpreter->pointers[0][0]] = *c2;
  749. c2--;
  750. }
  751. }
  752. interpreter->pointers[0][0]++;
  753. interpreter->memory0[interpreter->pointers[0][0]] =
  754. interpreter->argc;
  755. break;
  756. }
  757. case CMN_OPCODE_JNA:
  758. case CMN_OPCODE_JIA:
  759. {
  760. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  761. if (interpreter->pointers[typeEnv][0] >= interpreter->memorySize)
  762. return CMN_INTERPRETER_ERROR_STACK_OF;
  763. uint8_t stop = (!_CMN_interpreterGetX(interpreter,
  764. typeEnv)) == (opcode != CMN_OPCODE_JNA);
  765. _POP(1);
  766. if (stop)
  767. break;
  768. // else continue to JMA
  769. }
  770. __attribute__((fallthrough));
  771. // ^ gcc extension removing warning, can be removed in case of trouble
  772. case CMN_OPCODE_JMA:
  773. CMN_interpreterGoto(interpreter,CMN_instrGetConst(_INSTR) - 1);
  774. break;
  775. case CMN_OPCODE_OUT:
  776. {
  777. if (interpreter->ioFunction != 0)
  778. {
  779. int16_t v = _CMN_interpreterGetX(interpreter,
  780. CMN_instrTypeEnv(_INSTR));
  781. interpreter->ioFunction(v >= 0 ? v : 0);
  782. }
  783. _POP(1);
  784. break;
  785. }
  786. #define _DOINST(cmd)\
  787. if (typeEnv == 0)\
  788. { CMN_NATIVE_UINT t,*s = interpreter->memory0 + interpreter->pointers[0][0];\
  789. (void)(t); cmd;}\
  790. else if (typeEnv == 1)\
  791. { uint8_t t,*s = interpreter->memory8 + interpreter->pointers[1][0];\
  792. (void)(t); cmd;}\
  793. else if (typeEnv == 2)\
  794. { uint16_t t,*s = interpreter->memory16 + interpreter->pointers[2][0];\
  795. (void)(t); cmd;}\
  796. else\
  797. { uint32_t t,*s = interpreter->memory32 + interpreter->pointers[3][0];\
  798. (void)(t); cmd;}\
  799. case CMN_OPCODE_SWP:
  800. {
  801. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  802. int8_t shift = (_INSTR[1] & CMN_MASK_INSTR_NOPOP) ? 2 : 0;
  803. _DOINST(t = *s; *(s + shift) = *(s - 1);*(s + shift - 1) = t;)
  804. interpreter->pointers[typeEnv][0] += shift;
  805. break;
  806. }
  807. case CMN_OPCODE_PCM:
  808. {
  809. uint64_t c1, c2;
  810. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  811. CMN_instrGetConsts(_INSTR,&c1,&c2);
  812. c1 = _CMN_interpreterGetPtrAddr(interpreter,typeEnv,c1);
  813. c2 = _CMN_interpreterGetPtrAddr(interpreter,typeEnv,c2);
  814. c1 = c1 == c2 ? 0 : (1 + (c1 > c2));
  815. interpreter->pointers[typeEnv][0]++;
  816. _DOINST(*s = c1;)
  817. break;
  818. }
  819. case CMN_OPCODE_CND:
  820. {
  821. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  822. int8_t shift = 1 - ((_INSTR[1] & CMN_MASK_INSTR_NOPOP) ? 0 : 3);
  823. _DOINST(*(s + shift) = *(s - 2) ? *(s - 1) : *s)
  824. interpreter->pointers[typeEnv][0] += shift;
  825. break;
  826. }
  827. case CMN_OPCODE_PSC:
  828. case CMN_OPCODE_PAC:
  829. {
  830. uint64_t c1, c2;
  831. uint32_t *p;
  832. CMN_instrGetConsts(_INSTR,&c1,&c2);
  833. p = _CMN_interpreterGetPtrForWrite(interpreter,
  834. CMN_instrTypeEnv(_INSTR),c1);
  835. if (p != 0)
  836. *p = (opcode == CMN_OPCODE_PSC) ? c2 : (*p +
  837. CMN_unsignedToSigned32(c2,4));
  838. break;
  839. }
  840. case CMN_OPCODE_PAX:
  841. {
  842. uint32_t pointer = CMN_instrGetConst(_INSTR);
  843. uint32_t *p = _CMN_interpreterGetPtrForWrite(interpreter,
  844. CMN_instrTypeEnv(_INSTR),pointer);
  845. if (p != 0)
  846. {
  847. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  848. *p += CMN_unsignedToSigned32(_CMN_interpreterGetX(interpreter,
  849. typeEnv),_CMN_typeEnvBits(typeEnv));
  850. if (pointer != 0)
  851. _POP(1);
  852. }
  853. break;
  854. }
  855. case CMN_OPCODE_PCO:
  856. {
  857. uint64_t c1, c2;
  858. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  859. CMN_instrGetConsts(_INSTR,&c1,&c2);
  860. uint32_t *p = _CMN_interpreterGetPtrForWrite(interpreter,typeEnv,c1);
  861. if (p != 0)
  862. *p = _CMN_interpreterGetPtrAddr(interpreter,typeEnv,c2);
  863. else
  864. return CMN_INTERPRETER_ERROR_OPERATION;
  865. break;
  866. }
  867. case CMN_OPCODE_MEX:
  868. {
  869. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  870. uint64_t val, addr;
  871. val = _CMN_interpreterGetX(interpreter,typeEnv);
  872. addr = CMN_instrGetConst(_INSTR);
  873. addr = _CMN_interpreterGetPtrAddr(interpreter,typeEnv,addr);
  874. if (addr >= interpreter->memorySize)
  875. return CMN_INTERPRETER_ERROR_MEMORY;
  876. if (typeEnv == 0)
  877. interpreter->memory0[addr] = val;
  878. else if (typeEnv == 1)
  879. interpreter->memory8[addr] = val;
  880. else if (typeEnv == 2)
  881. interpreter->memory16[addr] = val;
  882. else
  883. interpreter->memory32[addr] = val;
  884. _POP(1)
  885. break;
  886. }
  887. case CMN_OPCODE_MGE:
  888. case CMN_OPCODE_PUX:
  889. {
  890. uint8_t typeEnv = CMN_instrTypeEnv(_INSTR);
  891. uint32_t p;
  892. p = (opcode == CMN_OPCODE_MGE) ?
  893. _CMN_interpreterGetPtrAddr(
  894. interpreter,typeEnv,CMN_instrGetConst(_INSTR))
  895. :
  896. (_CMN_interpreterGetPtrAddr(interpreter,typeEnv,0)
  897. - _CMN_interpreterGetX(interpreter,typeEnv));
  898. if (opcode == CMN_OPCODE_MGE || (_INSTR[1] & CMN_MASK_INSTR_NOPOP))
  899. interpreter->pointers[typeEnv][0]++;
  900. if (p >= interpreter->memorySize)
  901. return CMN_INTERPRETER_ERROR_STACK_OF;
  902. if (typeEnv == 0)
  903. interpreter->memory0[interpreter->pointers[0][0]] =
  904. interpreter->memory0[p];
  905. else if (typeEnv == 1)
  906. interpreter->memory8[interpreter->pointers[1][0]] =
  907. interpreter->memory8[p];
  908. else if (typeEnv == 2)
  909. interpreter->memory16[interpreter->pointers[2][0]] =
  910. interpreter->memory16[p];
  911. else
  912. interpreter->memory32[interpreter->pointers[3][0]] =
  913. interpreter->memory32[p];
  914. break;
  915. }
  916. #undef _DOINST
  917. case CMN_OPCODE_END:
  918. _INSTR -= 2; /* has to be here because even after end API can be used
  919. to call a function and that has to return here */
  920. return CMN_INTERPRETER_END; break;
  921. case CMN_OPCODE_ERR: return CMN_INTERPRETER_ERROR_THROW; break;
  922. case CMN_OPCODE_NOP:
  923. case CMN_OPCODE_DES:
  924. case CMN_OPCODE_COC:
  925. repeat = 1;
  926. break;
  927. default: return CMN_INTERPRETER_ERROR_BC_OPCODE; break;
  928. }
  929. }
  930. _INSTR += 2;
  931. if (!repeat)
  932. {
  933. steps--;
  934. interpreter->step++;
  935. }
  936. }
  937. return CMN_INTERPRETER_OK;
  938. #undef _POP
  939. #undef _INSTR
  940. }
  941. void CMN_instrGetConsts(const uint8_t *instr, uint64_t *c1, uint64_t *c2)
  942. {
  943. uint8_t bits = CMN_instrGetConstBits(instr) / 2;
  944. *c1 = CMN_instrGetConst(instr);
  945. *c2 = (*c1) >> bits;
  946. *c1 &= ~(0xffffffff << bits);
  947. }
  948. uint64_t CMN_instrGetConst(const uint8_t *instr)
  949. {
  950. instr++;
  951. uint64_t result = (*instr) & 0x0f;
  952. uint8_t shift = 0;
  953. while ((*instr) & CMN_MASK_INSTR_CON)
  954. {
  955. instr += 2;
  956. shift += 4;
  957. result |= ((uint64_t) ((*instr) & 0x0f)) << shift;
  958. }
  959. return result;
  960. }
  961. uint8_t CMN_instrGetConstBits(const uint8_t *instr)
  962. {
  963. instr++;
  964. uint8_t result = 4;
  965. while ((*instr) & CMN_MASK_INSTR_CON)
  966. {
  967. instr += 2;
  968. result += 4;
  969. }
  970. return result;
  971. }
  972. uint8_t CMN_interpreterInit(CMN_Interpreter *interpreter,
  973. const uint8_t *bytecode, uint8_t *memory, uint32_t memorySize,
  974. uint16_t minCells, CMN_IOFunction ioFunction,
  975. CMN_ExternalCallFunction externalCallFunction,
  976. uint8_t argc, const char **argv)
  977. {
  978. interpreter->bytecode = bytecode;
  979. interpreter->currentInstruction = bytecode + CMN_BYTECODE_HEADER_SIZE;
  980. interpreter->ioFunction = ioFunction;
  981. interpreter->externalCallFunction = externalCallFunction;
  982. interpreter->inputEndReached = 0;
  983. interpreter->callStackTop = 0;
  984. interpreter->memorySize = 0;
  985. interpreter->step = 0;
  986. interpreter->argc = argc;
  987. interpreter->argv = argv;
  988. uint8_t environments = 0x01; /* We make env 0 be always present, even if no
  989. instructions access it, because it may be
  990. used by the programmer via API functions. */
  991. uint16_t maxPointers = 0;
  992. uint32_t mems[4], ptrs[4];
  993. uint16_t argLen = 1; // for pushing argc
  994. CMN_estimateMemory(bytecode,minCells,mems,ptrs);
  995. if (argc > 0) // add space needed for arguments
  996. {
  997. for (uint8_t i = 0; i < argc; ++i)
  998. {
  999. const char *arg = argv[i];
  1000. argLen++; // terminating 0
  1001. while (*arg != 0)
  1002. {
  1003. argLen++;
  1004. arg++;
  1005. }
  1006. }
  1007. }
  1008. mems[0] += argLen;
  1009. for (uint8_t i = 0; i < 4; ++i)
  1010. {
  1011. if (mems[i] != 0 || ptrs[i] != 0)
  1012. environments |= 0x01 << i;
  1013. if (mems[i] > interpreter->memorySize)
  1014. interpreter->memorySize = mems[i];
  1015. if (ptrs[i] > maxPointers)
  1016. maxPointers = ptrs[i];
  1017. }
  1018. uint8_t *pointerEnd = memory;
  1019. interpreter->memory0 = 0;
  1020. interpreter->memory8 = 0;
  1021. interpreter->memory16 = 0;
  1022. interpreter->memory32 = 0;
  1023. for (uint8_t i = 0; i < 4; ++i)
  1024. if (environments & (0x01 << i))
  1025. {
  1026. interpreter->pointers[i] = (uint32_t *) pointerEnd;
  1027. pointerEnd += sizeof(uint32_t) * maxPointers;
  1028. #define _DO_SET(m,t)\
  1029. interpreter->m = (t *) pointerEnd;\
  1030. pointerEnd += sizeof(t) * interpreter->memorySize;\
  1031. break;
  1032. switch (i)
  1033. {
  1034. case 0: _DO_SET(memory0,CMN_NATIVE_UINT)
  1035. case 1: _DO_SET(memory8,uint8_t)
  1036. case 2: _DO_SET(memory16,uint16_t)
  1037. case 3: _DO_SET(memory32,uint32_t)
  1038. default: break;
  1039. }
  1040. #undef _DO_SET
  1041. }
  1042. else
  1043. interpreter->pointers[i] = 0;
  1044. if (pointerEnd - memory > memorySize)
  1045. return 0;
  1046. for (uint32_t i = 0; i != memorySize; ++i)
  1047. memory[i] = 0;
  1048. return 1;
  1049. }
  1050. CMN_NATIVE_UINT CMN_interpreterGetValue(CMN_Interpreter *interpreter,
  1051. uint8_t stackTopOffset)
  1052. {
  1053. return stackTopOffset <= interpreter->pointers[0][0] ?
  1054. interpreter->memory0[interpreter->pointers[0][0] - stackTopOffset] : 0;
  1055. }
  1056. void CMN_interpreterPush(CMN_Interpreter *interpreter, CMN_NATIVE_UINT value)
  1057. {
  1058. if (interpreter->pointers[0][0] >= interpreter->memorySize - 1)
  1059. return;
  1060. interpreter->pointers[0][0]++;
  1061. interpreter->memory0[interpreter->pointers[0][0]] = value;
  1062. }
  1063. void CMN_interpreterPop(CMN_Interpreter *interpreter, uint8_t n)
  1064. {
  1065. interpreter->pointers[0][0] -= n <= interpreter->pointers[0][0] ? n : 0;
  1066. }
  1067. void CMN_instrToStr(const uint8_t *instruction, char string[16])
  1068. {
  1069. static const char names[] =
  1070. // 0 1 2 3 4 5 6 7 8 9 a b c d e f
  1071. /*0*/ "ENDNOPDESCOCERR CALCAERETJIAJNAJMA INI"
  1072. /*1*/ "PSCPACPAXPCOMEXMGEPUXPCM CONCNDSWPTRAPOPOUT"
  1073. /*2*/ "ADXADC SUXSUC MUXMUC DIXDICX "
  1074. /*3*/ "DSXDSC MOXMOC MSXMSC "
  1075. /*4*/ "GRXGRC GEXGEC SMXSMC SEXSECX "
  1076. /*5*/ "GSXGSC BSXBSC SSXSSC LSXLSCX "
  1077. /*6*/ "EQXEQC NEXNEC BAXBAC BOXBOCX "
  1078. /*7*/ "BXXBXC LAXLAC LOXLOC LXXLXCX "
  1079. /*8*/ " BNO "
  1080. /*9 - e: empty */
  1081. /*f*/ " ADR INU INP";
  1082. uint16_t nameIndex = *instruction * 3 - (*instruction >= 0x90) * (6 * 3 * 16);
  1083. string[0] = names[nameIndex];
  1084. string[1] = names[nameIndex + 1];
  1085. string[2] = names[nameIndex + 2];
  1086. string[3] = (instruction[1] & CMN_MASK_INSTR_NOPOP) ? '\'' : ' ';
  1087. string[4] = ' ';
  1088. uint8_t env = instruction[1] >> 6;
  1089. env = env * 8 + 8 * (env == 3);
  1090. string[5] = '0' + env / 10;
  1091. string[6] = '0' + env % 10;
  1092. string[7] = ' ';
  1093. for (uint8_t i = 0; i < 4; ++i)
  1094. string[8 + i] = '0' + ((instruction[1] & (0x08 >> i)) != 0);
  1095. if (instruction[1] & CMN_MASK_INSTR_CON)
  1096. {
  1097. string[12] = '.'; string[13] = '.'; string[14] = '.'; string[15] = 0;
  1098. }
  1099. else
  1100. string[12] = 0;
  1101. }
  1102. void CMN_tokenizerInit(CMN_Tokenizer *tokenizer)
  1103. {
  1104. tokenizer->state = _CMN_TOKENIZER_BLANK;
  1105. tokenizer->tokenStringPos = 0;
  1106. }
  1107. uint64_t CMN_literalValue(const char *literalString, uint8_t *negative,
  1108. uint8_t *ok)
  1109. {
  1110. uint64_t result = 0, prev = 0;
  1111. uint8_t isNegative = 0;
  1112. uint8_t base = 10;
  1113. if (ok != 0)
  1114. *ok = 1;
  1115. if (*literalString == '+' || *literalString == '-')
  1116. {
  1117. isNegative = *literalString == '-';
  1118. literalString++;
  1119. }
  1120. switch (*literalString)
  1121. {
  1122. case 'x': base = 16;
  1123. __attribute__((fallthrough)); // let fall through
  1124. case 'd': literalString++; break;
  1125. case 'b': base = 2; literalString++; break;
  1126. default: break;
  1127. }
  1128. while (*literalString != 0)
  1129. {
  1130. result *= base;
  1131. if (ok != 0 && prev > result) // overflow?
  1132. *ok = 0;
  1133. result += ((*literalString >= '0' && *literalString <= '9') ?
  1134. (*literalString - '0') : (*literalString - 'a' + 10));
  1135. prev = result;
  1136. literalString++;
  1137. }
  1138. if (negative != 0)
  1139. *negative = isNegative;
  1140. return !isNegative ? result : ((0xffffffffffffffff - result) + 1);
  1141. }
  1142. uint8_t CMN_identifyToken(const char *tokenString)
  1143. {
  1144. if (tokenString[0] == '.' && tokenString[1] == 0)
  1145. return CMN_TOKEN_END;
  1146. else if (tokenString[0] == ';' && tokenString[1] == 0)
  1147. return CMN_TOKEN_ELSE;
  1148. else if (tokenString[0] == '?' &&
  1149. (tokenString[1] == 0 || tokenString[1] == '\''))
  1150. return CMN_TOKEN_BRANCH;
  1151. else if (tokenString[0] == '@' &&
  1152. (tokenString[1] == 0 ||
  1153. ((tokenString[1] == '\'' || tokenString[1] == '@')
  1154. && tokenString[2] == 0)))
  1155. return CMN_TOKEN_LOOP;
  1156. else if (tokenString[0] == '!' && tokenString[1] == '@' &&
  1157. tokenString[2] == 0)
  1158. return CMN_TOKEN_BREAK;
  1159. else if (tokenString[0] == '"')
  1160. {
  1161. tokenString++;
  1162. while (1)
  1163. {
  1164. if (tokenString[0] == 0)
  1165. return CMN_TOKEN_ERROR;
  1166. else if (tokenString[0] == '"')
  1167. return (tokenString[1] == 0) ? CMN_TOKEN_STRING : CMN_TOKEN_ERROR;
  1168. tokenString++;
  1169. }
  1170. }
  1171. else if ((tokenString[0] == '+' || tokenString[0] == '-') &&
  1172. ((tokenString[1] <= '9' && tokenString[1] >= '0') ||
  1173. (tokenString[1] <= 'z' && tokenString[1] >= 'a')))
  1174. {
  1175. tokenString++;
  1176. uint8_t base = 1;
  1177. if (tokenString[0] == 'd')
  1178. {
  1179. tokenString++;
  1180. }
  1181. else if (tokenString[0] == 'x')
  1182. {
  1183. base = 2;
  1184. tokenString++;
  1185. }
  1186. else if (tokenString[0] == 'b')
  1187. {
  1188. base = 0;
  1189. tokenString++;
  1190. }
  1191. if (tokenString[0] == 0)
  1192. return CMN_TOKEN_ERROR;
  1193. do
  1194. {
  1195. uint8_t charClass = 0;
  1196. if (tokenString[0] >= 'a' && tokenString[0] <= 'f')
  1197. charClass = 2;
  1198. else if (tokenString[0] >= '0' && tokenString[0] <= '9')
  1199. charClass = tokenString[0] >= '2';
  1200. else
  1201. return CMN_TOKEN_ERROR;
  1202. if (charClass > base)
  1203. return CMN_TOKEN_ERROR;
  1204. tokenString++;
  1205. } while (tokenString[0] != 0);
  1206. return CMN_TOKEN_NUMBER;
  1207. }
  1208. else if (tokenString[0] >= '0' && tokenString[0] <= '9')
  1209. {
  1210. do
  1211. {
  1212. if (tokenString[0] < '0' || tokenString[0] > '9')
  1213. return CMN_TOKEN_ERROR;
  1214. tokenString++;
  1215. } while (tokenString[0] != 0);
  1216. return CMN_TOKEN_NUMBER;
  1217. }
  1218. else if (tokenString[0] == '~' && tokenString[1] == ':')
  1219. {
  1220. return CMN_identifyToken(tokenString + 2) == CMN_TOKEN_NAME ?
  1221. CMN_TOKEN_LABEL : CMN_TOKEN_ERROR;
  1222. }
  1223. else
  1224. {
  1225. uint8_t result = CMN_TOKEN_NAME;
  1226. do
  1227. {
  1228. if (tokenString[0] <= ' ' || tokenString[0] == '#')
  1229. return CMN_TOKEN_ERROR;
  1230. else if (
  1231. (tokenString[0] > '9' || tokenString[0] < '0') &&
  1232. (tokenString[0] > 'z' || tokenString[0] < 'a') &&
  1233. (tokenString[0] > 'Z' || tokenString[0] < 'A') &&
  1234. tokenString[0] != '_')
  1235. result = (tokenString[0] == ':' && tokenString[1] == 0) ?
  1236. CMN_TOKEN_FUNC : CMN_TOKEN_COMMAND;
  1237. tokenString++;
  1238. } while (tokenString[0] != 0);
  1239. return result;
  1240. }
  1241. return CMN_TOKEN_ERROR;
  1242. }
  1243. uint8_t CMN_tokenizerFeedChar(CMN_Tokenizer *tokenizer, char character)
  1244. {
  1245. uint8_t append = 0, token = 0;
  1246. #define _BLANK(x) (x == ' ' || x == '\n' || x == '\t' || x == ']' || x == '[' || x == 0)
  1247. switch (tokenizer->state)
  1248. {
  1249. case _CMN_TOKENIZER_BLANK:
  1250. if (character == '"')
  1251. {
  1252. tokenizer->state = _CMN_TOKENIZER_STR;
  1253. append = 1;
  1254. }
  1255. else if (character == '#')
  1256. tokenizer->state = _CMN_TOKENIZER_COMMENT;
  1257. else if (!_BLANK(character))
  1258. {
  1259. tokenizer->state = _CMN_TOKENIZER_NAME;
  1260. append = 1;
  1261. }
  1262. break;
  1263. case _CMN_TOKENIZER_NAME:
  1264. if (_BLANK(character))
  1265. {
  1266. tokenizer->state = _CMN_TOKENIZER_BLANK;
  1267. token = 1;
  1268. }
  1269. else if (character == '#')
  1270. {
  1271. tokenizer->state = _CMN_TOKENIZER_COMMENT;
  1272. token = 1;
  1273. }
  1274. else
  1275. append = 1;
  1276. break;
  1277. case _CMN_TOKENIZER_STR_END:
  1278. if (character == '#')
  1279. tokenizer->state = _CMN_TOKENIZER_COMMENT;
  1280. else if (_BLANK(character))
  1281. tokenizer->state = _CMN_TOKENIZER_BLANK;
  1282. else
  1283. tokenizer->state = CMN_TOKENIZER_ERROR;
  1284. break;
  1285. case _CMN_TOKENIZER_COMMENT:
  1286. if (character == '#' || character == '\n')
  1287. tokenizer->state = _CMN_TOKENIZER_BLANK;
  1288. break;
  1289. case _CMN_TOKENIZER_STR:
  1290. append = 1;
  1291. if (character == '"')
  1292. {
  1293. tokenizer->state = _CMN_TOKENIZER_STR_END;
  1294. token = 1;
  1295. }
  1296. break;
  1297. default:
  1298. break;
  1299. }
  1300. if (append)
  1301. {
  1302. if (tokenizer->tokenStringPos < CMN_TOKEN_MAX_LENGTH - 1)
  1303. {
  1304. tokenizer->tokenString[tokenizer->tokenStringPos] = character;
  1305. tokenizer->tokenStringPos++;
  1306. }
  1307. else
  1308. tokenizer->state = CMN_TOKENIZER_ERROR;
  1309. }
  1310. if (tokenizer->state == CMN_TOKENIZER_ERROR)
  1311. return CMN_TOKENIZER_ERROR;
  1312. else if (token)
  1313. {
  1314. tokenizer->tokenString[tokenizer->tokenStringPos] = 0;
  1315. tokenizer->tokenStringPos = 0;
  1316. return CMN_TOKENIZER_TOKEN;
  1317. }
  1318. return CMN_TOKENIZER_NOTHING;
  1319. #undef _BLANK
  1320. }
  1321. void CMN_compilerInit(CMN_Compiler *compiler, uint8_t *bytecode,
  1322. uint32_t bytecodeMaxSize, char *symbolTableMemory, uint32_t symbolTableSize,
  1323. CMN_FileIncludeFunction includeFunction)
  1324. {
  1325. CMN_tokenizerInit(&compiler->tokenizer);
  1326. compiler->bytecode = bytecode;
  1327. compiler->bytecodeLimit = bytecode + bytecodeMaxSize;
  1328. compiler->bytecodeEnd = bytecode + CMN_BYTECODE_HEADER_SIZE;
  1329. compiler->currentTypeEnv = 0;
  1330. compiler->implicitAddressSize = 5; // reasonable magic constant
  1331. compiler->parseStackTop = 0;
  1332. compiler->state = CMN_COMPILER_OK;
  1333. compiler->symbolTable = symbolTableMemory;
  1334. compiler->symbolTableSize = symbolTableSize / CMN_STRING_PSEUDOHASH_SIZE;
  1335. compiler->symbolCount = 0;
  1336. compiler->flags = 0;
  1337. compiler->includeFunction = includeFunction;
  1338. }
  1339. uint8_t _CMN_constSegmentsNeeded(uint64_t c)
  1340. {
  1341. uint8_t r = 1;
  1342. while (1)
  1343. {
  1344. c >>= 4;
  1345. if (c == 0)
  1346. break;
  1347. r++;
  1348. }
  1349. return r;
  1350. }
  1351. uint8_t *_CMN_compilerFillConsts(CMN_Compiler *compiler, uint8_t *address,
  1352. uint32_t c1, uint32_t c2)
  1353. {
  1354. if (compiler != 0 && compiler->state != CMN_COMPILER_OK)
  1355. return address;
  1356. int8_t count1 = _CMN_constSegmentsNeeded(c1),
  1357. count2 = _CMN_constSegmentsNeeded(c2);
  1358. if (count2 > count1)
  1359. count1 = count2;
  1360. for (uint8_t i = 0; i < 2; ++i)
  1361. {
  1362. count2 = count1;
  1363. address[1] &= ~(CMN_MASK_INSTR_CON | 0x0f);
  1364. if (i != 0)
  1365. {
  1366. if (address >= compiler->bytecodeLimit)
  1367. {
  1368. compiler->state = CMN_COMPILER_ERROR_BYTECODE_TOO_BIG;
  1369. return address;
  1370. }
  1371. *address = CMN_OPCODE_COC;
  1372. *(address - 1) |= CMN_MASK_INSTR_CON;
  1373. }
  1374. while (1)
  1375. {
  1376. if (compiler != 0 && address + 3 >= compiler->bytecodeLimit)
  1377. {
  1378. compiler->state = CMN_COMPILER_ERROR_BYTECODE_TOO_BIG;
  1379. return address;
  1380. }
  1381. address++;
  1382. *address |= c1 & 0x0000000f;
  1383. address++;
  1384. count2--;
  1385. if (count2 <= 0)
  1386. break;
  1387. c1 >>= 4;
  1388. *(address - 1) |= CMN_MASK_INSTR_CON;
  1389. *address = CMN_OPCODE_COC;
  1390. *(address + 1) = 0;
  1391. }
  1392. c1 = c2;
  1393. }
  1394. return address;
  1395. }
  1396. /** Fills constant to given bytecode address, potentially adding COCs depending
  1397. on size of the constant, returns the address right after this. */
  1398. uint8_t *_CMN_compilerFillConst(CMN_Compiler *compiler, uint8_t *address,
  1399. uint64_t con)
  1400. {
  1401. if (compiler != 0 && compiler->state != CMN_COMPILER_OK)
  1402. return 0;
  1403. address[1] &= ~(CMN_MASK_INSTR_CON | 0x0f);
  1404. int8_t count = _CMN_constSegmentsNeeded(con);
  1405. while (1)
  1406. {
  1407. if (compiler != 0 && address + 3 >= compiler->bytecodeLimit)
  1408. {
  1409. compiler->state = CMN_COMPILER_ERROR_BYTECODE_TOO_BIG;
  1410. return 0;
  1411. }
  1412. address++;
  1413. *address |= con & 0x0000000f;
  1414. address++;
  1415. count--;
  1416. if (count <= 0)
  1417. break;
  1418. con >>= 4;
  1419. *(address - 1) |= CMN_MASK_INSTR_CON;
  1420. *address = CMN_OPCODE_COC;
  1421. *(address + 1) = 0;
  1422. }
  1423. return address;
  1424. }
  1425. void _CMN_compilerParseStackPush(CMN_Compiler *compiler, uint32_t addr)
  1426. {
  1427. if (compiler->parseStackTop >= CMN_PARSE_STACK_SIZE)
  1428. {
  1429. compiler->state = CMN_COMPILER_ERROR_PARSE_STACK;
  1430. return;
  1431. }
  1432. compiler->parseStack[compiler->parseStackTop] = addr;
  1433. compiler->parseStackTop++;
  1434. }
  1435. uint32_t _CMN_compilerPointerToAddr(const CMN_Compiler *compiler,
  1436. const uint8_t *ptr)
  1437. {
  1438. return ((ptr - compiler->bytecode) - CMN_BYTECODE_HEADER_SIZE) / 2;
  1439. }
  1440. uint8_t *_CMN_compilerAddrToPointer(const CMN_Compiler *compiler,
  1441. uint32_t addr)
  1442. {
  1443. return compiler->bytecode + CMN_BYTECODE_HEADER_SIZE + 2 * addr;
  1444. }
  1445. void CMN_pseudohash(char typeChar, const char *str,
  1446. char hash[CMN_STRING_PSEUDOHASH_SIZE])
  1447. {
  1448. hash[0] = typeChar;
  1449. uint8_t i = 0;
  1450. while (i < CMN_STRING_PSEUDOHASH_SIZE - 1 && str[i] != 0)
  1451. {
  1452. hash[i + 1] = str[i];
  1453. i++;
  1454. }
  1455. if (i < CMN_STRING_PSEUDOHASH_SIZE - 1)
  1456. {
  1457. while (i < CMN_STRING_PSEUDOHASH_SIZE - 1)
  1458. {
  1459. hash[i + 1] = 0;
  1460. i++;
  1461. }
  1462. }
  1463. else
  1464. {
  1465. uint8_t vlen = 0;
  1466. uint8_t vsum = 0;
  1467. uint8_t vmul = 0;
  1468. while (*str != 0)
  1469. {
  1470. vlen++;
  1471. vsum += *str;
  1472. vmul *= (vmul + 1);
  1473. str++;
  1474. }
  1475. vlen = _CMN_numPseudohash(vlen);
  1476. vsum = _CMN_numPseudohash(vsum);
  1477. vmul = _CMN_numPseudohash(vmul);
  1478. hash[CMN_STRING_PSEUDOHASH_SIZE - 1] = vmul;
  1479. hash[CMN_STRING_PSEUDOHASH_SIZE - 2] = vsum;
  1480. hash[CMN_STRING_PSEUDOHASH_SIZE - 3] = vlen;
  1481. }
  1482. }
  1483. int32_t CMN_compilerAddSymbol(CMN_Compiler *compiler,
  1484. char symbol[CMN_STRING_PSEUDOHASH_SIZE])
  1485. {
  1486. int32_t index = CMN_compilerFindSymbol(compiler,symbol);
  1487. if (index >= 0)
  1488. return index;
  1489. if (compiler->symbolCount >= compiler->symbolTableSize)
  1490. {
  1491. compiler->state = CMN_COMPILER_ERROR_SYMBOL_TABLE;
  1492. return -1;
  1493. }
  1494. index = 0;
  1495. char *m = compiler->symbolTable;
  1496. for (uint16_t i = 0; i < compiler->symbolCount; ++i)
  1497. {
  1498. if (m[0] == symbol[0])
  1499. index++;
  1500. m += CMN_STRING_PSEUDOHASH_SIZE;
  1501. }
  1502. for (uint8_t i = 0; i < CMN_STRING_PSEUDOHASH_SIZE; ++i)
  1503. m[i] = symbol[i];
  1504. compiler->symbolCount++;
  1505. return index;
  1506. }
  1507. uint8_t CMN_compilerGetSymbol(const CMN_Compiler *compiler, char typeChar,
  1508. uint32_t id, char symbol[CMN_STRING_PSEUDOHASH_SIZE])
  1509. {
  1510. uint32_t index = 0;
  1511. while (index < compiler->symbolCount)
  1512. {
  1513. if (compiler->symbolTable[index * CMN_STRING_PSEUDOHASH_SIZE] == typeChar)
  1514. {
  1515. if (id == 0)
  1516. {
  1517. for (uint8_t i = 0; i < CMN_STRING_PSEUDOHASH_SIZE; ++i)
  1518. symbol[i] =
  1519. compiler->symbolTable[index * CMN_STRING_PSEUDOHASH_SIZE + i];
  1520. return 1;
  1521. }
  1522. id--;
  1523. }
  1524. index++;
  1525. }
  1526. return 0;
  1527. }
  1528. int32_t CMN_compilerFindFunction(const CMN_Compiler *compiler,
  1529. const char *funcName, uint8_t isExternal)
  1530. {
  1531. char h[CMN_STRING_PSEUDOHASH_SIZE];
  1532. CMN_pseudohash(isExternal ? 'e' : 'f',funcName,h);
  1533. return CMN_compilerFindSymbol(compiler,h);
  1534. }
  1535. int32_t CMN_compilerFindSymbol(const CMN_Compiler *compiler,
  1536. char symbol[CMN_STRING_PSEUDOHASH_SIZE])
  1537. {
  1538. const char *m = compiler->symbolTable;
  1539. int32_t result = 0;
  1540. for (uint32_t i = 0; i < compiler->symbolCount; ++i)
  1541. {
  1542. if (symbol[0] == m[0])
  1543. {
  1544. uint8_t matches = 1;
  1545. for (uint8_t j = 1; j < CMN_STRING_PSEUDOHASH_SIZE; ++j)
  1546. if (symbol[j] != m[j])
  1547. {
  1548. matches = 0;
  1549. break;
  1550. }
  1551. if (matches)
  1552. return result;
  1553. result++;
  1554. }
  1555. m += CMN_STRING_PSEUDOHASH_SIZE;
  1556. }
  1557. return -1;
  1558. }
  1559. uint8_t *_CMN_compilerFindNthDes(CMN_Compiler *compiler, uint16_t n, uint8_t d)
  1560. {
  1561. uint8_t *instr = compiler->bytecode + CMN_BYTECODE_HEADER_SIZE;
  1562. while (1)
  1563. {
  1564. if (*instr == CMN_OPCODE_DES && ((*(instr + 1) & 0x0f) == d))
  1565. {
  1566. if (n == 0)
  1567. break;
  1568. else
  1569. n--;
  1570. }
  1571. instr += 2;
  1572. }
  1573. instr += 2; // skip the DES
  1574. if (d == CMN_DES_FUNC) // with functions also skip the initial jump
  1575. {
  1576. instr += 2;
  1577. while ((*instr == CMN_OPCODE_COC) || (*instr == CMN_OPCODE_NOP))
  1578. instr += 2;
  1579. }
  1580. return instr;
  1581. }
  1582. void _CMN_decodePtrSizesFromSymbol(uint8_t *typeEnv, uint32_t *ptrIndex,
  1583. uint32_t *ptrSize, const char symbol[CMN_STRING_PSEUDOHASH_SIZE])
  1584. {
  1585. *typeEnv = symbol[1] - '0';
  1586. *ptrIndex =
  1587. symbol[2] - '!' + (symbol[3] - '!') * 64 + (symbol[4] - '!') * 64 * 64;
  1588. *ptrSize =
  1589. symbol[5] - '!' + (symbol[6] - '!') * 64 + (symbol[7] - '!') * 64 * 64;
  1590. }
  1591. void _CMN_encodePtrSizeAsSymbol(uint8_t typeEnv, uint32_t ptrIndex,
  1592. uint32_t ptrSize, char symbol[CMN_STRING_PSEUDOHASH_SIZE])
  1593. {
  1594. char *c = symbol;
  1595. *c = 's'; c++;
  1596. *c = '0' + typeEnv; c++;
  1597. for (uint8_t i = 0; i < 3; ++i)
  1598. {
  1599. *c = '!' + ptrIndex % 64;
  1600. *(c + 3) = '!' + ptrSize % 64;
  1601. ptrIndex /= 64;
  1602. ptrSize /= 64;
  1603. c++;
  1604. }
  1605. c += 3;
  1606. while (c < symbol + CMN_STRING_PSEUDOHASH_SIZE)
  1607. {
  1608. *c = 0;
  1609. c++;
  1610. }
  1611. }
  1612. #define _TOK_STR (compiler->tokenizer.tokenString)
  1613. #define _BC_END (compiler->bytecodeEnd)
  1614. void _CMN_compilerAppendInstr(CMN_Compiler *compiler, uint8_t opcode,
  1615. uint8_t params)
  1616. {
  1617. if (_BC_END >= compiler->bytecodeLimit - 1)
  1618. compiler->state = CMN_COMPILER_ERROR_BYTECODE_TOO_BIG;
  1619. else
  1620. {
  1621. *_BC_END = opcode;
  1622. _BC_END++;
  1623. *_BC_END = params;
  1624. _BC_END++;
  1625. }
  1626. }
  1627. #define _APPEND_I(opcode,params) \
  1628. _CMN_compilerAppendInstr(compiler,opcode,params);
  1629. #define _TYPE_ENV (compiler->currentTypeEnv)
  1630. void _CMN_compilerHandleJump(CMN_Compiler *compiler,
  1631. char *token, uint8_t opcode, uint8_t des, char typeChar1, char typeChar2)
  1632. {
  1633. char symbol[CMN_STRING_PSEUDOHASH_SIZE];
  1634. CMN_pseudohash(typeChar1,token,symbol);
  1635. int32_t index = CMN_compilerFindSymbol(compiler,symbol);
  1636. if (index >= 0) // destination (func. or label) already declared
  1637. {
  1638. _APPEND_I(opcode,0);
  1639. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,
  1640. _CMN_compilerPointerToAddr(compiler,
  1641. _CMN_compilerFindNthDes(compiler,index,des)));
  1642. }
  1643. else // destination not declared yet, leave address resolution for later
  1644. {
  1645. /* hack: we use the CMN_MASK_INSTR_NOPOP bit to indicate the address
  1646. has not yet been resolved so that we know it later */
  1647. _APPEND_I(opcode,CMN_IPARAMS(0,1,0,0));
  1648. uint8_t *tmp = _BC_END - 2;
  1649. symbol[0] = typeChar2;
  1650. index = CMN_compilerAddSymbol(compiler,symbol);
  1651. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  1652. _APPEND_I(CMN_OPCODE_NOP,0)
  1653. _CMN_compilerFillConst(compiler,tmp,index); // store the index
  1654. }
  1655. }
  1656. uint8_t CMN_compilerFeedChar(CMN_Compiler *compiler, char character)
  1657. {
  1658. #define _CMN_COMPILER_FLAG_INIT_FUNC_NEEDED 1
  1659. #define _CMN_COMPILER_FLAG_COMMANDS_STARTED 2
  1660. char symbol[CMN_STRING_PSEUDOHASH_SIZE];
  1661. if (compiler->state != CMN_COMPILER_OK)
  1662. return compiler->state;
  1663. switch (CMN_tokenizerFeedChar(&compiler->tokenizer,character))
  1664. {
  1665. case CMN_TOKENIZER_TOKEN:
  1666. {
  1667. uint8_t token = CMN_identifyToken(_TOK_STR);
  1668. if (!(compiler->flags & _CMN_COMPILER_FLAG_COMMANDS_STARTED) &&
  1669. compiler->parseStackTop == 0 && token != CMN_TOKEN_FUNC)
  1670. {
  1671. // first top-level command found, reserve space for calling init func.
  1672. for (uint8_t i = 0; i < compiler->implicitAddressSize; ++i)
  1673. _APPEND_I(CMN_OPCODE_NOP,i == 0) // hack: use 1 in first NOP as a mark
  1674. compiler->flags |= _CMN_COMPILER_FLAG_COMMANDS_STARTED;
  1675. }
  1676. switch (token)
  1677. {
  1678. case CMN_TOKEN_COMMAND:
  1679. case CMN_TOKEN_BREAK:
  1680. case CMN_TOKEN_BRANCH:
  1681. case CMN_TOKEN_LOOP:
  1682. {
  1683. uint8_t noPop = 0;
  1684. unsigned int len = _CMN_strLen(_TOK_STR);
  1685. if (_TOK_STR[len - 1] == '\'')
  1686. {
  1687. len--;
  1688. noPop = 1;
  1689. _TOK_STR[len] = 0;
  1690. }
  1691. #define _STR_ID(a,b,c)\
  1692. ((((uint32_t) (c)) << 24) | (((uint32_t) (b)) << 16) | (a))
  1693. uint32_t sID = len > 3 ? 0 : _STR_ID(
  1694. _TOK_STR[0],
  1695. len > 1 ? _TOK_STR[1] : 0,
  1696. len > 2 ? _TOK_STR[2] : 0);
  1697. switch (sID)
  1698. {
  1699. #define _SIMPLE_OP(c1,c2,c3,opcode,con) case _STR_ID(c1,c2,c3):\
  1700. _APPEND_I(opcode,CMN_IPARAMS(_TYPE_ENV,noPop,0,con)); break;
  1701. _SIMPLE_OP('+',0,0, CMN_OPCODE_ADX,0)
  1702. _SIMPLE_OP('-',0,0, CMN_OPCODE_SUX,0)
  1703. _SIMPLE_OP('+','+',0, CMN_OPCODE_ADC,1)
  1704. _SIMPLE_OP('-','-',0, CMN_OPCODE_SUC,1)
  1705. _SIMPLE_OP('*',0,0, CMN_OPCODE_MUX,0)
  1706. _SIMPLE_OP('/',0,0, CMN_OPCODE_DIX,0)
  1707. _SIMPLE_OP('%',0,0, CMN_OPCODE_MOX,0)
  1708. _SIMPLE_OP('/','/',0, CMN_OPCODE_DSX,0)
  1709. _SIMPLE_OP('%','%',0, CMN_OPCODE_MSX,0)
  1710. _SIMPLE_OP('>','<',0, CMN_OPCODE_SWP,0)
  1711. _SIMPLE_OP('^',0,0, CMN_OPCODE_POP,0)
  1712. _SIMPLE_OP('?','?',0, CMN_OPCODE_CND,0)
  1713. _SIMPLE_OP('&',0,0, CMN_OPCODE_BAX,0)
  1714. _SIMPLE_OP('|',0,0, CMN_OPCODE_BOX,0)
  1715. _SIMPLE_OP('|','!',0, CMN_OPCODE_BXX,0)
  1716. _SIMPLE_OP('&','&',0, CMN_OPCODE_LAX,0)
  1717. _SIMPLE_OP('|','|',0, CMN_OPCODE_LOX,0)
  1718. _SIMPLE_OP('|','!','!',CMN_OPCODE_LXX,0)
  1719. _SIMPLE_OP('!','!',0, CMN_OPCODE_EQC,0)
  1720. _SIMPLE_OP('!',0,0, CMN_OPCODE_BNO,0)
  1721. _SIMPLE_OP('<','?',0, CMN_OPCODE_INU,0)
  1722. _SIMPLE_OP('<','-',0, CMN_OPCODE_INP,0)
  1723. _SIMPLE_OP('-','>',0, CMN_OPCODE_OUT,0)
  1724. _SIMPLE_OP('$','$',0, CMN_OPCODE_ADR,0)
  1725. _SIMPLE_OP('=',0,0, CMN_OPCODE_EQX,0)
  1726. _SIMPLE_OP('!','=',0, CMN_OPCODE_NEX,0)
  1727. _SIMPLE_OP('<',0,0, CMN_OPCODE_SMX,0)
  1728. _SIMPLE_OP('<','=',0, CMN_OPCODE_SEX,0)
  1729. _SIMPLE_OP('>',0,0, CMN_OPCODE_GRX,0)
  1730. _SIMPLE_OP('>','=',0, CMN_OPCODE_GEX,0)
  1731. _SIMPLE_OP('<','<',0, CMN_OPCODE_SSX,0)
  1732. _SIMPLE_OP('<','<','=',CMN_OPCODE_LSX,0)
  1733. _SIMPLE_OP('>','>',0, CMN_OPCODE_GSX,0)
  1734. _SIMPLE_OP('>','>','=',CMN_OPCODE_BSX,0)
  1735. #undef _SIMPLE_OP
  1736. case _STR_ID('~','0',0): _TYPE_ENV = 0; break;
  1737. case _STR_ID('~','8',0): _TYPE_ENV = 1; break;
  1738. case _STR_ID('~','1','6'): _TYPE_ENV = 2; break;
  1739. case _STR_ID('~','3','2'): _TYPE_ENV = 3; break;
  1740. case _STR_ID('>','0',0):
  1741. case _STR_ID('>','8',0):
  1742. case _STR_ID('>','1','6'):
  1743. case _STR_ID('>','3','2'):
  1744. {
  1745. uint8_t env = (_TOK_STR[1] > '1') + 2 * (_TOK_STR[2] != 0);
  1746. if (!noPop || env != _TYPE_ENV) // otherwise has no effect
  1747. _APPEND_I(CMN_OPCODE_TRA,CMN_IPARAMS(_TYPE_ENV,noPop,0,env))
  1748. break;
  1749. }
  1750. case _STR_ID('@','@',0):
  1751. _CMN_compilerParseStackPush(compiler,
  1752. _CMN_compilerPointerToAddr(compiler,_BC_END));
  1753. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_LOOP))
  1754. _APPEND_I(CMN_OPCODE_NOP,0)
  1755. break;
  1756. case _STR_ID('!','.',0):
  1757. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_EXIT))
  1758. _APPEND_I(CMN_OPCODE_JMA,0)
  1759. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  1760. _APPEND_I(CMN_OPCODE_NOP,0)
  1761. break;
  1762. case _STR_ID('-','-','>'): // -->
  1763. {
  1764. if (noPop)
  1765. {
  1766. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  1767. return compiler->state;
  1768. }
  1769. const char *cmd = " @' -> . ^ ";
  1770. while (*cmd != 0)
  1771. {
  1772. if (CMN_compilerFeedChar(compiler,*cmd) != CMN_COMPILER_OK)
  1773. break;
  1774. cmd++;
  1775. }
  1776. break;
  1777. }
  1778. case _STR_ID('@',0,0):
  1779. case _STR_ID('?',0,0):
  1780. case _STR_ID('!','@',0):
  1781. _CMN_compilerParseStackPush(compiler,
  1782. _CMN_compilerPointerToAddr(compiler,_BC_END));
  1783. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,
  1784. _TOK_STR[0] == '@' ? CMN_DES_LOOP :
  1785. (_TOK_STR[0] == '?' ? CMN_DES_IF : CMN_DES_LOOP_BREAK)))
  1786. if (_TOK_STR[0] != '!')
  1787. _APPEND_I(CMN_OPCODE_JNA,CMN_IPARAMS(_TYPE_ENV,noPop,0,0))
  1788. else
  1789. _APPEND_I(CMN_OPCODE_JMA,0)
  1790. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  1791. _APPEND_I(CMN_OPCODE_NOP,0)
  1792. break;
  1793. default:
  1794. if (_TOK_STR[0] == '$')
  1795. {
  1796. if (_TOK_STR[1] == 0)
  1797. {
  1798. _APPEND_I(CMN_OPCODE_PUX,CMN_IPARAMS(_TYPE_ENV,noPop,0,0));
  1799. break;
  1800. }
  1801. uint8_t opc = CMN_OPCODE_MGE;
  1802. char *s1 = _TOK_STR + 1;
  1803. char *s2 = 0;
  1804. if (_TOK_STR[1] == '>' || _TOK_STR[1] == '<')
  1805. {
  1806. opc = CMN_OPCODE_PAC;
  1807. s1++;
  1808. }
  1809. else if (_TOK_STR[1] == ':')
  1810. {
  1811. opc = CMN_OPCODE_MEX;
  1812. s1++;
  1813. }
  1814. else if (_TOK_STR[1] == '+')
  1815. {
  1816. opc = CMN_OPCODE_PAX;
  1817. s1++;
  1818. }
  1819. else
  1820. {
  1821. char *s3 = _TOK_STR + 2;
  1822. while (*s3 != 0)
  1823. {
  1824. if (*s3 == '>' || *s3 == '=')
  1825. {
  1826. opc = *s3 == '>' ? CMN_OPCODE_PCO : CMN_OPCODE_PCM;
  1827. s1 = s3 + 1;
  1828. s2 = _TOK_STR + 1;
  1829. *s3 = 0;
  1830. break;
  1831. }
  1832. s3++;
  1833. }
  1834. }
  1835. int32_t pointer1, pointer2;
  1836. int32_t *p = &pointer1;
  1837. for (uint8_t i = 0; i < 2; ++i)
  1838. {
  1839. CMN_pseudohash('0' + _TYPE_ENV,s1,symbol);
  1840. if (s1[1] == 0 && s1[0] >= '0' && s1[0] <= '9')
  1841. *p = s1[0] - '0';
  1842. else
  1843. {
  1844. *p = CMN_compilerFindSymbol(compiler,symbol);
  1845. if (*p >= 0)
  1846. *p += CMN_LAST_SPECIAL_PTR + 1;
  1847. }
  1848. if (*p < 0)
  1849. {
  1850. compiler->state = CMN_COMPILER_ERROR_UNKNOWN_NAME;
  1851. break;
  1852. }
  1853. if (s2 == 0)
  1854. break;
  1855. s1 = s2;
  1856. p = &pointer2;
  1857. }
  1858. _APPEND_I(opc,CMN_IPARAMS(_TYPE_ENV,noPop,0,0));
  1859. if (opc == CMN_OPCODE_PAC)
  1860. _BC_END = _CMN_compilerFillConsts(compiler,_BC_END - 2,
  1861. pointer1, _TOK_STR[1] == '>' ? 0x01 : 0x0f);
  1862. else if (opc == CMN_OPCODE_PCO || opc == CMN_OPCODE_PCM)
  1863. _BC_END = _CMN_compilerFillConsts(compiler,_BC_END - 2,
  1864. pointer1,pointer2);
  1865. else
  1866. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,
  1867. pointer1);
  1868. }
  1869. else if (_TOK_STR[0] == '~')
  1870. {
  1871. if (_TOK_STR[1] == '"')
  1872. {
  1873. _TOK_STR[len - 1] = 0; // remove the final '"'
  1874. if (compiler->includeFunction != 0)
  1875. compiler->includeFunction(_TOK_STR + 2);
  1876. else
  1877. {
  1878. _TOK_STR[len - 1] = '"';
  1879. compiler->state = CMN_COMPILER_ERROR_UNSUPPORTED;
  1880. return compiler->state;
  1881. }
  1882. }
  1883. else
  1884. { // pointer
  1885. uint32_t size = 1;
  1886. char *separator = _TOK_STR;
  1887. while (*separator != 0 && *separator != ':')
  1888. separator++;
  1889. if (*separator == ':')
  1890. {
  1891. *separator = 0;
  1892. if (CMN_identifyToken(separator + 1) != CMN_TOKEN_NUMBER)
  1893. {
  1894. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  1895. return compiler->state;
  1896. }
  1897. size = CMN_literalValue(separator + 1,0,0);
  1898. }
  1899. if (CMN_identifyToken(_TOK_STR + 1) != CMN_TOKEN_NAME)
  1900. {
  1901. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  1902. return compiler->state;
  1903. }
  1904. CMN_pseudohash('0' + compiler->currentTypeEnv,
  1905. _TOK_STR + 1,symbol);
  1906. uint32_t id = CMN_compilerAddSymbol(compiler,symbol);
  1907. if (size != 0)
  1908. {
  1909. /* 0 sized pointers don't need initialization, they aren't
  1910. supposed to point to allocated memory */
  1911. compiler->flags |= _CMN_COMPILER_FLAG_INIT_FUNC_NEEDED;
  1912. _CMN_encodePtrSizeAsSymbol(_TYPE_ENV,id,size,symbol);
  1913. CMN_compilerAddSymbol(compiler,symbol);
  1914. }
  1915. }
  1916. }
  1917. else if (_TOK_STR[0] == '>')
  1918. { // goto
  1919. if (CMN_identifyToken(_TOK_STR + 1) == CMN_TOKEN_NAME)
  1920. {
  1921. _APPEND_I(CMN_OPCODE_DES,CMN_DES_GOTO)
  1922. _CMN_compilerHandleJump(compiler,_TOK_STR + 1,CMN_OPCODE_JMA,
  1923. CMN_DES_LABEL,'l','j');
  1924. }
  1925. else
  1926. {
  1927. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  1928. return compiler->state;
  1929. }
  1930. }
  1931. else
  1932. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  1933. break;
  1934. }
  1935. #undef _STR_ID
  1936. break;
  1937. } // case COMMAND, BREAK, BRANCH, LOOP
  1938. case CMN_TOKEN_NAME:
  1939. {
  1940. _CMN_compilerHandleJump(compiler,_TOK_STR,CMN_OPCODE_CAL,CMN_DES_FUNC,
  1941. 'f','c');
  1942. break;
  1943. }
  1944. case CMN_TOKEN_LABEL:
  1945. CMN_pseudohash('l',_TOK_STR + 2,symbol);
  1946. if (CMN_compilerFindSymbol(compiler,symbol) >= 0)
  1947. {
  1948. compiler->state = CMN_COMPILER_ERROR_REDEFINED;
  1949. return compiler->state;
  1950. }
  1951. CMN_compilerAddSymbol(compiler,symbol);
  1952. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_LABEL))
  1953. break;
  1954. case CMN_TOKEN_FUNC:
  1955. {
  1956. uint8_t p = 0;
  1957. while (compiler->tokenizer.tokenString[p] != 0)
  1958. {
  1959. if (compiler->tokenizer.tokenString[p] == ':')
  1960. {
  1961. compiler->tokenizer.tokenString[p] = 0;
  1962. break;
  1963. }
  1964. p++;
  1965. }
  1966. CMN_pseudohash('f',compiler->tokenizer.tokenString,symbol);
  1967. if (CMN_compilerFindSymbol(compiler,symbol) >= 0)
  1968. {
  1969. compiler->state = CMN_COMPILER_ERROR_REDEFINED;
  1970. break;
  1971. }
  1972. CMN_compilerAddSymbol(compiler,symbol);
  1973. if (compiler->parseStackTop != 0)
  1974. {
  1975. // function def. must appear always on top level
  1976. compiler->state = CMN_COMPILER_ERROR_UNEXPECTED_TOKEN;
  1977. break;
  1978. }
  1979. _CMN_compilerParseStackPush(compiler,
  1980. _CMN_compilerPointerToAddr(compiler,_BC_END));
  1981. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_FUNC))
  1982. _APPEND_I(CMN_OPCODE_JMA,0)
  1983. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  1984. _APPEND_I(CMN_OPCODE_NOP,0)
  1985. break;
  1986. }
  1987. case CMN_TOKEN_NUMBER:
  1988. {
  1989. _APPEND_I(CMN_OPCODE_CON,CMN_IPARAMS(_TYPE_ENV,1,0,0));
  1990. uint8_t negative, ok;
  1991. uint64_t v = CMN_literalValue(_TOK_STR,&negative,&ok);
  1992. if (!ok)
  1993. {
  1994. compiler->state = CMN_COMPILER_ERROR_UNSUPPORTED;
  1995. return compiler->state;
  1996. }
  1997. if (compiler->currentTypeEnv == 1)
  1998. v &= 0xff;
  1999. else if (compiler->currentTypeEnv == 2)
  2000. v &= 0xffff;
  2001. if (compiler->currentTypeEnv == 0 && negative)
  2002. {
  2003. /* In type env 0 we don't know the bit width so we push negative
  2004. literals by pushing 0 and subtracting constant with SUC. */
  2005. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,0);
  2006. v = 0xffffffffffffffff - v + 1;
  2007. _APPEND_I(CMN_OPCODE_SUC,CMN_IPARAMS(0,0,0,0));
  2008. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,v);
  2009. }
  2010. else
  2011. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,v);
  2012. break;
  2013. }
  2014. case CMN_TOKEN_STRING:
  2015. {
  2016. const char *c = _TOK_STR + 1;
  2017. while (*c != 0 && *c != '"')
  2018. c++;
  2019. c--;
  2020. while (c > _TOK_STR && compiler->state == CMN_COMPILER_OK)
  2021. {
  2022. _APPEND_I(CMN_OPCODE_CON,CMN_IPARAMS(_TYPE_ENV,1,0,0));
  2023. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,*c);
  2024. c--;
  2025. }
  2026. break;
  2027. }
  2028. case CMN_TOKEN_ELSE:
  2029. case CMN_TOKEN_END:
  2030. {
  2031. uint32_t parseStackTopOld = compiler->parseStackTop;
  2032. uint8_t *matchingInstr = 0;
  2033. uint8_t instrType;
  2034. // find matching instr., skip possible loop breaks on stack:
  2035. while (1)
  2036. {
  2037. if (compiler->parseStackTop == 0)
  2038. {
  2039. compiler->state = CMN_COMPILER_ERROR_UNEXPECTED_TOKEN;
  2040. matchingInstr = 0;
  2041. break;
  2042. }
  2043. compiler->parseStackTop--;
  2044. matchingInstr = _CMN_compilerAddrToPointer(compiler,
  2045. compiler->parseStack[compiler->parseStackTop]);
  2046. instrType = (*(matchingInstr + 1)) & 0x0f;
  2047. if (instrType != CMN_DES_LOOP_BREAK)
  2048. break;
  2049. }
  2050. if (matchingInstr == 0)
  2051. break;
  2052. switch (instrType)
  2053. {
  2054. case CMN_DES_LOOP:
  2055. {
  2056. if (token != CMN_TOKEN_END)
  2057. {
  2058. // "else" matched a loop start: error
  2059. compiler->state = CMN_COMPILER_ERROR_UNEXPECTED_TOKEN;
  2060. return compiler->state;
  2061. }
  2062. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_LOOP_END));
  2063. _APPEND_I(CMN_OPCODE_JMA,0);
  2064. _BC_END = _CMN_compilerFillConst(compiler,_BC_END - 2,
  2065. _CMN_compilerPointerToAddr(compiler,matchingInstr) + 1);
  2066. uint32_t addrHere =
  2067. _CMN_compilerPointerToAddr(compiler,_BC_END);
  2068. // for non-infinite loop fill back the jump address:
  2069. if (*(matchingInstr + 2) != CMN_OPCODE_NOP)
  2070. _CMN_compilerFillConst(compiler,matchingInstr + 2,addrHere);
  2071. // also fill in addresses for all loop breaks:
  2072. while (1)
  2073. {
  2074. parseStackTopOld--;
  2075. if (parseStackTopOld == compiler->parseStackTop)
  2076. break;
  2077. _CMN_compilerFillConst(compiler,
  2078. _CMN_compilerAddrToPointer(compiler,
  2079. compiler->parseStack[parseStackTopOld]) + 2,
  2080. addrHere);
  2081. }
  2082. break;
  2083. }
  2084. case CMN_DES_ELSE:
  2085. if (token == CMN_TOKEN_ELSE)
  2086. {
  2087. // else matching another else: error
  2088. compiler->state = CMN_COMPILER_ERROR_UNEXPECTED_TOKEN;
  2089. return compiler->state;
  2090. }
  2091. // else continue on
  2092. __attribute__((fallthrough));
  2093. case CMN_DES_IF:
  2094. if (token == CMN_TOKEN_ELSE)
  2095. {
  2096. // here we just replace the if with else on stack:
  2097. compiler->parseStack[compiler->parseStackTop] =
  2098. _CMN_compilerPointerToAddr(compiler,_BC_END);
  2099. compiler->parseStackTop = parseStackTopOld;
  2100. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_ELSE));
  2101. _APPEND_I(CMN_OPCODE_JMA,0);
  2102. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  2103. _APPEND_I(CMN_OPCODE_NOP,0)
  2104. }
  2105. else
  2106. {
  2107. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_IF_END));
  2108. // now pop the if while keeping possible loop breaks above
  2109. while (compiler->parseStackTop < parseStackTopOld - 1)
  2110. {
  2111. compiler->parseStack[compiler->parseStackTop] =
  2112. compiler->parseStack[compiler->parseStackTop + 1];
  2113. compiler->parseStackTop++;
  2114. }
  2115. }
  2116. _CMN_compilerFillConst(compiler,matchingInstr + 2,
  2117. _CMN_compilerPointerToAddr(compiler,_BC_END + 1));
  2118. break;
  2119. case CMN_DES_FUNC:
  2120. compiler->parseStackTop = 0;
  2121. _APPEND_I(CMN_OPCODE_RET,0);
  2122. _CMN_compilerFillConst(compiler,matchingInstr + 2,
  2123. _CMN_compilerPointerToAddr(compiler,_BC_END));
  2124. break;
  2125. default: // unknown description, shouldn't happen
  2126. compiler->state = CMN_COMPILER_ERROR_GENERIC;
  2127. return compiler->state;
  2128. break;
  2129. }
  2130. break;
  2131. }
  2132. default:
  2133. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN; break;
  2134. return compiler->state;
  2135. break;
  2136. }
  2137. break;
  2138. }
  2139. case CMN_TOKENIZER_ERROR:
  2140. compiler->state = CMN_COMPILER_ERROR_BAD_TOKEN;
  2141. return compiler->state;
  2142. break;
  2143. default: break;
  2144. }
  2145. if (compiler->state != CMN_COMPILER_OK)
  2146. return compiler->state;
  2147. if (character == 0) // end of source code => append epilogue code
  2148. {
  2149. if (compiler->parseStackTop != 0)
  2150. {
  2151. compiler->state = CMN_COMPILER_ERROR_UNEXPECTED_END;
  2152. return compiler->state;
  2153. }
  2154. uint8_t *initFunc = 0;
  2155. if (compiler->flags & _CMN_COMPILER_FLAG_INIT_FUNC_NEEDED)
  2156. {
  2157. // create the init function to set the initial pointer addresses
  2158. _APPEND_I(CMN_OPCODE_DES,CMN_IPARAMS(0,0,0,CMN_DES_FUNC))
  2159. initFunc = _BC_END;
  2160. _APPEND_I(CMN_OPCODE_JMA,0)
  2161. for (uint8_t i = 0; i < compiler->implicitAddressSize - 1; ++i)
  2162. _APPEND_I(CMN_OPCODE_NOP,0)
  2163. const char *symbolItem = compiler->symbolTable;
  2164. uint32_t memoryEnds[4];
  2165. for (uint8_t i = 0; i < 4; ++i)
  2166. memoryEnds[i] = 0;
  2167. for (uint32_t i = 0; i < compiler->symbolCount; ++i)
  2168. {
  2169. if (symbolItem[0] == 's')
  2170. {
  2171. uint8_t env;
  2172. uint32_t ptrIndex, ptrSize;
  2173. _CMN_decodePtrSizesFromSymbol(&env,&ptrIndex,&ptrSize,symbolItem);
  2174. if (ptrSize != 0 && memoryEnds[env] != 0)
  2175. {
  2176. ptrIndex += CMN_LAST_SPECIAL_PTR + 1; // user pointers start here
  2177. _APPEND_I(CMN_OPCODE_PSC,CMN_IPARAMS(env,0,0,0))
  2178. _BC_END = _CMN_compilerFillConsts(compiler,_BC_END - 2,
  2179. ptrIndex,memoryEnds[env]);
  2180. }
  2181. memoryEnds[env] += ptrSize;
  2182. }
  2183. symbolItem += CMN_STRING_PSEUDOHASH_SIZE;
  2184. }
  2185. if (compiler->state != CMN_COMPILER_OK)
  2186. return compiler->state;
  2187. for (uint8_t i = 0; i < 4; ++i) // and potentially init stack tops
  2188. if (memoryEnds[i] != 0)
  2189. {
  2190. _APPEND_I(CMN_OPCODE_PSC,CMN_IPARAMS(i,0,0,0))
  2191. _BC_END = _CMN_compilerFillConsts(compiler,_BC_END - 2,0,
  2192. memoryEnds[i]);
  2193. }
  2194. _APPEND_I(CMN_OPCODE_INI,0)
  2195. _APPEND_I(CMN_OPCODE_RET,0)
  2196. _CMN_compilerFillConst(compiler,initFunc,
  2197. _CMN_compilerPointerToAddr(compiler,_BC_END));
  2198. }
  2199. if (compiler->state != CMN_COMPILER_OK)
  2200. return compiler->state;
  2201. uint8_t *instr = compiler->bytecode + CMN_BYTECODE_HEADER_SIZE;
  2202. while (*instr != CMN_OPCODE_END) // fill back the init function call
  2203. {
  2204. if (*instr == CMN_OPCODE_NOP && *(instr + 1) == 1)
  2205. {
  2206. *(instr + 1) = 0;
  2207. if (initFunc != 0)
  2208. {
  2209. *instr = CMN_OPCODE_CAL;
  2210. _CMN_compilerFillConst(compiler,instr,
  2211. _CMN_compilerPointerToAddr(compiler,
  2212. initFunc + 2 * compiler->implicitAddressSize));
  2213. }
  2214. else
  2215. *instr = CMN_OPCODE_INI;
  2216. break;
  2217. }
  2218. instr += 2;
  2219. }
  2220. _APPEND_I(CMN_OPCODE_END,0); // append the final end
  2221. if (compiler->state == CMN_COMPILER_OK)
  2222. {
  2223. // do a forward pass and fill the unknown fun. call/goto addresses:
  2224. instr = compiler->bytecode + CMN_BYTECODE_HEADER_SIZE;
  2225. while (*instr != CMN_OPCODE_END)
  2226. {
  2227. if ((*instr == CMN_OPCODE_CAL || *instr == CMN_OPCODE_JMA) &&
  2228. (*(instr + 1) & CMN_MASK_INSTR_NOPOP))
  2229. {
  2230. uint8_t funcCall = *instr == CMN_OPCODE_CAL;
  2231. int32_t c = CMN_instrGetConst(instr);
  2232. *(instr + 1) &= ~CMN_MASK_INSTR_NOPOP;
  2233. CMN_compilerGetSymbol(compiler,funcCall ? 'c' : 'j',c,symbol);
  2234. symbol[0] = funcCall ? 'f' : 'l';
  2235. c = CMN_compilerFindSymbol(compiler,symbol);
  2236. if (c >= 0)
  2237. {
  2238. *(instr + 1) = 0;
  2239. for (uint8_t i = 1; i < compiler->implicitAddressSize; ++i)
  2240. {
  2241. *(instr + 2 * i) = CMN_OPCODE_NOP;
  2242. *(instr + 2 * i + 1) = 0;
  2243. }
  2244. _CMN_compilerFillConst(compiler,instr,
  2245. _CMN_compilerPointerToAddr(compiler,
  2246. _CMN_compilerFindNthDes(compiler,c,
  2247. funcCall ? CMN_DES_FUNC : CMN_DES_LABEL)));
  2248. }
  2249. else
  2250. {
  2251. if (funcCall)
  2252. *instr = CMN_OPCODE_CAE;
  2253. else
  2254. {
  2255. // undefined label
  2256. compiler->state = CMN_COMPILER_ERROR_UNKNOWN_NAME;
  2257. return compiler->state;
  2258. }
  2259. }
  2260. }
  2261. instr += 2;
  2262. } // while (resolve addresses)
  2263. instr = compiler->bytecode + CMN_BYTECODE_HEADER_SIZE;
  2264. // now do a forward pass again and order external calls from 0:
  2265. uint16_t nextIndex = 0;
  2266. while (*instr != CMN_OPCODE_END)
  2267. {
  2268. if (*instr == CMN_OPCODE_CAE)
  2269. {
  2270. uint32_t c = CMN_instrGetConst(instr);
  2271. if (c > nextIndex)
  2272. {
  2273. uint8_t *instr2 = instr;
  2274. while (*instr2 != CMN_OPCODE_END)
  2275. {
  2276. if (*instr2 == CMN_OPCODE_CAE)
  2277. {
  2278. uint32_t c2 = CMN_instrGetConst(instr2);
  2279. if (c2 == c)
  2280. {
  2281. *(instr2 + 1) = 0;
  2282. for (uint8_t i = 1; i < compiler->implicitAddressSize; ++i)
  2283. {
  2284. *(instr2 + 2 * i) = CMN_OPCODE_NOP;
  2285. *(instr2 + 2 * i + 1) = 0;
  2286. }
  2287. _CMN_compilerFillConst(compiler,instr2,nextIndex);
  2288. }
  2289. }
  2290. instr2 += 2;
  2291. }
  2292. nextIndex++;
  2293. }
  2294. else if (c == nextIndex)
  2295. nextIndex = c + 1;
  2296. }
  2297. instr += 2;
  2298. }
  2299. if (compiler->state != CMN_COMPILER_OK)
  2300. return compiler->state;
  2301. // now do a backwards pass and fill in the exit comm. addresses:
  2302. uint32_t funcEndAddr = 0;
  2303. uint32_t endAddr = _CMN_compilerPointerToAddr(compiler,_BC_END) - 1;
  2304. while (instr >= compiler->bytecode + CMN_BYTECODE_HEADER_SIZE)
  2305. {
  2306. if (instr[0] == CMN_OPCODE_RET)
  2307. funcEndAddr = _CMN_compilerPointerToAddr(compiler,instr);
  2308. else if (instr[0] == CMN_OPCODE_DES)
  2309. {
  2310. if (instr[1] == CMN_DES_FUNC)
  2311. funcEndAddr = 0;
  2312. else if (instr[1] == CMN_DES_EXIT)
  2313. _CMN_compilerFillConst(compiler,instr + 2,
  2314. funcEndAddr != 0 ? funcEndAddr : endAddr);
  2315. }
  2316. instr -= 2;
  2317. }
  2318. if (compiler->state != CMN_COMPILER_OK)
  2319. return compiler->state;
  2320. // now mark external calls also in the symbol table:
  2321. for (uint32_t i = 0; i < compiler->symbolCount; ++i)
  2322. {
  2323. char *s = compiler->symbolTable + i * CMN_STRING_PSEUDOHASH_SIZE;
  2324. if (s[0] == 'c')
  2325. {
  2326. char s2[CMN_STRING_PSEUDOHASH_SIZE];
  2327. s2[0] = 'f';
  2328. for (uint8_t j = 1; j < CMN_STRING_PSEUDOHASH_SIZE; ++j)
  2329. s2[j] = s[j];
  2330. if (CMN_compilerFindSymbol(compiler,s2) < 0)
  2331. s[0] = 'e'; // no corresponding 'f' symbol => external call
  2332. }
  2333. }
  2334. // fill the header:
  2335. compiler->bytecode[0] = 'C';
  2336. compiler->bytecode[1] = 'B';
  2337. for (uint8_t i = 2; i < 8; ++i)
  2338. compiler->bytecode[i] = 0;
  2339. compiler->bytecode[CMN_BYTECODE_CHECKSUM_BYTE] =
  2340. CMN_bytecodeChecksum(compiler->bytecode);
  2341. } // if (compiler ok)
  2342. } // if (character == 0)
  2343. return compiler->state;
  2344. #undef _TYPE_ENV
  2345. #undef _TOK_STR
  2346. #undef _APPEND_I
  2347. #undef _BC_END
  2348. #undef _CMN_COMPILER_FLAG_INIT_FUNC_NEEDED
  2349. #undef _CMN_COMPILER_FLAG_COMMANDS_STARTED
  2350. }
  2351. uint8_t CMN_instrTouchesMem(uint8_t opcode)
  2352. {
  2353. return opcode > CMN_OPCODE_RET && opcode != CMN_OPCODE_PCO &&
  2354. opcode != CMN_OPCODE_PAC && opcode != CMN_OPCODE_PSC &&
  2355. opcode != CMN_OPCODE_JMA;
  2356. }
  2357. uint8_t CMN_instrTouchesPtr(uint8_t opcode)
  2358. {
  2359. return (opcode >= CMN_OPCODE_PSC && opcode <= CMN_OPCODE_PUX);
  2360. }
  2361. void CMN_estimateMemory(const uint8_t *bytecode, uint32_t minStackSize,
  2362. uint32_t memoryCells[4], uint32_t pointers[4])
  2363. {
  2364. if (minStackSize < CMN_MINIMUM_STACK_SIZE)
  2365. minStackSize = CMN_MINIMUM_STACK_SIZE;
  2366. uint16_t stackTops[4];
  2367. bytecode += CMN_BYTECODE_HEADER_SIZE;
  2368. for (uint8_t i = 0; i < 4; ++i)
  2369. {
  2370. memoryCells[i] = 0;
  2371. pointers[i] = 0;
  2372. stackTops[i] = 0;
  2373. }
  2374. while (*bytecode != CMN_OPCODE_END)
  2375. {
  2376. uint8_t typeEnv = CMN_instrTypeEnv(bytecode);
  2377. if (CMN_instrTouchesMem(*bytecode))
  2378. {
  2379. memoryCells[typeEnv]++;
  2380. if (*bytecode == CMN_OPCODE_TRA)
  2381. memoryCells[CMN_instrGetConst(bytecode)]++;
  2382. }
  2383. else if (*bytecode == CMN_OPCODE_PSC)
  2384. {
  2385. uint64_t c1, c2;
  2386. CMN_instrGetConsts(bytecode,&c1,&c2);
  2387. if (c2 > stackTops[typeEnv])
  2388. stackTops[typeEnv] = c2;
  2389. }
  2390. int64_t pIndex = -1, pIndex2 = -1;
  2391. if ( // pointer in C1
  2392. *bytecode == CMN_OPCODE_PSC || *bytecode == CMN_OPCODE_PAC)
  2393. {
  2394. CMN_instrGetConsts(bytecode,(uint64_t *) &pIndex,(uint64_t *) &pIndex2);
  2395. }
  2396. else if ( // pointer in C
  2397. *bytecode == CMN_OPCODE_PAX || *bytecode == CMN_OPCODE_MEX ||
  2398. *bytecode == CMN_OPCODE_MGE)
  2399. {
  2400. pIndex = CMN_instrGetConst(bytecode);
  2401. }
  2402. else if ( // pointer in C1 and C2
  2403. *bytecode == CMN_OPCODE_PCO)
  2404. {
  2405. CMN_instrGetConsts(bytecode,(uint64_t *) &pIndex,(uint64_t *) &pIndex2);
  2406. if (pIndex2 > pIndex)
  2407. pIndex = pIndex2;
  2408. }
  2409. if (pIndex > CMN_LAST_SPECIAL_PTR)
  2410. {
  2411. pIndex -= CMN_LAST_SPECIAL_PTR;
  2412. if (pIndex > ((int32_t) pointers[typeEnv]))
  2413. pointers[typeEnv] = pIndex;
  2414. }
  2415. bytecode += 2;
  2416. }
  2417. for (uint8_t i = 0; i < 4; ++i)
  2418. if (memoryCells[i] != 0)
  2419. {
  2420. memoryCells[i] = stackTops[i] +
  2421. (memoryCells[i] > minStackSize ? memoryCells[i] : minStackSize);
  2422. pointers[i]++; // add stack top
  2423. }
  2424. }
  2425. void CMN_bytecodeRemoveInstrs(uint8_t *bytecode, uint32_t startAddr,
  2426. uint16_t instrCount)
  2427. {
  2428. uint8_t *instr = bytecode + CMN_BYTECODE_HEADER_SIZE + startAddr * 2;
  2429. do // shift
  2430. {
  2431. instr[0] = instr[instrCount * 2];
  2432. instr[1] = instr[instrCount * 2 + 1];
  2433. instr += 2;
  2434. } while (*instr != CMN_OPCODE_END);
  2435. instr = bytecode + CMN_BYTECODE_HEADER_SIZE;
  2436. while (*instr != CMN_OPCODE_END) // recompute addresses
  2437. {
  2438. if (*instr == CMN_OPCODE_CAL || // deals with addresses?
  2439. (*instr >= CMN_OPCODE_JIA && *instr <= CMN_OPCODE_JMA))
  2440. {
  2441. uint32_t jumpAddr = CMN_instrGetConst(instr);
  2442. /* TODO: possibly change this to also handle the case someone jumps
  2443. inside the removed code (if jump addr is inside the removed block, set
  2444. new addr to the start of the removed block) */
  2445. if (jumpAddr >= startAddr + instrCount)
  2446. {
  2447. jumpAddr -= instrCount;
  2448. uint8_t *tmp = instr + 2;
  2449. while (*tmp == CMN_OPCODE_COC)
  2450. {
  2451. tmp[0] = CMN_OPCODE_NOP;
  2452. tmp[1] = 0;
  2453. tmp += 2;
  2454. }
  2455. _CMN_compilerFillConst(0,instr,jumpAddr);
  2456. }
  2457. }
  2458. instr += 2;
  2459. }
  2460. bytecode[CMN_BYTECODE_CHECKSUM_BYTE] = CMN_bytecodeChecksum(bytecode);
  2461. }
  2462. uint8_t CMN_bytecodeChecksum(const uint8_t *bytecode)
  2463. {
  2464. uint8_t r = 0;
  2465. bytecode += CMN_BYTECODE_HEADER_SIZE;
  2466. while (*bytecode != CMN_OPCODE_END)
  2467. {
  2468. r += bytecode[0];
  2469. r += bytecode[1];
  2470. bytecode += 2;
  2471. }
  2472. return r;
  2473. }
  2474. void CMN_bytecodeOptimize(uint8_t *bytecode, uint32_t types,
  2475. CMN_Compiler *compiler)
  2476. {
  2477. #define _ITERATIONS 3 // how many times repeat some kind of processing
  2478. if (types & CMN_OPTIMIZE_REPLACE_OPS)
  2479. {
  2480. uint8_t *instr = bytecode + CMN_BYTECODE_HEADER_SIZE;
  2481. while (*instr != CMN_OPCODE_END)
  2482. {
  2483. // replace "push X, ??X" with "??C, CON X"
  2484. if (instr[0] == CMN_OPCODE_CON && !(instr[1] & CMN_MASK_INSTR_CON) &&
  2485. (instr[1] & CMN_MASK_INSTR_NOPOP) &&
  2486. instr[2] > CMN_OPCODE_SPECIALS &&
  2487. ((instr[2] & 0x03) == CMN_OPCODE_21) &&
  2488. !(instr[3] & CMN_MASK_INSTR_NOPOP))
  2489. {
  2490. /* note: if the constant continues with COCs, we can't do this because
  2491. the instructions won't fit, ALSO doing this with bigger constants
  2492. would lead to sign issues in type env. 0 (where we don't know the
  2493. number bit width -- here we just suppose it's bigger than 4). */
  2494. instr[0] = (instr[2] & (~0x03)) | CMN_OPCODE_1C1;
  2495. instr[1] &= ~CMN_MASK_INSTR_NOPOP;
  2496. instr[2] = CMN_OPCODE_CON;
  2497. instr[3] = instr[1];
  2498. if ((((instr[1] & 0x0f) == 0) && // operations that do nothing
  2499. (instr[0] == CMN_OPCODE_ADC || instr[0] == CMN_OPCODE_SUC ||
  2500. instr[0] == CMN_OPCODE_BOC || instr[0] == CMN_OPCODE_LOC)) ||
  2501. (((instr[1] & 0x0f) == 1) &&
  2502. (instr[0] == CMN_OPCODE_MUC || instr[0] == CMN_OPCODE_DIC ||
  2503. instr[0] == CMN_OPCODE_LAC || instr[0] == CMN_OPCODE_DSC)))
  2504. {
  2505. instr[0] = CMN_OPCODE_NOP;
  2506. instr[1] = 0;
  2507. }
  2508. }
  2509. else if (instr[0] == CMN_OPCODE_PAC && (instr[1] & CMN_MASK_INSTR_CON) &&
  2510. !(instr[3] & CMN_MASK_INSTR_CON))
  2511. {
  2512. if ((instr[1] & 0x0f) != 0) // "PAC 1 to 15" does nothing
  2513. {
  2514. instr[0] = CMN_OPCODE_NOP;
  2515. instr[1] = 0;
  2516. instr[2] = CMN_OPCODE_NOP;
  2517. instr[3] = 0;
  2518. }
  2519. else if ((instr[3] & 0x0f) == 0x0f) // "PAC 0 1" -> "POP"
  2520. {
  2521. instr[0] = CMN_OPCODE_POP;
  2522. instr[1] &= ~(CMN_MASK_INSTR_CON | 0x0f);
  2523. instr[2] = CMN_OPCODE_NOP;
  2524. instr[3] = 0;
  2525. }
  2526. }
  2527. /* now we simply collapse blocks of at most 16 pops into one pop of
  2528. multiple values (it could be done much better, this is just KISS) */
  2529. int popsInRow = 0;
  2530. while (instr[2 * popsInRow] == CMN_OPCODE_POP &&
  2531. (instr[2 * popsInRow + 1] & 0x0f) == 0 && popsInRow < 16)
  2532. popsInRow++;
  2533. if (popsInRow > 1)
  2534. {
  2535. for (int i = 1; i < popsInRow; ++i)
  2536. {
  2537. instr[2 * i] = CMN_OPCODE_NOP;
  2538. instr[2 * i + 1] = 0;
  2539. }
  2540. instr[1] = (instr[1] & 0xf0) | ((popsInRow - 1) & 0x0f);
  2541. }
  2542. instr += 2;
  2543. }
  2544. }
  2545. if (types & CMN_OPTIMIZE_REMOVE_NOPS)
  2546. {
  2547. uint8_t iterations = _ITERATIONS;
  2548. while (iterations)
  2549. {
  2550. uint8_t stop = 1;
  2551. uint32_t addr = 0;
  2552. while (bytecode[CMN_BYTECODE_HEADER_SIZE + 2 * addr] != CMN_OPCODE_END)
  2553. {
  2554. stop = 0;
  2555. uint16_t nopCount = 0;
  2556. while (bytecode[CMN_BYTECODE_HEADER_SIZE + 2 * (addr + nopCount)] ==
  2557. CMN_OPCODE_NOP)
  2558. nopCount++;
  2559. if (nopCount)
  2560. CMN_bytecodeRemoveInstrs(bytecode,addr,nopCount);
  2561. addr++;
  2562. }
  2563. if (stop)
  2564. break;
  2565. iterations--;
  2566. }
  2567. }
  2568. if (types & CMN_OPTIMIZE_INLINE)
  2569. {
  2570. uint8_t *instr = bytecode + CMN_BYTECODE_HEADER_SIZE;
  2571. while (*instr != CMN_OPCODE_END)
  2572. {
  2573. if (*instr == CMN_OPCODE_CAL)
  2574. {
  2575. uint8_t callLen = 0, funLen = 0;
  2576. const uint8_t *instr2 = bytecode + CMN_BYTECODE_HEADER_SIZE +
  2577. CMN_instrGetConst(instr) * 2;
  2578. while (instr[callLen * 2 + 1] & CMN_MASK_INSTR_CON)
  2579. callLen++;
  2580. callLen++;
  2581. while (instr2[funLen * 2] != CMN_OPCODE_RET)
  2582. funLen++;
  2583. if (funLen <= callLen) // will it fit?
  2584. {
  2585. funLen = 0;
  2586. while (instr2[2 * funLen] != CMN_OPCODE_RET)
  2587. {
  2588. instr[2 * funLen] = instr2[2 * funLen];
  2589. instr[2 * funLen + 1] = instr2[2 * funLen + 1];
  2590. funLen++;
  2591. }
  2592. while (funLen < callLen)
  2593. {
  2594. instr[2 * funLen] = CMN_OPCODE_NOP;
  2595. instr[2 * funLen + 1] = 0;
  2596. callLen--;
  2597. }
  2598. }
  2599. }
  2600. instr += 2;
  2601. }
  2602. }
  2603. if (types & CMN_OPTIMIZE_REMOVE_DEAD)
  2604. {
  2605. for (uint8_t i = 0; i < _ITERATIONS; ++i)
  2606. {
  2607. uint32_t addr = 0;
  2608. const uint8_t *instr = bytecode + CMN_BYTECODE_HEADER_SIZE;
  2609. uint16_t funcIndex = 0;
  2610. while (*instr != CMN_OPCODE_END)
  2611. {
  2612. // TODO: remove other unused code, e.g. if 0 etc.
  2613. if (instr[0] == CMN_OPCODE_DES && instr[1] == CMN_DES_FUNC)
  2614. {
  2615. uint32_t funcLen = 0;
  2616. while (instr[2 * funcLen] != CMN_OPCODE_RET)
  2617. funcLen++;
  2618. funcLen++;
  2619. uint8_t isCalled = 0;
  2620. const uint8_t *instr2 = bytecode + CMN_BYTECODE_HEADER_SIZE;
  2621. while (*instr2 != CMN_OPCODE_END)
  2622. {
  2623. // if something jumps into the function (with call or goto)
  2624. if (instr2[0] == CMN_OPCODE_CAL || instr2[0] == CMN_OPCODE_JMA)
  2625. {
  2626. uint32_t c = CMN_instrGetConst(instr2);
  2627. if (c >= addr + 2 && c < addr + funcLen)
  2628. {
  2629. isCalled = 1;
  2630. break;
  2631. }
  2632. }
  2633. instr2 += 2;
  2634. }
  2635. if (!isCalled)
  2636. {
  2637. CMN_bytecodeRemoveInstrs(bytecode,addr,funcLen);
  2638. instr -= 2;
  2639. addr--;
  2640. // remove the function from compiler's symbol table
  2641. if (compiler != 0)
  2642. {
  2643. uint16_t fi = 0;
  2644. for (uint16_t i = 0; i < compiler->symbolCount; ++i)
  2645. if (compiler->symbolTable[i * CMN_STRING_PSEUDOHASH_SIZE] == 'f')
  2646. {
  2647. if (fi == funcIndex)
  2648. {
  2649. compiler->symbolTable[i * CMN_STRING_PSEUDOHASH_SIZE] = 'n';
  2650. break;
  2651. }
  2652. fi++;
  2653. }
  2654. }
  2655. }
  2656. else
  2657. funcIndex++;
  2658. }
  2659. instr += 2;
  2660. addr++;
  2661. }
  2662. }
  2663. }
  2664. #undef _ITERATIONS
  2665. }
  2666. void CMN_preprocessorInit(CMN_Preprocessor *preprocessor, uint8_t minify,
  2667. void (*outFunction)(char))
  2668. {
  2669. preprocessor->state = _CMN_PREPPROCESSOR_IN;
  2670. preprocessor->minify = minify;
  2671. preprocessor->outFunction = outFunction;
  2672. CMN_tokenizerInit(&preprocessor->tokenizer);
  2673. }
  2674. void _CMN_preprocessorOutputChar(void (*outFunc)(char), char c)
  2675. {
  2676. if (c >= ' ' && c != '"' && c != '[' && c != ']' && c != '#')
  2677. {
  2678. outFunc('"'); outFunc(c); outFunc('"');
  2679. }
  2680. else
  2681. { // output chars that can't be inside str. lit. as num. lit.
  2682. outFunc('0' + c / 100);
  2683. outFunc('0' + (c / 10) % 10);
  2684. outFunc('0' + c% 10);
  2685. }
  2686. }
  2687. uint8_t CMN_preprocessorFeedChar(CMN_Preprocessor *preprocessor, char c)
  2688. {
  2689. uint8_t separate = 0; // 0: no, 1: space, 2: newline
  2690. #define _PCH(c) preprocessor->outFunction(c);
  2691. if (preprocessor->state == _CMN_PREPPROCESSOR_IN)
  2692. {
  2693. if (c == ']')
  2694. {
  2695. preprocessor->state = _CMN_PREPPROCESSOR_OUT;
  2696. _PCH('\n') _PCH('#') _PCH(')') _PCH('\n')
  2697. separate = 1;
  2698. }
  2699. else if (c == '[')
  2700. preprocessor->state = CMN_PREPROCESSOR_ERROR;
  2701. else
  2702. preprocessor->outFunction(c);
  2703. }
  2704. else if (preprocessor->state == _CMN_PREPPROCESSOR_OUT)
  2705. {
  2706. if (c == '[' || c == 0)
  2707. {
  2708. preprocessor->state = _CMN_PREPPROCESSOR_IN;
  2709. _PCH('\n') _PCH('#') _PCH('(') _PCH('\n')
  2710. separate = 1;
  2711. }
  2712. else if (c == ']')
  2713. preprocessor->state = CMN_PREPROCESSOR_ERROR;
  2714. else if (!preprocessor->minify)
  2715. {
  2716. _CMN_preprocessorOutputChar(preprocessor->outFunction,c);
  2717. _PCH(' ')
  2718. _PCH('-') _PCH('>')
  2719. _PCH('\n')
  2720. }
  2721. else
  2722. switch (CMN_tokenizerFeedChar(&(preprocessor->tokenizer),c))
  2723. {
  2724. case CMN_TOKENIZER_TOKEN:
  2725. {
  2726. const char *tokenC = preprocessor->tokenizer.tokenString;
  2727. _PCH(' ')
  2728. while (*tokenC != 0)
  2729. {
  2730. _PCH(' ')
  2731. _CMN_preprocessorOutputChar(preprocessor->outFunction,*tokenC);
  2732. _PCH(' ') _PCH('-') _PCH('>')
  2733. tokenC++;
  2734. }
  2735. separate = 2;
  2736. break;
  2737. }
  2738. case CMN_TOKENIZER_ERROR:
  2739. preprocessor->state = CMN_PREPROCESSOR_ERROR;
  2740. break;
  2741. default: break;
  2742. }
  2743. }
  2744. if (separate)
  2745. {
  2746. _PCH(' ')
  2747. _PCH(separate == 1 ? '3' : '1')
  2748. _PCH(separate == 1 ? '2' : '0')
  2749. _PCH(' ') _PCH('-') _PCH('>') _PCH('\n')
  2750. }
  2751. #undef _PCH
  2752. return preprocessor->state;
  2753. }
  2754. int CMN_interpretStr(const char *source, uint8_t *memory,
  2755. uint32_t memorySize, uint16_t minCells, uint32_t maxSymbols,
  2756. uint32_t maxSteps, CMN_IOFunction ioFunction,
  2757. void (*statusCallback)(uint8_t, uint32_t, CMN_Interpreter *))
  2758. {
  2759. CMN_Compiler compiler;
  2760. CMN_Interpreter interpreter;
  2761. maxSymbols *= CMN_STRING_PSEUDOHASH_SIZE;
  2762. if (maxSymbols > memorySize)
  2763. {
  2764. if (statusCallback)
  2765. statusCallback(1,0,0);
  2766. return 0;
  2767. }
  2768. CMN_compilerInit(&compiler,memory,memorySize - maxSymbols,
  2769. (char *) (memory + memorySize - maxSymbols),maxSymbols,0);
  2770. uint32_t strPos = 0;
  2771. while (1) // compile
  2772. {
  2773. if (CMN_compilerFeedChar(&compiler,source[strPos]) != CMN_COMPILER_OK)
  2774. {
  2775. if (statusCallback)
  2776. statusCallback(1,strPos,0);
  2777. return 0;
  2778. }
  2779. if (source[strPos] == 0)
  2780. break;
  2781. strPos++;
  2782. }
  2783. CMN_bytecodeOptimize(memory,CMN_OPTIMIZE_ALL,0);
  2784. uint8_t *ramStart = memory + CMN_BYTECODE_HEADER_SIZE;
  2785. while (*ramStart != CMN_OPCODE_END)
  2786. ramStart += 2;
  2787. ramStart += 2;
  2788. CMN_interpreterInit(&interpreter,memory,ramStart,
  2789. memory + memorySize - ramStart,minCells,ioFunction,0,0,0);
  2790. // run:
  2791. uint8_t status = CMN_interpreterStep(&interpreter,maxSteps);
  2792. if (status != CMN_INTERPRETER_END)
  2793. {
  2794. if (statusCallback != 0)
  2795. statusCallback(2,0,&interpreter);
  2796. return 0;
  2797. }
  2798. if (statusCallback != 0)
  2799. statusCallback(0,0,&interpreter);
  2800. return interpreter.memory0 != 0 ?
  2801. interpreter.memory0[interpreter.pointers[0][0]] : 0;
  2802. }
  2803. uint8_t CMN_bytecodeCheckSanity(const uint8_t *bytecode, uint32_t maxSize)
  2804. {
  2805. if (maxSize <= CMN_BYTECODE_HEADER_SIZE)
  2806. return CMN_BYTECODE_SANITY_ERROR; // too small, no space for END
  2807. if (bytecode[0] != 'C' || bytecode[1] != 'B')
  2808. return CMN_BYTECODE_SANITY_ERROR_HEADER;
  2809. const uint8_t *stopper = bytecode + maxSize;
  2810. const uint8_t *bytecodeOld = bytecode;
  2811. bytecode += CMN_BYTECODE_HEADER_SIZE;
  2812. char instrStr[16];
  2813. while (*bytecode != CMN_OPCODE_END)
  2814. {
  2815. CMN_instrToStr(bytecode,instrStr);
  2816. if (instrStr[0] == ' ') // no name => unknown opcode
  2817. return CMN_BYTECODE_SANITY_ERROR_INSTR;
  2818. bytecode += 2;
  2819. if (bytecode >= stopper)
  2820. return CMN_BYTECODE_SANITY_ERROR_NO_END;
  2821. }
  2822. if (bytecodeOld[CMN_BYTECODE_CHECKSUM_BYTE] !=
  2823. CMN_bytecodeChecksum(bytecodeOld))
  2824. return CMN_BYTECODE_SANITY_ERROR_CHECKSUM;
  2825. return CMN_BYTECODE_SANITY_OK;
  2826. }
  2827. #endif // guard