tokenizer.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. /*
  2. * This program is free software; you can redistribute it and/or
  3. * modify it under the terms of the GNU General Public License
  4. * as published by the Free Software Foundation; either version 2
  5. * of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. * Author: g0tsu
  12. * Email: g0tsu at dnmx.0rg
  13. */
  14. #include <libhighlight.h>
  15. #include <string.h>
  16. static hl_node * hl_token_find_prev(hl_node *node) {
  17. hl_node *iter = node;
  18. while (iter->parent) {
  19. iter = iter->parent;
  20. if (iter->type != HL_NODE_SPACE)
  21. return iter;
  22. }
  23. return NULL;
  24. }
  25. static void hl_token_check_function(hl_node *node) {
  26. hl_node *prev = hl_token_find_prev(node);
  27. if (prev && node->type == HL_NODE_TYPE)
  28. node->type = HL_NODE_FUNCTION;
  29. else if (node->type == 0)
  30. node->type = HL_NODE_FUNCTION_CALL;
  31. }
  32. static void hl_token_single(uint8_t c, hl_node *node, hl_ctx *ctx) {
  33. hl_node *prev;
  34. if (c >= '0' && c <= '9') {
  35. node->type = HL_NODE_NUMBER;
  36. return;
  37. }
  38. switch (c) {
  39. /*
  40. * Multiline comment
  41. */
  42. case '#':
  43. if (ctx->last == 0 && ctx->sh == 1) {
  44. ctx->last = HL_NODE_COMMENT_START;
  45. node->type = HL_NODE_COMMENT_START;
  46. }
  47. break;
  48. case '*':
  49. if (ctx->sh == 1)
  50. break;
  51. prev = node->parent;
  52. if (ctx->last == 0 && prev && prev->text_len == 1 && prev->text[0] == '/') {
  53. prev->type = HL_NODE_MCOMMENT_START;
  54. ctx->last = HL_NODE_MCOMMENT_START;
  55. }
  56. break;
  57. /*
  58. * A single line comment
  59. */
  60. case '/':
  61. prev = hl_token_find_prev(node);
  62. if (ctx->last == 0 && prev && prev->text_len == 1 && prev->text[0] == '/') {
  63. prev->type = HL_NODE_COMMENT_START;
  64. ctx->last = HL_NODE_COMMENT_START;
  65. }
  66. if (ctx->last == HL_NODE_MCOMMENT_START && prev->text[0] == '*') {
  67. node->type = HL_NODE_MCOMMENT_END;
  68. ctx->last = 0;
  69. }
  70. break;
  71. case ' ':
  72. case '\t':
  73. case '\v':
  74. node->type = HL_NODE_SPACE;
  75. break;
  76. case '\n':
  77. node->type = HL_NODE_NEWLINE;
  78. prev = hl_token_find_prev(node);
  79. if (prev && ctx->last == HL_NODE_COMMENT_START) {
  80. if (prev->type == HL_NODE_COMMENT_START)
  81. prev->type = HL_NODE_CHCOMMENT;
  82. else
  83. prev->type = HL_NODE_COMMENT_END;
  84. ctx->last = 0;
  85. }
  86. break;
  87. case '(':
  88. node->type = HL_NODE_SCOPE_START;
  89. hl_token_check_function(hl_token_find_prev(node));
  90. break;
  91. case ')':
  92. node->type = HL_NODE_SCOPE_END;
  93. break;
  94. case '>':
  95. case '<':
  96. node->type = HL_NODE_SYMBOL;
  97. default:
  98. break;
  99. }
  100. }
  101. void hl_next_word(char *word, int len, hl_node *node, hl_ctx *ctx) {
  102. node->text = strdup(word);
  103. node->text_len = len;
  104. if (len == 1) {
  105. hl_token_single(word[0], node, ctx);
  106. return;
  107. }
  108. if (ctx->last == HL_NODE_MCOMMENT_START || ctx->last == HL_NODE_COMMENT_START)
  109. return;
  110. if (word[0] == '"' && word[len -1] == '"') {
  111. node->type = HL_NODE_QUOTE;
  112. return;
  113. }
  114. if (word[0] == '\'' && word[len-1] == '\'') {
  115. node->type = HL_NODE_QUOTE;
  116. return;
  117. }
  118. if (hl_keyword_expr(word, len)) {
  119. node->type = HL_NODE_EXPR;
  120. return;
  121. }
  122. if (hl_keyword_type(word, len)) {
  123. node->type = HL_NODE_TYPE;
  124. return;
  125. }
  126. if (hl_keyword_decl(word, len)) {
  127. node->type = HL_NODE_DECL;
  128. return;
  129. }
  130. if (hl_keyword_is_number(word, len)) {
  131. node->type = HL_NODE_NUMBER;
  132. return;
  133. }
  134. if (word[0] == '#' && word[1] == '!') {
  135. ctx->last = HL_NODE_COMMENT_START;
  136. ctx->sh = 1;
  137. node->type = HL_NODE_COMMENT_START;
  138. return;
  139. }
  140. if (ctx->sh == 1) {
  141. if (word[0] == '$') {
  142. node->type = HL_NODE_TYPE;
  143. return;
  144. }
  145. if (word[0] == '#') {
  146. ctx->last = HL_NODE_COMMENT_START;
  147. node->type = HL_NODE_COMMENT_START;
  148. return;
  149. }
  150. } else {
  151. if (word[0] == '#' && hl_keyword_decl(++word, len - 1)) {
  152. node->type = HL_NODE_DECL;
  153. return;
  154. }
  155. }
  156. }