tokenizer.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. /*
  2. * This program is free software; you can redistribute it and/or
  3. * modify it under the terms of the GNU General Public License
  4. * as published by the Free Software Foundation; either version 2
  5. * of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. * Author: g0tsu
  12. * Email: g0tsu at dnmx.0rg
  13. */
  14. #include <libhighlight.h>
  15. #include <string.h>
  16. static hl_node * hl_token_find_prev(hl_node *node) {
  17. hl_node *iter = node;
  18. while (iter->parent) {
  19. iter = iter->parent;
  20. if (iter->type != HL_NODE_SPACE)
  21. return iter;
  22. }
  23. return NULL;
  24. }
  25. static void hl_token_check_function(hl_node *node) {
  26. if (hl_token_find_prev(node)->type == HL_NODE_TYPE)
  27. node->type = HL_NODE_FUNCTION;
  28. else if (node->type == 0)
  29. node->type = HL_NODE_FUNCTION_CALL;
  30. }
  31. static void hl_token_single(uint8_t c, hl_node *node, hl_ctx *ctx) {
  32. hl_node *prev;
  33. if (c >= '0' && c <= '9') {
  34. node->type = HL_NODE_NUMBER;
  35. return;
  36. }
  37. switch (c) {
  38. /*
  39. * Multiline comment
  40. */
  41. case '#':
  42. if (ctx->last == 0 && ctx->sh == 1) {
  43. ctx->last = HL_NODE_COMMENT_START;
  44. node->type = HL_NODE_COMMENT_START;
  45. }
  46. break;
  47. case '*':
  48. if (ctx->sh == 1)
  49. break;
  50. prev = node->parent;
  51. if (ctx->last == 0 && prev && prev->text_len == 1 && prev->text[0] == '/') {
  52. prev->type = HL_NODE_MCOMMENT_START;
  53. ctx->last = HL_NODE_MCOMMENT_START;
  54. }
  55. break;
  56. /*
  57. * A single line comment
  58. */
  59. case '/':
  60. prev = hl_token_find_prev(node);
  61. if (ctx->last == 0 && prev && prev->text_len == 1 && prev->text[0] == '/') {
  62. prev->type = HL_NODE_COMMENT_START;
  63. ctx->last = HL_NODE_COMMENT_START;
  64. }
  65. if (ctx->last == HL_NODE_MCOMMENT_START && prev->text[0] == '*') {
  66. node->type = HL_NODE_MCOMMENT_END;
  67. ctx->last = 0;
  68. }
  69. break;
  70. case ' ':
  71. case '\t':
  72. case '\v':
  73. node->type = HL_NODE_SPACE;
  74. break;
  75. case '\n':
  76. node->type = HL_NODE_NEWLINE;
  77. prev = hl_token_find_prev(node);
  78. if (prev && ctx->last == HL_NODE_COMMENT_START) {
  79. if (prev->type == HL_NODE_COMMENT_START)
  80. prev->type = HL_NODE_CHCOMMENT;
  81. else
  82. prev->type = HL_NODE_COMMENT_END;
  83. ctx->last = 0;
  84. }
  85. break;
  86. case '(':
  87. node->type = HL_NODE_SCOPE_START;
  88. hl_token_check_function(hl_token_find_prev(node));
  89. break;
  90. case ')':
  91. node->type = HL_NODE_SCOPE_END;
  92. break;
  93. case '>':
  94. case '<':
  95. node->type = HL_NODE_SYMBOL;
  96. default:
  97. break;
  98. }
  99. }
  100. void hl_next_word(char *word, int len, hl_node *node, hl_ctx *ctx) {
  101. node->text = strdup(word);
  102. node->text_len = len;
  103. if (len == 1) {
  104. hl_token_single(word[0], node, ctx);
  105. return;
  106. }
  107. if (ctx->last == HL_NODE_MCOMMENT_START || ctx->last == HL_NODE_COMMENT_START)
  108. return;
  109. if (word[0] == '"' && word[len -1] == '"') {
  110. node->type = HL_NODE_QUOTE;
  111. return;
  112. }
  113. if (word[0] == '\'' && word[len-1] == '\'') {
  114. node->type = HL_NODE_QUOTE;
  115. return;
  116. }
  117. if (hl_keyword_expr(word, len)) {
  118. node->type = HL_NODE_EXPR;
  119. return;
  120. }
  121. if (hl_keyword_type(word, len)) {
  122. node->type = HL_NODE_TYPE;
  123. return;
  124. }
  125. if (hl_keyword_decl(word, len)) {
  126. node->type = HL_NODE_DECL;
  127. return;
  128. }
  129. if (hl_keyword_is_number(word, len)) {
  130. node->type = HL_NODE_NUMBER;
  131. return;
  132. }
  133. if (word[0] == '#' && word[1] == '!') {
  134. ctx->last = HL_NODE_COMMENT_START;
  135. ctx->sh = 1;
  136. node->type = HL_NODE_COMMENT_START;
  137. return;
  138. }
  139. if (ctx->sh == 1) {
  140. if (word[0] == '$') {
  141. node->type = HL_NODE_TYPE;
  142. return;
  143. }
  144. if (word[0] == '#') {
  145. ctx->last = HL_NODE_COMMENT_START;
  146. node->type = HL_NODE_COMMENT_START;
  147. return;
  148. }
  149. } else {
  150. if (word[0] == '#' && hl_keyword_decl(++word, len - 1)) {
  151. node->type = HL_NODE_DECL;
  152. return;
  153. }
  154. }
  155. }