parser.c 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /*
  2. * This program is free software; you can redistribute it and/or
  3. * modify it under the terms of the GNU General Public License
  4. * as published by the Free Software Foundation; either version 2
  5. * of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. * Author: g0tsu
  12. * Email: g0tsu at dnmx.0rg
  13. */
  14. #include <libhighlight.h>
  15. #include <stdlib.h>
  16. #include <stdio.h>
  17. #include <errno.h>
  18. #include <unistd.h>
  19. #include <string.h>
  20. /*
  21. * Single iteration parser/tokenizer
  22. * with comments and quotes support
  23. */
  24. #define MAX_WORD_SIZE 4096
  25. hl_root * hl_parser(uint8_t *buffer, size_t len,
  26. void (*user_next_word)(char *, int, hl_node *, hl_ctx *)) {
  27. char *word = (char *)malloc(sizeof(char) * MAX_WORD_SIZE);
  28. int word_len = 0;
  29. char iscomment = 0, skipcomment = 0;
  30. char _prev[1], _pprev[1];
  31. char *pp = _prev, *ppp = _pprev, *cp = buffer;
  32. size_t lenct = 0;
  33. void (*next_word)(char *, int, hl_node *, hl_ctx *) =
  34. (user_next_word ? user_next_word : hl_next_word);
  35. hl_ctx *ctx = (hl_ctx *)malloc(sizeof(*ctx));
  36. memset(ctx, 0, sizeof(*ctx));
  37. hl_root *res = (hl_root *)malloc(sizeof(*res));
  38. memset(res, 0, sizeof(*res));
  39. res->node = hl_node_create();
  40. while (lenct++ != len) {
  41. switch (*cp) {
  42. case ' ':
  43. case '\n':
  44. case '\t':
  45. case '\v':
  46. if (iscomment || skipcomment)
  47. break;
  48. goto next_word;
  49. case '\'':
  50. if (iscomment || (*pp == '\\' && *ppp != '\\'))
  51. break;
  52. skipcomment = !skipcomment;
  53. break;
  54. case '}':
  55. case '{':
  56. case ')':
  57. case '(':
  58. case ';':
  59. case ',':
  60. case ':':
  61. case '/':
  62. case '>':
  63. case '<':
  64. if (iscomment || skipcomment)
  65. break;
  66. goto next_word;
  67. break;
  68. case '"':
  69. if (skipcomment || *pp == '\\')
  70. break;
  71. iscomment = !iscomment;
  72. break;
  73. default:
  74. break;
  75. }
  76. if (word_len < MAX_WORD_SIZE)
  77. word[word_len++] = *cp;
  78. else {
  79. goto next_word;
  80. }
  81. *ppp = *pp;
  82. *pp = *cp;
  83. if (0) {
  84. next_word:
  85. if (word_len != 0) {
  86. res->text_size += word_len;
  87. res->size++;
  88. res->node = hl_node_insert(res->node);
  89. *(word + word_len) = 0;
  90. next_word(word, word_len, res->node, ctx);
  91. word_len = 0;
  92. }
  93. res->text_size++;
  94. res->size++;
  95. *word = *cp;
  96. res->node = hl_node_insert(res->node);
  97. *(word + 1) = 0;
  98. next_word(word, 1, res->node, ctx);
  99. }
  100. if (lenct == len) {
  101. if (word_len > 0) {
  102. res->text_size += word_len;
  103. res->size++;
  104. res->node = hl_node_insert(res->node);
  105. *(word + word_len) = 0;
  106. next_word(word, word_len, res->node, ctx);
  107. }
  108. break;
  109. }
  110. cp++;
  111. }
  112. /*
  113. * Always end up with newline
  114. */
  115. if (*cp != '\n') {
  116. res->node = hl_node_insert(res->node);
  117. next_word("\n", 1, res->node, ctx);
  118. }
  119. free(ctx);
  120. free(word);
  121. return res;
  122. }
  123. hl_root * hl_parser_file(
  124. int fd, void (*user_next_word)(char *, int, hl_node *, hl_ctx *)) {
  125. char *word = (char *)malloc(sizeof(char) * MAX_WORD_SIZE);
  126. int word_len = 0;
  127. char iscomment = 0, skipcomment = 0;
  128. char _prev[1], _pprev[1], _buff[1];
  129. char *pp = _prev, *ppp = _pprev, *cp = _buff;
  130. size_t lenct = 0;
  131. void (*next_word)(char *, int, hl_node *, hl_ctx *) =
  132. (user_next_word ? user_next_word : hl_next_word);
  133. hl_ctx *ctx = (hl_ctx *)malloc(sizeof(*ctx));
  134. memset(ctx, 0, sizeof(*ctx));
  135. hl_root *res = (hl_root *)malloc(sizeof(*res));
  136. memset(res, 0, sizeof(*res));
  137. res->node = hl_node_create();
  138. while (read(fd, cp, 1) != 0) {
  139. switch (*cp) {
  140. case ' ':
  141. case '\n':
  142. case '\t':
  143. case '\v':
  144. if (iscomment || skipcomment)
  145. break;
  146. goto next_fword;
  147. case '\'':
  148. if (iscomment || (*pp == '\\' && *ppp != '\\'))
  149. break;
  150. skipcomment = !skipcomment;
  151. break;
  152. case '}':
  153. case '{':
  154. case ')':
  155. case '(':
  156. case ';':
  157. case ',':
  158. case ':':
  159. case '/':
  160. case '>':
  161. case '<':
  162. if (iscomment || skipcomment)
  163. break;
  164. goto next_fword;
  165. break;
  166. case '"':
  167. if (skipcomment || *pp == '\\')
  168. break;
  169. iscomment = !iscomment;
  170. break;
  171. default:
  172. break;
  173. }
  174. if (word_len < MAX_WORD_SIZE)
  175. word[word_len++] = *cp;
  176. else {
  177. goto next_fword;
  178. }
  179. *ppp = *pp;
  180. *pp = *cp;
  181. if (0) {
  182. next_fword:
  183. if (word_len != 0) {
  184. res->text_size += word_len;
  185. res->size++;
  186. res->node = hl_node_insert(res->node);
  187. *(word + word_len) = 0;
  188. next_word(word, word_len, res->node, ctx);
  189. word_len = 0;
  190. }
  191. res->text_size++;
  192. res->size++;
  193. *word = *cp;
  194. res->node = hl_node_insert(res->node);
  195. *(word + 1) = 0;
  196. next_word(word, 1, res->node, ctx);
  197. }
  198. }
  199. /*
  200. * Always end up with newline
  201. */
  202. if (*cp != '\n') {
  203. res->node = hl_node_insert(res->node);
  204. next_word("\n", 1, res->node, ctx);
  205. }
  206. free(ctx);
  207. free(word);
  208. return res;
  209. }