hilite.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /* hilite.c, generic syntax highlighting, Ait Emacs, Kevin Bloom, BSD 3-Clause, 2023-2025 */
  2. #include "header.h"
  3. #include "util.h"
  /* Highlight state of the character parse_text() is classifying; it is
     loaded from next_state at the start of every non-skipped call. */
  4. int state = ID_DEFAULT;
  /* State that the next non-skipped call to parse_text() will start in. */
  5. int next_state = ID_DEFAULT;
  /* While positive, parse_text() returns the current state without looking
     at the buffer. It is decremented on every parse_text() call and by
     dec_skip(), so it can become negative. */
  6. int skip_count = 0;
  /* State that parse_text() switches to, while skipping, once exclude_count
     has reached zero. ID_DEFAULT means nothing is pending. */
  7. int exclude_state = ID_DEFAULT;
  /* Countdown used together with exclude_state; decremented once per skipped
     character while it is non-zero. */
  8. int exclude_count = 0;
  9. char_t get_at(buffer_t *bp, point_t pt)
  10. {
  11. return (*ptr(bp, pt));
  12. }
  13. void set_parse_state(buffer_t *bp, point_t pt, window_t *wp, int loop)
  14. {
  15. register point_t po;
  16. state = ID_DEFAULT;
  17. next_state = ID_DEFAULT;
  18. skip_count = 0;
  19. if(bp->b_mode != NULL && loop) {
  20. for (po =0; po < pt; po++)
  21. parse_text(bp, po, TRUE);
  22. wp->w_hilite = state;
  23. }
  24. }
  25. void write_parse_state(window_t *wp)
  26. {
  27. state = wp->w_hilite;
  28. next_state = wp->w_hilite;
  29. skip_count = 0;
  30. }
  31. /* we don't bother running the syntax logic on space chars
  32. therefore we must manually decrease the skip_count if we are
  33. in a syntax highlight that includes space chars.
  34. */
  35. void dec_skip()
  36. {
  37. skip_count--;
  38. }
  39. /* quick set to TRUE causes the syntax highlighting for keywords to
  40. be disabled. The purpose is so that when we have to determine the
  41. state at b_page we can do it quicker and reduce potential lag.
  42. */
  /*
   * parse_text: classify the character at offset pt of buffer bp and return
   * its highlight state (an ID_* value).
   *
   * The function is a state machine driven by the file-scope variables
   * state, next_state, skip_count, exclude_state and exclude_count, so the
   * result depends on the calls made before it. set_parse_state() in this
   * file calls it for consecutive offsets starting at 0.
   *
   * bp    - buffer to read; bp->b_mode may be NULL, otherwise it supplies the
   *         comment delimiters (mlc, emlc, slc), the flags bmls, bqas and
   *         sqas, the saiv value and the keywords table.
   * pt    - offset of the character to classify.
   * quick - when non-zero the keyword table is not consulted.
   *
   * NOTE(review): several character literals in the keyword matcher show up
   * empty in this copy of the file. Presumably they were control-character
   * pattern markers lost in extraction; restore them from the original
   * source before compiling.
   * NOTE(review): c_prev is read at pt-1 even when pt is 0; confirm that
   * ptr() tolerates that offset.
   */
  43. int parse_text(buffer_t *bp, point_t pt, int quick)
  44. {
  45. // if(bp->b_mode == NULL)
  46. // return state;
  /* Skipping: while skip_count is positive the character is not examined
     and the current state is returned. The counter is decremented on every
     call, also when it is already zero or negative. A pending exclude_state
     replaces state as soon as exclude_count has been counted down to zero. */
  47. if (skip_count-- > 0) {
  48. if(exclude_count != 0)
  49. exclude_count--;
  50. if(exclude_state != ID_DEFAULT &&
  51. exclude_count == 0) {
  52. state = exclude_state;
  53. exclude_state = ID_DEFAULT;
  54. }
  55. return state;
  56. }
  57. char_t c_now = get_at(bp, pt);
  58. char_t c_prev = get_at(bp, pt-1);
  59. char_t next = c_now;
  60. int valid = TRUE, k = 0;
  /* Adopt the state the previous call scheduled for this character. */
  61. state = next_state;
  /* Block comment start: in the default state compare the buffer at pt with
     every character of mlc. On a match one further character is skipped and
     both state and next_state become ID_BLOCK_COMMENT.
     NOTE(review): unlike the slc test further down, an empty mlc string is
     not excluded here; with an empty mlc the loop body never runs and valid
     stays TRUE. */
  62. if (state == ID_DEFAULT &&
  63. bp->b_mode != NULL &&
  64. bp->b_mode->mlc != NULL) {
  65. next = c_now;
  66. for(int i = 0; bp->b_mode->mlc[i] != '\0'; i++) {
  67. next = get_at(bp, pt + i);
  68. if(next != bp->b_mode->mlc[i]) {
  69. valid = FALSE;
  70. break;
  71. }
  72. }
  73. if(valid) {
  74. skip_count = 1;
  75. return (next_state = state = ID_BLOCK_COMMENT);
  76. }
  77. valid = TRUE;
  78. }
  /* Block comment end: inside a block comment compare the buffer at pt with
     emlc. On a match the remaining strlen(emlc) - 1 delimiter characters are
     skipped, this character is still reported as comment, and the state
     after the delimiter is ID_DEFAULT. */
  79. if (state == ID_BLOCK_COMMENT &&
  80. bp->b_mode != NULL &&
  81. bp->b_mode->emlc != NULL) {
  82. next = c_now;
  83. for(int i = 0; bp->b_mode->emlc[i] != '\0'; i++) {
  84. next = get_at(bp, pt + i);
  85. if(next != bp->b_mode->emlc[i]) {
  86. valid = FALSE;
  87. break;
  88. }
  89. }
  90. if(valid) {
  91. skip_count = strlen(bp->b_mode->emlc) - 1;
  92. next_state = ID_DEFAULT;
  93. return ID_BLOCK_COMMENT;
  94. }
  95. valid = TRUE;
  96. }
  /* Line comment start: same comparison against slc, attempted only when
     slc is a non-empty string. */
  97. if (state == ID_DEFAULT &&
  98. bp->b_mode != NULL &&
  99. bp->b_mode->slc != NULL &&
  100. bp->b_mode->slc[0] != '\0') {
  101. next = c_now;
  102. for(int i = 0; bp->b_mode->slc[i] != '\0'; i++) {
  103. next = get_at(bp, pt + i);
  104. if(next != bp->b_mode->slc[i]) {
  105. valid = FALSE;
  106. break;
  107. }
  108. }
  109. if(valid) {
  110. skip_count = 1;
  111. return (next_state = state = ID_LINE_COMMENT);
  112. }
  113. valid = TRUE;
  114. }
  /* A newline ends a line comment; ID_DEFAULT is returned for the newline. */
  115. if (state == ID_LINE_COMMENT && c_now == '\n')
  116. return (next_state = ID_DEFAULT);
  /* Double-quoted string start. The rest of the line (up to the offset in
     end) is scanned first; the string state is entered when a closing
     double quote is found, when a backslash is directly followed by a
     newline, or, for a mode whose bmls flag is zero, as soon as the scan
     loop runs at all. Otherwise the quote falls through to the tests below. */
  117. if (state == ID_DEFAULT && c_now == '"') {
  118. int enable = FALSE;
  119. char_t z = get_at(bp, pt+1);
  120. point_t end = pos(bp, bp->b_ebuf);
  121. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  122. if(z == '"') {
  123. enable = TRUE;
  124. break;
  125. }
  126. if((bp->b_mode != NULL && !bp->b_mode->bmls) || (z == '\\' && get_at(bp, i+1) == '\n')) {
  127. enable = TRUE;
  128. break;
  129. }
  130. }
  131. if(enable)
  132. return (next_state = ID_DOUBLE_STRING);
  133. }
  /* Backtick string start, only for modes with the bqas flag set. */
  134. if (state == ID_DEFAULT &&
  135. bp->b_mode != NULL &&
  136. bp->b_mode->bqas &&
  137. c_now == '`')
  138. return (next_state = ID_BACK_STRING);
  /* Single-quoted string start, only for modes with the sqas flag set and
     only when a closing single quote exists before the next newline. */
  139. if (state == ID_DEFAULT &&
  140. bp->b_mode != NULL &&
  141. bp->b_mode->sqas &&
  142. c_now == '\'') {
  143. int enable = FALSE;
  144. char_t z = get_at(bp, pt+1);
  145. point_t end = pos(bp, bp->b_ebuf);
  146. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  147. if(z == '\'') {
  148. enable = TRUE;
  149. break;
  150. }
  151. }
  152. if(enable)
  153. return (next_state = ID_SINGLE_STRING);
  154. }
  /* Inside a string a backslash makes the following character be skipped,
     so an escaped quote does not close the string. */
  155. if (state == ID_DOUBLE_STRING && c_now == '\\') {
  156. skip_count = 1;
  157. return (next_state = ID_DOUBLE_STRING);
  158. }
  /* Closing quote: still reported as string, default state afterwards. */
  159. if (state == ID_DOUBLE_STRING && c_now == '"') {
  160. next_state = ID_DEFAULT;
  161. return ID_DOUBLE_STRING;
  162. }
  163. if (state == ID_SINGLE_STRING && c_now == '\\') {
  164. skip_count = 1;
  165. return (next_state = ID_SINGLE_STRING);
  166. }
  /* NOTE(review): same condition as the backtick test earlier in this
     function; neither state nor c_now changes in between, so this copy
     looks redundant. */
  167. if (state == ID_DEFAULT &&
  168. bp->b_mode != NULL &&
  169. bp->b_mode->bqas &&
  170. c_now == '`')
  171. return (next_state = ID_BACK_STRING);
  172. if (state == ID_BACK_STRING && c_now == '\\') {
  173. skip_count = 1;
  174. return (next_state = ID_BACK_STRING);
  175. }
  176. if (state == ID_SINGLE_STRING && c_now == '\'') {
  177. next_state = ID_DEFAULT;
  178. return ID_SINGLE_STRING;
  179. }
  180. if (state == ID_BACK_STRING && c_now == '`') {
  181. next_state = ID_DEFAULT;
  182. return ID_BACK_STRING;
  183. }
  /* ep bounds the forward scans below; presumably the offset of the end of
     the buffer (value of pos() on b_ebuf) - confirm. sub is the amount
     subtracted from the match length when skip_count is set. */
  184. point_t ep = pos(bp, bp->b_ebuf);
  185. int sub = 1;
  /* Keyword matching: only in the default state, only when quick is zero
     and the mode has a keywords table. The table ends at the first entry
     whose word is NULL. */
  186. if(bp->b_mode != NULL && !quick &&
  187. bp->b_mode->keywords != NULL &&
  188. state == ID_DEFAULT) {
  189. for(int i = 0; bp->b_mode->keywords[i].word != NULL; i++) {
  /* l indexes the pattern string, k counts buffer characters consumed
     starting at pt, t records how a wildcard scan ended (0 = nothing found). */
  190. int l = 0, t = 0;
  191. k = 0;
  192. sub = 1;
  193. exclude_count = 0;
  194. exclude_state = ID_DEFAULT;
  /* Boundary test on the character before pt: a keyword may start at
     offset 0, after whitespace, or after a symbol other than - and _.
     A pattern whose first character is a marker (literal not visible in
     this copy; presumably the same marker in both tests - confirm) consumes
     the marker instead. In every other case the function returns ID_DEFAULT
     immediately, so no later table entry is tried for this character. */
  195. if(bp->b_mode->keywords[i].word[l] != '' && (pt == 0 ||
  196. (is_symbol(c_prev) &&
  197. (c_prev != '-' && c_prev != '_'))
  198. || isspace(c_prev))) {
  199. // do nothing
  200. } else if(bp->b_mode->keywords[i].word[l] == '') {
  201. l++;
  202. } else {
  203. return (state = ID_DEFAULT);
  204. }
  /* Marker that anchors the pattern to the start of a line: the previous
     character must be a newline or pt must be 0. */
  205. if(bp->b_mode->keywords[i].word[l] == '') {
  206. if(c_prev != '\n' && pt != 0)
  207. return (state = ID_DEFAULT);
  208. l++;
  209. }
  /* Walk the pattern and the buffer in step. Marker characters in the
     pattern trigger the scans below; anything else must match literally.
     Setting k to 0 and breaking means this table entry did not match. */
  210. for(k = 0; bp->b_mode->keywords[i].word[l] != '\0'; k++, l++) {
  211. c_now = get_at(bp, pt+k);
  212. /* at the end */
  /* Marker whose handling depends on the pattern character after it:
     end of pattern - consume up to the newline or ep;
     character code above 32 - consume until that character or ep;
     another marker - consume up to the newline or ep, stopping early when
     the character after that marker is met (t = 2). */
  213. if(bp->b_mode->keywords[i].word[l] == '') {
  214. l++;
  215. if(bp->b_mode->keywords[i].word[l] == '\0') {
  216. for(; c_now != '\n' && pt+k != ep; k++) {
  217. c_now = get_at(bp, pt+k);
  218. }
  219. k--;
  220. break;
  221. } else if(bp->b_mode->keywords[i].word[l] > 32) {
  222. for(; c_now != bp->b_mode->keywords[i].word[l] && pt+k != ep; k++) {
  223. c_now = get_at(bp, pt+k);
  224. }
  225. k--;
  226. } else if(bp->b_mode->keywords[i].word[l] == '') {
  227. for(; c_now != '\n' && pt+k != ep ; k++) {
  228. c_now = get_at(bp, pt+k);
  229. if(bp->b_mode->keywords[i].word[l] == '' &&
  230. bp->b_mode->keywords[i].word[l+1] == c_now) {
  231. t = 2;
  232. break;
  233. }
  234. }
  235. if(t == 0) {
  236. k = 0;
  237. break;
  238. }
  239. if(t == 2) {
  240. l++;
  241. k--;
  242. sub++;
  243. continue;
  244. }
  245. }
  246. }
  /* Two markers that consume a run of non-whitespace characters. The flag
     all is set for one of them; when all is zero the run also stops where
     is_symbolis() / is_symboli() report a non-zero result. */
  247. if(bp->b_mode->keywords[i].word[l] == '' ||
  248. bp->b_mode->keywords[i].word[l] == '') {
  249. int all = bp->b_mode->keywords[i].word[l] == '';
  /* Marker is the last pattern character: consume the run and stop. */
  250. if(bp->b_mode->keywords[i].word[l+1] == '\0') {
  251. for(; !isspace(c_now) &&
  252. (all ? TRUE : !is_symbolis(
  253. c_now,
  254. bp->b_mode->saiv
  255. ));
  256. k++) {
  257. c_now = get_at(bp, pt+k);
  258. }
  259. k--;
  260. break;
  261. } else {
  /* Marker followed by more pattern: consume until the next pattern
     character is met (t = 1), until the character after a following marker
     is met (t = 2), or until a following marker coincides with whitespace
     in the buffer (t = 3). The scan gives up at ep. */
  262. l++;
  263. if(all) {
  264. for(; !isspace(c_now); k++) {
  265. if(bp->b_mode->keywords[i].word[l] == c_now) {
  266. t = 1;
  267. break;
  268. }
  269. if(bp->b_mode->keywords[i].word[l] == '' &&
  270. bp->b_mode->keywords[i].word[l+1] == c_now) {
  271. t = 2;
  272. break;
  273. }
  274. if(bp->b_mode->keywords[i].word[l] == '' &&
  275. isspace(get_at(bp, pt+k))) {
  276. t = 3;
  277. break;
  278. }
  279. if(pt+k == ep)
  280. break;
  281. c_now = get_at(bp, pt+k);
  282. }
  283. } else {
  284. for(; !isspace(c_now) &&
  285. (bp->b_mode->keywords[i].word[l] < 32 ||
  286. !is_symboli(
  287. c_now,
  288. bp->b_mode->keywords[i].word[l]
  289. ));
  290. k++) {
  291. if(bp->b_mode->keywords[i].word[l] == c_now) {
  292. t = 1;
  293. break;
  294. }
  295. if(bp->b_mode->keywords[i].word[l] == '' &&
  296. bp->b_mode->keywords[i].word[l+1] == c_now) {
  297. t = 2;
  298. break;
  299. }
  300. if(bp->b_mode->keywords[i].word[l] == '' &&
  301. isspace(get_at(bp, pt+k))) {
  302. t = 3;
  303. break;
  304. }
  305. if(pt+k == ep)
  306. break;
  307. c_now = get_at(bp, pt+k);
  308. }
  309. }
  310. if(t == 0) {
  311. k = 0;
  312. break;
  313. }
  314. if(t == 1) {
  315. k--;
  316. continue;
  317. }
  318. if(t == 2) {
  319. l++;
  320. k--;
  321. sub++;
  322. continue;
  323. }
  324. if(t == 3) {
  325. // do nothing
  326. }
  327. }
  328. }
  /* Marker that consumes a run of whitespace in the buffer (bounded by ep). */
  329. if(bp->b_mode->keywords[i].word[l] == '') {
  330. l++;
  331. c_now = get_at(bp, pt+k);
  332. for(; isspace(c_now) && pt+k != ep; k++) {
  333. c_now = get_at(bp, pt+k);
  334. }
  335. k--;
  336. c_now = get_at(bp, pt+k);
  337. }
  /* Marker requiring the pattern character after it to equal the current
     buffer character; a hit increases sub, which shortens the highlighted
     length computed after the loop. */
  338. if(bp->b_mode->keywords[i].word[l] == '') {
  339. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  340. sub++;
  341. l++;
  342. continue;
  343. } else {
  344. k = 0;
  345. exclude_count = 0;
  346. break;
  347. }
  348. }
  /* Marker that arms the exclude mechanism: the colour of this table entry
     is stored in exclude_state and exclude_count is increased, so the colour
     is applied later from the skipping branch at the top of the function
     instead of being returned now. */
  349. if(bp->b_mode->keywords[i].word[l] == '') {
  350. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  351. exclude_state = bp->b_mode->keywords[i].color;
  352. exclude_count++;
  353. k--;
  354. continue;
  355. } else {
  356. k = 0;
  357. exclude_count = 0;
  358. break;
  359. }
  360. }
  /* Ordinary pattern character: must equal the buffer character. */
  361. if(bp->b_mode->keywords[i].word[l] != c_now) {
  362. k = 0;
  363. break;
  364. }
  365. }
  /* Accept the match when something was consumed (k > 0), the character
     after the match is whitespace or a symbol (never -, and _ only when the
     pattern character at l is _), and the pattern is used up. The remaining
     k - sub characters are then skipped by later calls. */
  366. c_now = get_at(bp, pt+k);
  367. if(k > 0 && (isspace(c_now) ||
  368. (is_symbol(c_now) &&
  369. (c_now != '-' && (
  370. bp->b_mode->keywords[i].word[l] == '_' || c_now != '_')))) &&
  371. (bp->b_mode->keywords[i].word[l] == '\0' ||
  372. bp->b_mode->keywords[i].word[l+1] == '\0')) {
  373. skip_count = k-sub;
  374. next_state = ID_DEFAULT;
  /* With an exclude pending this character stays in the default state. */
  375. if(exclude_state != ID_DEFAULT) {
  376. return (state = ID_DEFAULT);
  377. }
  378. return (state = bp->b_mode->keywords[i].color);
  379. }
  380. }
  381. }
  /* No rule fired: the current state continues. Both returns below do the
     same thing, so the if adds nothing while the code between them remains
     commented out. */
  382. if (state != ID_DEFAULT)
  383. return (next_state = state);
  384. // if (state == ID_DEFAULT && c_now >= '0' && c_now <= '9') {
  385. // next_state = ID_DEFAULT;
  386. // return (state = ID_DIGITS);
  387. // }
  388. // if (state == ID_DEFAULT && 1 == is_symbol(c_now)) {
  389. // next_state = ID_DEFAULT;
  390. // return (state = ID_SYMBOL);
  391. // }
  392. return (next_state = state);
  393. }