/* postproc.c */

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <stdbool.h>
  5. #include <regex.h>
  6. #include "main.h"
  7. #include "latex.h"
  8. #include "postproc.h"
  9. #include "str_split.h"
/*
 * POSIX extended regular expression used to find em-dashes written as "--".
 * It requires one or more alphabetic/punctuation characters, optional
 * whitespace, the "--" itself (capture group 1), optional whitespace, and
 * one or more punctuation/alphabetic characters.
 */
const char* emdash_re = "[[:alpha:][:punct:]]+[[:space:]]*(--)[[:space:]]*[[:punct:][:alpha:]]+";
/* Pattern capturing a single ASCII double quote; not referenced in the visible part of this file. */
const char* ascii_quote_re = "(\")";
/* Set of characters, presumably those needing escaping in LaTeX output; not referenced in the visible part of this file. */
const char* latex_special_chars = "&#_%~{}^$";
/* Set of characters, presumably POSIX extended-regex metacharacters; not referenced in the visible part of this file. */
const char* posix_ext_special_chars = ".^$*+?()[{\\|";
  14. bool contains_emdash(const char* string, const size_t length) {
  15. char* tmp = (char*)malloc(length + 1);
  16. if(!tmp) {
  17. fprintf(stderr, "Failed to allocate memory when scanning for emdashes.\n");
  18. return false;
  19. }
  20. strncpy(tmp, string, length);
  21. regex_t emdash;
  22. if(regcomp(&emdash, emdash_re, REG_EXTENDED) != 0) {
  23. fprintf(stderr, "Failed to compile regex '%s' for detecting em-dashes.\n", emdash_re);
  24. return false;
  25. }
  26. bool match_found = false;
  27. if(regexec(&emdash, tmp, 0, NULL, 0) == 0) {
  28. match_found = true;
  29. }
  30. regfree(&emdash);
  31. free(tmp);
  32. return match_found;
  33. }
  34. void emdash_positions(int** ed_starts, int* neds, const char* string, const size_t length) {
  35. regex_t* emdash = (regex_t*)malloc(sizeof(regex_t));
  36. if(!emdash) {
  37. fprintf(stderr, "Failed to allocate memory for emdash regex.\n");
  38. return;
  39. }
  40. if(regcomp(emdash, emdash_re, REG_EXTENDED) != 0) {
  41. fprintf(stderr, "Failed to compile regex '%s' for detecting em-dashes.\n", emdash_re);
  42. return;
  43. }
  44. size_t ngroups = 2;
  45. regmatch_t groups[ngroups];
  46. char* tmp = (char*)malloc(length + 1);
  47. if(!tmp) {
  48. fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
  49. return;
  50. }
  51. strcpy(tmp, string);
  52. char* cursor = tmp;
  53. *neds = 0;
  54. while(regexec(emdash, cursor, ngroups, groups, 0) == 0) {
  55. ++(*neds);
  56. cursor += groups[1].rm_eo;
  57. }
  58. cursor = tmp;
  59. int cursor_pos = 0;
  60. ed_starts = (int**)realloc(ed_starts, *neds * sizeof(int*));
  61. if(!ed_starts) {
  62. fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
  63. return;
  64. }
  65. int idx = 0;
  66. while(regexec(emdash, cursor, ngroups, groups, 0) == 0) {
  67. ed_starts[idx] = (int*)malloc(sizeof(int));
  68. if(!ed_starts) {
  69. fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
  70. return;
  71. }
  72. *ed_starts[idx] = cursor_pos + groups[1].rm_so;
  73. cursor += groups[1].rm_eo;
  74. cursor_pos += groups[1].rm_eo;
  75. ++idx;
  76. }
  77. regfree(emdash);
  78. free(tmp);
  79. }
  80. void character_locations(
  81. int** locations,
  82. int* nmatches,
  83. const char ch,
  84. const char* string,
  85. const size_t length) {
  86. const char* cursor;
  87. *nmatches = 0;
  88. for(cursor = string; cursor && *cursor != '\0'; ++cursor) {
  89. if(*cursor == ch) {
  90. ++(*nmatches);
  91. }
  92. }
  93. if(*nmatches == 0) {
  94. return;
  95. }
  96. locations = (int**)realloc(locations, *nmatches * sizeof(int*));
  97. if(!locations) {
  98. fprintf(stderr, "Failed to allocate memory for %d locations of '%c' when searching '%s'.\n", *nmatches, ch, string);
  99. if(locations) {
  100. free(locations);
  101. }
  102. locations = NULL;
  103. return;
  104. }
  105. int match_number = 0;
  106. for(cursor = string; cursor && *cursor != '\0'; ++cursor) {
  107. if(*cursor == ch) {
  108. locations[match_number] = (int*)malloc(sizeof(int));
  109. *locations[match_number] = cursor - string;
  110. match_number++;
  111. }
  112. }
  113. }
  114. bool contains_character(const char ch, const char* string, const size_t length) {
  115. const char* cursor;
  116. for(cursor = string; cursor && *cursor != '\0'; ++cursor) {
  117. if(*cursor == ch) {
  118. return true;
  119. }
  120. }
  121. return false;
  122. }
  123. char* escape_latex_specials(const char *src) {
  124. char* result = (char*)malloc(strlen(src) + 1);
  125. strcpy(result, src);
  126. const latex_character_escape* e;
  127. char* tmp;
  128. int nesc = sizeof(escapes)/sizeof(escapes[0]);
  129. for(e = escapes; e < escapes + nesc; ++e) {
  130. tmp = (char*)malloc(strlen(result)+1);
  131. strcpy(tmp, result);
  132. if(result) {
  133. free(result);
  134. }
  135. result = replace(tmp, e->special, e->repl);
  136. if(tmp) {
  137. free(tmp);
  138. }
  139. }
  140. tmp = NULL;
  141. return result;
  142. }
  143. void postprocess_line(hoedown_buffer* buf, const char* line, const size_t length) {
  144. if(!line || strlen(line) < 1 || length < 1) {
  145. return;
  146. }
  147. char* emdash_result = (char*)malloc(1);
  148. if(!emdash_result) {
  149. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  150. return;
  151. }
  152. *emdash_result = '\0';
  153. char* ss = (char*)malloc(1);
  154. if(!ss) {
  155. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  156. return;
  157. }
  158. *ss = '\0';
  159. if(contains_emdash(line, length)) {
  160. int** emdash_starts = (int**)malloc(sizeof(int*));
  161. if(!emdash_starts) {
  162. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  163. return;
  164. }
  165. int* num_emdashes = (int*)malloc(sizeof(int));
  166. if(!num_emdashes) {
  167. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  168. return;
  169. }
  170. emdash_positions(emdash_starts, num_emdashes, line, length);
  171. /* Put three dashes between substrings delimited by emdashes. */
  172. int idx;
  173. int start;
  174. int end;
  175. for(idx = 0; idx < *num_emdashes; ++idx) {
  176. start = idx == 0 ? 0 : *emdash_starts[idx-1] + 2;
  177. end = *emdash_starts[idx] - 1;
  178. ss = (char*)realloc(ss, end - start + 2);
  179. if(!ss) {
  180. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  181. return;
  182. }
  183. substring(ss, line, start, end);
  184. emdash_result = (char*)realloc(emdash_result, strlen(emdash_result) + strlen(ss) + sizeof("---"));
  185. if(!emdash_result) {
  186. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  187. return;
  188. }
  189. strcat(emdash_result, ss);
  190. strcat(emdash_result, "---");
  191. }
  192. /* Make the last substring separate so it doesn't get three dashes after it. */
  193. start = *emdash_starts[*num_emdashes-1] + 2;
  194. end = length - 1;
  195. ss = (char*)realloc(ss, end - start + 2);
  196. if(!ss) {
  197. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  198. return;
  199. }
  200. substring(ss, line, start, end);
  201. emdash_result = (char*)realloc(emdash_result, strlen(emdash_result) + strlen(ss));
  202. if(!emdash_result) {
  203. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  204. return;
  205. }
  206. strcat(emdash_result, ss);
  207. for(idx = 0; idx < *num_emdashes; ++idx) {
  208. free(emdash_starts[idx]);
  209. emdash_starts[idx] = NULL;
  210. }
  211. free(emdash_starts);
  212. free(num_emdashes);
  213. emdash_starts = NULL;
  214. num_emdashes = NULL;
  215. } else {
  216. emdash_result = (char*)realloc(emdash_result, length + 1);
  217. if(!emdash_result) {
  218. fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
  219. return;
  220. }
  221. strcpy(emdash_result, line);
  222. }
  223. char* quotes_result = replace(emdash_result, '\"', "``");
  224. char* result = escape_latex_specials(quotes_result);
  225. hoedown_buffer_put(buf, result, strlen(result));
  226. if(result) {
  227. free(result);
  228. }
  229. result = NULL;
  230. if(quotes_result) {
  231. free(quotes_result);
  232. }
  233. quotes_result = NULL;
  234. if(emdash_result) {
  235. free(emdash_result);
  236. }
  237. emdash_result = NULL;
  238. if(ss) {
  239. free(ss);
  240. }
  241. ss = NULL;
  242. }