123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <stdbool.h>
- #include <regex.h>
- #include "main.h"
- #include "latex.h"
- #include "postproc.h"
- #include "str_split.h"
/*
 * POSIX extended regex for a "--" that sits between two runs of letters
 * and/or punctuation, with optional whitespace on either side of the dashes.
 * Capture group 1 is the "--" itself.
 */
const char *emdash_re =
    "[[:alpha:][:punct:]]+[[:space:]]*(--)[[:space:]]*[[:punct:][:alpha:]]+";

/* POSIX extended regex whose single capture group is an ASCII double quote. */
const char *ascii_quote_re =
    "(\")";

/* Set of characters treated as LaTeX specials (not referenced in this part of the file). */
const char *latex_special_chars =
    "&#_%~{}^$";

/* Set of POSIX extended-regex metacharacters (not referenced in this part of the file). */
const char *posix_ext_special_chars =
    ".^$*+?()[{\\|";
- bool contains_emdash(const char* string, const size_t length) {
- char* tmp = (char*)malloc(length + 1);
- if(!tmp) {
- fprintf(stderr, "Failed to allocate memory when scanning for emdashes.\n");
- return false;
- }
- strncpy(tmp, string, length);
- regex_t emdash;
- if(regcomp(&emdash, emdash_re, REG_EXTENDED) != 0) {
- fprintf(stderr, "Failed to compile regex '%s' for detecting em-dashes.\n", emdash_re);
- return false;
- }
- bool match_found = false;
- if(regexec(&emdash, tmp, 0, NULL, 0) == 0) {
- match_found = true;
- }
- regfree(&emdash);
- free(tmp);
- return match_found;
- }
- void emdash_positions(int** ed_starts, int* neds, const char* string, const size_t length) {
- regex_t* emdash = (regex_t*)malloc(sizeof(regex_t));
- if(!emdash) {
- fprintf(stderr, "Failed to allocate memory for emdash regex.\n");
- return;
- }
- if(regcomp(emdash, emdash_re, REG_EXTENDED) != 0) {
- fprintf(stderr, "Failed to compile regex '%s' for detecting em-dashes.\n", emdash_re);
- return;
- }
- size_t ngroups = 2;
- regmatch_t groups[ngroups];
- char* tmp = (char*)malloc(length + 1);
- if(!tmp) {
- fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
- return;
- }
- strcpy(tmp, string);
- char* cursor = tmp;
- *neds = 0;
- while(regexec(emdash, cursor, ngroups, groups, 0) == 0) {
- ++(*neds);
- cursor += groups[1].rm_eo;
- }
- cursor = tmp;
- int cursor_pos = 0;
- ed_starts = (int**)realloc(ed_starts, *neds * sizeof(int*));
- if(!ed_starts) {
- fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
- return;
- }
- int idx = 0;
- while(regexec(emdash, cursor, ngroups, groups, 0) == 0) {
- ed_starts[idx] = (int*)malloc(sizeof(int));
- if(!ed_starts) {
- fprintf(stderr, "Failed to allocate memory when enumerating emdash positions.\n");
- return;
- }
- *ed_starts[idx] = cursor_pos + groups[1].rm_so;
- cursor += groups[1].rm_eo;
- cursor_pos += groups[1].rm_eo;
- ++idx;
- }
- regfree(emdash);
- free(tmp);
- }
/*
 * Records the offset of every occurrence of `ch` in the NUL-terminated
 * `string`.
 *
 * On return, *nmatches is the number of offsets stored. For each
 * i < *nmatches, locations[i] points to a freshly malloc'd int holding the
 * byte offset of the i-th occurrence; the caller frees each element.
 * *nmatches is 0 when there are no occurrences, when `string` or `locations`
 * is NULL, or when the array cannot be resized; if an element allocation
 * fails part-way it is the number of elements actually stored.
 *
 * `length` is accepted for interface compatibility but is not consulted: the
 * scan always runs to the NUL terminator.
 *
 * NOTE(review): `locations` is passed by value and resized with realloc()
 * here, so if realloc() moves the block the caller's pointer is left
 * dangling. This cannot be fixed without changing the signature; the resize
 * is kept so existing callers see the same behaviour.
 */
void character_locations(
    int** locations,
    int* nmatches,
    const char ch,
    const char* string,
    const size_t length) {
    (void)length;

    if (!nmatches) {
        return;
    }
    *nmatches = 0;
    if (!locations || !string) {
        return;
    }

    /* Pass 1: count occurrences so the array can be sized once. */
    int total = 0;
    for (const char *p = string; *p != '\0'; ++p) {
        if (*p == ch) {
            ++total;
        }
    }
    if (total == 0) {
        return;
    }

    int **grown = realloc(locations, (size_t)total * sizeof *grown);
    if (!grown) {
        fprintf(stderr, "Failed to allocate memory for %d locations of '%c' when searching '%s'.\n", total, ch, string);
        return;
    }
    locations = grown;

    /* Pass 2: store one heap-allocated offset per occurrence. */
    int stored = 0;
    for (const char *p = string; *p != '\0'; ++p) {
        if (*p != ch) {
            continue;
        }
        int *slot = malloc(sizeof *slot);
        if (!slot) {
            fprintf(stderr, "Failed to allocate memory for location %d of '%c' when searching '%s'.\n", stored, ch, string);
            break;
        }
        *slot = (int)(p - string);
        locations[stored] = slot;
        ++stored;
    }
    /* Only report the elements that were really allocated. */
    *nmatches = stored;
}
/*
 * Returns true when `ch` occurs in the NUL-terminated `string`, false
 * otherwise (including when `string` is NULL). The terminator itself never
 * counts as a match. `length` is accepted but not consulted.
 */
bool contains_character(const char ch, const char* string, const size_t length) {
    (void)length;

    /* strchr() would report the terminator for ch == '\0'; rule that out first. */
    if (string == NULL || ch == '\0') {
        return false;
    }
    return strchr(string, ch) != NULL;
}
- char* escape_latex_specials(const char *src) {
- char* result = (char*)malloc(strlen(src) + 1);
- strcpy(result, src);
- const latex_character_escape* e;
- char* tmp;
- int nesc = sizeof(escapes)/sizeof(escapes[0]);
- for(e = escapes; e < escapes + nesc; ++e) {
- tmp = (char*)malloc(strlen(result)+1);
- strcpy(tmp, result);
- if(result) {
- free(result);
- }
- result = replace(tmp, e->special, e->repl);
- if(tmp) {
- free(tmp);
- }
- }
- tmp = NULL;
- return result;
- }
- void postprocess_line(hoedown_buffer* buf, const char* line, const size_t length) {
- if(!line || strlen(line) < 1 || length < 1) {
- return;
- }
- char* emdash_result = (char*)malloc(1);
- if(!emdash_result) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- *emdash_result = '\0';
- char* ss = (char*)malloc(1);
- if(!ss) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- *ss = '\0';
- if(contains_emdash(line, length)) {
- int** emdash_starts = (int**)malloc(sizeof(int*));
- if(!emdash_starts) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- int* num_emdashes = (int*)malloc(sizeof(int));
- if(!num_emdashes) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- emdash_positions(emdash_starts, num_emdashes, line, length);
- /* Put three dashes between substrings delimited by emdashes. */
- int idx;
- int start;
- int end;
- for(idx = 0; idx < *num_emdashes; ++idx) {
- start = idx == 0 ? 0 : *emdash_starts[idx-1] + 2;
- end = *emdash_starts[idx] - 1;
- ss = (char*)realloc(ss, end - start + 2);
- if(!ss) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- substring(ss, line, start, end);
- emdash_result = (char*)realloc(emdash_result, strlen(emdash_result) + strlen(ss) + sizeof("---"));
- if(!emdash_result) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- strcat(emdash_result, ss);
- strcat(emdash_result, "---");
- }
- /* Make the last substring separate so it doesn't get three dashes after it. */
- start = *emdash_starts[*num_emdashes-1] + 2;
- end = length - 1;
- ss = (char*)realloc(ss, end - start + 2);
- if(!ss) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- substring(ss, line, start, end);
- emdash_result = (char*)realloc(emdash_result, strlen(emdash_result) + strlen(ss));
- if(!emdash_result) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- strcat(emdash_result, ss);
- for(idx = 0; idx < *num_emdashes; ++idx) {
- free(emdash_starts[idx]);
- emdash_starts[idx] = NULL;
- }
- free(emdash_starts);
- free(num_emdashes);
- emdash_starts = NULL;
- num_emdashes = NULL;
- } else {
- emdash_result = (char*)realloc(emdash_result, length + 1);
- if(!emdash_result) {
- fprintf(stderr, "Failed to allocate memory when postprocessing line.\n");
- return;
- }
- strcpy(emdash_result, line);
- }
- char* quotes_result = replace(emdash_result, '\"', "``");
- char* result = escape_latex_specials(quotes_result);
- hoedown_buffer_put(buf, result, strlen(result));
- if(result) {
- free(result);
- }
- result = NULL;
- if(quotes_result) {
- free(quotes_result);
- }
- quotes_result = NULL;
- if(emdash_result) {
- free(emdash_result);
- }
- emdash_result = NULL;
- if(ss) {
- free(ss);
- }
- ss = NULL;
- }
|