mkdio.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. /*
  2. * mkdio -- markdown front end input functions
  3. *
  4. * Copyright (C) 2007 David L Parsons.
  5. * The redistribution terms are provided in the COPYRIGHT file that must
  6. * be distributed with this source code.
  7. */
  8. #include "config.h"
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <ctype.h>
  12. #include "cstring.h"
  13. #include "markdown.h"
  14. #include "amalloc.h"
/* shorthand for an ANCHOR() of Lines.  Nothing in the visible part of
 * this file refers to it.
 */
typedef ANCHOR(Line) LineAnchor;
  16. /* create a new blank Document
  17. */
  18. Document*
  19. __mkd_new_Document()
  20. {
  21. Document *ret = calloc(sizeof(Document), 1);
  22. if ( ret ) {
  23. if ( ret->ctx = calloc(sizeof(MMIOT), 1) ) {
  24. ret->magic = VALID_DOCUMENT;
  25. return ret;
  26. }
  27. free(ret);
  28. }
  29. return 0;
  30. }
  31. /* add a line to the markdown input chain, expanding tabs and
  32. * noting the presence of special characters as we go.
  33. */
  34. void
  35. __mkd_enqueue(Document* a, Cstring *line)
  36. {
  37. Line *p = calloc(sizeof *p, 1);
  38. unsigned char c;
  39. int xp = 0;
  40. int size = S(*line);
  41. unsigned char *str = (unsigned char*)T(*line);
  42. CREATE(p->text);
  43. ATTACH(a->content, p);
  44. while ( size-- ) {
  45. if ( (c = *str++) == '\t' ) {
  46. /* expand tabs into ->tabstop spaces. We use ->tabstop
  47. * because the ENTIRE FREAKING COMPUTER WORLD uses editors
  48. * that don't do ^T/^D, but instead use tabs for indentation,
  49. * and, of course, set their tabs down to 4 spaces
  50. */
  51. do {
  52. EXPAND(p->text) = ' ';
  53. } while ( ++xp % a->tabstop );
  54. }
  55. else if ( c >= ' ' ) {
  56. if ( c == '|' )
  57. p->line_flags |= PIPECHAR;
  58. EXPAND(p->text) = c;
  59. ++xp;
  60. }
  61. }
  62. EXPAND(p->text) = 0;
  63. S(p->text)--;
  64. p->dle = mkd_firstnonblank(p);
  65. }
  66. /* trim leading characters from a line, then adjust the dle.
  67. */
  68. void
  69. __mkd_trim_line(Line *p, int clip)
  70. {
  71. if ( clip >= S(p->text) ) {
  72. S(p->text) = p->dle = 0;
  73. T(p->text)[0] = 0;
  74. }
  75. else if ( clip > 0 ) {
  76. CLIP(p->text, 0, clip);
  77. p->dle = mkd_firstnonblank(p);
  78. }
  79. }
  80. /* build a Document from any old input.
  81. */
  82. typedef int (*getc_func)(void*);
  83. Document *
  84. populate(getc_func getc, void* ctx, mkd_flag_t *flags)
  85. {
  86. Cstring line;
  87. Document *a = __mkd_new_Document();
  88. int c;
  89. int pandoc = (flags && !is_flag_set(flags, MKD_NOHEADER)) ? 0 : EOF;
  90. if ( !a ) return 0;
  91. a->tabstop = (flags && is_flag_set(flags, MKD_TABSTOP)) ? 4 : TABSTOP;
  92. CREATE(line);
  93. while ( (c = (*getc)(ctx)) != EOF ) {
  94. if ( c == '\n' ) {
  95. if ( pandoc != EOF && pandoc < 3 ) {
  96. if ( S(line) && (T(line)[0] == '%') )
  97. pandoc++;
  98. else
  99. pandoc = EOF;
  100. }
  101. __mkd_enqueue(a, &line);
  102. S(line) = 0;
  103. }
  104. else if ( isprint(c) || isspace(c) || (c & 0x80) )
  105. EXPAND(line) = c;
  106. }
  107. if ( S(line) )
  108. __mkd_enqueue(a, &line);
  109. DELETE(line);
  110. if ( pandoc == 3 ) {
  111. /* the first three lines started with %, so we have a header.
  112. * clip the first three lines out of content and hang them
  113. * off header.
  114. */
  115. Line *headers = T(a->content);
  116. a->title = headers; __mkd_trim_line(a->title, 1);
  117. a->author= headers->next; __mkd_trim_line(a->author, 1);
  118. a->date = headers->next->next; __mkd_trim_line(a->date, 1);
  119. T(a->content) = headers->next->next->next;
  120. }
  121. return a;
  122. }
  123. /* convert a file into a linked list
  124. */
  125. Document *
  126. mkd_in(FILE *f, mkd_flag_t *flags)
  127. {
  128. return populate((getc_func)fgetc, f, flags);
  129. }
  130. /* return a single character out of a buffer
  131. */
  132. int
  133. __mkd_io_strget(struct string_stream *in)
  134. {
  135. if ( !in->size ) return EOF;
  136. --(in->size);
  137. return *(in->data)++;
  138. }
  139. /* convert a block of text into a linked list
  140. */
  141. Document *
  142. mkd_string(const char *buf, int len, mkd_flag_t* flags)
  143. {
  144. struct string_stream about;
  145. about.data = buf;
  146. about.size = len;
  147. return populate((getc_func)__mkd_io_strget, &about, flags);
  148. }
  149. /* write the html to a file (xmlified if necessary)
  150. */
  151. int
  152. mkd_generatehtml(Document *p, FILE *output)
  153. {
  154. char *doc;
  155. int szdoc;
  156. DO_OR_DIE( szdoc = mkd_document(p,&doc) );
  157. if ( is_flag_set( &(p->ctx->flags), MKD_CDATA ) )
  158. DO_OR_DIE( mkd_generatexml(doc, szdoc, output) );
  159. else if ( fwrite(doc, szdoc, 1, output) != 1 )
  160. return EOF;
  161. DO_OR_DIE( putc('\n', output) );
  162. return 0;
  163. }
  164. /* convert some markdown text to html
  165. */
  166. int
  167. markdown(Document *document, FILE *out, mkd_flag_t* flags)
  168. {
  169. if ( mkd_compile(document, flags) ) {
  170. mkd_generatehtml(document, out);
  171. mkd_cleanup(document);
  172. return 0;
  173. }
  174. return -1;
  175. }
  176. /* anchor_format a string, returning the formatted string in malloc()ed space
  177. * MKD_URLENCODEDANCHOR is now perverted to being a html5 anchor
  178. *
  179. * !labelformat: print all characters
  180. * labelformat && h4anchor: prefix nonalpha label with L,
  181. * expand all nonalnum, _, ':', '.' to hex
  182. * except space which maps to -
  183. * labelformat && !h4anchor:expand space to -, other isspace() & '%' to hex
  184. */
  185. static char *
  186. mkd_anchor_format(char *s, int len, int labelformat, mkd_flag_t *flags)
  187. {
  188. char *res;
  189. unsigned char c;
  190. int i, needed, out = 0;
  191. int h4anchor = !is_flag_set(flags, MKD_URLENCODEDANCHOR);
  192. static const unsigned char hexchars[] = "0123456789abcdef";
  193. needed = (labelformat ? (4*len) : len) + 2; /* +2 for L & \0 */
  194. if ( (res = malloc(needed)) == NULL )
  195. return NULL;
  196. if ( h4anchor && labelformat && !isalpha(s[0]) )
  197. res[out++] = 'L';
  198. for ( i=0; i < len ; i++ ) {
  199. c = s[i];
  200. if ( labelformat ) {
  201. if ( h4anchor
  202. ? (isalnum(c) || (c == '_') || (c == ':') || (c == '.' ) )
  203. : !(isspace(c) || c == '%') )
  204. res[out++] = c;
  205. else if ( c == ' ' )
  206. res[out++] = '-';
  207. else {
  208. res[out++] = h4anchor ? '-' : '%';
  209. res[out++] = hexchars[c >> 4 & 0xf];
  210. res[out++] = hexchars[c & 0xf];
  211. if ( h4anchor )
  212. res[out++] = '-';
  213. }
  214. }
  215. else
  216. res[out++] = c;
  217. }
  218. res[out++] = 0;
  219. return res;
  220. } /* mkd_anchor_format */
  221. /* write out a Cstring, mangled into a form suitable for `<a href=` or `<a id=`
  222. */
  223. void
  224. mkd_string_to_anchor(char *s, int len, mkd_sta_function_t outchar,
  225. void *out, int labelformat,
  226. MMIOT *f)
  227. {
  228. char *res;
  229. char *line;
  230. int size;
  231. mkd_flag_t flags;
  232. int i;
  233. mkd_init_flags(&flags);
  234. set_mkd_flag(&flags,IS_LABEL);
  235. size = mkd_line(s, len, &line, &flags);
  236. if ( !line )
  237. return;
  238. if ( f->cb->e_anchor )
  239. res = (*(f->cb->e_anchor))(line, size, f->cb->e_data);
  240. else
  241. res = mkd_anchor_format(line, size, labelformat, &(f->flags));
  242. free(line);
  243. if ( !res )
  244. return;
  245. for ( i=0; res[i]; i++ )
  246. (*outchar)(res[i], out);
  247. if ( f->cb->e_anchor ) {
  248. if ( f->cb->e_free )
  249. (*(f->cb->e_free))(res, f->cb->e_data);
  250. }
  251. else
  252. free(res);
  253. }
  254. /* ___mkd_reparse() a line
  255. */
  256. static void
  257. mkd_parse_line(char *bfr, int size, MMIOT *f, mkd_flag_t *flags)
  258. {
  259. ___mkd_initmmiot(f, 0);
  260. if ( flags )
  261. COPY_FLAGS(f->flags, *flags);
  262. else
  263. mkd_init_flags(&f->flags);
  264. ___mkd_reparse(bfr, size, NULL, f, 0);
  265. ___mkd_emblock(f);
  266. }
  267. /* ___mkd_reparse() a line, returning it in malloc()ed memory
  268. */
  269. int
  270. mkd_line(char *bfr, int size, char **res, mkd_flag_t* flags)
  271. {
  272. MMIOT f;
  273. int len;
  274. mkd_parse_line(bfr, size, &f, flags);
  275. if ( len = S(f.out) ) {
  276. EXPAND(f.out) = 0;
  277. /* strdup() doesn't use amalloc(), so in an amalloc()ed
  278. * build this copies the string safely out of our memory
  279. * paranoia arena. In a non-amalloc world, it's a spurious
  280. * memory allocation, but it avoids unintentional hilarity
  281. * with amalloc()
  282. */
  283. *res = strdup(T(f.out));
  284. }
  285. else {
  286. *res = 0;
  287. len = EOF;
  288. }
  289. ___mkd_freemmiot(&f, 0);
  290. return len;
  291. }
  292. /* ___mkd_reparse() a line, writing it to a FILE
  293. */
  294. int
  295. mkd_generateline(char *bfr, int size, FILE *output, mkd_flag_t* flags)
  296. {
  297. MMIOT f;
  298. int status;
  299. mkd_parse_line(bfr, size, &f, flags);
  300. if ( flags && is_flag_set(flags, MKD_CDATA) )
  301. status = mkd_generatexml(T(f.out), S(f.out), output) != EOF;
  302. else
  303. status = fwrite(T(f.out), S(f.out), 1, output) == S(f.out);
  304. ___mkd_freemmiot(&f, 0);
  305. return status ? 0 : EOF;
  306. }
  307. /* set the url display callback
  308. */
  309. void
  310. mkd_e_url(Document *f, mkd_callback_t edit)
  311. {
  312. if ( f ) {
  313. if ( f->cb.e_url != edit )
  314. f->dirty = 1;
  315. f->cb.e_url = edit;
  316. }
  317. }
  318. /* set the url options callback
  319. */
  320. void
  321. mkd_e_flags(Document *f, mkd_callback_t edit)
  322. {
  323. if ( f ) {
  324. if ( f->cb.e_flags != edit )
  325. f->dirty = 1;
  326. f->cb.e_flags = edit;
  327. }
  328. }
  329. /* set the anchor formatter
  330. */
  331. void
  332. mkd_e_anchor(Document *f, mkd_callback_t format)
  333. {
  334. if ( f ) {
  335. if ( f->cb.e_anchor != format )
  336. f->dirty = 1;
  337. f->cb.e_anchor = format;
  338. }
  339. }
  340. /* set the url display/options deallocator
  341. */
  342. void
  343. mkd_e_free(Document *f, mkd_free_t dealloc)
  344. {
  345. if ( f ) {
  346. if ( f->cb.e_free != dealloc )
  347. f->dirty = 1;
  348. f->cb.e_free = dealloc;
  349. }
  350. }
  351. /* set the url display/options context data field
  352. */
  353. void
  354. mkd_e_data(Document *f, void *data)
  355. {
  356. if ( f ) {
  357. if ( f->cb.e_data != data )
  358. f->dirty = 1;
  359. f->cb.e_data = data;
  360. }
  361. }
  362. /* set the code block display callback
  363. */
  364. void
  365. mkd_e_code_format(Document *f, mkd_callback_t codefmt)
  366. {
  367. if ( f && (f->cb.e_codefmt != codefmt) ) {
  368. f->dirty = 1;
  369. f->cb.e_codefmt = codefmt;
  370. }
  371. }
  372. /* set the href prefix for markdown extra style footnotes
  373. */
  374. void
  375. mkd_ref_prefix(Document *f, char *data)
  376. {
  377. if ( f ) {
  378. if ( f->ref_prefix != data )
  379. f->dirty = 1;
  380. f->ref_prefix = data;
  381. }
  382. }
/* debugging aid: print a three-line picture of a flag set -- a tens
 * ruler, the prefix followed by a units ruler, and a row holding an X
 * for every flag that is set.
 *
 * compiled out; the #else branch turns sayflags() into the constant 1.
 */
#if 0
static void
sayflags(char *pfx, mkd_flag_t* flags, FILE *output)
{
    int i;

    /* tens ruler.
     * NOTE(review): "%.*s" caps the output at strlen(pfx) characters of
     * the literal rather than padding to that width -- if the intent is
     * to line up under the prefix, "%*s" looks like what was meant.
     */
    fprintf(output, "%.*s/", (int)strlen(pfx), " ");
    for (i=0; i<MKD_NR_FLAGS; i++)
        fputc( (i==0) || (i % 10) ? ' ' : (i/10)+'0', output);
    fputc('\\', output);
    fputc('\n', output);

    /* prefix and units ruler */
    fprintf(output, "%s|", pfx);
    for (i=0; i<MKD_NR_FLAGS; i++)
        fputc((i%10)+'0', output);
    fputc('|', output);
    fputc('\n', output);

    /* one column per flag, X where it is set */
    fprintf(output, "%.*s\\", (int)strlen(pfx), " ");
    for (i=0;i<MKD_NR_FLAGS; i++)
        fputc(is_flag_set(flags, i)?'X':' ', output);
    fputc('/', output);
    fputc('\n', output);
}
#else
#define sayflags(pfx,flags,output) 1
#endif
  407. void
  408. ___mkd_or_flags(mkd_flag_t *dst, mkd_flag_t *src)
  409. {
  410. int i;
  411. for (i=0; i<MKD_NR_FLAGS; i++)
  412. if ( is_flag_set(src,i) )
  413. set_mkd_flag(dst, i);
  414. }
  415. int
  416. ___mkd_different(mkd_flag_t *dst, mkd_flag_t *src)
  417. {
  418. int i;
  419. mkd_flag_t zeroes;
  420. if ( dst == 0 || src == 0 ) {
  421. mkd_init_flags(&zeroes);
  422. if ( !dst )
  423. dst = &zeroes;
  424. if ( !src )
  425. src = &zeroes;
  426. }
  427. for (i=0; i < MKD_NR_FLAGS; i++)
  428. if ( is_flag_set(src,i) != is_flag_set(dst,i) )
  429. return 1;
  430. return 0;
  431. }
  432. int
  433. ___mkd_any_flags(mkd_flag_t *dst, mkd_flag_t *src)
  434. {
  435. int i;
  436. int count = 0;
  437. mkd_flag_t zeroes;
  438. if ( dst == 0 || src == 0 ) {
  439. mkd_init_flags(&zeroes);
  440. if ( !dst )
  441. dst = &zeroes;
  442. if ( !src )
  443. src = &zeroes;
  444. }
  445. for (i=0; i < MKD_NR_FLAGS; i++)
  446. if ( is_flag_set(src,i) && is_flag_set(dst,i) )
  447. ++count;
  448. return count;
  449. }