123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- /*
- * mkdio -- markdown front end input functions
- *
- * Copyright (C) 2007 David L Parsons.
- * The redistribution terms are provided in the COPYRIGHT file that must
- * be distributed with this source code.
- */
- #include "config.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <ctype.h>
- #include "cstring.h"
- #include "markdown.h"
- #include "amalloc.h"
- typedef ANCHOR(Line) LineAnchor;
- /* create a new blank Document
- */
- Document*
- __mkd_new_Document()
- {
- Document *ret = calloc(sizeof(Document), 1);
- if ( ret ) {
- if ( ret->ctx = calloc(sizeof(MMIOT), 1) ) {
- ret->magic = VALID_DOCUMENT;
- return ret;
- }
- free(ret);
- }
- return 0;
- }
- /* add a line to the markdown input chain, expanding tabs and
- * noting the presence of special characters as we go.
- */
- void
- __mkd_enqueue(Document* a, Cstring *line)
- {
- Line *p = calloc(sizeof *p, 1);
- unsigned char c;
- int xp = 0;
- int size = S(*line);
- unsigned char *str = (unsigned char*)T(*line);
- CREATE(p->text);
- ATTACH(a->content, p);
- while ( size-- ) {
- if ( (c = *str++) == '\t' ) {
- /* expand tabs into ->tabstop spaces. We use ->tabstop
- * because the ENTIRE FREAKING COMPUTER WORLD uses editors
- * that don't do ^T/^D, but instead use tabs for indentation,
- * and, of course, set their tabs down to 4 spaces
- */
- do {
- EXPAND(p->text) = ' ';
- } while ( ++xp % a->tabstop );
- }
- else if ( c >= ' ' ) {
- if ( c == '|' )
- p->line_flags |= PIPECHAR;
- EXPAND(p->text) = c;
- ++xp;
- }
- }
- EXPAND(p->text) = 0;
- S(p->text)--;
- p->dle = mkd_firstnonblank(p);
- }
- /* trim leading characters from a line, then adjust the dle.
- */
- void
- __mkd_trim_line(Line *p, int clip)
- {
- if ( clip >= S(p->text) ) {
- S(p->text) = p->dle = 0;
- T(p->text)[0] = 0;
- }
- else if ( clip > 0 ) {
- CLIP(p->text, 0, clip);
- p->dle = mkd_firstnonblank(p);
- }
- }
- /* build a Document from any old input.
- */
- typedef int (*getc_func)(void*);
- Document *
- populate(getc_func getc, void* ctx, mkd_flag_t *flags)
- {
- Cstring line;
- Document *a = __mkd_new_Document();
- int c;
- int pandoc = (flags && !is_flag_set(flags, MKD_NOHEADER)) ? 0 : EOF;
- if ( !a ) return 0;
- a->tabstop = (flags && is_flag_set(flags, MKD_TABSTOP)) ? 4 : TABSTOP;
- CREATE(line);
- while ( (c = (*getc)(ctx)) != EOF ) {
- if ( c == '\n' ) {
- if ( pandoc != EOF && pandoc < 3 ) {
- if ( S(line) && (T(line)[0] == '%') )
- pandoc++;
- else
- pandoc = EOF;
- }
- __mkd_enqueue(a, &line);
- S(line) = 0;
- }
- else if ( isprint(c) || isspace(c) || (c & 0x80) )
- EXPAND(line) = c;
- }
- if ( S(line) )
- __mkd_enqueue(a, &line);
- DELETE(line);
- if ( pandoc == 3 ) {
- /* the first three lines started with %, so we have a header.
- * clip the first three lines out of content and hang them
- * off header.
- */
- Line *headers = T(a->content);
- a->title = headers; __mkd_trim_line(a->title, 1);
- a->author= headers->next; __mkd_trim_line(a->author, 1);
- a->date = headers->next->next; __mkd_trim_line(a->date, 1);
- T(a->content) = headers->next->next->next;
- }
- return a;
- }
- /* convert a file into a linked list
- */
- Document *
- mkd_in(FILE *f, mkd_flag_t *flags)
- {
- return populate((getc_func)fgetc, f, flags);
- }
- /* return a single character out of a buffer
- */
- int
- __mkd_io_strget(struct string_stream *in)
- {
- if ( !in->size ) return EOF;
- --(in->size);
- return *(in->data)++;
- }
- /* convert a block of text into a linked list
- */
- Document *
- mkd_string(const char *buf, int len, mkd_flag_t* flags)
- {
- struct string_stream about;
- about.data = buf;
- about.size = len;
- return populate((getc_func)__mkd_io_strget, &about, flags);
- }
- /* write the html to a file (xmlified if necessary)
- */
- int
- mkd_generatehtml(Document *p, FILE *output)
- {
- char *doc;
- int szdoc;
- DO_OR_DIE( szdoc = mkd_document(p,&doc) );
- if ( is_flag_set( &(p->ctx->flags), MKD_CDATA ) )
- DO_OR_DIE( mkd_generatexml(doc, szdoc, output) );
- else if ( fwrite(doc, szdoc, 1, output) != 1 )
- return EOF;
- DO_OR_DIE( putc('\n', output) );
- return 0;
- }
- /* convert some markdown text to html
- */
- int
- markdown(Document *document, FILE *out, mkd_flag_t* flags)
- {
- if ( mkd_compile(document, flags) ) {
- mkd_generatehtml(document, out);
- mkd_cleanup(document);
- return 0;
- }
- return -1;
- }
- /* anchor_format a string, returning the formatted string in malloc()ed space
- * MKD_URLENCODEDANCHOR is now perverted to being a html5 anchor
- *
- * !labelformat: print all characters
- * labelformat && h4anchor: prefix nonalpha label with L,
- * expand all nonalnum, _, ':', '.' to hex
- * except space which maps to -
- * labelformat && !h4anchor:expand space to -, other isspace() & '%' to hex
- */
- static char *
- mkd_anchor_format(char *s, int len, int labelformat, mkd_flag_t *flags)
- {
- char *res;
- unsigned char c;
- int i, needed, out = 0;
- int h4anchor = !is_flag_set(flags, MKD_URLENCODEDANCHOR);
- static const unsigned char hexchars[] = "0123456789abcdef";
- needed = (labelformat ? (4*len) : len) + 2; /* +2 for L & \0 */
- if ( (res = malloc(needed)) == NULL )
- return NULL;
- if ( h4anchor && labelformat && !isalpha(s[0]) )
- res[out++] = 'L';
-
-
- for ( i=0; i < len ; i++ ) {
- c = s[i];
- if ( labelformat ) {
- if ( h4anchor
- ? (isalnum(c) || (c == '_') || (c == ':') || (c == '.' ) )
- : !(isspace(c) || c == '%') )
- res[out++] = c;
- else if ( c == ' ' )
- res[out++] = '-';
- else {
- res[out++] = h4anchor ? '-' : '%';
- res[out++] = hexchars[c >> 4 & 0xf];
- res[out++] = hexchars[c & 0xf];
- if ( h4anchor )
- res[out++] = '-';
- }
- }
- else
- res[out++] = c;
- }
-
- res[out++] = 0;
- return res;
- } /* mkd_anchor_format */
- /* write out a Cstring, mangled into a form suitable for `<a href=` or `<a id=`
- */
- void
- mkd_string_to_anchor(char *s, int len, mkd_sta_function_t outchar,
- void *out, int labelformat,
- MMIOT *f)
- {
- char *res;
- char *line;
- int size;
- mkd_flag_t flags;
- int i;
- mkd_init_flags(&flags);
- set_mkd_flag(&flags,IS_LABEL);
- size = mkd_line(s, len, &line, &flags);
- if ( !line )
- return;
- if ( f->cb->e_anchor )
- res = (*(f->cb->e_anchor))(line, size, f->cb->e_data);
- else
- res = mkd_anchor_format(line, size, labelformat, &(f->flags));
- free(line);
- if ( !res )
- return;
- for ( i=0; res[i]; i++ )
- (*outchar)(res[i], out);
- if ( f->cb->e_anchor ) {
- if ( f->cb->e_free )
- (*(f->cb->e_free))(res, f->cb->e_data);
- }
- else
- free(res);
- }
- /* ___mkd_reparse() a line
- */
- static void
- mkd_parse_line(char *bfr, int size, MMIOT *f, mkd_flag_t *flags)
- {
- ___mkd_initmmiot(f, 0);
- if ( flags )
- COPY_FLAGS(f->flags, *flags);
- else
- mkd_init_flags(&f->flags);
- ___mkd_reparse(bfr, size, NULL, f, 0);
- ___mkd_emblock(f);
- }
- /* ___mkd_reparse() a line, returning it in malloc()ed memory
- */
- int
- mkd_line(char *bfr, int size, char **res, mkd_flag_t* flags)
- {
- MMIOT f;
- int len;
-
- mkd_parse_line(bfr, size, &f, flags);
- if ( len = S(f.out) ) {
- EXPAND(f.out) = 0;
- /* strdup() doesn't use amalloc(), so in an amalloc()ed
- * build this copies the string safely out of our memory
- * paranoia arena. In a non-amalloc world, it's a spurious
- * memory allocation, but it avoids unintentional hilarity
- * with amalloc()
- */
- *res = strdup(T(f.out));
- }
- else {
- *res = 0;
- len = EOF;
- }
- ___mkd_freemmiot(&f, 0);
- return len;
- }
- /* ___mkd_reparse() a line, writing it to a FILE
- */
- int
- mkd_generateline(char *bfr, int size, FILE *output, mkd_flag_t* flags)
- {
- MMIOT f;
- int status;
- mkd_parse_line(bfr, size, &f, flags);
- if ( flags && is_flag_set(flags, MKD_CDATA) )
- status = mkd_generatexml(T(f.out), S(f.out), output) != EOF;
- else
- status = fwrite(T(f.out), S(f.out), 1, output) == S(f.out);
- ___mkd_freemmiot(&f, 0);
- return status ? 0 : EOF;
- }
- /* set the url display callback
- */
- void
- mkd_e_url(Document *f, mkd_callback_t edit)
- {
- if ( f ) {
- if ( f->cb.e_url != edit )
- f->dirty = 1;
- f->cb.e_url = edit;
- }
- }
- /* set the url options callback
- */
- void
- mkd_e_flags(Document *f, mkd_callback_t edit)
- {
- if ( f ) {
- if ( f->cb.e_flags != edit )
- f->dirty = 1;
- f->cb.e_flags = edit;
- }
- }
- /* set the anchor formatter
- */
- void
- mkd_e_anchor(Document *f, mkd_callback_t format)
- {
- if ( f ) {
- if ( f->cb.e_anchor != format )
- f->dirty = 1;
- f->cb.e_anchor = format;
- }
- }
- /* set the url display/options deallocator
- */
- void
- mkd_e_free(Document *f, mkd_free_t dealloc)
- {
- if ( f ) {
- if ( f->cb.e_free != dealloc )
- f->dirty = 1;
- f->cb.e_free = dealloc;
- }
- }
- /* set the url display/options context data field
- */
- void
- mkd_e_data(Document *f, void *data)
- {
- if ( f ) {
- if ( f->cb.e_data != data )
- f->dirty = 1;
- f->cb.e_data = data;
- }
- }
- /* set the code block display callback
- */
- void
- mkd_e_code_format(Document *f, mkd_callback_t codefmt)
- {
- if ( f && (f->cb.e_codefmt != codefmt) ) {
- f->dirty = 1;
- f->cb.e_codefmt = codefmt;
- }
- }
- /* set the href prefix for markdown extra style footnotes
- */
- void
- mkd_ref_prefix(Document *f, char *data)
- {
- if ( f ) {
- if ( f->ref_prefix != data )
- f->dirty = 1;
- f->ref_prefix = data;
- }
- }
/* sayflags() is a debugging aid that draws a three-row picture of a
 * flag set: a row of tens digits, a row of units digits, and a row
 * with an X under every flag that is set.  It is compiled out, and
 * the macro below turns any call into the constant 1.
 */
#if 0
static void
sayflags(char *pfx, mkd_flag_t* flags, FILE *output)
{
    int bit;
    int indent = (int)strlen(pfx);

    /* top row: the tens digit above every tenth flag */
    fprintf(output, "%.*s/", indent, " ");
    for ( bit = 0; bit < MKD_NR_FLAGS; bit++ ) {
        if ( (bit == 0) || (bit % 10) )
            fputc(' ', output);
        else
            fputc((bit/10)+'0', output);
    }
    fputc('\\', output);
    fputc('\n', output);

    /* middle row: the prefix, then the units digit of each flag */
    fprintf(output, "%s|", pfx);
    for ( bit = 0; bit < MKD_NR_FLAGS; bit++ )
        fputc((bit%10)+'0', output);
    fputc('|', output);
    fputc('\n', output);

    /* bottom row: X marks the flags that are set */
    fprintf(output, "%.*s\\", indent, " ");
    for ( bit = 0; bit < MKD_NR_FLAGS; bit++ ) {
        if ( is_flag_set(flags, bit) )
            fputc('X', output);
        else
            fputc(' ', output);
    }
    fputc('/', output);
    fputc('\n', output);
}
#else
#define sayflags(pfx,flags,output) 1
#endif
- void
- ___mkd_or_flags(mkd_flag_t *dst, mkd_flag_t *src)
- {
- int i;
- for (i=0; i<MKD_NR_FLAGS; i++)
- if ( is_flag_set(src,i) )
- set_mkd_flag(dst, i);
- }
- int
- ___mkd_different(mkd_flag_t *dst, mkd_flag_t *src)
- {
- int i;
- mkd_flag_t zeroes;
- if ( dst == 0 || src == 0 ) {
- mkd_init_flags(&zeroes);
- if ( !dst )
- dst = &zeroes;
- if ( !src )
- src = &zeroes;
- }
- for (i=0; i < MKD_NR_FLAGS; i++)
- if ( is_flag_set(src,i) != is_flag_set(dst,i) )
- return 1;
- return 0;
- }
- int
- ___mkd_any_flags(mkd_flag_t *dst, mkd_flag_t *src)
- {
- int i;
- int count = 0;
- mkd_flag_t zeroes;
- if ( dst == 0 || src == 0 ) {
- mkd_init_flags(&zeroes);
- if ( !dst )
- dst = &zeroes;
- if ( !src )
- src = &zeroes;
- }
- for (i=0; i < MKD_NR_FLAGS; i++)
- if ( is_flag_set(src,i) && is_flag_set(dst,i) )
- ++count;
- return count;
- }
|