123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- #include <errno.h>
- #include <stdarg.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <wchar.h>
- #include "util.h"
- /* print to stderr, print error message of errno and exit().
- Unlike BSD err() it does not prefix __progname */
- __dead void
- err(int exitstatus, const char *fmt, ...)
- {
- va_list ap;
- int saved_errno;
- saved_errno = errno;
- if (fmt) {
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- fputs(": ", stderr);
- }
- fprintf(stderr, "%s\n", strerror(saved_errno));
- exit(exitstatus);
- }
- /* print to stderr and exit().
- Unlike BSD errx() it does not prefix __progname */
- __dead void
- errx(int exitstatus, const char *fmt, ...)
- {
- va_list ap;
- if (fmt) {
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- }
- fputs("\n", stderr);
- exit(exitstatus);
- }
/*
 * Abort with a diagnostic if the stream `fp` is in an error state.
 *
 * `mode` selects the check:
 *   'r'  exit via errx(1, ...) when the stream's error indicator is set.
 *   'w'  flush the stream, then exit via errx(1, ...) when the flush fails
 *        or the error indicator is set.
 * Any other `mode` value performs no check.
 *
 * `name` is only used in the diagnostic message.
 */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		/* flush first so a failure to write out buffered data is seen */
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
/*
 * strcasestr() included for portability.
 *
 * Find the first occurrence of the string `n` (needle) in `h` (haystack),
 * comparing bytes after folding both sides with the TOLOWER macro.
 *
 * Returns a pointer to the start of the match inside `h`, `h` itself when
 * the needle is empty, or NULL when there is no match.
 */
char *
strcasestr(const char *h, const char *n)
{
	const char *start;
	size_t k;

	if (n[0] == '\0')
		return (char *)h;

	for (start = h; *start != '\0'; start++) {
		/* count how many leading needle bytes match at this offset */
		k = 0;
		while (n[k] != '\0' &&
		       TOLOWER((unsigned char)n[k]) == TOLOWER((unsigned char)start[k]))
			k++;

		/* the whole needle was consumed: match found */
		if (n[k] == '\0')
			return (char *)start;
	}

	return NULL;
}
/*
 * Check if the string `s` has a non-empty scheme / protocol part.
 *
 * The scheme is the leading run of characters accepted by ISALPHA or ISDIGIT,
 * or equal to '+', '-' or '.'. Returns 1 when that run is non-empty and is
 * immediately followed by ':', otherwise 0. A string that starts with ':' has
 * an empty scheme and is treated as a path.
 */
int
uri_hasscheme(const char *s)
{
	const char *end = s;

	while (ISALPHA((unsigned char)*end) || ISDIGIT((unsigned char)*end) ||
	       *end == '+' || *end == '-' || *end == '.')
		end++;

	/* no scheme characters at all: not a scheme */
	if (end == s)
		return 0;

	return *end == ':';
}
- /* Parse URI string `s` into an uri structure `u`.
- Returns 0 on success or -1 on failure */
- int
- uri_parse(const char *s, struct uri *u)
- {
- const char *p = s;
- char *endptr;
- size_t i;
- long l;
- u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
- u->path[0] = u->query[0] = u->fragment[0] = '\0';
- /* protocol-relative */
- if (*p == '/' && *(p + 1) == '/') {
- p += 2; /* skip "//" */
- goto parseauth;
- }
- /* scheme / protocol part */
- for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
- *p == '+' || *p == '-' || *p == '.'; p++)
- ;
- /* scheme, except if empty and starts with ":" then it is a path */
- if (*p == ':' && p != s) {
- if (*(p + 1) == '/' && *(p + 2) == '/')
- p += 3; /* skip "://" */
- else
- p++; /* skip ":" */
- if ((size_t)(p - s) >= sizeof(u->proto))
- return -1; /* protocol too long */
- memcpy(u->proto, s, p - s);
- u->proto[p - s] = '\0';
- if (*(p - 1) != '/')
- goto parsepath;
- } else {
- p = s; /* no scheme format, reset to start */
- goto parsepath;
- }
- parseauth:
- /* userinfo (username:password) */
- i = strcspn(p, "@/?#");
- if (p[i] == '@') {
- if (i >= sizeof(u->userinfo))
- return -1; /* userinfo too long */
- memcpy(u->userinfo, p, i);
- u->userinfo[i] = '\0';
- p += i + 1;
- }
- /* IPv6 address */
- if (*p == '[') {
- /* bracket not found, host too short or too long */
- i = strcspn(p, "]");
- if (p[i] != ']' || i < 3)
- return -1;
- i++; /* including "]" */
- } else {
- /* domain / host part, skip until port, path or end. */
- i = strcspn(p, ":/?#");
- }
- if (i >= sizeof(u->host))
- return -1; /* host too long */
- memcpy(u->host, p, i);
- u->host[i] = '\0';
- p += i;
- /* port */
- if (*p == ':') {
- p++;
- if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
- return -1; /* port too long */
- memcpy(u->port, p, i);
- u->port[i] = '\0';
- /* check for valid port: range 1 - 65535, may be empty */
- errno = 0;
- l = strtol(u->port, &endptr, 10);
- if (i && (errno || *endptr || l <= 0 || l > 65535))
- return -1;
- p += i;
- }
- parsepath:
- /* path */
- if ((i = strcspn(p, "?#")) >= sizeof(u->path))
- return -1; /* path too long */
- memcpy(u->path, p, i);
- u->path[i] = '\0';
- p += i;
- /* query */
- if (*p == '?') {
- p++;
- if ((i = strcspn(p, "#")) >= sizeof(u->query))
- return -1; /* query too long */
- memcpy(u->query, p, i);
- u->query[i] = '\0';
- p += i;
- }
- /* fragment */
- if (*p == '#') {
- p++;
- if ((i = strlen(p)) >= sizeof(u->fragment))
- return -1; /* fragment too long */
- memcpy(u->fragment, p, i);
- u->fragment[i] = '\0';
- }
- return 0;
- }
- /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
- Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
- Returns 0 on success, -1 on error or truncation. */
- int
- uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
- {
- char *p;
- int c;
- strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
- if (u->proto[0] || u->host[0]) {
- strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
- strlcpy(a->host, u->host, sizeof(a->host));
- strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
- strlcpy(a->host, u->host, sizeof(a->host));
- strlcpy(a->port, u->port, sizeof(a->port));
- strlcpy(a->path, u->path, sizeof(a->path));
- strlcpy(a->query, u->query, sizeof(a->query));
- return 0;
- }
- strlcpy(a->proto, b->proto, sizeof(a->proto));
- strlcpy(a->host, b->host, sizeof(a->host));
- strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
- strlcpy(a->host, b->host, sizeof(a->host));
- strlcpy(a->port, b->port, sizeof(a->port));
- if (!u->path[0]) {
- strlcpy(a->path, b->path, sizeof(a->path));
- } else if (u->path[0] == '/') {
- strlcpy(a->path, u->path, sizeof(a->path));
- } else {
- a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
- a->path[1] = '\0';
- if ((p = strrchr(b->path, '/'))) {
- c = *(++p);
- *p = '\0'; /* temporary NUL-terminate */
- if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
- return -1;
- *p = c; /* restore */
- }
- if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
- return -1;
- }
- if (u->path[0] || u->query[0])
- strlcpy(a->query, u->query, sizeof(a->query));
- else
- strlcpy(a->query, b->query, sizeof(a->query));
- return 0;
- }
- int
- uri_format(char *buf, size_t bufsiz, struct uri *u)
- {
- return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
- u->proto,
- u->userinfo[0] ? u->userinfo : "",
- u->userinfo[0] ? "@" : "",
- u->host,
- u->port[0] ? ":" : "",
- u->port,
- u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
- u->path,
- u->query[0] ? "?" : "",
- u->query,
- u->fragment[0] ? "#" : "",
- u->fragment);
- }
- /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
- * terminators and assign these fields as pointers. If there are less fields
- * than expected then the field is an empty string constant. */
- void
- parseline(char *line, char *fields[FieldLast])
- {
- char *prev, *s;
- size_t i;
- for (prev = line, i = 0;
- (s = strchr(prev, '\t')) && i < FieldLast - 1;
- i++) {
- *s = '\0';
- fields[i] = prev;
- prev = s + 1;
- }
- fields[i++] = prev;
- /* make non-parsed fields empty. */
- for (; i < FieldLast; i++)
- fields[i] = "";
- }
/*
 * Parse the base-10 integer string `s` as a time value, assumes time_t is
 * signed.
 *
 * The whole string must be consumed by strtoll(): an empty string, trailing
 * characters (this includes a fractional part such as ".5") or a value
 * outside the range of long long are errors.
 *
 * On success the value is stored in `*t` when `t` is not NULL and 0 is
 * returned. On failure -1 is returned and `*t` is left untouched.
 * errno is reset to 0 before parsing.
 */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;
	if (*s == '\0' || *end != '\0')
		return -1;

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	   64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
/*
 * Write the string `s` to the stream `fp`, escaping the characters below as
 * HTML 2.0 / XML 1.0 character references:
 *
 *   <  ->  &lt;      >  ->  &gt;      &  ->  &amp;
 *   "  ->  &quot;    '  ->  &#39;
 *
 * The numeric reference is used for the single quote because the named form
 * "&apos;" is defined by XML but not by HTML 2.0. All other bytes are written
 * unchanged. Write errors are not reported here; check the stream afterwards.
 */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; ++s) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   putc(*s, fp);
		}
	}
}
- /* print `len` columns of characters. If string is shorter pad the rest with
- * characters `pad`. */
- void
- printutf8pad(FILE *fp, const char *s, size_t len, int pad)
- {
- wchar_t wc;
- size_t col = 0, i, slen;
- int inc, rl, w;
- if (!len)
- return;
- slen = strlen(s);
- for (i = 0; i < slen; i += inc) {
- inc = 1; /* next byte */
- if ((unsigned char)s[i] < 32) {
- continue; /* skip control characters */
- } else if ((unsigned char)s[i] >= 127) {
- rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
- inc = rl;
- if (rl < 0) {
- mbtowc(NULL, NULL, 0); /* reset state */
- inc = 1; /* invalid, seek next byte */
- w = 1; /* replacement char is one width */
- } else if ((w = wcwidth(wc)) == -1) {
- continue;
- }
- if (col + w > len || (col + w == len && s[i + inc])) {
- fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
- col++;
- break;
- } else if (rl < 0) {
- fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
- col++;
- continue;
- }
- fwrite(&s[i], 1, rl, fp);
- col += w;
- } else {
- /* optimization: simple ASCII character */
- if (col + 1 > len || (col + 1 == len && s[i + 1])) {
- fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
- col++;
- break;
- }
- putc(s[i], fp);
- col++;
- }
- }
- for (; col < len; ++col)
- putc(pad, fp);
- }
|