/* util.c */
  1. #include <errno.h>
  2. #include <stdarg.h>
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include <wchar.h>
  7. #include "util.h"
  8. /* print to stderr, print error message of errno and exit().
  9. Unlike BSD err() it does not prefix __progname */
  10. __dead void
  11. err(int exitstatus, const char *fmt, ...)
  12. {
  13. va_list ap;
  14. int saved_errno;
  15. saved_errno = errno;
  16. if (fmt) {
  17. va_start(ap, fmt);
  18. vfprintf(stderr, fmt, ap);
  19. va_end(ap);
  20. fputs(": ", stderr);
  21. }
  22. fprintf(stderr, "%s\n", strerror(saved_errno));
  23. exit(exitstatus);
  24. }
  25. /* print to stderr and exit().
  26. Unlike BSD errx() it does not prefix __progname */
  27. __dead void
  28. errx(int exitstatus, const char *fmt, ...)
  29. {
  30. va_list ap;
  31. if (fmt) {
  32. va_start(ap, fmt);
  33. vfprintf(stderr, fmt, ap);
  34. va_end(ap);
  35. }
  36. fputs("\n", stderr);
  37. exit(exitstatus);
  38. }
  39. /* Handle read or write errors for a FILE * stream */
  40. void
  41. checkfileerror(FILE *fp, const char *name, int mode)
  42. {
  43. if (mode == 'r' && ferror(fp))
  44. errx(1, "read error: %s", name);
  45. else if (mode == 'w' && (fflush(fp) || ferror(fp)))
  46. errx(1, "write error: %s", name);
  47. }
  48. /* strcasestr() included for portability */
  49. char *
  50. strcasestr(const char *h, const char *n)
  51. {
  52. size_t i;
  53. if (!n[0])
  54. return (char *)h;
  55. for (; *h; ++h) {
  56. for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) ==
  57. TOLOWER((unsigned char)h[i]); ++i)
  58. ;
  59. if (n[i] == '\0')
  60. return (char *)h;
  61. }
  62. return NULL;
  63. }
  64. /* Check if string has a non-empty scheme / protocol part. */
  65. int
  66. uri_hasscheme(const char *s)
  67. {
  68. const char *p = s;
  69. for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
  70. *p == '+' || *p == '-' || *p == '.'; p++)
  71. ;
  72. /* scheme, except if empty and starts with ":" then it is a path */
  73. return (*p == ':' && p != s);
  74. }
  75. /* Parse URI string `s` into an uri structure `u`.
  76. Returns 0 on success or -1 on failure */
  77. int
  78. uri_parse(const char *s, struct uri *u)
  79. {
  80. const char *p = s;
  81. char *endptr;
  82. size_t i;
  83. long l;
  84. u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
  85. u->path[0] = u->query[0] = u->fragment[0] = '\0';
  86. /* protocol-relative */
  87. if (*p == '/' && *(p + 1) == '/') {
  88. p += 2; /* skip "//" */
  89. goto parseauth;
  90. }
  91. /* scheme / protocol part */
  92. for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
  93. *p == '+' || *p == '-' || *p == '.'; p++)
  94. ;
  95. /* scheme, except if empty and starts with ":" then it is a path */
  96. if (*p == ':' && p != s) {
  97. if (*(p + 1) == '/' && *(p + 2) == '/')
  98. p += 3; /* skip "://" */
  99. else
  100. p++; /* skip ":" */
  101. if ((size_t)(p - s) >= sizeof(u->proto))
  102. return -1; /* protocol too long */
  103. memcpy(u->proto, s, p - s);
  104. u->proto[p - s] = '\0';
  105. if (*(p - 1) != '/')
  106. goto parsepath;
  107. } else {
  108. p = s; /* no scheme format, reset to start */
  109. goto parsepath;
  110. }
  111. parseauth:
  112. /* userinfo (username:password) */
  113. i = strcspn(p, "@/?#");
  114. if (p[i] == '@') {
  115. if (i >= sizeof(u->userinfo))
  116. return -1; /* userinfo too long */
  117. memcpy(u->userinfo, p, i);
  118. u->userinfo[i] = '\0';
  119. p += i + 1;
  120. }
  121. /* IPv6 address */
  122. if (*p == '[') {
  123. /* bracket not found, host too short or too long */
  124. i = strcspn(p, "]");
  125. if (p[i] != ']' || i < 3)
  126. return -1;
  127. i++; /* including "]" */
  128. } else {
  129. /* domain / host part, skip until port, path or end. */
  130. i = strcspn(p, ":/?#");
  131. }
  132. if (i >= sizeof(u->host))
  133. return -1; /* host too long */
  134. memcpy(u->host, p, i);
  135. u->host[i] = '\0';
  136. p += i;
  137. /* port */
  138. if (*p == ':') {
  139. p++;
  140. if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
  141. return -1; /* port too long */
  142. memcpy(u->port, p, i);
  143. u->port[i] = '\0';
  144. /* check for valid port: range 1 - 65535, may be empty */
  145. errno = 0;
  146. l = strtol(u->port, &endptr, 10);
  147. if (i && (errno || *endptr || l <= 0 || l > 65535))
  148. return -1;
  149. p += i;
  150. }
  151. parsepath:
  152. /* path */
  153. if ((i = strcspn(p, "?#")) >= sizeof(u->path))
  154. return -1; /* path too long */
  155. memcpy(u->path, p, i);
  156. u->path[i] = '\0';
  157. p += i;
  158. /* query */
  159. if (*p == '?') {
  160. p++;
  161. if ((i = strcspn(p, "#")) >= sizeof(u->query))
  162. return -1; /* query too long */
  163. memcpy(u->query, p, i);
  164. u->query[i] = '\0';
  165. p += i;
  166. }
  167. /* fragment */
  168. if (*p == '#') {
  169. p++;
  170. if ((i = strlen(p)) >= sizeof(u->fragment))
  171. return -1; /* fragment too long */
  172. memcpy(u->fragment, p, i);
  173. u->fragment[i] = '\0';
  174. }
  175. return 0;
  176. }
  177. /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
  178. Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
  179. Returns 0 on success, -1 on error or truncation. */
  180. int
  181. uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
  182. {
  183. char *p;
  184. int c;
  185. strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
  186. if (u->proto[0] || u->host[0]) {
  187. strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
  188. strlcpy(a->host, u->host, sizeof(a->host));
  189. strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
  190. strlcpy(a->host, u->host, sizeof(a->host));
  191. strlcpy(a->port, u->port, sizeof(a->port));
  192. strlcpy(a->path, u->path, sizeof(a->path));
  193. strlcpy(a->query, u->query, sizeof(a->query));
  194. return 0;
  195. }
  196. strlcpy(a->proto, b->proto, sizeof(a->proto));
  197. strlcpy(a->host, b->host, sizeof(a->host));
  198. strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
  199. strlcpy(a->host, b->host, sizeof(a->host));
  200. strlcpy(a->port, b->port, sizeof(a->port));
  201. if (!u->path[0]) {
  202. strlcpy(a->path, b->path, sizeof(a->path));
  203. } else if (u->path[0] == '/') {
  204. strlcpy(a->path, u->path, sizeof(a->path));
  205. } else {
  206. a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
  207. a->path[1] = '\0';
  208. if ((p = strrchr(b->path, '/'))) {
  209. c = *(++p);
  210. *p = '\0'; /* temporary NUL-terminate */
  211. if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
  212. return -1;
  213. *p = c; /* restore */
  214. }
  215. if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
  216. return -1;
  217. }
  218. if (u->path[0] || u->query[0])
  219. strlcpy(a->query, u->query, sizeof(a->query));
  220. else
  221. strlcpy(a->query, b->query, sizeof(a->query));
  222. return 0;
  223. }
  224. int
  225. uri_format(char *buf, size_t bufsiz, struct uri *u)
  226. {
  227. return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
  228. u->proto,
  229. u->userinfo[0] ? u->userinfo : "",
  230. u->userinfo[0] ? "@" : "",
  231. u->host,
  232. u->port[0] ? ":" : "",
  233. u->port,
  234. u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
  235. u->path,
  236. u->query[0] ? "?" : "",
  237. u->query,
  238. u->fragment[0] ? "#" : "",
  239. u->fragment);
  240. }
  241. /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
  242. * terminators and assign these fields as pointers. If there are less fields
  243. * than expected then the field is an empty string constant. */
  244. void
  245. parseline(char *line, char *fields[FieldLast])
  246. {
  247. char *prev, *s;
  248. size_t i;
  249. for (prev = line, i = 0;
  250. (s = strchr(prev, '\t')) && i < FieldLast - 1;
  251. i++) {
  252. *s = '\0';
  253. fields[i] = prev;
  254. prev = s + 1;
  255. }
  256. fields[i++] = prev;
  257. /* make non-parsed fields empty. */
  258. for (; i < FieldLast; i++)
  259. fields[i] = "";
  260. }
  261. /* Parse time to time_t, assumes time_t is signed, ignores fractions. */
  262. int
  263. strtotime(const char *s, time_t *t)
  264. {
  265. long long l;
  266. char *e;
  267. errno = 0;
  268. l = strtoll(s, &e, 10);
  269. if (errno || *s == '\0' || *e)
  270. return -1;
  271. /* NOTE: the type long long supports the 64-bit range. If time_t is
  272. 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
  273. if (t)
  274. *t = (time_t)l;
  275. return 0;
  276. }
  277. /* Escape characters below as HTML 2.0 / XML 1.0. */
  278. void
  279. xmlencode(const char *s, FILE *fp)
  280. {
  281. for (; *s; ++s) {
  282. switch (*s) {
  283. case '<': fputs("&lt;", fp); break;
  284. case '>': fputs("&gt;", fp); break;
  285. case '\'': fputs("&#39;", fp); break;
  286. case '&': fputs("&amp;", fp); break;
  287. case '"': fputs("&quot;", fp); break;
  288. default: putc(*s, fp);
  289. }
  290. }
  291. }
  292. /* print `len` columns of characters. If string is shorter pad the rest with
  293. * characters `pad`. */
  294. void
  295. printutf8pad(FILE *fp, const char *s, size_t len, int pad)
  296. {
  297. wchar_t wc;
  298. size_t col = 0, i, slen;
  299. int inc, rl, w;
  300. if (!len)
  301. return;
  302. slen = strlen(s);
  303. for (i = 0; i < slen; i += inc) {
  304. inc = 1; /* next byte */
  305. if ((unsigned char)s[i] < 32) {
  306. continue; /* skip control characters */
  307. } else if ((unsigned char)s[i] >= 127) {
  308. rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
  309. inc = rl;
  310. if (rl < 0) {
  311. mbtowc(NULL, NULL, 0); /* reset state */
  312. inc = 1; /* invalid, seek next byte */
  313. w = 1; /* replacement char is one width */
  314. } else if ((w = wcwidth(wc)) == -1) {
  315. continue;
  316. }
  317. if (col + w > len || (col + w == len && s[i + inc])) {
  318. fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
  319. col++;
  320. break;
  321. } else if (rl < 0) {
  322. fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
  323. col++;
  324. continue;
  325. }
  326. fwrite(&s[i], 1, rl, fp);
  327. col += w;
  328. } else {
  329. /* optimization: simple ASCII character */
  330. if (col + 1 > len || (col + 1 == len && s[i + 1])) {
  331. fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
  332. col++;
  333. break;
  334. }
  335. putc(s[i], fp);
  336. col++;
  337. }
  338. }
  339. for (; col < len; ++col)
  340. putc(pad, fp);
  341. }