wc.l 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. /* SPDX-FileCopyrightText: 2021 John Scott <jscott@posteo.net>
  2. * SPDX-License-Identifier: GPL-3.0-or-later
  3. *
  4. * We assume about the locale that, when converting a multibyte character
  5. * to a wide character, conversion of a complete character always results in
  6. * the initial shift state. I don't think Flex can cope with encodings
  7. * where this isn't the case anyway. */
  8. %option reentrant noyywrap extra-type="struct count*"
  9. %top{
  10. #define _POSIX_C_SOURCE 200809L
  11. }
  12. %{
  13. #include <assert.h>
  14. #include <errno.h>
  15. #include <locale.h>
  16. #include <pthread.h>
  17. #include <semaphore.h>
  18. #include <stdbool.h>
  19. #include <stdint.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <sys/stat.h>
  24. #include <unistd.h>
  25. #include <wchar.h>
  26. struct count {
  27. size_t lines;
  28. size_t words;
  29. size_t chars;
  30. };
  31. enum {
  32. BYTES = 1,
  33. LINES = 2,
  34. CHARS = 4,
  35. WORDS = 8
  36. };
  37. /* This is a bitwise combination of the prior options which
  38. * were specified on the command-line. */
  39. static int params;
  40. /* Trying to cancel a thread which isn't actually running is undefined behavior.
  41. * We follow Ulrich Drepper's recommendations for dealing with this problem:
  42. * https://udrepper.livejournal.com/16844.html (note that the relevant text
  43. * for ESRCH has since changed in POSIX.1, but not in a way that defeats his point).
  44. * This is a list of booleans indicating whether a thread is running or not. */
  45. static bool *thread_is_running;
  46. /* We want to sequentially number our child threads starting from zero so that
  47. * we can use these numbers as indices into thread_is_running. This is a global
  48. * that we can set just prior to a child starting which indicates the (n-1)'th
  49. * child thread we've started, and which the child can read to figure out its
  50. * own place in thread_is_running. Note that an int is suitable since we never
  51. * start more than argc threads. */
  52. static int seq_thread_id;
  53. /* We don't want to change seq_thread_id until the child thread we just started
  54. * has had the chance to read it. Our parent will wait for this semaphore to be
  55. * unlocked before clobbering it to make a new thread; the child will unlock it
  56. * after it has gotten to take a peek at seq_thread_id. */
  57. static sem_t seq_thread_id_guard;
  58. /* This mutex shall be locked by a thread just before it sets thread_is_running[j]
  59. * to false and bails out. The effect is that if, in the main thread, we lock this
  60. * mutex, then all threads are frozen: any threads that want to exit will be blocked
  61. * on us unlocking the mutex, hence we are guaranteed that none will exit by surprise
  62. * on us and we can safely cancel them. We are fortunate pthread_mutex_lock() is *not*
  63. * a cancellation point, otherwise this won't quite work like we want it to if multiple
  64. * consecutive cancellation requests are sent (which we don't do). */
  65. static pthread_mutex_t thread_is_running_guard = PTHREAD_MUTEX_INITIALIZER;
  66. %}
  67. %%
  68. [^[:blank:]\n]+ {
  69. if(yyextra->words == SIZE_MAX) {
  70. char errmsg[NL_TEXTMAX];
  71. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  72. ? "Failed to count words\n"
  73. : "Failed to count words: %s\n", errmsg);
  74. return 1;
  75. }
  76. yyextra->words++;
  77. if(params & CHARS) {
  78. const size_t l = mbsrtowcs(NULL, &(const char*){yytext}, 0, &(mbstate_t){0});
  79. if(l == (size_t)-1) {
  80. perror("Failed to read multibyte string");
  81. return 1;
  82. } else if(yyextra->chars > SIZE_MAX - l) {
  83. char errmsg[NL_TEXTMAX];
  84. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  85. ? "Failed to count characters\n"
  86. : "Failed to count characters: %s\n", errmsg);
  87. return 1;
  88. }
  89. yyextra->chars += l;
  90. } else {
  91. #pragma GCC diagnostic ignored "-Wsign-compare"
  92. if(yyleng > SIZE_MAX || yyextra->chars > SIZE_MAX - yyleng) {
  93. #pragma GCC diagnostic pop
  94. char errmsg[NL_TEXTMAX];
  95. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  96. ? "Failed to count bytes\n"
  97. : "Failed to count bytes: %s\n", errmsg);
  98. return 1;
  99. }
  100. yyextra->chars += yyleng;
  101. }
  102. }
  103. \n {
  104. if(yyextra->chars == SIZE_MAX) {
  105. char errmsg[NL_TEXTMAX];
  106. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  107. ? "Failed to count bytes/characters\n"
  108. : "Failed to count bytes/characters: %s\n", errmsg);
  109. return 1;
  110. }
  111. assert(yyextra->lines < SIZE_MAX);
  112. yyextra->chars++;
  113. yyextra->lines++;
  114. }
  115. . {
  116. if(params & CHARS) {
  117. #pragma GCC diagnostic ignored "-Wsign-compare"
  118. assert(yyleng <= SIZE_MAX);
  119. #pragma GCC diagnostic pop
  120. const size_t l = mbrlen(yytext, yyleng, &(mbstate_t){0});
  121. if(l == (size_t)-2) {
  122. char errmsg[NL_TEXTMAX];
  123. fprintf(stderr, strerror_r(EILSEQ, errmsg, sizeof(errmsg))
  124. ? "Failed to read complete multibyte character\n"
  125. : "Failed to read complete multibyte character: %s\n", errmsg);
  126. return 1;
  127. } else if(l == (size_t)-1) {
  128. perror("Failed to read multibyte character");
  129. return 1;
  130. } else if(yyextra->chars > SIZE_MAX - l) {
  131. char errmsg[NL_TEXTMAX];
  132. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  133. ? "Failed to count characters\n"
  134. : "Failed to count characters: %s\n", errmsg);
  135. return 1;
  136. }
  137. yyextra->chars += l;
  138. } else if(yyextra->chars > SIZE_MAX - yyleng) {
  139. char errmsg[NL_TEXTMAX];
  140. fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
  141. ? "Failed to count bytes\n"
  142. : "Failed to count bytes: %s\n", errmsg);
  143. return 1;
  144. }
  145. yyextra->chars += yyleng;
  146. }
  147. %%
  148. /* We rely on these types being the same in order that destroy_scanner()
  149. * has a prototype compatible with pthread_cleanup_{push, pop}. */
  150. static_assert(_Generic((yyscan_t){0}, void*: true, default: false), "yyscan_t != void*");
  151. static void destroy_scanner(yyscan_t scanner) {
  152. if(yylex_destroy(scanner)) {
  153. perror("Failed to destroy scanner");
  154. abort();
  155. }
  156. }
  157. static void destroy_file(void *stream) {
  158. if(fclose(stream) == EOF) {
  159. perror("Failed to close stream");
  160. abort();
  161. }
  162. }
  163. static void dfree(void *ptr) {
  164. free(*(char**)ptr);
  165. }
  166. static void thread_stop(void *index) {
  167. int k = pthread_mutex_lock(&thread_is_running_guard);
  168. if(k) {
  169. char errstr[NL_TEXTMAX];
  170. fprintf(stderr, strerror_r(k, errstr, sizeof(errstr))
  171. ? "Failed to lock mutex\n"
  172. : "Failed to lock mutex: %s\n", errstr);
  173. abort();
  174. }
  175. assert(thread_is_running[*(int*)index]);
  176. thread_is_running[*(int*)index] = false;
  177. k = pthread_mutex_unlock(&thread_is_running_guard);
  178. if(k) {
  179. abort();
  180. }
  181. }
  182. /* According to the command-line options, print the count of
  183. * what was found with an optional filename, which may be omitted. */
  184. static bool show_count(const struct count a[restrict static 1], const char *restrict filename) {
  185. char *buf = NULL;
  186. size_t buflen;
  187. FILE *const memstream = open_memstream(&buf, &buflen);
  188. if(!memstream) {
  189. perror("Failed to create memory stream");
  190. return false;
  191. }
  192. bool we_good = true;
  193. pthread_cleanup_push(dfree, &buf)
  194. pthread_cleanup_push(destroy_file, memstream);
  195. int k = 1;
  196. bool started = false;
  197. if(params & LINES) {
  198. k = fprintf(memstream, "%zu", a->lines);
  199. started = true;
  200. }
  201. if(k > 0 && params & WORDS) {
  202. k = fprintf(memstream, started ? " %zu" : "%zu", a->words);
  203. started = true;
  204. }
  205. if(k > 0 && (params & BYTES || params & CHARS)) {
  206. fprintf(memstream, started ? " %zu" : "%zu", a->chars);
  207. }
  208. if(k < 0 || (filename && (putc(' ', memstream) == EOF || fputs(filename, memstream) == EOF))) {
  209. perror("Failed to print count");
  210. we_good = false;
  211. }
  212. pthread_cleanup_pop(true);
  213. if(we_good && puts(buf) == EOF) {
  214. perror("Failed to print count");
  215. we_good = false;
  216. }
  217. pthread_cleanup_pop(true);
  218. return we_good;
  219. }
  220. /* Given a filename, print the count of lines, words, and bytes/characters
  221. * in it as appropriate. Also return a pointer to a dynamically-allocated
  222. * struct count describing what was found so we can get a total later.
  223. * If filename is NULL, use standard input.
  224. * Return NULL on error. */
  225. void *do_count(void *filename) {
  226. int my_seq_thread_id = seq_thread_id;
  227. if(sem_post(&seq_thread_id_guard) == -1) {
  228. perror("Failed to increment semaphore");
  229. abort();
  230. }
  231. int k = pthread_mutex_lock(&thread_is_running_guard);
  232. if(k) {
  233. char errstr[NL_TEXTMAX];
  234. fprintf(stderr, strerror_r(k, errstr, sizeof(errstr))
  235. ? "Failed to lock mutex\n"
  236. : "Failed to lock mutex: %s\n", errstr);
  237. abort();
  238. }
  239. assert(!thread_is_running[my_seq_thread_id]);
  240. thread_is_running[my_seq_thread_id] = true;
  241. k = pthread_mutex_unlock(&thread_is_running_guard);
  242. if(k) {
  243. abort();
  244. }
  245. /* This has to be declared up here because we want c to
  246. * have scope outside of all of the pthread_cleanup_{push, pop} calls. */
  247. struct count *c;
  248. pthread_cleanup_push(thread_stop, &my_seq_thread_id);
  249. c = calloc(1, sizeof(*c));
  250. if(!c) {
  251. perror("Failed to allocate memory for count");
  252. pthread_exit(NULL);
  253. }
  254. pthread_cleanup_push(free, c);
  255. yyscan_t scanner;
  256. if(yylex_init_extra(c, &scanner)) {
  257. perror("Failed to initialize scanner");
  258. pthread_exit(NULL);
  259. }
  260. pthread_cleanup_push(destroy_scanner, scanner);
  261. FILE *stream = filename ? fopen(filename, "r") : stdin;
  262. if(!stream) {
  263. char errstr[NL_TEXTMAX];
  264. #pragma GCC diagnostic ignored "-Wformat"
  265. fprintf(stderr, strerror_r(errno, errstr, sizeof(errstr))
  266. ? "Failed to open %s\n"
  267. : "Failed to open %s: %s\n", filename, errstr);
  268. #pragma GCC diagnostic pop
  269. pthread_exit(NULL);
  270. }
  271. pthread_cleanup_push(destroy_file, stream);
  272. struct stat details;
  273. const int fd = fileno(stream);
  274. if(fd == -1) {
  275. perror("Failed to query file destriptor for stream");
  276. pthread_exit(NULL);
  277. }
  278. if(fstat(fd, &details) == -1) {
  279. char errstr[NL_TEXTMAX];
  280. #pragma GCC diagnostic ignored "-Wformat"
  281. fprintf(stderr, strerror_r(errno, errstr, sizeof(errstr))
  282. ? "Failed to get details on %s\n"
  283. : "Failed to get details on %s: %s\n", filename, errstr);
  284. #pragma GCC diagnostic pop
  285. pthread_exit(NULL);
  286. }
  287. if(S_ISDIR(details.st_mode)) {
  288. char errstr[NL_TEXTMAX];
  289. #pragma GCC diagnostic ignored "-Wformat"
  290. fprintf(stderr, strerror_r(EISDIR, errstr, sizeof(errstr))
  291. ? "Failed to open %s\n"
  292. : "Failed to open %s: %s\n", filename, errstr);
  293. #pragma GCC diagnostic pop
  294. pthread_exit(NULL);
  295. }
  296. yyset_in(stream, scanner);
  297. if(yylex(scanner) || !show_count(c, filename)) {
  298. pthread_exit(NULL);
  299. }
  300. pthread_cleanup_pop(true); /* destroy_file(stream) */
  301. pthread_cleanup_pop(true); /* destroy_scanner(scanner) */
  302. pthread_cleanup_pop(false); /* DO NOT free(c) */
  303. pthread_cleanup_pop(true); /* thread_stop(&my_seq_thread_id) */
  304. pthread_exit(c);
  305. }
  306. int main(int argc, char *argv[]) {
  307. if(!setlocale(LC_ALL, "")) {
  308. fputs("Failed to enable default locale\n", stderr);
  309. exit(EXIT_FAILURE);
  310. }
  311. int opt;
  312. while((opt = getopt(argc, argv, "clmw")) != -1) {
  313. switch(opt) {
  314. case 'c':
  315. params |= BYTES;
  316. if(params & CHARS) {
  317. case 'm':
  318. params |= CHARS;
  319. if(params & BYTES) {
  320. fputs("-c and -m may not both be specified\n", stderr);
  321. exit(EXIT_FAILURE);
  322. }
  323. }
  324. break;
  325. case 'l':
  326. params |= LINES;
  327. break;
  328. case 'w':
  329. params |= WORDS;
  330. break;
  331. case '?':
  332. goto endthread_is_running_guard;
  333. }
  334. }
  335. argc -= optind;
  336. argv += optind;
  337. if(!params) {
  338. params = WORDS|LINES|BYTES;
  339. }
  340. if(!argc) {
  341. argc++;
  342. /* argv[0] is NULL, so this will do the right thing
  343. * when creating the child thread. */
  344. }
  345. if(setvbuf(stdout, NULL, _IONBF, 0)) {
  346. fputs("Failed to disable buffering on standard output\n", stderr);
  347. exit(EXIT_FAILURE);
  348. }
  349. /* To make cleanup easier, we do all of our dynamic memory
  350. * allocations before we get any threads started. */
  351. pthread_t *ids = calloc(argc, sizeof(*ids));
  352. if(!ids) {
  353. perror("Failed to allocate thread ID list");
  354. goto endthread_is_running_guard;
  355. }
  356. void **counts = calloc(argc, sizeof(*counts));
  357. if(!counts) {
  358. perror("Failed to allocate count list");
  359. goto endids;
  360. }
  361. thread_is_running = calloc(argc, sizeof(*thread_is_running));
  362. if(!thread_is_running) {
  363. perror("Failed to allocate running thread list");
  364. goto endcounts;
  365. }
  366. if(sem_init(&seq_thread_id_guard, false, 1U) == -1) {
  367. perror("Failed to initialize semaphore");
  368. goto endthread_is_running;
  369. }
  370. for(int i = 0; i < argc; i++) {
  371. if(sem_wait(&seq_thread_id_guard) == -1) {
  372. perror("Failed to wait on semaphore");
  373. abort();
  374. }
  375. seq_thread_id = i;
  376. int k;
  377. tryagain:
  378. k = pthread_create(ids + i, NULL, do_count, argv[i]);
  379. switch(k) {
  380. case 0:
  381. break;
  382. case EAGAIN:
  383. if(sched_yield() == -1) {
  384. perror("Failed to yield");
  385. }
  386. goto tryagain;
  387. default:
  388. fprintf(stderr, "Failed to create thread: %s\n", strerror(k));
  389. goto bail;
  390. }
  391. }
  392. if(sem_destroy(&seq_thread_id_guard) == -1) {
  393. perror("Failed to destroy semaphore");
  394. abort();
  395. }
  396. for(int i = 0; i < argc; i++) {
  397. int k = pthread_join(ids[i], counts + i);
  398. if(k) {
  399. fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
  400. abort();
  401. }
  402. if(!counts[i]) {
  403. for(int j = i + 1; j < argc; j++) {
  404. k = pthread_cancel(ids[j]);
  405. if(k) {
  406. fprintf(stderr, "Failed to cancel thread: %s\n", strerror(k));
  407. abort();
  408. }
  409. }
  410. void *retval;
  411. for(int j = i + 1; j < argc; j++) {
  412. k = pthread_join(ids[j], &retval);
  413. if(k) {
  414. fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
  415. abort();
  416. }
  417. if(retval != PTHREAD_CANCELED) {
  418. free(retval);
  419. }
  420. }
  421. goto endseq_thread_id_guard;
  422. }
  423. }
  424. free(ids);
  425. free(thread_is_running);
  426. int k = pthread_mutex_destroy(&thread_is_running_guard);
  427. if(k) {
  428. fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
  429. abort();
  430. }
  431. struct count total = {0};
  432. for(int i = 0; i < argc; i++) {
  433. total.lines += ((struct count*)counts[i])->lines;
  434. total.words += ((struct count*)counts[i])->words;
  435. total.chars += ((struct count*)counts[i])->chars;
  436. free(counts[i]);
  437. }
  438. free(counts);
  439. if(argc >= 2) {
  440. exit(show_count(&total, "total") ? EXIT_SUCCESS : EXIT_FAILURE);
  441. }
  442. exit(EXIT_SUCCESS);
  443. bail:
  444. k = pthread_mutex_lock(&thread_is_running_guard);
  445. if(k) {
  446. fprintf(stderr, "Failed to lock mutex: %s\n", strerror(k));
  447. abort();
  448. }
  449. for(int i = 0; i < argc; i++) {
  450. if(thread_is_running[i]) {
  451. int k = pthread_cancel(ids[i]);
  452. if(k) {
  453. fprintf(stderr, "Failed to cancel thread %d: %s\n", i, strerror(k));
  454. abort();
  455. }
  456. }
  457. }
  458. k = pthread_mutex_unlock(&thread_is_running_guard);
  459. if(k) {
  460. fprintf(stderr, "Failed to unlock mutex: %s\n", strerror(k));
  461. abort();
  462. }
  463. void *retval;
  464. for(int i = 0; i < argc; i++) {
  465. k = pthread_join(ids[i], &retval);
  466. if(k) {
  467. fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
  468. abort();
  469. }
  470. if(retval != PTHREAD_CANCELED) {
  471. free(retval);
  472. }
  473. }
  474. endseq_thread_id_guard:
  475. if(sem_destroy(&seq_thread_id_guard) == -1) {
  476. perror("Failed to destroy semaphore");
  477. abort();
  478. }
  479. endthread_is_running:
  480. free(thread_is_running);
  481. endcounts:
  482. free(counts);
  483. endids:
  484. free(ids);
  485. endthread_is_running_guard:
  486. k = pthread_mutex_destroy(&thread_is_running_guard);
  487. if(k) {
  488. fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
  489. abort();
  490. }
  491. exit(EXIT_FAILURE);
  492. }