123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539 |
- /* SPDX-FileCopyrightText: 2021 John Scott <jscott@posteo.net>
- * SPDX-License-Identifier: GPL-3.0-or-later
- *
- * We assume about the locale that, when converting a multibyte character
- * to a wide character, conversion of a complete character always results in
- * the initial shift state. I don't think Flex can cope with encodings
- * where this isn't the case anyway. */
- %option reentrant noyywrap extra-type="struct count*"
- %top{
- #define _POSIX_C_SOURCE 200809L
- }
- %{
- #include <assert.h>
- #include <errno.h>
- #include <locale.h>
- #include <pthread.h>
- #include <semaphore.h>
- #include <stdbool.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <sys/stat.h>
- #include <unistd.h>
- #include <wchar.h>
/* Running tallies for one input (or for the grand total). */
struct count {
    size_t lines; /* newline characters seen */
    size_t words; /* maximal runs of non-blank, non-newline bytes */
    size_t chars; /* bytes, or characters when CHARS is selected */
};

/* One bit per thing the user may ask us to report. */
enum {
    BYTES = 1 << 0,
    LINES = 1 << 1,
    CHARS = 1 << 2,
    WORDS = 1 << 3
};
/* Bitwise OR of the BYTES/LINES/CHARS/WORDS flags that were
 * requested on the command line. */
static int params = 0;

/* Cancelling a thread that is not actually running is undefined behavior.
 * We follow Ulrich Drepper's advice for coping with that:
 * https://udrepper.livejournal.com/16844.html (the ESRCH wording in POSIX.1
 * has changed since, but not in a way that defeats his point).
 * Entry j of this array says whether child thread j is currently running. */
static bool *thread_is_running = NULL;

/* Child threads are numbered sequentially from zero so that the number can
 * index thread_is_running. The parent stores the number of the child it is
 * about to start here, and the child reads it to learn its own slot.
 * An int is wide enough because we never start more than argc threads. */
static int seq_thread_id = 0;

/* Hand-off for seq_thread_id: the parent waits on this semaphore before
 * overwriting seq_thread_id for the next child, and each child posts it
 * once it has read its own value. */
static sem_t seq_thread_id_guard;

/* A thread locks this mutex just before it sets thread_is_running[j] to false
 * and bails out. Consequently, while the main thread holds the mutex, every
 * thread is frozen with respect to exiting: any thread that wants to exit is
 * blocked until we unlock, so none can disappear by surprise and the running
 * ones can be cancelled safely. We are fortunate that pthread_mutex_lock() is
 * *not* a cancellation point; otherwise this would not quite work if several
 * consecutive cancellation requests were sent (which we do not do). */
static pthread_mutex_t thread_is_running_guard = PTHREAD_MUTEX_INITIALIZER;
- %}
- %%
- [^[:blank:]\n]+ {
- if(yyextra->words == SIZE_MAX) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count words\n"
- : "Failed to count words: %s\n", errmsg);
- return 1;
- }
- yyextra->words++;
- if(params & CHARS) {
- const size_t l = mbsrtowcs(NULL, &(const char*){yytext}, 0, &(mbstate_t){0});
- if(l == (size_t)-1) {
- perror("Failed to read multibyte string");
- return 1;
- } else if(yyextra->chars > SIZE_MAX - l) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count characters\n"
- : "Failed to count characters: %s\n", errmsg);
- return 1;
- }
- yyextra->chars += l;
- } else {
- #pragma GCC diagnostic ignored "-Wsign-compare"
- if(yyleng > SIZE_MAX || yyextra->chars > SIZE_MAX - yyleng) {
- #pragma GCC diagnostic pop
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count bytes\n"
- : "Failed to count bytes: %s\n", errmsg);
- return 1;
- }
- yyextra->chars += yyleng;
- }
- }
- \n {
- if(yyextra->chars == SIZE_MAX) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count bytes/characters\n"
- : "Failed to count bytes/characters: %s\n", errmsg);
- return 1;
- }
- assert(yyextra->lines < SIZE_MAX);
- yyextra->chars++;
- yyextra->lines++;
- }
- . {
- if(params & CHARS) {
- #pragma GCC diagnostic ignored "-Wsign-compare"
- assert(yyleng <= SIZE_MAX);
- #pragma GCC diagnostic pop
- const size_t l = mbrlen(yytext, yyleng, &(mbstate_t){0});
- if(l == (size_t)-2) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EILSEQ, errmsg, sizeof(errmsg))
- ? "Failed to read complete multibyte character\n"
- : "Failed to read complete multibyte character: %s\n", errmsg);
- return 1;
- } else if(l == (size_t)-1) {
- perror("Failed to read multibyte character");
- return 1;
- } else if(yyextra->chars > SIZE_MAX - l) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count characters\n"
- : "Failed to count characters: %s\n", errmsg);
- return 1;
- }
- yyextra->chars += l;
- } else if(yyextra->chars > SIZE_MAX - yyleng) {
- char errmsg[NL_TEXTMAX];
- fprintf(stderr, strerror_r(EOVERFLOW, errmsg, sizeof(errmsg))
- ? "Failed to count bytes\n"
- : "Failed to count bytes: %s\n", errmsg);
- return 1;
- }
- yyextra->chars += yyleng;
- }
- %%
- /* We rely on these types being the same in order that destroy_scanner()
- * has a prototype compatible with pthread_cleanup_{push, pop}. */
- static_assert(_Generic((yyscan_t){0}, void*: true, default: false), "yyscan_t != void*");
- static void destroy_scanner(yyscan_t scanner) {
- if(yylex_destroy(scanner)) {
- perror("Failed to destroy scanner");
- abort();
- }
- }
/* Cleanup handler: close the FILE* passed as stream, aborting if
 * fclose() reports an error. */
static void destroy_file(void *stream) {
    FILE *const fp = stream;
    if(fclose(fp) != EOF) {
        return;
    }
    perror("Failed to close stream");
    abort();
}
/* Cleanup handler: ptr is the address of a char* variable; free whatever
 * that variable points to at the time the handler runs. */
static void dfree(void *ptr) {
    char **const slot = ptr;
    free(*slot);
}
- static void thread_stop(void *index) {
- int k = pthread_mutex_lock(&thread_is_running_guard);
- if(k) {
- char errstr[NL_TEXTMAX];
- fprintf(stderr, strerror_r(k, errstr, sizeof(errstr))
- ? "Failed to lock mutex\n"
- : "Failed to lock mutex: %s\n", errstr);
- abort();
- }
- assert(thread_is_running[*(int*)index]);
- thread_is_running[*(int*)index] = false;
- k = pthread_mutex_unlock(&thread_is_running_guard);
- if(k) {
- abort();
- }
- }
- /* According to the command-line options, print the count of
- * what was found with an optional filename, which may be omitted. */
- static bool show_count(const struct count a[restrict static 1], const char *restrict filename) {
- char *buf = NULL;
- size_t buflen;
- FILE *const memstream = open_memstream(&buf, &buflen);
- if(!memstream) {
- perror("Failed to create memory stream");
- return false;
- }
- bool we_good = true;
- pthread_cleanup_push(dfree, &buf)
- pthread_cleanup_push(destroy_file, memstream);
- int k = 1;
- bool started = false;
- if(params & LINES) {
- k = fprintf(memstream, "%zu", a->lines);
- started = true;
- }
- if(k > 0 && params & WORDS) {
- k = fprintf(memstream, started ? " %zu" : "%zu", a->words);
- started = true;
- }
- if(k > 0 && (params & BYTES || params & CHARS)) {
- fprintf(memstream, started ? " %zu" : "%zu", a->chars);
- }
- if(k < 0 || (filename && (putc(' ', memstream) == EOF || fputs(filename, memstream) == EOF))) {
- perror("Failed to print count");
- we_good = false;
- }
- pthread_cleanup_pop(true);
- if(we_good && puts(buf) == EOF) {
- perror("Failed to print count");
- we_good = false;
- }
- pthread_cleanup_pop(true);
- return we_good;
- }
- /* Given a filename, print the count of lines, words, and bytes/characters
- * in it as appropriate. Also return a pointer to a dynamically-allocated
- * struct count describing what was found so we can get a total later.
- * If filename is NULL, use standard input.
- * Return NULL on error. */
- void *do_count(void *filename) {
- int my_seq_thread_id = seq_thread_id;
- if(sem_post(&seq_thread_id_guard) == -1) {
- perror("Failed to increment semaphore");
- abort();
- }
- int k = pthread_mutex_lock(&thread_is_running_guard);
- if(k) {
- char errstr[NL_TEXTMAX];
- fprintf(stderr, strerror_r(k, errstr, sizeof(errstr))
- ? "Failed to lock mutex\n"
- : "Failed to lock mutex: %s\n", errstr);
- abort();
- }
- assert(!thread_is_running[my_seq_thread_id]);
- thread_is_running[my_seq_thread_id] = true;
- k = pthread_mutex_unlock(&thread_is_running_guard);
- if(k) {
- abort();
- }
- /* This has to be declared up here because we want c to
- * have scope outside of all of the pthread_cleanup_{push, pop} calls. */
- struct count *c;
- pthread_cleanup_push(thread_stop, &my_seq_thread_id);
- c = calloc(1, sizeof(*c));
- if(!c) {
- perror("Failed to allocate memory for count");
- pthread_exit(NULL);
- }
- pthread_cleanup_push(free, c);
- yyscan_t scanner;
- if(yylex_init_extra(c, &scanner)) {
- perror("Failed to initialize scanner");
- pthread_exit(NULL);
- }
- pthread_cleanup_push(destroy_scanner, scanner);
- FILE *stream = filename ? fopen(filename, "r") : stdin;
- if(!stream) {
- char errstr[NL_TEXTMAX];
- #pragma GCC diagnostic ignored "-Wformat"
- fprintf(stderr, strerror_r(errno, errstr, sizeof(errstr))
- ? "Failed to open %s\n"
- : "Failed to open %s: %s\n", filename, errstr);
- #pragma GCC diagnostic pop
- pthread_exit(NULL);
- }
- pthread_cleanup_push(destroy_file, stream);
- struct stat details;
- const int fd = fileno(stream);
- if(fd == -1) {
- perror("Failed to query file destriptor for stream");
- pthread_exit(NULL);
- }
- if(fstat(fd, &details) == -1) {
- char errstr[NL_TEXTMAX];
- #pragma GCC diagnostic ignored "-Wformat"
- fprintf(stderr, strerror_r(errno, errstr, sizeof(errstr))
- ? "Failed to get details on %s\n"
- : "Failed to get details on %s: %s\n", filename, errstr);
- #pragma GCC diagnostic pop
- pthread_exit(NULL);
- }
- if(S_ISDIR(details.st_mode)) {
- char errstr[NL_TEXTMAX];
- #pragma GCC diagnostic ignored "-Wformat"
- fprintf(stderr, strerror_r(EISDIR, errstr, sizeof(errstr))
- ? "Failed to open %s\n"
- : "Failed to open %s: %s\n", filename, errstr);
- #pragma GCC diagnostic pop
- pthread_exit(NULL);
- }
- yyset_in(stream, scanner);
- if(yylex(scanner) || !show_count(c, filename)) {
- pthread_exit(NULL);
- }
- pthread_cleanup_pop(true); /* destroy_file(stream) */
- pthread_cleanup_pop(true); /* destroy_scanner(scanner) */
- pthread_cleanup_pop(false); /* DO NOT free(c) */
- pthread_cleanup_pop(true); /* thread_stop(&my_seq_thread_id) */
- pthread_exit(c);
- }
- int main(int argc, char *argv[]) {
- if(!setlocale(LC_ALL, "")) {
- fputs("Failed to enable default locale\n", stderr);
- exit(EXIT_FAILURE);
- }
- int opt;
- while((opt = getopt(argc, argv, "clmw")) != -1) {
- switch(opt) {
- case 'c':
- params |= BYTES;
- if(params & CHARS) {
- case 'm':
- params |= CHARS;
- if(params & BYTES) {
- fputs("-c and -m may not both be specified\n", stderr);
- exit(EXIT_FAILURE);
- }
- }
- break;
- case 'l':
- params |= LINES;
- break;
- case 'w':
- params |= WORDS;
- break;
- case '?':
- goto endthread_is_running_guard;
- }
- }
- argc -= optind;
- argv += optind;
- if(!params) {
- params = WORDS|LINES|BYTES;
- }
- if(!argc) {
- argc++;
- /* argv[0] is NULL, so this will do the right thing
- * when creating the child thread. */
- }
- if(setvbuf(stdout, NULL, _IONBF, 0)) {
- fputs("Failed to disable buffering on standard output\n", stderr);
- exit(EXIT_FAILURE);
- }
- /* To make cleanup easier, we do all of our dynamic memory
- * allocations before we get any threads started. */
- pthread_t *ids = calloc(argc, sizeof(*ids));
- if(!ids) {
- perror("Failed to allocate thread ID list");
- goto endthread_is_running_guard;
- }
- void **counts = calloc(argc, sizeof(*counts));
- if(!counts) {
- perror("Failed to allocate count list");
- goto endids;
- }
- thread_is_running = calloc(argc, sizeof(*thread_is_running));
- if(!thread_is_running) {
- perror("Failed to allocate running thread list");
- goto endcounts;
- }
- if(sem_init(&seq_thread_id_guard, false, 1U) == -1) {
- perror("Failed to initialize semaphore");
- goto endthread_is_running;
- }
- for(int i = 0; i < argc; i++) {
- if(sem_wait(&seq_thread_id_guard) == -1) {
- perror("Failed to wait on semaphore");
- abort();
- }
- seq_thread_id = i;
- int k;
- tryagain:
- k = pthread_create(ids + i, NULL, do_count, argv[i]);
- switch(k) {
- case 0:
- break;
- case EAGAIN:
- if(sched_yield() == -1) {
- perror("Failed to yield");
- }
- goto tryagain;
- default:
- fprintf(stderr, "Failed to create thread: %s\n", strerror(k));
- goto bail;
- }
- }
- if(sem_destroy(&seq_thread_id_guard) == -1) {
- perror("Failed to destroy semaphore");
- abort();
- }
- for(int i = 0; i < argc; i++) {
- int k = pthread_join(ids[i], counts + i);
- if(k) {
- fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
- abort();
- }
- if(!counts[i]) {
- for(int j = i + 1; j < argc; j++) {
- k = pthread_cancel(ids[j]);
- if(k) {
- fprintf(stderr, "Failed to cancel thread: %s\n", strerror(k));
- abort();
- }
- }
- void *retval;
- for(int j = i + 1; j < argc; j++) {
- k = pthread_join(ids[j], &retval);
- if(k) {
- fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
- abort();
- }
- if(retval != PTHREAD_CANCELED) {
- free(retval);
- }
- }
- goto endseq_thread_id_guard;
- }
- }
- free(ids);
- free(thread_is_running);
- int k = pthread_mutex_destroy(&thread_is_running_guard);
- if(k) {
- fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
- abort();
- }
- struct count total = {0};
- for(int i = 0; i < argc; i++) {
- total.lines += ((struct count*)counts[i])->lines;
- total.words += ((struct count*)counts[i])->words;
- total.chars += ((struct count*)counts[i])->chars;
- free(counts[i]);
- }
- free(counts);
- if(argc >= 2) {
- exit(show_count(&total, "total") ? EXIT_SUCCESS : EXIT_FAILURE);
- }
- exit(EXIT_SUCCESS);
- bail:
- k = pthread_mutex_lock(&thread_is_running_guard);
- if(k) {
- fprintf(stderr, "Failed to lock mutex: %s\n", strerror(k));
- abort();
- }
- for(int i = 0; i < argc; i++) {
- if(thread_is_running[i]) {
- int k = pthread_cancel(ids[i]);
- if(k) {
- fprintf(stderr, "Failed to cancel thread %d: %s\n", i, strerror(k));
- abort();
- }
- }
- }
- k = pthread_mutex_unlock(&thread_is_running_guard);
- if(k) {
- fprintf(stderr, "Failed to unlock mutex: %s\n", strerror(k));
- abort();
- }
- void *retval;
- for(int i = 0; i < argc; i++) {
- k = pthread_join(ids[i], &retval);
- if(k) {
- fprintf(stderr, "Failed to join with thread: %s\n", strerror(k));
- abort();
- }
- if(retval != PTHREAD_CANCELED) {
- free(retval);
- }
- }
- endseq_thread_id_guard:
- if(sem_destroy(&seq_thread_id_guard) == -1) {
- perror("Failed to destroy semaphore");
- abort();
- }
- endthread_is_running:
- free(thread_is_running);
- endcounts:
- free(counts);
- endids:
- free(ids);
- endthread_is_running_guard:
- k = pthread_mutex_destroy(&thread_is_running_guard);
- if(k) {
- fprintf(stderr, "Failed to destroy mutex: %s\n", strerror(k));
- abort();
- }
- exit(EXIT_FAILURE);
- }
|