123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462 |
- // TODO: optionally find and remove headers that match the signature but don't have a corresponding subdirectory
- // TODO: exclusions should apply to generated headers as well???
- #include <iostream>
- #include <string>
- #include <filesystem>
- #include <exception>
- #include <vector>
- #include <algorithm>
- #include "simple/file.hpp" // ropen TODO: remove
- #include "simple/io/open.h" // open mode no_such_entity interface operation
- #include "simple/io/read_iterator.h" // read_iterator
- #include "simple/io/write.h" // write as_byte_range
- #include "simple/io/seek.h" // seek
- #include "simple/io/resize.h" // resize
- #include "simple/support/algorithm.hpp" // set_difference, copy, transform, find, mismatch
- #include "simple/support/tuple_utils.hpp" // transform
- #include "simple/support/function_utils.hpp" // overload transform_arg disjunction_f
- #include "simple/support/iterator.hpp" // *
- #include "simple/support/misc.hpp" // to_
- namespace fs = std::filesystem;
- using namespace std::literals;
- using namespace simple;
- using namespace io;
- using namespace support;
// Verbosity level: 0 silences the logs, 1 (the default) prints warnings and
// write/remove notices, higher values print progressively more detail.
int debug{1};

// Name of the optional configuration file listing headers to exclude.
const char * exclude_filename{".allinclude_exclude"};

// Header extensions an allinclude can be generated for (main asserts that
// this list is sorted).
const std::array<std::string, 2> types{".h", ".hpp"};

// First line of every generated allinclude; used to recognise our own files
// on disk.
const std::string signature{"// this is an auto-generated allinclude\n"};
// Everything collected about one scanned directory.
struct include_info
{
    // the scanned directory itself
    fs::path directory;
    // the paths its allinclude header(s) should #include
    std::vector<fs::path> includes{};
    // per entry of `types`: does `includes` hold a header with that extension
    std::array<bool, 2> has_includes{};
};
- std::vector<include_info> allincludes;
- std::vector<fs::path> excluded_headers;
- std::array<bool, 2> generate_allinclude_info(fs::path directory)
- {
- if(debug >= 3) std::cerr << "Processing " << directory << '\n';
- include_info info{std::move(directory)};
- auto is_directory = [](auto&& x) { return x.is_directory(); }; // cause it's overloaded
- auto is_header = [](auto&& x)
- {
- return find(types, x.path().extension()) != types.end() &&
- find(excluded_headers, x.path()) == excluded_headers.end();
- };
- // vv COMMENTS vv making_a_case vs_views pp_in_out
- const auto headers_offset = copy(fs::directory_iterator(info.directory),
- out_filter(disjunction_f(is_directory, is_header),
- out_partition(is_directory, offset_iterator(info.includes),
- out_transform(&fs::directory_entry::path, // vv COMMENTS vv implicit_path
- offset_expander(info.includes)
- )))).out.partition_point;
- auto headers_begin = headers_offset.base(); // dreaded iterator incompatibility
- if(debug >= 3) std::cerr << "Filtering directories with extensions" << '\n';
- // ignore directories with extensions
- // ideally we would just assume no extension for directories, but it's awkward with std filesystem and why would you even have a directory with dots
- const auto dirs_end = std::remove_if(info.includes.begin(), headers_begin,
- [](const auto& dir) { return dir.has_extension(); });
- if(debug && dirs_end != headers_begin)
- {
- std::cerr << "WARNING: Ignoring:" << '\n';
- std::copy(dirs_end, headers_begin, std::ostream_iterator<fs::path>(std::cerr, "\n"));
- }
- if(debug >= 3) std::cerr << "Sorting by name and extension" << '\n';
- std::sort(info.includes.begin(), dirs_end);
- std::sort(headers_begin, info.includes.end(), transform_arg{[](const auto& a)
- { return std::make_tuple(a.stem(), a.extension()); }});
- if(debug >= 3) std::cerr << "Filtering headers with subdirectory names" << '\n';
- auto headers_end = set_difference<ignore_count>(
- headers_begin, info.includes.end(), info.includes.begin(), dirs_end, headers_begin,
- transform_arg{&fs::path::stem}
- );
- if(debug >= 4)
- {
- if(info.includes.begin() != dirs_end)
- {
- std::cerr << "Subdirectories: " << '\n';
- std::copy(info.includes.begin(), dirs_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
- std::cerr << '\n';
- }
- if(headers_begin != headers_end)
- {
- std::cerr << "Includes: " << '\n';
- std::copy(headers_begin, headers_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
- std::cerr << '\n';
- }
- }
- if(dirs_end != info.includes.begin())
- {
- if(debug >= 3) std::cerr << "Adding appropriate headers for subdirectories" << '\n';
- headers_end = std::transform(offset_iterator(info.includes), offset_iterator(info.includes, dirs_end),
- out_flatten_tuple(out_optional(offset_expander(info.includes, headers_end))),
- [](auto&& dir)
- {
- return transform([&dir](bool has_include, auto&& extension)
- -> std::optional<fs::path>
- {
- if(has_include)
- return dir.replace_extension(extension);
- return std::nullopt;
- }, generate_allinclude_info(dir), types);
- }
- ).out.out.out.base();
- headers_begin = headers_offset.base(); // un-invalidate
- }
- if(debug >= 3) std::cerr << "Cleaning up include list" << '\n';
- headers_end = headers_end - headers_begin + info.includes.begin();
- std::rotate(info.includes.begin(), headers_begin, info.includes.end());
- info.includes.erase(headers_end, info.includes.end());
- if(debug >= 4)
- {
- std::cerr << "Final Includes: " << '\n';
- copy(info.includes, std::ostream_iterator<fs::path>(std::cerr, "\n"));
- std::cerr << '\n';
- }
- if(debug >= 3) std::cerr << "Checking include types" << '\n';
- transform(types, [&](auto&& type)
- { return find_if(info.includes, [&type](auto&& x)
- { return x.extension() == type; }) != info.includes.end(); },
- info.has_includes.begin()
- );
- if(debug >= 4)
- {
- std::cerr << "Has includes: ";
- copy(info.has_includes, std::ostream_iterator<bool>(std::cerr, " "));
- std::cerr << '\n';
- }
- if(debug >= 3) std::cerr << "Done processing: " << info.directory << '\n';
- return allincludes.emplace_back(std::move(info)).has_includes;
- }
- void write_allincludes(bool force = false)
- {
- // TODO: assert only h hpp extensions
- // TODO: for includes assert sorted by stem and extension
- // TODO: assert has_includes is correct??
- transform(allincludes, [](auto&& info)
- {
- if(debug >= 2) std::cerr << "Generating headers for: " << info.directory << '\n';
- return transform( [&info](auto iteration_state, bool has_include, auto&& extension)
- {
- std::string content;
- if(has_include)
- pick_unique(info.includes,
- transform_arg{&fs::path::stem, std::equal_to<>{}},
- [iteration_state](auto range)
- {
- static_assert(iteration_state.index() < 2);
- if constexpr (iteration_state.index() == 0)
- return range.begin();
- else
- return std::prev(range.end());
- },
- out_transform( [&info](auto&& path)
- { return "#include \"" + fs::relative(path,
- info.directory.parent_path()).native() +
- "\"\n"; },
- out_accumulate(content += signature
- )));
- auto allinc = info.directory;
- allinc.replace_extension(extension);
- if(debug >= 2 && not std::empty(content)) std::cerr << "Header: " << allinc << '\n';
- if(debug >= 4 && not std::empty(content)) std::cerr << "Content: " << '\n' << content << '\n';
- // TODO: a conditional function that can merge the two variants
- using read_write_handle_t = meta::prepend_t<open_error_t, interface<operation::read | operation::write>>;
- auto io_handle = std::empty(content)
- ? to_<read_write_handle_t>(open<mode::read, mode::write>(allinc)) // technically only need mode::read here, but prefer common interface
- : to_<read_write_handle_t>(open<mode::read, mode::write, mode::create>(allinc))
- ;
- return std::tuple{allinc, std::move(io_handle), content};
- }, info.has_includes, types);
- },
- out_flatten_tuple(
- out_filter([force, read_buffer = std::array<std::byte, 4096>{}](const auto& file) mutable // TODO: will the copy of the read_buffer array be ellided?
- {
- auto& [path, io_handle, content] = file;
- if(debug >= 2) std::cerr << "Checking header on disk: " << path << '\n';
- return std::visit(overload{
- [](no_such_entity) { if(debug >= 2) std::cerr << "Does not exist." << '\n'; return false; }, // content is also empty cause otherwise we create
- // TODO: c++20 can just capture structured binding without this silliness
- [&,&path=path,&content=content](const interface<operation::read | operation::write>& read_handle)
- {
- read_iterator io_begin(read_handle, as_byte_range(read_buffer)), io_end;
- if(debug >= 2) std::cerr << "Checking signature" << '\n';
- auto signature_data = as_byte_view(signature);
- bool empty = io_begin == io_end;
- auto [data_it, io_it] = support::mismatch(signature_data.begin(), signature_data.end(), std::move(io_begin), io_end);
- // can't tell if there was already an empty file or if we created it so gotta conceder that valid,
- // ideally open would tell if it created the file or not, but that's too much to ask from a C API
- if(data_it != signature_data.end() && not empty)
- {
- if(debug) std::cerr << "WARNING: " << path << " is not an allinclude file." << '\n';
- if(force)
- {
- if(debug) std::cerr << "Overwriting." << '\n';
- return true; // overwrite
- }
- else
- {
- if(debug) std::cerr << "Skipping, use force (-f) to overwrite or exclude it (-x file)." << '\n';
- return false; // skip
- }
- }
- if(debug >= 2) std::cerr << "Checking content" << '\n';
- bool should_write = false;
- if(std::empty(content))
- {
- should_write = true; // got to delete
- }
- else
- {
- auto content_data = as_byte_view(content);
- // this is `should_write = not equal(data.begin(),data.end(),io_it,io_end)`, except need to get io iterator out to check for errors below
- auto mismatch = std::tie(data_it, io_it) = support::mismatch(content_data.begin() + signature.size(), content_data.end(), std::move(io_it), io_end);
- should_write = not (mismatch == std::forward_as_tuple(content_data.end(), io_end)); // got to update, if found a mismatch
- }
- if(io_it.result_index() != 0)
- // TODO: still not perfect, cause have no object to get_message from
- // theoretically since the type indicates a specific error it shouldn't be a problem,
- // but will need a bit of an overhaul of errors in simple::io to support that
- // either way we abandon ship, cause something must've went totally wrong
- throw std::runtime_error("Failed to read "s + path.native());
- return should_write;
- },
- [&path=path](error e) { throw std::runtime_error("Failed to open "s + path.native() + "\n Error: " + get_message(e)); return false; }
- }, io_handle);
- },
- out_invoke([](const auto& file)
- {
- auto& [path, io_handle, content] = file;
- if(std::empty(content))
- {
- if(debug) std::cerr << "Removing: " << path << '\n';
- fs::remove(path);
- }
- else
- {
- if(debug) std::cerr << "Writing: " << path << '\n';
- auto& write_handle = get<0>(io_handle);
- if(seek(write_handle, 0).index() != 0)
- throw std::runtime_error("Failed to seek "s + path.native());
- auto content_data = as_byte_view(content);
- std::visit(overload{
- // TODO: c++20 can just capture structured binding without this silliness
- [&path=path](error) { throw std::runtime_error("Failed to write "s + path.native()); },
- [&,&path=path](const std::byte* written) { if(written != content_data.end()) throw std::runtime_error("Not all data written to "s + path.native()); }
- }, write(write_handle, content_data));
- ;
- if(resize(write_handle, content.size()).index() != 0)
- throw std::runtime_error("Failed to resize "s + path.native());
- }
- }
- ))));
- }
- int main(int argc, char const* argv[]) try
- {
- assert(std::is_sorted(types.begin(), types.end()));
- assert(argc > 0); ++argv; --argc; // stupid first arg is stupid
- if(std::find(argv, argv+argc, "-h"s) != argv+argc)
- {
- get<0>(write(standard_error, as_byte_view("allinclude [OPTIONS]\n"
- "OPTIONS:\n"
- " -h show this help\n"
- " -t dir target directory (default: current path)\n"
- " -f force overwrite\n"
- " -x file exclude a header file\n"
- " -q no logs\n"
- " -v some logs\n"
- " -vv a lot of logs\n"
- " -vvv spam logs\n"
- "\n"
- "CONFIGURATION FILES:\n"
- " .allinclude_exclude newline separated list of header files to exclude\n"
- )));
- return 0;
- }
- debug = (std::find(argv, argv+argc, "-vvv"s) != argv+argc) ? 4 :
- (std::find(argv, argv+argc, "-vv"s) != argv+argc) ? 3:
- (std::find(argv, argv+argc, "-v"s) != argv+argc) ? 2 :
- (std::find(argv, argv+argc, "-q"s) != argv+argc) ? 0 :
- 1;
- if(debug >= 3) std::cerr << "Reading target argument" << '\n';
- auto target_arg = std::find(argv, argv+argc, "-t"s);
- fs::path target = target_arg < (argv+argc-1) ? *(target_arg + 1) : fs::current_path();
- if(!fs::is_directory(target))
- {
- std::cerr << target << " is not a directory!" << '\n';
- return -1;
- }
- if(debug >= 3) std::cerr << "Reading exclude arguments" << '\n';
- {
- auto x = std::find(argv, argv+argc, "-x"s);
- while(x < argc+argv-1)
- {
- ++x;
- excluded_headers.emplace_back(*x);
- x = std::find(x+1, argv+argc, "-x"s);
- }
- }
- if(debug >= 3) std::cerr << "Reading exclude configuration file: " << exclude_filename << '\n';
- if(fs::exists(exclude_filename))
- {
- auto exclude_file = file::ropen(exclude_filename);
- std::string line;
- while(std::getline(exclude_file, line))
- excluded_headers.emplace_back(line);
- }
- if(debug >= 2)
- {
- std::cerr << "Target: " << target << '\n';
- std::cerr << "Debug level: " << debug << '\n';
- if(not std::empty(excluded_headers))
- {
- std::cerr << "Excluding: " << '\n';
- copy(excluded_headers, std::ostream_iterator<fs::path>(std::cerr, "\n"));
- std::cerr << '\n';
- }
- }
- if(debug >= 2) std::cerr << "Analyzing directory structure" << '\n';
- generate_allinclude_info(target);
- if(debug >= 2) std::cerr << "Writing include files" << '\n';
- write_allincludes(std::find(argv, argv+argc, "-f"s) != argv+argc);
- }
- catch(const std::exception& ex)
- {
- if(errno)
- std::perror("Error: ");
- std::cerr << "Exception: " << ex.what() << '\n';
- throw;
- }
- // ^^ COMMENTS ^^
- //
- // implicit_path: woopsies, didn't know entry implicitly converts to path, ha
- // ha, stupid stupid me, how shameful, oh my, I guess I gotta remove this line
- // now... no! transform stays! implicit conversion should go! >:( let me see
- // that defect report now :V
- //
- // making_a_case:
- // this abomination replaces the following "simple" loop
- // offset_iterator headers_offset(includes);
- // for(auto&& entry : fs::directory_iterator(info.directory))
- // {
- // // filter
- // if(is_directory(entry) || is_header(entry))
- // {
- // // transform + expand
- // includes.push_back(entry.path());
- //
- // // partition
- // if(is_directory(entry))
- // std::iter_swap(headers_offset++, includes.end() - 1);
- // }
- // }
- // what's the point you would ask... well, the abomination reads better, you
- // can take in what's happening in a more modular sequential way, it's as if
- // the list of directory entries were filtered, then partitioned, then
- // transformed, then pushed into the vector, while in reality it's an
- // amalgamated mess that you can see in the for loop - the filter is a block,
- // so have to follow brackets and indentation (simple enough in this case, but
- // you still have to do it), next comes the push_back with a rather easy to
- // miss transform tucked in, and then bam! a partition(which without the
- // comments would need to be identified first) using information from entry
- // prior to transformation... also it's apparent that the headers_offset is
- // the partition point returned by this algorithm, while with the for loop it's
- // a mutable state that you also need to track
- // is it easier to write? no, and that's fine. first of all you need to
- // identify these algorithms, then you need to know what's available in your
- // toolbox, or potentially add new tools, then you need to deal with function
- // object boilerplate, then you need to close all those parens at the end (you
- // can go nuts overloading a binary operator to not have to do that one simple
- // thing, but is it worth it?), then you need to dig into the resulting
- // iterator to get whatever you need out of it, but that's all fine, cause it's
- // even harder to make non obvious mistakes there, both at first and afterwards
- // every time this is revisited
- // is it zero cost theoretically? offset_expander is an overkill (unavoidable
- // branch on proxy assignment), but something lighter could be used here - an
- // iterator that always pushes back when assigned and always returns the last
- // element when read... call it just plain expander I guess... or back_inserter
- // if we're being bold... should also give lower/upper_bound_inserter a try,
- // something I wanted since forever, but never knew how to go about, so I would
- // say yes it is theoretically zero cost compared to the naive loop
- // looking at the loop and forgetting about the algorithmic pipeline you are
- // tempted to "optimize" it... checking is_directory twice, feels bad man, and
- // those swap shenanigans, we just want directories first and headers last,
- // right?
- // int headers_offset = 0;
- // for(auto&& entry : fs::directory_iterator(info.directory))
- // {
- // if(is_directory(entry))
- // {
- // includes.insert(includes.begin(), entry.path());
- // ++headers_offset;
- // }
- // else if(is_header(entry))
- // {
- // includes.push_back(entry.path());
- // }
- // }
- // then you might even think that insert is a problem, and replace vector with
- // deque... sad sad story... and there are people who think that this is
- // actually better than having to learn algorithms... cause you know we gotta
- // hire sum javascript ninjas and have them be "productive" from day one...
- //
- // vs_views: this kind of output iterator chaining maps to loop body much more
- // directly than the standard (or ranges v3) views, which are more like
- // manipulating the loop statement itself and can often be hard to reason about
- // without going into implementation details, and also can't cover this
- // particular use case afaik
- //
- // pp_in_out:
- // i want my pp in 'n out
|