allinclude.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. // TODO: optionally find and remove headers that match the signature but don't have a corresponding subdirectory
  2. // TODO: exclusions should apply to generated headers as well???
  3. #include <iostream>
  4. #include <string>
  5. #include <filesystem>
  6. #include <exception>
  7. #include <vector>
  8. #include <algorithm>
  9. #include "simple/file.hpp" // ropen TODO: remove
  10. #include "simple/io/open.h" // open mode no_such_entity interface operation
  11. #include "simple/io/read_iterator.h" // read_iterator
  12. #include "simple/io/write.h" // write as_byte_range
  13. #include "simple/io/seek.h" // seek
  14. #include "simple/io/resize.h" // resize
  15. #include "simple/support/algorithm.hpp" // set_difference, copy, transform, find, mismatch
  16. #include "simple/support/tuple_utils.hpp" // transform
  17. #include "simple/support/function_utils.hpp" // overload transform_arg disjunction_f
  18. #include "simple/support/iterator.hpp" // *
  19. #include "simple/support/misc.hpp" // to_
  20. namespace fs = std::filesystem;
  21. using namespace std::literals;
  22. using namespace simple;
  23. using namespace io;
  24. using namespace support;
  // Verbosity of diagnostics on stderr: 0 silences everything, 1 prints
  // warnings and write/remove notices, 2..4 print progressively more detail.
  int debug{1};
  // Name of the optional file listing excluded headers, one per line.
  const char * exclude_filename{".allinclude_exclude"};
  // Header extensions that are recognised; main() asserts this stays sorted.
  const std::array<std::string, 2> types{".h", ".hpp"};
  // First line of every generated header; used to recognise files that this
  // tool produced.
  const std::string signature("// this is an auto-generated allinclude\n");
  // Result of scanning one directory.
  struct include_info
  {
      // The directory that was scanned.
      fs::path directory;
      // Paths that the generated header(s) for `directory` should include.
      std::vector<fs::path> includes{};
      // One flag per entry of `types`: true when `includes` holds at least
      // one path with that extension.
      std::array<bool, 2> has_includes{{false, false}};
  };
  35. std::vector<include_info> allincludes;
  36. std::vector<fs::path> excluded_headers;
  37. std::array<bool, 2> generate_allinclude_info(fs::path directory)
  38. {
  39. if(debug >= 3) std::cerr << "Processing " << directory << '\n';
  40. include_info info{std::move(directory)};
  41. auto is_directory = [](auto&& x) { return x.is_directory(); }; // cause it's overloaded
  42. auto is_header = [](auto&& x)
  43. {
  44. return find(types, x.path().extension()) != types.end() &&
  45. find(excluded_headers, x.path()) == excluded_headers.end();
  46. };
  47. // vv COMMENTS vv making_a_case vs_views pp_in_out
  48. const auto headers_offset = copy(fs::directory_iterator(info.directory),
  49. out_filter(disjunction_f(is_directory, is_header),
  50. out_partition(is_directory, offset_iterator(info.includes),
  51. out_transform(&fs::directory_entry::path, // vv COMMENTS vv implicit_path
  52. offset_expander(info.includes)
  53. )))).out.partition_point;
  54. auto headers_begin = headers_offset.base(); // dreaded iterator incompatibility
  55. if(debug >= 3) std::cerr << "Filtering directories with extensions" << '\n';
  56. // ignore directories with extensions
  57. // ideally we would just assume no extension for directories, but it's awkward with std filesystem and why would you even have a directory with dots
  58. const auto dirs_end = std::remove_if(info.includes.begin(), headers_begin,
  59. [](const auto& dir) { return dir.has_extension(); });
  60. if(debug && dirs_end != headers_begin)
  61. {
  62. std::cerr << "WARNING: Ignoring:" << '\n';
  63. std::copy(dirs_end, headers_begin, std::ostream_iterator<fs::path>(std::cerr, "\n"));
  64. }
  65. if(debug >= 3) std::cerr << "Sorting by name and extension" << '\n';
  66. std::sort(info.includes.begin(), dirs_end);
  67. std::sort(headers_begin, info.includes.end(), transform_arg{[](const auto& a)
  68. { return std::make_tuple(a.stem(), a.extension()); }});
  69. if(debug >= 3) std::cerr << "Filtering headers with subdirectory names" << '\n';
  70. auto headers_end = set_difference<ignore_count>(
  71. headers_begin, info.includes.end(), info.includes.begin(), dirs_end, headers_begin,
  72. transform_arg{&fs::path::stem}
  73. );
  74. if(debug >= 4)
  75. {
  76. if(info.includes.begin() != dirs_end)
  77. {
  78. std::cerr << "Subdirectories: " << '\n';
  79. std::copy(info.includes.begin(), dirs_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
  80. std::cerr << '\n';
  81. }
  82. if(headers_begin != headers_end)
  83. {
  84. std::cerr << "Includes: " << '\n';
  85. std::copy(headers_begin, headers_end, std::ostream_iterator<fs::path>(std::cerr, "\n"));
  86. std::cerr << '\n';
  87. }
  88. }
  89. if(dirs_end != info.includes.begin())
  90. {
  91. if(debug >= 3) std::cerr << "Adding appropriate headers for subdirectories" << '\n';
  92. headers_end = std::transform(offset_iterator(info.includes), offset_iterator(info.includes, dirs_end),
  93. out_flatten_tuple(out_optional(offset_expander(info.includes, headers_end))),
  94. [](auto&& dir)
  95. {
  96. return transform([&dir](bool has_include, auto&& extension)
  97. -> std::optional<fs::path>
  98. {
  99. if(has_include)
  100. return dir.replace_extension(extension);
  101. return std::nullopt;
  102. }, generate_allinclude_info(dir), types);
  103. }
  104. ).out.out.out.base();
  105. headers_begin = headers_offset.base(); // un-invalidate
  106. }
  107. if(debug >= 3) std::cerr << "Cleaning up include list" << '\n';
  108. headers_end = headers_end - headers_begin + info.includes.begin();
  109. std::rotate(info.includes.begin(), headers_begin, info.includes.end());
  110. info.includes.erase(headers_end, info.includes.end());
  111. if(debug >= 4)
  112. {
  113. std::cerr << "Final Includes: " << '\n';
  114. copy(info.includes, std::ostream_iterator<fs::path>(std::cerr, "\n"));
  115. std::cerr << '\n';
  116. }
  117. if(debug >= 3) std::cerr << "Checking include types" << '\n';
  118. transform(types, [&](auto&& type)
  119. { return find_if(info.includes, [&type](auto&& x)
  120. { return x.extension() == type; }) != info.includes.end(); },
  121. info.has_includes.begin()
  122. );
  123. if(debug >= 4)
  124. {
  125. std::cerr << "Has includes: ";
  126. copy(info.has_includes, std::ostream_iterator<bool>(std::cerr, " "));
  127. std::cerr << '\n';
  128. }
  129. if(debug >= 3) std::cerr << "Done processing: " << info.directory << '\n';
  130. return allincludes.emplace_back(std::move(info)).has_includes;
  131. }
  132. void write_allincludes(bool force = false)
  133. {
  134. // TODO: assert only h hpp extensions
  135. // TODO: for includes assert sorted by stem and extension
  136. // TODO: assert has_includes is correct??
  137. transform(allincludes, [](auto&& info)
  138. {
  139. if(debug >= 2) std::cerr << "Generating headers for: " << info.directory << '\n';
  140. return transform( [&info](auto iteration_state, bool has_include, auto&& extension)
  141. {
  142. std::string content;
  143. if(has_include)
  144. pick_unique(info.includes,
  145. transform_arg{&fs::path::stem, std::equal_to<>{}},
  146. [iteration_state](auto range)
  147. {
  148. static_assert(iteration_state.index() < 2);
  149. if constexpr (iteration_state.index() == 0)
  150. return range.begin();
  151. else
  152. return std::prev(range.end());
  153. },
  154. out_transform( [&info](auto&& path)
  155. { return "#include \"" + fs::relative(path,
  156. info.directory.parent_path()).native() +
  157. "\"\n"; },
  158. out_accumulate(content += signature
  159. )));
  160. auto allinc = info.directory;
  161. allinc.replace_extension(extension);
  162. if(debug >= 2 && not std::empty(content)) std::cerr << "Header: " << allinc << '\n';
  163. if(debug >= 4 && not std::empty(content)) std::cerr << "Content: " << '\n' << content << '\n';
  164. // TODO: a conditional function that can merge the two variants
  165. using read_write_handle_t = meta::prepend_t<open_error_t, interface<operation::read | operation::write>>;
  166. auto io_handle = std::empty(content)
  167. ? to_<read_write_handle_t>(open<mode::read, mode::write>(allinc)) // technically only need mode::read here, but prefer common interface
  168. : to_<read_write_handle_t>(open<mode::read, mode::write, mode::create>(allinc))
  169. ;
  170. return std::tuple{allinc, std::move(io_handle), content};
  171. }, info.has_includes, types);
  172. },
  173. out_flatten_tuple(
  174. out_filter([force, read_buffer = std::array<std::byte, 4096>{}](const auto& file) mutable // TODO: will the copy of the read_buffer array be ellided?
  175. {
  176. auto& [path, io_handle, content] = file;
  177. if(debug >= 2) std::cerr << "Checking header on disk: " << path << '\n';
  178. return std::visit(overload{
  179. [](no_such_entity) { if(debug >= 2) std::cerr << "Does not exist." << '\n'; return false; }, // content is also empty cause otherwise we create
  180. // TODO: c++20 can just capture structured binding without this silliness
  181. [&,&path=path,&content=content](const interface<operation::read | operation::write>& read_handle)
  182. {
  183. read_iterator io_begin(read_handle, as_byte_range(read_buffer)), io_end;
  184. if(debug >= 2) std::cerr << "Checking signature" << '\n';
  185. auto signature_data = as_byte_view(signature);
  186. bool empty = io_begin == io_end;
  187. auto [data_it, io_it] = support::mismatch(signature_data.begin(), signature_data.end(), std::move(io_begin), io_end);
  188. // can't tell if there was already an empty file or if we created it so gotta conceder that valid,
  189. // ideally open would tell if it created the file or not, but that's too much to ask from a C API
  190. if(data_it != signature_data.end() && not empty)
  191. {
  192. if(debug) std::cerr << "WARNING: " << path << " is not an allinclude file." << '\n';
  193. if(force)
  194. {
  195. if(debug) std::cerr << "Overwriting." << '\n';
  196. return true; // overwrite
  197. }
  198. else
  199. {
  200. if(debug) std::cerr << "Skipping, use force (-f) to overwrite or exclude it (-x file)." << '\n';
  201. return false; // skip
  202. }
  203. }
  204. if(debug >= 2) std::cerr << "Checking content" << '\n';
  205. bool should_write = false;
  206. if(std::empty(content))
  207. {
  208. should_write = true; // got to delete
  209. }
  210. else
  211. {
  212. auto content_data = as_byte_view(content);
  213. // this is `should_write = not equal(data.begin(),data.end(),io_it,io_end)`, except need to get io iterator out to check for errors below
  214. auto mismatch = std::tie(data_it, io_it) = support::mismatch(content_data.begin() + signature.size(), content_data.end(), std::move(io_it), io_end);
  215. should_write = not (mismatch == std::forward_as_tuple(content_data.end(), io_end)); // got to update, if found a mismatch
  216. }
  217. if(io_it.result_index() != 0)
  218. // TODO: still not perfect, cause have no object to get_message from
  219. // theoretically since the type indicates a specific error it shouldn't be a problem,
  220. // but will need a bit of an overhaul of errors in simple::io to support that
  221. // either way we abandon ship, cause something must've went totally wrong
  222. throw std::runtime_error("Failed to read "s + path.native());
  223. return should_write;
  224. },
  225. [&path=path](error e) { throw std::runtime_error("Failed to open "s + path.native() + "\n Error: " + get_message(e)); return false; }
  226. }, io_handle);
  227. },
  228. out_invoke([](const auto& file)
  229. {
  230. auto& [path, io_handle, content] = file;
  231. if(std::empty(content))
  232. {
  233. if(debug) std::cerr << "Removing: " << path << '\n';
  234. fs::remove(path);
  235. }
  236. else
  237. {
  238. if(debug) std::cerr << "Writing: " << path << '\n';
  239. auto& write_handle = get<0>(io_handle);
  240. if(seek(write_handle, 0).index() != 0)
  241. throw std::runtime_error("Failed to seek "s + path.native());
  242. auto content_data = as_byte_view(content);
  243. std::visit(overload{
  244. // TODO: c++20 can just capture structured binding without this silliness
  245. [&path=path](error) { throw std::runtime_error("Failed to write "s + path.native()); },
  246. [&,&path=path](const std::byte* written) { if(written != content_data.end()) throw std::runtime_error("Not all data written to "s + path.native()); }
  247. }, write(write_handle, content_data));
  248. ;
  249. if(resize(write_handle, content.size()).index() != 0)
  250. throw std::runtime_error("Failed to resize "s + path.native());
  251. }
  252. }
  253. ))));
  254. }
  255. int main(int argc, char const* argv[]) try
  256. {
  257. assert(std::is_sorted(types.begin(), types.end()));
  258. assert(argc > 0); ++argv; --argc; // stupid first arg is stupid
  259. if(std::find(argv, argv+argc, "-h"s) != argv+argc)
  260. {
  261. get<0>(write(standard_error, as_byte_view("allinclude [OPTIONS]\n"
  262. "OPTIONS:\n"
  263. " -h show this help\n"
  264. " -t dir target directory (default: current path)\n"
  265. " -f force overwrite\n"
  266. " -x file exclude a header file\n"
  267. " -q no logs\n"
  268. " -v some logs\n"
  269. " -vv a lot of logs\n"
  270. " -vvv spam logs\n"
  271. "\n"
  272. "CONFIGURATION FILES:\n"
  273. " .allinclude_exclude newline separated list of header files to exclude\n"
  274. )));
  275. return 0;
  276. }
  277. debug = (std::find(argv, argv+argc, "-vvv"s) != argv+argc) ? 4 :
  278. (std::find(argv, argv+argc, "-vv"s) != argv+argc) ? 3:
  279. (std::find(argv, argv+argc, "-v"s) != argv+argc) ? 2 :
  280. (std::find(argv, argv+argc, "-q"s) != argv+argc) ? 0 :
  281. 1;
  282. if(debug >= 3) std::cerr << "Reading target argument" << '\n';
  283. auto target_arg = std::find(argv, argv+argc, "-t"s);
  284. fs::path target = target_arg < (argv+argc-1) ? *(target_arg + 1) : fs::current_path();
  285. if(!fs::is_directory(target))
  286. {
  287. std::cerr << target << " is not a directory!" << '\n';
  288. return -1;
  289. }
  290. if(debug >= 3) std::cerr << "Reading exclude arguments" << '\n';
  291. {
  292. auto x = std::find(argv, argv+argc, "-x"s);
  293. while(x < argc+argv-1)
  294. {
  295. ++x;
  296. excluded_headers.emplace_back(*x);
  297. x = std::find(x+1, argv+argc, "-x"s);
  298. }
  299. }
  300. if(debug >= 3) std::cerr << "Reading exclude configuration file: " << exclude_filename << '\n';
  301. if(fs::exists(exclude_filename))
  302. {
  303. auto exclude_file = file::ropen(exclude_filename);
  304. std::string line;
  305. while(std::getline(exclude_file, line))
  306. excluded_headers.emplace_back(line);
  307. }
  308. if(debug >= 2)
  309. {
  310. std::cerr << "Target: " << target << '\n';
  311. std::cerr << "Debug level: " << debug << '\n';
  312. if(not std::empty(excluded_headers))
  313. {
  314. std::cerr << "Excluding: " << '\n';
  315. copy(excluded_headers, std::ostream_iterator<fs::path>(std::cerr, "\n"));
  316. std::cerr << '\n';
  317. }
  318. }
  319. if(debug >= 2) std::cerr << "Analyzing directory structure" << '\n';
  320. generate_allinclude_info(target);
  321. if(debug >= 2) std::cerr << "Writing include files" << '\n';
  322. write_allincludes(std::find(argv, argv+argc, "-f"s) != argv+argc);
  323. }
  324. catch(const std::exception& ex)
  325. {
  326. if(errno)
  327. std::perror("Error: ");
  328. std::cerr << "Exception: " << ex.what() << '\n';
  329. throw;
  330. }
  331. // ^^ COMMENTS ^^
  332. //
  333. // implicit_path: woopsies, didn't know entry implicitly converts to path, ha
  334. // ha, stupid stupid me, how shameful, oh my, I guess I gotta remove this line
  335. // now... no! transform stays! implicit conversion should go! >:( let me see
  336. // that defect report now :V
  337. //
  338. // making_a_case:
  339. // this abomination replaces the following "simple" loop
  340. // offset_iterator headers_offset(includes);
  341. // for(auto&& entry : fs::directory_iterator(info.directory))
  342. // {
  343. // // filter
  344. // if(is_directory(entry) || is_header(entry))
  345. // {
  346. // // transform + expand
  347. // includes.push_back(entry.path());
  348. //
  349. // // partition
  350. // if(is_directory(entry))
  351. // std::iter_swap(headers_offset++, includes.end() - 1);
  352. // }
  353. // }
  354. // what's the point you would ask... well, the abomination reads better, you
  355. // can take in what's happening in a more modular sequential way, it's as if
  356. // the list of directory entries were filtered, then partitioned, then
  357. // transformed, then pushed into the vector, while in reality it's an
  358. // amalgamated mess that you can see in the for loop - the filter is a block,
  359. // so have to follow brackets and indentation (simple enough in this case, but
  360. // you still have to do it), next comes the push_back with a rather easy to
  361. // miss transform tucked in, and then bam! a partition(which without the
  362. // comments would need to be identified first) using information from entry
  363. // prior to transformation... also it's apparent that the headers_offset is
  364. // the partition point returned by this algorithm, while with the for loop it's
  365. // a mutable state that you also need to track
  366. // is it easier to write? no, and that's fine. first of all you need to
  367. // identify these algorithms, then you need to know what's available in your
  368. // toolbox, or potentially add new tools, then you need to deal with function
  369. // object boilerplate, then you need to close all those parens at the end (you
  370. // can go nuts overloading a binary operator to not have to do that one simple
  371. // thing, but is it worth it?), then you need to dig into the resulting
  372. // iterator to get whatever you need out of it, but that's all fine, cause it's
  373. // even harder to make non obvious mistakes there, both at first and afterwards
  374. // every time this is revisited
  375. // is it zero cost theoretically? offset_expander is an overkill (unavoidable
  376. // branch on proxy assignment), but something lighter could be used here - an
  377. // iterator that always pushes back when assigned and always returns the last
  378. // element when read... call it just plain expander I guess... or back_inserter
  379. // if we're being bold... should also give lower/upper_bound_inserter a try,
  380. // something I wanted since forever, but never knew how to go about, so I would
  381. // say yes it is theoretically zero cost compared to the naive loop
  382. // looking at the loop and forgetting about the algorithmic pipeline you are
  383. // tempted to "optimize" it... checking is_directory twice, feels bad man, and
  384. // those swap shenanigans, we just want directories first and headers last,
  385. // right?
  386. // int headers_offset = 0;
  387. // for(auto&& entry : fs::directory_iterator(info.directory))
  388. // {
  389. // if(is_directory(entry))
  390. // {
  391. // includes.insert(includes.begin(), entry.path());
  392. // ++headers_offset;
  393. // }
  394. // else if(is_header(entry))
  395. // {
  396. // includes.push_back(entry.path());
  397. // }
  398. // }
  399. // then you might even think that insert is a problem, and replace vector with
  400. // deque... sad sad story... and there are people who think that this is
  401. // actually better than having to learn algorithms... cause you know we gotta
  402. // hire sum javascript ninjas and have them be "productive" from day one...
  403. //
  404. // vs_views: this kind of output iterator chaining maps to loop body much more
  405. // directly than the standard (or ranges v3) views, which are more like
  406. // manipulating the loop statement itself and can often be hard to reason about
  407. // without going into implementation details, and also can't cover this
  408. // particular use case afaik
  409. //
  410. // pp_in_out:
  411. // i want my pp in 'n out