/* chunks_stats.c */
  1. #include "platform.h"
  2. #include <errno.h>
  3. #include <stddef.h>
  4. #include <stdint.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "chunks_internal.h"
  9. #include "hexify.h"
  10. #include "rwhashtab.h"
  11. #include "storage.h"
  12. #include "chunks.h"
  13. struct chunks_stats_internal {
  14. RWHASHTAB * HT; /* Hash table of struct chunkdata_statstape. */
  15. struct chunkdata_statstape * dir; /* On-disk directory entries. */
  16. char * cachepath; /* Path to cache directory. */
  17. struct chunkstats stats_total; /* All archives, w/ multiplicity. */
  18. struct chunkstats stats_unique; /* All archives, w/o multiplicity. */
  19. struct chunkstats stats_extra; /* Extra (non-chunked) data. */
  20. struct chunkstats stats_tape; /* This archive, w/ multiplicity. */
  21. struct chunkstats stats_tapeu; /* Data unique to this archive. */
  22. struct chunkstats stats_tapee; /* Extra data in this archive. */
  23. };
  24. static int callback_zero(void *, void *);
  25. static int callback_add(void *, void *);
  26. static int callback_delete(void *, void *);
  27. /**
  28. * callback_zero(rec, cookie):
  29. * Mark the struct chunkdata_statstape ${rec} as being not used in the current
  30. * archive.
  31. */
  32. static int
  33. callback_zero(void * rec, void * cookie)
  34. {
  35. struct chunkdata_statstape * ch = rec;
  36. (void)cookie; /* UNUSED */
  37. ch->d.zlen_flags &= ~CHDATA_CTAPE;
  38. ch->ncopies_ctape = 0;
  39. /* Success! */
  40. return (0);
  41. }
  42. /**
  43. * callback_add(rec, cookie):
  44. * Add the "current archive" statistics to the total chunk statistics.
  45. */
  46. static int
  47. callback_add(void * rec, void * cookie)
  48. {
  49. struct chunkdata_statstape * ch = rec;
  50. (void)cookie; /* UNUSED */
  51. ch->d.ncopies += ch->ncopies_ctape;
  52. if (ch->d.zlen_flags & CHDATA_CTAPE)
  53. ch->d.nrefs += 1;
  54. /* Success! */
  55. return (0);
  56. }
  57. /**
  58. * callback_delete(rec, cookie):
  59. * If the reference count of the struct chunkdata_statstape ${rec} is zero,
  60. * delete the chunk using the storage layer delete cookie ${cookie}.
  61. */
  62. static int
  63. callback_delete(void * rec, void * cookie)
  64. {
  65. struct chunkdata_statstape * ch = rec;
  66. STORAGE_D * S = cookie;
  67. char hashbuf[65];
  68. if (ch->d.nrefs)
  69. goto done;
  70. hexify(ch->d.hash, hashbuf, 32);
  71. fprintf(stdout, " Removing unreferenced chunk file: %s\n", hashbuf);
  72. if (storage_delete_file(S, 'c', ch->d.hash))
  73. goto err0;
  74. done:
  75. /* Success! */
  76. return (0);
  77. err0:
  78. /* Failure! */
  79. return (-1);
  80. }
  81. /**
  82. * chunks_fsck_start(machinenum, cachepath):
  83. * Read the list of chunk files from the server and return a cookie which
  84. * can be used with chunks_stats_zeroarchive, chunks_stats_addchunk,
  85. * chunks_stats_extrastats, and other chunks_fsck_* calls.
  86. */
  87. CHUNKS_S *
  88. chunks_fsck_start(uint64_t machinenum, const char * cachepath)
  89. {
  90. struct chunks_stats_internal * C;
  91. uint8_t * flist;
  92. size_t nfiles;
  93. size_t file;
  94. /* Allocate memory. */
  95. if ((C = malloc(sizeof(struct chunks_stats_internal))) == NULL)
  96. goto err0;
  97. /* Create a copy of the path. */
  98. if ((C->cachepath = strdup(cachepath)) == NULL)
  99. goto err1;
  100. /* Get the list of chunk files from the server. */
  101. if (storage_directory_read(machinenum, 'c', 0, &flist, &nfiles))
  102. goto err2;
  103. /* Construct a chunkdata_statstape structure for each file. */
  104. if (nfiles > SIZE_MAX / sizeof(struct chunkdata_statstape)) {
  105. errno = ENOMEM;
  106. free(flist);
  107. goto err2;
  108. }
  109. if ((C->dir =
  110. malloc(nfiles * sizeof(struct chunkdata_statstape))) == NULL) {
  111. free(flist);
  112. goto err2;
  113. }
  114. for (file = 0; file < nfiles; file++) {
  115. memcpy(C->dir[file].d.hash, &flist[file * 32], 32);
  116. C->dir[file].d.len = C->dir[file].d.zlen_flags = 0;
  117. C->dir[file].d.nrefs = C->dir[file].d.ncopies = 0;
  118. }
  119. /* Free the file list. */
  120. free(flist);
  121. /* Create an empty chunk directory. */
  122. C->HT = rwhashtab_init(offsetof(struct chunkdata, hash), 32);
  123. if (C->HT == NULL)
  124. goto err2;
  125. /* Insert the chunkdata structures we constructed above. */
  126. for (file = 0; file < nfiles; file++) {
  127. if (rwhashtab_insert(C->HT, &C->dir[file]))
  128. goto err3;
  129. }
  130. /* Zero statistics. */
  131. chunks_stats_zero(&C->stats_total);
  132. chunks_stats_zero(&C->stats_extra);
  133. chunks_stats_zero(&C->stats_unique);
  134. /* Success! */
  135. return (C);
  136. err3:
  137. rwhashtab_free(C->HT);
  138. err2:
  139. free(C->cachepath);
  140. err1:
  141. free(C);
  142. err0:
  143. /* Failure! */
  144. return (NULL);
  145. }
  146. /**
  147. * chunks_fsck_archive_add(C):
  148. * Add the "current archive" statistics to the total chunk statistics.
  149. */
  150. int
  151. chunks_fsck_archive_add(CHUNKS_S * C)
  152. {
  153. /* Add global "this archive" stats to global "total" stats. */
  154. chunks_stats_addstats(&C->stats_total, &C->stats_tape);
  155. chunks_stats_addstats(&C->stats_unique, &C->stats_tapeu);
  156. chunks_stats_addstats(&C->stats_extra, &C->stats_tapee);
  157. /* Add per-chunk "this archive" stats to per-chunk "total" stats. */
  158. return (rwhashtab_foreach(C->HT, callback_add, NULL));
  159. }
  160. /**
  161. * chunks_fsck_deletechunks(C, S):
  162. * Using the storage layer delete cookie ${S}, delete any chunks which have
  163. * not been recorded as being used by any archives.
  164. */
  165. int
  166. chunks_fsck_deletechunks(CHUNKS_S * C, STORAGE_D * S)
  167. {
  168. /* Delete each chunk iff it has zero references. */
  169. return (rwhashtab_foreach(C->HT, callback_delete, S));
  170. }
  171. /**
  172. * chunks_fsck_end(C):
  173. * Write out the chunk directory, and close the fscking cookie.
  174. */
  175. int
  176. chunks_fsck_end(CHUNKS_S * C)
  177. {
  178. int rc = 0;
  179. /* Write out the new chunk directory. */
  180. if (chunks_directory_write(C->cachepath, C->HT, &C->stats_extra,
  181. ".tmp"))
  182. rc = -1;
  183. /* Free the chunk hash table. */
  184. chunks_directory_free(C->HT, C->dir);
  185. /* Free memory. */
  186. free(C->cachepath);
  187. free(C);
  188. /* Return status. */
  189. return (rc);
  190. }
  191. /**
  192. * chunks_stats_init(cachepath):
  193. * Prepare for calls to other chunks_stats* functions.
  194. */
  195. CHUNKS_S *
  196. chunks_stats_init(const char * cachepath)
  197. {
  198. struct chunks_stats_internal * C;
  199. void * dir;
  200. /* Allocate memory. */
  201. if ((C = malloc(sizeof(struct chunks_stats_internal))) == NULL)
  202. goto err0;
  203. /* Create a copy of the path. */
  204. if ((C->cachepath = strdup(cachepath)) == NULL)
  205. goto err1;
  206. /* Read directory. */
  207. if ((C->HT = chunks_directory_read(cachepath, &dir,
  208. &C->stats_unique, &C->stats_total, &C->stats_extra, 1, 1)) == NULL)
  209. goto err2;
  210. C->dir = dir;
  211. /* Success! */
  212. return (C);
  213. err2:
  214. free(C->cachepath);
  215. err1:
  216. free(C);
  217. err0:
  218. /* Failure! */
  219. return (NULL);
  220. }
  221. /**
  222. * chunks_stats_getdirsz(C):
  223. * Return the number of entries in the chunks directory associated with ${C}.
  224. */
  225. size_t
  226. chunks_stats_getdirsz(CHUNKS_S * C)
  227. {
  228. /* Get the value from the hash table. */
  229. return (rwhashtab_getsize(C->HT));
  230. }
  231. /**
  232. * chunks_stats_printglobal(stream, C, csv):
  233. * Print global statistics relating to a set of archives, optionally in ${csv}
  234. * format.
  235. */
  236. int
  237. chunks_stats_printglobal(FILE * stream, CHUNKS_S * C, int csv)
  238. {
  239. /* Print header. */
  240. if (chunks_stats_printheader(stream, csv))
  241. goto err0;
  242. /* Print the global statistics. */
  243. if (chunks_stats_print(stream, &C->stats_total, "All archives",
  244. &C->stats_extra, csv))
  245. goto err0;
  246. if (chunks_stats_print(stream, &C->stats_unique, " (unique data)",
  247. &C->stats_extra, csv))
  248. goto err0;
  249. /* Success! */
  250. return (0);
  251. err0:
  252. /* Failure! */
  253. return (-1);
  254. }
  255. /**
  256. * chunks_stats_zeroarchive(C):
  257. * Zero per-archive statistics.
  258. */
  259. void
  260. chunks_stats_zeroarchive(CHUNKS_S * C)
  261. {
  262. /* Zero global statistics. */
  263. chunks_stats_zero(&C->stats_tape);
  264. chunks_stats_zero(&C->stats_tapeu);
  265. chunks_stats_zero(&C->stats_tapee);
  266. /* Zero per-chunk statistics. */
  267. rwhashtab_foreach(C->HT, callback_zero, NULL);
  268. }
  269. /**
  270. * chunks_stats_addchunk(C, hash, len, zlen):
  271. * Add the given chunk to the per-archive statistics. If the chunk does not
  272. * exist, return 1.
  273. */
  274. int
  275. chunks_stats_addchunk(CHUNKS_S * C, const uint8_t * hash,
  276. size_t len, size_t zlen)
  277. {
  278. struct chunkdata_statstape * ch;
  279. /* If the chunk is not in ${S}->HT, error out. */
  280. if ((ch = rwhashtab_read(C->HT, hash)) == NULL)
  281. goto notpresent;
  282. /* Record the lengths if necessary. */
  283. if (ch->d.nrefs == 0 && ch->ncopies_ctape == 0) {
  284. ch->d.len = (uint32_t)len;
  285. ch->d.zlen_flags = (uint32_t)(zlen | (ch->d.zlen_flags & CHDATA_FLAGS));
  286. }
  287. /* Update "current tape" statistics. */
  288. chunks_stats_add(&C->stats_tape, len, zlen, 1);
  289. /* Update "data unique to this archive" statistics. */
  290. if ((ch->d.nrefs <= 1) && ((ch->d.zlen_flags & CHDATA_CTAPE) == 0))
  291. chunks_stats_add(&C->stats_tapeu, len, zlen, 1);
  292. /* Chunk is in current archive. */
  293. ch->ncopies_ctape += 1;
  294. ch->d.zlen_flags |= CHDATA_CTAPE;
  295. /* Success! */
  296. return (0);
  297. notpresent:
  298. /* No such chunk exists. */
  299. return (1);
  300. }
  301. /**
  302. * chunks_stats_extrastats(C, len):
  303. * Notify the chunk layer that non-chunked data of length ${len} belongs to
  304. * the current archive.
  305. */
  306. void
  307. chunks_stats_extrastats(CHUNKS_S * C, size_t len)
  308. {
  309. chunks_stats_add(&C->stats_tapee, len, len, 1);
  310. }
  311. /**
  312. * chunks_stats_printarchive(stream, C, name, csv):
  313. * Print accumulated statistics for an archive with the given name, optionally
  314. * in ${csv} format.
  315. */
  316. int
  317. chunks_stats_printarchive(FILE * stream, CHUNKS_S * C, const char * name,
  318. int csv)
  319. {
  320. /* Print statistics for this archive. */
  321. if (chunks_stats_print(stream, &C->stats_tape, name,
  322. &C->stats_tapee, csv))
  323. goto err0;
  324. if (chunks_stats_print(stream, &C->stats_tapeu, " (unique data)",
  325. &C->stats_tapee, csv))
  326. goto err0;
  327. /* Success! */
  328. return (0);
  329. err0:
  330. /* Failure! */
  331. return (-1);
  332. }
  333. /**
  334. * chunks_stats_free(C):
  335. * No more calls will be made to chunks_stats* functions.
  336. */
  337. void
  338. chunks_stats_free(CHUNKS_S * C)
  339. {
  340. /* Behave consistently with free(NULL). */
  341. if (C == NULL)
  342. return;
  343. /* Free the chunk hash table. */
  344. chunks_directory_free(C->HT, C->dir);
  345. /* Free memory. */
  346. free(C->cachepath);
  347. free(C);
  348. }
  349. /**
  350. * chunks_initialize(cachepath):
  351. * Initialize the chunk directory (file) in ${cachepath}. Return 0 on
  352. * success, -1 on error, and 1 if the file already exists.
  353. */
  354. int
  355. chunks_initialize(const char * cachepath)
  356. {
  357. RWHASHTAB * HT;
  358. struct chunkstats stats_extra;
  359. /* Bail if ${chunkpath}/directory already exists. */
  360. switch (chunks_directory_exists(cachepath)) {
  361. case 0:
  362. break;
  363. case 1:
  364. goto fileexists;
  365. case -1:
  366. goto err0;
  367. }
  368. /* Allocate empty hash table, and zero stats. */
  369. if ((HT = rwhashtab_init(0, 1)) == NULL)
  370. goto err0;
  371. chunks_stats_zero(&stats_extra);
  372. /* Write empty directory file. */
  373. if (chunks_directory_write(cachepath, HT, &stats_extra, ""))
  374. goto err1;
  375. /* Free memory. */
  376. rwhashtab_free(HT);
  377. /* Success! */
  378. return (0);
  379. err1:
  380. rwhashtab_free(HT);
  381. err0:
  382. /* Failure! */
  383. return (-1);
  384. fileexists:
  385. /* Failure! */
  386. return (1);
  387. }