chunks_write.c 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. #include "platform.h"
  2. #include <sys/types.h>
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <limits.h>
  6. #include <stdint.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <zlib.h>
  11. #include "chunks_internal.h"
  12. #include "hexify.h"
  13. #include "rwhashtab.h"
  14. #include "storage.h"
  15. #include "warnp.h"
  16. #include "chunks.h"
  17. struct chunks_write_internal {
  18. size_t maxlen; /* Maximum chunk size. */
  19. uint8_t * zbuf; /* Buffer for compression. */
  20. size_t zbuflen; /* Length of zbuf. */
  21. RWHASHTAB * HT; /* Hash table of struct chunkdata. */
  22. void * dir; /* On-disk directory entries. */
  23. char * path; /* Path to cache directory. */
  24. STORAGE_W * S; /* Storage layer cookie; NULL=dryrun. */
  25. struct chunkstats stats_total; /* All archives, w/ multiplicity. */
  26. struct chunkstats stats_unique; /* All archives, w/o multiplicity. */
  27. struct chunkstats stats_extra; /* Extra (non-chunked) data. */
  28. struct chunkstats stats_extra_copy; /* Copy for checkpoint stats. */
  29. struct chunkstats stats_tape; /* This archive, w/ multiplicity. */
  30. struct chunkstats stats_new; /* New chunks. */
  31. struct chunkstats stats_tapee; /* Extra data in this archive. */
  32. };
  33. /**
  34. * chunks_write_start(cachepath, S, maxchunksize):
  35. * Start a write transaction using the cache directory ${cachepath} and the
  36. * storage layer cookie ${S} which will involve chunks of maximum size
  37. * ${maxchunksize}.
  38. */
  39. CHUNKS_W *
  40. chunks_write_start(const char * cachepath, STORAGE_W * S, size_t maxchunksize)
  41. {
  42. struct chunks_write_internal * C;
  43. /* Sanity check. */
  44. if ((maxchunksize == 0) || (maxchunksize > SIZE_MAX / 2)) {
  45. warn0("Programmer error: maxchunksize invalid");
  46. goto err0;
  47. }
  48. /* Allocate memory. */
  49. if ((C = malloc(sizeof(struct chunks_write_internal))) == NULL)
  50. return (NULL);
  51. /* Set length parameters. */
  52. C->maxlen = maxchunksize;
  53. C->zbuflen = C->maxlen + (C->maxlen / 1000) + 13;
  54. /* Allocate buffer for holding a compressed chunk. */
  55. if ((C->zbuf = malloc(C->zbuflen)) == NULL)
  56. goto err1;
  57. /* Record the storage cookie that we're using. */
  58. C->S = S;
  59. /* Create a copy of the path. */
  60. if (cachepath == NULL) {
  61. C->path = NULL;
  62. } else {
  63. if ((C->path = strdup(cachepath)) == NULL)
  64. goto err2;
  65. }
  66. /* Read the existing chunk directory (if one exists). */
  67. if ((C->HT = chunks_directory_read(cachepath, &C->dir,
  68. &C->stats_unique, &C->stats_total, &C->stats_extra, 0, 0)) == NULL)
  69. goto err3;
  70. /* Zero "new chunks" and "this tape" statistics. */
  71. chunks_stats_zero(&C->stats_tape);
  72. chunks_stats_zero(&C->stats_new);
  73. chunks_stats_zero(&C->stats_tapee);
  74. /* Success! */
  75. return (C);
  76. err3:
  77. free(C->path);
  78. err2:
  79. free(C->zbuf);
  80. err1:
  81. free(C);
  82. err0:
  83. /* Failure! */
  84. return (NULL);
  85. }
  86. /**
  87. * chunks_write_chunk(C, hash, buf, buflen):
  88. * Write the chunk ${buf} of length ${buflen}, which has HMAC ${hash},
  89. * as part of the write transaction associated with the cookie ${C}.
  90. * Return the compressed size.
  91. */
  92. ssize_t
  93. chunks_write_chunk(CHUNKS_W * C, const uint8_t * hash,
  94. const uint8_t * buf, size_t buflen)
  95. {
  96. struct chunkdata * ch;
  97. uLongf zlen;
  98. char hashbuf[65];
  99. int rc;
  100. /* Sanity checks. */
  101. assert(buflen <= UINT32_MAX);
  102. assert(C->zbuflen < CHDATA_MALLOC);
  103. /* If the chunk is in ${C}->HT, return the compressed length. */
  104. if ((ch = rwhashtab_read(C->HT, hash)) != NULL) {
  105. chunks_stats_add(&C->stats_total, ch->len,
  106. ch->zlen_flags & CHDATA_ZLEN, 1);
  107. chunks_stats_add(&C->stats_tape, ch->len,
  108. ch->zlen_flags & CHDATA_ZLEN, 1);
  109. ch->ncopies += 1;
  110. if ((ch->zlen_flags & CHDATA_CTAPE) == 0) {
  111. ch->nrefs += 1;
  112. ch->zlen_flags |= CHDATA_CTAPE;
  113. }
  114. return (ch->zlen_flags & CHDATA_ZLEN);
  115. }
  116. /* Compress the chunk. */
  117. zlen = C->zbuflen;
  118. if ((rc = compress2(C->zbuf, &zlen, buf, buflen, 9)) != Z_OK) {
  119. switch (rc) {
  120. case Z_MEM_ERROR:
  121. errno = ENOMEM;
  122. warnp("Error compressing data");
  123. break;
  124. case Z_BUF_ERROR:
  125. warn0("Programmer error: "
  126. "Buffer too small to hold zlib-compressed data");
  127. break;
  128. default:
  129. warn0("Programmer error: "
  130. "Unexpected error code from compress2: %d", rc);
  131. break;
  132. }
  133. goto err0;
  134. }
  135. /* Sanity check the compressed size. */
  136. if (zlen > SSIZE_MAX) {
  137. warnp("Error compressing chunk");
  138. goto err0;
  139. }
  140. /* Ask the storage layer to write the file for us. */
  141. if (storage_write_file(C->S, C->zbuf, zlen, 'c', hash)) {
  142. hexify(hash, hashbuf, 32);
  143. warnp("Error storing chunk %s", hashbuf);
  144. goto err0;
  145. }
  146. /* Allocate a new struct chunkdata... */
  147. if ((ch = malloc(sizeof(struct chunkdata))) == NULL)
  148. goto err0;
  149. /* ... fill in the chunk parameters... */
  150. memcpy(ch->hash, hash, 32);
  151. ch->len = (uint32_t)buflen;
  152. ch->zlen_flags = (uint32_t)(zlen | CHDATA_MALLOC | CHDATA_CTAPE);
  153. ch->nrefs = 1;
  154. ch->ncopies = 1;
  155. /* ... and insert it into the hash table. */
  156. if (rwhashtab_insert(C->HT, ch))
  157. goto err1;
  158. /* Update statistics. */
  159. chunks_stats_add(&C->stats_total, ch->len, zlen, 1);
  160. chunks_stats_add(&C->stats_unique, ch->len, zlen, 1);
  161. chunks_stats_add(&C->stats_tape, ch->len, zlen, 1);
  162. chunks_stats_add(&C->stats_new, ch->len, zlen, 1);
  163. /* Success! */
  164. return ((ssize_t)zlen);
  165. err1:
  166. free(ch);
  167. err0:
  168. /* Failure! */
  169. return (-1);
  170. }
  171. /**
  172. * chunks_write_ispresent(C, hash):
  173. * If a chunk with hash ${hash} exists, return 0; otherwise, return 1.
  174. */
  175. int
  176. chunks_write_ispresent(CHUNKS_W * C, const uint8_t * hash)
  177. {
  178. if (rwhashtab_read(C->HT, hash) != NULL)
  179. return (0);
  180. else
  181. return (1);
  182. }
  183. /**
  184. * chunks_write_chunkref(C, hash):
  185. * If a chunk with hash ${hash} exists, mark it as being part of the write
  186. * transaction associated with the cookie ${C} and return 0. If it
  187. * does not exist, return 1.
  188. */
  189. int
  190. chunks_write_chunkref(CHUNKS_W * C, const uint8_t * hash)
  191. {
  192. struct chunkdata * ch;
  193. /*
  194. * If the chunk is in ${C}->HT, mark it as being part of the
  195. * transaction and return 0.
  196. */
  197. if ((ch = rwhashtab_read(C->HT, hash)) != NULL) {
  198. chunks_stats_add(&C->stats_total, ch->len,
  199. ch->zlen_flags & CHDATA_ZLEN, 1);
  200. chunks_stats_add(&C->stats_tape, ch->len,
  201. ch->zlen_flags & CHDATA_ZLEN, 1);
  202. ch->ncopies += 1;
  203. if ((ch->zlen_flags & CHDATA_CTAPE) == 0) {
  204. ch->nrefs += 1;
  205. ch->zlen_flags |= CHDATA_CTAPE;
  206. }
  207. return (0);
  208. }
  209. /* If it does not exist, return 1. */
  210. return (1);
  211. }
  212. /**
  213. * chunks_write_extrastats(C, len):
  214. * Notify the chunk layer that non-chunked data of length ${len} has been
  215. * written directly to the storage layer; this information is used when
  216. * displaying archive statistics.
  217. */
  218. void
  219. chunks_write_extrastats(CHUNKS_W * C, size_t len)
  220. {
  221. chunks_stats_add(&C->stats_extra, len, len, 1);
  222. chunks_stats_add(&C->stats_tapee, len, len, 1);
  223. }
  224. /**
  225. * chunks_write_extrastats_copy(C, direction):
  226. * Make a copy of the extra stats; if ${direction} is 0, copy from the real
  227. * stats to a copy; if 1, set the real stats to the copy.
  228. */
  229. void
  230. chunks_write_extrastats_copy(CHUNKS_W * C, size_t direction)
  231. {
  232. if (direction == 0)
  233. C->stats_extra_copy = C->stats_extra;
  234. else
  235. C->stats_extra = C->stats_extra_copy;
  236. }
  237. /**
  238. * chunks_write_printstats(stream, C, csv):
  239. * Print statistics for the write transaction associated with the cookie
  240. * ${C} to ${stream}, optionally in ${csv} format.
  241. */
  242. int
  243. chunks_write_printstats(FILE * stream, CHUNKS_W * C, int csv)
  244. {
  245. /* Print header. */
  246. if (chunks_stats_printheader(stream, csv))
  247. goto err0;
  248. /* Print the statistics we have. */
  249. if (chunks_stats_print(stream, &C->stats_total, "All archives",
  250. &C->stats_extra, csv))
  251. goto err0;
  252. if (chunks_stats_print(stream, &C->stats_unique, " (unique data)",
  253. &C->stats_extra, csv))
  254. goto err0;
  255. if (chunks_stats_print(stream, &C->stats_tape, "This archive",
  256. &C->stats_tapee, csv))
  257. goto err0;
  258. if (chunks_stats_print(stream, &C->stats_new, "New data",
  259. &C->stats_tapee, csv))
  260. goto err0;
  261. /* Success! */
  262. return (0);
  263. err0:
  264. /* Failure! */
  265. return (-1);
  266. }
  267. /**
  268. * chunks_write_checkpoint(C):
  269. * Create a checkpoint for the write transaction associated with the cookie
  270. * ${C}.
  271. */
  272. int
  273. chunks_write_checkpoint(CHUNKS_W * C)
  274. {
  275. /* If this isn't a dry run, write the new chunk directory. */
  276. if ((C->S != NULL) &&
  277. chunks_directory_write(C->path, C->HT, &C->stats_extra, ".ckpt"))
  278. goto err0;
  279. /* Success! */
  280. return (0);
  281. err0:
  282. /* Failure! */
  283. return (-1);
  284. }
  285. /**
  286. * chunks_write_free(C):
  287. * End the write transaction associated with the cookie ${C}.
  288. */
  289. void
  290. chunks_write_free(CHUNKS_W * C)
  291. {
  292. /* Behave consistently with free(NULL). */
  293. if (C == NULL)
  294. return;
  295. /* Free the chunk hash table. */
  296. chunks_directory_free(C->HT, C->dir);
  297. /* Free memory. */
  298. free(C->zbuf);
  299. free(C->path);
  300. free(C);
  301. }