multitape_read.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. #include "platform.h"
  2. #include <sys/types.h>
  3. #include <assert.h>
  4. #include <limits.h>
  5. #include <stddef.h>
  6. #include <stdint.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "chunks.h"
  10. #include "crypto.h"
  11. #include "ctassert.h"
  12. #include "multitape_internal.h"
  13. #include "storage.h"
  14. #include "sysendian.h"
  15. #include "warnp.h"
  16. #include "multitape.h"
  17. /*
  18. * The readtape_read API relies upon chunks being no more than SSIZE_MAX
  19. * bytes in length.
  20. */
  21. CTASSERT(MAXCHUNK <= SSIZE_MAX);
  22. /* Stream parameters. */
  23. struct stream {
  24. struct stream * istr; /* Index stream. */
  25. uint8_t * chunk; /* Buffer for holding a chunk. */
  26. size_t chunklen; /* Length of current chunk. */
  27. size_t chunkpos; /* Position within current chunk. */
  28. off_t skiplen; /* Length to skip. */
  29. struct chunkheader ch; /* Pending chunk header. */
  30. int ch_valid; /* Non-zero if ch is valid. */
  31. };
  32. /* Cookie created by readtape_open and passed to other functions. */
  33. struct multitape_read_internal {
  34. struct stream h; /* Headers. */
  35. struct stream hi; /* Headers index. */
  36. struct stream c; /* Chunks. */
  37. struct stream ci; /* Chunks index. */
  38. struct stream cii; /* Chunks index index. */
  39. struct stream t; /* Trailers. */
  40. struct stream ti; /* Trailers index. */
  41. off_t hlen; /* Queued length of header. */
  42. off_t clen; /* Queued length of chunked data. */
  43. off_t tlen; /* Queued length of trailer. */
  44. struct tapemetaindex tmi; /* Metaindex. */
  45. STORAGE_R * S; /* Storage layer cookie. */
  46. CHUNKS_R * C; /* Chunk layer cookie. */
  47. };
  48. static int stream_get_chunkheader(struct stream *, CHUNKS_R *);
  49. static int stream_get_chunk(struct stream *, const uint8_t **, size_t *,
  50. CHUNKS_R *);
  51. static ssize_t stream_read(struct stream *, uint8_t *, size_t, CHUNKS_R *);
  52. /**
  53. * stream_get_chunkheader(S, C):
  54. * Fill ${S}->ch with the header for the next chunk, and set ch_valid to 1.
  55. * On EOF of the parent stream, ch_valid will remain zero.
  56. */
  57. static int
  58. stream_get_chunkheader(struct stream * S, CHUNKS_R * C)
  59. {
  60. off_t len;
  61. ssize_t readlen;
  62. /* Loop until we have a chunk which we're not skipping. */
  63. do {
  64. if (S->ch_valid) {
  65. len = le32dec(S->ch.len);
  66. if (len <= S->skiplen) {
  67. S->skiplen -= len;
  68. S->ch_valid = 0;
  69. } else {
  70. /* We have a useful chunk. */
  71. break;
  72. }
  73. }
  74. /* Get a chunk header from the parent stream. */
  75. readlen = stream_read(S->istr, (uint8_t *)&S->ch,
  76. sizeof(struct chunkheader), C);
  77. switch (readlen) {
  78. case -1:
  79. /* Error in stream_read. */
  80. goto err0;
  81. case 0:
  82. /* No more chunks available. */
  83. goto eof;
  84. case sizeof(struct chunkheader):
  85. /* Successful read of chunk header. */
  86. S->ch_valid = 1;
  87. break;
  88. default:
  89. /* Wrong length read. */
  90. warnp("Premature EOF of archive index");
  91. goto err0;
  92. }
  93. } while (1);
  94. eof:
  95. /* Success! */
  96. return (0);
  97. err0:
  98. /* Failure! */
  99. return (-1);
  100. }
  101. /**
  102. * stream_get_chunk(S, buf, clen, C):
  103. * Set ${buf} to point to the next available data, and set ${clen} to the
  104. * length of data available; use the chunk read cookie ${C} if needed.
  105. */
  106. static int
  107. stream_get_chunk(struct stream * S, const uint8_t ** buf, size_t * clen,
  108. CHUNKS_R * C)
  109. {
  110. size_t len, zlen;
  111. off_t skip;
  112. /* Skip part of the current chunk if appropriate. */
  113. if (S->skiplen) {
  114. skip = (off_t)(S->chunklen - S->chunkpos);
  115. if (skip > S->skiplen)
  116. skip = S->skiplen;
  117. S->skiplen -= skip;
  118. S->chunkpos += (size_t)skip;
  119. }
  120. while ((S->chunklen == S->chunkpos) && (S->istr != NULL)) {
  121. /* Get a chunk header. */
  122. if (stream_get_chunkheader(S, C))
  123. goto err0;
  124. /* EOF? */
  125. if (S->ch_valid == 0)
  126. goto eof;
  127. /* Decode chunk header. */
  128. len = le32dec(S->ch.len);
  129. zlen = le32dec(S->ch.zlen);
  130. /* Read chunk. */
  131. if (chunks_read_chunk(C, S->ch.hash, len, zlen, S->chunk, 0))
  132. goto err0;
  133. S->chunklen = len;
  134. /* Set current position within buffer. */
  135. S->chunkpos = (size_t)S->skiplen;
  136. S->skiplen = 0;
  137. /* The chunk is no longer pending. */
  138. S->ch_valid = 0;
  139. }
  140. /* We have some data. */
  141. *buf = S->chunk + S->chunkpos;
  142. *clen = S->chunklen - S->chunkpos;
  143. /* Success! */
  144. return (0);
  145. eof:
  146. *clen = 0;
  147. *buf = NULL;
  148. /* Success, but no data. */
  149. return (0);
  150. err0:
  151. /* Failure! */
  152. return (-1);
  153. }
  154. /**
  155. * stream_read(S, buf, buflen, C):
  156. * Fill ${buf} with ${buflen} bytes of data from the stream ${S}. Return the
  157. * length written (which may be less than ${buflen} on EOF).
  158. */
  159. static ssize_t
  160. stream_read(struct stream * S, uint8_t * buf, size_t buflen, CHUNKS_R * C)
  161. {
  162. const uint8_t * readbuf;
  163. size_t readlen;
  164. size_t bufpos;
  165. /* Sanity check. */
  166. assert(buflen < SSIZE_MAX);
  167. for (bufpos = 0; bufpos < buflen; bufpos += readlen) {
  168. /* Read data. */
  169. if (stream_get_chunk(S, &readbuf, &readlen, C))
  170. goto err0;
  171. /* Make sure we don't have too much data. */
  172. if (readlen > buflen - bufpos)
  173. readlen = buflen - bufpos;
  174. /* Stop looping if we have no more data. */
  175. if (readlen == 0)
  176. break;
  177. /* Mark the data as consumed. */
  178. S->chunkpos += readlen;
  179. /* Copy into the correct position in our buffer. */
  180. memcpy(buf + bufpos, readbuf, readlen);
  181. }
  182. /* Success (or perhaps EOF). */
  183. return ((ssize_t)bufpos);
  184. err0:
  185. /* Failure! */
  186. return (-1);
  187. }
  188. /**
  189. * get_entryheader(d):
  190. * Read an archive entry header and update the pending header, chunk and
  191. * trailer data lengths. Return -1 on error, 0 on EOF, or 1 on success.
  192. */
  193. static int
  194. get_entryheader(TAPE_R * d)
  195. {
  196. struct entryheader eh;
  197. ssize_t readlen;
  198. /* Read an archive entry header. */
  199. readlen = stream_read(&d->h, (uint8_t *)&eh,
  200. sizeof(struct entryheader), d->C);
  201. switch (readlen) {
  202. case -1:
  203. /* Error in stream_read. */
  204. return (-1);
  205. case 0:
  206. /* No more chunks available. */
  207. return (0);
  208. case sizeof(struct entryheader):
  209. /* Successful read of chunk header. Decode entry header. */
  210. d->hlen = le32dec(eh.hlen);
  211. d->clen = (off_t)le64dec(eh.clen);
  212. d->tlen = le32dec(eh.tlen);
  213. return (1);
  214. default:
  215. /* Wrong length read. */
  216. warnp("Premature EOF of archive index");
  217. return (-1);
  218. }
  219. }
  220. /**
  221. * readtape_open(machinenum, tapename):
  222. * Open the tape with the given name, and return a cookie which can be used
  223. * for accessing it.
  224. */
  225. TAPE_R *
  226. readtape_open(uint64_t machinenum, const char * tapename)
  227. {
  228. struct multitape_read_internal * d;
  229. struct tapemetadata tmd;
  230. /* Allocate memory. */
  231. if ((d = malloc(sizeof(struct multitape_read_internal))) == NULL)
  232. goto err0;
  233. memset(d, 0, sizeof(struct multitape_read_internal));
  234. /* Obtain a storage layer read cookie. */
  235. if ((d->S = storage_read_init(machinenum)) == NULL)
  236. goto err1;
  237. /* Obtain a chunk layer read cookie. */
  238. if ((d->C = chunks_read_init(d->S, MAXCHUNK)) == NULL)
  239. goto err2;
  240. /* Allocate chunk buffers. */
  241. d->h.chunk = malloc(MAXCHUNK);
  242. d->c.chunk = malloc(MAXCHUNK);
  243. d->ci.chunk = malloc(MAXCHUNK);
  244. d->t.chunk = malloc(MAXCHUNK);
  245. if ((d->h.chunk == NULL) || (d->c.chunk == NULL) ||
  246. (d->ci.chunk == NULL) || (d->t.chunk == NULL))
  247. goto err3;
  248. /* Initialize streams. */
  249. d->h.istr = &d->hi;
  250. d->c.istr = &d->ci;
  251. d->ci.istr = &d->cii;
  252. d->t.istr = &d->ti;
  253. /* Read the tape metadata. */
  254. if (multitape_metadata_get_byname(d->S, NULL, &tmd, tapename, 0))
  255. goto err3;
  256. /* Read the tape metaindex. */
  257. if (multitape_metaindex_get(d->S, NULL, &d->tmi, &tmd, 0))
  258. goto err4;
  259. /* Free parsed metadata. */
  260. multitape_metadata_free(&tmd);
  261. /* Initialize streams. */
  262. d->hi.chunklen = d->tmi.hindexlen;
  263. d->hi.chunk = d->tmi.hindex;
  264. d->cii.chunklen = d->tmi.cindexlen;
  265. d->cii.chunk = d->tmi.cindex;
  266. d->ti.chunklen = d->tmi.tindexlen;
  267. d->ti.chunk = d->tmi.tindex;
  268. /* Success! */
  269. return (d);
  270. err4:
  271. multitape_metadata_free(&tmd);
  272. err3:
  273. free(d->h.chunk);
  274. free(d->c.chunk);
  275. free(d->ci.chunk);
  276. free(d->t.chunk);
  277. chunks_read_free(d->C);
  278. err2:
  279. storage_read_free(d->S);
  280. err1:
  281. free(d);
  282. err0:
  283. /* Failure! */
  284. return (NULL);
  285. }
  286. /**
  287. * readtape_read(d, buffer):
  288. * Read some data from the tape associated with ${d}, make ${*buffer}
  289. * point to the data, and return the number of bytes read.
  290. */
  291. ssize_t
  292. readtape_read(TAPE_R * d, const void ** buffer)
  293. {
  294. const uint8_t ** buf = (const uint8_t **)buffer;
  295. struct stream * readstream;
  296. off_t * readmaxlen;
  297. size_t clen;
  298. /* Loop until we read EOF or have some data to return. */
  299. do {
  300. if (d->hlen) {
  301. /* We want some header data. */
  302. readstream = &d->h;
  303. readmaxlen = &d->hlen;
  304. } else if (d->clen) {
  305. /* We want some chunk data. */
  306. readstream = &d->c;
  307. readmaxlen = &d->clen;
  308. } else if (d->tlen) {
  309. /* We want some trailer data. */
  310. readstream = &d->t;
  311. readmaxlen = &d->tlen;
  312. } else {
  313. /* Read the next archive entry header. */
  314. switch (get_entryheader(d)) {
  315. case -1:
  316. goto err0;
  317. case 0:
  318. goto eof;
  319. case 1:
  320. break;
  321. }
  322. continue;
  323. }
  324. if (stream_get_chunk(readstream, buf, &clen, d->C))
  325. goto err0;
  326. if ((off_t)clen > *readmaxlen)
  327. clen = (size_t)(*readmaxlen);
  328. readstream->chunkpos += clen;
  329. *readmaxlen -= clen;
  330. /* Do we have data? */
  331. if (clen)
  332. break;
  333. /* Premature EOF. */
  334. warnp("Premature EOF reading archive");
  335. goto err0;
  336. } while (1);
  337. /* Sanity check. */
  338. if (clen > SSIZE_MAX) {
  339. warn0("Chunk is too large");
  340. goto err0;
  341. }
  342. /* Success! */
  343. return ((ssize_t)clen);
  344. eof:
  345. /* No more data. */
  346. return (0);
  347. err0:
  348. /* Failure! */
  349. return (-1);
  350. }
  351. /**
  352. * readtape_readchunk(d, ch):
  353. * Obtain a chunk header suitable for passing to writetape_writechunk.
  354. * Return the length of the chunk, or 0 if no chunk is available (EOF or if
  355. * the tape position isn't aligned at a chunk boundary).
  356. */
  357. ssize_t
  358. readtape_readchunk(TAPE_R * d, struct chunkheader ** ch)
  359. {
  360. size_t len;
  361. /*
  362. * If we've hit the end of a multitape archive entry, read the next
  363. * entry header. If a single file is split between two or more
  364. * multitape entries due to checkpointing, the second and subsequent
  365. * entries will have no header data but will instead go straight into
  366. * chunks.
  367. */
  368. if ((d->hlen == 0) && (d->clen == 0) && (d->tlen == 0)) {
  369. /* Read the next archive entry header. */
  370. switch (get_entryheader(d)) {
  371. case -1:
  372. goto err0;
  373. case 0:
  374. goto nochunk;
  375. case 1:
  376. break;
  377. }
  378. }
  379. /*
  380. * We can only return a chunk if we're in the chunk portion of an
  381. * archive entry.
  382. */
  383. if (d->hlen != 0 || d->clen == 0)
  384. goto nochunk;
  385. /*
  386. * Make sure we're not in the middle of a chunk (this should never
  387. * happen, since this stream contains complete chunks from files!)
  388. */
  389. if (d->c.chunkpos != d->c.chunklen) {
  390. warn0("c.chunkpos != c.chunklen");
  391. goto err0;
  392. }
  393. /* Get a chunk header. */
  394. if (stream_get_chunkheader(&d->c, d->C))
  395. goto err0;
  396. /*
  397. * EOF is an error, but we'll ignore it and let it be reported when
  398. * readtape_read is next called.
  399. */
  400. if (d->c.ch_valid == 0)
  401. goto nochunk;
  402. /* We need to be properly aligned on a chunk boundary. */
  403. if (d->c.skiplen != 0)
  404. goto nochunk;
  405. /* We have a chunk! */
  406. *ch = &d->c.ch;
  407. len = le32dec(d->c.ch.len);
  408. /* Sanity check. */
  409. if (len > SSIZE_MAX) {
  410. warn0("Chunk is too large");
  411. goto err0;
  412. }
  413. /* Return the chunk length. */
  414. return ((ssize_t)len);
  415. nochunk:
  416. /* We don't have a chunk available. */
  417. return (0);
  418. err0:
  419. /* Failure! */
  420. return (-1);
  421. }
  422. /**
  423. * readtape_skip(d, request):
  424. * Skip up to ${request} bytes from the tape associated with ${d},
  425. * and return the length skipped.
  426. */
  427. off_t
  428. readtape_skip(TAPE_R * d, off_t request)
  429. {
  430. off_t skipped;
  431. off_t skiplen;
  432. /* Loop until we have skipped enough. */
  433. for (skipped = 0; skipped < request;) {
  434. if (d->hlen) {
  435. /* We want to skip some header data. */
  436. if (request - skipped < d->hlen)
  437. skiplen = request - skipped;
  438. else
  439. skiplen = d->hlen;
  440. d->hlen -= skiplen;
  441. d->h.skiplen += skiplen;
  442. skipped += skiplen;
  443. } else if (d->clen) {
  444. /* We want to skip some chunk data. */
  445. if (request - skipped < d->clen)
  446. skiplen = request - skipped;
  447. else
  448. skiplen = d->clen;
  449. d->clen -= skiplen;
  450. d->c.skiplen += skiplen;
  451. skipped += skiplen;
  452. } else if (d->tlen) {
  453. /* We want to skip some trailer data. */
  454. if (request - skipped < d->tlen)
  455. skiplen = request - skipped;
  456. else
  457. skiplen = d->tlen;
  458. d->tlen -= skiplen;
  459. d->t.skiplen += skiplen;
  460. skipped += skiplen;
  461. } else {
  462. /* Read the next archive entry header. */
  463. switch (get_entryheader(d)) {
  464. case -1:
  465. goto err0;
  466. case 0:
  467. goto eof;
  468. case 1:
  469. break;
  470. }
  471. }
  472. }
  473. /* Success! */
  474. return (skipped);
  475. eof:
  476. /* No more data. */
  477. return (skipped);
  478. err0:
  479. /* Failure! */
  480. return (-1);
  481. }
  482. /**
  483. * readtape_close(d):
  484. * Close the tape associated with ${d}.
  485. */
  486. int
  487. readtape_close(TAPE_R * d)
  488. {
  489. /* Free metaindex buffers. */
  490. multitape_metaindex_free(&d->tmi);
  491. /* Free buffers. */
  492. free(d->h.chunk);
  493. free(d->c.chunk);
  494. free(d->ci.chunk);
  495. free(d->t.chunk);
  496. /* Close the chunk layer read cookie. */
  497. chunks_read_free(d->C);
  498. /* Close the storage layer read cookie. */
  499. storage_read_free(d->S);
  500. /* Free the multitape layer read cookie. */
  501. free(d);
  502. /* Success! */
  503. return (0);
  504. }