token.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. /*
  2. * Routines used by the file-transfer code.
  3. *
  4. * Copyright (C) 1996 Andrew Tridgell
  5. * Copyright (C) 1996 Paul Mackerras
  6. * Copyright (C) 2003-2008 Wayne Davison
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 3 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, visit the http://fsf.org website.
  20. */
  21. #include "rsync.h"
  22. #include "ifuncs.h"
  23. #include "zlib/zlib.h"
  24. extern int do_compression;
  25. extern int module_id;
  26. extern int def_compress_level;
  27. extern char *skip_compress;
  28. static int compression_level, per_file_default_level;
  29. struct suffix_tree {
  30. struct suffix_tree *sibling;
  31. struct suffix_tree *child;
  32. char letter, word_end;
  33. };
  34. static char *match_list;
  35. static struct suffix_tree *suftree;
  36. static void add_suffix(struct suffix_tree **prior, char ltr, const char *str)
  37. {
  38. struct suffix_tree *node, *newnode;
  39. if (ltr == '[') {
  40. const char *after = strchr(str, ']');
  41. /* Just skip bogus character classes. */
  42. if (!after++)
  43. return;
  44. while ((ltr = *str++) != ']')
  45. add_suffix(prior, ltr, after);
  46. return;
  47. }
  48. for (node = *prior; node; prior = &node->sibling, node = node->sibling) {
  49. if (node->letter == ltr) {
  50. if (*str)
  51. add_suffix(&node->child, *str, str+1);
  52. else
  53. node->word_end = 1;
  54. return;
  55. }
  56. if (node->letter > ltr)
  57. break;
  58. }
  59. if (!(newnode = new(struct suffix_tree)))
  60. out_of_memory("add_suffix");
  61. newnode->sibling = node;
  62. newnode->child = NULL;
  63. newnode->letter = ltr;
  64. *prior = newnode;
  65. if (*str) {
  66. add_suffix(&newnode->child, *str, str+1);
  67. newnode->word_end = 0;
  68. } else
  69. newnode->word_end = 1;
  70. }
  71. static void add_nocompress_suffixes(const char *str)
  72. {
  73. char *buf, *t;
  74. const char *f = str;
  75. if (!(buf = new_array(char, strlen(f) + 1)))
  76. out_of_memory("add_nocompress_suffixes");
  77. while (*f) {
  78. if (*f == '/') {
  79. f++;
  80. continue;
  81. }
  82. t = buf;
  83. do {
  84. if (isUpper(f))
  85. *t++ = toLower(f);
  86. else
  87. *t++ = *f;
  88. } while (*++f != '/' && *f);
  89. *t++ = '\0';
  90. fprintf(stderr, "adding `%s'\n", buf);
  91. add_suffix(&suftree, *buf, buf+1);
  92. }
  93. free(buf);
  94. }
  95. static void init_set_compression(void)
  96. {
  97. const char *f;
  98. char *t, *start;
  99. if (skip_compress)
  100. add_nocompress_suffixes(skip_compress);
  101. /* A non-daemon transfer skips the default suffix list if the
  102. * user specified --skip-compress. */
  103. if (skip_compress && module_id < 0)
  104. f = "";
  105. else
  106. f = lp_dont_compress(module_id);
  107. if (!(match_list = t = new_array(char, strlen(f) + 2)))
  108. out_of_memory("set_compression");
  109. per_file_default_level = def_compress_level;
  110. while (*f) {
  111. if (*f == ' ') {
  112. f++;
  113. continue;
  114. }
  115. start = t;
  116. do {
  117. if (isUpper(f))
  118. *t++ = toLower(f);
  119. else
  120. *t++ = *f;
  121. } while (*++f != ' ' && *f);
  122. *t++ = '\0';
  123. if (t - start == 1+1 && *start == '*') {
  124. /* Optimize a match-string of "*". */
  125. *match_list = '\0';
  126. suftree = NULL;
  127. per_file_default_level = 0;
  128. break;
  129. }
  130. /* Move *.foo items into the stuffix tree. */
  131. if (*start == '*' && start[1] == '.' && start[2]
  132. && !strpbrk(start+2, ".?*")) {
  133. add_suffix(&suftree, start[2], start+3);
  134. t = start;
  135. }
  136. }
  137. *t++ = '\0';
  138. }
  139. /* determine the compression level based on a wildcard filename list */
  140. void set_compression(const char *fname)
  141. {
  142. const struct suffix_tree *node;
  143. const char *s;
  144. char ltr;
  145. if (!do_compression)
  146. return;
  147. if (!match_list)
  148. init_set_compression();
  149. compression_level = per_file_default_level;
  150. if (!*match_list && !suftree)
  151. return;
  152. if ((s = strrchr(fname, '/')) != NULL)
  153. fname = s + 1;
  154. for (s = match_list; *s; s += strlen(s) + 1) {
  155. if (iwildmatch(s, fname)) {
  156. compression_level = 0;
  157. return;
  158. }
  159. }
  160. if (!(node = suftree) || !(s = strrchr(fname, '.'))
  161. || s == fname || !(ltr = *++s))
  162. return;
  163. while (1) {
  164. while (node->letter != ltr) {
  165. if (node->letter > ltr)
  166. return;
  167. if (!(node = node->sibling))
  168. return;
  169. }
  170. if ((ltr = *++s) == '\0') {
  171. if (node->word_end)
  172. compression_level = 0;
  173. return;
  174. }
  175. if (!(node = node->child))
  176. return;
  177. }
  178. }
  179. /* non-compressing recv token */
  180. static int32 simple_recv_token(int f, char **data)
  181. {
  182. static int32 residue;
  183. static char *buf;
  184. int32 n;
  185. if (!buf) {
  186. buf = new_array(char, CHUNK_SIZE);
  187. if (!buf)
  188. out_of_memory("simple_recv_token");
  189. }
  190. if (residue == 0) {
  191. int32 i = read_int(f);
  192. if (i <= 0)
  193. return i;
  194. residue = i;
  195. }
  196. *data = buf;
  197. n = MIN(CHUNK_SIZE,residue);
  198. residue -= n;
  199. read_buf(f,buf,n);
  200. return n;
  201. }
  202. /* non-compressing send token */
  203. static void simple_send_token(int f, int32 token, struct map_struct *buf,
  204. OFF_T offset, int32 n)
  205. {
  206. if (n > 0) {
  207. int32 len = 0;
  208. while (len < n) {
  209. int32 n1 = MIN(CHUNK_SIZE, n-len);
  210. write_int(f, n1);
  211. write_buf(f, map_ptr(buf, offset+len, n1), n1);
  212. len += n1;
  213. }
  214. }
  215. /* a -2 token means to send data only and no token */
  216. if (token != -2)
  217. write_int(f, -(token+1));
  218. }
  219. /* Flag bytes in compressed stream are encoded as follows: */
  220. #define END_FLAG 0 /* that's all folks */
  221. #define TOKEN_LONG 0x20 /* followed by 32-bit token number */
  222. #define TOKENRUN_LONG 0x21 /* ditto with 16-bit run count */
  223. #define DEFLATED_DATA 0x40 /* + 6-bit high len, then low len byte */
  224. #define TOKEN_REL 0x80 /* + 6-bit relative token number */
  225. #define TOKENRUN_REL 0xc0 /* ditto with 16-bit run count */
  226. #define MAX_DATA_COUNT 16383 /* fit 14 bit count into 2 bytes with flags */
  227. /* zlib.h says that if we want to be able to compress something in a single
  228. * call, avail_out must be at least 0.1% larger than avail_in plus 12 bytes.
  229. * We'll add in 0.1%+16, just to be safe (and we'll avoid floating point,
  230. * to ensure that this is a compile-time value). */
  231. #define AVAIL_OUT_SIZE(avail_in_size) ((avail_in_size)*1001/1000+16)
  232. /* For coding runs of tokens */
  233. static int32 last_token = -1;
  234. static int32 run_start;
  235. static int32 last_run_end;
  236. /* Deflation state */
  237. static z_stream tx_strm;
  238. /* Output buffer */
  239. static char *obuf;
  240. /* We want obuf to be able to hold both MAX_DATA_COUNT+2 bytes as well as
  241. * AVAIL_OUT_SIZE(CHUNK_SIZE) bytes, so make sure that it's large enough. */
  242. #if MAX_DATA_COUNT+2 > AVAIL_OUT_SIZE(CHUNK_SIZE)
  243. #define OBUF_SIZE (MAX_DATA_COUNT+2)
  244. #else
  245. #define OBUF_SIZE AVAIL_OUT_SIZE(CHUNK_SIZE)
  246. #endif
  247. /* Send a deflated token */
  248. static void
  249. send_deflated_token(int f, int32 token, struct map_struct *buf, OFF_T offset,
  250. int32 nb, int32 toklen)
  251. {
  252. int32 n, r;
  253. static int init_done, flush_pending;
  254. if (last_token == -1) {
  255. /* initialization */
  256. if (!init_done) {
  257. tx_strm.next_in = NULL;
  258. tx_strm.zalloc = NULL;
  259. tx_strm.zfree = NULL;
  260. if (deflateInit2(&tx_strm, compression_level,
  261. Z_DEFLATED, -15, 8,
  262. Z_DEFAULT_STRATEGY) != Z_OK) {
  263. rprintf(FERROR, "compression init failed\n");
  264. exit_cleanup(RERR_STREAMIO);
  265. }
  266. if ((obuf = new_array(char, OBUF_SIZE)) == NULL)
  267. out_of_memory("send_deflated_token");
  268. init_done = 1;
  269. } else
  270. deflateReset(&tx_strm);
  271. last_run_end = 0;
  272. run_start = token;
  273. flush_pending = 0;
  274. } else if (last_token == -2) {
  275. run_start = token;
  276. } else if (nb != 0 || token != last_token + 1
  277. || token >= run_start + 65536) {
  278. /* output previous run */
  279. r = run_start - last_run_end;
  280. n = last_token - run_start;
  281. if (r >= 0 && r <= 63) {
  282. write_byte(f, (n==0? TOKEN_REL: TOKENRUN_REL) + r);
  283. } else {
  284. write_byte(f, (n==0? TOKEN_LONG: TOKENRUN_LONG));
  285. write_int(f, run_start);
  286. }
  287. if (n != 0) {
  288. write_byte(f, n);
  289. write_byte(f, n >> 8);
  290. }
  291. last_run_end = last_token;
  292. run_start = token;
  293. }
  294. last_token = token;
  295. if (nb != 0 || flush_pending) {
  296. /* deflate the data starting at offset */
  297. int flush = Z_NO_FLUSH;
  298. tx_strm.avail_in = 0;
  299. tx_strm.avail_out = 0;
  300. do {
  301. if (tx_strm.avail_in == 0 && nb != 0) {
  302. /* give it some more input */
  303. n = MIN(nb, CHUNK_SIZE);
  304. tx_strm.next_in = (Bytef *)
  305. map_ptr(buf, offset, n);
  306. tx_strm.avail_in = n;
  307. nb -= n;
  308. offset += n;
  309. }
  310. if (tx_strm.avail_out == 0) {
  311. tx_strm.next_out = (Bytef *)(obuf + 2);
  312. tx_strm.avail_out = MAX_DATA_COUNT;
  313. if (flush != Z_NO_FLUSH) {
  314. /*
  315. * We left the last 4 bytes in the
  316. * buffer, in case they are the
  317. * last 4. Move them to the front.
  318. */
  319. memcpy(tx_strm.next_out,
  320. obuf+MAX_DATA_COUNT-2, 4);
  321. tx_strm.next_out += 4;
  322. tx_strm.avail_out -= 4;
  323. }
  324. }
  325. if (nb == 0 && token != -2)
  326. flush = Z_SYNC_FLUSH;
  327. r = deflate(&tx_strm, flush);
  328. if (r != Z_OK) {
  329. rprintf(FERROR, "deflate returned %d\n", r);
  330. exit_cleanup(RERR_STREAMIO);
  331. }
  332. if (nb == 0 || tx_strm.avail_out == 0) {
  333. n = MAX_DATA_COUNT - tx_strm.avail_out;
  334. if (flush != Z_NO_FLUSH) {
  335. /*
  336. * We have to trim off the last 4
  337. * bytes of output when flushing
  338. * (they are just 0, 0, ff, ff).
  339. */
  340. n -= 4;
  341. }
  342. if (n > 0) {
  343. obuf[0] = DEFLATED_DATA + (n >> 8);
  344. obuf[1] = n;
  345. write_buf(f, obuf, n+2);
  346. }
  347. }
  348. } while (nb != 0 || tx_strm.avail_out == 0);
  349. flush_pending = token == -2;
  350. }
  351. if (token == -1) {
  352. /* end of file - clean up */
  353. write_byte(f, END_FLAG);
  354. } else if (token != -2) {
  355. /* Add the data in the current block to the compressor's
  356. * history and hash table. */
  357. do {
  358. /* Break up long sections in the same way that
  359. * see_deflate_token() does. */
  360. int32 n1 = toklen > 0xffff ? 0xffff : toklen;
  361. toklen -= n1;
  362. tx_strm.next_in = (Bytef *)map_ptr(buf, offset, n1);
  363. tx_strm.avail_in = n1;
  364. tx_strm.next_out = (Bytef *) obuf;
  365. tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
  366. r = deflate(&tx_strm, Z_INSERT_ONLY);
  367. if (r != Z_OK || tx_strm.avail_in != 0) {
  368. rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n",
  369. r, tx_strm.avail_in);
  370. exit_cleanup(RERR_STREAMIO);
  371. }
  372. } while (toklen > 0);
  373. }
  374. }
  375. /* tells us what the receiver is in the middle of doing */
  376. static enum { r_init, r_idle, r_running, r_inflating, r_inflated } recv_state;
  377. /* for inflating stuff */
  378. static z_stream rx_strm;
  379. static char *cbuf;
  380. static char *dbuf;
  381. /* for decoding runs of tokens */
  382. static int32 rx_token;
  383. static int32 rx_run;
  384. /* Receive a deflated token and inflate it */
  385. static int32 recv_deflated_token(int f, char **data)
  386. {
  387. static int init_done;
  388. static int32 saved_flag;
  389. int32 n, flag;
  390. int r;
  391. for (;;) {
  392. switch (recv_state) {
  393. case r_init:
  394. if (!init_done) {
  395. rx_strm.next_out = NULL;
  396. rx_strm.zalloc = NULL;
  397. rx_strm.zfree = NULL;
  398. if (inflateInit2(&rx_strm, -15) != Z_OK) {
  399. rprintf(FERROR, "inflate init failed\n");
  400. exit_cleanup(RERR_STREAMIO);
  401. }
  402. if (!(cbuf = new_array(char, MAX_DATA_COUNT))
  403. || !(dbuf = new_array(char, AVAIL_OUT_SIZE(CHUNK_SIZE))))
  404. out_of_memory("recv_deflated_token");
  405. init_done = 1;
  406. } else {
  407. inflateReset(&rx_strm);
  408. }
  409. recv_state = r_idle;
  410. rx_token = 0;
  411. break;
  412. case r_idle:
  413. case r_inflated:
  414. if (saved_flag) {
  415. flag = saved_flag & 0xff;
  416. saved_flag = 0;
  417. } else
  418. flag = read_byte(f);
  419. if ((flag & 0xC0) == DEFLATED_DATA) {
  420. n = ((flag & 0x3f) << 8) + read_byte(f);
  421. read_buf(f, cbuf, n);
  422. rx_strm.next_in = (Bytef *)cbuf;
  423. rx_strm.avail_in = n;
  424. recv_state = r_inflating;
  425. break;
  426. }
  427. if (recv_state == r_inflated) {
  428. /* check previous inflated stuff ended correctly */
  429. rx_strm.avail_in = 0;
  430. rx_strm.next_out = (Bytef *)dbuf;
  431. rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
  432. r = inflate(&rx_strm, Z_SYNC_FLUSH);
  433. n = AVAIL_OUT_SIZE(CHUNK_SIZE) - rx_strm.avail_out;
  434. /*
  435. * Z_BUF_ERROR just means no progress was
  436. * made, i.e. the decompressor didn't have
  437. * any pending output for us.
  438. */
  439. if (r != Z_OK && r != Z_BUF_ERROR) {
  440. rprintf(FERROR, "inflate flush returned %d (%d bytes)\n",
  441. r, n);
  442. exit_cleanup(RERR_STREAMIO);
  443. }
  444. if (n != 0 && r != Z_BUF_ERROR) {
  445. /* have to return some more data and
  446. save the flag for later. */
  447. saved_flag = flag + 0x10000;
  448. *data = dbuf;
  449. return n;
  450. }
  451. /*
  452. * At this point the decompressor should
  453. * be expecting to see the 0, 0, ff, ff bytes.
  454. */
  455. if (!inflateSyncPoint(&rx_strm)) {
  456. rprintf(FERROR, "decompressor lost sync!\n");
  457. exit_cleanup(RERR_STREAMIO);
  458. }
  459. rx_strm.avail_in = 4;
  460. rx_strm.next_in = (Bytef *)cbuf;
  461. cbuf[0] = cbuf[1] = 0;
  462. cbuf[2] = cbuf[3] = 0xff;
  463. inflate(&rx_strm, Z_SYNC_FLUSH);
  464. recv_state = r_idle;
  465. }
  466. if (flag == END_FLAG) {
  467. /* that's all folks */
  468. recv_state = r_init;
  469. return 0;
  470. }
  471. /* here we have a token of some kind */
  472. if (flag & TOKEN_REL) {
  473. rx_token += flag & 0x3f;
  474. flag >>= 6;
  475. } else
  476. rx_token = read_int(f);
  477. if (flag & 1) {
  478. rx_run = read_byte(f);
  479. rx_run += read_byte(f) << 8;
  480. recv_state = r_running;
  481. }
  482. return -1 - rx_token;
  483. case r_inflating:
  484. rx_strm.next_out = (Bytef *)dbuf;
  485. rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
  486. r = inflate(&rx_strm, Z_NO_FLUSH);
  487. n = AVAIL_OUT_SIZE(CHUNK_SIZE) - rx_strm.avail_out;
  488. if (r != Z_OK) {
  489. rprintf(FERROR, "inflate returned %d (%d bytes)\n", r, n);
  490. exit_cleanup(RERR_STREAMIO);
  491. }
  492. if (rx_strm.avail_in == 0)
  493. recv_state = r_inflated;
  494. if (n != 0) {
  495. *data = dbuf;
  496. return n;
  497. }
  498. break;
  499. case r_running:
  500. ++rx_token;
  501. if (--rx_run == 0)
  502. recv_state = r_idle;
  503. return -1 - rx_token;
  504. }
  505. }
  506. }
  507. /*
  508. * put the data corresponding to a token that we've just returned
  509. * from recv_deflated_token into the decompressor's history buffer.
  510. */
  511. static void see_deflate_token(char *buf, int32 len)
  512. {
  513. int r;
  514. int32 blklen;
  515. unsigned char hdr[5];
  516. rx_strm.avail_in = 0;
  517. blklen = 0;
  518. hdr[0] = 0;
  519. do {
  520. if (rx_strm.avail_in == 0 && len != 0) {
  521. if (blklen == 0) {
  522. /* Give it a fake stored-block header. */
  523. rx_strm.next_in = (Bytef *)hdr;
  524. rx_strm.avail_in = 5;
  525. blklen = len;
  526. if (blklen > 0xffff)
  527. blklen = 0xffff;
  528. hdr[1] = blklen;
  529. hdr[2] = blklen >> 8;
  530. hdr[3] = ~hdr[1];
  531. hdr[4] = ~hdr[2];
  532. } else {
  533. rx_strm.next_in = (Bytef *)buf;
  534. rx_strm.avail_in = blklen;
  535. len -= blklen;
  536. blklen = 0;
  537. }
  538. }
  539. rx_strm.next_out = (Bytef *)dbuf;
  540. rx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE);
  541. r = inflate(&rx_strm, Z_SYNC_FLUSH);
  542. if (r != Z_OK) {
  543. rprintf(FERROR, "inflate (token) returned %d\n", r);
  544. exit_cleanup(RERR_STREAMIO);
  545. }
  546. } while (len || rx_strm.avail_out == 0);
  547. }
  548. /**
  549. * Transmit a verbatim buffer of length @p n followed by a token.
  550. * If token == -1 then we have reached EOF
  551. * If n == 0 then don't send a buffer
  552. */
  553. void send_token(int f, int32 token, struct map_struct *buf, OFF_T offset,
  554. int32 n, int32 toklen)
  555. {
  556. if (!do_compression)
  557. simple_send_token(f, token, buf, offset, n);
  558. else
  559. send_deflated_token(f, token, buf, offset, n, toklen);
  560. }
  561. /*
  562. * receive a token or buffer from the other end. If the reurn value is >0 then
  563. * it is a data buffer of that length, and *data will point at the data.
  564. * if the return value is -i then it represents token i-1
  565. * if the return value is 0 then the end has been reached
  566. */
  567. int32 recv_token(int f, char **data)
  568. {
  569. int tok;
  570. if (!do_compression) {
  571. tok = simple_recv_token(f,data);
  572. } else {
  573. tok = recv_deflated_token(f, data);
  574. }
  575. return tok;
  576. }
  577. /*
  578. * look at the data corresponding to a token, if necessary
  579. */
  580. void see_token(char *data, int32 toklen)
  581. {
  582. if (do_compression)
  583. see_deflate_token(data, toklen);
  584. }