tree.c 17 KB


  1. /*-
  2. * Copyright 2006-2025 Tarsnap Backup Inc.
  3. * All rights reserved.
  4. *
  5. * Portions of the file below are covered by the following license:
  6. *
  7. * Copyright (c) 2003-2007 Tim Kientzle
  8. * All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  20. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  21. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  23. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  24. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  28. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. /*-
  31. * This is a new directory-walking system that addresses a number
  32. * of problems I've had with fts(3). In particular, it has no
  33. * pathname-length limits (other than the size of 'int'), handles
  34. * deep logical traversals, uses considerably less memory, and has
  35. * an opaque interface (easier to modify in the future).
  36. *
  37. * Internally, it keeps a single list of "tree_entry" items that
  38. * represent filesystem objects that require further attention.
  39. * Non-directories are not kept in memory: they are pulled from
  40. * readdir(), returned to the client, then freed as soon as possible.
  41. * Any directory entry to be traversed gets pushed onto the stack.
  42. *
  43. * There is surprisingly little information that needs to be kept for
  44. * each item on the stack. Just the name, depth (represented here as the
  45. * string length of the parent directory's pathname), and some markers
  46. * indicating how to get back to the parent (via chdir("..") for a
  47. * regular dir or via fchdir(2) for a symlink).
  48. */
  49. #include "bsdtar_platform.h"
  50. __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.9 2008/11/27 05:49:52 kientzle Exp $");
  51. #ifdef HAVE_SYS_STAT_H
  52. #include <sys/stat.h>
  53. #endif
  54. #ifdef HAVE_DIRENT_H
  55. #include <dirent.h>
  56. #endif
  57. #ifdef HAVE_ERRNO_H
  58. #include <errno.h>
  59. #endif
  60. #ifdef HAVE_FCNTL_H
  61. #include <fcntl.h>
  62. #endif
  63. #ifdef HAVE_LIMITS_H
  64. #include <limits.h>
  65. #endif
  66. #ifdef HAVE_STDLIB_H
  67. #include <stdlib.h>
  68. #endif
  69. #ifdef HAVE_STRING_H
  70. #include <string.h>
  71. #endif
  72. #ifdef HAVE_UNISTD_H
  73. #include <unistd.h>
  74. #endif
  75. #include "fileutil.h"
  76. #include "tree.h"
  77. /*
  78. * TODO:
  79. * 1) Loop checking.
  80. * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
  81. */
  82. struct tree_entry {
  83. struct tree_entry *next;
  84. char *name;
  85. size_t dirname_length;
  86. dev_t dev;
  87. ino_t ino;
  88. #ifdef HAVE_FCHDIR
  89. int fd;
  90. #elif defined(_WIN32) && !defined(__CYGWIN__)
  91. char *fullpath;
  92. #else
  93. #error fchdir function required.
  94. #endif
  95. int flags;
  96. };
  /*
   * Bit values for tree_entry.flags.
   */
  #define isDir          0x01 /* Entry is a regular directory. */
  #define isDirLink      0x02 /* Entry is a symbolic link to a directory. */
  #define needsPreVisit  0x04 /* Entry has not been entered yet. */
  #define needsPostVisit 0x08 /* Entry still has to be left (ascended from). */
  102. /*
  103. * Local data for this package.
  104. */
  105. struct tree {
  106. struct tree_entry *stack;
  107. DIR *d;
  108. #ifdef HAVE_FCHDIR
  109. int initialDirFd;
  110. #elif defined(_WIN32) && !defined(__CYGWIN__)
  111. char *initialDir;
  112. #endif
  113. int flags;
  114. int visit_type;
  115. int tree_errno; /* Error code from last failed operation. */
  116. char *buff;
  117. const char *basename;
  118. size_t buff_length;
  119. size_t path_length;
  120. size_t dirname_length;
  121. char realpath[PATH_MAX + 1];
  122. size_t realpath_dirname_length;
  123. int realpath_valid;
  124. char realpath_symlink[PATH_MAX + 1];
  125. int depth;
  126. int openCount;
  127. int maxOpenCount;
  128. int noatime;
  129. struct stat lst;
  130. struct stat st;
  131. };
  /*
   * Bit values for tree.flags.
   */
  #define needsReturn 0x08 /* The initial entry has not been returned yet. */
  #define hasStat     0x10 /* tree.st holds valid stat() data. */
  #define hasLstat    0x20 /* tree.lst holds valid lstat() data. */
  /*
   * D_NAMELEN(dp): length of the name stored in directory entry dp.
   *
   * Both variants expand to a fully parenthesized expression so the
   * macro can be used safely inside larger expressions.
   */
  #ifdef HAVE_DIRENT_D_NAMLEN
  /* BSD extension; avoids need for a strlen() call. */
  #define D_NAMELEN(dp) ((dp)->d_namlen)
  #else
  #define D_NAMELEN(dp) (strlen((dp)->d_name))
  #endif
  /*
   * Write the NUL-terminated string m to file descriptor 2 using only
   * write(2), retrying after short writes.  Gives up silently as soon as
   * write() reports an error or makes no progress.
   */
  static void
  errmsg(const char *m)
  {
      const char *cursor = m;
      size_t remaining = strlen(m);

      while (remaining > 0) {
          ssize_t n;

          n = write(2, cursor, remaining);
          if (n <= 0)
              return;
          cursor += n;
          remaining -= n;
      }
  }
  /*
   * Open the directory `path` for reading.
   *
   * When fdopendir() is available, the directory is first opened through
   * fileutil_open_noatime() -- which receives the `noatime` request -- and
   * the resulting descriptor is then wrapped in a DIR stream.  Without
   * fdopendir() the `noatime` argument is ignored and plain opendir() is
   * used.
   *
   * Returns the directory stream, or NULL on failure with errno describing
   * the failing call (errno is preserved across the cleanup close()).
   */
  static DIR *
  tree_opendir(const char *path, int noatime)
  {
  #ifdef HAVE_FDOPENDIR
      DIR *stream;
      int rawfd;
      int err;

      /* Open a fd with noatime (if applicable). */
      rawfd = fileutil_open_noatime(path,
          O_RDONLY | O_DIRECTORY | O_CLOEXEC, noatime);
      if (rawfd < 0)
          return (NULL);

      /* Re-open as a DIR*; on failure the fd is still ours to close. */
      stream = fdopendir(rawfd);
      if (stream == NULL) {
          err = errno;
          close(rawfd);
          errno = err;
          return (NULL);
      }

      return (stream);
  #else
      (void)noatime; /* UNUSED */

      return (opendir(path));
  #endif
  }
  188. /*
  189. * Add a directory path to the current stack.
  190. */
  191. static void
  192. tree_push(struct tree *t, const char *path)
  193. {
  194. struct tree_entry *te;
  195. te = malloc(sizeof(*te));
  196. if (te == NULL)
  197. abort();
  198. memset(te, 0, sizeof(*te));
  199. te->next = t->stack;
  200. t->stack = te;
  201. #ifdef HAVE_FCHDIR
  202. te->fd = -1;
  203. #elif defined(_WIN32) && !defined(__CYGWIN__)
  204. te->fullpath = NULL;
  205. #endif
  206. te->name = strdup(path);
  207. te->flags = needsPreVisit | needsPostVisit;
  208. te->dirname_length = t->dirname_length;
  209. }
  210. /*
  211. * Append a name to the current path.
  212. */
  213. static void
  214. tree_append(struct tree *t, const char *name, size_t name_length)
  215. {
  216. char *p;
  217. size_t size_needed;
  218. if (t->buff != NULL)
  219. t->buff[t->dirname_length] = '\0';
  220. /* Strip trailing '/' from name, unless entire name is "/". */
  221. while (name_length > 1 && name[name_length - 1] == '/')
  222. name_length--;
  223. /* Resize pathname buffer as needed. */
  224. size_needed = name_length + 1 + t->dirname_length + 1;
  225. if (t->buff_length < size_needed) {
  226. if (t->buff_length < 1024)
  227. t->buff_length = 1024;
  228. while (t->buff_length < size_needed)
  229. t->buff_length *= 2;
  230. t->buff = realloc(t->buff, t->buff_length);
  231. }
  232. if (t->buff == NULL)
  233. abort();
  234. p = t->buff + t->dirname_length;
  235. t->path_length = t->dirname_length + name_length;
  236. /* Add a separating '/' if it's needed. */
  237. if (t->dirname_length > 0 && p[-1] != '/') {
  238. *p++ = '/';
  239. t->path_length ++;
  240. }
  241. strncpy(p, name, name_length);
  242. p[name_length] = '\0';
  243. t->basename = p;
  244. /* Adjust canonical name. */
  245. if ((t->realpath_valid) &&
  246. (t->realpath_dirname_length + name_length + 1 <= PATH_MAX)) {
  247. t->realpath[t->realpath_dirname_length] = '/';
  248. memcpy(t->realpath + t->realpath_dirname_length + 1,
  249. name, name_length);
  250. t->realpath[t->realpath_dirname_length + name_length + 1] = 0;
  251. } else {
  252. t->realpath_valid = 0;
  253. }
  254. }
  255. /*
  256. * Open a directory tree for traversal.
  257. */
  258. struct tree *
  259. tree_open(const char *path, int noatime)
  260. {
  261. struct tree *t;
  262. t = malloc(sizeof(*t));
  263. if (t == NULL)
  264. abort();
  265. memset(t, 0, sizeof(*t));
  266. t->noatime = noatime;
  267. tree_append(t, path, strlen(path));
  268. #ifdef HAVE_FCHDIR
  269. t->initialDirFd = open(".", O_RDONLY);
  270. #elif defined(_WIN32) && !defined(__CYGWIN__)
  271. t->initialDir = getcwd(NULL, 0);
  272. #endif
  273. /*
  274. * During most of the traversal, items are set up and then
  275. * returned immediately from tree_next(). That doesn't work
  276. * for the very first entry, so we set a flag for this special
  277. * case.
  278. */
  279. t->flags = needsReturn;
  280. return (t);
  281. }
  282. /*
  283. * We've finished a directory; ascend back to the parent.
  284. */
  285. static int
  286. tree_ascend(struct tree *t)
  287. {
  288. struct tree_entry *te;
  289. int r = 0;
  290. te = t->stack;
  291. t->depth--;
  292. if (te->flags & isDirLink) {
  293. #ifdef HAVE_FCHDIR
  294. if (fchdir(te->fd) != 0) {
  295. t->tree_errno = errno;
  296. r = TREE_ERROR_FATAL;
  297. }
  298. close(te->fd);
  299. #elif defined(_WIN32) && !defined(__CYGWIN__)
  300. if (chdir(te->fullpath) != 0) {
  301. t->tree_errno = errno;
  302. r = TREE_ERROR_FATAL;
  303. }
  304. free(te->fullpath);
  305. te->fullpath = NULL;
  306. #endif
  307. t->openCount--;
  308. } else {
  309. if (chdir("..") != 0) {
  310. t->tree_errno = errno;
  311. r = TREE_ERROR_FATAL;
  312. }
  313. }
  314. /* Figure out where we are. */
  315. if (getcwd(t->realpath, PATH_MAX) != NULL) {
  316. t->realpath_dirname_length = strlen(t->realpath);
  317. if (t->realpath[0] == '/' && t->realpath[1] == '\0')
  318. t->realpath_dirname_length = 0;
  319. t->realpath_valid = 1;
  320. } else {
  321. t->realpath_valid = 0;
  322. }
  323. return (r);
  324. }
  325. /*
  326. * Pop the working stack.
  327. */
  328. static void
  329. tree_pop(struct tree *t)
  330. {
  331. struct tree_entry *te;
  332. t->buff[t->dirname_length] = '\0';
  333. te = t->stack;
  334. t->stack = te->next;
  335. t->dirname_length = te->dirname_length;
  336. t->basename = t->buff + t->dirname_length;
  337. /* Special case: starting dir doesn't skip leading '/'. */
  338. if (t->dirname_length > 0)
  339. t->basename++;
  340. free(te->name);
  341. free(te);
  342. }
  343. /*
  344. * Get the next item in the tree traversal.
  345. */
  346. int
  347. tree_next(struct tree *t)
  348. {
  349. struct dirent *de = NULL;
  350. int r;
  351. /* If we're called again after a fatal error, that's an API
  352. * violation. Just crash now. */
  353. if (t->visit_type == TREE_ERROR_FATAL) {
  354. const char *msg = "Unable to continue traversing"
  355. " directory hierarchy after a fatal error.\n";
  356. errmsg(msg);
  357. *(volatile int *)0 = 1; /* Deliberate SEGV; NULL pointer dereference. */
  358. exit(1); /* In case the SEGV didn't work. */
  359. }
  360. /* Handle the startup case by returning the initial entry. */
  361. if (t->flags & needsReturn) {
  362. t->flags &= ~needsReturn;
  363. return (t->visit_type = TREE_REGULAR);
  364. }
  365. while (t->stack != NULL) {
  366. /* If there's an open dir, get the next entry from there. */
  367. while (t->d != NULL) {
  368. errno = 0;
  369. de = readdir(t->d);
  370. if (de == NULL) {
  371. if (errno) {
  372. /* If readdir fails, we're screwed. */
  373. t->tree_errno = errno;
  374. closedir(t->d);
  375. t->d = NULL;
  376. t->visit_type = TREE_ERROR_FATAL;
  377. return (t->visit_type);
  378. }
  379. /* Reached end of directory. */
  380. closedir(t->d);
  381. t->d = NULL;
  382. } else if (de->d_name[0] == '.'
  383. && de->d_name[1] == '\0') {
  384. /* Skip '.' */
  385. } else if (de->d_name[0] == '.'
  386. && de->d_name[1] == '.'
  387. && de->d_name[2] == '\0') {
  388. /* Skip '..' */
  389. } else {
  390. /*
  391. * Append the path to the current path
  392. * and return it.
  393. */
  394. tree_append(t, de->d_name, D_NAMELEN(de));
  395. t->flags &= ~hasLstat;
  396. t->flags &= ~hasStat;
  397. return (t->visit_type = TREE_REGULAR);
  398. }
  399. }
  400. /* If the current dir needs to be visited, set it up. */
  401. if (t->stack->flags & needsPreVisit) {
  402. tree_append(t, t->stack->name, strlen(t->stack->name));
  403. t->stack->flags &= ~needsPreVisit;
  404. /* If it is a link, set up fd for the ascent. */
  405. if (t->stack->flags & isDirLink) {
  406. #ifdef HAVE_FCHDIR
  407. t->stack->fd = open(".", O_RDONLY);
  408. #elif defined(_WIN32) && !defined(__CYGWIN__)
  409. t->stack->fullpath = getcwd(NULL, 0);
  410. #endif
  411. t->openCount++;
  412. if (t->openCount > t->maxOpenCount)
  413. t->maxOpenCount = t->openCount;
  414. }
  415. t->dirname_length = t->path_length;
  416. if (chdir(t->stack->name) != 0) {
  417. /* chdir() failed; return error */
  418. t->tree_errno = errno;
  419. tree_pop(t);
  420. return (t->visit_type = TREE_ERROR_DIR);
  421. }
  422. t->depth++;
  423. t->d = tree_opendir(".", t->noatime);
  424. if (t->d == NULL) {
  425. t->tree_errno = errno;
  426. r = tree_ascend(t); /* Undo "chdir" */
  427. tree_pop(t);
  428. t->visit_type = r != 0 ? r : TREE_ERROR_DIR;
  429. return (t->visit_type);
  430. }
  431. t->flags &= ~hasLstat;
  432. t->flags &= ~hasStat;
  433. t->basename = ".";
  434. /* Figure out where we are. */
  435. if (getcwd(t->realpath, PATH_MAX) != NULL) {
  436. t->realpath_dirname_length =
  437. strlen(t->realpath);
  438. if (t->realpath[0] == '/' &&
  439. t->realpath[1] == '\0')
  440. t->realpath_dirname_length = 0;
  441. t->realpath_valid = 1;
  442. } else {
  443. t->realpath_valid = 0;
  444. }
  445. return (t->visit_type = TREE_POSTDESCENT);
  446. }
  447. /* We've done everything necessary for the top stack entry. */
  448. if (t->stack->flags & needsPostVisit) {
  449. r = tree_ascend(t);
  450. tree_pop(t);
  451. t->flags &= ~hasLstat;
  452. t->flags &= ~hasStat;
  453. t->visit_type = r != 0 ? r : TREE_POSTASCENT;
  454. return (t->visit_type);
  455. }
  456. }
  457. return (t->visit_type = 0);
  458. }
  459. /*
  460. * Return error code.
  461. */
  462. int
  463. tree_errno(struct tree *t)
  464. {
  465. return (t->tree_errno);
  466. }
  467. /*
  468. * Called by the client to mark the directory just returned from
  469. * tree_next() as needing to be visited.
  470. */
  471. void
  472. tree_descend(struct tree *t)
  473. {
  474. if (t->visit_type != TREE_REGULAR)
  475. return;
  476. if (tree_current_is_physical_dir(t)) {
  477. tree_push(t, t->basename);
  478. t->stack->flags |= isDir;
  479. } else if (tree_current_is_dir(t)) {
  480. tree_push(t, t->basename);
  481. t->stack->flags |= isDirLink;
  482. }
  483. }
  484. /*
  485. * Get the stat() data for the entry just returned from tree_next().
  486. */
  487. const struct stat *
  488. tree_current_stat(struct tree *t)
  489. {
  490. if (!(t->flags & hasStat)) {
  491. if (stat(t->basename, &t->st) != 0)
  492. return NULL;
  493. t->flags |= hasStat;
  494. }
  495. return (&t->st);
  496. }
  497. /*
  498. * Get the lstat() data for the entry just returned from tree_next().
  499. */
  500. const struct stat *
  501. tree_current_lstat(struct tree *t)
  502. {
  503. if (!(t->flags & hasLstat)) {
  504. if (lstat(t->basename, &t->lst) != 0)
  505. return NULL;
  506. t->flags |= hasLstat;
  507. }
  508. return (&t->lst);
  509. }
  510. /*
  511. * Test whether current entry is a dir or link to a dir.
  512. */
  513. int
  514. tree_current_is_dir(struct tree *t)
  515. {
  516. const struct stat *st;
  517. /*
  518. * If we already have lstat() info, then try some
  519. * cheap tests to determine if this is a dir.
  520. */
  521. if (t->flags & hasLstat) {
  522. /* If lstat() says it's a dir, it must be a dir. */
  523. if (S_ISDIR(tree_current_lstat(t)->st_mode))
  524. return 1;
  525. /* Not a dir; might be a link to a dir. */
  526. /* If it's not a link, then it's not a link to a dir. */
  527. if (!S_ISLNK(tree_current_lstat(t)->st_mode))
  528. return 0;
  529. /*
  530. * It's a link, but we don't know what it's a link to,
  531. * so we'll have to use stat().
  532. */
  533. }
  534. st = tree_current_stat(t);
  535. /* If we can't stat it, it's not a dir. */
  536. if (st == NULL)
  537. return 0;
  538. /* Use the definitive test. Hopefully this is cached. */
  539. return (S_ISDIR(st->st_mode));
  540. }
  541. /*
  542. * Test whether current entry is a physical directory. Usually, we
  543. * already have at least one of stat() or lstat() in memory, so we
  544. * use tricks to try to avoid an extra trip to the disk.
  545. */
  546. int
  547. tree_current_is_physical_dir(struct tree *t)
  548. {
  549. const struct stat *st;
  550. /*
  551. * If stat() says it isn't a dir, then it's not a dir.
  552. * If stat() data is cached, this check is free, so do it first.
  553. */
  554. if ((t->flags & hasStat)
  555. && (!S_ISDIR(tree_current_stat(t)->st_mode)))
  556. return 0;
  557. /*
  558. * Either stat() said it was a dir (in which case, we have
  559. * to determine whether it's really a link to a dir) or
  560. * stat() info wasn't available. So we use lstat(), which
  561. * hopefully is already cached.
  562. */
  563. st = tree_current_lstat(t);
  564. /* If we can't stat it, it's not a dir. */
  565. if (st == NULL)
  566. return 0;
  567. /* Use the definitive test. Hopefully this is cached. */
  568. return (S_ISDIR(st->st_mode));
  569. }
  /*
   * Is the current entry a symbolic link?
   *
   * Returns nonzero if lstat() reports a symlink, 0 otherwise (including
   * when lstat() fails).
   */
  int
  tree_current_is_physical_link(struct tree *t)
  {
      const struct stat *lst;

      lst = tree_current_lstat(t);
      if (lst != NULL)
          return (S_ISLNK(lst->st_mode));

      return 0;
  }
  581. /*
  582. * Return the access path for the entry just returned from tree_next().
  583. */
  584. const char *
  585. tree_current_access_path(struct tree *t)
  586. {
  587. return (t->basename);
  588. }
  589. /*
  590. * Return the full path for the entry just returned from tree_next().
  591. */
  592. const char *
  593. tree_current_path(struct tree *t)
  594. {
  595. return (t->buff);
  596. }
  597. /*
  598. * Return what you would get by calling realpath(3) on the path returned
  599. * by tree_current_access_path(t). In most cases this avoids needing to
  600. * call realpath(3).
  601. */
  602. const char *
  603. tree_current_realpath(struct tree *t)
  604. {
  605. if (tree_current_is_physical_link(t))
  606. return (realpath(t->basename, t->realpath_symlink));
  607. else if (t->realpath_valid)
  608. return (t->realpath);
  609. else
  610. return (realpath(t->basename, t->realpath));
  611. }
  612. /*
  613. * Return the length of the path for the entry just returned from tree_next().
  614. */
  615. size_t
  616. tree_current_pathlen(struct tree *t)
  617. {
  618. return (t->path_length);
  619. }
  620. /*
  621. * Return the nesting depth of the entry just returned from tree_next().
  622. */
  623. int
  624. tree_current_depth(struct tree *t)
  625. {
  626. return (t->depth);
  627. }
  628. /*
  629. * Terminate the traversal and release any resources.
  630. */
  631. int
  632. tree_close(struct tree *t)
  633. {
  634. int rc = 0;
  635. /* Release anything remaining in the stack. */
  636. while (t->stack != NULL)
  637. tree_pop(t);
  638. if (t->buff)
  639. free(t->buff);
  640. /* chdir() back to where we started. */
  641. #ifdef HAVE_FCHDIR
  642. if (t->initialDirFd >= 0) {
  643. rc = fchdir(t->initialDirFd);
  644. close(t->initialDirFd);
  645. t->initialDirFd = -1;
  646. }
  647. #elif defined(_WIN32) && !defined(__CYGWIN__)
  648. if (t->initialDir != NULL) {
  649. rc = chdir(t->initialDir);
  650. free(t->initialDir);
  651. t->initialDir = NULL;
  652. }
  653. #endif
  654. free(t);
  655. return (rc);
  656. }