ftw.scm 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. ;;;; ftw.scm --- file system tree walk
  2. ;;;; Copyright (C) 2002, 2003, 2006, 2011, 2012, 2014, 2016 Free Software Foundation, Inc.
  3. ;;;;
  4. ;;;; This library is free software; you can redistribute it and/or
  5. ;;;; modify it under the terms of the GNU Lesser General Public
  6. ;;;; License as published by the Free Software Foundation; either
  7. ;;;; version 3 of the License, or (at your option) any later version.
  8. ;;;;
  9. ;;;; This library is distributed in the hope that it will be useful,
  10. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;;;; Lesser General Public License for more details.
  13. ;;;;
  14. ;;;; You should have received a copy of the GNU Lesser General Public
  15. ;;;; License along with this library; if not, write to the Free Software
  16. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
  18. ;;; Commentary:
  19. ;; Two procedures are provided: `ftw' and `nftw'.
  20. ;; NOTE: The following description was adapted from the GNU libc info page, w/
  21. ;; significant modifications for a more "Schemey" interface. Most noticeable
  22. ;; are the inlining of `struct FTW *' parameters `base' and `level' and the
  23. ;; omission of `descriptors' parameters.
  24. ;; * Types
  25. ;;
  26. ;; The X/Open specification defines two procedures to process whole
  27. ;; hierarchies of directories and the contained files. Both procedures
  28. ;; of this `ftw' family take as one of the arguments a callback procedure
  29. ;; which must be of these types.
  30. ;;
  31. ;; - Data Type: __ftw_proc_t
  32. ;; (lambda (filename statinfo flag) ...) => status
  33. ;;
  34. ;; Type for callback procedures given to the `ftw' procedure. The
  35. ;; first parameter is a filename, the second parameter is the
  36. ;; vector value as returned by calling `stat' on FILENAME.
  37. ;;
  38. ;; The last parameter is a symbol giving more information about
  39. ;; FILENAME. It can have one of the following values:
  40. ;;
  41. ;; `regular'
  42. ;; The current item is a normal file or files which do not fit
  43. ;; into one of the following categories. This means
  44. ;; especially special files, sockets etc.
  45. ;;
  46. ;; `directory'
  47. ;; The current item is a directory.
  48. ;;
  49. ;; `invalid-stat'
  50. ;; The `stat' call to fill the object pointed to by the second
  51. ;; parameter failed and so the information is invalid.
  52. ;;
  53. ;; `directory-not-readable'
  54. ;; The item is a directory which cannot be read.
  55. ;;
  56. ;; `symlink'
  57. ;; The item is a symbolic link. Since symbolic links are
  58. ;; normally followed seeing this value in a `ftw' callback
  59. ;; procedure means the referenced file does not exist. The
  60. ;; situation for `nftw' is different.
  61. ;;
  62. ;; - Data Type: __nftw_proc_t
  63. ;; (lambda (filename statinfo flag base level) ...) => status
  64. ;;
  65. ;; The first three arguments have the same meaning as for the
  66. ;; `__ftw_proc_t' type. A difference is that for the third
  67. ;; argument some additional values are defined to allow finer
  68. ;; differentiation:
  69. ;;
  70. ;; `directory-processed'
  71. ;; The current item is a directory and all subdirectories have
  72. ;; already been visited and reported. This flag is returned
  73. ;; instead of `directory' if the `depth' flag is given to
  74. ;; `nftw' (see below).
  75. ;;
  76. ;; `stale-symlink'
  77. ;; The current item is a stale symbolic link. The file it
  78. ;; points to does not exist.
  79. ;;
  80. ;; The last two parameters are described below. They contain
  81. ;; information to help interpret FILENAME and give some information
  82. ;; about current state of the traversal of the directory hierarchy.
  83. ;;
  84. ;; `base'
  85. ;; The value specifies which part of the filename argument
  86. ;; given in the first parameter to the callback procedure is
  87. ;; the name of the file. The rest of the string is the path
  88. ;; to locate the file. This information is especially
  89. ;; important if the `chdir' flag for `nftw' was set since then
  90. ;; the current directory is the one the current item is found
  91. ;; in.
  92. ;;
  93. ;; `level'
  94. ;; While processing the directory the procedure tracks how
  95. ;; many directories have been examined to find the current
  96. ;; item. This nesting level is 0 for the given starting
  97. ;; item (file or directory) and is incremented by one for each
  98. ;; entered directory.
  99. ;;
  100. ;; * Procedure: (ftw filename proc . options)
  101. ;; Do a file system tree walk starting at FILENAME using PROC.
  102. ;;
  103. ;; The `ftw' procedure calls the callback procedure given in the
  104. ;; parameter PROC for every item which is found in the directory
  105. ;; specified by FILENAME and all directories below. The procedure
  106. ;; follows symbolic links if necessary but does not process an item
  107. ;; twice. If FILENAME names no directory this item is the only
  108. ;; object reported by calling the callback procedure.
  109. ;;
  110. ;; The filename given to the callback procedure is constructed by
  111. ;; taking the FILENAME parameter and appending the names of all
  112. ;; passed directories and then the local file name. So the
  113. ;; callback procedure can use this parameter to access the file.
  114. ;; Before the callback procedure is called `ftw' calls `stat' for
  115. ;; this file and passes the information up to the callback
  116. ;; procedure. If this `stat' call was not successful the failure is
  117. ;; indicated by setting the flag argument of the callback procedure
  118. ;; to `invalid-stat'. Otherwise the flag is set according to the
  119. ;; description given in the description of `__ftw_proc_t' above.
  120. ;;
  121. ;; The callback procedure is expected to return #t to indicate
  122. ;; that no error occurred and the processing should be continued.
  123. ;; If an error occurred in the callback procedure or the call to
  124. ;; `ftw' shall return immediately the callback procedure can return
  125. ;; #f. This is the only correct way to stop the procedure. The
  126. ;; program must not use `throw' or similar techniques to continue
  127. ;; the program in another place. [Can we relax this? --ttn]
  128. ;;
  129. ;; The return value of the `ftw' procedure is #t if all callback
  130. ;; procedure calls returned #t and all actions performed by the
  131. ;; `ftw' succeeded. If some procedure call failed (other than
  132. ;; calling `stat' on an item) the procedure returns #f. If a
  133. ;; callback procedure returns a value other than #t this value is
  134. ;; returned as the return value of `ftw'.
  135. ;;
  136. ;; * Procedure: (nftw filename proc . control-flags)
  137. ;; Do a new-style file system tree walk starting at FILENAME using PROC.
  138. ;; Various optional CONTROL-FLAGS alter the default behavior.
  139. ;;
  140. ;; The `nftw' procedure works like the `ftw' procedure. It calls
  141. ;; the callback procedure PROC for all items it finds in the
  142. ;; directory FILENAME and below.
  143. ;;
  144. ;; The differences are that for one the callback procedure is of a
  145. ;; different type. It takes also `base' and `level' parameters as
  146. ;; described above.
  147. ;;
  148. ;; The second difference is that `nftw' takes additional optional
  149. ;; arguments which are zero or more of the following symbols:
  150. ;;
  151. ;; `physical'
  152. ;; While traversing the directory symbolic links are not
  153. ;; followed. I.e., if this flag is given symbolic links are
  154. ;; reported using the `symlink' value for the type parameter
  155. ;; to the callback procedure. Please note that if this flag is
  156. ;; used the appearance of `symlink' in a callback procedure
  157. ;; does not mean the referenced file does not exist. To
  158. ;; indicate this the extra value `stale-symlink' exists.
  159. ;;
  160. ;; `mount'
  161. ;; The callback procedure is only called for items which are on
  162. ;; the same mounted file system as the directory given as the
  163. ;; FILENAME parameter to `nftw'.
  164. ;;
  165. ;; `chdir'
  166. ;; If this flag is given the current working directory is
  167. ;; changed to the directory containing the reported object
  168. ;; before the callback procedure is called.
  169. ;;
  170. ;; `depth'
  171. ;; If this option is given the procedure visits first all files
  172. ;; and subdirectories before the callback procedure is called
  173. ;; for the directory itself (depth-first processing). This
  174. ;; also means the type flag given to the callback procedure is
  175. ;; `directory-processed' and not `directory'.
  176. ;;
  177. ;; The return value is computed in the same way as for `ftw'.
  178. ;; `nftw' returns #t if no failure occurred in `nftw' and all
  179. ;; callback procedure call return values are also #t. For internal
  180. ;; errors such as memory problems the error `ftw-error' is thrown.
  181. ;; If the return value of a callback invocation is not #t this
  182. ;; very same value is returned.
  183. ;;; Code:
  184. (define-module (ice-9 ftw)
  185. #:use-module (ice-9 match)
  186. #:use-module (ice-9 vlist)
  187. #:use-module (srfi srfi-1)
  188. #:autoload (ice-9 i18n) (string-locale<?)
  189. #:export (ftw nftw
  190. file-system-fold
  191. file-system-tree
  192. scandir))
  ;; Return the names of the entries of directory DIR as a list of strings,
  ;; leaving out the "." and ".." entries.  Names are consed onto the front
  ;; of the accumulator, so the list is in reverse `readdir' order.  The
  ;; directory stream is closed once end-of-file is reached.
  (define (directory-files dir)
    (define (dot-entry? name)
      (or (string=? "." name)
          (string=? ".." name)))
    (let ((stream (opendir dir)))
      (let collect ((entry (readdir stream))
                    (names '()))
        (cond ((eof-object? entry)
               (closedir stream)
               names)
              ((dot-entry? entry)
               (collect (readdir stream) names))
              (else
               (collect (readdir stream) (cons entry names)))))))
  ;; Join the strings NODES with "/" between consecutive elements and
  ;; return the resulting file name.  No slash is added before the first
  ;; node; with no nodes at all the result is the empty string.
  (define (pathify . nodes)
    (string-join nodes "/"))
  ;; Return #t when FILENAME is absolute, i.e. starts with "/", and #f
  ;; otherwise.  The empty string is treated as a relative name instead of
  ;; raising an out-of-range error from indexing its first character.
  (define (abs? filename)
    (string-prefix? "/" filename))
  215. ;; `visited?-proc' returns a test procedure VISITED? which when called as
  216. ;; (VISITED? stat-obj) returns #f the first time a distinct file is seen,
  217. ;; then #t on any subsequent sighting of it.
  218. ;;
  219. ;; stat:dev and stat:ino together uniquely identify a file (see "Attribute
  220. ;; Meanings" in the glibc manual). Often there'll be just one dev, and
  221. ;; usually there's just a handful mounted, so the strategy here is a small
  222. ;; hash table indexed by dev, containing hash tables indexed by ino.
  223. ;;
  224. ;; It'd be possible to make a pair (dev . ino) and use that as the key to a
  225. ;; single hash table. It'd use an extra pair for every file visited, but
  226. ;; might be a little faster if it meant less scheme code.
  227. ;;
  ;; Return a predicate VISITED? over stat objects.  (VISITED? s) is #f for
  ;; a false S, #f the first time a given (dev, ino) combination is seen,
  ;; and #t on every later sighting.  SIZE is the initial size handed to
  ;; `make-hash-table' for each per-device inode table; the outer table,
  ;; keyed by device, is created with size 7.
  (define (visited?-proc size)
    (let ((by-device (make-hash-table 7)))
      ;; Look up the inode table for device DEV, creating and registering
      ;; it on first use.
      (define (inode-table dev)
        (or (hashv-ref by-device dev)
            (let ((fresh (make-hash-table size)))
              (hashv-set! by-device dev fresh)
              fresh)))
      (lambda (s)
        (and s
             (let ((seen (inode-table (stat:dev s)))
                   (ino (stat:ino s)))
               (or (hashv-ref seen ino)
                   (begin
                     ;; First sighting: remember it, report "not visited".
                     (hashv-set! seen ino #t)
                     #f)))))))
  ;; Return a predicate that takes a stat object S and tells whether the
  ;; file it describes has a read permission bit that applies to user UID
  ;; and group GID.
  ;;
  ;; The UID and GID arguments are used as given.  Previously they were
  ;; shadowed by fresh `getuid'/`getgid' calls, so whatever the caller
  ;; passed was ignored.
  ;;
  ;; The predicate returns true when any of these hold:
  ;;   - UID is 0;
  ;;   - UID owns the file and the owner-read bit (#o400) is set;
  ;;   - GID is the file's group and the group-read bit (#o040) is set;
  ;;   - the other-read bit (#o004) is set.
  (define (stat-dir-readable?-proc uid gid)
    (lambda (s)
      (let* ((perms (stat:perms s))
             (perms-bit-set? (lambda (mask)
                               (not (zero? (logand mask perms))))))
        (or (zero? uid)
            (and (= uid (stat:uid s))
                 (perms-bit-set? #o400))
            (and (= gid (stat:gid s))
                 (perms-bit-set? #o040))
            (perms-bit-set? #o004)))))
  ;; Return a procedure that maps a file NAME to two values: a stat object
  ;; (or #f) and a symbol classifying the file.
  ;;
  ;; DIR-READABLE? is called on the stat object of a directory and decides
  ;; between the plain directory flag and `directory-not-readable'.
  ;; CONTROL-FLAGS is a list of symbols; the ones consulted here are:
  ;;   depth       directories are flagged `directory-processed' instead
  ;;               of `directory';
  ;;   nftw-style  a symlink whose target cannot be stat'ed is flagged
  ;;               `stale-symlink' instead of `symlink';
  ;;   physical    a symlink whose target can be stat'ed is reported as
  ;;               `symlink' with its own lstat data, not followed.
  ;;
  ;; When `lstat' fails the values are #f and `invalid-stat'.
  (define (stat&flag-proc dir-readable? . control-flags)
    (define (flag-set? flag)
      (memq flag control-flags))
    (let ((dir-flag (if (flag-set? 'depth)
                        'directory-processed
                        'directory))
          (dangling-flag (if (flag-set? 'nftw-style)
                             'stale-symlink
                             'symlink))
          (physical? (flag-set? 'physical)))
      ;; Classify a non-symlink stat object: anything that is not a
      ;; directory counts as `regular'.
      (define (classify s)
        (cond ((not (eq? 'directory (stat:type s))) 'regular)
              ((dir-readable? s) dir-flag)
              (else 'directory-not-readable)))
      (lambda (name)
        (let ((s (false-if-exception (lstat name))))
          (cond ((not s)
                 (values s 'invalid-stat))
                ((not (eq? 'symlink (stat:type s)))
                 (values s (classify s)))
                (else
                 ;; NAME is a symlink; see whether its target exists.
                 (let ((target (false-if-exception (stat name))))
                   (cond ((not target)
                          (values s dangling-flag))
                         (physical?
                          (values s 'symlink))
                         (else
                          (values target (classify target)))))))))))
  ;; Return NAME with one trailing "/" removed, if it has one.
  ;;
  ;; Names of length 0 or 1 are returned unchanged: the root directory "/"
  ;; must not be reduced to the empty string (which cannot be stat'ed), and
  ;; the empty string has no last character to inspect.
  (define (clean name)
    (let ((len (string-length name)))
      (if (and (> len 1)
               (char=? #\/ (string-ref name (1- len))))
          (substring name 0 (1- len))
          name)))
  ;; (ftw filename proc . options)
  ;;
  ;; Walk the file tree rooted at FILENAME (with one trailing slash removed
  ;; by `clean'), calling (PROC fullname statinfo flag) on each item not
  ;; already seen.  Children of an item are visited only when its flag is
  ;; `directory'.
  ;;
  ;; OPTIONS may contain the symbol `hash-size' followed by a value; that
  ;; value is used as the size of the visited-file tables (default 211).
  ;;
  ;; The walk continues only while PROC returns exactly #t.  Any other
  ;; return value aborts the walk through a `ftw-early-exit' throw and
  ;; becomes the result of `ftw'; otherwise the result is #t.
  (define (ftw filename proc . options)
    (define table-size
      (cond ((memq 'hash-size options) => cadr)
            (else 211)))
    (define visited? (visited?-proc table-size))
    (define stat&flag
      (stat&flag-proc (stat-dir-readable?-proc (getuid) (getgid))))

    ;; Report FULLNAME to PROC unless already visited, then descend.
    (define (walk fullname)
      (call-with-values (lambda () (stat&flag fullname))
        (lambda (s flag)
          (or (visited? s)
              (let ((ret (proc fullname s flag))) ; callback
                (unless (eq? #t ret)
                  (throw 'ftw-early-exit ret))
                (when (eq? 'directory flag)
                  (for-each (lambda (child)
                              (walk (pathify fullname child)))
                            (directory-files fullname)))
                #t)))))

    (catch 'ftw-early-exit
      (lambda () (walk (clean filename)))
      (lambda (key val) val)))
  ;; (nftw filename proc . control-flags)
  ;;
  ;; Walk the file tree rooted at FILENAME (with one trailing slash removed
  ;; by `clean'), calling (PROC fullname statinfo flag base level) on each
  ;; item not already seen.  BASE is the index in FULLNAME at which the
  ;; item's own name starts (0 for the starting item) and LEVEL is the
  ;; nesting depth (0 for the starting item).
  ;;
  ;; CONTROL-FLAGS is a list of symbols; those consulted here are `mount',
  ;; `chdir' and `depth', plus `hash-size', which must be followed by the
  ;; size to use for the visited-file tables (default 211).  The whole list
  ;; is also handed to `stat&flag-proc' with `nftw-style' added.
  ;;
  ;; The walk continues only while PROC returns exactly #t.  Any other
  ;; return value aborts the walk through a `nftw-early-exit' throw and
  ;; becomes the result of `nftw'; otherwise the result is #t.  The working
  ;; directory recorded at entry is restored before returning.
  (define (nftw filename proc . control-flags)
    (let* ((od (getcwd))                  ; original working directory
           ;; Device of the starting item, or -1 when it cannot be lstat'ed.
           (odev (let ((s (false-if-exception (lstat filename))))
                   (if s (stat:dev s) -1)))
           ;; With `mount', only items on the starting device are reported.
           ;; An item whose stat failed (S is #f) has no device to compare;
           ;; it is let through so that the callback sees it flagged
           ;; `invalid-stat' instead of `stat:dev' raising a type error.
           (same-dev? (if (memq 'mount control-flags)
                          (lambda (s) (or (not s) (= (stat:dev s) odev)))
                          (lambda (s) #t)))
           ;; Directory part of NAME: everything before index BASE.
           (base-sub (lambda (name base) (substring name 0 base)))
           ;; With `chdir', enter the directory containing the item before
           ;; the callback runs; relative names are resolved against OD.
           (maybe-cd (if (memq 'chdir control-flags)
                         (if (abs? filename)
                             (lambda (fullname base)
                               (or (= 0 base)
                                   (chdir (base-sub fullname base))))
                             (lambda (fullname base)
                               (chdir
                                (pathify od (base-sub fullname base)))))
                         (lambda (fullname base) #t)))
           (maybe-cd-back (if (memq 'chdir control-flags)
                              (lambda () (chdir od))
                              (lambda () #t)))
           (depth-first? (memq 'depth control-flags))
           (visited? (visited?-proc
                      (cond ((memq 'hash-size control-flags) => cadr)
                            (else 211))))
           ;; The flag that marks a directory to descend into depends on
           ;; whether `depth' was requested.
           (has-kids? (if depth-first?
                          (lambda (flag) (eq? flag 'directory-processed))
                          (lambda (flag) (eq? flag 'directory))))
           (stat&flag (apply stat&flag-proc
                             (stat-dir-readable?-proc (getuid) (getgid))
                             (cons 'nftw-style control-flags))))
      (letrec ((go (lambda (fullname base level)
                     (call-with-values (lambda () (stat&flag fullname))
                       (lambda (s flag)
                         (letrec ((self (lambda ()
                                          (maybe-cd fullname base)
                                          ;; the callback
                                          (let ((ret (proc fullname s flag
                                                           base level)))
                                            (maybe-cd-back)
                                            (or (eq? #t ret)
                                                (throw 'nftw-early-exit ret)))))
                                  (kids (lambda ()
                                          (and (has-kids? flag)
                                               (for-each
                                                (lambda (child)
                                                  (go (pathify fullname child)
                                                      ;; child name starts
                                                      ;; after "FULLNAME/"
                                                      (1+ (string-length
                                                           fullname))
                                                      (1+ level)))
                                                (directory-files fullname))))))
                           (or (visited? s)
                               (not (same-dev? s))
                               (if depth-first?
                                   (begin (kids) (self))
                                   (begin (self) (kids)))))))
                     #t)))
        (let ((ret (catch 'nftw-early-exit
                     (lambda () (go (clean filename) 0 0))
                     (lambda (key val) val))))
          (chdir od)
          ret))))
  370. ;;;
  371. ;;; `file-system-fold' & co.
  372. ;;;
  ;; Evaluate BODY and return its value.  If BODY throws `system-error',
  ;; return the errno extracted from the exception arguments with
  ;; `system-error-errno' instead.  Other exceptions are not caught.
  (define-syntax errno-if-exception
    (syntax-rules ()
      ((_ body)
       (catch 'system-error
         (lambda () body)
         (lambda key+args
           (system-error-errno key+args))))))
  (define* (file-system-fold enter? leaf down up skip error init file-name
                             #:optional (stat lstat))
    "Traverse the directory at FILE-NAME, recursively.  Enter
sub-directories only when (ENTER? PATH STAT RESULT) returns true.  When
a sub-directory is entered, call (DOWN PATH STAT RESULT), where PATH is
the path of the sub-directory and STAT the result of (stat PATH); when
it is left, call (UP PATH STAT RESULT).  For each file in a directory,
call (LEAF PATH STAT RESULT).  When ENTER? returns false, call (SKIP
PATH STAT RESULT).  When an `opendir' or STAT call raises an exception,
call (ERROR PATH STAT ERRNO RESULT), with ERRNO being the operating
system error number that was raised.

Return the result of these successive applications.
When FILE-NAME names a flat file, (LEAF PATH STAT INIT) is returned.

The optional STAT parameter defaults to `lstat'."

    ;; A file is identified by its (device . inode) pair; the set of
    ;; directories already handled is a vhash keyed by that pair.
    (define (file-id s)
      (cons (stat:dev s) (stat:ino s)))

    (define (mark v s)
      (vhash-cons (file-id s) #t v))

    (define (visited? v s)
      (vhash-assoc (file-id s) v))

    (let walk ((name file-name)
               (parent "")
               (dir-stat (errno-if-exception (stat file-name)))
               (result init)
               (visited vlist-null))
      (define full-name
        (if (string=? parent "")
            name
            (string-append parent "/" name)))

      ;; Walk each (NAME . STAT) pair of SUBDIRS below FULL-NAME, threading
      ;; the fold result and the visited set through as a
      ;; (RESULT . VISITED) pair, which is returned.
      (define (walk-subdirs subdirs result visited)
        (fold (lambda (subdir acc)
                (call-with-values
                    (lambda ()
                      (walk (car subdir)
                            full-name
                            (cdr subdir)
                            (car acc)
                            (cdr acc)))
                  cons))
              (cons result visited)
              subdirs))

      (cond
       ((integer? dir-stat)
        ;; FILE-NAME is not readable: DIR-STAT is an errno.
        (error full-name #f dir-stat result))
       ((visited? visited dir-stat)
        (values result visited))
       ((not (eq? 'directory (stat:type dir-stat)))
        ;; Caller passed a FILE-NAME that names a flat file, not a
        ;; directory; sub-directory walks always carry a directory stat.
        (leaf full-name dir-stat result))
       ((not (enter? full-name dir-stat result))
        (values (skip full-name dir-stat result)
                (mark visited dir-stat)))
       (else
        (let ((dir (errno-if-exception (opendir full-name)))
              (visited (mark visited dir-stat)))
          (if (not (directory-stream? dir))
              ;; Directory FULL-NAME not readable, but it is stat'able;
              ;; DIR holds the errno from `opendir'.
              (values (error full-name dir-stat dir result)
                      visited)
              ;; Read every entry first, reporting leaves and errors as
              ;; they come and collecting sub-directories; those are
              ;; walked only after the stream has been closed.
              (let scan ((entry (readdir dir))
                         (result (down full-name dir-stat result))
                         (subdirs '()))
                (cond
                 ((eof-object? entry)
                  (closedir dir)
                  (let ((r+v (walk-subdirs subdirs result visited)))
                    (values (up full-name dir-stat (car r+v))
                            (cdr r+v))))
                 ((member entry '("." ".."))
                  (scan (readdir dir)
                        result
                        subdirs))
                 (else
                  (let* ((child (string-append full-name "/" entry))
                         (st (errno-if-exception (stat child))))
                    (cond
                     ((integer? st)     ; CHILD is a dangling symlink?
                      (scan (readdir dir)
                            (error child #f st result)
                            subdirs))
                     ((eq? (stat:type st) 'directory)
                      (scan (readdir dir)
                            result
                            (alist-cons entry st subdirs)))
                     (else
                      (scan (readdir dir)
                            (leaf child st result)
                            subdirs)))))))))))))
  (define* (file-system-tree file-name
                             #:optional (enter? (lambda (n s) #t))
                             (stat lstat))
    "Return a tree of the form (FILE-NAME STAT CHILDREN ...) where STAT is
the result of (STAT FILE-NAME) and CHILDREN are similar structures for
each file contained in FILE-NAME when it designates a directory.  The
optional ENTER? predicate is invoked as (ENTER? NAME STAT) and should
return true to allow recursion into directory NAME; the default value is
a procedure that always returns #t.  When a directory does not match
ENTER?, it nonetheless appears in the resulting tree, only with zero
children.  The optional STAT parameter defaults to `lstat'.  Return #f
when FILE-NAME is not readable."

    ;; The fold state is a stack of sibling lists: the first element holds
    ;; the nodes built so far at the current directory level, the rest
    ;; belong to the enclosing levels.

    ;; Build one tree node: (BASENAME STAT CHILDREN ...).
    (define (node name stat children)
      (cons (basename name) (cons stat children)))

    (define (may-enter? name stat result)
      (enter? name stat))

    ;; Add a childless node for NAME to the current level.
    (define (add-leaf name stat result)
      (match result
        (((siblings ...) rest ...)
         (cons (cons (node name stat '()) siblings)
               rest))))

    ;; Entering a directory starts a fresh, empty level.
    (define (open-level name stat result)
      (cons '() result))

    ;; Leaving a directory turns the finished level into the children of
    ;; its node, which joins the level below.
    (define (close-level name stat result)
      (match result
        (((children ...) (siblings ...) rest ...)
         (cons (cons (node name stat children) siblings)
               rest))))

    ;; An error on FILE-NAME itself leaves the state untouched, so the
    ;; final match yields #f; an error anywhere else is kept as a leaf.
    (define (on-error name stat errno result)
      (if (string=? name file-name)
          result
          (add-leaf name stat result)))

    ;; Skipped directories go through `add-leaf' too, so they keep an
    ;; entry in the tree.
    (match (file-system-fold may-enter? add-leaf open-level close-level
                             add-leaf on-error '(())
                             file-name stat)
      (((tree)) tree)
      ((()) #f)))                         ; FILE-NAME is unreadable
  (define* (scandir name #:optional (select? (const #t))
                    (entry<? string-locale<?))
    "Return the list of the names of files contained in directory NAME
that match predicate SELECT? (by default, all files.)  The returned list
of file names is sorted according to ENTRY<?, which defaults to
`string-locale<?'.  Return #f when NAME is unreadable or is not a
directory."

    ;; This procedure is implemented in terms of 'readdir' instead of
    ;; 'file-system-fold' to avoid the extra 'stat' call that the latter
    ;; makes for each entry.

    ;; Open NAME as a directory stream, or return #f on `system-error'.
    (define (try-opendir)
      (catch 'system-error
        (lambda () (opendir name))
        (lambda _ #f)))

    ;; Read STREAM to the end, keeping the entries accepted by SELECT?,
    ;; then close it and return the kept names sorted with ENTRY<?.
    (define (collect stream)
      (let next ((entry (readdir stream))
                 (kept '()))
        (cond ((eof-object? entry)
               (closedir stream)
               (sort kept entry<?))
              (else
               (next (readdir stream)
                     (if (select? entry)
                         (cons entry kept)
                         kept))))))

    (let ((stream (try-opendir)))
      (and stream
           (collect stream))))
  530. ;;; ftw.scm ends here