ftw.scm 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. ;;;; ftw.scm --- filesystem tree walk
  2. ;;;; Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc.
  3. ;;;;
  4. ;;;; This library is free software; you can redistribute it and/or
  5. ;;;; modify it under the terms of the GNU Lesser General Public
  6. ;;;; License as published by the Free Software Foundation; either
  7. ;;;; version 2.1 of the License, or (at your option) any later version.
  8. ;;;;
  9. ;;;; This library is distributed in the hope that it will be useful,
  10. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;;;; Lesser General Public License for more details.
  13. ;;;;
  14. ;;;; You should have received a copy of the GNU Lesser General Public
  15. ;;;; License along with this library; if not, write to the Free Software
  16. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
  18. ;;; Commentary:
  19. ;; Two procedures are provided: `ftw' and `nftw'.
  20. ;; NOTE: The following description was adapted from the GNU libc info page, w/
  21. ;; significant modifications for a more "Schemey" interface. Most noticible
  22. ;; are the inlining of `struct FTW *' parameters `base' and `level' and the
  23. ;; omission of `descriptors' parameters.
  24. ;; * Types
  25. ;;
  26. ;; The X/Open specification defines two procedures to process whole
  27. ;; hierarchies of directories and the contained files. Both procedures
  28. ;; of this `ftw' family take as one of the arguments a callback procedure
  29. ;; which must be of these types.
  30. ;;
  31. ;; - Data Type: __ftw_proc_t
  32. ;; (lambda (filename statinfo flag) ...) => status
  33. ;;
  34. ;; Type for callback procedures given to the `ftw' procedure. The
  35. ;; first parameter is a filename, the second parameter is the
  36. ;; vector value as returned by calling `stat' on FILENAME.
  37. ;;
  38. ;; The last parameter is a symbol giving more information about
  39. ;; FILENAM. It can have one of the following values:
  40. ;;
  41. ;; `regular'
  42. ;; The current item is a normal file or files which do not fit
  43. ;; into one of the following categories. This means
  44. ;; especially special files, sockets etc.
  45. ;;
  46. ;; `directory'
  47. ;; The current item is a directory.
  48. ;;
  49. ;; `invalid-stat'
  50. ;; The `stat' call to fill the object pointed to by the second
  51. ;; parameter failed and so the information is invalid.
  52. ;;
  53. ;; `directory-not-readable'
  54. ;; The item is a directory which cannot be read.
  55. ;;
  56. ;; `symlink'
  57. ;; The item is a symbolic link. Since symbolic links are
  58. ;; normally followed seeing this value in a `ftw' callback
  59. ;; procedure means the referenced file does not exist. The
  60. ;; situation for `nftw' is different.
  61. ;;
  62. ;; - Data Type: __nftw_proc_t
  63. ;; (lambda (filename statinfo flag base level) ...) => status
  64. ;;
  65. ;; The first three arguments have the same as for the
  66. ;; `__ftw_proc_t' type. A difference is that for the third
  67. ;; argument some additional values are defined to allow finer
  68. ;; differentiation:
  69. ;;
  70. ;; `directory-processed'
  71. ;; The current item is a directory and all subdirectories have
  72. ;; already been visited and reported. This flag is returned
  73. ;; instead of `directory' if the `depth' flag is given to
  74. ;; `nftw' (see below).
  75. ;;
  76. ;; `stale-symlink'
  77. ;; The current item is a stale symbolic link. The file it
  78. ;; points to does not exist.
  79. ;;
  80. ;; The last two parameters are described below. They contain
  81. ;; information to help interpret FILENAME and give some information
  82. ;; about current state of the traversal of the directory hierarchy.
  83. ;;
  84. ;; `base'
  85. ;; The value specifies which part of the filename argument
  86. ;; given in the first parameter to the callback procedure is
  87. ;; the name of the file. The rest of the string is the path
  88. ;; to locate the file. This information is especially
  89. ;; important if the `chdir' flag for `nftw' was set since then
  90. ;; the current directory is the one the current item is found
  91. ;; in.
  92. ;;
  93. ;; `level'
  94. ;; While processing the directory the procedures tracks how
  95. ;; many directories have been examined to find the current
  96. ;; item. This nesting level is 0 for the item given starting
  97. ;; item (file or directory) and is incremented by one for each
  98. ;; entered directory.
  99. ;;
  100. ;; * Procedure: (ftw filename proc . options)
  101. ;; Do a filesystem tree walk starting at FILENAME using PROC.
  102. ;;
  103. ;; The `ftw' procedure calls the callback procedure given in the
  104. ;; parameter PROC for every item which is found in the directory
  105. ;; specified by FILENAME and all directories below. The procedure
  106. ;; follows symbolic links if necessary but does not process an item
  107. ;; twice. If FILENAME names no directory this item is the only
  108. ;; object reported by calling the callback procedure.
  109. ;;
  110. ;; The filename given to the callback procedure is constructed by
  111. ;; taking the FILENAME parameter and appending the names of all
  112. ;; passed directories and then the local file name. So the
  113. ;; callback procedure can use this parameter to access the file.
  114. ;; Before the callback procedure is called `ftw' calls `stat' for
  115. ;; this file and passes the information up to the callback
  116. ;; procedure. If this `stat' call was not successful the failure is
  117. ;; indicated by setting the flag argument of the callback procedure
  118. ;; to `invalid-stat'. Otherwise the flag is set according to the
  119. ;; description given in the description of `__ftw_proc_t' above.
  120. ;;
  121. ;; The callback procedure is expected to return non-#f to indicate
  122. ;; that no error occurred and the processing should be continued.
  123. ;; If an error occurred in the callback procedure or the call to
  124. ;; `ftw' shall return immediately the callback procedure can return
  125. ;; #f. This is the only correct way to stop the procedure. The
  126. ;; program must not use `throw' or similar techniques to continue
  127. ;; the program in another place. [Can we relax this? --ttn]
  128. ;;
  129. ;; The return value of the `ftw' procedure is #t if all callback
  130. ;; procedure calls returned #t and all actions performed by the
  131. ;; `ftw' succeeded. If some procedure call failed (other than
  132. ;; calling `stat' on an item) the procedure returns #f. If a
  133. ;; callback procedure returns a value other than #t this value is
  134. ;; returned as the return value of `ftw'.
  135. ;;
  136. ;; * Procedure: (nftw filename proc . control-flags)
  137. ;; Do a new-style filesystem tree walk starting at FILENAME using PROC.
  138. ;; Various optional CONTROL-FLAGS alter the default behavior.
  139. ;;
  140. ;; The `nftw' procedures works like the `ftw' procedures. It calls
  141. ;; the callback procedure PROC for all items it finds in the
  142. ;; directory FILENAME and below.
  143. ;;
  144. ;; The differences are that for one the callback procedure is of a
  145. ;; different type. It takes also `base' and `level' parameters as
  146. ;; described above.
  147. ;;
  148. ;; The second difference is that `nftw' takes additional optional
  149. ;; arguments which are zero or more of the following symbols:
  150. ;;
  151. ;; physical'
  152. ;; While traversing the directory symbolic links are not
  153. ;; followed. I.e., if this flag is given symbolic links are
  154. ;; reported using the `symlink' value for the type parameter
  155. ;; to the callback procedure. Please note that if this flag is
  156. ;; used the appearance of `symlink' in a callback procedure
  157. ;; does not mean the referenced file does not exist. To
  158. ;; indicate this the extra value `stale-symlink' exists.
  159. ;;
  160. ;; mount'
  161. ;; The callback procedure is only called for items which are on
  162. ;; the same mounted filesystem as the directory given as the
  163. ;; FILENAME parameter to `nftw'.
  164. ;;
  165. ;; chdir'
  166. ;; If this flag is given the current working directory is
  167. ;; changed to the directory containing the reported object
  168. ;; before the callback procedure is called.
  169. ;;
  170. ;; depth'
  171. ;; If this option is given the procedure visits first all files
  172. ;; and subdirectories before the callback procedure is called
  173. ;; for the directory itself (depth-first processing). This
  174. ;; also means the type flag given to the callback procedure is
  175. ;; `directory-processed' and not `directory'.
  176. ;;
  177. ;; The return value is computed in the same way as for `ftw'.
  178. ;; `nftw' returns #t if no failure occurred in `nftw' and all
  179. ;; callback procedure call return values are also #t. For internal
  180. ;; errors such as memory problems the error `ftw-error' is thrown.
  181. ;; If the return value of a callback invocation is not #t this
  182. ;; very same value is returned.
  183. ;;; Code:
  184. (define-module (ice-9 ftw)
  185. :export (ftw nftw))
  186. (define (directory-files dir)
  187. (let ((dir-stream (opendir dir)))
  188. (let loop ((new (readdir dir-stream))
  189. (acc '()))
  190. (if (eof-object? new)
  191. (begin
  192. (closedir dir-stream)
  193. acc)
  194. (loop (readdir dir-stream)
  195. (if (or (string=? "." new) ;;; ignore
  196. (string=? ".." new)) ;;; ignore
  197. acc
  198. (cons new acc)))))))
  199. (define (pathify . nodes)
  200. (let loop ((nodes nodes)
  201. (result ""))
  202. (if (null? nodes)
  203. (or (and (string=? "" result) "")
  204. (substring result 1 (string-length result)))
  205. (loop (cdr nodes) (string-append result "/" (car nodes))))))
  206. (define (abs? filename)
  207. (char=? #\/ (string-ref filename 0)))
  208. ;; `visited?-proc' returns a test procedure VISITED? which when called as
  209. ;; (VISITED? stat-obj) returns #f the first time a distinct file is seen,
  210. ;; then #t on any subsequent sighting of it.
  211. ;;
  212. ;; stat:dev and stat:ino together uniquely identify a file (see "Attribute
  213. ;; Meanings" in the glibc manual). Often there'll be just one dev, and
  214. ;; usually there's just a handful mounted, so the strategy here is a small
  215. ;; hash table indexed by dev, containing hash tables indexed by ino.
  216. ;;
  217. ;; It'd be possible to make a pair (dev . ino) and use that as the key to a
  218. ;; single hash table. It'd use an extra pair for every file visited, but
  219. ;; might be a little faster if it meant less scheme code.
  220. ;;
  221. (define (visited?-proc size)
  222. (let ((dev-hash (make-hash-table 7)))
  223. (lambda (s)
  224. (and s
  225. (let ((ino-hash (hashv-ref dev-hash (stat:dev s)))
  226. (ino (stat:ino s)))
  227. (or ino-hash
  228. (begin
  229. (set! ino-hash (make-hash-table size))
  230. (hashv-set! dev-hash (stat:dev s) ino-hash)))
  231. (or (hashv-ref ino-hash ino)
  232. (begin
  233. (hashv-set! ino-hash ino #t)
  234. #f)))))))
  235. (define (stat-dir-readable?-proc uid gid)
  236. (let ((uid (getuid))
  237. (gid (getgid)))
  238. (lambda (s)
  239. (let* ((perms (stat:perms s))
  240. (perms-bit-set? (lambda (mask)
  241. (not (= 0 (logand mask perms))))))
  242. (or (and (= uid (stat:uid s))
  243. (perms-bit-set? #o400))
  244. (and (= gid (stat:gid s))
  245. (perms-bit-set? #o040))
  246. (perms-bit-set? #o004))))))
  247. (define (stat&flag-proc dir-readable? . control-flags)
  248. (let* ((directory-flag (if (memq 'depth control-flags)
  249. 'directory-processed
  250. 'directory))
  251. (stale-symlink-flag (if (memq 'nftw-style control-flags)
  252. 'stale-symlink
  253. 'symlink))
  254. (physical? (memq 'physical control-flags))
  255. (easy-flag (lambda (s)
  256. (let ((type (stat:type s)))
  257. (if (eq? 'directory type)
  258. (if (dir-readable? s)
  259. directory-flag
  260. 'directory-not-readable)
  261. 'regular)))))
  262. (lambda (name)
  263. (let ((s (false-if-exception (lstat name))))
  264. (cond ((not s)
  265. (values s 'invalid-stat))
  266. ((eq? 'symlink (stat:type s))
  267. (let ((s-follow (false-if-exception (stat name))))
  268. (cond ((not s-follow)
  269. (values s stale-symlink-flag))
  270. ((and s-follow physical?)
  271. (values s 'symlink))
  272. ((and s-follow (not physical?))
  273. (values s-follow (easy-flag s-follow))))))
  274. (else (values s (easy-flag s))))))))
  275. (define (clean name)
  276. (let ((last-char-index (1- (string-length name))))
  277. (if (char=? #\/ (string-ref name last-char-index))
  278. (substring name 0 last-char-index)
  279. name)))
  280. (define (ftw filename proc . options)
  281. (let* ((visited? (visited?-proc (cond ((memq 'hash-size options) => cadr)
  282. (else 211))))
  283. (stat&flag (stat&flag-proc
  284. (stat-dir-readable?-proc (getuid) (getgid)))))
  285. (letrec ((go (lambda (fullname)
  286. (call-with-values (lambda () (stat&flag fullname))
  287. (lambda (s flag)
  288. (or (visited? s)
  289. (let ((ret (proc fullname s flag))) ; callback
  290. (or (eq? #t ret)
  291. (throw 'ftw-early-exit ret))
  292. (and (eq? 'directory flag)
  293. (for-each
  294. (lambda (child)
  295. (go (pathify fullname child)))
  296. (directory-files fullname)))
  297. #t)))))))
  298. (catch 'ftw-early-exit
  299. (lambda () (go (clean filename)))
  300. (lambda (key val) val)))))
  301. (define (nftw filename proc . control-flags)
  302. (let* ((od (getcwd)) ; orig dir
  303. (odev (let ((s (false-if-exception (lstat filename))))
  304. (if s (stat:dev s) -1)))
  305. (same-dev? (if (memq 'mount control-flags)
  306. (lambda (s) (= (stat:dev s) odev))
  307. (lambda (s) #t)))
  308. (base-sub (lambda (name base) (substring name 0 base)))
  309. (maybe-cd (if (memq 'chdir control-flags)
  310. (if (abs? filename)
  311. (lambda (fullname base)
  312. (or (= 0 base)
  313. (chdir (base-sub fullname base))))
  314. (lambda (fullname base)
  315. (chdir
  316. (pathify od (base-sub fullname base)))))
  317. (lambda (fullname base) #t)))
  318. (maybe-cd-back (if (memq 'chdir control-flags)
  319. (lambda () (chdir od))
  320. (lambda () #t)))
  321. (depth-first? (memq 'depth control-flags))
  322. (visited? (visited?-proc
  323. (cond ((memq 'hash-size control-flags) => cadr)
  324. (else 211))))
  325. (has-kids? (if depth-first?
  326. (lambda (flag) (eq? flag 'directory-processed))
  327. (lambda (flag) (eq? flag 'directory))))
  328. (stat&flag (apply stat&flag-proc
  329. (stat-dir-readable?-proc (getuid) (getgid))
  330. (cons 'nftw-style control-flags))))
  331. (letrec ((go (lambda (fullname base level)
  332. (call-with-values (lambda () (stat&flag fullname))
  333. (lambda (s flag)
  334. (letrec ((self (lambda ()
  335. (maybe-cd fullname base)
  336. ;; the callback
  337. (let ((ret (proc fullname s flag
  338. base level)))
  339. (maybe-cd-back)
  340. (or (eq? #t ret)
  341. (throw 'nftw-early-exit ret)))))
  342. (kids (lambda ()
  343. (and (has-kids? flag)
  344. (for-each
  345. (lambda (child)
  346. (go (pathify fullname child)
  347. (1+ (string-length
  348. fullname))
  349. (1+ level)))
  350. (directory-files fullname))))))
  351. (or (visited? s)
  352. (not (same-dev? s))
  353. (if depth-first?
  354. (begin (kids) (self))
  355. (begin (self) (kids)))))))
  356. #t)))
  357. (let ((ret (catch 'nftw-early-exit
  358. (lambda () (go (clean filename) 0 0))
  359. (lambda (key val) val))))
  360. (chdir od)
  361. ret))))
  362. ;;; ftw.scm ends here