man-db.scm 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org>
  3. ;;;
  4. ;;; This file is part of GNU Guix.
  5. ;;;
  6. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  7. ;;; under the terms of the GNU General Public License as published by
  8. ;;; the Free Software Foundation; either version 3 of the License, or (at
  9. ;;; your option) any later version.
  10. ;;;
  11. ;;; GNU Guix is distributed in the hope that it will be useful, but
  12. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;; GNU General Public License for more details.
  15. ;;;
  16. ;;; You should have received a copy of the GNU General Public License
  17. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  18. (define-module (guix man-db)
  19. #:use-module (zlib)
  20. #:use-module ((guix build utils) #:select (find-files))
  21. #:use-module (gdbm) ;gdbm-ffi
  22. #:use-module (srfi srfi-9)
  23. #:use-module (srfi srfi-26)
  24. #:use-module (ice-9 match)
  25. #:use-module (ice-9 rdelim)
  26. #:use-module (ice-9 regex)
  27. #:export (mandb-entry?
  28. mandb-entry-file-name
  29. mandb-entry-name
  30. mandb-entry-section
  31. mandb-entry-synopsis
  32. mandb-entry-kind
  33. mandb-entries
  34. write-mandb-database))
  35. ;;; Comment:
  36. ;;;
  37. ;;; Scan gzipped man pages and create a man-db database. The database is
  38. ;;; meant to be used by 'man -k KEYWORD'.
  39. ;;;
  40. ;;; The implementation here aims to be simpler than that of 'man-db', and to
  41. ;;; produce deterministic output. See <https://bugs.gnu.org/29654>.
  42. ;;;
  43. ;;; Code:
  44. (define-record-type <mandb-entry>
  45. (mandb-entry file-name name section synopsis kind)
  46. mandb-entry?
  47. (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz"
  48. (name mandb-entry-name) ;e.g., "ABIWORD"
  49. (section mandb-entry-section) ;number
  50. (synopsis mandb-entry-synopsis) ;string
  51. (kind mandb-entry-kind)) ;'ultimate | 'link
  52. (define (mandb-entry<? entry1 entry2)
  53. (match entry1
  54. (($ <mandb-entry> file1 name1 section1)
  55. (match entry2
  56. (($ <mandb-entry> file2 name2 section2)
  57. (or (< section1 section2)
  58. (string<? (basename file1) (basename file2))))))))
  59. (define abbreviate-file-name
  60. (let ((man-file-rx (make-regexp "(.+)\\.[0-9][a-z]?(\\.gz)?$")))
  61. (lambda (file)
  62. (match (regexp-exec man-file-rx (basename file))
  63. (#f
  64. (basename file))
  65. (matches
  66. (match:substring matches 1))))))
  67. (define (entry->string entry)
  68. "Return the wire format for ENTRY as a string."
  69. (match entry
  70. (($ <mandb-entry> file name section synopsis kind)
  71. ;; See db_store.c:make_content in man-db for the format.
  72. (string-append (abbreviate-file-name file) "\t"
  73. (number->string section) "\t"
  74. (number->string section)
  75. ;; Timestamp that we always set to the epoch.
  76. "\t0\t0"
  77. ;; See "db_storage.h" in man-db for the different kinds.
  78. "\t"
  79. (case kind
  80. ((ultimate) "A") ;ultimate man page
  81. ((link) "B") ;".so" link to other man page
  82. (else "A")) ;something that doesn't matter much
  83. "\t-\t-\t"
  84. (if (string-suffix? ".gz" file) "gz" "")
  85. "\t"
  86. synopsis "\x00"))))
  87. ;; The man-db schema version we're compatible with.
  88. (define %version-key "$version$\x00")
  89. (define %version-value "2.5.0\x00")
  90. (define (write-mandb-database file entries)
  91. "Write ENTRIES to FILE as a man-db database. FILE is usually
  92. \".../index.db\", and is a GDBM database."
  93. (let ((db (gdbm-open file GDBM_WRCREAT)))
  94. (gdbm-set! db %version-key %version-value)
  95. ;; Write ENTRIES in sorted order so we get deterministic output.
  96. (for-each (lambda (entry)
  97. (gdbm-set! db
  98. ;; For the 'whatis' tool to find anything, the key
  99. ;; should match the name of the software,
  100. ;; e.g. 'cat'. Derive it from the file name, as
  101. ;; the name could technically be #f.
  102. (string-append (abbreviate-file-name
  103. (mandb-entry-file-name entry))
  104. "\x00")
  105. (entry->string entry)))
  106. (sort entries mandb-entry<?))
  107. (gdbm-close db)))
  108. (define (read-synopsis port)
  109. "Read from PORT a man page synopsis."
  110. (define (section? line)
  111. ;; True if LINE starts with ".SH", ".PP", or so.
  112. (string-prefix? "." (string-trim line)))
  113. (define (extract-synopsis str)
  114. (match (string-contains str "\\-")
  115. (#f "")
  116. (index
  117. (string-map (match-lambda
  118. (#\newline #\space)
  119. (chr chr))
  120. (string-trim-both (string-drop str (+ 2 index)))))))
  121. ;; Synopses look like "Command \- Do something.", possibly spanning several
  122. ;; lines.
  123. (let loop ((lines '()))
  124. (match (read-line port 'concat)
  125. ((? eof-object?)
  126. (extract-synopsis (string-concatenate-reverse lines)))
  127. ((? section?)
  128. (extract-synopsis (string-concatenate-reverse lines)))
  129. (line
  130. (loop (cons line lines))))))
  131. (define* (man-page->entry file #:optional (resolve identity))
  132. "Parse FILE, a gzipped man page, and return a <mandb-entry> for it."
  133. (define (string->number* str)
  134. (if (and (string-prefix? "\"" str)
  135. (> (string-length str) 1)
  136. (string-suffix? "\"" str))
  137. (string->number (string-drop (string-drop-right str 1) 1))
  138. (string->number str)))
  139. ;; Note: This works for both gzipped and uncompressed files.
  140. (call-with-gzip-input-port (open-file file "r0")
  141. (lambda (port)
  142. (let loop ((name #f)
  143. (section #f)
  144. (synopsis #f)
  145. (kind 'ultimate))
  146. (if (and name section synopsis)
  147. (mandb-entry file name section synopsis kind)
  148. (let ((line (read-line port)))
  149. (if (eof-object? line)
  150. (mandb-entry file name (or section 0) (or synopsis "")
  151. kind)
  152. (match (string-tokenize line)
  153. ((".TH" name (= string->number* section) _ ...)
  154. (loop name section synopsis kind))
  155. ((".SH" (or "NAME" "\"NAME\""))
  156. (loop name section (read-synopsis port) kind))
  157. ((".so" link)
  158. (match (and=> (resolve link)
  159. (cut man-page->entry <> resolve))
  160. (#f
  161. (loop name section synopsis 'link))
  162. (alias
  163. (mandb-entry file
  164. (mandb-entry-name alias)
  165. (mandb-entry-section alias)
  166. (mandb-entry-synopsis alias)
  167. 'link))))
  168. (_
  169. (loop name section synopsis kind))))))))))
  170. (define (man-files directory)
  171. "Return the list of man pages found under DIRECTORY, recursively."
  172. ;; Filter the list to ensure that broken symlinks are excluded.
  173. (filter file-exists? (find-files directory "\\.[0-9][a-z]?(\\.gz)?$")))
  174. (define (mandb-entries directory)
  175. "Return mandb entries for the man pages found under DIRECTORY, recursively."
  176. (map (lambda (file)
  177. (man-page->entry file
  178. (lambda (link)
  179. (let ((file (string-append directory "/" link
  180. ".gz")))
  181. (and (file-exists? file) file)))))
  182. (man-files directory)))