man-db.scm 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org>
  3. ;;;
  4. ;;; This file is part of GNU Guix.
  5. ;;;
  6. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  7. ;;; under the terms of the GNU General Public License as published by
  8. ;;; the Free Software Foundation; either version 3 of the License, or (at
  9. ;;; your option) any later version.
  10. ;;;
  11. ;;; GNU Guix is distributed in the hope that it will be useful, but
  12. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;; GNU General Public License for more details.
  15. ;;;
  16. ;;; You should have received a copy of the GNU General Public License
  17. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  18. (define-module (guix man-db)
  19. #:use-module (zlib)
  20. #:use-module ((guix build utils) #:select (find-files))
  21. #:use-module (gdbm) ;gdbm-ffi
  22. #:use-module (srfi srfi-9)
  23. #:use-module (srfi srfi-26)
  24. #:use-module (ice-9 match)
  25. #:use-module (ice-9 rdelim)
  26. #:use-module (ice-9 regex)
  27. #:export (mandb-entry?
  28. mandb-entry-file-name
  29. mandb-entry-name
  30. mandb-entry-section
  31. mandb-entry-synopsis
  32. mandb-entry-kind
  33. mandb-entries
  34. write-mandb-database))
  35. ;;; Comment:
  36. ;;;
  37. ;;; Scan gzipped man pages and create a man-db database. The database is
  38. ;;; meant to be used by 'man -k KEYWORD'.
  39. ;;;
  40. ;;; The implementation here aims to be simpler than that of 'man-db', and to
  41. ;;; produce deterministic output. See <https://bugs.gnu.org/29654>.
  42. ;;;
  43. ;;; Code:
  44. (define-record-type <mandb-entry>
  45. (mandb-entry file-name name section synopsis kind)
  46. mandb-entry?
  47. (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz"
  48. (name mandb-entry-name) ;e.g., "ABIWORD"
  49. (section mandb-entry-section) ;number
  50. (synopsis mandb-entry-synopsis) ;string
  51. (kind mandb-entry-kind)) ;'ultimate | 'link
  52. (define (mandb-entry<? entry1 entry2)
  53. (match entry1
  54. (($ <mandb-entry> file1 name1 section1)
  55. (match entry2
  56. (($ <mandb-entry> file2 name2 section2)
  57. (or (< section1 section2)
  58. (string<? (basename file1) (basename file2))))))))
  59. (define abbreviate-file-name
  60. (let ((man-file-rx (make-regexp "(.+)\\.[0-9][a-z]?(\\.gz)?$")))
  61. (lambda (file)
  62. (match (regexp-exec man-file-rx (basename file))
  63. (#f
  64. (basename file))
  65. (matches
  66. (match:substring matches 1))))))
  67. (define (entry->string entry)
  68. "Return the wire format for ENTRY as a string."
  69. (match entry
  70. (($ <mandb-entry> file name section synopsis kind)
  71. ;; See db_store.c:make_content in man-db for the format.
  72. (string-append (abbreviate-file-name file) "\t"
  73. (number->string section) "\t"
  74. (number->string section)
  75. ;; Timestamp that we always set to the epoch.
  76. "\t0\t0"
  77. ;; See "db_storage.h" in man-db for the different kinds.
  78. "\t"
  79. (case kind
  80. ((ultimate) "A") ;ultimate man page
  81. ((link) "B") ;".so" link to other man page
  82. (else "A")) ;something that doesn't matter much
  83. "\t-\t-\t"
  84. (if (string-suffix? ".gz" file) "gz" "")
  85. "\t"
  86. synopsis "\x00"))))
  87. ;; The man-db schema version we're compatible with.
  88. (define %version-key "$version$\x00")
  89. (define %version-value "2.5.0\x00")
  90. (define (write-mandb-database file entries)
  91. "Write ENTRIES to FILE as a man-db database. FILE is usually
  92. \".../index.db\", and is a GDBM database."
  93. (let ((db (gdbm-open file GDBM_WRCREAT)))
  94. (gdbm-set! db %version-key %version-value)
  95. ;; Write ENTRIES in sorted order so we get deterministic output.
  96. (for-each (lambda (entry)
  97. (gdbm-set! db
  98. (string-append (mandb-entry-file-name entry)
  99. "\x00")
  100. (entry->string entry)))
  101. (sort entries mandb-entry<?))
  102. (gdbm-close db)))
  103. (define (read-synopsis port)
  104. "Read from PORT a man page synopsis."
  105. (define (section? line)
  106. ;; True if LINE starts with ".SH", ".PP", or so.
  107. (string-prefix? "." (string-trim line)))
  108. (define (extract-synopsis str)
  109. (match (string-contains str "\\-")
  110. (#f "")
  111. (index
  112. (string-map (match-lambda
  113. (#\newline #\space)
  114. (chr chr))
  115. (string-trim-both (string-drop str (+ 2 index)))))))
  116. ;; Synopses look like "Command \- Do something.", possibly spanning several
  117. ;; lines.
  118. (let loop ((lines '()))
  119. (match (read-line port 'concat)
  120. ((? eof-object?)
  121. (extract-synopsis (string-concatenate-reverse lines)))
  122. ((? section?)
  123. (extract-synopsis (string-concatenate-reverse lines)))
  124. (line
  125. (loop (cons line lines))))))
  126. (define* (man-page->entry file #:optional (resolve identity))
  127. "Parse FILE, a gzipped man page, and return a <mandb-entry> for it."
  128. (define (string->number* str)
  129. (if (and (string-prefix? "\"" str)
  130. (> (string-length str) 1)
  131. (string-suffix? "\"" str))
  132. (string->number (string-drop (string-drop-right str 1) 1))
  133. (string->number str)))
  134. ;; Note: This works for both gzipped and uncompressed files.
  135. (call-with-gzip-input-port (open-file file "r0")
  136. (lambda (port)
  137. (let loop ((name #f)
  138. (section #f)
  139. (synopsis #f)
  140. (kind 'ultimate))
  141. (if (and name section synopsis)
  142. (mandb-entry file name section synopsis kind)
  143. (let ((line (read-line port)))
  144. (if (eof-object? line)
  145. (mandb-entry file name (or section 0) (or synopsis "")
  146. kind)
  147. (match (string-tokenize line)
  148. ((".TH" name (= string->number* section) _ ...)
  149. (loop name section synopsis kind))
  150. ((".SH" (or "NAME" "\"NAME\""))
  151. (loop name section (read-synopsis port) kind))
  152. ((".so" link)
  153. (match (and=> (resolve link)
  154. (cut man-page->entry <> resolve))
  155. (#f
  156. (loop name section synopsis 'link))
  157. (alias
  158. (mandb-entry file
  159. (mandb-entry-name alias)
  160. (mandb-entry-section alias)
  161. (mandb-entry-synopsis alias)
  162. 'link))))
  163. (_
  164. (loop name section synopsis kind))))))))))
  165. (define (man-files directory)
  166. "Return the list of man pages found under DIRECTORY, recursively."
  167. ;; Filter the list to ensure that broken symlinks are excluded.
  168. (filter file-exists? (find-files directory "\\.[0-9][a-z]?(\\.gz)?$")))
  169. (define (mandb-entries directory)
  170. "Return mandb entries for the man pages found under DIRECTORY, recursively."
  171. (map (lambda (file)
  172. (man-page->entry file
  173. (lambda (link)
  174. (let ((file (string-append directory "/" link
  175. ".gz")))
  176. (and (file-exists? file) file)))))
  177. (man-files directory)))