123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- ;;; GNU Guix --- Functional package management for GNU
- ;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org>
- ;;;
- ;;; This file is part of GNU Guix.
- ;;;
- ;;; GNU Guix is free software; you can redistribute it and/or modify it
- ;;; under the terms of the GNU General Public License as published by
- ;;; the Free Software Foundation; either version 3 of the License, or (at
- ;;; your option) any later version.
- ;;;
- ;;; GNU Guix is distributed in the hope that it will be useful, but
- ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;;; GNU General Public License for more details.
- ;;;
- ;;; You should have received a copy of the GNU General Public License
- ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
- (define-module (guix man-db)
- #:use-module (zlib)
- #:use-module ((guix build utils) #:select (find-files))
- #:use-module (gdbm) ;gdbm-ffi
- #:use-module (srfi srfi-9)
- #:use-module (srfi srfi-26)
- #:use-module (ice-9 match)
- #:use-module (ice-9 rdelim)
- #:use-module (ice-9 regex)
- #:export (mandb-entry?
- mandb-entry-file-name
- mandb-entry-name
- mandb-entry-section
- mandb-entry-synopsis
- mandb-entry-kind
- mandb-entries
- write-mandb-database))
- ;;; Comment:
- ;;;
- ;;; Scan gzipped man pages and create a man-db database. The database is
- ;;; meant to be used by 'man -k KEYWORD'.
- ;;;
- ;;; The implementation here aims to be simpler than that of 'man-db', and to
- ;;; produce deterministic output. See <https://bugs.gnu.org/29654>.
- ;;;
- ;;; Code:
- (define-record-type <mandb-entry>
- (mandb-entry file-name name section synopsis kind)
- mandb-entry?
- (file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz"
- (name mandb-entry-name) ;e.g., "ABIWORD"
- (section mandb-entry-section) ;number
- (synopsis mandb-entry-synopsis) ;string
- (kind mandb-entry-kind)) ;'ultimate | 'link
- (define (mandb-entry<? entry1 entry2)
- (match entry1
- (($ <mandb-entry> file1 name1 section1)
- (match entry2
- (($ <mandb-entry> file2 name2 section2)
- (or (< section1 section2)
- (string<? (basename file1) (basename file2))))))))
- (define abbreviate-file-name
- (let ((man-file-rx (make-regexp "(.+)\\.[0-9][a-z]?(\\.gz)?$")))
- (lambda (file)
- (match (regexp-exec man-file-rx (basename file))
- (#f
- (basename file))
- (matches
- (match:substring matches 1))))))
- (define (entry->string entry)
- "Return the wire format for ENTRY as a string."
- (match entry
- (($ <mandb-entry> file name section synopsis kind)
- ;; See db_store.c:make_content in man-db for the format.
- (string-append (abbreviate-file-name file) "\t"
- (number->string section) "\t"
- (number->string section)
- ;; Timestamp that we always set to the epoch.
- "\t0\t0"
- ;; See "db_storage.h" in man-db for the different kinds.
- "\t"
- (case kind
- ((ultimate) "A") ;ultimate man page
- ((link) "B") ;".so" link to other man page
- (else "A")) ;something that doesn't matter much
- "\t-\t-\t"
- (if (string-suffix? ".gz" file) "gz" "")
- "\t"
- synopsis "\x00"))))
- ;; The man-db schema version we're compatible with.
- (define %version-key "$version$\x00")
- (define %version-value "2.5.0\x00")
- (define (write-mandb-database file entries)
- "Write ENTRIES to FILE as a man-db database. FILE is usually
- \".../index.db\", and is a GDBM database."
- (let ((db (gdbm-open file GDBM_WRCREAT)))
- (gdbm-set! db %version-key %version-value)
- ;; Write ENTRIES in sorted order so we get deterministic output.
- (for-each (lambda (entry)
- (gdbm-set! db
- ;; For the 'whatis' tool to find anything, the key
- ;; should match the name of the software,
- ;; e.g. 'cat'. Derive it from the file name, as
- ;; the name could technically be #f.
- (string-append (abbreviate-file-name
- (mandb-entry-file-name entry))
- "\x00")
- (entry->string entry)))
- (sort entries mandb-entry<?))
- (gdbm-close db)))
- (define (read-synopsis port)
- "Read from PORT a man page synopsis."
- (define (section? line)
- ;; True if LINE starts with ".SH", ".PP", or so.
- (string-prefix? "." (string-trim line)))
- (define (extract-synopsis str)
- (match (string-contains str "\\-")
- (#f "")
- (index
- (string-map (match-lambda
- (#\newline #\space)
- (chr chr))
- (string-trim-both (string-drop str (+ 2 index)))))))
- ;; Synopses look like "Command \- Do something.", possibly spanning several
- ;; lines.
- (let loop ((lines '()))
- (match (read-line port 'concat)
- ((? eof-object?)
- (extract-synopsis (string-concatenate-reverse lines)))
- ((? section?)
- (extract-synopsis (string-concatenate-reverse lines)))
- (line
- (loop (cons line lines))))))
- (define* (man-page->entry file #:optional (resolve identity))
- "Parse FILE, a gzipped man page, and return a <mandb-entry> for it."
- (define (string->number* str)
- (if (and (string-prefix? "\"" str)
- (> (string-length str) 1)
- (string-suffix? "\"" str))
- (string->number (string-drop (string-drop-right str 1) 1))
- (string->number str)))
- ;; Note: This works for both gzipped and uncompressed files.
- (call-with-gzip-input-port (open-file file "r0")
- (lambda (port)
- (let loop ((name #f)
- (section #f)
- (synopsis #f)
- (kind 'ultimate))
- (if (and name section synopsis)
- (mandb-entry file name section synopsis kind)
- (let ((line (read-line port)))
- (if (eof-object? line)
- (mandb-entry file name (or section 0) (or synopsis "")
- kind)
- (match (string-tokenize line)
- ((".TH" name (= string->number* section) _ ...)
- (loop name section synopsis kind))
- ((".SH" (or "NAME" "\"NAME\""))
- (loop name section (read-synopsis port) kind))
- ((".so" link)
- (match (and=> (resolve link)
- (cut man-page->entry <> resolve))
- (#f
- (loop name section synopsis 'link))
- (alias
- (mandb-entry file
- (mandb-entry-name alias)
- (mandb-entry-section alias)
- (mandb-entry-synopsis alias)
- 'link))))
- (_
- (loop name section synopsis kind))))))))))
- (define (man-files directory)
- "Return the list of man pages found under DIRECTORY, recursively."
- ;; Filter the list to ensure that broken symlinks are excluded.
- (filter file-exists? (find-files directory "\\.[0-9][a-z]?(\\.gz)?$")))
- (define (mandb-entries directory)
- "Return mandb entries for the man pages found under DIRECTORY, recursively."
- (map (lambda (file)
- (man-page->entry file
- (lambda (link)
- (let ((file (string-append directory "/" link
- ".gz")))
- (and (file-exists? file) file)))))
- (man-files directory)))
|