search.scm 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014 Mark H Weaver <mhw@netris.org>
  3. ;;; Copyright © 2015, 2016 Eric Bavier <bavier@member.fsf.org>
  4. ;;; Copyright © 2017 Thomas Danckaert <post@thomasdanckaert.be>
  5. ;;; Copyright © 2017 Ricardo Wurmus <rekado@elephly.net>
  6. ;;; Copyright © 2018, 2020, 2021 Tobias Geerinckx-Rice <me@tobias.gr>
  7. ;;; Copyright © 2018 Adam Massmann <massmannak@gmail.com>
  8. ;;; Copyright © 2020 Hartmut Goebel <h.goebel@crazy-compilers.com>
  9. ;;; Copyright © 2021 Vinicius Monego <monego@posteo.net>
  10. ;;;
  11. ;;; This file is part of GNU Guix.
  12. ;;;
  13. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  14. ;;; under the terms of the GNU General Public License as published by
  15. ;;; the Free Software Foundation; either version 3 of the License, or (at
  16. ;;; your option) any later version.
  17. ;;;
  18. ;;; GNU Guix is distributed in the hope that it will be useful, but
  19. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  20. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. ;;; GNU General Public License for more details.
  22. ;;;
  23. ;;; You should have received a copy of the GNU General Public License
  24. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  25. (define-module (gnu packages search)
  26. #:use-module ((guix licenses)
  27. #:select (gpl2 gpl2+ gpl3+ agpl3+ lgpl2.1+ bsd-3 x11 perl-license))
  28. #:use-module (guix packages)
  29. #:use-module (guix download)
  30. #:use-module (guix git-download)
  31. #:use-module (guix utils)
  32. #:use-module (guix build-system gnu)
  33. #:use-module (guix build-system perl)
  34. #:use-module (guix build-system python)
  35. #:use-module (gnu packages)
  36. #:use-module (gnu packages compression)
  37. #:use-module (gnu packages check)
  38. #:use-module (gnu packages databases)
  39. #:use-module (gnu packages freedesktop)
  40. #:use-module (gnu packages less)
  41. #:use-module (gnu packages linux)
  42. #:use-module (gnu packages pcre)
  43. #:use-module (gnu packages perl)
  44. #:use-module (gnu packages pdf)
  45. #:use-module (gnu packages python)
  46. #:use-module (gnu packages python-crypto)
  47. #:use-module (gnu packages python-web)
  48. #:use-module (gnu packages python-xyz)
  49. #:use-module (gnu packages sphinx)
  50. #:use-module (gnu packages time)
  51. #:use-module (gnu packages web)
  52. #:use-module (gnu packages xdisorg)
  53. #:use-module (gnu packages xml))
  ;; Xapian core library (the "xapian-core" tarball).  Other packages in this
  ;; file take it as an input, and python-xapian-bindings inherits from it.
  (define-public xapian
    (package
      (name "xapian")
      (version "1.4.18")
      ;; Note: When updating Xapian, remember to update xapian-bindings below.
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://oligarchy.co.uk/xapian/" version
                             "/xapian-core-" version ".tar.xz"))
         (sha256
          (base32 "0xsb4ihf3p767f0zx9p4janwni6r9sg5j6lry0002i8hmnsdnv8r"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'check
             (lambda _
               ;; As of Xapian 1.3.3, the TCP server implementation uses
               ;; getaddrinfo().  This does not work in the build
               ;; environment, so the "check-remotetcp" target is left out of
               ;; the list below.  See HACKING for the list of targets.
               (let ((targets '("check-inmemory"
                                "check-remoteprog"
                                "check-multi"
                                "check-glass"
                                "check-chert")))
                 (apply invoke "make" targets)))))))
      (inputs
       `(("zlib" ,zlib)
         ;; Only the "lib" output of util-linux is used.
         ("util-linux" ,util-linux "lib")))
      (home-page "https://xapian.org/")
      (synopsis "Search Engine Library")
      (description
       "Xapian is a highly adaptable toolkit which allows developers to easily
add advanced indexing and search facilities to their own applications. It
supports the Probabilistic Information Retrieval model and also supports a
rich set of boolean query operators.")
      (license (list gpl2+ bsd-3 x11))))
  ;; Python 3 bindings for Xapian.  The package inherits from xapian, so any
  ;; field not overridden here (for instance home-page and description) is
  ;; taken from that package.
  (define-public python-xapian-bindings
    (package
      (inherit xapian)
      (name "python-xapian-bindings")
      ;; Track the core library's version.
      (version (package-version xapian))
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://oligarchy.co.uk/xapian/" version
                             "/xapian-bindings-" version ".tar.xz"))
         (sha256
          (base32
           "13ziql8027glgihgvnbsa75vkcn82g83mbihj60zf0njj170clpy"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags '("--with-python3")
         #:make-flags
         ;; Point pkgpylibdir at this package's own site-packages directory.
         ;; The Python major+minor version is spliced in on the host side.
         (let ((out (assoc-ref %outputs "out"))
               (python-version ,(version-major+minor
                                 (package-version python))))
           (list (string-append "pkgpylibdir="
                                out
                                "/lib/python" python-version
                                "/site-packages/xapian")))))
      (native-inputs
       `(("python-sphinx" ,python-sphinx))) ;for documentation
      (inputs
       `(("python" ,python)
         ("xapian" ,xapian)
         ("zlib" ,zlib)))
      (synopsis "Python bindings for the Xapian search engine library")
      (license gpl2+)))
  ;; Search::Xapian, fetched from CPAN and built with the Perl build system
  ;; against the xapian package.
  (define-public perl-search-xapian
    (package
      (name "perl-search-xapian")
      (version "1.2.25.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://cpan/authors/id/O/OL/OLLY/"
                             "Search-Xapian-" version ".tar.gz"))
         (sha256
          (base32 "1pbl8pbgmbs3i8yik4p63g4pd9bhn0dp3d7l667dkvw0kccl66c7"))))
      (build-system perl-build-system)
      (native-inputs
       `(("perl-devel-leak" ,perl-devel-leak)))
      (inputs
       `(("xapian" ,xapian)))
      (synopsis "Perl XS frontend to the Xapian C++ search library")
      (description
       "Search::Xapian wraps most methods of most Xapian classes. The missing
classes and methods should be added in the future. It also provides a
simplified, more 'perlish' interface to some common operations.")
      (home-page "https://metacpan.org/release/Search-Xapian")
      (license perl-license)))
  ;; Library part of the Tocc tarball.  The build happens in "libtocc/src";
  ;; the test suite lives in the sibling "tests" directory and has its own
  ;; configure script.
  (define-public libtocc
    (package
      (name "libtocc")
      (version "1.0.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/aidin36/tocc/releases/download/"
                             "v" version "/tocc-" version ".tar.gz"))
         (sha256
          (base32
           "1kd2jd74m8ksc8s7hh0haz0q0c3n0mr39bbky262kk4l58f1g068"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Enter the library's source directory before configuring.
           (add-before 'configure 'chdir-source
             (lambda _
               (chdir "libtocc/src")
               #t))
           ;; Configure, build and run the tests from "../tests", pointing
           ;; them at the headers and the freshly built library in "../src".
           (replace 'check
             (lambda _
               (with-directory-excursion "../tests"
                 (let ((sh (which "sh")))
                   (invoke "./configure"
                           (string-append "CONFIG_SHELL=" sh)
                           (string-append "SHELL=" sh)
                           "CPPFLAGS=-I../src"
                           "LDFLAGS=-L../src/.libs -Wl,-rpath=../src/.libs"))
                 (invoke "make")
                 (invoke "./libtocctests")))))))
      (native-inputs
       `(("catch" ,catch-framework)))
      (inputs
       `(("unqlite" ,unqlite)))
      (home-page "https://t-o-c-c.com/")
      (synopsis "Tool for Obsessive Compulsive Classifiers")
      (description
       "libtocc is the engine of the Tocc project, a tag-based file management
system. The goal of Tocc is to provide a better system for classifying files
that is more flexible than classic file systems that are based on a tree of
files and directories.")
      (license gpl3+)))
  ;; Command-line front end for libtocc.  It reuses libtocc's version and
  ;; source and builds from the "cli/src" subdirectory.
  (define-public tocc
    (package
      (name "tocc")
      (version (package-version libtocc))
      (source (package-source libtocc))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;No tests
         #:phases
         (modify-phases %standard-phases
           ;; Enter the CLI's source directory right after unpacking.
           (add-after 'unpack 'chdir-source
             (lambda _
               (chdir "cli/src"))))))
      (inputs
       `(("libtocc" ,libtocc)
         ("unqlite" ,unqlite)))
      (home-page "https://t-o-c-c.com/")
      (synopsis "Command-line interface to libtocc")
      (description
       "Tocc is a tag-based file management system. This package contains the
command line tool for interacting with libtocc.")
      (license gpl3+)))
  ;; Searx metasearch engine, fetched from the Git tag that matches the
  ;; version and built with the Python build system.
  (define-public searx
    (package
      (name "searx")
      (version "1.0.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/searx/searx")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0ghkx8g8jnh8yd46p4mlbjn2zm12nx27v7qflr4c8xhlgi0px0mh"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #f ;what tests do is make online requests to each engine
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'relax-requirements
             (lambda _
               ;; These packages are outdated in Guix at the time of
               ;; packaging.  When they are updated, remove corresponding
               ;; substitutions.  Tests can run after build with
               ;; 'searx-checker' tool in /bin.
               ;;
               ;; The specific pins are rewritten first; the final clause
               ;; then relaxes every remaining "==" pin to ">=".
               (substitute* "requirements.txt"
                 (("flask-babel==2.0.0")
                  "flask-babel>=1.0.0")
                 (("jinja2==2.11.3")
                  "jinja2>=2.11.2")
                 (("lxml==4.6.3")
                  "lxml>=4.4.2")
                 (("pygments==2.8.0")
                  "pygments>=2.7.3")
                 (("requests\\[socks\\]==2.25.1")
                  "requests>=2.25")
                 (("==")
                  ">=")))))))
      (propagated-inputs
       `(("python-babel" ,python-babel)
         ("python-certifi" ,python-certifi)
         ("python-dateutil" ,python-dateutil)
         ("python-flask" ,python-flask)
         ("python-flask-babel" ,python-flask-babel)
         ("python-idna" ,python-idna)
         ("python-jinja2" ,python-jinja2)
         ("python-langdetect" ,python-langdetect)
         ("python-lxml" ,python-lxml)
         ("python-pygments" ,python-pygments)
         ("python-pyyaml" ,python-pyyaml)
         ("python-requests" ,python-requests)))
      (home-page "https://searx.github.io/searx/")
      (synopsis "Privacy-respecting metasearch engine")
      (description "Searx is a privacy-respecting, hackable metasearch engine.")
      (license agpl3+)))
  ;; GNU Bool, fetched from the GNU mirrors and built with the default phases
  ;; of the GNU build system.
  (define-public bool
    (package
      (name "bool")
      (version "0.2.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://gnu/bool/bool-" version ".tar.xz"))
         (sha256
          (base32 "1frdmgrmb509fxbdpsxxw3lvvwv7xm1pavqrqgm4jg698iix6xfw"))))
      (build-system gnu-build-system)
      (synopsis "Finding text and HTML files that match boolean expressions")
      (description
       "GNU Bool is a utility to perform text searches on files using Boolean
expressions. For example, a search for \"hello AND world\" would return a
file containing the phrase \"Hello, world!\". It supports both AND and OR
statements, as well as the NEAR statement to search for the occurrence of
words in close proximity to each other. It handles context gracefully,
accounting for new lines and paragraph changes. It also has robust support
for parsing HTML files.")
      (home-page "https://www.gnu.org/software/bool/")
      (license gpl3+)))
  ;; Hyper Estraier full-text search system.  The package's own "lib"
  ;; directory is passed to the linker as an rpath at configure time.
  (define-public hyperestraier
    (package
      (name "hyperestraier")
      (version "1.4.13")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://fallabs.com/" name "/"
                             name "-" version ".tar.gz"))
         (sha256
          (base32
           "1qk3pxgzyrpcz5qfyd5xs2hw9q1cbb7j5zd4kp1diq501wcj2vs9"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags
         (let ((libdir (string-append (assoc-ref %outputs "out") "/lib")))
           (list (string-append "LDFLAGS=-Wl,-rpath=" libdir)))))
      (inputs
       `(("qdbm" ,qdbm)
         ("zlib" ,zlib)))
      (home-page "https://fallabs.com/hyperestraier")
      (synopsis "Full-text search system")
      (description "Hyper Estraier can be used to integrate full-text
search into applications, using either the provided command line and CGI
interfaces, or a C API.")
      (license lgpl2.1+)))
  ;; mlocate, built from the release tarball with the default phases of the
  ;; GNU build system.
  (define-public mlocate
    (package
      (name "mlocate")
      (version "0.26")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://releases.pagure.org/mlocate/"
                             "mlocate-" version ".tar.xz"))
         (sha256
          (base32 "0gi6y52gkakhhlnzy0p6izc36nqhyfx5830qirhvk3qrzrwxyqrh"))))
      (build-system gnu-build-system)
      (synopsis "Locate files on the file system")
      (description
       "mlocate is a locate/updatedb implementation. The @code{m} stands for
\"merging\": @code{updatedb} reuses the existing database to avoid rereading
most of the file system, which makes it faster and does not trash the system
caches as much. The locate(1) utility is intended to be completely compatible
with slocate, and attempts to be compatible to GNU locate when it does not
conflict with slocate compatibility.")
      (home-page "https://pagure.io/mlocate")
      (license gpl2)))
  ;; Swish-e web indexing system.  Upstream is fetched from an archived
  ;; snapshot with a Debian mirror as fallback.  After installation, two of
  ;; its Perl programs are wrapped so they can find their Perl modules.
  (define-public swish-e
    (package
      (name "swish-e")
      (version "2.4.7")
      (source
       (origin
         (method url-fetch)
         (uri (list (string-append
                     "https://web.archive.org/web/20160730145202/"
                     "http://swish-e.org/distribution/"
                     "swish-e-" version ".tar.gz")
                    (string-append "http://http.debian.net/debian/pool/"
                                   "main/s/swish-e/swish-e_" version
                                   ".orig.tar.gz")))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0qkrk7z25yp9hynj21vxkyn7yi8gcagcfxnass5cgczcz0gm9pax"))
         (patches (search-patches "swish-e-search.patch"
                                  "swish-e-format-security.patch"))))
      (build-system gnu-build-system)
      (arguments
       `(;; XXX: This fails to build with zlib (API mismatch) and tests fail
         ;; with libxml2, so disable both.
         #:configure-flags '("--without-zlib"
                             "--without-libxml2")
         #:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out"))
                     ;; site_perl directory of each Perl module input.
                     ;; These perl modules have no propagated inputs, so no
                     ;; further analysis needed.
                     (perl5lib
                      (map (lambda (label)
                             (string-append (assoc-ref inputs label)
                                            "/lib/perl5/site_perl"))
                           '("perl-uri"
                             "perl-html-parser"
                             "perl-html-tagset"
                             "perl-mime-types"))))
                 ;; Prefix PERL5LIB with those directories for each program.
                 (for-each
                  (lambda (program)
                    (wrap-program program
                      (list "PERL5LIB" ":" 'prefix perl5lib)))
                  (list (string-append out "/lib/swish-e/swishspider")
                        (string-append out "/bin/swish-filter-test")))
                 #t))))))
      ;; Several other packages and perl modules may be installed alongside
      ;; swish-e to extend its features at runtime, but are not required for
      ;; building: xpdf, catdoc, MP3::Tag, Spreadsheet::ParseExcel,
      ;; HTML::Entities.
      (inputs
       `(("perl" ,perl)
         ("perl-uri" ,perl-uri)
         ("perl-html-parser" ,perl-html-parser)
         ("perl-html-tagset" ,perl-html-tagset)
         ("perl-mime-types" ,perl-mime-types)))
      (home-page (string-append "https://web.archive.org/web/20160730145202/"
                                "http://swish-e.org"))
      (synopsis "Web indexing system")
      (description
       "Swish-e is Simple Web Indexing System for Humans - Enhanced. Swish-e
can quickly and easily index directories of files or remote web sites and
search the generated indexes.")
      (license gpl2+)))                 ;with exception
  ;; Xapers document indexing system.  Besides the regular Python build, an
  ;; extra phase installs the man pages and a trimmed copy of the
  ;; "xapers-adder" script that no longer depends on a terminal emulator.
  (define-public xapers
    (package
      (name "xapers")
      (version "0.8.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://finestructure.net/xapers/releases/xapers-"
               version ".tar.gz"))
         (sha256
          (base32
           "0ykz6hn3qj46w3c99d6q0pi5ncq2894simcl7vapv047zm3cylmd"))))
      (build-system python-build-system)
      (propagated-inputs
       `(("poppler" ,poppler)
         ("python-urwid" ,python-urwid)
         ("xclip" ,xclip)
         ("xdg-utils" ,xdg-utils)))
      (inputs
       `(("python-latexcodec" ,python-latexcodec)
         ("python-pybtex" ,python-pybtex)
         ("python-pycurl" ,python-pycurl)
         ("python-pyyaml" ,python-pyyaml)
         ("python-six" ,python-six)
         ("python-xapian-bindings" ,python-xapian-bindings)))
      (arguments
       `(#:modules ((ice-9 rdelim)      ;for 'read-line'
                    (guix build python-build-system)
                    (guix build utils))
         #:phases
         (modify-phases %standard-phases
           (add-after 'install 'install-doc
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Copy INPUT to OUTPUT line by line up to, but excluding, the
               ;; first line starting with 'if [[ "$term"'.  In its place
               ;; write 'eval "$cmd"' and stop, returning #t.  Raise an
               ;; error when that marker line is never found, instead of
               ;; handing the end-of-file object to 'string-prefix?'.
               (define (purge-term-support input output)
                 (let loop ((line (read-line input)))
                   (cond ((eof-object? line)
                          (error "xapers-adder: terminal-support block not found; \
the 'install-doc' phase needs updating"))
                         ((string-prefix? "if [[ \"$term\"" line)
                          (display "eval \"$cmd\"\n" output)
                          #t)
                         (else
                          (display (string-append line "\n") output)
                          (loop (read-line input))))))
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (adder-out (string-append bin "/xapers-adder"))
                      (man1 (string-append out "/share/man/man1")))
                 (install-file "man/man1/xapers.1" man1)
                 (install-file "man/man1/xapers-adder.1" man1)
                 ;; below is equivalent to setting --no-term option
                 ;; permanently on; this is desirable to avoid imposing
                 ;; an x-terminal installation on the user but breaks
                 ;; some potential xapers-adder uses like auto browser
                 ;; pdf handler, but user could instead still use
                 ;; e.g. "xterm -e xapers-adder %F" for same use.
                 ;; alternatively we could propagate xterm as an input
                 ;; and replace 'x-terminal-emulator' with 'xterm'
                 (call-with-input-file "bin/xapers-adder"
                   (lambda (input)
                     (call-with-output-file adder-out
                       (lambda (output)
                         (purge-term-support input output)))))
                 ;; Make the generated script read-only and executable.
                 (chmod adder-out #o555)))))))
      (home-page "https://finestructure.net/xapers/")
      (synopsis "Personal document indexing system")
      (description
       "Xapers is a personal document indexing system,
geared towards academic journal articles build on the Xapian search engine.
Think of it as your own personal document search engine, or a local cache of
online libraries. It provides fast search of document text and
bibliographic data and simple document and bibtex retrieval.")
      (license gpl3+)))
  ;; ugrep, fetched from the upstream Git tag "v<version>".  A source snippet
  ;; strips pre-built executables and binary test fixtures from the checkout.
  (define-public ugrep
    (package
      (name "ugrep")
      (version "3.1.12")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Genivia/ugrep")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "06y61sf2ywjaix4nss11wwkxipj8cc9ccx6bsmdm31h8d8wd2s0j"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             (delete-file-recursively "bin") ; pre-built executables
             ;; Remove archives, PDFs and Java class files from the tests,
             ;; one pattern at a time.
             (for-each (lambda (pattern)
                         (for-each delete-file
                                   (find-files "tests" pattern)))
                       '("^archive\\..*"
                         "^.*\\.pdf$"
                         "^.*\\.class$"))
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f            ; no way to rebuild the binary input files
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (add-before 'check 'check-setup
             (lambda _
               ;; Unpatch shebangs in tests.
               (substitute* (list "tests/Hello.bat"
                                  "tests/Hello.sh")
                 (("#!/gnu/store/.*/bin/sh")
                  "#!/bin/sh")))))))
      (inputs
       `(("bzip2" ,bzip2)
         ("less" ,less)
         ("lz4" ,lz4)
         ;; NOTE(review): labelled "lzma" upstream of this comment; confirm
         ;; that lzip, rather than a package providing liblzma, is what the
         ;; build actually picks up.
         ("lzip" ,lzip)                 ;; lzma
         ("pcre2" ,pcre2)
         ("zlib" ,zlib)))
      (home-page "https://github.com/Genivia/ugrep/")
      (synopsis "Faster grep with an interactive query UI")
      (description "Ugrep is a ultra fast searcher of file systems, text
and binary files, source code, archives, compressed files, documents, and
more.
While still being compatible with the standard GNU/BSD grep command-line
options, ugrep supports fuzzy search as well as structured and (adjustable)
colored output, piped through \"less\" for pagination. An interactive query
UI allows refinement and has a built-in help (press F1). Ugrep implements
multi-threaded and other techniques to speed up search, pattern-matching and
decompression. Many pre-defined regexps ease searching e.g. C typdefs or XML
attributes. Results can be output in several structured or self-defined
formats.")
      (license bsd-3)))
  511. ;;; search.scm ends here