bioinformatics.scm 612 KB


  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ricardo Wurmus <rekado@elephly.net>
  3. ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
  4. ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
  5. ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
  6. ;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
  7. ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il>
  8. ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
  9. ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
  10. ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
  11. ;;; Copyright © 2017, 2021 Arun Isaac <arunisaac@systemreboot.net>
  12. ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
  13. ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
  14. ;;; Copyright © 2018, 2019, 2020, 2021 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
  15. ;;; Copyright © 2019, 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
  16. ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
  17. ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
  18. ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
  19. ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
  20. ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
  21. ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
  22. ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
  23. ;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
  24. ;;; Copyright © 2021 Felix Gruber <felgru@posteo.net>
  25. ;;;
  26. ;;; This file is part of GNU Guix.
  27. ;;;
  28. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  29. ;;; under the terms of the GNU General Public License as published by
  30. ;;; the Free Software Foundation; either version 3 of the License, or (at
  31. ;;; your option) any later version.
  32. ;;;
  33. ;;; GNU Guix is distributed in the hope that it will be useful, but
  34. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  35. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  36. ;;; GNU General Public License for more details.
  37. ;;;
  38. ;;; You should have received a copy of the GNU General Public License
  39. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  40. (define-module (gnu packages bioinformatics)
  41. #:use-module ((guix licenses) #:prefix license:)
  42. #:use-module (guix packages)
  43. #:use-module (guix utils)
  44. #:use-module (guix download)
  45. #:use-module (guix git-download)
  46. #:use-module (guix hg-download)
  47. #:use-module (guix build-system ant)
  48. #:use-module (guix build-system gnu)
  49. #:use-module (guix build-system cmake)
  50. #:use-module (guix build-system go)
  51. #:use-module (guix build-system haskell)
  52. #:use-module (guix build-system meson)
  53. #:use-module (guix build-system ocaml)
  54. #:use-module (guix build-system perl)
  55. #:use-module (guix build-system python)
  56. #:use-module (guix build-system qt)
  57. #:use-module (guix build-system r)
  58. #:use-module (guix build-system ruby)
  59. #:use-module (guix build-system scons)
  60. #:use-module (guix build-system trivial)
  61. #:use-module (guix deprecation)
  62. #:use-module (gnu packages)
  63. #:use-module (gnu packages admin)
  64. #:use-module (gnu packages assembly)
  65. #:use-module (gnu packages autotools)
  66. #:use-module (gnu packages algebra)
  67. #:use-module (gnu packages base)
  68. #:use-module (gnu packages bash)
  69. #:use-module (gnu packages bison)
  70. #:use-module (gnu packages bioconductor)
  71. #:use-module (gnu packages boost)
  72. #:use-module (gnu packages check)
  73. #:use-module (gnu packages code)
  74. #:use-module (gnu packages cmake)
  75. #:use-module (gnu packages compression)
  76. #:use-module (gnu packages cpio)
  77. #:use-module (gnu packages cran)
  78. #:use-module (gnu packages curl)
  79. #:use-module (gnu packages documentation)
  80. #:use-module (gnu packages databases)
  81. #:use-module (gnu packages datastructures)
  82. #:use-module (gnu packages dlang)
  83. #:use-module (gnu packages file)
  84. #:use-module (gnu packages flex)
  85. #:use-module (gnu packages gawk)
  86. #:use-module (gnu packages gcc)
  87. #:use-module (gnu packages gd)
  88. #:use-module (gnu packages golang)
  89. #:use-module (gnu packages glib)
  90. #:use-module (gnu packages graph)
  91. #:use-module (gnu packages graphics)
  92. #:use-module (gnu packages graphviz)
  93. #:use-module (gnu packages groff)
  94. #:use-module (gnu packages gtk)
  95. #:use-module (gnu packages guile)
  96. #:use-module (gnu packages guile-xyz)
  97. #:use-module (gnu packages haskell-check)
  98. #:use-module (gnu packages haskell-web)
  99. #:use-module (gnu packages haskell-xyz)
  100. #:use-module (gnu packages image)
  101. #:use-module (gnu packages image-processing)
  102. #:use-module (gnu packages imagemagick)
  103. #:use-module (gnu packages java)
  104. #:use-module (gnu packages java-compression)
  105. #:use-module (gnu packages jemalloc)
  106. #:use-module (gnu packages jupyter)
  107. #:use-module (gnu packages linux)
  108. #:use-module (gnu packages lisp-xyz)
  109. #:use-module (gnu packages logging)
  110. #:use-module (gnu packages lsof)
  111. #:use-module (gnu packages machine-learning)
  112. #:use-module (gnu packages man)
  113. #:use-module (gnu packages maths)
  114. #:use-module (gnu packages mpi)
  115. #:use-module (gnu packages ncurses)
  116. #:use-module (gnu packages node)
  117. #:use-module (gnu packages ocaml)
  118. #:use-module (gnu packages pcre)
  119. #:use-module (gnu packages parallel)
  120. #:use-module (gnu packages pdf)
  121. #:use-module (gnu packages perl)
  122. #:use-module (gnu packages perl-check)
  123. #:use-module (gnu packages pkg-config)
  124. #:use-module (gnu packages popt)
  125. #:use-module (gnu packages protobuf)
  126. #:use-module (gnu packages python)
  127. #:use-module (gnu packages python-build)
  128. #:use-module (gnu packages python-check)
  129. #:use-module (gnu packages python-compression)
  130. #:use-module (gnu packages python-crypto)
  131. #:use-module (gnu packages python-science)
  132. #:use-module (gnu packages python-web)
  133. #:use-module (gnu packages python-xyz)
  134. #:use-module (gnu packages qt)
  135. #:use-module (gnu packages rdf)
  136. #:use-module (gnu packages readline)
  137. #:use-module (gnu packages rsync)
  138. #:use-module (gnu packages ruby)
  139. #:use-module (gnu packages serialization)
  140. #:use-module (gnu packages shells)
  141. #:use-module (gnu packages sphinx)
  142. #:use-module (gnu packages statistics)
  143. #:use-module (gnu packages swig)
  144. #:use-module (gnu packages tbb)
  145. #:use-module (gnu packages tex)
  146. #:use-module (gnu packages texinfo)
  147. #:use-module (gnu packages textutils)
  148. #:use-module (gnu packages time)
  149. #:use-module (gnu packages tls)
  150. #:use-module (gnu packages vim)
  151. #:use-module (gnu packages web)
  152. #:use-module (gnu packages wget)
  153. #:use-module (gnu packages xml)
  154. #:use-module (gnu packages xorg)
  155. #:use-module (srfi srfi-1)
  156. #:use-module (srfi srfi-26)
  157. #:use-module (ice-9 match))
  ;; ARAGORN: detection of tRNA, mtRNA and tmRNA genes.  The 'configure phase
  ;; is removed, and the 'build and 'install phases are replaced by hand-written
  ;; ones that compile the single file aragornVERSION.c and install the
  ;; resulting binary together with its manual page.
  (define-public aragorn
    (package
      (name "aragorn")
      (version "1.2.38")
      (source (origin
                (method url-fetch)
                (uri (string-append
                      "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                      version ".tgz"))
                (sha256
                 (base32
                  "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; there are no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'build
             (lambda _
               ;; Ask Guix for the compiler matching the target system rather
               ;; than hard-coding "gcc": when cross-compiling this yields the
               ;; TARGET-gcc driver, and for native builds it is still "gcc".
               (invoke ,(cc-for-target)
                       "-O3"
                       "-ffast-math"
                       "-finline-functions"
                       "-o"
                       "aragorn"
                       (string-append "aragorn" ,version ".c"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Install the executable under bin/ and the manual page
               ;; under share/man/man1/ of the "out" output.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (man (string-append out "/share/man/man1")))
                 (install-file "aragorn" bin)
                 (install-file "aragorn.1" man))
               #t)))))
      (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
      (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
      (description
       "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
      (license license:gpl2)))
  ;; BamM: a C library wrapped in Python 2 for generating and parsing BAM
  ;; files.  The bundled htslib is deleted from the source and the "htslib"
  ;; input is passed to the build through configure flags instead.
  (define-public bamm
    (package
      (name "bamm")
      (version "1.7.3")
      (source
       (origin
         (method git-fetch)
         ;; BamM is not available on pypi, so fetch it from its repository.
         (uri (git-reference
               (url "https://github.com/Ecogenomics/BamM")
               (commit version)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
         (modules '((guix build utils)))
         ;; Remove the bundled copy of htslib.
         (snippet
          '(begin
             (delete-file-recursively "c/htslib-1.3.1")))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2               ; BamM is Python 2 only
         ;; Do not use the bundled libhts.  Do use the bundled libcfu because
         ;; it has been modified from its original form.
         #:configure-flags
         (let* ((hts-prefix (assoc-ref %build-inputs "htslib"))
                (hts-lib (string-append hts-prefix "/lib"))
                (hts-include (string-append hts-prefix "/include/htslib")))
           (list "--with-libhts-lib" hts-lib
                 "--with-libhts-inc" hts-include))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'autogen
             (lambda _
               (with-directory-excursion "c"
                 (let ((shell (which "sh")))
                   ;; The checkout must be writable before it can be patched.
                   (for-each make-file-writable (find-files "." ".*"))
                   ;; Run autogen.sh so that 'configure' works.
                   (substitute* "autogen.sh"
                     (("/bin/sh") shell))
                   (setenv "CONFIG_SHELL" shell)
                   (invoke "./autogen.sh")))))
           (delete 'build)                ; the build loops otherwise
           (replace 'check
             (lambda _
               ;; Two errors are printed, but both are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (invoke "nosetests")))
           (add-after 'install 'wrap-executable
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Record the build-time PATH and GUIX_PYTHONPATH in a wrapper
               ;; around the installed "bamm" script.
               (let ((program (string-append (assoc-ref outputs "out")
                                             "/bin/bamm"))
                     (search-path (getenv "PATH"))
                     (python-path (getenv "GUIX_PYTHONPATH")))
                 (wrap-program program
                   (list "PATH" ":" 'prefix (list search-path))
                   (list "GUIX_PYTHONPATH" ":" 'prefix
                         (list python-path)))))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)
         ("zlib" ,zlib)
         ("python-nose" ,python2-nose)
         ("python-pysam" ,python2-pysam)))
      (inputs
       `(("htslib" ,htslib-1.3)           ; at least one test fails on htslib-1.4+
         ("samtools" ,samtools)
         ("bwa" ,bwa)
         ("grep" ,grep)
         ("sed" ,sed)
         ("coreutils" ,coreutils)))
      (propagated-inputs
       `(("python-numpy" ,python2-numpy)))
      (home-page "https://ecogenomics.github.io/BamM/")
      (synopsis "Metagenomics-focused BAM file manipulator")
      (description
       "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
      (license license:lgpl3+)))
  ;; BamTools: C++ API and command-line toolkit for BAM files, built with
  ;; CMake.  An extra phase sets LDFLAGS so that the output's lib/bamtools
  ;; directory is added as an rpath at link time.
  (define-public bamtools
    (package
      (name "bamtools")
      (version "2.5.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pezmaster31/bamtools")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f                      ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-before 'configure 'set-ldflags
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (rpath-flag (string-append "-Wl,-rpath="
                                                 out "/lib/bamtools")))
                 (setenv "LDFLAGS" rpath-flag))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/pezmaster31/bamtools")
      (synopsis "C++ API and command-line toolkit for working with BAM data")
      (description
       "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
      (license license:expat)))
  ;; BamUtil: programs operating on SAM/BAM files.  There is no 'configure
  ;; phase; the install directory and the C++ dialect are passed to "make"
  ;; through make flags.
  (define-public bamutils
    (package
      (name "bamutils")
      (version "1.0.13")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://genome.sph.umich.edu/w/images/7/70/"
                             "BamUtilLibStatGen." version ".tgz"))
         (sha256
          (base32 "0asr1kmjbr3cyf4hkg865y8c2s30v87xvws4q6c8pyfi6wfd1h8n"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                      ; there are no tests
         ;; Both flags are computed on the build side; INSTALLDIR points at
         ;; the bin/ directory of the "out" output.
         #:make-flags
         (list "USER_WARNINGS=-std=gnu++98"
               (string-append "INSTALLDIR="
                              (assoc-ref %outputs "out") "/bin"))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://genome.sph.umich.edu/wiki/BamUtil")
      (synopsis "Programs for working on SAM/BAM files")
      (description "This package provides several programs that perform
operations on SAM/BAM files. All of these programs are built into a
single executable called @code{bam}.")
      (license license:gpl3+)))
  ;; BCFtools: utilities for variant calling and VCF/BCF manipulation.  The
  ;; bundled htslib is deleted from the tarball and the package is configured
  ;; with "--enable-libgsl".
  (define-public bcftools
    (package
      (name "bcftools")
      (version "1.12")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/samtools/bcftools/"
                             "releases/download/"
                             version "/bcftools-" version ".tar.bz2"))
         (sha256
          (base32 "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks"))
         (modules '((guix build utils)))
         ;; Remove the bundled copy of htslib.
         (snippet
          '(begin
             (delete-file-recursively "htslib-1.12")
             #t))))
      (build-system gnu-build-system)
      (arguments
       '(#:test-target "test"
         #:configure-flags (list "--enable-libgsl")
         #:phases
         (modify-phases %standard-phases
           (add-before 'check 'patch-tests
             (lambda _
               ;; Replace the /bin/bash reference in the test driver with
               ;; the bash found in PATH.
               (let ((bash (which "bash")))
                 (substitute* "test/test.pl"
                   (("/bin/bash") bash)))
               #t)))))
      (native-inputs
       `(("htslib" ,htslib)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://samtools.github.io/bcftools/")
      (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
      (description
       "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
      ;; The sources are dual MIT/GPL, but become GPL-only when USE_GPL=1.
      (license (list license:gpl3+ license:expat))))
  ;; Variant of bcftools pinned to release 1.10 and paired with htslib-1.10.
  ;; Every field not listed here (build system, arguments, inputs, metadata)
  ;; is inherited unchanged from 'bcftools'.
  (define-public bcftools-1.10
    (package
      (inherit bcftools)
      (name "bcftools")
      (version "1.10")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/samtools/bcftools/"
                             "releases/download/"
                             version "/bcftools-" version ".tar.bz2"))
         (sha256
          (base32 "10xgwfdgqb6dsmr3ndnpb77mc3a38dy8kh2c6czn6wj7jhdp4dra"))
         (modules '((guix build utils)))
         ;; Remove the bundled copy of htslib.
         (snippet
          '(begin
             (delete-file-recursively "htslib-1.10")
             #t))))
      (native-inputs
       `(("htslib" ,htslib-1.10)
         ("perl" ,perl)))))
  ;; BEDOPS: tools for genomic feature operations.  The package builds the
  ;; in-tree copies of jansson, zlib and bzip2; see the FIXME in the
  ;; 'unpack-tarballs phase for the reason.
  (define-public bedops
    (package
      (name "bedops")
      (version "2.4.35")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bedops/bedops")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f
         #:make-flags
         (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'unpack-tarballs
             (lambda _
               ;; FIXME: Bedops includes tarballs of minimally patched
               ;; upstream libraries jansson, zlib, and bzip2.  We cannot
               ;; just use stock libraries because at least one of the
               ;; libraries (zlib) is patched to add a C++ function
               ;; definition (deflateInit2cpp).  Until the Bedops developers
               ;; offer a way to link against system libraries we have to
               ;; build the in-tree copies of these three libraries.
               ;;
               ;; See upstream discussion:
               ;; https://github.com/bedops/bedops/issues/124

               ;; Unpack the tarballs now so that their contents benefit
               ;; from shebang patching.
               (with-directory-excursion "third-party"
                 (for-each (lambda (tarball)
                             (invoke "tar" "xvf" tarball))
                           '("jansson-2.6.tar.bz2"
                             "zlib-1.2.7.tar.bz2"
                             "bzip2-1.0.6.tar.bz2")))
               ;; The Makefile must not unpack the tarballs again, and the
               ;; nested configure scripts are run with CONFIG_SHELL=bash.
               (substitute* "system.mk/Makefile.linux"
                 (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                 (("\\./configure") "CONFIG_SHELL=bash ./configure"))
               (substitute* "third-party/zlib-1.2.7/Makefile.in"
                 (("^SHELL=.*$") "SHELL=bash\n"))
               #t))
           (delete 'configure))))
      (home-page "https://github.com/bedops/bedops")
      (synopsis "Tools for high-performance genomic feature operations")
      (description
       "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.
BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
      (license license:gpl2+)))
  ;; bedtools: utilities for genome arithmetic.  There is no 'configure
  ;; phase; the installation prefix is handed to "make" as a make flag and
  ;; the test suite is run through the "test" target.
  (define-public bedtools
    (package
      (name "bedtools")
      (version "2.30.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                             "download/v" version "/"
                             "bedtools-" version ".tar.gz"))
         (sha256
          (base32 "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik"))))
      (build-system gnu-build-system)
      (arguments
       '(#:test-target "test"
         ;; Install into the "out" output.
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list (string-append "prefix=" out)))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))
      (native-inputs
       `(("python" ,python-wrapper)))
      (inputs
       `(("samtools" ,samtools)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bedtools2")
      (synopsis "Tools for genome analysis and arithmetic")
      (description
       "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
      (license license:expat)))
  498. ;; Later releases of bedtools produce files with more columns than
  499. ;; what Ribotaper expects.
  (define-public bedtools-2.18
    (package
      (inherit bedtools)
      (name "bedtools")
      (version "2.18.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/arq5x/bedtools2/"
                             "releases/download/v" version
                             "/bedtools-" version ".tar.gz"))
         (sha256
          (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
      ;; These arguments replace the inherited ones entirely: no make flags
      ;; are passed, and the 'install phase is done by hand.
      (arguments
       '(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy every file found under the build tree's "bin"
               ;; directory into the bin/ directory of the "out" output.
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin/"))
                      (built (find-files "bin" ".*")))
                 (for-each (lambda (program)
                             (install-file program target))
                           built))
               #t)))))))
  ;; pbbam: library and tools for PacBio BAM files, built with Meson.  The
  ;; test suite is disabled both in Guix (#:tests? #f) and in the Meson
  ;; configuration (-Dtests=false); see the TODO below.
  (define-public pbbam
    (package
      (name "pbbam")
      (version "0.23.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/pbbam")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
      (build-system meson-build-system)
      (arguments
       '(;; TODO: tests/pbbam_test cannot be linked
         ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
         ;;   undefined reference to symbol '_ZTIN7testing4TestE'
         ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
         ;;   error adding symbols: DSO missing from command line
         #:tests? #f
         #:configure-flags '("-Dtests=false")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'find-googletest
             (lambda* (#:key inputs #:allow-other-keys)
               ;; It doesn't find gtest_main because there's no pkg-config
               ;; file for it.  Look the library up through the compiler
               ;; object instead, pointing it at the "googletest" input.
               (let ((googletest (assoc-ref inputs "googletest")))
                 (substitute* "tests/meson.build"
                   (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                    (format #f
                            (string-append
                             "cpp = meson.get_compiler('cpp')\n"
                             "pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n")
                            googletest))))
               #t)))))
      (native-inputs
       `(("googletest" ,googletest)
         ("pkg-config" ,pkg-config)
         ("python" ,python-wrapper)))     ; for tests
      (inputs
       `(("boost" ,boost)
         ("samtools" ,samtools)))
      ;; These libraries are listed as "Required" in the pkg-config file.
      (propagated-inputs
       `(("htslib" ,htslib)
         ("zlib" ,zlib)))
      (home-page "https://github.com/PacificBiosciences/pbbam")
      (synopsis "Work with PacBio BAM files")
      (description
       "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
      (license license:bsd-3)))
  ;; pbgzip: parallel block gzip.  The package is built from a fixed Git
  ;; commit, so the version string is derived from that commit with
  ;; 'git-version'.
  (define-public pbgzip
    (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")
          (revision "0"))
      (package
        (name "pbgzip")
        (version (git-version "0.0.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/nh13/pbgzip")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
        (build-system gnu-build-system)
        (native-inputs
         `(("autoconf" ,autoconf)
           ("automake" ,automake)))
        (inputs
         `(("zlib" ,zlib)))
        (home-page "https://github.com/nh13/pbgzip")
        (synopsis "Parallel Block GZIP")
        (description "This package implements parallel block gzip. For many
formats, in particular genomics data formats, data are compressed in
fixed-length blocks such that they can be easily indexed based on a (genomic)
coordinate order, since typically each block is sorted according to this order.
This allows for each block to be individually compressed (deflated), or more
importantly, decompressed (inflated), with the latter enabling random retrieval
of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited
to any particular format, but certain features are tailored to genomics data
formats when enabled. Parallel decompression is somewhat faster, but the true
speedup comes during compression.")
        (license license:expat))))
  ;; blasr-libcpp: libraries used by PacBio sequence analysis tools, built
  ;; with Meson.  Two phases patch the Meson files: one adds the HDF5
  ;; libraries to the dependency list, the other changes how gtest_main is
  ;; looked up.  The test suite is disabled; see the TODO below.
  (define-public blasr-libcpp
    (package
      (name "blasr-libcpp")
      (version "5.3.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/blasr_libcpp")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
      (build-system meson-build-system)
      (arguments
       '(;; TODO: unittest/libblasr_unittest cannot be linked
         ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
         ;;   undefined reference to symbol
         ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
         ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
         ;;   error adding symbols: DSO missing from command line
         #:tests? #f
         #:configure-flags '("-Dtests=false")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'link-with-hdf5
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Prepend lookups of the hdf5 and hdf5_cpp libraries, rooted
               ;; at the "hdf5" input, to the libblasr_deps list.
               (let ((hdf5-prefix (assoc-ref inputs "hdf5")))
                 (substitute* "meson.build"
                   (("libblasr_deps = \\[" opening)
                    (format #f
                            (string-append
                             "~acpp.find_library('hdf5', dirs : '~a'), "
                             "cpp.find_library('hdf5_cpp', dirs : '~a'), ")
                            opening hdf5-prefix hdf5-prefix))))
               #t))
           (add-after 'unpack 'find-googletest
             (lambda* (#:key inputs #:allow-other-keys)
               ;; It doesn't find gtest_main because there's no pkg-config
               ;; file for it.  Look the library up through the compiler
               ;; object instead, pointing it at the "googletest" input.
               (let ((googletest (assoc-ref inputs "googletest")))
                 (substitute* "unittest/meson.build"
                   (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                    (format #f
                            (string-append
                             "cpp = meson.get_compiler('cpp')\n"
                             "libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n")
                            googletest))))
               #t)))))
      (native-inputs
       `(("googletest" ,googletest)
         ("pkg-config" ,pkg-config)))
      (inputs
       `(("boost" ,boost)
         ("hdf5" ,hdf5)
         ("pbbam" ,pbbam)
         ("zlib" ,zlib)))
      (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
      (synopsis "Library for analyzing PacBio genomic sequences")
      (description
       "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
      (license license:bsd-3)))
  ;; blasr: PacBio long read aligner, built with Meson.  A phase patches
  ;; meson.build so that the HDF5 libraries are part of the dependency list.
  (define-public blasr
    (package
      (name "blasr")
      (version "5.3.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/blasr")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
      (build-system meson-build-system)
      (arguments
       '(;; Tests require "cram" executable, which is not packaged.
         #:tests? #f
         #:configure-flags '("-Dtests=false")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'link-with-hdf5
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Prepend lookups of the hdf5 and hdf5_cpp libraries, rooted
               ;; at the "hdf5" input, to the blasr_deps list.
               (let ((hdf5-prefix (assoc-ref inputs "hdf5")))
                 (substitute* "meson.build"
                   (("blasr_deps = \\[" opening)
                    (format #f
                            (string-append
                             "~acpp.find_library('hdf5', dirs : '~a'), "
                             "cpp.find_library('hdf5_cpp', dirs : '~a'), ")
                            opening hdf5-prefix hdf5-prefix))))
               #t)))))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("boost" ,boost)
         ("blasr-libcpp" ,blasr-libcpp)
         ("hdf5" ,hdf5)
         ("pbbam" ,pbbam)
         ("zlib" ,zlib)))
      (home-page "https://github.com/PacificBiosciences/blasr")
      (synopsis "PacBio long read aligner")
      (description
       "Blasr is a genomic sequence aligner for processing PacBio long reads.")
      (license license:bsd-3)))
  ;; Ribotaper: pipeline for defining translated ORFs from ribo-seq data.
  ;; After installation the four pipeline scripts are wrapped so that
  ;; R_LIBS_SITE is set to the value it had in the build environment.
  (define-public ribotaper
    (package
      (name "ribotaper")
      (version "1.3.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                             "files/RiboTaper/RiboTaper_Version_"
                             version ".tar.gz"))
         (sha256
          (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
      (build-system gnu-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-executables
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin/"))
                     (r-libs (getenv "R_LIBS_SITE")))
                 ;; The '= wrapper mode assigns the variable outright.
                 (for-each
                  (lambda (script)
                    (wrap-program (string-append bindir script)
                      (list "R_LIBS_SITE" ":" '= (list r-libs))))
                  '("create_annotations_files.bash"
                    "create_metaplots.bash"
                    "Ribotaper_ORF_find.sh"
                    "Ribotaper.sh")))
               #t)))))
      (inputs
       `(("bedtools" ,bedtools-2.18)
         ("samtools" ,samtools-0.1)
         ("r-minimal" ,r-minimal)
         ("r-foreach" ,r-foreach)
         ("r-xnomial" ,r-xnomial)
         ("r-domc" ,r-domc)
         ("r-multitaper" ,r-multitaper)
         ("r-seqinr" ,r-seqinr)))
      (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
      (synopsis "Define translated ORFs using ribosome profiling data")
      (description
       "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
      (license license:gpl3+)))
  ;; RiboDiff: detection of translation efficiency changes, built with
  ;; Python 2.  setup.py is patched so that scripts/TE.py is declared as a
  ;; script.
  (define-public ribodiff
    (package
      (name "ribodiff")
      (version "0.2.2")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ratschlab/RiboDiff")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; Generate an installable executable script wrapper.
           (add-after 'unpack 'patch-setup.py
             (lambda _
               ;; Keep the matched "packages=" line and add a "scripts="
               ;; line after it, reusing the text captured before
               ;; "packages=" as the new line's prefix.
               (substitute* "setup.py"
                 (("^(.*)packages=.*" matched indent)
                  (string-append matched "\n"
                                 indent "scripts=['scripts/TE.py'],\n")))
               #t)))))
      (native-inputs
       `(("python-mock" ,python2-mock)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-scipy" ,python2-scipy)
         ("python-statsmodels" ,python2-statsmodels)))
      (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
      (synopsis "Detect translation efficiency changes from ribosome footprints")
      (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
      (license license:gpl3+)))
  (define-public bioawk
    (package
      (name "bioawk")
      (version "1.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lh3/bioawk")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there is no test suite to run
         ;; The files generated by Bison have to exist before the remaining
         ;; targets can be built, so build serially.
         #:parallel-build? #f
         #:phases
         (modify-phases %standard-phases
           ;; Nothing to configure.
           (delete 'configure)
           ;; Install by hand: the "bioawk" binary goes to bin/ and the
           ;; "awk.1" manual page is copied under the name "bioawk.1".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix  (assoc-ref outputs "out"))
                      (bindir  (string-append prefix "/bin"))
                      (man1dir (string-append prefix "/share/man/man1")))
                 (mkdir-p man1dir)
                 (copy-file "awk.1" (string-append man1dir "/bioawk.1"))
                 (install-file "bioawk" bindir))
               #t)))))
      (native-inputs
       `(("bison" ,bison)))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/lh3/bioawk")
      (synopsis "AWK with bioinformatics extensions")
      (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
      (license license:x11)))
  (define-public python-htsget
    (package
      (name "python-htsget")
      (version "0.2.5")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "htsget" version))
         (sha256
          (base32 "0ic07q85vhw9djf23k57b21my7i5xp400m8gfqgr5gcryqvdr0yk"))))
      (build-system python-build-system)
      ;; Libraries that are propagated to users of this package.
      (propagated-inputs
       `(("python-humanize" ,python-humanize)
         ("python-requests" ,python-requests)
         ("python-six" ,python-six)))
      ;; Only needed while building.
      (native-inputs
       `(("python-setuptools-scm" ,python-setuptools-scm)))
      (home-page "https://pypi.org/project/htsget/")
      (synopsis "Python API and command line interface for the GA4GH htsget API")
      (description "This package is a client implementation of the GA4GH htsget
protocol. It provides a simple and reliable way to retrieve genomic data from
servers supporting the protocol.")
      (license license:asl2.0)))
  (define-public python-pybedtools
    (package
      (name "python-pybedtools")
      (version "0.8.2")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "pybedtools" version))
                (sha256
                 (base32
                  "0wc7z8g8prgdx7n5chjva2fdq03wiwhqisjjxzkjg1j5k5ha7151"))))
      (build-system python-build-system)
      (arguments
       ;; (srfi srfi-26) provides `cut', used by the
       ;; 'remove-cython-generated-files phase.
       `(#:modules ((srfi srfi-26)
                    (guix build utils)
                    (guix build python-build-system))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'disable-broken-tests
             (lambda _
               (substitute* "pybedtools/test/test_helpers.py"
                 ;; Requires internet access.
                 (("def test_chromsizes")
                  "def _do_not_test_chromsizes")
                 ;; Broken as a result of the workaround used in the check phase
                 ;; (see: https://github.com/daler/pybedtools/issues/192).
                 (("def test_getting_example_beds")
                  "def _do_not_test_getting_example_beds"))
               ;; This issue still occurs on python2
               (substitute* "pybedtools/test/test_issues.py"
                 (("def test_issue_303")
                  "def _test_issue_303"))))
           ;; Force the Cythonization of C++ files to guard against compilation
           ;; problems: delete every C/C++ file that has a .pyx file of the
           ;; same base name, so it gets regenerated by the next phase.
           (add-after 'unpack 'remove-cython-generated-files
             (lambda _
               (let ((cython-sources (map (cut string-drop-right <> 4)
                                          (find-files "." "\\.pyx$")))
                     (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
                 (define (strip-extension filename)
                   (string-take filename (string-index-right filename #\.)))
                 (define (cythonized? c/c++-file)
                   (member (strip-extension c/c++-file) cython-sources))
                 (for-each delete-file (filter cythonized? c/c++-files)))))
           (add-after 'remove-cython-generated-files 'generate-cython-extensions
             (lambda _
               (invoke "python" "setup.py" "cythonize")))
           (replace 'check
             ;; Honor #:tests? so that the test suite can be switched off
             ;; (e.g. with --without-tests) like with the standard phase.
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 ;; The tests need to be run from elsewhere, see
                 ;; https://github.com/daler/pybedtools/issues/192
                 (mkdir-p "/tmp/test")
                 (copy-recursively "pybedtools/test" "/tmp/test")
                 (with-directory-excursion "/tmp/test"
                   (invoke "pytest" "-v" "--doctest-modules")))
               #t)))))
      (propagated-inputs
       `(("bedtools" ,bedtools)
         ("samtools" ,samtools)
         ("python-matplotlib" ,python-matplotlib)
         ("python-pysam" ,python-pysam)
         ("python-pyyaml" ,python-pyyaml)))
      (native-inputs
       `(("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-cython" ,python-cython)
         ("kentutils" ,kentutils)       ; for bedGraphToBigWig
         ("python-six" ,python-six)
         ;; For the test suite.
         ("python-pytest" ,python-pytest)
         ("python-psutil" ,python-psutil)))
      (home-page "https://pythonhosted.org/pybedtools/")
      (synopsis "Python wrapper for BEDtools programs")
      (description
       "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
      (license license:gpl2+)))
  ;; Python 2 variant of python-pybedtools; it additionally needs
  ;; python2-pathlib as a native input.
  (define-public python2-pybedtools
    (let ((base (package-with-python2 python-pybedtools)))
      (package
        (inherit base)
        (native-inputs
         (cons `("python2-pathlib" ,python2-pathlib)
               (package-native-inputs base))))))
  (define-public python-biom-format
    (package
      (name "python-biom-format")
      (version "2.1.7")
      (source
       (origin
         (method git-fetch)
         ;; Use GitHub as source because PyPI distribution does not contain
         ;; test data: https://github.com/biocore/biom-format/issues/693
         (uri (git-reference
               (url "https://github.com/biocore/biom-format")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
         (modules '((guix build utils)))
         (snippet '(begin
                     ;; Delete generated C files.  The pattern is anchored so
                     ;; that only names ending in ".c" match, not every name
                     ;; that merely contains ".c" (such as "setup.cfg").
                     (for-each delete-file (find-files "." "\\.c$"))
                     #t))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Have setup.py regenerate the C files removed by the snippet.
           (add-after 'unpack 'use-cython
             (lambda _ (setenv "USE_CYTHON" "1") #t))
           ;; Put a "skipif(True)" decorator in front of the two tests below
           ;; so that they are skipped.
           (add-after 'unpack 'disable-broken-tests
             (lambda _
               (substitute* "biom/tests/test_cli/test_validate_table.py"
                 (("^(.+)def test_invalid_hdf5" m indent)
                  (string-append indent
                                 "@npt.dec.skipif(True, msg='Guix')\n"
                                 m)))
               (substitute* "biom/tests/test_table.py"
                 (("^(.+)def test_from_hdf5_issue_731" m indent)
                  (string-append indent
                                 "@npt.dec.skipif(True, msg='Guix')\n"
                                 m)))
               #t))
           ;; Make the installed gzip files writable before the
           ;; 'reset-gzip-timestamps phase runs.  Only names that end in
           ;; ".gz" are touched.
           (add-before 'reset-gzip-timestamps 'make-files-writable
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (for-each (lambda (file) (chmod file #o644))
                           (find-files out "\\.gz$"))
                 #t))))))
      (propagated-inputs
       `(("python-numpy" ,python-numpy)
         ("python-scipy" ,python-scipy)
         ("python-flake8" ,python-flake8)
         ("python-future" ,python-future)
         ("python-click" ,python-click)
         ("python-h5py" ,python-h5py)
         ;; FIXME: Upgrade to pandas 1.0 when
         ;; https://github.com/biocore/biom-format/issues/837 is resolved.
         ("python-pandas" ,python-pandas-0.25)))
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-pytest" ,python-pytest)
         ("python-pytest-cov" ,python-pytest-cov)
         ("python-nose" ,python-nose)))
      (home-page "http://www.biom-format.org")
      (synopsis "Biological Observation Matrix (BIOM) format utilities")
      (description
       "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
      (license license:bsd-3)
      ;; The Python 2 variant is defined separately because it needs an
      ;; extra build phase.
      (properties `((python2-variant . ,(delay python2-biom-format))))))
  ;; Python 2 variant of python-biom-format: same package, plus one phase
  ;; that edits setup.py.
  (define-public python2-biom-format
    (let ((py2-base (package-with-python2
                     (strip-python2-variant python-biom-format))))
      (package
        (inherit py2-base)
        (arguments
         (substitute-keyword-arguments (package-arguments py2-base)
           ((#:phases inherited-phases)
            `(modify-phases ,inherited-phases
               ;; Do not require the unmaintained pyqi library: the statement
               ;; adding it to install_requires is replaced by "pass".
               (add-after 'unpack 'remove-pyqi
                 (lambda _
                   (substitute* "setup.py"
                     (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                   #t)))))))))
  (define-public python-pairtools
    (package
      (name "python-pairtools")
      (version "0.3.0")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/mirnylab/pairtools")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Replace the hard-coded /bin/bash with the bash found in PATH
           ;; at build time.
           (add-after 'unpack 'fix-references
             (lambda _
               (substitute* '("pairtools/pairtools_merge.py"
                              "pairtools/pairtools_sort.py")
                 (("/bin/bash") (which "bash")))
               #t))
           ;; Run pytest from /tmp with the installed package on the Python
           ;; path.  Honor #:tests? so that the test suite can be switched
           ;; off (e.g. with --without-tests) like with the standard phase.
           (replace 'check
             (lambda* (#:key tests? inputs outputs #:allow-other-keys)
               (when tests?
                 (add-installed-pythonpath inputs outputs)
                 (with-directory-excursion "/tmp"
                   (invoke "pytest" "-v")))
               #t)))))
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-nose" ,python-nose)
         ("python-pytest" ,python-pytest)))
      (inputs
       `(("python" ,python-wrapper)))
      (propagated-inputs
       `(("htslib" ,htslib)             ; for bgzip, looked up in PATH
         ("samtools" ,samtools)         ; looked up in PATH
         ("lz4" ,lz4)                   ; for lz4c
         ("python-click" ,python-click)
         ("python-numpy" ,python-numpy)))
      (home-page "https://github.com/mirnylab/pairtools")
      (synopsis "Process mapped Hi-C data")
      (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:
@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
      (license license:expat)))
  (define-public bioperl-minimal
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is "release-" followed by the version with every
               ;; dot turned into a dash.
               (commit (string-append "release-"
                                      (string-map (lambda (ch)
                                                    (if (char=? ch #\.)
                                                        #\-
                                                        ch))
                                                  version)))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       ;; Host side: collect the names of all the packages found among the
       ;; transitive target inputs of this package's inputs, without
       ;; duplicates.  The resulting list is spliced into the build code.
       (let ((input-names
              (map (compose package-name cadr)
                   (delete-duplicates
                    (concatenate
                     (map (compose package-transitive-target-inputs cadr)
                          (package-inputs this-package)))))))
         `(#:phases
           (modify-phases %standard-phases
             (add-after 'install 'wrap-programs
               (lambda* (#:key outputs #:allow-other-keys)
                 ;; Make sure all executables in "bin" find the required Perl
                 ;; modules at runtime. As the PERL5LIB variable contains also
                 ;; the paths of native inputs, we pick the transitive target
                 ;; inputs from %build-inputs.
                 (let* ((prefix   (assoc-ref outputs "out"))
                        (bindir   (string-append prefix "/bin/"))
                        (perl5lib (string-join
                                   (cons (string-append prefix
                                                        "/lib/perl5/site_perl")
                                         (map (lambda (input)
                                                (assoc-ref %build-inputs input))
                                              ',input-names))
                                   ":")))
                   ;; Wrap every "*.pl" file found below bin/.
                   (for-each (lambda (script)
                               (wrap-program script
                                 `("PERL5LIB" ":" prefix (,perl5lib))))
                             (find-files bindir "\\.pl$"))
                   #t)))))))
      (inputs
       `(("perl-module-build" ,perl-module-build)
         ("perl-data-stag" ,perl-data-stag)
         ("perl-libwww" ,perl-libwww)
         ("perl-uri" ,perl-uri)))
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license)))
  (define-public perl-bio-db-hts
    (package
      (name "perl-bio-db-hts")
      (version "3.01")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-"
               version ".tar.gz"))
         (sha256
          (base32 "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj"))))
      (build-system perl-build-system)
      ;; Propagated to users of this package; note that the "htslib" label
      ;; refers to the htslib-1.9 variable.
      (propagated-inputs
       `(("bioperl-minimal" ,bioperl-minimal)
         ("htslib" ,htslib-1.9)))
      ;; Build-time tools.
      (native-inputs
       `(("perl-module-build" ,perl-module-build)
         ("pkg-config" ,pkg-config)))
      (home-page "https://metacpan.org/release/Bio-DB-HTS")
      (synopsis "Perl interface to HTS library for DNA sequencing")
      (description "This is a Perl interface to the HTS library for DNA
sequencing.")
      (license license:asl2.0)))
  (define-public python-biopython
    (package
      (name "python-biopython")
      (version "1.73")
      (source
       (origin
         (method url-fetch)
         ;; use PyPi rather than biopython.org to ease updating
         (uri (pypi-uri "biopython" version))
         (sha256
          (base32 "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Some tests require a home directory to be set, so point HOME
           ;; at /tmp before the test suite runs.
           (add-before 'check 'set-home
             (lambda _
               (setenv "HOME" "/tmp")
               #t)))))
      (propagated-inputs
       `(("python-numpy" ,python-numpy)))
      (home-page "https://biopython.org/")
      (synopsis "Tools for biological computation in Python")
      (description
       "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
      (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
  ;; Python 2 variant, derived mechanically from python-biopython.
  (define-public python2-biopython (package-with-python2 python-biopython))
  (define-public python-fastalite
    (package
      (name "python-fastalite")
      (version "0.3")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "fastalite" version))
         (sha256
          (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
      (build-system python-build-system)
      (arguments
       ;; Test data is not distributed, so the tests are disabled.
       `(#:tests? #f))
      (home-page "https://github.com/nhoffman/fastalite")
      (synopsis "Simplest possible FASTA parser")
      (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
      (license license:expat)))
  ;; Python 2 variant, derived mechanically from python-fastalite.
  (define-public python2-fastalite (package-with-python2 python-fastalite))
  (define-public bpp-core
    ;; The last release dates back to 2014, and cloning the git repository is
    ;; the recommended way to install from source, so a fixed commit is
    ;; packaged.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
    (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
          (revision "1"))
      (package
        (name "bpp-core")
        ;; Yields "2.2.0-1." followed by the first 7 characters of COMMIT.
        (version (git-version "2.2.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-core")
                 (commit commit)))
           ;; Yields NAME-VERSION-checkout.
           (file-name (git-file-name name version))
           (sha256
            (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "C++ libraries for Bioinformatics")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
        (license license:cecill-c))))
  (define-public bpp-phyl
    ;; The last release dates back to 2014, and cloning the git repository is
    ;; the recommended way to install from source, so a fixed commit is
    ;; packaged.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
    (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
          (revision "1"))
      (package
        (name "bpp-phyl")
        ;; Yields "2.2.0-1." followed by the first 7 characters of COMMIT.
        (version (git-version "2.2.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
                 (commit commit)))
           ;; Yields NAME-VERSION-checkout.
           (file-name (git-file-name name version))
           (sha256
            (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f
           ;; If out-of-source, test data is not copied into the build
           ;; directory so the tests fail.
           #:out-of-source? #f))
        (inputs
         `(("bpp-core" ,bpp-core)
           ("bpp-seq" ,bpp-seq)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ phylogenetic Library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
        (license license:cecill-c))))
  (define-public bpp-popgen
    ;; The last release dates back to 2014, and cloning the git repository is
    ;; the recommended way to install from source, so a fixed commit is
    ;; packaged.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
    (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
          (revision "1"))
      (package
        (name "bpp-popgen")
        ;; Yields "2.2.0-1." followed by the first 7 characters of COMMIT.
        (version (git-version "2.2.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
                 (commit commit)))
           ;; Yields NAME-VERSION-checkout.
           (file-name (git-file-name name version))
           (sha256
            (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f
           #:tests? #f))                ; There are no tests.
        (inputs
         `(("bpp-core" ,bpp-core)
           ("bpp-seq" ,bpp-seq)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ population genetics library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
        (license license:cecill-c))))
  (define-public bpp-seq
    ;; The last release dates back to 2014, and cloning the git repository is
    ;; the recommended way to install from source, so a fixed commit is
    ;; packaged.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
    (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
          (revision "1"))
      (package
        (name "bpp-seq")
        ;; Yields "2.2.0-1." followed by the first 7 characters of COMMIT.
        (version (git-version "2.2.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://biopp.univ-montp2.fr/git/bpp-seq")
                 (commit commit)))
           ;; Yields NAME-VERSION-checkout.
           (file-name (git-file-name name version))
           (sha256
            (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f
           ;; If out-of-source, test data is not copied into the build
           ;; directory so the tests fail.
           #:out-of-source? #f))
        (inputs
         `(("bpp-core" ,bpp-core)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bio++ sequence library")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
        (license license:cecill-c))))
  (define-public bppsuite
    ;; The last release was in 2014 and the recommended way to install from source
    ;; is to clone the git repository, so we do this.
    ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
    (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
      (package
        (name "bppsuite")
        (version (string-append "2.2.0-1." (string-take commit 7)))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "http://biopp.univ-montp2.fr/git/bppsuite")
                        (commit commit)))
                  (file-name (string-append name "-" version "-checkout"))
                  (sha256
                   (base32
                    "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
        (build-system cmake-build-system)
        (arguments
         `(#:parallel-build? #f
           #:tests? #f))                ; There are no tests.
        (native-inputs
         `(("groff" ,groff)
           ("man-db" ,man-db)
           ("texinfo" ,texinfo)))
        (inputs
         ;; Each input needs its own label: a label is the key used to look
         ;; an input up, so bpp-popgen must not re-use the "bpp-phyl" label.
         `(("bpp-core" ,bpp-core)
           ("bpp-seq" ,bpp-seq)
           ("bpp-phyl" ,bpp-phyl)
           ("bpp-popgen" ,bpp-popgen)))
        (home-page "http://biopp.univ-montp2.fr")
        (synopsis "Bioinformatics tools written with the Bio++ libraries")
        (description
         "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
        (license license:cecill-c))))
  (define-public blast+
    (package
      (name "blast+")
      (version "2.11.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
               version "/ncbi-blast-" version "+-src.tar.gz"))
         (sha256
          (base32 "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Remove bundled bzip2, zlib and pcre.
             (for-each delete-file-recursively
                       '("c++/src/util/compress/bzip2"
                         "c++/src/util/compress/zlib"
                         "c++/src/util/regexp"))
             ;; ... and stop the Makefile from referring to the first two.
             (substitute* "c++/src/util/compress/Makefile.in"
               (("bzip2 zlib api") "api"))
             ;; Remove useless msbuild directory
             (delete-file-recursively
              "c++/src/build-system/project_tree_builder/msbuild")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(;; There are two(!) tests for this massive library, and both fail with
         ;; "unparsable timing stats".
         ;; ERR [127] --  [serial/datatool] datatool.sh (unparsable timing stats)
         ;; ERR [127] --  [serial/datatool] datatool_xml.sh (unparsable timing stats)
         #:tests? #f
         #:out-of-source? #t
         #:parallel-build? #f           ; not supported
         #:phases
         (modify-phases %standard-phases
           ;; $HOME needs to be set at some point during the configure phase
           (add-before 'configure 'set-HOME
             (lambda _
               (setenv "HOME" "/tmp")
               #t))
           ;; Everything below operates from within the "c++" directory.
           (add-after 'unpack 'enter-dir
             (lambda _
               (chdir "c++")
               #t))
           (add-after 'enter-dir 'fix-build-system
             (lambda _
               ;; Return the replacement text for the tool named CMD, or #f
               ;; (after printing a warning) when CMD is not found in PATH.
               (define (which* cmd)
                 (if (string=? cmd "date")
                     ;; make call to "date" deterministic
                     "date -d @0"
                     (or (which cmd)
                         (begin
                           (format (current-error-port)
                                   "WARNING: Unable to find absolute path for ~s~%"
                                   cmd)
                           #f))))

               ;; Rewrite hardcoded paths to various tools; a path whose
               ;; tool cannot be resolved is left untouched.
               (substitute* (append '("src/build-system/configure.ac"
                                      "src/build-system/configure"
                                      "src/build-system/helpers/run_with_lock.c"
                                      "scripts/common/impl/if_diff.sh"
                                      "scripts/common/impl/run_with_lock.sh"
                                      "src/build-system/Makefile.configurables.real"
                                      "src/build-system/Makefile.in.top"
                                      "src/build-system/Makefile.meta.gmake=no"
                                      "src/build-system/Makefile.meta.in"
                                      "src/build-system/Makefile.meta_l"
                                      "src/build-system/Makefile.meta_p"
                                      "src/build-system/Makefile.meta_r"
                                      "src/build-system/Makefile.mk.in"
                                      "src/build-system/Makefile.requirements"
                                      "src/build-system/Makefile.rules_with_autodep.in")
                                    (find-files "scripts/common/check" "\\.sh$"))
                 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole directory tool)
                  (or (which* tool) whole)))

               (substitute* (find-files "src/build-system" "^config.*")
                 (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
                 (("^PATH=.*") ""))

               ;; rewrite "/var/tmp" in check script
               (substitute* "scripts/common/check/check_make_unix.sh"
                 (("/var/tmp") "/tmp"))

               ;; do not reset PATH
               (substitute* (find-files "scripts/common/impl/" "\\.sh$")
                 (("^ *PATH=.*") "")
                 (("action=/bin/") "action=")
                 (("export PATH") ":"))
               #t))
           (replace 'configure
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((prefix     (assoc-ref outputs "out"))
                     (libdir     (string-append (assoc-ref outputs "lib")
                                                "/lib"))
                     (includedir (string-append (assoc-ref outputs "include")
                                                "/include/ncbi-tools++")))
                 ;; The 'configure' script doesn't recognize things like
                 ;; '--enable-fast-install'.
                 (invoke "./configure.orig"
                         (string-append "--with-build-root=" (getcwd) "/build")
                         (string-append "--prefix=" prefix)
                         (string-append "--libdir=" libdir)
                         (string-append "--includedir=" includedir)
                         (string-append "--with-bz2="
                                        (assoc-ref inputs "bzip2"))
                         (string-append "--with-z="
                                        (assoc-ref inputs "zlib"))
                         (string-append "--with-pcre="
                                        (assoc-ref inputs "pcre"))
                         ;; Each library is built twice by default, once
                         ;; with "-static" in its name, and again
                         ;; without.
                         "--without-static"
                         "--with-dll")
                 #t))))))
      (outputs '("out"                  ;  21 MB
                 "lib"                  ; 226 MB
                 "include"))            ;  33 MB
      (inputs
       `(("bzip2" ,bzip2)
         ("lmdb" ,lmdb)
         ("zlib" ,zlib)
         ("pcre" ,pcre)
         ("perl" ,perl)
         ("python" ,python-wrapper)))
      (native-inputs
       `(("cpio" ,cpio)))
      (home-page "https://blast.ncbi.nlm.nih.gov")
      (synopsis "Basic local alignment search tool")
      (description
       "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
      ;; Most of the sources are in the public domain, with the following
      ;; exceptions:
      ;;   * Expat:
      ;;     * ./c++/include/util/bitset/
      ;;     * ./c++/src/html/ncbi_menu*.js
      ;;   * Boost license:
      ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
      ;;   * LGPL 2+:
      ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
      ;;   * ASL 2.0:
      ;;     * ./c++/src/corelib/teamcity_*
      (license (list license:public-domain
                     license:expat
                     license:boost1.0
                     license:lgpl2.0+
                     license:asl2.0))))
  1546. (define-public bless
  1547. (package
  1548. (name "bless")
  1549. (version "1p02")
  1550. (source (origin
  1551. (method url-fetch)
  1552. (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
  1553. version ".tgz"))
  1554. (sha256
  1555. (base32
  1556. "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
  1557. (modules '((guix build utils)))
  1558. (snippet
  1559. `(begin
  1560. ;; Remove bundled boost, pigz, zlib, and .git directory
  1561. ;; FIXME: also remove bundled sources for murmurhash3 and
  1562. ;; kmc once packaged.
  1563. (delete-file-recursively "boost")
  1564. (delete-file-recursively "pigz")
  1565. (delete-file-recursively "google-sparsehash")
  1566. (delete-file-recursively "zlib")
  1567. (delete-file-recursively ".git")
  1568. #t))))
  1569. (build-system gnu-build-system)
  1570. (arguments
  1571. '(#:tests? #f ;no "check" target
  1572. #:make-flags
  1573. (list (string-append "ZLIB="
  1574. (assoc-ref %build-inputs "zlib:static")
  1575. "/lib/libz.a")
  1576. (string-append "LDFLAGS="
  1577. (string-join '("-lboost_filesystem"
  1578. "-lboost_system"
  1579. "-lboost_iostreams"
  1580. "-lz"
  1581. "-fopenmp"))))
  1582. #:phases
  1583. (modify-phases %standard-phases
  1584. (add-after 'unpack 'do-not-build-bundled-pigz
  1585. (lambda* (#:key inputs outputs #:allow-other-keys)
  1586. (substitute* "Makefile"
  1587. (("cd pigz/pigz-2.3.3; make") ""))
  1588. #t))
  1589. (add-after 'unpack 'patch-paths-to-executables
  1590. (lambda* (#:key inputs outputs #:allow-other-keys)
  1591. (substitute* "parse_args.cpp"
  1592. (("kmc_binary = .*")
  1593. (string-append "kmc_binary = \""
  1594. (assoc-ref outputs "out")
  1595. "/bin/kmc\";"))
  1596. (("pigz_binary = .*")
  1597. (string-append "pigz_binary = \""
  1598. (assoc-ref inputs "pigz")
  1599. "/bin/pigz\";")))
  1600. #t))
  1601. (replace 'install
  1602. (lambda* (#:key outputs #:allow-other-keys)
  1603. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  1604. (for-each (lambda (file)
  1605. (install-file file bin))
  1606. '("bless" "kmc/bin/kmc"))
  1607. #t)))
  1608. (delete 'configure))))
  1609. (native-inputs
  1610. `(("perl" ,perl)))
  1611. (inputs
  1612. `(("openmpi" ,openmpi)
  1613. ("boost" ,boost)
  1614. ("sparsehash" ,sparsehash)
  1615. ("pigz" ,pigz)
  1616. ("zlib:static" ,zlib "static")
  1617. ("zlib" ,zlib)))
  1618. (supported-systems '("x86_64-linux"))
  1619. (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
  1620. (synopsis "Bloom-filter-based error correction tool for NGS reads")
  1621. (description
  1622. "@dfn{Bloom-filter-based error correction solution for high-throughput
  1623. sequencing reads} (BLESS) uses a single minimum-sized bloom filter is a
  1624. correction tool for genomic reads produced by @dfn{Next-generation
  1625. sequencing} (NGS). BLESS produces accurate correction results with much less
  1626. memory compared with previous solutions and is also able to tolerate a higher
  1627. false-positive rate. BLESS can extend reads like DNA assemblers to correct
  1628. errors at the end of reads.")
  1629. (license license:gpl3+)))
  1630. (define-public bowtie
  1631. (package
  1632. (name "bowtie")
  1633. (version "2.3.4.3")
  1634. (source (origin
  1635. (method git-fetch)
  1636. (uri (git-reference
  1637. (url "https://github.com/BenLangmead/bowtie2")
  1638. (commit (string-append "v" version))))
  1639. (file-name (git-file-name name version))
  1640. (sha256
  1641. (base32
  1642. "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
  1643. (modules '((guix build utils)))
  1644. (snippet
  1645. '(begin
  1646. (substitute* "Makefile"
  1647. ;; replace BUILD_HOST and BUILD_TIME for deterministic build
  1648. (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
  1649. (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
  1650. #t))))
  1651. (build-system gnu-build-system)
  1652. (arguments
  1653. '(#:make-flags
  1654. (list "allall"
  1655. "WITH_TBB=1"
  1656. (string-append "prefix=" (assoc-ref %outputs "out")))
  1657. #:phases
  1658. (modify-phases %standard-phases
  1659. (delete 'configure)
  1660. (replace 'check
  1661. (lambda _
  1662. (invoke "perl"
  1663. "scripts/test/simple_tests.pl"
  1664. "--bowtie2=./bowtie2"
  1665. "--bowtie2-build=./bowtie2-build")
  1666. #t)))))
  1667. (inputs
  1668. `(("tbb" ,tbb)
  1669. ("zlib" ,zlib)
  1670. ("python" ,python-wrapper)))
  1671. (native-inputs
  1672. `(("perl" ,perl)
  1673. ("perl-clone" ,perl-clone)
  1674. ("perl-test-deep" ,perl-test-deep)
  1675. ("perl-test-simple" ,perl-test-simple)))
  1676. (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
  1677. (synopsis "Fast and sensitive nucleotide sequence read aligner")
  1678. (description
  1679. "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
  1680. reads to long reference sequences. It is particularly good at aligning reads
  1681. of about 50 up to 100s or 1,000s of characters, and particularly good at
  1682. aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
  1683. genome with an FM Index to keep its memory footprint small: for the human
  1684. genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
  1685. gapped, local, and paired-end alignment modes.")
  1686. (supported-systems '("x86_64-linux"))
  1687. (license license:gpl3+)))
  1688. (define-public bowtie1
  1689. (package
  1690. (name "bowtie1")
  1691. (version "1.3.0")
  1692. (source (origin
  1693. (method url-fetch)
  1694. (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
  1695. version "/bowtie-" version "-src.zip"))
  1696. (sha256
  1697. (base32
  1698. "11dbihdnrizc6qhx9xsw77w3q5ssx642alaqzvhxx32ak9glvq04"))
  1699. (modules '((guix build utils)))
  1700. (snippet
  1701. '(substitute* "Makefile"
  1702. ;; replace BUILD_HOST and BUILD_TIME for deterministic build
  1703. (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
  1704. (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
  1705. (build-system gnu-build-system)
  1706. (arguments
  1707. '(#:tests? #f ; no "check" target
  1708. #:make-flags
  1709. (list "CC=gcc" "all"
  1710. (string-append "prefix=" (assoc-ref %outputs "out")))
  1711. #:phases
  1712. (modify-phases %standard-phases
  1713. (delete 'configure))))
  1714. (inputs
  1715. `(("python-wrapper" ,python-wrapper)
  1716. ("tbb" ,tbb)
  1717. ("zlib" ,zlib)))
  1718. (supported-systems '("x86_64-linux"))
  1719. (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
  1720. (synopsis "Fast aligner for short nucleotide sequence reads")
  1721. (description
  1722. "Bowtie is a fast, memory-efficient short read aligner. It aligns short
  1723. DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
  1724. reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
  1725. keep its memory footprint small: typically about 2.2 GB for the human
  1726. genome (2.9 GB for paired-end).")
  1727. (license license:artistic2.0)))
  1728. (define-public tophat
  1729. (package
  1730. (name "tophat")
  1731. (version "2.1.1")
  1732. (source (origin
  1733. (method url-fetch)
  1734. (uri (string-append
  1735. "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
  1736. version ".tar.gz"))
  1737. (sha256
  1738. (base32
  1739. "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
  1740. (modules '((guix build utils)))
  1741. (snippet
  1742. '(begin
  1743. ;; Remove bundled SeqAn and samtools
  1744. (delete-file-recursively "src/SeqAn-1.4.2")
  1745. (delete-file-recursively "src/samtools-0.1.18")
  1746. #t))))
  1747. (build-system gnu-build-system)
  1748. (arguments
  1749. '(#:parallel-build? #f ; not supported
  1750. #:phases
  1751. (modify-phases %standard-phases
  1752. (add-after 'set-paths 'hide-default-gcc
  1753. (lambda* (#:key inputs #:allow-other-keys)
  1754. (let ((gcc (assoc-ref inputs "gcc")))
  1755. ;; Remove the default GCC from CPLUS_INCLUDE_PATH to prevent
  1756. ;; conflicts with the GCC 5 input.
  1757. (setenv "CPLUS_INCLUDE_PATH"
  1758. (string-join
  1759. (delete (string-append gcc "/include/c++")
  1760. (string-split (getenv "CPLUS_INCLUDE_PATH") #\:))
  1761. ":"))
  1762. #t)))
  1763. (add-after 'unpack 'use-system-samtools
  1764. (lambda* (#:key inputs #:allow-other-keys)
  1765. (substitute* "src/Makefile.in"
  1766. (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
  1767. (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
  1768. (("SAMPROG = samtools_0\\.1\\.18") "")
  1769. (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
  1770. (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
  1771. (substitute* '("src/common.cpp"
  1772. "src/tophat.py")
  1773. (("samtools_0.1.18") (which "samtools")))
  1774. (substitute* '("src/common.h"
  1775. "src/bam2fastx.cpp")
  1776. (("#include \"bam.h\"") "#include <samtools/bam.h>")
  1777. (("#include \"sam.h\"") "#include <samtools/sam.h>"))
  1778. (substitute* '("src/bwt_map.h"
  1779. "src/map2gtf.h"
  1780. "src/align_status.h")
  1781. (("#include <bam.h>") "#include <samtools/bam.h>")
  1782. (("#include <sam.h>") "#include <samtools/sam.h>"))
  1783. #t)))))
  1784. (native-inputs
  1785. `(("gcc@5" ,gcc-5))) ;; doesn't build with later versions
  1786. (inputs
  1787. `(("boost" ,boost)
  1788. ("bowtie" ,bowtie)
  1789. ("ncurses" ,ncurses)
  1790. ("perl" ,perl)
  1791. ("python" ,python-2)
  1792. ("samtools" ,samtools-0.1)
  1793. ("seqan" ,seqan-1)
  1794. ("zlib" ,zlib)))
  1795. (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
  1796. (synopsis "Spliced read mapper for RNA-Seq data")
  1797. (description
  1798. "TopHat is a fast splice junction mapper for nucleotide sequence
  1799. reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
  1800. mammalian-sized genomes using the ultra high-throughput short read
  1801. aligner Bowtie, and then analyzes the mapping results to identify
  1802. splice junctions between exons.")
  1803. ;; TopHat is released under the Boost Software License, Version 1.0
  1804. ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
  1805. (license license:boost1.0)))
  1806. (define-public bwa
  1807. (package
  1808. (name "bwa")
  1809. (version "0.7.17")
  1810. (source (origin
  1811. (method url-fetch)
  1812. (uri (string-append
  1813. "https://github.com/lh3/bwa/releases/download/v"
  1814. version "/bwa-" version ".tar.bz2"))
  1815. (sha256
  1816. (base32
  1817. "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
  1818. (build-system gnu-build-system)
  1819. (arguments
  1820. '(#:tests? #f ;no "check" target
  1821. #:phases
  1822. (modify-phases %standard-phases
  1823. (replace 'install
  1824. (lambda* (#:key outputs #:allow-other-keys)
  1825. (let* ((out (assoc-ref outputs "out"))
  1826. (bin (string-append out "/bin"))
  1827. (lib (string-append out "/lib"))
  1828. (doc (string-append out "/share/doc/bwa"))
  1829. (man (string-append out "/share/man/man1")))
  1830. (install-file "bwa" bin)
  1831. (install-file "libbwa.a" lib)
  1832. (install-file "README.md" doc)
  1833. (install-file "bwa.1" man))
  1834. #t))
  1835. ;; no "configure" script
  1836. (delete 'configure))))
  1837. (inputs `(("zlib" ,zlib)))
  1838. ;; Non-portable SSE instructions are used so building fails on platforms
  1839. ;; other than x86_64.
  1840. (supported-systems '("x86_64-linux"))
  1841. (home-page "http://bio-bwa.sourceforge.net/")
  1842. (synopsis "Burrows-Wheeler sequence aligner")
  1843. (description
  1844. "BWA is a software package for mapping low-divergent sequences against a
  1845. large reference genome, such as the human genome. It consists of three
  1846. algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
  1847. designed for Illumina sequence reads up to 100bp, while the rest two for
  1848. longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
  1849. features such as long-read support and split alignment, but BWA-MEM, which is
  1850. the latest, is generally recommended for high-quality queries as it is faster
  1851. and more accurate. BWA-MEM also has better performance than BWA-backtrack for
  1852. 70-100bp Illumina reads.")
  1853. (license license:gpl3+)))
  1854. (define-public bwa-pssm
  1855. (package (inherit bwa)
  1856. (name "bwa-pssm")
  1857. (version "0.5.11")
  1858. (source (origin
  1859. (method git-fetch)
  1860. (uri (git-reference
  1861. (url "https://github.com/pkerpedjiev/bwa-pssm")
  1862. (commit version)))
  1863. (file-name (git-file-name name version))
  1864. (sha256
  1865. (base32
  1866. "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
  1867. (build-system gnu-build-system)
  1868. (inputs
  1869. `(("gdsl" ,gdsl)
  1870. ("zlib" ,zlib)
  1871. ("perl" ,perl)))
  1872. (home-page "http://bwa-pssm.binf.ku.dk/")
  1873. (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
  1874. (description
  1875. "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
  1876. the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
  1877. existing aligners it is fast and sensitive. Unlike most other aligners,
  1878. however, it is also adaptible in the sense that one can direct the alignment
  1879. based on known biases within the data set. It is coded as a modification of
  1880. the original BWA alignment program and shares the genome index structure as
  1881. well as many of the command line options.")
  1882. (license license:gpl3+)))
  1883. (define-public bwa-meth
  1884. (package
  1885. (name "bwa-meth")
  1886. (version "0.2.2")
  1887. (source (origin
  1888. (method git-fetch)
  1889. (uri (git-reference
  1890. (url "https://github.com/brentp/bwa-meth")
  1891. (commit (string-append "v" version))))
  1892. (file-name (git-file-name name version))
  1893. (sha256
  1894. (base32
  1895. "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
  1896. (build-system python-build-system)
  1897. (arguments
  1898. `(#:phases
  1899. (modify-phases %standard-phases
  1900. (add-after 'unpack 'keep-references-to-bwa
  1901. (lambda* (#:key inputs #:allow-other-keys)
  1902. (substitute* "bwameth.py"
  1903. (("bwa (mem|index)" _ command)
  1904. (string-append (which "bwa") " " command))
  1905. ;; There's an ill-advised check for "samtools" on PATH.
  1906. (("^checkX.*") ""))
  1907. #t)))))
  1908. (inputs
  1909. `(("bwa" ,bwa)))
  1910. (native-inputs
  1911. `(("python-toolshed" ,python-toolshed)))
  1912. (home-page "https://github.com/brentp/bwa-meth")
  1913. (synopsis "Fast and accurante alignment of BS-Seq reads")
  1914. (description
  1915. "BWA-Meth works for single-end reads and for paired-end reads from the
  1916. directional protocol (most common). It uses the method employed by
  1917. methylcoder and Bismark of in silico conversion of all C's to T's in both
  1918. reference and reads. It recovers the original read (needed to tabulate
  1919. methylation) by attaching it as a comment which BWA appends as a tag to the
  1920. read. It performs favorably to existing aligners gauged by number of on and
  1921. off-target reads for a capture method that targets CpG-rich region.")
  1922. (license license:expat)))
  1923. (define-public python-bx-python
  1924. (package
  1925. (name "python-bx-python")
  1926. (version "0.8.2")
  1927. (source (origin
  1928. (method url-fetch)
  1929. (uri (pypi-uri "bx-python" version))
  1930. (sha256
  1931. (base32
  1932. "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
  1933. (build-system python-build-system)
  1934. ;; Tests fail because test data are not included
  1935. (arguments '(#:tests? #f))
  1936. (propagated-inputs
  1937. `(("python-numpy" ,python-numpy)
  1938. ("python-six" ,python-six)))
  1939. (inputs
  1940. `(("zlib" ,zlib)))
  1941. (native-inputs
  1942. `(("python-lzo" ,python-lzo)
  1943. ("python-nose" ,python-nose)
  1944. ("python-cython" ,python-cython)))
  1945. (home-page "https://github.com/bxlab/bx-python")
  1946. (synopsis "Tools for manipulating biological data")
  1947. (description
  1948. "bx-python provides tools for manipulating biological data, particularly
  1949. multiple sequence alignments.")
  1950. (license license:expat)))
  1951. (define-public python2-bx-python
  1952. (package-with-python2 python-bx-python))
  1953. (define-public python-pyega3
  1954. (package
  1955. (name "python-pyega3")
  1956. (version "3.4.1")
  1957. (source (origin
  1958. (method url-fetch)
  1959. (uri (pypi-uri "pyega3" version))
  1960. (sha256
  1961. (base32
  1962. "1k736in8g27rarx65ym9xk50x53zjg75h37bb8ljynxv04rypx2q"))))
  1963. (build-system python-build-system)
  1964. (arguments
  1965. `(#:tests? #f)) ; The tests require network access.
  1966. (native-inputs
  1967. `(("python-psutil" ,python-psutil)
  1968. ("python-htsget" ,python-htsget)))
  1969. (propagated-inputs
  1970. `(("python-requests" ,python-requests)
  1971. ("python-tqdm" ,python-tqdm)
  1972. ("python-urllib3" ,python-urllib3)
  1973. ("python-responses" ,python-responses)))
  1974. (home-page "https://github.com/EGA-archive/ega-download-client")
  1975. (synopsis "Python client for EGA")
  1976. (description "This package is a python-based tool for viewing and
  1977. downloading files from authorized EGA datasets. It uses the EGA data API and
  1978. has several key features:
  1979. @itemize
  1980. @item Files are transferred over secure https connections and received
  1981. unencrypted, so no need for decryption after download.
  1982. @item Downloads resume from where they left off in the event that the
  1983. connection is interrupted.
  1984. @item Supports file segmenting and parallelized download of segments,
  1985. improving overall performance.
  1986. @item After download completes, file integrity is verified using checksums.
  1987. @item Implements the GA4GH-compliant htsget protocol for download of genomic
  1988. ranges for data files with accompanying index files.
  1989. @end itemize\n")
  1990. (license license:asl2.0)))
  1991. (define-public python-pysam
  1992. (package
  1993. (name "python-pysam")
  1994. (version "0.16.0.1")
  1995. (source (origin
  1996. (method git-fetch)
  1997. ;; Test data is missing on PyPi.
  1998. (uri (git-reference
  1999. (url "https://github.com/pysam-developers/pysam")
  2000. (commit (string-append "v" version))))
  2001. (file-name (git-file-name name version))
  2002. (sha256
  2003. (base32
  2004. "168bwwm8c2k22m7paip8q0yajyl7xdxgnik0bgjl7rhqg0majz0f"))
  2005. (modules '((guix build utils)))
  2006. (snippet '(begin
  2007. ;; Drop bundled htslib. TODO: Also remove samtools
  2008. ;; and bcftools.
  2009. (delete-file-recursively "htslib")))))
  2010. (build-system python-build-system)
  2011. (arguments
  2012. `(#:phases
  2013. (modify-phases %standard-phases
  2014. (add-before 'build 'set-flags
  2015. (lambda* (#:key inputs #:allow-other-keys)
  2016. (setenv "HTSLIB_MODE" "external")
  2017. (setenv "HTSLIB_LIBRARY_DIR"
  2018. (string-append (assoc-ref inputs "htslib") "/lib"))
  2019. (setenv "HTSLIB_INCLUDE_DIR"
  2020. (string-append (assoc-ref inputs "htslib") "/include"))
  2021. (setenv "LDFLAGS" "-lncurses")
  2022. (setenv "CFLAGS" "-D_CURSES_LIB=1")))
  2023. (replace 'check
  2024. (lambda* (#:key tests? #:allow-other-keys)
  2025. ;; FIXME: These tests fail with "AttributeError: 'array.array'
  2026. ;; object has no attribute 'tostring'".
  2027. (delete-file "tests/AlignmentFile_test.py")
  2028. (when tests?
  2029. ;; Step out of source dir so python does not import from CWD.
  2030. (with-directory-excursion "tests"
  2031. (setenv "HOME" "/tmp")
  2032. (invoke "make" "-C" "pysam_data")
  2033. (invoke "make" "-C" "cbcf_data")
  2034. (invoke "pytest" "-k"
  2035. (string-append
  2036. ;; requires network access.
  2037. "not FileHTTP"
  2038. ;; bug in test suite with samtools update
  2039. ;; https://github.com/pysam-developers/pysam/issues/961
  2040. " and not TestHeaderBAM"
  2041. " and not TestHeaderCRAM"
  2042. " and not test_text_processing")))))))))
  2043. (propagated-inputs
  2044. `(("htslib" ,htslib-1.10))) ; Included from installed header files.
  2045. (inputs
  2046. `(("ncurses" ,ncurses)
  2047. ("curl" ,curl)
  2048. ("zlib" ,zlib)))
  2049. (native-inputs
  2050. `(("python-cython" ,python-cython)
  2051. ("python-pytest" ,python-pytest)
  2052. ;; Dependencies below are are for tests only.
  2053. ("samtools" ,samtools-1.10)
  2054. ("bcftools" ,bcftools-1.10)
  2055. ("python-nose" ,python-nose)))
  2056. (home-page "https://github.com/pysam-developers/pysam")
  2057. (synopsis "Python bindings to the SAMtools C API")
  2058. (description
  2059. "Pysam is a Python module for reading and manipulating files in the
  2060. SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
  2061. also includes an interface for tabix.")
  2062. (license license:expat)))
  2063. (define-public python2-pysam
  2064. (package-with-python2 python-pysam))
  2065. (define-public python-twobitreader
  2066. (package
  2067. (name "python-twobitreader")
  2068. (version "3.1.6")
  2069. (source (origin
  2070. (method git-fetch)
  2071. (uri (git-reference
  2072. (url "https://github.com/benjschiller/twobitreader")
  2073. (commit version)))
  2074. (file-name (git-file-name name version))
  2075. (sha256
  2076. (base32
  2077. "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
  2078. (build-system python-build-system)
  2079. ;; Tests are not included
  2080. (arguments '(#:tests? #f))
  2081. (native-inputs
  2082. `(("python-sphinx" ,python-sphinx)))
  2083. (home-page "https://github.com/benjschiller/twobitreader")
  2084. (synopsis "Python library for reading .2bit files")
  2085. (description
  2086. "twobitreader is a Python library for reading .2bit files as used by the
  2087. UCSC genome browser.")
  2088. (license license:artistic2.0)))
  2089. (define-public python2-twobitreader
  2090. (package-with-python2 python-twobitreader))
  2091. (define-public python-plastid
  2092. (package
  2093. (name "python-plastid")
  2094. (version "0.4.8")
  2095. (source (origin
  2096. (method url-fetch)
  2097. (uri (pypi-uri "plastid" version))
  2098. (sha256
  2099. (base32
  2100. "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
  2101. (build-system python-build-system)
  2102. (arguments
  2103. ;; Some test files are not included.
  2104. `(#:tests? #f))
  2105. (propagated-inputs
  2106. `(("python-numpy" ,python-numpy)
  2107. ("python-scipy" ,python-scipy)
  2108. ("python-pandas" ,python-pandas)
  2109. ("python-pysam" ,python-pysam)
  2110. ("python-matplotlib" ,python-matplotlib)
  2111. ("python-biopython" ,python-biopython)
  2112. ("python-twobitreader" ,python-twobitreader)
  2113. ("python-termcolor" ,python-termcolor)))
  2114. (native-inputs
  2115. `(("python-cython" ,python-cython)
  2116. ("python-nose" ,python-nose)))
  2117. (home-page "https://github.com/joshuagryphon/plastid")
  2118. (synopsis "Python library for genomic analysis")
  2119. (description
  2120. "plastid is a Python library for genomic analysis – in particular,
  2121. high-throughput sequencing data – with an emphasis on simplicity.")
  2122. (license license:bsd-3)))
  2123. (define-public python2-plastid
  2124. (package-with-python2 python-plastid))
  2125. (define-public tetoolkit
  2126. (package
  2127. (name "tetoolkit")
  2128. (version "2.0.3")
  2129. (source (origin
  2130. (method git-fetch)
  2131. (uri (git-reference
  2132. (url "https://github.com/mhammell-laboratory/tetoolkit")
  2133. (commit version)))
  2134. (file-name (git-file-name name version))
  2135. (sha256
  2136. (base32
  2137. "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
  2138. (build-system python-build-system)
  2139. (arguments
  2140. `(#:python ,python-2 ; not guaranteed to work with Python 3
  2141. #:phases
  2142. (modify-phases %standard-phases
  2143. (add-after 'unpack 'make-writable
  2144. (lambda _
  2145. (for-each make-file-writable (find-files "."))
  2146. #t))
  2147. (add-after 'unpack 'patch-invocations
  2148. (lambda* (#:key inputs #:allow-other-keys)
  2149. (substitute* '("bin/TEtranscripts"
  2150. "bin/TEcount")
  2151. (("'sort ")
  2152. (string-append "'" (which "sort") " "))
  2153. (("'rm -f ")
  2154. (string-append "'" (which "rm") " -f "))
  2155. (("'Rscript'") (string-append "'" (which "Rscript") "'")))
  2156. (substitute* "TEToolkit/IO/ReadInputs.py"
  2157. (("BamToBED") (which "bamToBed")))
  2158. (substitute* "TEToolkit/Normalization.py"
  2159. (("\"Rscript\"")
  2160. (string-append "\"" (which "Rscript") "\"")))
  2161. #t))
  2162. (add-after 'install 'wrap-program
  2163. (lambda* (#:key outputs #:allow-other-keys)
  2164. ;; Make sure the executables find R packages.
  2165. (let ((out (assoc-ref outputs "out")))
  2166. (for-each
  2167. (lambda (script)
  2168. (wrap-program (string-append out "/bin/" script)
  2169. `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
  2170. '("TEtranscripts"
  2171. "TEcount")))
  2172. #t)))))
  2173. (inputs
  2174. `(("coreutils" ,coreutils)
  2175. ("bedtools" ,bedtools)
  2176. ("python-argparse" ,python2-argparse)
  2177. ("python-pysam" ,python2-pysam)
  2178. ("r-minimal" ,r-minimal)
  2179. ("r-deseq2" ,r-deseq2)))
  2180. (home-page "https://github.com/mhammell-laboratory/tetoolkit")
  2181. (synopsis "Transposable elements in differential enrichment analysis")
  2182. (description
  2183. "This is package for including transposable elements in differential
  2184. enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
  2185. RNA-seq (and similar data) and annotates reads to both genes and transposable
  2186. elements. TEtranscripts then performs differential analysis using DESeq2.
  2187. Note that TEtranscripts and TEcount rely on specially curated GTF files, which
  2188. are not included due to their size.")
  2189. (license license:gpl3+)))
  2190. (define-public cd-hit
  2191. (package
  2192. (name "cd-hit")
  2193. (version "4.6.8")
  2194. (source (origin
  2195. (method url-fetch)
  2196. (uri (string-append "https://github.com/weizhongli/cdhit"
  2197. "/releases/download/V" version
  2198. "/cd-hit-v" version
  2199. "-2017-0621-source.tar.gz"))
  2200. (sha256
  2201. (base32
  2202. "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
  2203. (build-system gnu-build-system)
  2204. (arguments
  2205. `(#:tests? #f ; there are no tests
  2206. #:make-flags
  2207. ;; Executables are copied directly to the PREFIX.
  2208. (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
  2209. ;; Support longer sequences (e.g. Pacbio sequences)
  2210. "MAX_SEQ=60000000")
  2211. #:phases
  2212. (modify-phases %standard-phases
  2213. ;; No "configure" script
  2214. (delete 'configure)
  2215. ;; Remove sources of non-determinism
  2216. (add-after 'unpack 'be-timeless
  2217. (lambda _
  2218. (substitute* "cdhit-utility.c++"
  2219. ((" \\(built on \" __DATE__ \"\\)") ""))
  2220. (substitute* "cdhit-common.c++"
  2221. (("__DATE__") "\"0\"")
  2222. (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
  2223. #t))
  2224. ;; The "install" target does not create the target directory.
  2225. (add-before 'install 'create-target-dir
  2226. (lambda* (#:key outputs #:allow-other-keys)
  2227. (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
  2228. #t)))))
  2229. (inputs
  2230. `(("perl" ,perl)))
  2231. (home-page "http://weizhongli-lab.org/cd-hit/")
  2232. (synopsis "Cluster and compare protein or nucleotide sequences")
  2233. (description
  2234. "CD-HIT is a program for clustering and comparing protein or nucleotide
  2235. sequences. CD-HIT is designed to be fast and handle extremely large
  2236. databases.")
  2237. ;; The manual says: "It can be copied under the GNU General Public License
  2238. ;; version 2 (GPLv2)."
  2239. (license license:gpl2)))
  2240. (define-public clipper
  2241. (package
  2242. (name "clipper")
  2243. (version "2.0")
  2244. (source (origin
  2245. (method git-fetch)
  2246. (uri (git-reference
  2247. (url "https://github.com/YeoLab/clipper")
  2248. (commit version)))
  2249. (file-name (git-file-name name version))
  2250. (sha256
  2251. (base32
  2252. "1bcag4lb5bkzsj2vg7lrq24aw6yfgq275ifrbhd82l7kqgbbjbkv"))))
  2253. (build-system python-build-system)
  2254. (arguments
  2255. `(#:phases
  2256. (modify-phases %standard-phases
  2257. (add-before 'reset-gzip-timestamps 'make-files-writable
  2258. (lambda* (#:key outputs #:allow-other-keys)
  2259. ;; Make sure .gz files are writable so that the
  2260. ;; 'reset-gzip-timestamps' phase can do its work.
  2261. (let ((out (assoc-ref outputs "out")))
  2262. (for-each make-file-writable
  2263. (find-files out "\\.gz$"))
  2264. #t))))))
  2265. (inputs
  2266. `(("htseq" ,htseq)
  2267. ("python-pybedtools" ,python-pybedtools)
  2268. ("python-cython" ,python-cython)
  2269. ("python-scikit-learn" ,python-scikit-learn)
  2270. ("python-matplotlib" ,python-matplotlib)
  2271. ("python-pandas" ,python-pandas)
  2272. ("python-pysam" ,python-pysam)
  2273. ("python-numpy" ,python-numpy)
  2274. ("python-scipy" ,python-scipy)))
  2275. (native-inputs
  2276. `(("python-setuptools-git" ,python-setuptools-git)
  2277. ("python-mock" ,python-mock) ; for tests
  2278. ("python-nose" ,python-nose) ; for tests
  2279. ("python-pytz" ,python-pytz))) ; for tests
  2280. (home-page "https://github.com/YeoLab/clipper")
  2281. (synopsis "CLIP peak enrichment recognition")
  2282. (description
  2283. "CLIPper is a tool to define peaks in CLIP-seq datasets.")
  2284. (license license:gpl2)))
  2285. (define-public codingquarry
  2286. (package
  2287. (name "codingquarry")
  2288. (version "2.0")
  2289. (source (origin
  2290. (method url-fetch)
  2291. (uri (string-append
  2292. "mirror://sourceforge/codingquarry/CodingQuarry_v"
  2293. version ".tar.gz"))
  2294. (sha256
  2295. (base32
  2296. "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
  2297. (build-system gnu-build-system)
  2298. (arguments
  2299. '(#:tests? #f ; no "check" target
  2300. #:phases
  2301. (modify-phases %standard-phases
  2302. (delete 'configure)
  2303. (replace 'install
  2304. (lambda* (#:key outputs #:allow-other-keys)
  2305. (let* ((out (assoc-ref outputs "out"))
  2306. (bin (string-append out "/bin"))
  2307. (doc (string-append out "/share/doc/codingquarry")))
  2308. (install-file "INSTRUCTIONS.pdf" doc)
  2309. (copy-recursively "QuarryFiles"
  2310. (string-append out "/QuarryFiles"))
  2311. (install-file "CodingQuarry" bin)
  2312. (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin))
  2313. #t)))))
  2314. (inputs `(("openmpi" ,openmpi)))
  2315. (native-search-paths
  2316. (list (search-path-specification
  2317. (variable "QUARRY_PATH")
  2318. (files '("QuarryFiles")))))
  2319. (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
  2320. (synopsis "Fungal gene predictor")
  2321. (description "CodingQuarry is a highly accurate, self-training GHMM fungal
  2322. gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
  2323. (home-page "https://sourceforge.net/projects/codingquarry/")
  2324. (license license:gpl3+)))
  ;; COUGER is distributed as a zip archive holding the "couger" script and a
  ;; "src" directory.  Nothing is configured or compiled: the package only
  ;; installs the files, patches the script and wraps it.
  (define-public couger
    (package
      (name "couger")
      (version "1.8.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://couger.oit.duke.edu/static/assets/COUGER"
               version ".zip"))
         (sha256
          (base32
           "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (bindir (string-append target "/bin")))
                 (copy-recursively "src" (string-append target "/src"))
                 (mkdir bindir)
                 ;; Have the script append the output directory, which now
                 ;; contains "src", to its module lookup path.
                 (substitute* "couger"
                   (("from argparse")
                    (string-append "import sys\nsys.path.append(\""
                                   target "\")\nfrom argparse")))
                 (install-file "couger" bindir))
               #t))
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Wrap 'couger' so that it runs with the GUIX_PYTHONPATH
               ;; that is in effect during the build.
               (let ((script (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                     (python-path (getenv "GUIX_PYTHONPATH")))
                 (wrap-program script
                   `("GUIX_PYTHONPATH" ":" prefix (,python-path))))
               #t)))))
      ;; unzip is listed because the source is a zip archive.
      (native-inputs
       `(("unzip" ,unzip)))
      (inputs
       `(("python" ,python-2)
         ("python2-pillow" ,python2-pillow)
         ("python2-numpy" ,python2-numpy)
         ("python2-scipy" ,python2-scipy)
         ("python2-matplotlib" ,python2-matplotlib)))
      (propagated-inputs
       `(("r-minimal" ,r-minimal)
         ("libsvm" ,libsvm)
         ("randomjungle" ,randomjungle)))
      (home-page "http://couger.oit.duke.edu")
      (synopsis "Identify co-factors in sets of genomic regions")
      (description
       "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.
COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
      (license license:gpl3+)))
  ;; Clustal Omega is built with the unmodified GNU build system phases; its
  ;; only declared input is argtable.
  (define-public clustal-omega
    (package
      (name "clustal-omega")
      (version "1.2.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                             version ".tar.gz"))
         (sha256
          (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
      (build-system gnu-build-system)
      (inputs
       `(("argtable" ,argtable)))
      (home-page "http://www.clustal.org/omega/")
      (synopsis "Multiple sequence aligner for protein and DNA/RNA")
      (description
       "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
      (license license:gpl2+)))
  ;; CrossMap is fetched from PyPI (project name "CrossMap") and built with
  ;; the Python build system without any phase changes.
  (define-public crossmap
    (package
      (name "crossmap")
      (version "0.3.8")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "CrossMap" version))
         (sha256
          (base32 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-nose" ,python-nose)))
      (inputs
       `(("python-bx-python" ,python-bx-python)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-pysam" ,python-pysam)
         ("zlib" ,zlib)))
      (home-page "http://crossmap.sourceforge.net/")
      (synopsis "Convert genome coordinates between assemblies")
      (description
       "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
      (license license:gpl2+)))
  ;; dnaio is fetched from PyPI and built with the default Python build
  ;; system phases; Cython, pytest and xopen are build-time inputs.
  (define-public python-dnaio
    (package
      (name "python-dnaio")
      (version "0.3")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "dnaio" version))
                (sha256
                 (base32
                  "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-pytest" ,python-pytest)
         ("python-xopen" ,python-xopen)))
      (home-page "https://github.com/marcelm/dnaio/")
      (synopsis "Read FASTA and FASTQ files efficiently")
      (description
       "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
      (license license:expat)))
  ;; deeptoolsintervals is fetched from PyPI; zlib is its only declared
  ;; input.
  (define-public python-deeptoolsintervals
    (package
      (name "python-deeptoolsintervals")
      (version "0.1.9")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "deeptoolsintervals" version))
         (sha256
          (base32 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
      (build-system python-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/deeptools/deeptools_intervals")
      (synopsis "Create GTF-based interval trees with associated meta-data")
      (description
       "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
      (license license:expat)))
  ;; deepTools is taken from the upstream Git repository, using the version
  ;; string itself as the commit reference.
  (define-public python-deeptools
    (package
      (name "python-deeptools")
      (version "3.4.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/deeptools/deepTools")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-mock" ,python-mock)
         ("python-nose" ,python-nose)))
      (propagated-inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ("python-numpydoc" ,python-numpydoc)
         ("python-py2bit" ,python-py2bit)
         ("python-pybigwig" ,python-pybigwig)
         ("python-pysam" ,python-pysam)
         ("python-scipy" ,python-scipy)
         ("python-deeptoolsintervals" ,python-deeptoolsintervals)
         ;; A specific plotly version variable is used here.
         ("python-plotly" ,python-plotly-2.4.1)))
      (home-page "https://pypi.org/project/deepTools/")
      (synopsis "Useful tools for exploring deep sequencing data")
      (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
      ;; deeptools/cm.py is under the BSD license; everything else is under
      ;; the MIT license.
      (license (list license:bsd-3 license:expat))))
  ;; Deprecated alias: 'deeptools' refers to 'python-deeptools'.
  (define-deprecated deeptools
    python-deeptools)
  ;; Cutadapt is fetched from PyPI and built with the default Python build
  ;; system phases.
  (define-public cutadapt
    (package
      (name "cutadapt")
      (version "2.1")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "cutadapt" version))
         (sha256
          (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-cython" ,python-cython)
         ("python-pytest" ,python-pytest)
         ("python-setuptools-scm" ,python-setuptools-scm)))
      (inputs
       `(("python-dnaio" ,python-dnaio)
         ("python-xopen" ,python-xopen)))
      (home-page "https://cutadapt.readthedocs.io/en/stable/")
      (synopsis "Remove adapter sequences from nucleotide sequencing reads")
      (description
       "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
      (license license:expat)))
  ;; libBigWig has no configure script: the compiler and the installation
  ;; prefix are handed to make directly.
  (define-public libbigwig
    (package
      (name "libbigwig")
      (version "0.4.4")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/dpryan79/libBigWig")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; tests require access to the web
         #:test-target "test"
         #:make-flags
         (list "CC=gcc"
               (string-append "prefix=" (assoc-ref %outputs "out")))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))
      (native-inputs
       `(("doxygen" ,doxygen)
         ;; Needed for the tests.
         ("python" ,python-2)))
      (inputs
       `(("zlib" ,zlib)
         ("curl" ,curl)))
      (home-page "https://github.com/dpryan79/libBigWig")
      (synopsis "C library for handling bigWig files")
      (description
       "This package provides a C library for parsing local and remote BigWig
files.")
      (license license:expat)))
  ;; pyBigWig bundles a copy of libBigWig.  The copy is removed from the
  ;; source and setup.py is patched to add "BigWig" to its library list.
  (define-public python-pybigwig
    (package
      (name "python-pybigwig")
      (version "0.3.17")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "pyBigWig" version))
         (sha256
          (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
         (modules '((guix build utils)))
         ;; Drop the bundled libBigWig sources.
         (snippet
          '(begin
             (delete-file-recursively "libBigWig")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'link-with-libBigWig
             (lambda _
               ;; Prepend "BigWig" to the list of libraries in setup.py.
               (substitute* "setup.py"
                 (("libs=\\[") "libs=[\"BigWig\", "))
               #t)))))
      (inputs
       `(("libbigwig" ,libbigwig)
         ("zlib" ,zlib)
         ("curl" ,curl)))
      (propagated-inputs
       `(("python-numpy" ,python-numpy)))
      (home-page "https://github.com/dpryan79/pyBigWig")
      (synopsis "Access bigWig files in Python using libBigWig")
      (description
       "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
      (license license:expat)))
  ;; Python 2 variant derived from python-pybigwig.
  (define-public python2-pybigwig
    (package-with-python2
     python-pybigwig))
  ;; schema-salad is fetched from PyPI.  Two groups of tests in
  ;; schema_salad/tests/test_cwl11.py are marked as skipped before the check
  ;; phase because they need network access.
  (define-public python-schema-salad
    (package
      (name "python-schema-salad")
      (version "7.1.20210316164414")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "schema-salad" version))
                (sha256
                 (base32
                  "04jaykdpgfnkrghvli5swxzqp7yba842am4bz42hcfljsmkrxvrk"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-before 'check 'skip-failing-tests
             (lambda _
               ;; Put a pytest skip marker in front of every test definition
               ;; that requires network access.
               (substitute* "schema_salad/tests/test_cwl11.py"
                 (("^def test_(secondaryFiles|outputBinding)" definition)
                  (string-append
                   "@pytest.mark.skip(reason=\"test requires network access\")\n"
                   definition)))
               #t)))))
      (native-inputs
       `(("python-pytest" ,python-pytest)
         ("python-pytest-runner" ,python-pytest-runner)))
      (propagated-inputs
       `(("python-cachecontrol" ,python-cachecontrol-0.11)
         ("python-lockfile" ,python-lockfile)
         ("python-mistune" ,python-mistune)
         ("python-rdflib" ,python-rdflib)
         ("python-rdflib-jsonld" ,python-rdflib-jsonld)
         ("python-requests" ,python-requests)
         ("python-ruamel.yaml" ,python-ruamel.yaml)
         ("python-typing-extensions" ,python-typing-extensions)))
      (home-page "https://github.com/common-workflow-language/schema_salad")
      (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
      (description
       "Salad is a schema language for describing JSON or YAML structured linked
data documents. Salad schema describes rules for preprocessing, structural
validation, and hyperlink checking for documents described by a Salad schema.
Salad supports rich data modeling with inheritance, template specialization,
object identifiers, object references, documentation generation, code
generation, and transformation to RDF. Salad provides a bridge between document
and record oriented data modeling and the Semantic Web.")
      (license license:asl2.0)))
  ;; cwltool is built from the upstream Git repository.  Three phases run
  ;; after 'unpack: one relaxes version pins in setup.py, one replaces the
  ;; Git-based timestamp lookup in gittaggers.py, and one removes or skips
  ;; tests.
  (define-public cwltool
    (package
      (name "cwltool")
      (version "3.0.20210319143721")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/common-workflow-language/cwltool")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1sgs9ckyxb9f9169mc3wm9lnjg4080ai42xqsrwpw9l8apy4c9m5"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'loosen-version-restrictions
             (lambda _
               (substitute* "setup.py"
                 (("== 1.5.1") ">=1.5.1")       ; prov
                 ((", < 3.5") "")               ; shellescape
                 ((" >= 6.0.2, < 6.2") ""))     ; pytest
               #t))
           (add-after 'unpack 'dont-use-git
             (lambda _
               ;; Replace the call that asks Git for a timestamp with an
               ;; expression computed from the package version.
               ;; NOTE(review): the digits left after dropping the first
               ;; four characters of the version are passed to time.gmtime
               ;; as a number of seconds -- confirm this is intended.
               (substitute* "gittaggers.py"
                 (("self.git_timestamp_tag\\(\\)")
                  (string-append
                   "time.strftime('.%Y%m%d%H%M%S', time.gmtime(int("
                   (string-drop ,version 4) ")))")))
               #t))
           (add-after 'unpack 'modify-tests
             (lambda _
               ;; SKIP prefixes a test definition with a pytest skip marker.
               (let ((skip (lambda (definition)
                             (string-append
                              "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                              definition))))
                 ;; Tries to connect to the internet.
                 (for-each delete-file
                           '("tests/test_content_type.py"
                             "tests/test_udocker.py"
                             "tests/test_http_input.py"))
                 (substitute* "tests/test_load_tool.py"
                   (("def test_load_graph_fragment_from_packed" definition)
                    (skip definition)))
                 (substitute* "tests/test_examples.py"
                   (("def test_env_filtering" definition)
                    (skip definition))
                   ;; Tries to use cwl-runners.
                   (("def test_v1_0_arg_empty_prefix_separate_false" definition)
                    (skip definition))))
               #t)))))
      (native-inputs
       `(("python-arcp" ,python-arcp)
         ("python-humanfriendly" ,python-humanfriendly)
         ("python-mock" ,python-mock)
         ("python-pytest" ,python-pytest)
         ("python-pytest-cov" ,python-pytest-cov)
         ("python-pytest-mock" ,python-pytest-mock)
         ("python-pytest-runner" ,python-pytest-runner)
         ("python-rdflib-jsonld" ,python-rdflib-jsonld)))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-bagit" ,python-bagit)
         ("python-coloredlogs" ,python-coloredlogs)
         ("python-mypy-extensions" ,python-mypy-extensions)
         ("python-prov" ,python-prov)
         ("python-pydot" ,python-pydot)
         ("python-psutil" ,python-psutil)
         ("python-rdflib" ,python-rdflib)
         ("python-requests" ,python-requests)
         ("python-ruamel.yaml" ,python-ruamel.yaml)
         ("python-schema-salad" ,python-schema-salad)
         ("python-shellescape" ,python-shellescape)
         ("python-typing-extensions" ,python-typing-extensions)
         ;; Not listed as needed but still necessary:
         ("node" ,node)))
      (home-page
       "https://github.com/common-workflow-language/common-workflow-language")
      (synopsis "Common Workflow Language reference implementation")
      (description
       "This is the reference implementation of the @acronym{CWL, Common Workflow
Language} standards. The CWL open standards are for describing analysis
workflows and tools in a way that makes them portable and scalable across a
variety of software and hardware environments, from workstations to cluster,
cloud, and high performance computing (HPC) environments. CWL is designed to
meet the needs of data-intensive science, such as Bioinformatics, Medical
Imaging, Astronomy, Physics, and Chemistry. The @acronym{cwltool, CWL reference
implementation} is intended to be feature complete and to provide comprehensive
validation of CWL files as well as provide other tools related to working with
CWL descriptions.")
      (license license:asl2.0)))
  ;; DendroPy is fetched from GitHub (tag "v" followed by the version) so
  ;; that the tests are part of the source.
  (define-public python-dendropy
    (package
      (name "python-dendropy")
      (version "4.4.0")
      (source (origin
                (method git-fetch)
                ;; Source from GitHub so that tests are included.
                (uri (git-reference
                      (url "https://github.com/jeetsukumaran/DendroPy")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
      (build-system python-build-system)
      (home-page "https://dendropy.org/")
      (synopsis "Library for phylogenetics and phylogenetic computing")
      (description
       "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
      (license license:bsd-3)))
  ;; Python 2 variant of python-dendropy.  It inherits from the converted
  ;; package and adds a phase that renames one test so it is not collected.
  (define-public python2-dendropy
    (let ((py2-base (package-with-python2 python-dendropy)))
      (package/inherit py2-base
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             (add-after 'unpack 'remove-failing-test
               (lambda _
                 ;; This test fails when the full test suite is run; see
                 ;; https://github.com/jeetsukumaran/DendroPy/issues/74
                 (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                   (("test_collection_comments_and_annotations")
                    "do_not_test_collection_comments_and_annotations"))
                 #t)))
           ;; Append whatever arguments the converted base package has.
           ,@(package-arguments py2-base))))))
  ;; py2bit is fetched from PyPI and built with the default Python build
  ;; system phases; no inputs are declared.
  (define-public python-py2bit
    (package
      (name "python-py2bit")
      (version "0.3.0")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "py2bit" version))
                (sha256
                 (base32
                  "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
      (build-system python-build-system)
      (home-page "https://github.com/dpryan79/py2bit")
      (synopsis "Access 2bit files using lib2bit")
      (description
       "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
      (license license:expat)))
  ;; Delly is built from Git with the bundled src/htslib directory removed.
  ;; After the regular install, the "excludeTemplates" directory is copied
  ;; to share/delly/templates.
  (define-public delly
    (package
      (name "delly")
      (version "0.8.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/dellytools/delly")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
         (modules '((guix build utils)))
         ;; Remove the copy of htslib shipped in the source tree.
         (snippet
          '(begin
             (delete-file-recursively "src/htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests to run.
         #:make-flags
         (list "PARALLEL=1" ; Allow parallel execution at run-time.
               (string-append "prefix=" (assoc-ref %outputs "out")))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (add-after 'install 'install-templates
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (destination (string-append out
                                                  "/share/delly/templates")))
                 (mkdir-p destination)
                 (copy-recursively "excludeTemplates" destination)
                 #t))))))
      (inputs
       `(("boost" ,boost)
         ("bzip2" ,bzip2)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (home-page "https://github.com/dellytools/delly")
      (synopsis "Integrated structural variant prediction method")
      (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
      (license license:gpl3+)))
  ;; Tandem Repeats Finder is fetched from Git (tag "v" followed by the
  ;; version) and built with the unmodified GNU build system.
  (define-public trf
    (package
      (name "trf")
      (version "4.09.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Benson-Genomics-Lab/TRF")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
      (build-system gnu-build-system)
      (home-page "https://github.com/Benson-Genomics-Lab/TRF")
      (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
      (description "A tandem repeat in DNA is two or more adjacent, approximate
copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
locate and display tandem repeats in DNA sequences. In order to use the
program, the user submits a sequence in FASTA format. The output consists of
two files: a repeat table file and an alignment file. Submitted sequences may
be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
      (license license:agpl3+)))
  ;; RepeatMasker is not compiled.  The 'build phase copies the whole source
  ;; tree to share/RepeatMasker in the output and runs the upstream
  ;; "configure" Perl script there; the 'install phase puts the main script
  ;; in bin and wraps it with a suitable PERL5LIB.
  ;;
  ;; The home page, synopsis and description used to be copies of the 'trf'
  ;; package metadata; they now describe RepeatMasker itself.
  (define-public repeat-masker
    (package
      (name "repeat-masker")
      (version "4.1.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://www.repeatmasker.org/"
                             "RepeatMasker/RepeatMasker-"
                             version ".tar.gz"))
         (sha256
          (base32 "03144sl9kh5ni2i33phi7x2pjndzbm5bjw3r4kqvmm6hxyb4k4x2"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #false ; there are none
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'build
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((share (string-append (assoc-ref outputs "out")
                                           "/share/RepeatMasker")))
                 (mkdir-p share)
                 (copy-recursively "." share)
                 ;; Run the upstream configuration script inside the
                 ;; installed tree, pointing it at the trf executable and
                 ;; at the directory holding the HMMER executables.
                 (with-directory-excursion share
                   (invoke "perl" "configure"
                           "--trf_prgm" (which "trf")
                           "--hmmer_dir"
                           (string-append (assoc-ref inputs "hmmer")
                                          "/bin"))))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (share (string-append out "/share/RepeatMasker"))
                      (bin (string-append out "/bin"))
                      (path (getenv "PERL5LIB")))
                 (install-file (string-append share "/RepeatMasker") bin)
                 ;; Prefix PERL5LIB with the build-time value and with the
                 ;; installed RepeatMasker tree.
                 (wrap-program (string-append bin "/RepeatMasker")
                   `("PERL5LIB" ":" prefix (,path ,share)))))))))
      (inputs
       `(("perl" ,perl)
         ("perl-text-soundex" ,perl-text-soundex)
         ("python" ,python)
         ("python-h5py" ,python-h5py)
         ("hmmer" ,hmmer)
         ("trf" ,trf)))
      (home-page "https://www.repeatmasker.org/")
      (synopsis "Screen DNA sequences for interspersed repeats")
      (description "RepeatMasker is a program that screens DNA sequences for
interspersed repeats and low complexity DNA sequences.  The output of the
program is a detailed annotation of the repeats that are present in the query
sequence as well as a modified version of the query sequence in which all the
annotated repeats have been masked.")
      (license license:osl2.1)))
  ;; DIAMOND is built with CMake.  The "-march=native" flag is stripped from
  ;; CMakeLists.txt after unpacking.
  (define-public diamond
    (package
      (name "diamond")
      (version "0.9.30")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bbuchfink/diamond")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-native-compilation
             (lambda _
               (substitute* "CMakeLists.txt"
                 (("-march=native") ""))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/bbuchfink/diamond")
      (synopsis "Accelerated BLAST compatible local sequence aligner")
      (description
       "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
      (license license:agpl3+)))
  ;; Discrover is built with CMake.  Two phases run after 'unpack: one
  ;; patches the documentation sources and the CLI help text, the other adds
  ;; an "#include <random>" line to two headers.
  (define-public discrover
    (package
      (name "discrover")
      (version "1.6.0")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/maaskola/discrover")
                      (commit version)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #f ; there are no tests
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-latex-errors
             (lambda _
               ;; The bibliography is edited with the default port encoding
               ;; unset.
               (with-fluids ((%default-port-encoding #f))
                 (substitute* "doc/references.bib"
                   (("\\{S\\}illanp[^,]+,")
                    "{S}illanp{\\\"a}{\\\"a},")))
               ;; XXX: I just can't get pdflatex to not complain about these
               ;; characters.  They end up in the manual via the generated
               ;; discrover-cli-help.txt.
               (substitute* "src/hmm/cli.cpp"
                 (("µ") "mu")
                 (("η") "eta")
                 (("≤") "<="))
               ;; This seems to be a syntax error.
               (substitute* "doc/discrover-manual.tex"
                 (("theverbbox\\[t\\]") "theverbbox"))
               #t))
           (add-after 'unpack 'add-missing-includes
             (lambda _
               ;; Insert the include right after each header's guard macro.
               (substitute* "src/executioninformation.hpp"
                 (("#define EXECUTIONINFORMATION_HPP" guard)
                  (string-append guard "\n#include <random>")))
               (substitute* "src/plasma/fasta.hpp"
                 (("#define FASTA_HPP" guard)
                  (string-append guard "\n#include <random>")))
               #t)))))
      (native-inputs
       `(("texlive" ,(texlive-updmap.cfg
                      (list texlive-cm
                            texlive-amsfonts
                            texlive-doi
                            texlive-latex-examplep
                            texlive-hyperref
                            texlive-latex-ms
                            texlive-latex-natbib
                            texlive-bibtex ; style files used by natbib
                            texlive-latex-pgf ; tikz
                            texlive-latex-verbatimbox)))
         ("imagemagick" ,imagemagick)))
      (inputs
       `(("boost" ,boost)
         ("cairo" ,cairo)
         ("rmath-standalone" ,rmath-standalone)))
      (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
      (synopsis "Discover discriminative nucleotide sequence motifs")
      (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
      (license license:gpl3+)))
  ;; EIGENSOFT is built from Git with the pre-built "bin" directory emptied.
  ;; The build runs from the "src" sub-directory, and an extra phase copies
  ;; the resulting executables into the output.
  (define-public eigensoft
    (package
      (name "eigensoft")
      (version "7.2.1")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/DReichLab/EIG")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
                (modules '((guix build utils)))
                ;; Remove pre-built binaries, leaving an empty "bin".
                (snippet
                 '(begin
                    (delete-file-recursively "bin")
                    (mkdir "bin")
                    #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests.
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase, but the Makefile is in a
           ;; sub-directory.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           ;; The provided install target only copies executables to
           ;; the "bin" directory in the build root.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+)))
  3100. (define-public edirect
  3101. (package
  3102. (name "edirect")
  3103. (version "13.3.20200128")
  3104. (source (origin
  3105. (method url-fetch)
  3106. (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
  3107. "/versions/" version
  3108. "/edirect-" version ".tar.gz"))
  3109. (sha256
  3110. (base32
  3111. "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
  3112. (modules '((guix build utils)))
  3113. (snippet
  3114. '(begin (delete-file "Mozilla-CA.tar.gz")
  3115. (substitute* "rchive.go"
  3116. ;; This go library does not have any license.
  3117. (("github.com/fiam/gounidecode/unidecode")
  3118. "golang.org/rainycape/unidecode"))
  3119. #t))))
  3120. (build-system perl-build-system)
  3121. (arguments
  3122. `(#:phases
  3123. (modify-phases %standard-phases
  3124. (delete 'configure)
  3125. (delete 'build)
  3126. (delete 'check) ; simple check after install
  3127. (add-after 'unpack 'patch-programs
  3128. (lambda* (#:key inputs #:allow-other-keys)
  3129. ;; Ignore errors about missing xtract.Linux and rchive.Linux.
  3130. (substitute* "pm-refresh"
  3131. (("cat \\\"\\$target")
  3132. "grep ^[[:digit:]] \"$target"))
  3133. #t))
  3134. (replace 'install
  3135. (lambda* (#:key inputs outputs #:allow-other-keys)
  3136. (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
  3137. (edirect-go (assoc-ref inputs "edirect-go-programs")))
  3138. (for-each
  3139. (lambda (file)
  3140. (install-file file bin))
  3141. '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
  3142. "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
  3143. "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
  3144. "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
  3145. "pm-index" "pm-invert" "pm-merge" "pm-promote"))
  3146. (symlink (string-append edirect-go "/bin/xtract.Linux")
  3147. (string-append bin "/xtract"))
  3148. (symlink (string-append edirect-go "/bin/rchive.Linux")
  3149. (string-append bin "/rchive")))
  3150. #t))
  3151. (add-after 'install 'wrap-program
  3152. (lambda* (#:key outputs #:allow-other-keys)
  3153. ;; Make sure everything can run in a pure environment.
  3154. (let ((out (assoc-ref outputs "out"))
  3155. (path (getenv "PERL5LIB")))
  3156. (for-each
  3157. (lambda (file)
  3158. (wrap-program file
  3159. `("PERL5LIB" ":" prefix (,path)))
  3160. (wrap-program file
  3161. `("PATH" ":" prefix (,(string-append out "/bin")
  3162. ,(dirname (which "sed"))
  3163. ,(dirname (which "gzip"))
  3164. ,(dirname (which "grep"))
  3165. ,(dirname (which "perl"))
  3166. ,(dirname (which "uname"))))))
  3167. (find-files out ".")))
  3168. #t))
  3169. (add-after 'wrap-program 'check
  3170. (lambda* (#:key outputs #:allow-other-keys)
  3171. (invoke (string-append (assoc-ref outputs "out")
  3172. "/bin/edirect.pl")
  3173. "-filter" "-help")
  3174. #t)))))
  3175. (inputs
  3176. `(("edirect-go-programs" ,edirect-go-programs)
  3177. ("perl-html-parser" ,perl-html-parser)
  3178. ("perl-encode-locale" ,perl-encode-locale)
  3179. ("perl-file-listing" ,perl-file-listing)
  3180. ("perl-html-tagset" ,perl-html-tagset)
  3181. ("perl-html-tree" ,perl-html-tree)
  3182. ("perl-http-cookies" ,perl-http-cookies)
  3183. ("perl-http-date" ,perl-http-date)
  3184. ("perl-http-message" ,perl-http-message)
  3185. ("perl-http-negotiate" ,perl-http-negotiate)
  3186. ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
  3187. ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
  3188. ("perl-net-http" ,perl-net-http)
  3189. ("perl-uri" ,perl-uri)
  3190. ("perl-www-robotrules" ,perl-www-robotrules)
  3191. ("perl-xml-simple" ,perl-xml-simple)
  3192. ("perl" ,perl)))
  3193. (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
  3194. (synopsis "Tools for accessing the NCBI's set of databases")
  3195. (description
  3196. "Entrez Direct (EDirect) is a method for accessing the National Center
  3197. for Biotechnology Information's (NCBI) set of interconnected
  3198. databases (publication, sequence, structure, gene, variation, expression,
  3199. etc.) from a terminal. Functions take search terms from command-line
  3200. arguments. Individual operations are combined to build multi-step queries.
  3201. Record retrieval and formatting normally complete the process.
  3202. EDirect also provides an argument-driven function that simplifies the
  3203. extraction of data from document summaries or other results that are returned
  3204. in structured XML format. This can eliminate the need for writing custom
  3205. software to answer ad hoc questions.")
  3206. (native-search-paths
  3207. ;; Ideally this should be set for LWP somewhere.
  3208. (list (search-path-specification
  3209. (variable "PERL_LWP_SSL_CA_FILE")
  3210. (file-type 'regular)
  3211. (separator #f)
  3212. (files '("/etc/ssl/certs/ca-certificates.crt")))))
  3213. (license license:public-domain)))
  3214. (define-public edirect-go-programs
  3215. (package
  3216. (inherit edirect)
  3217. (name "edirect-go-programs")
  3218. (build-system go-build-system)
  3219. (arguments
  3220. `(#:install-source? #f
  3221. #:tests? #f ; No tests.
  3222. #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
  3223. #:phases
  3224. (modify-phases %standard-phases
  3225. (replace 'build
  3226. (lambda* (#:key import-path #:allow-other-keys)
  3227. (with-directory-excursion (string-append "src/" import-path)
  3228. (invoke "go" "build" "-v" "-x" "j2x.go")
  3229. (invoke "go" "build" "-v" "-x" "t2x.go")
  3230. (invoke "go" "build" "-v" "-x" "-o"
  3231. "xtract.Linux" "xtract.go" "common.go")
  3232. (invoke "go" "build" "-v" "-x" "-o"
  3233. "rchive.Linux" "rchive.go" "common.go")
  3234. (invoke "go" "build" "-v" "-x" "-o" "symbols.Linux" "s2p.go"))))
  3235. (replace 'install
  3236. (lambda* (#:key outputs import-path #:allow-other-keys)
  3237. (let ((dest (string-append (assoc-ref outputs "out") "/bin"))
  3238. (source (string-append "src/" import-path "/")))
  3239. (for-each (lambda (file)
  3240. (format #t "installing ~a~%" file)
  3241. (install-file (string-append source file) dest))
  3242. '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
  3243. #t))))))
  3244. (native-inputs '())
  3245. (propagated-inputs '())
  3246. (inputs
  3247. `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
  3248. ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
  3249. ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
  3250. ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
  3251. ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
  3252. ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
  3253. ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
  3254. ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
  3255. ("go-golang-org-x-image" ,go-golang-org-x-image)
  3256. ("go-golang-org-x-text" ,go-golang-org-x-text)))))
  3257. (define-public exonerate
  3258. (package
  3259. (name "exonerate")
  3260. (version "2.4.0")
  3261. (source
  3262. (origin
  3263. (method url-fetch)
  3264. (uri
  3265. (string-append
  3266. "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
  3267. "exonerate-" version ".tar.gz"))
  3268. (sha256
  3269. (base32
  3270. "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
  3271. (build-system gnu-build-system)
  3272. (arguments
  3273. `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
  3274. (native-inputs
  3275. `(("pkg-config" ,pkg-config)))
  3276. (inputs
  3277. `(("glib" ,glib)))
  3278. (home-page
  3279. "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
  3280. (synopsis "Generic tool for biological sequence alignment")
  3281. (description
  3282. "Exonerate is a generic tool for pairwise sequence comparison. It allows
  3283. the alignment of sequences using a many alignment models, either exhaustive
  3284. dynamic programming or a variety of heuristics.")
  3285. (license license:gpl3)))
  3286. (define-public express
  3287. (package
  3288. (name "express")
  3289. (version "1.5.3")
  3290. (source (origin
  3291. (method git-fetch)
  3292. (uri (git-reference
  3293. (url "https://github.com/adarob/eXpress")
  3294. (commit version)))
  3295. (file-name (git-file-name name version))
  3296. (sha256
  3297. (base32
  3298. "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
  3299. (build-system cmake-build-system)
  3300. (arguments
  3301. `(#:tests? #f ;no "check" target
  3302. #:phases
  3303. (modify-phases %standard-phases
  3304. (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
  3305. (lambda* (#:key inputs #:allow-other-keys)
  3306. (substitute* "CMakeLists.txt"
  3307. (("set\\(Boost_USE_STATIC_LIBS ON\\)")
  3308. "set(Boost_USE_STATIC_LIBS OFF)")
  3309. (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
  3310. (string-append (assoc-ref inputs "bamtools") "/include/bamtools")))
  3311. (substitute* "src/CMakeLists.txt"
  3312. (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
  3313. (string-append (assoc-ref inputs "bamtools") "/lib"))
  3314. (("libprotobuf.a") "libprotobuf.so"))
  3315. #t))
  3316. (add-after 'unpack 'remove-update-check
  3317. (lambda _
  3318. (substitute* "src/main.cpp"
  3319. (("#include \"update_check.h\"") "")
  3320. (("check_version\\(PACKAGE_VERSION\\);") ""))
  3321. #t)))))
  3322. (inputs
  3323. `(("boost" ,boost)
  3324. ("bamtools" ,bamtools)
  3325. ("protobuf" ,protobuf)
  3326. ("zlib" ,zlib)))
  3327. (home-page "http://bio.math.berkeley.edu/eXpress")
  3328. (synopsis "Streaming quantification for high-throughput genomic sequencing")
  3329. (description
  3330. "eXpress is a streaming tool for quantifying the abundances of a set of
  3331. target sequences from sampled subsequences. Example applications include
  3332. transcript-level RNA-Seq quantification, allele-specific/haplotype expression
  3333. analysis (from RNA-Seq), transcription factor binding quantification in
  3334. ChIP-Seq, and analysis of metagenomic data.")
  3335. (license license:artistic2.0)))
  3336. (define-public express-beta-diversity
  3337. (package
  3338. (name "express-beta-diversity")
  3339. (version "1.0.8")
  3340. (source (origin
  3341. (method git-fetch)
  3342. (uri (git-reference
  3343. (url "https://github.com/dparks1134/ExpressBetaDiversity")
  3344. (commit (string-append "v" version))))
  3345. (file-name (git-file-name name version))
  3346. (sha256
  3347. (base32
  3348. "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
  3349. (build-system gnu-build-system)
  3350. (arguments
  3351. `(#:phases
  3352. (modify-phases %standard-phases
  3353. (delete 'configure)
  3354. (add-before 'build 'enter-source (lambda _ (chdir "source") #t))
  3355. (replace 'check
  3356. (lambda _ (invoke "../bin/ExpressBetaDiversity" "-u") #t))
  3357. (replace 'install
  3358. (lambda* (#:key outputs #:allow-other-keys)
  3359. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  3360. (install-file "../scripts/convertToEBD.py" bin)
  3361. (install-file "../bin/ExpressBetaDiversity" bin)
  3362. #t))))))
  3363. (inputs
  3364. `(("python" ,python-2)))
  3365. (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
  3366. (synopsis "Taxon- and phylogenetic-based beta diversity measures")
  3367. (description
  3368. "Express Beta Diversity (EBD) calculates ecological beta diversity
  3369. (dissimilarity) measures between biological communities. EBD implements a
  3370. variety of diversity measures including those that make use of phylogenetic
  3371. similarity of community members.")
  3372. (license license:gpl3+)))
  3373. (define-public fasttree
  3374. (package
  3375. (name "fasttree")
  3376. (version "2.1.10")
  3377. (source (origin
  3378. (method url-fetch)
  3379. (uri (string-append
  3380. "http://www.microbesonline.org/fasttree/FastTree-"
  3381. version ".c"))
  3382. (sha256
  3383. (base32
  3384. "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
  3385. (build-system gnu-build-system)
  3386. (arguments
  3387. `(#:tests? #f ; no "check" target
  3388. #:phases
  3389. (modify-phases %standard-phases
  3390. (delete 'unpack)
  3391. (delete 'configure)
  3392. (replace 'build
  3393. (lambda* (#:key source #:allow-other-keys)
  3394. (invoke "gcc"
  3395. "-O3"
  3396. "-finline-functions"
  3397. "-funroll-loops"
  3398. "-Wall"
  3399. "-o"
  3400. "FastTree"
  3401. source
  3402. "-lm")
  3403. (invoke "gcc"
  3404. "-DOPENMP"
  3405. "-fopenmp"
  3406. "-O3"
  3407. "-finline-functions"
  3408. "-funroll-loops"
  3409. "-Wall"
  3410. "-o"
  3411. "FastTreeMP"
  3412. source
  3413. "-lm")
  3414. #t))
  3415. (replace 'install
  3416. (lambda* (#:key outputs #:allow-other-keys)
  3417. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  3418. (install-file "FastTree" bin)
  3419. (install-file "FastTreeMP" bin)
  3420. #t))))))
  3421. (home-page "http://www.microbesonline.org/fasttree")
  3422. (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
  3423. (description
  3424. "FastTree can handle alignments with up to a million of sequences in a
  3425. reasonable amount of time and memory. For large alignments, FastTree is
  3426. 100-1,000 times faster than PhyML 3.0 or RAxML 7.")
  3427. (license license:gpl2+)))
  3428. (define-public fastx-toolkit
  3429. (package
  3430. (name "fastx-toolkit")
  3431. (version "0.0.14")
  3432. (source (origin
  3433. (method url-fetch)
  3434. (uri
  3435. (string-append
  3436. "https://github.com/agordon/fastx_toolkit/releases/download/"
  3437. version "/fastx_toolkit-" version ".tar.bz2"))
  3438. (sha256
  3439. (base32
  3440. "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
  3441. (build-system gnu-build-system)
  3442. (inputs
  3443. `(("libgtextutils" ,libgtextutils)))
  3444. (native-inputs
  3445. `(("gcc" ,gcc-6) ;; doesn't build with later versions
  3446. ("pkg-config" ,pkg-config)))
  3447. (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
  3448. (synopsis "Tools for FASTA/FASTQ file preprocessing")
  3449. (description
  3450. "The FASTX-Toolkit is a collection of command line tools for Short-Reads
  3451. FASTA/FASTQ files preprocessing.
  3452. Next-Generation sequencing machines usually produce FASTA or FASTQ files,
  3453. containing multiple short-reads sequences. The main processing of such
  3454. FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
  3455. is sometimes more productive to preprocess the files before mapping the
  3456. sequences to the genome---manipulating the sequences to produce better mapping
  3457. results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
  3458. (license license:agpl3+)))
  3459. (define-public flexbar
  3460. (package
  3461. (name "flexbar")
  3462. (version "3.4.0")
  3463. (source (origin
  3464. (method git-fetch)
  3465. (uri (git-reference
  3466. (url "https://github.com/seqan/flexbar")
  3467. (commit (string-append "v" version))))
  3468. (file-name (git-file-name name version))
  3469. (sha256
  3470. (base32
  3471. "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
  3472. (build-system cmake-build-system)
  3473. (arguments
  3474. `(#:phases
  3475. (modify-phases %standard-phases
  3476. (add-after 'unpack 'do-not-tune-to-CPU
  3477. (lambda _
  3478. (substitute* "src/CMakeLists.txt"
  3479. ((" -march=native") ""))
  3480. #t))
  3481. (replace 'check
  3482. (lambda* (#:key outputs #:allow-other-keys)
  3483. (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
  3484. (with-directory-excursion "../source/test"
  3485. (invoke "bash" "flexbar_test.sh"))
  3486. #t))
  3487. (replace 'install
  3488. (lambda* (#:key outputs #:allow-other-keys)
  3489. (let* ((out (string-append (assoc-ref outputs "out")))
  3490. (bin (string-append out "/bin/")))
  3491. (install-file "flexbar" bin))
  3492. #t)))))
  3493. (inputs
  3494. `(("tbb" ,tbb)
  3495. ("zlib" ,zlib)))
  3496. (native-inputs
  3497. `(("pkg-config" ,pkg-config)
  3498. ("seqan" ,seqan-2)))
  3499. (home-page "https://github.com/seqan/flexbar")
  3500. (synopsis "Barcode and adapter removal tool for sequencing platforms")
  3501. (description
  3502. "Flexbar preprocesses high-throughput nucleotide sequencing data
  3503. efficiently. It demultiplexes barcoded runs and removes adapter sequences.
  3504. Moreover, trimming and filtering features are provided. Flexbar increases
  3505. read mapping rates and improves genome and transcriptome assemblies. It
  3506. supports next-generation sequencing data in fasta/q and csfasta/q format from
  3507. Illumina, Roche 454, and the SOLiD platform.")
  3508. (license license:bsd-3)))
  3509. (define-public fraggenescan
  3510. (package
  3511. (name "fraggenescan")
  3512. (version "1.30")
  3513. (source
  3514. (origin
  3515. (method url-fetch)
  3516. (uri
  3517. (string-append "mirror://sourceforge/fraggenescan/"
  3518. "FragGeneScan" version ".tar.gz"))
  3519. (sha256
  3520. (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
  3521. (build-system gnu-build-system)
  3522. (arguments
  3523. `(#:phases
  3524. (modify-phases %standard-phases
  3525. (delete 'configure)
  3526. (add-before 'build 'patch-paths
  3527. (lambda* (#:key outputs #:allow-other-keys)
  3528. (let* ((out (string-append (assoc-ref outputs "out")))
  3529. (share (string-append out "/share/fraggenescan/")))
  3530. (substitute* "run_FragGeneScan.pl"
  3531. (("system\\(\"rm")
  3532. (string-append "system(\"" (which "rm")))
  3533. (("system\\(\"mv")
  3534. (string-append "system(\"" (which "mv")))
  3535. (("\\\"awk") (string-append "\"" (which "awk")))
  3536. ;; This script and other programs expect the training files
  3537. ;; to be in the non-standard location bin/train/XXX. Change
  3538. ;; this to be share/fraggenescan/train/XXX instead.
  3539. (("^\\$train.file = \\$dir.*")
  3540. (string-append "$train_file = \""
  3541. share
  3542. "train/\".$FGS_train_file;")))
  3543. (substitute* "run_hmm.c"
  3544. (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
  3545. (string-append " strcpy(train_dir, \"" share "/train/\");"))))
  3546. #t))
  3547. (replace 'build
  3548. (lambda _
  3549. (invoke "make" "clean")
  3550. (invoke "make" "fgs")
  3551. #t))
  3552. (replace 'install
  3553. (lambda* (#:key outputs #:allow-other-keys)
  3554. (let* ((out (string-append (assoc-ref outputs "out")))
  3555. (bin (string-append out "/bin/"))
  3556. (share (string-append out "/share/fraggenescan/train")))
  3557. (install-file "run_FragGeneScan.pl" bin)
  3558. (install-file "FragGeneScan" bin)
  3559. (copy-recursively "train" share))
  3560. #t))
  3561. (delete 'check)
  3562. (add-after 'install 'post-install-check
  3563. ;; In lieu of 'make check', run one of the examples and check the
  3564. ;; output files gets created.
  3565. (lambda* (#:key outputs #:allow-other-keys)
  3566. (let* ((out (string-append (assoc-ref outputs "out")))
  3567. (bin (string-append out "/bin/"))
  3568. (frag (string-append bin "run_FragGeneScan.pl")))
  3569. ;; Test complete genome.
  3570. (invoke frag
  3571. "-genome=./example/NC_000913.fna"
  3572. "-out=./test2"
  3573. "-complete=1"
  3574. "-train=complete")
  3575. (unless (and (file-exists? "test2.faa")
  3576. (file-exists? "test2.ffn")
  3577. (file-exists? "test2.gff")
  3578. (file-exists? "test2.out"))
  3579. (error "Expected files do not exist."))
  3580. ;; Test incomplete sequences.
  3581. (invoke frag
  3582. "-genome=./example/NC_000913-fgs.ffn"
  3583. "-out=out"
  3584. "-complete=0"
  3585. "-train=454_30")
  3586. #t))))))
  3587. (inputs
  3588. `(("perl" ,perl)
  3589. ("python" ,python-2))) ;not compatible with python 3.
  3590. (home-page "https://sourceforge.net/projects/fraggenescan/")
  3591. (synopsis "Finds potentially fragmented genes in short reads")
  3592. (description
  3593. "FragGeneScan is a program for predicting bacterial and archaeal genes in
  3594. short and error-prone DNA sequencing reads. It can also be applied to predict
  3595. genes in incomplete assemblies or complete genomes.")
  3596. ;; GPL3+ according to private correspondense with the authors.
  3597. (license license:gpl3+)))
  3598. (define-public fxtract
  3599. (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
  3600. (package
  3601. (name "fxtract")
  3602. (version "2.3")
  3603. (source
  3604. (origin
  3605. (method git-fetch)
  3606. (uri (git-reference
  3607. (url "https://github.com/ctSkennerton/fxtract")
  3608. (commit version)))
  3609. (file-name (git-file-name name version))
  3610. (sha256
  3611. (base32
  3612. "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
  3613. (build-system gnu-build-system)
  3614. (arguments
  3615. `(#:make-flags (list
  3616. (string-append "PREFIX=" (assoc-ref %outputs "out"))
  3617. "CC=gcc")
  3618. #:test-target "fxtract_test"
  3619. #:phases
  3620. (modify-phases %standard-phases
  3621. (delete 'configure)
  3622. (add-before 'build 'copy-util
  3623. (lambda* (#:key inputs #:allow-other-keys)
  3624. (rmdir "util")
  3625. (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
  3626. #t))
  3627. ;; Do not use make install as this requires additional dependencies.
  3628. (replace 'install
  3629. (lambda* (#:key outputs #:allow-other-keys)
  3630. (let* ((out (assoc-ref outputs "out"))
  3631. (bin (string-append out"/bin")))
  3632. (install-file "fxtract" bin)
  3633. #t))))))
  3634. (inputs
  3635. `(("pcre" ,pcre)
  3636. ("zlib" ,zlib)))
  3637. (native-inputs
  3638. ;; ctskennerton-util is licensed under GPL2.
  3639. `(("ctskennerton-util"
  3640. ,(origin
  3641. (method git-fetch)
  3642. (uri (git-reference
  3643. (url "https://github.com/ctSkennerton/util")
  3644. (commit util-commit)))
  3645. (file-name (string-append
  3646. "ctstennerton-util-" util-commit "-checkout"))
  3647. (sha256
  3648. (base32
  3649. "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
  3650. (home-page "https://github.com/ctSkennerton/fxtract")
  3651. (synopsis "Extract sequences from FASTA and FASTQ files")
  3652. (description
  3653. "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
  3654. or FASTQ) file given a subsequence. It uses a simple substring search for
  3655. basic tasks but can change to using POSIX regular expressions, PCRE, hash
  3656. lookups or multi-pattern searching as required. By default fxtract looks in
  3657. the sequence of each record but can also be told to look in the header,
  3658. comment or quality sections.")
  3659. ;; 'util' requires SSE instructions.
  3660. (supported-systems '("x86_64-linux"))
  3661. (license license:expat))))
  3662. (define-public gemma
  3663. (package
  3664. (name "gemma")
  3665. (version "0.98.3")
  3666. (source (origin
  3667. (method git-fetch)
  3668. (uri (git-reference
  3669. (url "https://github.com/genetics-statistics/GEMMA")
  3670. (commit version)))
  3671. (file-name (git-file-name name version))
  3672. (sha256
  3673. (base32
  3674. "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
  3675. (modules '((guix build utils)))
  3676. (snippet
  3677. '(begin
  3678. (delete-file-recursively "contrib")
  3679. #t))))
  3680. (build-system gnu-build-system)
  3681. (inputs
  3682. `(("gsl" ,gsl)
  3683. ("openblas" ,openblas)
  3684. ("zlib" ,zlib)))
  3685. (native-inputs
  3686. `(("catch" ,catch-framework2-1)
  3687. ("perl" ,perl)
  3688. ("shunit2" ,shunit2)
  3689. ("which" ,which)))
  3690. (arguments
  3691. `(#:phases
  3692. (modify-phases %standard-phases
  3693. (delete 'configure)
  3694. (add-after 'unpack 'prepare-build
  3695. (lambda* (#:key inputs #:allow-other-keys)
  3696. (mkdir-p "bin")
  3697. (substitute* "Makefile"
  3698. (("/usr/local/opt/openblas")
  3699. (assoc-ref inputs "openblas")))
  3700. #t))
  3701. (replace 'check
  3702. (lambda* (#:key tests? #:allow-other-keys)
  3703. (when tests?
  3704. ;; 'make slow-check' expects shunit2-2.0.3.
  3705. (with-directory-excursion "test"
  3706. (invoke "./test_suite.sh"))
  3707. #t)))
  3708. (replace 'install
  3709. (lambda* (#:key outputs #:allow-other-keys)
  3710. (install-file "bin/gemma"
  3711. (string-append (assoc-ref outputs "out") "/bin"))
  3712. #t)))))
  3713. (home-page "https://github.com/genetics-statistics/GEMMA")
  3714. (synopsis "Tool for genome-wide efficient mixed model association")
  3715. (description
  3716. "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
  3717. standard linear mixed model resolver with application in @acronym{GWAS,
  3718. genome-wide association studies}.")
  3719. (license license:gpl3)))
  3720. (define-public grit
  3721. (package
  3722. (name "grit")
  3723. (version "2.0.5")
  3724. (source (origin
  3725. (method git-fetch)
  3726. (uri (git-reference
  3727. (url "https://github.com/nboley/grit")
  3728. (commit version)))
  3729. (file-name (git-file-name name version))
  3730. (sha256
  3731. (base32
  3732. "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
  3733. (build-system python-build-system)
  3734. (arguments
  3735. `(#:python ,python-2
  3736. #:phases
  3737. (modify-phases %standard-phases
  3738. (add-after 'unpack 'generate-from-cython-sources
  3739. (lambda* (#:key inputs outputs #:allow-other-keys)
  3740. ;; Delete these C files to force fresh generation from pyx sources.
  3741. (delete-file "grit/sparsify_support_fns.c")
  3742. (delete-file "grit/call_peaks_support_fns.c")
  3743. (substitute* "setup.py"
  3744. (("Cython.Setup") "Cython.Build"))
  3745. #t)))))
  3746. (inputs
  3747. `(("python-scipy" ,python2-scipy)
  3748. ("python-numpy" ,python2-numpy)
  3749. ("python-pysam" ,python2-pysam)
  3750. ("python-networkx" ,python2-networkx)))
  3751. (native-inputs
  3752. `(("python-cython" ,python2-cython)))
  3753. ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21.
  3754. (home-page "https://github.com/nboley/grit")
  3755. (synopsis "Tool for integrative analysis of RNA-seq type assays")
  3756. (description
  3757. "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
  3758. full length transcript models. When none of these data sources are available,
  3759. GRIT can be run by providing a candidate set of TES or TSS sites. In
  3760. addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
  3761. also be run in quantification mode, where it uses a provided GTF file and just
  3762. estimates transcript expression.")
  3763. (license license:gpl3+)))
  3764. (define-public hisat
  3765. (package
  3766. (name "hisat")
  3767. (version "0.1.4")
  3768. (source (origin
  3769. (method url-fetch)
  3770. (uri (string-append
  3771. "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
  3772. version "-beta-source.zip"))
  3773. (sha256
  3774. (base32
  3775. "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
  3776. (build-system gnu-build-system)
  3777. (arguments
  3778. `(#:tests? #f ;no check target
  3779. #:make-flags '("allall"
  3780. ;; Disable unsupported `popcnt' instructions on
  3781. ;; architectures other than x86_64
  3782. ,@(if (string-prefix? "x86_64"
  3783. (or (%current-target-system)
  3784. (%current-system)))
  3785. '()
  3786. '("POPCNT_CAPABILITY=0")))
  3787. #:phases
  3788. (modify-phases %standard-phases
  3789. (add-after 'unpack 'patch-sources
  3790. (lambda _
  3791. ;; XXX Cannot use snippet because zip files are not supported
  3792. (substitute* "Makefile"
  3793. (("^CC = .*$") "CC = gcc")
  3794. (("^CPP = .*$") "CPP = g++")
  3795. ;; replace BUILD_HOST and BUILD_TIME for deterministic build
  3796. (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
  3797. (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
  3798. (substitute* '("hisat-build" "hisat-inspect")
  3799. (("/usr/bin/env") (which "env")))
  3800. #t))
  3801. (replace 'install
  3802. (lambda* (#:key outputs #:allow-other-keys)
  3803. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  3804. (for-each (lambda (file)
  3805. (install-file file bin))
  3806. (find-files
  3807. "."
  3808. "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
  3809. #t))
  3810. (delete 'configure))))
  3811. (native-inputs
  3812. `(("unzip" ,unzip)))
  3813. (inputs
  3814. `(("perl" ,perl)
  3815. ("python" ,python)
  3816. ("zlib" ,zlib)))
  3817. ;; Non-portable SSE instructions are used so building fails on platforms
  3818. ;; other than x86_64.
  3819. (supported-systems '("x86_64-linux"))
  3820. (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
  3821. (synopsis "Hierarchical indexing for spliced alignment of transcripts")
  3822. (description
  3823. "HISAT is a fast and sensitive spliced alignment program for mapping
  3824. RNA-seq reads. In addition to one global FM index that represents a whole
  3825. genome, HISAT uses a large set of small FM indexes that collectively cover the
  3826. whole genome. These small indexes (called local indexes) combined with
  3827. several alignment strategies enable effective alignment of RNA-seq reads, in
  3828. particular, reads spanning multiple exons.")
  3829. (license license:gpl3+)))
  3830. (define-public hisat2
  3831. (package
  3832. (name "hisat2")
  3833. (version "2.2.1")
  3834. (source
  3835. (origin
  3836. (method git-fetch)
  3837. (uri (git-reference
  3838. (url "https://github.com/DaehwanKimLab/hisat2/")
  3839. (commit (string-append "v" version))))
  3840. (file-name (git-file-name name version))
  3841. (sha256
  3842. (base32
  3843. "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754"))))
  3844. (build-system gnu-build-system)
  3845. (arguments
  3846. `(#:tests? #f ; no check target
  3847. #:make-flags (list "CC=gcc" "CXX=g++" "allall")
  3848. #:modules ((guix build gnu-build-system)
  3849. (guix build utils)
  3850. (srfi srfi-26))
  3851. #:phases
  3852. (modify-phases %standard-phases
  3853. (add-after 'unpack 'make-deterministic
  3854. (lambda _
  3855. (substitute* "Makefile"
  3856. (("`date`") "0"))))
  3857. (delete 'configure)
  3858. (add-before 'build 'build-manual
  3859. (lambda _
  3860. (mkdir-p "doc")
  3861. (invoke "make" "doc")))
  3862. (replace 'install
  3863. (lambda* (#:key outputs #:allow-other-keys)
  3864. (let* ((out (assoc-ref outputs "out"))
  3865. (bin (string-append out "/bin/"))
  3866. (doc (string-append out "/share/doc/hisat2/")))
  3867. (for-each
  3868. (cut install-file <> bin)
  3869. (find-files "."
  3870. "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
  3871. (mkdir-p doc)
  3872. (install-file "doc/manual.inc.html" doc)))))))
  3873. (native-inputs
  3874. `(("perl" ,perl)
  3875. ("pandoc" ,pandoc))) ; for documentation
  3876. (inputs
  3877. `(("python" ,python-wrapper)))
  3878. (home-page "https://daehwankimlab.github.io/hisat2/")
  3879. (synopsis "Graph-based alignment of genomic sequencing reads")
  3880. (description "HISAT2 is a fast and sensitive alignment program for mapping
  3881. next-generation sequencing reads (both DNA and RNA) to a population of human
  3882. genomes (as well as to a single reference genome). In addition to using one
  3883. global @dfn{graph FM} (GFM) index that represents a population of human
  3884. genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
  3885. the whole genome. These small indexes, combined with several alignment
  3886. strategies, enable rapid and accurate alignment of sequencing reads. This new
  3887. indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
  3888. ;; HISAT2 contains files from Bowtie2, which is released under
  3889. ;; GPLv2 or later. The HISAT2 source files are released under
  3890. ;; GPLv3 or later.
  3891. (license license:gpl3+)))
  3892. (define-public hmmer
  3893. (package
  3894. (name "hmmer")
  3895. (version "3.3.2")
  3896. (source
  3897. (origin
  3898. (method url-fetch)
  3899. (uri (string-append
  3900. "http://eddylab.org/software/hmmer/hmmer-" version ".tar.gz"))
  3901. (sha256
  3902. (base32
  3903. "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj"))))
  3904. (build-system gnu-build-system)
  3905. (native-inputs `(("perl" ,perl)
  3906. ("python" ,python))) ; for tests
  3907. (home-page "http://hmmer.org/")
  3908. (synopsis "Biosequence analysis using profile hidden Markov models")
  3909. (description
  3910. "HMMER is used for searching sequence databases for homologs of protein
  3911. sequences, and for making protein sequence alignments. It implements methods
  3912. using probabilistic models called profile hidden Markov models (profile
  3913. HMMs).")
  3914. ;; hmmer uses non-portable SSE intrinsics so building fails on other
  3915. ;; platforms.
  3916. (supported-systems '("x86_64-linux" "i686-linux"))
  3917. (license license:bsd-3)))
  3918. (define-public htseq
  3919. (package
  3920. (name "htseq")
  3921. (version "0.9.1")
  3922. (source (origin
  3923. (method url-fetch)
  3924. (uri (pypi-uri "HTSeq" version))
  3925. (sha256
  3926. (base32
  3927. "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
  3928. (build-system python-build-system)
  3929. (native-inputs
  3930. `(("python-cython" ,python-cython)))
  3931. ;; Numpy needs to be propagated when htseq is used as a Python library.
  3932. (propagated-inputs
  3933. `(("python-numpy" ,python-numpy)))
  3934. (inputs
  3935. `(("python-pysam" ,python-pysam)
  3936. ("python-matplotlib" ,python-matplotlib)))
  3937. (home-page "https://htseq.readthedocs.io/")
  3938. (synopsis "Analysing high-throughput sequencing data with Python")
  3939. (description
  3940. "HTSeq is a Python package that provides infrastructure to process data
  3941. from high-throughput sequencing assays.")
  3942. (license license:gpl3+)))
  3943. (define-public python2-htseq
  3944. (package-with-python2 htseq))
  3945. (define-public java-htsjdk
  3946. (package
  3947. (name "java-htsjdk")
  3948. (version "2.3.0") ; last version without build dependency on gradle
  3949. (source (origin
  3950. (method git-fetch)
  3951. (uri (git-reference
  3952. (url "https://github.com/samtools/htsjdk")
  3953. (commit version)))
  3954. (file-name (git-file-name name version))
  3955. (sha256
  3956. (base32
  3957. "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
  3958. (modules '((guix build utils)))
  3959. (snippet
  3960. ;; Delete pre-built binaries
  3961. '(begin
  3962. (delete-file-recursively "lib")
  3963. (mkdir-p "lib")
  3964. #t))))
  3965. (build-system ant-build-system)
  3966. (arguments
  3967. `(#:tests? #f ; test require Internet access
  3968. #:jdk ,icedtea-8
  3969. #:make-flags
  3970. (list (string-append "-Ddist=" (assoc-ref %outputs "out")
  3971. "/share/java/htsjdk/"))
  3972. #:build-target "all"
  3973. #:phases
  3974. (modify-phases %standard-phases
  3975. ;; The build phase also installs the jars
  3976. (delete 'install))))
  3977. (inputs
  3978. `(("java-ngs" ,java-ngs)
  3979. ("java-snappy-1" ,java-snappy-1)
  3980. ("java-commons-compress" ,java-commons-compress)
  3981. ("java-commons-logging-minimal" ,java-commons-logging-minimal)
  3982. ("java-commons-jexl-2" ,java-commons-jexl-2)
  3983. ("java-xz" ,java-xz)))
  3984. (native-inputs
  3985. `(("java-testng" ,java-testng)))
  3986. (home-page "http://samtools.github.io/htsjdk/")
  3987. (synopsis "Java API for high-throughput sequencing data (HTS) formats")
  3988. (description
  3989. "HTSJDK is an implementation of a unified Java library for accessing
  3990. common file formats, such as SAM and VCF, used for high-throughput
  3991. sequencing (HTS) data. There are also an number of useful utilities for
  3992. manipulating HTS data.")
  3993. (license license:expat)))
  3994. (define-public java-htsjdk-latest
  3995. (package
  3996. (name "java-htsjdk")
  3997. (version "2.14.3")
  3998. (source (origin
  3999. (method git-fetch)
  4000. (uri (git-reference
  4001. (url "https://github.com/samtools/htsjdk")
  4002. (commit version)))
  4003. (file-name (string-append name "-" version "-checkout"))
  4004. (sha256
  4005. (base32
  4006. "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
  4007. (build-system ant-build-system)
  4008. (arguments
  4009. `(#:tests? #f ; test require Scala
  4010. #:jdk ,icedtea-8
  4011. #:jar-name "htsjdk.jar"
  4012. #:phases
  4013. (modify-phases %standard-phases
  4014. (add-after 'unpack 'remove-useless-build.xml
  4015. (lambda _ (delete-file "build.xml") #t))
  4016. ;; The tests require the scalatest package.
  4017. (add-after 'unpack 'remove-tests
  4018. (lambda _ (delete-file-recursively "src/test") #t)))))
  4019. (inputs
  4020. `(("java-ngs" ,java-ngs)
  4021. ("java-snappy-1" ,java-snappy-1)
  4022. ("java-commons-compress" ,java-commons-compress)
  4023. ("java-commons-logging-minimal" ,java-commons-logging-minimal)
  4024. ("java-commons-jexl-2" ,java-commons-jexl-2)
  4025. ("java-xz" ,java-xz)))
  4026. (native-inputs
  4027. `(("java-junit" ,java-junit)))
  4028. (home-page "http://samtools.github.io/htsjdk/")
  4029. (synopsis "Java API for high-throughput sequencing data (HTS) formats")
  4030. (description
  4031. "HTSJDK is an implementation of a unified Java library for accessing
  4032. common file formats, such as SAM and VCF, used for high-throughput
  4033. sequencing (HTS) data. There are also an number of useful utilities for
  4034. manipulating HTS data.")
  4035. (license license:expat)))
  4036. ;; This is needed for picard 2.10.3
  4037. (define-public java-htsjdk-2.10.1
  4038. (package (inherit java-htsjdk-latest)
  4039. (name "java-htsjdk")
  4040. (version "2.10.1")
  4041. (source (origin
  4042. (method git-fetch)
  4043. (uri (git-reference
  4044. (url "https://github.com/samtools/htsjdk")
  4045. (commit version)))
  4046. (file-name (string-append name "-" version "-checkout"))
  4047. (sha256
  4048. (base32
  4049. "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
  4050. (build-system ant-build-system)
  4051. (arguments
  4052. `(#:tests? #f ; tests require Scala
  4053. #:jdk ,icedtea-8
  4054. #:jar-name "htsjdk.jar"
  4055. #:phases
  4056. (modify-phases %standard-phases
  4057. (add-after 'unpack 'remove-useless-build.xml
  4058. (lambda _ (delete-file "build.xml") #t))
  4059. ;; The tests require the scalatest package.
  4060. (add-after 'unpack 'remove-tests
  4061. (lambda _ (delete-file-recursively "src/test") #t)))))))
  4062. ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
  4063. ;; recent version of java-htsjdk, which depends on gradle.
  4064. (define-public java-picard
  4065. (package
  4066. (name "java-picard")
  4067. (version "2.3.0")
  4068. (source (origin
  4069. (method git-fetch)
  4070. (uri (git-reference
  4071. (url "https://github.com/broadinstitute/picard")
  4072. (commit version)))
  4073. (file-name (string-append "java-picard-" version "-checkout"))
  4074. (sha256
  4075. (base32
  4076. "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
  4077. (modules '((guix build utils)))
  4078. (snippet
  4079. '(begin
  4080. ;; Delete pre-built binaries.
  4081. (delete-file-recursively "lib")
  4082. (mkdir-p "lib")
  4083. (substitute* "build.xml"
  4084. ;; Remove build-time dependency on git.
  4085. (("failifexecutionfails=\"true\"")
  4086. "failifexecutionfails=\"false\"")
  4087. ;; Use our htsjdk.
  4088. (("depends=\"compile-htsjdk, ")
  4089. "depends=\"")
  4090. (("depends=\"compile-htsjdk-tests, ")
  4091. "depends=\"")
  4092. ;; Build picard-lib.jar before building picard.jar
  4093. (("name=\"picard-jar\" depends=\"" line)
  4094. (string-append line "picard-lib-jar, ")))
  4095. #t))))
  4096. (build-system ant-build-system)
  4097. (arguments
  4098. `(#:build-target "picard-jar"
  4099. #:test-target "test"
  4100. ;; Tests require jacoco:coverage.
  4101. #:tests? #f
  4102. #:make-flags
  4103. (list (string-append "-Dhtsjdk_lib_dir="
  4104. (assoc-ref %build-inputs "java-htsjdk")
  4105. "/share/java/htsjdk/")
  4106. "-Dhtsjdk-classes=dist/tmp"
  4107. (string-append "-Dhtsjdk-version="
  4108. ,(package-version java-htsjdk)))
  4109. #:jdk ,icedtea-8
  4110. #:phases
  4111. (modify-phases %standard-phases
  4112. ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
  4113. (delete 'generate-jar-indices)
  4114. (add-after 'unpack 'use-our-htsjdk
  4115. (lambda* (#:key inputs #:allow-other-keys)
  4116. (substitute* "build.xml"
  4117. (("\\$\\{htsjdk\\}/lib")
  4118. (search-input-directory inputs
  4119. "share/java/htsjdk")))))
  4120. (add-after 'unpack 'make-test-target-independent
  4121. (lambda* (#:key inputs #:allow-other-keys)
  4122. (substitute* "build.xml"
  4123. (("name=\"test\" depends=\"compile, ")
  4124. "name=\"test\" depends=\""))
  4125. #t))
  4126. (replace 'install (install-jars "dist")))))
  4127. (inputs
  4128. `(("java-htsjdk" ,java-htsjdk)
  4129. ("java-guava" ,java-guava)))
  4130. (native-inputs
  4131. `(("java-testng" ,java-testng)))
  4132. (home-page "http://broadinstitute.github.io/picard/")
  4133. (synopsis "Tools for manipulating high-throughput sequencing data and formats")
  4134. (description "Picard is a set of Java command line tools for manipulating
  4135. high-throughput sequencing (HTS) data and formats. Picard is implemented
  4136. using the HTSJDK Java library to support accessing file formats that are
  4137. commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
  4138. VCF.")
  4139. (license license:expat)))
  4140. ;; This is needed for dropseq-tools
  4141. (define-public java-picard-2.10.3
  4142. (package
  4143. (name "java-picard")
  4144. (version "2.10.3")
  4145. (source (origin
  4146. (method git-fetch)
  4147. (uri (git-reference
  4148. (url "https://github.com/broadinstitute/picard")
  4149. (commit version)))
  4150. (file-name (string-append "java-picard-" version "-checkout"))
  4151. (sha256
  4152. (base32
  4153. "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
  4154. (build-system ant-build-system)
  4155. (arguments
  4156. `(#:jar-name "picard.jar"
  4157. ;; Tests require jacoco:coverage.
  4158. #:tests? #f
  4159. #:jdk ,icedtea-8
  4160. #:main-class "picard.cmdline.PicardCommandLine"
  4161. #:modules ((guix build ant-build-system)
  4162. (guix build utils)
  4163. (guix build java-utils)
  4164. (sxml simple)
  4165. (sxml transform)
  4166. (sxml xpath))
  4167. #:phases
  4168. (modify-phases %standard-phases
  4169. ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
  4170. (delete 'generate-jar-indices)
  4171. (add-after 'unpack 'remove-useless-build.xml
  4172. (lambda _ (delete-file "build.xml") #t))
  4173. ;; This is necessary to ensure that htsjdk is found when using
  4174. ;; picard.jar as an executable.
  4175. (add-before 'build 'edit-classpath-in-manifest
  4176. (lambda* (#:key inputs #:allow-other-keys)
  4177. (chmod "build.xml" #o664)
  4178. (call-with-output-file "build.xml.new"
  4179. (lambda (port)
  4180. (sxml->xml
  4181. (pre-post-order
  4182. (with-input-from-file "build.xml"
  4183. (lambda _ (xml->sxml #:trim-whitespace? #t)))
  4184. `((target . ,(lambda (tag . kids)
  4185. (let ((name ((sxpath '(name *text*))
  4186. (car kids)))
  4187. ;; FIXME: We're breaking the line
  4188. ;; early with a dummy path to
  4189. ;; ensure that the store reference
  4190. ;; isn't broken apart and can still
  4191. ;; be found by the reference
  4192. ;; scanner.
  4193. (msg (format #f
  4194. "\
  4195. Class-Path: /~a \
  4196. ~a/share/java/htsjdk.jar${line.separator}${line.separator}"
  4197. ;; maximum line length is 70
  4198. (string-tabulate (const #\b) 57)
  4199. (assoc-ref inputs "java-htsjdk"))))
  4200. (if (member "manifest" name)
  4201. `(,tag ,@kids
  4202. (replaceregexp
  4203. (@ (file "${manifest.file}")
  4204. (match "\\r\\n\\r\\n")
  4205. (replace "${line.separator}")))
  4206. (echo
  4207. (@ (message ,msg)
  4208. (file "${manifest.file}")
  4209. (append "true"))))
  4210. `(,tag ,@kids)))))
  4211. (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
  4212. (*text* . ,(lambda (_ txt) txt))))
  4213. port)))
  4214. (rename-file "build.xml.new" "build.xml")
  4215. #t)))))
  4216. (propagated-inputs
  4217. `(("java-htsjdk" ,java-htsjdk-2.10.1)))
  4218. (native-inputs
  4219. `(("java-testng" ,java-testng)
  4220. ("java-guava" ,java-guava)))
  4221. (home-page "http://broadinstitute.github.io/picard/")
  4222. (synopsis "Tools for manipulating high-throughput sequencing data and formats")
  4223. (description "Picard is a set of Java command line tools for manipulating
  4224. high-throughput sequencing (HTS) data and formats. Picard is implemented
  4225. using the HTSJDK Java library to support accessing file formats that are
  4226. commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
  4227. VCF.")
  4228. (license license:expat)))
  4229. ;; This is the last version of Picard to provide net.sf.samtools
  4230. (define-public java-picard-1.113
  4231. (package (inherit java-picard)
  4232. (name "java-picard")
  4233. (version "1.113")
  4234. (source (origin
  4235. (method git-fetch)
  4236. (uri (git-reference
  4237. (url "https://github.com/broadinstitute/picard")
  4238. (commit version)))
  4239. (file-name (string-append "java-picard-" version "-checkout"))
  4240. (sha256
  4241. (base32
  4242. "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
  4243. (modules '((guix build utils)))
  4244. (snippet
  4245. '(begin
  4246. ;; Delete pre-built binaries.
  4247. (delete-file-recursively "lib")
  4248. (mkdir-p "lib")
  4249. #t))))
  4250. (build-system ant-build-system)
  4251. (arguments
  4252. `(#:build-target "picard-jar"
  4253. #:test-target "test"
  4254. ;; FIXME: the class path at test time is wrong.
  4255. ;; [testng] Error: A JNI error has occurred, please check your installation and try again
  4256. ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
  4257. #:tests? #f
  4258. #:jdk ,icedtea-8
  4259. #:ant ,ant/java8
  4260. ;; This is only used for tests.
  4261. #:make-flags
  4262. (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
  4263. #:phases
  4264. (modify-phases %standard-phases
  4265. ;; FIXME: This phase fails.
  4266. (delete 'generate-jar-indices)
  4267. ;; Do not use bundled ant bzip2.
  4268. (add-after 'unpack 'use-ant-bzip
  4269. (lambda* (#:key inputs #:allow-other-keys)
  4270. (substitute* "build.xml"
  4271. (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
  4272. (search-input-file inputs "/lib/ant.jar")))))
  4273. (add-after 'unpack 'make-test-target-independent
  4274. (lambda* (#:key inputs #:allow-other-keys)
  4275. (substitute* "build.xml"
  4276. (("name=\"test\" depends=\"compile, ")
  4277. "name=\"test\" depends=\"compile-tests, ")
  4278. (("name=\"compile\" depends=\"compile-src, compile-tests\"")
  4279. "name=\"compile\" depends=\"compile-src\""))
  4280. #t))
  4281. (add-after 'unpack 'fix-deflater-path
  4282. (lambda* (#:key outputs #:allow-other-keys)
  4283. (substitute* "src/java/net/sf/samtools/Defaults.java"
  4284. (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
  4285. (string-append "getStringProperty(\"intel_deflater_so_path\", \""
  4286. (assoc-ref outputs "out")
  4287. "/lib/jni/libIntelDeflater.so"
  4288. "\")")))
  4289. #t))
  4290. ;; Build the deflater library, because we've previously deleted the
  4291. ;; pre-built one. This can only be built with access to the JDK
  4292. ;; sources.
  4293. (add-after 'build 'build-jni
  4294. (lambda* (#:key inputs #:allow-other-keys)
  4295. (mkdir-p "lib/jni")
  4296. (mkdir-p "jdk-src")
  4297. (invoke "tar" "--strip-components=1" "-C" "jdk-src"
  4298. "-xf" (assoc-ref inputs "jdk-src"))
  4299. (invoke "javah" "-jni"
  4300. "-classpath" "classes"
  4301. "-d" "lib/"
  4302. "net.sf.samtools.util.zip.IntelDeflater")
  4303. (with-directory-excursion "src/c/inteldeflater"
  4304. (invoke "gcc" "-I../../../lib" "-I."
  4305. (string-append "-I" (assoc-ref inputs "jdk")
  4306. "/include/linux")
  4307. "-I../../../jdk-src/src/share/native/common/"
  4308. "-I../../../jdk-src/src/solaris/native/common/"
  4309. "-c" "-O3" "-fPIC" "IntelDeflater.c")
  4310. (invoke "gcc" "-shared"
  4311. "-o" "../../../lib/jni/libIntelDeflater.so"
  4312. "IntelDeflater.o" "-lz" "-lstdc++"))
  4313. #t))
  4314. ;; We can only build everything else after building the JNI library.
  4315. (add-after 'build-jni 'build-rest
  4316. (lambda* (#:key make-flags #:allow-other-keys)
  4317. (apply invoke `("ant" "all" ,@make-flags))
  4318. #t))
  4319. (add-before 'build 'set-JAVA6_HOME
  4320. (lambda _
  4321. (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
  4322. #t))
  4323. (replace 'install (install-jars "dist"))
  4324. (add-after 'install 'install-jni-lib
  4325. (lambda* (#:key outputs #:allow-other-keys)
  4326. (let ((jni (string-append (assoc-ref outputs "out")
  4327. "/lib/jni")))
  4328. (mkdir-p jni)
  4329. (install-file "lib/jni/libIntelDeflater.so" jni)
  4330. #t))))))
  4331. (inputs
  4332. `(("java-snappy-1" ,java-snappy-1)
  4333. ("java-commons-jexl-2" ,java-commons-jexl-2)
  4334. ("java-cofoja" ,java-cofoja)
  4335. ("ant" ,ant/java8) ; for bzip2 support at runtime
  4336. ("zlib" ,zlib)))
  4337. (native-inputs
  4338. `(("ant-apache-bcel" ,ant-apache-bcel)
  4339. ("ant-junit" ,ant-junit)
  4340. ("java-testng" ,java-testng)
  4341. ("java-commons-bcel" ,java-commons-bcel)
  4342. ("java-jcommander" ,java-jcommander)
  4343. ("jdk" ,icedtea-8 "jdk")
  4344. ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
  4345. (define-public fastqc
  4346. (package
  4347. (name "fastqc")
  4348. (version "0.11.5")
  4349. (source
  4350. (origin
  4351. (method url-fetch)
  4352. (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
  4353. "projects/fastqc/fastqc_v"
  4354. version "_source.zip"))
  4355. (sha256
  4356. (base32
  4357. "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
  4358. (build-system ant-build-system)
  4359. (arguments
  4360. `(#:tests? #f ; there are no tests
  4361. #:build-target "build"
  4362. #:phases
  4363. (modify-phases %standard-phases
  4364. (add-after 'unpack 'fix-dependencies
  4365. (lambda* (#:key inputs #:allow-other-keys)
  4366. (substitute* "build.xml"
  4367. (("jbzip2-0.9.jar")
  4368. (search-input-file inputs "/share/java/jbzip2.jar"))
  4369. (("sam-1.103.jar")
  4370. (search-input-file inputs
  4371. "/share/java/sam-1.112.jar"))
  4372. (("cisd-jhdf5.jar")
  4373. (search-input-file inputs
  4374. "/share/java/sis-jhdf5.jar")))))
  4375. ;; There is no installation target
  4376. (replace 'install
  4377. (lambda* (#:key inputs outputs #:allow-other-keys)
  4378. (let* ((out (assoc-ref outputs "out"))
  4379. (bin (string-append out "/bin"))
  4380. (share (string-append out "/share/fastqc/"))
  4381. (exe (string-append share "/fastqc")))
  4382. (for-each mkdir-p (list bin share))
  4383. (copy-recursively "bin" share)
  4384. (substitute* exe
  4385. (("my \\$java_bin = 'java';")
  4386. (string-append "my $java_bin = '"
  4387. (assoc-ref inputs "java")
  4388. "/bin/java';")))
  4389. (chmod exe #o555)
  4390. (symlink exe (string-append bin "/fastqc"))
  4391. #t))))))
  4392. (inputs
  4393. `(("java" ,icedtea)
  4394. ("perl" ,perl) ; needed for the wrapper script
  4395. ("java-cisd-jhdf5" ,java-cisd-jhdf5)
  4396. ("java-picard-1.113" ,java-picard-1.113)
  4397. ("java-jbzip2" ,java-jbzip2)))
  4398. (native-inputs
  4399. `(("unzip" ,unzip)))
  4400. (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
  4401. (synopsis "Quality control tool for high throughput sequence data")
  4402. (description
  4403. "FastQC aims to provide a simple way to do some quality control
  4404. checks on raw sequence data coming from high throughput sequencing
  4405. pipelines. It provides a modular set of analyses which you can use to
  4406. give a quick impression of whether your data has any problems of which
  4407. you should be aware before doing any further analysis.
  4408. The main functions of FastQC are:
  4409. @itemize
  4410. @item Import of data from BAM, SAM or FastQ files (any variant);
  4411. @item Providing a quick overview to tell you in which areas there may
  4412. be problems;
  4413. @item Summary graphs and tables to quickly assess your data;
  4414. @item Export of results to an HTML based permanent report;
  4415. @item Offline operation to allow automated generation of reports
  4416. without running the interactive application.
  4417. @end itemize\n")
  4418. (license license:gpl3+)))
  4419. (define-public fastp
  4420. (package
  4421. (name "fastp")
  4422. (version "0.20.1")
  4423. (source
  4424. (origin
  4425. (method git-fetch)
  4426. (uri (git-reference
  4427. (url "https://github.com/OpenGene/fastp")
  4428. (commit (string-append "v" version))))
  4429. (file-name (git-file-name name version))
  4430. (sha256
  4431. (base32
  4432. "0ly8mxdvrcy23jwxyppysx3dhb1lwsqhfbgpyvargxhfk6k700x4"))))
  4433. (build-system gnu-build-system)
  4434. (arguments
  4435. `(#:tests? #f ; there are none
  4436. #:make-flags
  4437. (list (string-append "PREFIX=" (assoc-ref %outputs "out")))
  4438. #:phases
  4439. (modify-phases %standard-phases
  4440. (delete 'configure)
  4441. (add-before 'install 'create-target-dir
  4442. (lambda* (#:key outputs #:allow-other-keys)
  4443. (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
  4444. (inputs
  4445. `(("zlib" ,zlib)))
  4446. (home-page "https://github.com/OpenGene/fastp/")
  4447. (synopsis "All-in-one FastQ preprocessor")
  4448. (description
  4449. "Fastp is a tool designed to provide fast all-in-one preprocessing for
  4450. FastQ files. This tool has multi-threading support to afford high
  4451. performance.")
  4452. (license license:expat)))
  4453. (define-public htslib
  4454. (package
  4455. (name "htslib")
  4456. (version "1.12")
  4457. (source (origin
  4458. (method url-fetch)
  4459. (uri (string-append
  4460. "https://github.com/samtools/htslib/releases/download/"
  4461. version "/htslib-" version ".tar.bz2"))
  4462. (sha256
  4463. (base32
  4464. "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012"))))
  4465. (build-system gnu-build-system)
  4466. ;; Let htslib translate "gs://" and "s3://" to regular https links with
  4467. ;; "--enable-gcs" and "--enable-s3". For these options to work, we also
  4468. ;; need to set "--enable-libcurl".
  4469. (arguments
  4470. `(#:configure-flags '("--enable-gcs"
  4471. "--enable-libcurl"
  4472. "--enable-s3")))
  4473. (inputs
  4474. `(("curl" ,curl)
  4475. ("openssl" ,openssl)))
  4476. ;; This is referred to in the pkg-config file as a required library.
  4477. (propagated-inputs
  4478. `(("zlib" ,zlib)))
  4479. (native-inputs
  4480. `(("perl" ,perl)))
  4481. (home-page "https://www.htslib.org")
  4482. (synopsis "C library for reading/writing high-throughput sequencing data")
  4483. (description
  4484. "HTSlib is a C library for reading/writing high-throughput sequencing
  4485. data. It also provides the @command{bgzip}, @command{htsfile}, and
  4486. @command{tabix} utilities.")
  4487. ;; Files under cram/ are released under the modified BSD license;
  4488. ;; the rest is released under the Expat license
  4489. (license (list license:expat license:bsd-3))))
  4490. (define-public htslib-1.10
  4491. (package (inherit htslib)
  4492. (name "htslib")
  4493. (version "1.10")
  4494. (source (origin
  4495. (method url-fetch)
  4496. (uri (string-append
  4497. "https://github.com/samtools/htslib/releases/download/"
  4498. version "/htslib-" version ".tar.bz2"))
  4499. (sha256
  4500. (base32
  4501. "0wm9ay7qgypj3mwx9zl1mrpnr36298b1aj5vx69l4k7bzbclvr3s"))))))
  4502. (define-public htslib-1.9
  4503. (package (inherit htslib)
  4504. (name "htslib")
  4505. (version "1.9")
  4506. (source (origin
  4507. (method url-fetch)
  4508. (uri (string-append
  4509. "https://github.com/samtools/htslib/releases/download/"
  4510. version "/htslib-" version ".tar.bz2"))
  4511. (sha256
  4512. (base32
  4513. "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))))
  4514. ;; This package should be removed once no packages rely upon it.
  4515. (define htslib-1.3
  4516. (package
  4517. (inherit htslib)
  4518. (version "1.3.1")
  4519. (source (origin
  4520. (method url-fetch)
  4521. (uri (string-append
  4522. "https://github.com/samtools/htslib/releases/download/"
  4523. version "/htslib-" version ".tar.bz2"))
  4524. (sha256
  4525. (base32
  4526. "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
  4527. (define-public idr
  4528. (package
  4529. (name "idr")
  4530. (version "2.0.3")
  4531. (source (origin
  4532. (method git-fetch)
  4533. (uri (git-reference
  4534. (url "https://github.com/nboley/idr")
  4535. (commit version)))
  4536. (file-name (git-file-name name version))
  4537. (sha256
  4538. (base32
  4539. "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
  4540. ;; Delete generated C code.
  4541. (snippet
  4542. '(begin (delete-file "idr/inv_cdf.c") #t))))
  4543. (build-system python-build-system)
  4544. ;; There is only one test ("test_inv_cdf.py") and it tests features that
  4545. ;; are no longer part of this package. It also asserts False, which
  4546. ;; causes the tests to always fail.
  4547. (arguments `(#:tests? #f))
  4548. (propagated-inputs
  4549. `(("python-scipy" ,python-scipy)
  4550. ("python-sympy" ,python-sympy)
  4551. ("python-numpy" ,python-numpy)
  4552. ("python-matplotlib" ,python-matplotlib)))
  4553. (native-inputs
  4554. `(("python-cython" ,python-cython)))
  4555. (home-page "https://github.com/nboley/idr")
  4556. (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
  4557. (description
  4558. "The IDR (Irreproducible Discovery Rate) framework is a unified approach
  4559. to measure the reproducibility of findings identified from replicate
  4560. experiments and provide highly stable thresholds based on reproducibility.")
  4561. (license license:gpl2+)))
  4562. (define-public jellyfish
  4563. (package
  4564. (name "jellyfish")
  4565. (version "2.3.0")
  4566. (source (origin
  4567. (method url-fetch)
  4568. (uri (string-append "https://github.com/gmarcais/Jellyfish/"
  4569. "releases/download/v" version
  4570. "/jellyfish-" version ".tar.gz"))
  4571. (sha256
  4572. (base32
  4573. "0npa62wzasdibas5zp3n8j3armsci4kyvh0jw7jr0am4gg7vg5g1"))))
  4574. (build-system gnu-build-system)
  4575. (outputs '("out" ;for library
  4576. "ruby" ;for Ruby bindings
  4577. "python")) ;for Python bindings
  4578. (arguments
  4579. `(#:configure-flags
  4580. (list "--without-sse" ; configure script probes for CPU features when SSE is enabled.
  4581. (string-append "--enable-ruby-binding="
  4582. (assoc-ref %outputs "ruby"))
  4583. (string-append "--enable-python-binding="
  4584. (assoc-ref %outputs "python")))
  4585. #:phases
  4586. (modify-phases %standard-phases
  4587. (add-before 'check 'set-SHELL-variable
  4588. (lambda _
  4589. ;; generator_manager.hpp either uses /bin/sh or $SHELL
  4590. ;; to run tests.
  4591. (setenv "SHELL" (which "bash")))))))
  4592. (native-inputs
  4593. `(("bc" ,bc)
  4594. ("time" ,time)
  4595. ("ruby" ,ruby)
  4596. ("python" ,python-wrapper)
  4597. ("pkg-config" ,pkg-config)))
  4598. (inputs
  4599. `(("htslib" ,htslib)))
  4600. (synopsis "Tool for fast counting of k-mers in DNA")
  4601. (description
  4602. "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
  4603. DNA. A k-mer is a substring of length k, and counting the occurrences of all
  4604. such substrings is a central step in many analyses of DNA sequence. Jellyfish
  4605. is a command-line program that reads FASTA and multi-FASTA files containing
  4606. DNA sequences. It outputs its k-mer counts in a binary format, which can be
  4607. translated into a human-readable text format using the @code{jellyfish dump}
  4608. command, or queried for specific k-mers with @code{jellyfish query}.")
  4609. (home-page "http://www.genome.umd.edu/jellyfish.html")
  4610. ;; JELLYFISH seems to be 64-bit only.
  4611. (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
  4612. ;; One of these licenses may be picked
  4613. (license (list license:gpl3+ license:bsd-3))))
  4614. (define-public khmer
  4615. (package
  4616. (name "khmer")
  4617. (version "3.0.0a3")
  4618. (source
  4619. (origin
  4620. (method git-fetch)
  4621. (uri (git-reference
  4622. (url "https://github.com/dib-lab/khmer")
  4623. (commit (string-append "v" version))))
  4624. (file-name (git-file-name name version))
  4625. (sha256
  4626. (base32
  4627. "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
  4628. (modules '((guix build utils)))
  4629. (snippet
  4630. '(begin
  4631. ;; Delete bundled libraries. We do not replace the bundled seqan
  4632. ;; as it is a modified subset of the old version 1.4.1.
  4633. ;;
  4634. ;; We do not replace the bundled MurmurHash as the canonical
  4635. ;; repository for this code 'SMHasher' is unsuitable for providing
  4636. ;; a library. See
  4637. ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
  4638. (delete-file-recursively "third-party/zlib")
  4639. (delete-file-recursively "third-party/bzip2")
  4640. (delete-file-recursively "third-party/seqan")
  4641. (substitute* "setup.cfg"
  4642. (("# libraries = z,bz2")
  4643. "libraries = z,bz2")
  4644. (("include:third-party/zlib:third-party/bzip2")
  4645. "include:"))
  4646. #t))))
  4647. (build-system python-build-system)
  4648. (arguments
  4649. `(#:phases
  4650. (modify-phases %standard-phases
  4651. (add-after 'unpack 'set-cc
  4652. (lambda _ (setenv "CC" "gcc") #t))
  4653. (add-before 'reset-gzip-timestamps 'make-files-writable
  4654. (lambda* (#:key outputs #:allow-other-keys)
  4655. ;; Make sure .gz files are writable so that the
  4656. ;; 'reset-gzip-timestamps' phase can do its work.
  4657. (let ((out (assoc-ref outputs "out")))
  4658. (for-each make-file-writable
  4659. (find-files out "\\.gz$"))
  4660. #t))))))
  4661. (native-inputs
  4662. `(("python-cython" ,python-cython)
  4663. ("python-pytest" ,python-pytest)
  4664. ("python-pytest-runner" ,python-pytest-runner)))
  4665. (inputs
  4666. `(("zlib" ,zlib)
  4667. ("bzip2" ,bzip2)
  4668. ("seqan" ,seqan-1)
  4669. ("python-screed" ,python-screed)
  4670. ("python-bz2file" ,python-bz2file)))
  4671. (home-page "https://khmer.readthedocs.org/")
  4672. (synopsis "K-mer counting, filtering and graph traversal library")
  4673. (description "The khmer software is a set of command-line tools for
  4674. working with DNA shotgun sequencing data from genomes, transcriptomes,
  4675. metagenomes and single cells. Khmer can make de novo assemblies faster, and
  4676. sometimes better. Khmer can also identify and fix problems with shotgun
  4677. data.")
  4678. ;; When building on i686, armhf and mips64el, we get the following error:
  4679. ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
  4680. (supported-systems '("x86_64-linux" "aarch64-linux"))
  4681. (license license:bsd-3)))
  4682. (define-public kaiju
  4683. (package
  4684. (name "kaiju")
  4685. (version "1.6.3")
  4686. (source (origin
  4687. (method git-fetch)
  4688. (uri (git-reference
  4689. (url "https://github.com/bioinformatics-centre/kaiju")
  4690. (commit (string-append "v" version))))
  4691. (file-name (git-file-name name version))
  4692. (sha256
  4693. (base32
  4694. "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
  4695. (build-system gnu-build-system)
  4696. (arguments
  4697. `(#:tests? #f ; There are no tests.
  4698. #:phases
  4699. (modify-phases %standard-phases
  4700. (delete 'configure)
  4701. (add-before 'build 'move-to-src-dir
  4702. (lambda _ (chdir "src") #t))
  4703. (replace 'install
  4704. (lambda* (#:key inputs outputs #:allow-other-keys)
  4705. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  4706. (mkdir-p bin)
  4707. (chdir "..")
  4708. (copy-recursively "bin" bin))
  4709. #t)))))
  4710. (inputs
  4711. `(("perl" ,perl)
  4712. ("zlib" ,zlib)))
  4713. (home-page "http://kaiju.binf.ku.dk/")
  4714. (synopsis "Fast and sensitive taxonomic classification for metagenomics")
  4715. (description "Kaiju is a program for sensitive taxonomic classification
  4716. of high-throughput sequencing reads from metagenomic whole genome sequencing
  4717. experiments.")
  4718. (license license:gpl3+)))
  4719. (define-public macs
  4720. (package
  4721. (name "macs")
  4722. (version "2.2.7.1")
  4723. (source (origin
  4724. ;; The PyPi tarball does not contain tests.
  4725. (method git-fetch)
  4726. (uri (git-reference
  4727. (url "https://github.com/macs3-project/MACS")
  4728. (commit (string-append "v" version))))
  4729. (file-name (git-file-name name version))
  4730. (sha256
  4731. (base32
  4732. "08zsgh65xbpv1md2s3wqmrk9g2mz6izmn59ryw5lbac54120p291"))
  4733. (modules '((guix build utils)))
  4734. ;; Remove files generated by Cython
  4735. (snippet
  4736. '(begin
  4737. (for-each (lambda (file)
  4738. (let ((generated-file
  4739. (string-append (string-drop-right file 3) "c")))
  4740. (when (file-exists? generated-file)
  4741. (delete-file generated-file))))
  4742. (find-files "." "\\.pyx$"))
  4743. (delete-file "MACS2/IO/CallPeakUnitPrecompiled.c")))))
  4744. (build-system python-build-system)
  4745. (arguments
  4746. `(#:phases
  4747. (modify-phases %standard-phases
  4748. (replace 'check
  4749. (lambda* (#:key tests? inputs outputs #:allow-other-keys)
  4750. (when tests?
  4751. (add-installed-pythonpath inputs outputs)
  4752. (invoke "pytest" "-v")))))))
  4753. (inputs
  4754. `(("python-numpy" ,python-numpy)))
  4755. (native-inputs
  4756. `(("python-cython" ,python-cython)
  4757. ("python-pytest" ,python-pytest)))
  4758. (home-page "https://github.com/macs3-project/MACS")
  4759. (synopsis "Model based analysis for ChIP-Seq data")
  4760. (description
  4761. "MACS is an implementation of a ChIP-Seq analysis algorithm for
  4762. identifying transcript factor binding sites named Model-based Analysis of
  4763. ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
  4764. the significance of enriched ChIP regions and it improves the spatial
  4765. resolution of binding sites through combining the information of both
  4766. sequencing tag position and orientation.")
  4767. (license license:bsd-3)))
  4768. (define-public mafft
  4769. (package
  4770. (name "mafft")
  4771. (version "7.475")
  4772. (source (origin
  4773. (method url-fetch)
  4774. (uri (string-append
  4775. "https://mafft.cbrc.jp/alignment/software/mafft-" version
  4776. "-without-extensions-src.tgz"))
  4777. (file-name (string-append name "-" version ".tgz"))
  4778. (sha256
  4779. (base32
  4780. "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
  4781. (build-system gnu-build-system)
  4782. (arguments
  4783. `(#:tests? #f ; no automated tests, though there are tests in the read me
  4784. #:make-flags (let ((out (assoc-ref %outputs "out")))
  4785. (list (string-append "PREFIX=" out)
  4786. (string-append "BINDIR="
  4787. (string-append out "/bin"))))
  4788. #:phases
  4789. (modify-phases %standard-phases
  4790. (add-after 'unpack 'enter-dir
  4791. (lambda _ (chdir "core") #t))
  4792. (add-after 'enter-dir 'patch-makefile
  4793. (lambda _
  4794. ;; on advice from the MAFFT authors, there is no need to
  4795. ;; distribute mafft-profile, mafft-distance, or
  4796. ;; mafft-homologs.rb as they are too "specialised".
  4797. (substitute* "Makefile"
  4798. ;; remove mafft-homologs.rb from SCRIPTS
  4799. (("^SCRIPTS = mafft mafft-homologs.rb")
  4800. "SCRIPTS = mafft")
  4801. ;; remove mafft-homologs from MANPAGES
  4802. (("^MANPAGES = mafft.1 mafft-homologs.1")
  4803. "MANPAGES = mafft.1")
  4804. ;; remove mafft-distance from PROGS
  4805. (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
  4806. "PROGS = dvtditr dndfast7 dndblast sextet5")
  4807. ;; remove mafft-profile from PROGS
  4808. (("splittbfast disttbfast tbfast mafft-profile 2cl mccaskillwrap")
  4809. "splittbfast disttbfast tbfast f2cl mccaskillwrap")
  4810. (("^rm -f mafft-profile mafft-profile.exe") "#")
  4811. (("^rm -f mafft-distance mafft-distance.exe") ")#")
  4812. ;; do not install MAN pages in libexec folder
  4813. (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
  4814. \\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
  4815. #t))
  4816. (add-after 'enter-dir 'patch-paths
  4817. (lambda* (#:key inputs #:allow-other-keys)
  4818. (substitute* '("pairash.c"
  4819. "mafft.tmpl")
  4820. (("perl") (which "perl"))
  4821. (("([\"`| ])awk" _ prefix)
  4822. (string-append prefix (which "awk")))
  4823. (("grep") (which "grep")))
  4824. #t))
  4825. (delete 'configure)
  4826. (add-after 'install 'wrap-programs
  4827. (lambda* (#:key outputs #:allow-other-keys)
  4828. (let* ((out (assoc-ref outputs "out"))
  4829. (bin (string-append out "/bin"))
  4830. (path (string-append
  4831. (assoc-ref %build-inputs "coreutils") "/bin:")))
  4832. (for-each (lambda (file)
  4833. (wrap-program file
  4834. `("PATH" ":" prefix (,path))))
  4835. (find-files bin)))
  4836. #t)))))
  4837. (inputs
  4838. `(("perl" ,perl)
  4839. ("ruby" ,ruby)
  4840. ("gawk" ,gawk)
  4841. ("grep" ,grep)
  4842. ("coreutils" ,coreutils)))
  4843. (home-page "https://mafft.cbrc.jp/alignment/software/")
  4844. (synopsis "Multiple sequence alignment program")
  4845. (description
  4846. "MAFFT offers a range of multiple alignment methods for nucleotide and
  4847. protein sequences. For instance, it offers L-INS-i (accurate; for alignment
  4848. of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
  4849. sequences).")
  4850. (license (license:non-copyleft
  4851. "https://mafft.cbrc.jp/alignment/software/license.txt"
  4852. "BSD-3 with different formatting"))))
  4853. (define-public mash
  4854. (package
  4855. (name "mash")
  4856. (version "2.1")
  4857. (source (origin
  4858. (method git-fetch)
  4859. (uri (git-reference
  4860. (url "https://github.com/marbl/mash")
  4861. (commit (string-append "v" version))))
  4862. (file-name (git-file-name name version))
  4863. (sha256
  4864. (base32
  4865. "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
  4866. (modules '((guix build utils)))
  4867. (snippet
  4868. '(begin
  4869. ;; Delete bundled kseq.
  4870. ;; TODO: Also delete bundled murmurhash and open bloom filter.
  4871. (delete-file "src/mash/kseq.h")
  4872. #t))))
  4873. (build-system gnu-build-system)
  4874. (arguments
  4875. `(#:tests? #f ; No tests.
  4876. #:configure-flags
  4877. (list
  4878. (string-append "--with-capnp=" (assoc-ref %build-inputs "capnproto"))
  4879. (string-append "--with-gsl=" (assoc-ref %build-inputs "gsl")))
  4880. #:make-flags (list "CC=gcc")
  4881. #:phases
  4882. (modify-phases %standard-phases
  4883. (add-after 'unpack 'fix-includes
  4884. (lambda _
  4885. (substitute* '("src/mash/Sketch.cpp"
  4886. "src/mash/CommandFind.cpp"
  4887. "src/mash/CommandScreen.cpp")
  4888. (("^#include \"kseq\\.h\"")
  4889. "#include \"htslib/kseq.h\""))
  4890. #t))
  4891. (add-after 'fix-includes 'use-c++14
  4892. (lambda _
  4893. ;; capnproto 0.7 requires c++14 to build
  4894. (substitute* "configure.ac"
  4895. (("c\\+\\+11") "c++14"))
  4896. (substitute* "Makefile.in"
  4897. (("c\\+\\+11") "c++14"))
  4898. #t)))))
  4899. (native-inputs
  4900. `(("autoconf" ,autoconf)
  4901. ;; Capnproto and htslib are statically embedded in the final
  4902. ;; application. Therefore we also list their licenses, below.
  4903. ("capnproto" ,capnproto)
  4904. ("htslib" ,htslib)))
  4905. (inputs
  4906. `(("gsl" ,gsl)
  4907. ("zlib" ,zlib)))
  4908. (supported-systems '("x86_64-linux"))
  4909. (home-page "https://mash.readthedocs.io")
  4910. (synopsis "Fast genome and metagenome distance estimation using MinHash")
  4911. (description "Mash is a fast sequence distance estimator that uses the
  4912. MinHash algorithm and is designed to work with genomes and metagenomes in the
  4913. form of assemblies or reads.")
  4914. (license (list license:bsd-3 ; Mash
  4915. license:expat ; HTSlib and capnproto
  4916. license:public-domain ; MurmurHash 3
  4917. license:cpl1.0)))) ; Open Bloom Filter
  4918. (define-public metabat
  4919. (package
  4920. (name "metabat")
  4921. (version "2.12.1")
  4922. (source
  4923. (origin
  4924. (method git-fetch)
  4925. (uri (git-reference
  4926. (url "https://bitbucket.org/berkeleylab/metabat.git")
  4927. (commit (string-append "v" version))))
  4928. (file-name (git-file-name name version))
  4929. (sha256
  4930. (base32
  4931. "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
  4932. (patches (search-patches "metabat-fix-compilation.patch"))))
  4933. (build-system scons-build-system)
  4934. (arguments
  4935. `(#:scons ,scons-python2
  4936. #:scons-flags
  4937. (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
  4938. (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
  4939. #:tests? #f ;; Tests are run during the build phase.
  4940. #:phases
  4941. (modify-phases %standard-phases
  4942. (add-after 'unpack 'fix-includes
  4943. (lambda _
  4944. (substitute* "src/BamUtils.h"
  4945. (("^#include \"bam/bam\\.h\"")
  4946. "#include \"samtools/bam.h\"")
  4947. (("^#include \"bam/sam\\.h\"")
  4948. "#include \"samtools/sam.h\""))
  4949. (substitute* "src/KseqReader.h"
  4950. (("^#include \"bam/kseq\\.h\"")
  4951. "#include \"htslib/kseq.h\""))
  4952. #t))
  4953. (add-after 'unpack 'fix-scons
  4954. (lambda* (#:key inputs #:allow-other-keys)
  4955. (substitute* "SConstruct"
  4956. (("^htslib_dir += 'samtools'")
  4957. (string-append "htslib_dir = '"
  4958. (assoc-ref inputs "htslib")
  4959. "'"))
  4960. (("^samtools_dir = 'samtools'")
  4961. (string-append "samtools_dir = '"
  4962. (assoc-ref inputs "samtools")
  4963. "'"))
  4964. (("^findStaticOrShared\\('bam', hts_lib")
  4965. (string-append "findStaticOrShared('bam', '"
  4966. (assoc-ref inputs "samtools")
  4967. "/lib'"))
  4968. ;; Do not distribute README.
  4969. (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
  4970. #t)))))
  4971. (inputs
  4972. `(("zlib" ,zlib)
  4973. ("perl" ,perl)
  4974. ("samtools" ,samtools)
  4975. ("htslib" ,htslib)
  4976. ("boost" ,boost)))
  4977. (home-page "https://bitbucket.org/berkeleylab/metabat")
  4978. (synopsis
  4979. "Reconstruction of single genomes from complex microbial communities")
  4980. (description
  4981. "Grouping large genomic fragments assembled from shotgun metagenomic
  4982. sequences to deconvolute complex microbial communities, or metagenome binning,
  4983. enables the study of individual organisms and their interactions. MetaBAT is
  4984. an automated metagenome binning software, which integrates empirical
  4985. probabilistic distances of genome abundance and tetranucleotide frequency.")
  4986. ;; The source code contains inline assembly.
  4987. (supported-systems '("x86_64-linux" "i686-linux"))
  4988. (license (license:non-copyleft "file://license.txt"
  4989. "See license.txt in the distribution."))))
  4990. (define-public minced
  4991. (package
  4992. (name "minced")
  4993. (version "0.3.2")
  4994. (source (origin
  4995. (method git-fetch)
  4996. (uri (git-reference
  4997. (url "https://github.com/ctSkennerton/minced")
  4998. (commit version)))
  4999. (file-name (git-file-name name version))
  5000. (sha256
  5001. (base32
  5002. "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
  5003. (build-system gnu-build-system)
  5004. (arguments
  5005. `(#:test-target "test"
  5006. #:phases
  5007. (modify-phases %standard-phases
  5008. (delete 'configure)
  5009. (add-before 'check 'fix-test
  5010. (lambda _
  5011. ;; Fix test for latest version.
  5012. (substitute* "t/Aquifex_aeolicus_VF5.expected"
  5013. (("minced:0.1.6") "minced:0.2.0"))
  5014. #t))
  5015. (replace 'install ; No install target.
  5016. (lambda* (#:key inputs outputs #:allow-other-keys)
  5017. (let* ((out (assoc-ref outputs "out"))
  5018. (bin (string-append out "/bin"))
  5019. (wrapper (string-append bin "/minced")))
  5020. ;; Minced comes with a wrapper script that tries to figure out where
  5021. ;; it is located before running the JAR. Since these paths are known
  5022. ;; to us, we build our own wrapper to avoid coreutils dependency.
  5023. (install-file "minced.jar" bin)
  5024. (with-output-to-file wrapper
  5025. (lambda _
  5026. (display
  5027. (string-append
  5028. "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
  5029. (assoc-ref inputs "jre") "/bin/java -jar "
  5030. bin "/minced.jar \"$@\"\n"))))
  5031. (chmod wrapper #o555))
  5032. #t)))))
  5033. (native-inputs
  5034. `(("jdk" ,icedtea "jdk")))
  5035. (inputs
  5036. `(("bash" ,bash)
  5037. ("jre" ,icedtea "out")))
  5038. (home-page "https://github.com/ctSkennerton/minced")
  5039. (synopsis "Mining CRISPRs in Environmental Datasets")
  5040. (description
  5041. "MinCED is a program to find Clustered Regularly Interspaced Short
  5042. Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
  5043. unassembled metagenomic reads, but is mainly designed for full genomes and
  5044. assembled metagenomic sequence.")
  5045. (license license:gpl3+)))
  5046. (define-public miso
  5047. (package
  5048. (name "miso")
  5049. (version "0.5.4")
  5050. (source (origin
  5051. (method url-fetch)
  5052. (uri (pypi-uri "misopy" version))
  5053. (sha256
  5054. (base32
  5055. "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
  5056. (modules '((guix build utils)))
  5057. (snippet '(begin
  5058. (substitute* "setup.py"
  5059. ;; Use setuptools, or else the executables are not
  5060. ;; installed.
  5061. (("distutils.core") "setuptools")
  5062. ;; Use "gcc" instead of "cc" for compilation.
  5063. (("^defines")
  5064. "cc.set_executables(
  5065. compiler='gcc',
  5066. compiler_so='gcc',
  5067. linker_exe='gcc',
  5068. linker_so='gcc -shared'); defines"))
  5069. #t))))
  5070. (build-system python-build-system)
  5071. (arguments
  5072. `(#:python ,python-2 ; only Python 2 is supported
  5073. #:tests? #f)) ; no "test" target
  5074. (inputs
  5075. `(("samtools" ,samtools)
  5076. ("python-numpy" ,python2-numpy)
  5077. ("python-pysam" ,python2-pysam)
  5078. ("python-scipy" ,python2-scipy)
  5079. ("python-matplotlib" ,python2-matplotlib)))
  5080. (native-inputs
  5081. `(("python-mock" ,python2-mock) ; for tests
  5082. ("python-pytz" ,python2-pytz))) ; for tests
  5083. (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
  5084. (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
  5085. (description
  5086. "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
  5087. the expression level of alternatively spliced genes from RNA-Seq data, and
  5088. identifies differentially regulated isoforms or exons across samples. By
  5089. modeling the generative process by which reads are produced from isoforms in
  5090. RNA-Seq, the MISO model uses Bayesian inference to compute the probability
  5091. that a read originated from a particular isoform.")
  5092. (license license:gpl2)))
  5093. (define-public muscle
  5094. (package
  5095. (name "muscle")
  5096. (version "3.8.1551")
  5097. (source (origin
  5098. (method url-fetch/tarbomb)
  5099. (uri (string-append
  5100. "http://www.drive5.com/muscle/muscle_src_"
  5101. version ".tar.gz"))
  5102. (sha256
  5103. (base32
  5104. "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
  5105. (build-system gnu-build-system)
  5106. (arguments
  5107. `(#:make-flags (list "LDLIBS = -lm")
  5108. #:phases
  5109. (modify-phases %standard-phases
  5110. (delete 'configure)
  5111. (replace 'check
  5112. ;; There are no tests, so just test if it runs.
  5113. (lambda _ (invoke "./muscle" "-version") #t))
  5114. (replace 'install
  5115. (lambda* (#:key outputs #:allow-other-keys)
  5116. (let* ((out (assoc-ref outputs "out"))
  5117. (bin (string-append out "/bin")))
  5118. (install-file "muscle" bin)
  5119. #t))))))
  5120. (home-page "http://www.drive5.com/muscle")
  5121. (synopsis "Multiple sequence alignment program")
  5122. (description
  5123. "MUSCLE aims to be a fast and accurate multiple sequence alignment
  5124. program for nucleotide and protein sequences.")
  5125. ;; License information found in 'muscle -h' and usage.cpp.
  5126. (license license:public-domain)))
  5127. (define-public newick-utils
  5128. ;; There are no recent releases so we package from git.
  5129. (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
  5130. (package
  5131. (name "newick-utils")
  5132. (version (string-append "1.6-1." (string-take commit 8)))
  5133. (source (origin
  5134. (method git-fetch)
  5135. (uri (git-reference
  5136. (url "https://github.com/tjunier/newick_utils")
  5137. (commit commit)))
  5138. (file-name (string-append name "-" version "-checkout"))
  5139. (sha256
  5140. (base32
  5141. "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
  5142. (build-system gnu-build-system)
  5143. (inputs
  5144. ;; XXX: TODO: Enable Lua and Guile bindings.
  5145. ;; https://github.com/tjunier/newick_utils/issues/13
  5146. `(("libxml2" ,libxml2)
  5147. ("flex" ,flex)
  5148. ("bison" ,bison)))
  5149. (native-inputs
  5150. `(("autoconf" ,autoconf)
  5151. ("automake" ,automake)
  5152. ("libtool" ,libtool)))
  5153. (synopsis "Programs for working with newick format phylogenetic trees")
  5154. (description
  5155. "Newick-utils is a suite of utilities for processing phylogenetic trees
  5156. in Newick format. Functions include re-rooting, extracting subtrees,
  5157. trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
  5158. (home-page "https://github.com/tjunier/newick_utils")
  5159. (license license:bsd-3))))
  5160. (define-public orfm
  5161. (package
  5162. (name "orfm")
  5163. (version "0.7.1")
  5164. (source (origin
  5165. (method url-fetch)
  5166. (uri (string-append
  5167. "https://github.com/wwood/OrfM/releases/download/v"
  5168. version "/orfm-" version ".tar.gz"))
  5169. (sha256
  5170. (base32
  5171. "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
  5172. (build-system gnu-build-system)
  5173. (inputs `(("zlib" ,zlib)))
  5174. (native-inputs
  5175. `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
  5176. ("ruby-rspec" ,ruby-rspec)
  5177. ("ruby" ,ruby)))
  5178. (synopsis "Simple and not slow open reading frame (ORF) caller")
  5179. (description
  5180. "An ORF caller finds stretches of DNA that, when translated, are not
  5181. interrupted by stop codons. OrfM finds and prints these ORFs.")
  5182. (home-page "https://github.com/wwood/OrfM")
  5183. (license license:lgpl3+)))
  5184. (define-public python2-pbcore
  5185. (package
  5186. (name "python2-pbcore")
  5187. (version "1.2.10")
  5188. (source (origin
  5189. (method url-fetch)
  5190. (uri (pypi-uri "pbcore" version))
  5191. (sha256
  5192. (base32
  5193. "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
  5194. (build-system python-build-system)
  5195. (arguments
  5196. `(#:python ,python-2 ;pbcore < 2.0 requires Python 2.7
  5197. #:phases (modify-phases %standard-phases
  5198. (add-after 'unpack 'remove-sphinx-dependency
  5199. (lambda _
  5200. ;; Sphinx is only required for documentation tests, which
  5201. ;; we do not run; furthermore it depends on python2-sphinx
  5202. ;; which is no longer maintained.
  5203. (substitute* "requirements-dev.txt"
  5204. (("^sphinx") ""))
  5205. #t)))))
  5206. (propagated-inputs
  5207. `(("python-cython" ,python2-cython)
  5208. ("python-numpy" ,python2-numpy)
  5209. ("python-pysam" ,python2-pysam)
  5210. ("python-h5py" ,python2-h5py)))
  5211. (native-inputs
  5212. `(("python-nose" ,python2-nose)
  5213. ("python-pyxb" ,python2-pyxb)))
  5214. (home-page "https://pacificbiosciences.github.io/pbcore/")
  5215. (synopsis "Library for reading and writing PacBio data files")
  5216. (description
  5217. "The pbcore package provides Python APIs for interacting with PacBio data
  5218. files and writing bioinformatics applications.")
  5219. (license license:bsd-3)))
  5220. (define-public python2-warpedlmm
  5221. (package
  5222. (name "python2-warpedlmm")
  5223. (version "0.21")
  5224. (source
  5225. (origin
  5226. (method url-fetch)
  5227. (uri (pypi-uri "WarpedLMM" version ".zip"))
  5228. (sha256
  5229. (base32
  5230. "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
  5231. (build-system python-build-system)
  5232. (arguments
  5233. `(#:python ,python-2 ; requires Python 2.7
  5234. #:tests? #f ; test data are not included
  5235. #:phases
  5236. (modify-phases %standard-phases
  5237. (add-after 'unpack 'use-weave
  5238. (lambda _
  5239. (substitute* "warpedlmm/util/linalg.py"
  5240. (("from scipy import linalg, weave")
  5241. "from scipy import linalg\nimport weave"))
  5242. #t)))))
  5243. (propagated-inputs
  5244. `(("python-scipy" ,python2-scipy)
  5245. ("python-numpy" ,python2-numpy)
  5246. ("python-matplotlib" ,python2-matplotlib)
  5247. ("python-fastlmm" ,python2-fastlmm)
  5248. ("python-pandas" ,python2-pandas)
  5249. ("python-pysnptools" ,python2-pysnptools)
  5250. ("python-weave" ,python2-weave)))
  5251. (native-inputs
  5252. `(("python-mock" ,python2-mock)
  5253. ("python-nose" ,python2-nose)
  5254. ("unzip" ,unzip)))
  5255. (home-page "https://github.com/PMBio/warpedLMM")
  5256. (synopsis "Implementation of warped linear mixed models")
  5257. (description
  5258. "WarpedLMM is a Python implementation of the warped linear mixed model,
  5259. which automatically learns an optimal warping function (or transformation) for
  5260. the phenotype as it models the data.")
  5261. (license license:asl2.0)))
  5262. (define-public pbtranscript-tofu
  5263. (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
  5264. (package
  5265. (name "pbtranscript-tofu")
  5266. (version (string-append "2.2.3." (string-take commit 7)))
  5267. (source (origin
  5268. (method git-fetch)
  5269. (uri (git-reference
  5270. (url "https://github.com/PacificBiosciences/cDNA_primer")
  5271. (commit commit)))
  5272. (file-name (string-append name "-" version "-checkout"))
  5273. (sha256
  5274. (base32
  5275. "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
  5276. (modules '((guix build utils)))
  5277. (snippet
  5278. '(begin
  5279. ;; remove bundled Cython sources
  5280. (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
  5281. #t))))
  5282. (build-system python-build-system)
  5283. (arguments
  5284. `(#:python ,python-2
  5285. ;; FIXME: Tests fail with "No such file or directory:
  5286. ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
  5287. #:tests? #f
  5288. #:phases
  5289. (modify-phases %standard-phases
  5290. (add-after 'unpack 'enter-directory
  5291. (lambda _
  5292. (chdir "pbtranscript-tofu/pbtranscript/")
  5293. #t))
  5294. ;; With setuptools version 18.0 and later this setup.py hack causes
  5295. ;; a build error, so we disable it.
  5296. (add-after 'enter-directory 'patch-setuppy
  5297. (lambda _
  5298. (substitute* "setup.py"
  5299. (("if 'setuptools.extension' in sys.modules:")
  5300. "if False:"))
  5301. #t)))))
  5302. (inputs
  5303. `(("python-numpy" ,python2-numpy)
  5304. ("python-bx-python" ,python2-bx-python)
  5305. ("python-networkx" ,python2-networkx)
  5306. ("python-scipy" ,python2-scipy)
  5307. ("python-pbcore" ,python2-pbcore)
  5308. ("python-h5py" ,python2-h5py)))
  5309. (native-inputs
  5310. `(("python-cython" ,python2-cython)
  5311. ("python-nose" ,python2-nose)))
  5312. (home-page "https://github.com/PacificBiosciences/cDNA_primer")
  5313. (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
  5314. (description
  5315. "pbtranscript-tofu contains scripts to analyze transcriptome data
  5316. generated using the PacBio Iso-Seq protocol.")
  5317. (license license:bsd-3))))
  5318. (define-public prank
  5319. (package
  5320. (name "prank")
  5321. (version "170427")
  5322. (source (origin
  5323. (method url-fetch)
  5324. (uri (string-append
  5325. "http://wasabiapp.org/download/prank/prank.source."
  5326. version ".tgz"))
  5327. (sha256
  5328. (base32
  5329. "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
  5330. (build-system gnu-build-system)
  5331. (arguments
  5332. `(#:phases
  5333. (modify-phases %standard-phases
  5334. (add-after 'unpack 'enter-src-dir
  5335. (lambda _
  5336. (chdir "src")
  5337. #t))
  5338. (add-after 'unpack 'remove-m64-flag
  5339. ;; Prank will build with the correct 'bit-ness' without this flag
  5340. ;; and this allows building on 32-bit machines.
  5341. (lambda _ (substitute* "src/Makefile"
  5342. (("-m64") ""))
  5343. #t))
  5344. (delete 'configure)
  5345. (replace 'install
  5346. (lambda* (#:key outputs #:allow-other-keys)
  5347. (let* ((out (assoc-ref outputs "out"))
  5348. (bin (string-append out "/bin"))
  5349. (man (string-append out "/share/man/man1"))
  5350. (path (string-append
  5351. (assoc-ref %build-inputs "mafft") "/bin:"
  5352. (assoc-ref %build-inputs "exonerate") "/bin:"
  5353. (assoc-ref %build-inputs "bppsuite") "/bin")))
  5354. (install-file "prank" bin)
  5355. (wrap-program (string-append bin "/prank")
  5356. `("PATH" ":" prefix (,path)))
  5357. (install-file "prank.1" man))
  5358. #t)))))
  5359. (inputs
  5360. `(("mafft" ,mafft)
  5361. ("exonerate" ,exonerate)
  5362. ("bppsuite" ,bppsuite)))
  5363. (home-page "http://wasabiapp.org/software/prank/")
  5364. (synopsis "Probabilistic multiple sequence alignment program")
  5365. (description
  5366. "PRANK is a probabilistic multiple sequence alignment program for DNA,
  5367. codon and amino-acid sequences. It is based on a novel algorithm that treats
  5368. insertions correctly and avoids over-estimation of the number of deletion
  5369. events. In addition, PRANK borrows ideas from maximum likelihood methods used
  5370. in phylogenetics and correctly takes into account the evolutionary distances
  5371. between sequences. Lastly, PRANK allows for defining a potential structure
  5372. for sequences to be aligned and then, simultaneously with the alignment,
  5373. predicts the locations of structural units in the sequences.")
  5374. (license license:gpl2+)))
  5375. (define-public proteinortho
  5376. (package
  5377. (name "proteinortho")
  5378. (version "6.0.14")
  5379. (source (origin
  5380. (method git-fetch)
  5381. (uri (git-reference
  5382. (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
  5383. (commit (string-append "v" version))))
  5384. (file-name (git-file-name name version))
  5385. (sha256
  5386. (base32
  5387. "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
  5388. (modules '((guix build utils)))
  5389. (snippet
  5390. '(begin
  5391. ;; remove pre-built scripts
  5392. (delete-file-recursively "src/BUILD/")
  5393. #t))))
  5394. (build-system gnu-build-system)
  5395. (arguments
  5396. `(#:test-target "test"
  5397. #:make-flags '("CC=gcc")
  5398. #:phases
  5399. (modify-phases %standard-phases
  5400. (replace 'configure
  5401. ;; There is no configure script, so we modify the Makefile directly.
  5402. (lambda* (#:key outputs #:allow-other-keys)
  5403. (substitute* "Makefile"
  5404. (("INSTALLDIR=.*")
  5405. (string-append
  5406. "INSTALLDIR=" (assoc-ref outputs "out") "/bin\n")))
  5407. #t))
  5408. (add-before 'install 'make-install-directory
  5409. ;; The install directory is not created during 'make install'.
  5410. (lambda* (#:key outputs #:allow-other-keys)
  5411. (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
  5412. #t))
  5413. (add-after 'install 'wrap-programs
  5414. (lambda* (#:key inputs outputs #:allow-other-keys)
  5415. (let ((path (getenv "PATH"))
  5416. (out (assoc-ref outputs "out"))
  5417. (guile (search-input-file inputs "bin/guile")))
  5418. (for-each (lambda (script)
  5419. (wrap-script script #:guile guile
  5420. `("PATH" ":" prefix (,path))))
  5421. (cons (string-append out "/bin/proteinortho")
  5422. (find-files out "\\.(pl|py)$"))))
  5423. #t)))))
  5424. (inputs
  5425. `(("guile" ,guile-3.0) ; for wrap-script
  5426. ("diamond" ,diamond)
  5427. ("perl" ,perl)
  5428. ("python" ,python-wrapper)
  5429. ("blast+" ,blast+)
  5430. ("lapack" ,lapack)
  5431. ("openblas" ,openblas)))
  5432. (native-inputs
  5433. `(("which" ,which)))
  5434. (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
  5435. (synopsis "Detect orthologous genes across species")
  5436. (description
  5437. "Proteinortho is a tool to detect orthologous genes across different
  5438. species. For doing so, it compares similarities of given gene sequences and
  5439. clusters them to find significant groups. The algorithm was designed to handle
  5440. large-scale data and can be applied to hundreds of species at once.")
  5441. (license license:gpl3+)))
  5442. (define-public pyicoteo
  5443. (package
  5444. (name "pyicoteo")
  5445. (version "2.0.7")
  5446. (source
  5447. (origin
  5448. (method git-fetch)
  5449. (uri (git-reference
  5450. (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
  5451. (commit (string-append "v" version))))
  5452. (file-name (git-file-name name version))
  5453. (sha256
  5454. (base32
  5455. "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
  5456. (build-system python-build-system)
  5457. (arguments
  5458. `(#:python ,python-2 ; does not work with Python 3
  5459. #:tests? #f)) ; there are no tests
  5460. (inputs
  5461. `(("python2-matplotlib" ,python2-matplotlib)))
  5462. (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
  5463. (synopsis "Analyze high-throughput genetic sequencing data")
  5464. (description
  5465. "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
  5466. sequencing data. It works with genomic coordinates. There are currently six
  5467. different command-line tools:
  5468. @enumerate
  5469. @item pyicoregion: for generating exploratory regions automatically;
  5470. @item pyicoenrich: for differential enrichment between two conditions;
  5471. @item pyicoclip: for calling CLIP-Seq peaks without a control;
  5472. @item pyicos: for genomic coordinates manipulation;
  5473. @item pyicoller: for peak calling on punctuated ChIP-Seq;
  5474. @item pyicount: to count how many reads from N experiment files overlap in a
  5475. region file;
  5476. @item pyicotrocol: to combine operations from pyicoteo.
  5477. @end enumerate\n")
  5478. (license license:gpl3+)))
  ;; Prodigal gene predictor.  The 'configure' phase is deleted and the
  ;; test phase is disabled; installation is driven by a make variable.
  (define-public prodigal
    (package
      (name "prodigal")
      ;; Check for a new home page when updating this package:
      ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
      (version "2.6.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/hyattpd/Prodigal")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure))
         ;; Hand the output's "bin" directory to make as INSTALLDIR.
         #:make-flags
         (let ((bin (string-append (assoc-ref %outputs "out") "/bin")))
           (list (string-append "INSTALLDIR=" bin)))
         #:tests? #f))                  ; no check target
      (home-page "https://github.com/hyattpd/Prodigal")
      (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
      (description
       "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
      (license license:gpl3+)))
  ;; Roary pan genome pipeline, fetched from CPAN as Bio-Roary.  The
  ;; 'configure' and 'build' phases are deleted: the scripts under "bin" and
  ;; the modules under "lib" are tested in place, copied to the output and
  ;; then wrapped so that they find their Perl, R and command-line
  ;; dependencies.
  (define-public roary
    (package
      (name "roary")
      (version "3.12.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
               version ".tar.gz"))
         (sha256
          (base32
           "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'check
             (lambda _
               ;; The tests are not run by default, so we run each test file
               ;; directly.
               (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                             (getenv "PATH")))
               (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                                 (getenv "PERL5LIB")))
               (for-each (lambda (file)
                           (display file)(display "\n")
                           (invoke "perl" file))
                         (find-files "t" ".*\\.t$"))
               #t))
           (replace 'install
             ;; There is no 'install' target in the Makefile.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (perl (string-append out "/lib/perl5/site_perl")))
                 (mkdir-p bin)
                 (mkdir-p perl)
                 (copy-recursively "bin" bin)
                 (copy-recursively "lib" perl)
                 #t)))
           (add-after 'install 'wrap-programs
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; NOTE(review): PERL5LIB and PATH are read from the build
               ;; environment here, so they still contain whatever the
               ;; 'check phase prepended when it ran.
               (let* ((out (assoc-ref outputs "out"))
                      (perl5lib (getenv "PERL5LIB"))
                      (path (getenv "PATH")))
                 ;; Wrap everything under "bin" whose name does not end in
                 ;; "R".  'find-files' is run on the source tree and returns
                 ;; names of the form "bin/PROG", hence the plain "/" join.
                 ;; Both variables are set by a single 'wrap-program' call.
                 (for-each (lambda (prog)
                             (wrap-program (string-append out "/" prog)
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl")))
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin")))))
                           (find-files "bin" ".*[^R]$"))
                 ;; The R plotting script gets the R library path and the
                 ;; directory of the input that provides "bin/chmod".
                 (let ((file
                        (string-append out "/bin/roary-create_pan_genome_plots.R"))
                       (r-site-lib (getenv "R_LIBS_SITE"))
                       (coreutils-path
                        (dirname (search-input-file inputs "bin/chmod"))))
                   (wrap-program file
                     `("R_LIBS_SITE" ":" prefix
                       (,(string-append r-site-lib ":" out "/site-library/")))
                     `("PATH" ":" prefix
                       (,(string-append coreutils-path ":" out "/bin"))))))
               #t)))))
      (native-inputs
       `(("perl-env-path" ,perl-env-path)
         ("perl-test-files" ,perl-test-files)
         ("perl-test-most" ,perl-test-most)
         ("perl-test-output" ,perl-test-output)))
      (inputs
       `(("perl-array-utils" ,perl-array-utils)
         ("bioperl" ,bioperl-minimal)
         ("perl-digest-md5-file" ,perl-digest-md5-file)
         ("perl-exception-class" ,perl-exception-class)
         ("perl-file-find-rule" ,perl-file-find-rule)
         ("perl-file-grep" ,perl-file-grep)
         ("perl-file-slurper" ,perl-file-slurper)
         ("perl-file-which" ,perl-file-which)
         ("perl-graph" ,perl-graph)
         ("perl-graph-readwrite" ,perl-graph-readwrite)
         ("perl-log-log4perl" ,perl-log-log4perl)
         ("perl-moose" ,perl-moose)
         ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
         ("perl-text-csv" ,perl-text-csv)
         ("bedtools" ,bedtools)
         ("cd-hit" ,cd-hit)
         ("blast+" ,blast+)
         ("mcl" ,mcl)
         ("parallel" ,parallel)
         ("prank" ,prank)
         ("mafft" ,mafft)
         ("fasttree" ,fasttree)
         ("grep" ,grep)
         ("sed" ,sed)
         ("gawk" ,gawk)
         ("r-minimal" ,r-minimal)
         ("r-ggplot2" ,r-ggplot2)
         ("coreutils" ,coreutils)))
      (home-page "https://sanger-pathogens.github.io/Roary/")
      (synopsis "High speed stand-alone pan genome pipeline")
      (description
       "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
      (license license:gpl3)))
  ;; RAxML, built with Makefile.HYBRID.gcc into the single executable
  ;; "raxmlHPC-HYBRID", which is installed to the output's "bin" directory
  ;; together with a shorter "raxml" alias.
  (define-public raxml
    (package
      (name "raxml")
      (version "8.2.12")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/stamatak/standard-RAxML")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests.
         ;; Use 'standard' Makefile rather than SSE or AVX ones.
         #:make-flags (list "-f" "Makefile.HYBRID.gcc")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (executable "raxmlHPC-HYBRID"))
                 (install-file executable bin)
                 ;; 'symlink' takes the link target first and the name of
                 ;; the link to create second.  The link has to be created
                 ;; inside BIN; a bare "raxml" would land in the build
                 ;; directory and never reach the output.  The target is
                 ;; relative, so it resolves to the sibling file in BIN.
                 (symlink executable (string-append bin "/raxml")))
               #t)))))
      (inputs
       `(("openmpi" ,openmpi)))
      (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
      (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
      (description
       "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
      ;; The source includes x86 specific code
      (supported-systems '("x86_64-linux" "i686-linux"))
      (license license:gpl2+)))
  ;; RSEM.  The bundled "boost" and "samtools-1.3" directories are removed
  ;; from the source; the Makefile is pointed at the boost and htslib inputs
  ;; through make variables instead.
  (define-public rsem
    (package
      (name "rsem")
      (version "1.3.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/deweylab/RSEM")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled copy of boost and samtools
             (delete-file-recursively "boost")
             (delete-file-recursively "samtools-1.3")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no "check" target
         #:make-flags
         (let ((boost (assoc-ref %build-inputs "boost"))
               (htslib (assoc-ref %build-inputs "htslib")))
           (list (string-append "BOOST=" boost "/include/")
                 (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
                 (string-append "SAMLIBS=" htslib "/lib/libhts.so")))
         #:phases
         (modify-phases %standard-phases
           ;; No "configure" script.
           ;; Do not build bundled samtools library.
           (replace 'configure
             (lambda _
               (substitute* "Makefile"
                 (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
                 (("^\\$\\(SAMLIBS\\).*") ""))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Everything matching "rsem-.*" goes to bin; the Perl helper
               ;; module goes to the site_perl directory.
               (let* ((prefix (assoc-ref outputs "out"))
                      (bindir (string-append prefix "/bin/"))
                      (perldir (string-append prefix "/lib/perl5/site_perl")))
                 (mkdir-p bindir)
                 (mkdir-p perldir)
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "." "rsem-.*"))
                 (install-file "rsem_perl_utils.pm" perldir))
               #t))
           (add-after 'install 'wrap-program
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Let the listed programs locate the Perl module installed
               ;; by the 'install phase.
               (let* ((prefix (assoc-ref outputs "out"))
                      (perldir (string-append prefix "/lib/perl5/site_perl"))
                      (programs '("rsem-calculate-expression"
                                  "rsem-control-fdr"
                                  "rsem-generate-data-matrix"
                                  "rsem-generate-ngvector"
                                  "rsem-plot-transcript-wiggles"
                                  "rsem-prepare-reference"
                                  "rsem-run-ebseq"
                                  "rsem-run-prsem-testing-procedure")))
                 (for-each (lambda (program)
                             (wrap-program (string-append prefix "/bin/" program)
                               `("PERL5LIB" ":" prefix (,perldir))))
                           programs))
               #t)))))
      (inputs
       `(("boost" ,boost)
         ("r-minimal" ,r-minimal)
         ("perl" ,perl)
         ("htslib" ,htslib-1.3)
         ("zlib" ,zlib)))
      (home-page "http://deweylab.biostat.wisc.edu/rsem/")
      (synopsis "Estimate gene expression levels from RNA-Seq data")
      (description
       "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
      (license license:gpl3+)))
  ;; RSeQC, taken from the SourceForge release tarball and built with the
  ;; Python build system using its default phases.
  (define-public rseqc
    (package
      (name "rseqc")
      (version "3.0.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/rseqc/"
                             "RSeQC-" version ".tar.gz"))
         (sha256
          (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-nose" ,python-nose)))
      (inputs
       `(("python-cython" ,python-cython)
         ("python-bx-python" ,python-bx-python)
         ("python-pybigwig" ,python-pybigwig)
         ("python-pysam" ,python-pysam)
         ("python-numpy" ,python-numpy)
         ("zlib" ,zlib)))
      (home-page "http://rseqc.sourceforge.net/")
      (synopsis "RNA-seq quality control package")
      (description
       "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
      (license license:gpl3+)))
  (define-public seek
    ;; There are no release tarballs.  According to the installation
    ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
    ;; stable release is identified by this changeset ID.
    (let ((changeset "2329130")
          (revision "1"))
      (package
        (name "seek")
        (version (string-append "0-" revision "." changeset))
        (source
         (origin
           (method hg-fetch)
           (uri (hg-reference
                 (url "https://bitbucket.org/libsleipnir/sleipnir")
                 (changeset changeset)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
        (build-system gnu-build-system)
        (arguments
         `(#:modules ((srfi srfi-1)
                      (guix build gnu-build-system)
                      (guix build utils))
           #:phases
           ;; Besides the top-level build, "make" is run separately in each
           ;; of these sub-directories of "tools", once to build and once to
           ;; install.
           (let* ((dirs '("SeekMiner"
                          "SeekEvaluator"
                          "SeekPrep"
                          "Distancer"
                          "Data2DB"
                          "PCL2Bin"))
                  ;; Run "make" with MAKE-ARGUMENTS inside every tool
                  ;; directory, in the order listed above.
                  (make-in-tool-dirs
                   (lambda (make-arguments)
                     (for-each
                      (lambda (dir)
                        (with-directory-excursion (string-append "tools/" dir)
                          (apply invoke "make" make-arguments)))
                      dirs))))
             (modify-phases %standard-phases
               (replace 'bootstrap
                 (lambda _
                   ;; Replace the "/usr/bin/env ..." text in the generator
                   ;; script with the perl found on PATH before running the
                   ;; project's own bootstrap script.
                   (substitute* "gen_tools_am"
                     (("/usr/bin/env.*") (which "perl")))
                   (invoke "bash" "gen_auto")
                   #t))
               (add-after 'build 'build-additional-tools
                 (lambda* (#:key make-flags #:allow-other-keys)
                   (make-in-tool-dirs make-flags)
                   #t))
               (add-after 'install 'install-additional-tools
                 (lambda* (#:key make-flags #:allow-other-keys)
                   (make-in-tool-dirs (append make-flags '("install")))
                   #t))))))
        (inputs
         `(("gsl" ,gsl)
           ("boost" ,boost)
           ("libsvm" ,libsvm)
           ("readline" ,readline)
           ("gengetopt" ,gengetopt)
           ("log4cpp" ,log4cpp)))
        (native-inputs
         `(("autoconf" ,autoconf)
           ("automake" ,automake)
           ("perl" ,perl)))
        (home-page "http://seek.princeton.edu")
        (synopsis "Gene co-expression search engine")
        (description
         "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
        (license license:cc-by3.0))))
  ;; Samtools 1.x.  The bundled htslib directory is deleted from the source
  ;; and the htslib input is used instead.  In addition to the regular
  ;; installation, "libbam.a" and the top-level header files are installed.
  (define-public samtools
    (package
      (name "samtools")
      (version "1.12")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/samtools/samtools/"
                             version "/samtools-" version ".tar.bz2"))
         (sha256
          (base32 "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled htslib.
             (delete-file-recursively "htslib-1.12")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((ice-9 ftw)
                    (ice-9 regex)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:configure-flags (list "--with-ncurses")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'patch-tests
             (lambda _
               ;; The test script calls out to /bin/bash
               (substitute* "test/test.pl"
                 (("/bin/bash") (which "bash")))
               #t))
           (add-after 'install 'install-library
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (libdir (string-append out "/lib")))
                 (install-file "libbam.a" libdir)
                 #t)))
           (add-after 'install 'install-headers
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Copy every entry of the current directory whose name ends
               ;; in ".h" to "include/samtools/".
               (let* ((out (assoc-ref outputs "out"))
                      (incdir (string-append out "/include/samtools/"))
                      (header? (lambda (name)
                                 (string-match "\\.h$" name))))
                 (for-each (lambda (header)
                             (install-file header incdir))
                           (scandir "." header?))
                 #t))))))
      (native-inputs `(("pkg-config" ,pkg-config)))
      (inputs
       `(("htslib" ,htslib)
         ("ncurses" ,ncurses)
         ("perl" ,perl)
         ("python" ,python)
         ("zlib" ,zlib)))
      (home-page "http://samtools.sourceforge.net")
      (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
      (description
       "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
      (license license:expat)))
  ;; Samtools 1.10: inherits everything from 'samtools' except the version,
  ;; the source (with its own bundled htslib directory removed) and the
  ;; inputs, which use htslib-1.10.
  (define-public samtools-1.10
    (package
      (inherit samtools)
      (name "samtools")
      (version "1.10")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/samtools/samtools/"
                             version "/samtools-" version ".tar.bz2"))
         (sha256
          (base32 "119ms0dpydw8dkh3zc4yyw9zhdzgv12px4l2kayigv31bpqcb7kv"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled htslib.
             (delete-file-recursively "htslib-1.10")
             #t))))
      (inputs
       `(("htslib" ,htslib-1.10)
         ("ncurses" ,ncurses)
         ("perl" ,perl)
         ("python" ,python)
         ("zlib" ,zlib)))))
  (define-public samtools-0.1
    ;; This is the most recent version of the 0.1 line of samtools.  The input
    ;; and output formats differ greatly from that used and produced by samtools
    ;; 1.x and is still used in many bioinformatics pipelines.
    (package
      (inherit samtools)
      (version "0.1.19")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/samtools/samtools/"
                             version "/samtools-" version ".tar.bz2"))
         (sha256
          (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
      (arguments
       `(#:tests? #f                    ;no "check" target
         #:make-flags
         (list "LIBCURSES=-lncurses")
         ;; Reuse the arguments of 'samtools', adjusting only its phases:
         ;; 'patch-tests' and 'configure' are dropped and 'install' copies the
         ;; single "samtools" executable.
         ,@(substitute-keyword-arguments (package-arguments samtools)
             ((#:phases phases)
              `(modify-phases ,phases
                 (delete 'patch-tests)
                 (delete 'configure)
                 (replace 'install
                   (lambda* (#:key outputs #:allow-other-keys)
                     (let* ((out (assoc-ref outputs "out"))
                            (bindir (string-append out "/bin")))
                       (mkdir-p bindir)
                       (install-file "samtools" bindir)
                       #t)))))))))))
  ;; MOSAIK read mapper, pinned to a specific Git commit.  The build runs
  ;; from the "src" directory and the 'install phase copies the contents of
  ;; "../bin" to the output.
  (define-public mosaik
    (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
      (package
        (name "mosaik")
        (version "2.2.30")
        (source (origin
                  ;; There are no release tarballs nor tags.
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/wanpinglee/MOSAIK")
                        (commit commit)))
                  ;; Name the checkout like the other Git origins in this
                  ;; file.
                  (file-name (git-file-name name version))
                  (sha256
                   (base32
                    "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f ; no tests
           #:make-flags (list "CC=gcc")
           #:phases
           (modify-phases %standard-phases
             ;; Instead of configuring, just enter the "src" directory where
             ;; the remaining phases run.
             (replace 'configure
               (lambda _ (chdir "src") #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out")
                                           "/bin")))
                   (mkdir-p bin)
                   (copy-recursively "../bin" bin)
                   #t))))))
        (inputs
         `(("perl" ,perl)
           ("zlib:static" ,zlib "static")
           ("zlib" ,zlib)))
        (supported-systems '("x86_64-linux"))
        (home-page "https://github.com/wanpinglee/MOSAIK")
        (synopsis "Map nucleotide sequence reads to reference genomes")
        (description
         "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
        ;; MOSAIK is released under the GPLv2+ with the exception of third-party
        ;; code released into the public domain:
        ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
        ;; 2. MD5 implementation - RSA Data Security, RFC 1321
        (license (list license:gpl2+ license:public-domain)))))
  ;; Mosaicatcher, built with CMake from the "src" sub-directory against
  ;; the htslib input (a patch unbundles the upstream copy).
  (define-public mosaicatcher
    (package
      (name "mosaicatcher")
      (version "0.3.1")
      (source (origin
                ;; The source is fetched from the Git reference named
                ;; "<version>-dev".
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/friendsofstrandseq/mosaicatcher")
                      (commit (string-append version "-dev"))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1n2s5wvvj2y0vfgjkg1q11xahpbagxz7h2vf5q7qyy25s12kbzbd"))
                (patches (search-patches "mosaicatcher-unbundle-htslib.patch"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #false ; there are no tests
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _ (chdir "src")))
           (replace 'install
             ;; Install the "mosaic" executable and copy the "../R"
             ;; directory to share/mosaicatcher.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (bin (string-append target "/bin"))
                      (share (string-append target "/share/mosaicatcher")))
                 (install-file "mosaic" bin)
                 (mkdir-p share)
                 (copy-recursively "../R" share)))))))
      (inputs
       `(("boost" ,boost)
         ("htslib" ,htslib)))
      (home-page "https://github.com/friendsofstrandseq/mosaicatcher")
      (synopsis "Count and classify Strand-seq reads")
      (description
       "Mosaicatcher counts Strand-seq reads and classifies strand states of
each chromosome in each cell using a Hidden Markov Model.")
      (license license:expat)))
  ;; NGS SDK, built from the "ngs-sdk" sub-directory of the ncbi/ngs
  ;; repository.  The 'enter-dir phase name is part of this package's
  ;; interface: 'java-ngs' replaces it to build a different sub-directory.
  (define-public ngs-sdk
    (package
      (name "ngs-sdk")
      (version "2.10.5")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ncbi/ngs")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
      (build-system gnu-build-system)
      (arguments
       `(#:parallel-build? #f           ; not supported
         #:tests? #f                    ; no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-dir
             (lambda _ (chdir "ngs-sdk") #t))
           (replace 'configure
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (prefix-flag (string-append "--prefix=" out)))
                 ;; Allow 'konfigure.perl' to find 'package.prl'.
                 (setenv "PERL5LIB"
                         (string-append ".:" (getenv "PERL5LIB")))
                 ;; The 'configure' script doesn't recognize things like
                 ;; '--enable-fast-install'.
                 (invoke "./configure"
                         (string-append "--build-prefix=" (getcwd) "/build")
                         prefix-flag)
                 #t))))))
      (native-inputs `(("perl" ,perl)))
      ;; According to the test
      ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
      ;; in ngs-sdk/setup/konfigure.perl
      (supported-systems '("i686-linux" "x86_64-linux"))
      (home-page "https://github.com/ncbi/ngs")
      (synopsis "API for accessing Next Generation Sequencing data")
      (description
       "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
      (license license:public-domain)))
  ;; Java bindings for the NGS SDK.  Same recipe as 'ngs-sdk', except that
  ;; the 'enter-dir phase changes into "ngs-java" and the inputs are a JDK
  ;; plus 'ngs-sdk' itself.
  (define-public java-ngs
    (package
      (inherit ngs-sdk)
      (name "java-ngs")
      (arguments
       ;; Prepend a #:modules setting to the inherited arguments, then
       ;; rewrite only the #:phases value.
       (substitute-keyword-arguments
           (append '(#:modules ((guix build gnu-build-system)
                                (guix build utils)
                                (srfi srfi-1)
                                (srfi srfi-26)))
                   (package-arguments ngs-sdk))
         ((#:phases phases)
          `(modify-phases ,phases
             (replace 'enter-dir
               (lambda _ (chdir "ngs-java") #t))))))
      (inputs
       `(("jdk" ,icedtea "jdk")
         ("ngs-sdk" ,ngs-sdk)))
      (synopsis "Java bindings for NGS SDK")))
  ;; NCBI-VDB.  The upstream configure script is invoked by hand after
  ;; several files are patched to point at the inputs.  Two extra phases
  ;; install the interface libraries and headers plus the ".kfg" files.
  (define-public ncbi-vdb
    (package
      (name "ncbi-vdb")
      (version "2.10.6")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ncbi/ncbi-vdb")
                      (commit version)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
      (build-system gnu-build-system)
      (arguments
       `(#:parallel-build? #f ; not supported
         #:tests? #f ; no "check" target
         #:make-flags '("HAVE_HDF5=1")
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'make-files-writable
             (lambda _ (for-each make-file-writable (find-files "." ".*")) #t))
           (add-before 'configure 'set-perl-search-path
             (lambda _
               ;; Work around "dotless @INC" build failure.
               (setenv "PERL5LIB"
                       (string-append (getcwd) "/setup:"
                                      (getenv "PERL5LIB")))
               #t))
           ;; See https://github.com/ncbi/ncbi-vdb/issues/14
           (add-after 'unpack 'patch-krypto-flags
             (lambda _
               (substitute* "libs/krypto/Makefile"
                 (("-Wa,-march=generic64\\+aes") "")
                 (("-Wa,-march=generic64\\+sse4") ""))
               #t))
           (replace 'configure
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 ;; Override include path for libmagic
                 (substitute* "setup/package.prl"
                   (("name => 'magic', Include => '/usr/include'")
                    (string-append "name=> 'magic', Include => '"
                                   (assoc-ref inputs "libmagic")
                                   "/include" "'")))
                 ;; Install kdf5 library (needed by sra-tools)
                 (substitute* "build/Makefile.install"
                   (("LIBRARIES_TO_INSTALL =")
                    "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))
                 ;; Add "-msse2" to the CFLAGS assignment.
                 (substitute* "build/Makefile.env"
                   (("CFLAGS =" prefix)
                    (string-append prefix "-msse2 ")))
                 ;; Override search path for ngs-java
                 (substitute* "setup/package.prl"
                   (("/usr/local/ngs/ngs-java")
                    (assoc-ref inputs "java-ngs")))
                 ;; The 'configure' script doesn't recognize things like
                 ;; '--enable-fast-install'.
                 (invoke "./configure"
                         (string-append "--build-prefix=" (getcwd) "/build")
                         (string-append "--prefix=" out)
                         "--debug"
                         (string-append "--with-xml2-prefix="
                                        (assoc-ref inputs "libxml2"))
                         (string-append "--with-ngs-sdk-prefix="
                                        (assoc-ref inputs "ngs-sdk"))
                         (string-append "--with-hdf5-prefix="
                                        (assoc-ref inputs "hdf5")))
                 #t)))
           (add-after 'install 'install-interfaces
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Install interface libraries.  On i686 the interface libraries
               ;; are installed to "linux/gcc/i386", so we need to use the Linux
               ;; architecture name ("i386") instead of the target system prefix
               ;; ("i686").
               (let ((out (assoc-ref outputs "out")))
                 (mkdir (string-append out "/ilib"))
                 (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                  ,(system->linux-architecture
                                                    (or (%current-target-system)
                                                        (%current-system)))
                                                  "/rel/ilib")
                                   (string-append out "/ilib"))
                 ;; Install interface headers
                 (copy-recursively "interfaces"
                                   (string-append out "/include")))
               #t))
           ;; These files are needed by sra-tools.
           (add-after 'install 'install-configuration-files
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
                 (mkdir target)
                 (install-file "libs/kfg/default.kfg" target)
                 (install-file "libs/kfg/certs.kfg" target))
               #t)))))
      (inputs
       `(("libxml2" ,libxml2)
         ("ngs-sdk" ,ngs-sdk)
         ("java-ngs" ,java-ngs)
         ("libmagic" ,file)
         ("hdf5" ,hdf5)))
      (native-inputs `(("perl" ,perl)))
      ;; NCBI-VDB requires SSE capability.
      (supported-systems '("i686-linux" "x86_64-linux"))
      (home-page "https://github.com/ncbi/ncbi-vdb")
      (synopsis "Database engine for genetic information")
      (description
       "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
      (license license:public-domain)))
  ;; PLINK 1.07, built from the upstream zip archive with two local
  ;; patches.  The 'configure' phase is deleted and the resulting "plink"
  ;; binary is installed by hand.
  (define-public plink
    (package
      (name "plink")
      (version "1.07")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
               version "-src.zip"))
         (sha256
          (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
         (patches (search-patches "plink-1.07-unclobber-i.patch"
                                  "plink-endian-detection.patch"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ;no "check" target
         #:make-flags
         (let ((lapack (assoc-ref %build-inputs "lapack")))
           (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
                 "WITH_LAPACK=1"
                 "FORCE_DYNAMIC=1"
                 ;; disable phoning home
                 "WITH_WEBCHECK="))
         #:phases
         (modify-phases %standard-phases
           ;; no "configure" script
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/")))
                 (install-file "plink" bindir)
                 #t))))))
      (inputs
       `(("zlib" ,zlib)
         ("lapack" ,lapack)))
      (native-inputs
       `(("unzip" ,unzip)))
      (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
      (synopsis "Whole genome association analysis toolset")
      (description
       "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
      ;; Code is released under GPLv2, except for fisher.h, which is under
      ;; LGPLv2.1+
      (license (list license:gpl2 license:lgpl2.1+))))
  ;; plink-ng, inheriting the fields it does not override from 'plink'.
  ;; The build runs inside the "1.9" sub-directory of the checkout and is
  ;; driven entirely by make variables.
  (define-public plink-ng
    (package
      (inherit plink)
      (name "plink-ng")
      (version "2.00a2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/chrchang/plink-ng")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1p88lz9agzjlspjhciz61qjc36cfniv4nkxszyy0njqyc5rzc0cd"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no "check" target
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list "BLASFLAGS=-llapack -lopenblas"
                 "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
                 "ZLIB=-lz"
                 "BIN=plink prettify"
                 (string-append "CC=" ,(cc-for-target))
                 (string-append "PREFIX=" out)
                 "DESTDIR="))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _ (chdir "1.9") #t))
           ;; no "configure" script
           (delete 'configure))))
      (inputs
       `(("lapack" ,lapack)
         ("openblas" ,openblas)
         ("zlib" ,zlib)))
      (home-page "https://www.cog-genomics.org/plink/")
      (license license:gpl3+)))
  ;; Smithlab CPP helper library, pinned to a Git commit.  There is no
  ;; install target in use here: the 'install phase copies the object files
  ;; to "lib" and the ".hpp" headers to "include/smithlab-cpp".
  (define-public smithlab-cpp
    (let ((revision "1")
          (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
      (package
        (name "smithlab-cpp")
        (version (string-append "0." revision "." (string-take commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/smithlabcode/smithlab_cpp")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
        (build-system gnu-build-system)
        (arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-26))
           #:tests? #f                  ;no "check" target
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'use-samtools-headers
               (lambda _
                 ;; Refer to the SAM header through the "samtools/"
                 ;; sub-directory.
                 (substitute* '("SAM.cpp"
                                "SAM.hpp")
                   (("sam.h") "samtools/sam.h"))
                 #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (libdir (string-append out "/lib"))
                        (incdir (string-append out "/include/smithlab-cpp")))
                   (mkdir-p libdir)
                   (mkdir-p incdir)
                   (for-each (lambda (object)
                               (install-file object libdir))
                             (find-files "." "\\.o$"))
                   (for-each (lambda (header)
                               (install-file header incdir))
                             (find-files "." "\\.hpp$")))
                 #t)))))
        (inputs
         `(("samtools" ,samtools-0.1)
           ("zlib" ,zlib)))
        (home-page "https://github.com/smithlabcode/smithlab_cpp")
        (synopsis "C++ helper library for functions used in Smith lab projects")
        (description
         "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
        (license license:gpl3+))))
  6374. (define-public preseq
  6375. (package
  6376. (name "preseq")
  6377. (version "2.0.3")
  6378. (source (origin
  6379. (method url-fetch)
  6380. (uri (string-append "https://github.com/smithlabcode/preseq/"
  6381. "releases/download/v" version
  6382. "/preseq_v" version ".tar.bz2"))
  6383. (sha256
  6384. (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
  6385. (modules '((guix build utils)))
  6386. (snippet '(begin
  6387. ;; Remove bundled samtools.
  6388. (delete-file-recursively "samtools")
  6389. #t))))
  6390. (build-system gnu-build-system)
  6391. (arguments
  6392. `(#:tests? #f ;no "check" target
  6393. #:phases
  6394. (modify-phases %standard-phases
  6395. (delete 'configure))
  6396. #:make-flags
  6397. (list (string-append "PREFIX="
  6398. (assoc-ref %outputs "out"))
  6399. (string-append "LIBBAM="
  6400. (assoc-ref %build-inputs "samtools")
  6401. "/lib/libbam.a")
  6402. (string-append "SMITHLAB_CPP="
  6403. (assoc-ref %build-inputs "smithlab-cpp")
  6404. "/lib")
  6405. "PROGS=preseq"
  6406. "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)")))
  6407. (inputs
  6408. `(("gsl" ,gsl)
  6409. ("samtools" ,samtools-0.1)
  6410. ("smithlab-cpp" ,smithlab-cpp)
  6411. ("zlib" ,zlib)))
  6412. (home-page "http://smithlabresearch.org/software/preseq/")
  6413. (synopsis "Program for analyzing library complexity")
  6414. (description
  6415. "The preseq package is aimed at predicting and estimating the complexity
  6416. of a genomic sequencing library, equivalent to predicting and estimating the
  6417. number of redundant reads from a given sequencing depth and how many will be
  6418. expected from additional sequencing using an initial sequencing experiment.
  6419. The estimates can then be used to examine the utility of further sequencing,
  6420. optimize the sequencing depth, or to screen multiple libraries to avoid low
  6421. complexity samples.")
  6422. (license license:gpl3+)))
  6423. (define-public python-screed
  6424. (package
  6425. (name "python-screed")
  6426. (version "1.0")
  6427. (source
  6428. (origin
  6429. (method url-fetch)
  6430. (uri (pypi-uri "screed" version))
  6431. (sha256
  6432. (base32
  6433. "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
  6434. (build-system python-build-system)
  6435. (native-inputs
  6436. `(("python-pytest" ,python-pytest)
  6437. ("python-pytest-cov" ,python-pytest-cov)
  6438. ("python-pytest-runner" ,python-pytest-runner)))
  6439. (inputs
  6440. `(("python-bz2file" ,python-bz2file)))
  6441. (home-page "https://github.com/dib-lab/screed/")
  6442. (synopsis "Short read sequence database utilities")
  6443. (description "Screed parses FASTA and FASTQ files and generates databases.
  6444. Values such as sequence name, sequence description, sequence quality and the
  6445. sequence itself can be retrieved from these databases.")
  6446. (license license:bsd-3)))
  6447. (define-public python2-screed
  6448. (package-with-python2 python-screed))
  6449. (define-public sra-tools
  6450. (package
  6451. (name "sra-tools")
  6452. (version "2.10.6")
  6453. (source
  6454. (origin
  6455. (method git-fetch)
  6456. (uri (git-reference
  6457. (url "https://github.com/ncbi/sra-tools")
  6458. (commit version)))
  6459. (file-name (git-file-name name version))
  6460. (sha256
  6461. (base32
  6462. "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
  6463. (build-system gnu-build-system)
  6464. (arguments
  6465. `(#:parallel-build? #f ; not supported
  6466. #:tests? #f ; no "check" target
  6467. #:make-flags
  6468. (list (string-append "DEFAULT_CRT="
  6469. (assoc-ref %build-inputs "ncbi-vdb")
  6470. "/kfg/certs.kfg")
  6471. (string-append "DEFAULT_KFG="
  6472. (assoc-ref %build-inputs "ncbi-vdb")
  6473. "/kfg/default.kfg")
  6474. (string-append "VDB_LIBDIR="
  6475. (assoc-ref %build-inputs "ncbi-vdb")
  6476. ,(if (string-prefix? "x86_64"
  6477. (or (%current-target-system)
  6478. (%current-system)))
  6479. "/lib64"
  6480. "/lib32")))
  6481. #:phases
  6482. (modify-phases %standard-phases
  6483. (add-before 'configure 'set-perl-search-path
  6484. (lambda _
  6485. ;; Work around "dotless @INC" build failure.
  6486. (setenv "PERL5LIB"
  6487. (string-append (getcwd) "/setup:"
  6488. (getenv "PERL5LIB")))
  6489. #t))
  6490. (replace 'configure
  6491. (lambda* (#:key inputs outputs #:allow-other-keys)
  6492. ;; The build system expects a directory containing the sources and
  6493. ;; raw build output of ncbi-vdb, including files that are not
  6494. ;; installed. Since we are building against an installed version of
  6495. ;; ncbi-vdb, the following modifications are needed.
  6496. (substitute* "setup/konfigure.perl"
  6497. ;; Make the configure script look for the "ilib" directory of
  6498. ;; "ncbi-vdb" without first checking for the existence of a
  6499. ;; matching library in its "lib" directory.
  6500. (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
  6501. "my $f = File::Spec->catdir($ilibdir, $ilib);")
  6502. ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
  6503. (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
  6504. "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
  6505. ;; Dynamic linking
  6506. (substitute* "tools/copycat/Makefile"
  6507. (("smagic-static") "lmagic"))
  6508. (substitute* "tools/driver-tool/utf8proc/Makefile"
  6509. (("CC\\?=gcc") "myCC=gcc")
  6510. (("\\(CC\\)") "(myCC)"))
  6511. ;; The 'configure' script doesn't recognize things like
  6512. ;; '--enable-fast-install'.
  6513. (invoke "./configure"
  6514. (string-append "--build-prefix=" (getcwd) "/build")
  6515. (string-append "--prefix=" (assoc-ref outputs "out"))
  6516. (string-append "--debug")
  6517. (string-append "--with-fuse-prefix="
  6518. (assoc-ref inputs "fuse"))
  6519. (string-append "--with-magic-prefix="
  6520. (assoc-ref inputs "libmagic"))
  6521. ;; TODO: building with libxml2 fails with linker errors
  6522. #;
  6523. (string-append "--with-xml2-prefix="
  6524. (assoc-ref inputs "libxml2"))
  6525. (string-append "--with-ncbi-vdb-sources="
  6526. (assoc-ref inputs "ncbi-vdb"))
  6527. (string-append "--with-ncbi-vdb-build="
  6528. (assoc-ref inputs "ncbi-vdb"))
  6529. (string-append "--with-ngs-sdk-prefix="
  6530. (assoc-ref inputs "ngs-sdk"))
  6531. (string-append "--with-hdf5-prefix="
  6532. (assoc-ref inputs "hdf5")))
  6533. #t)))))
  6534. (native-inputs `(("perl" ,perl)))
  6535. (inputs
  6536. `(("ngs-sdk" ,ngs-sdk)
  6537. ("ncbi-vdb" ,ncbi-vdb)
  6538. ("libmagic" ,file)
  6539. ("fuse" ,fuse)
  6540. ("hdf5" ,hdf5-1.10)
  6541. ("zlib" ,zlib)
  6542. ("python" ,python-wrapper)))
  6543. (home-page
  6544. "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
  6545. (synopsis "Tools and libraries for reading and writing sequencing data")
  6546. (description
  6547. "The SRA Toolkit from NCBI is a collection of tools and libraries for
  6548. reading of sequencing files from the Sequence Read Archive (SRA) database and
  6549. writing files into the .sra format.")
  6550. (license license:public-domain)))
  6551. (define-public seqan
  6552. (package
  6553. (name "seqan")
  6554. (version "3.0.3")
  6555. (source (origin
  6556. (method url-fetch)
  6557. (uri (string-append "https://github.com/seqan/seqan3/releases/"
  6558. "download/" version "/seqan3-"
  6559. version "-Source.tar.xz"))
  6560. (sha256
  6561. (base32
  6562. "1h2z0cvgidhkmh5xsbw75waqbrqbbv6kkrvb0b92xfh3gqpaiz22"))))
  6563. (build-system cmake-build-system)
  6564. (arguments
  6565. `(#:phases
  6566. (modify-phases %standard-phases
  6567. (replace 'check
  6568. (lambda _
  6569. (invoke "ctest" "test" "--output-on-failure"))))))
  6570. (native-inputs
  6571. `(("bzip2" ,bzip2)
  6572. ("cereal" ,cereal)
  6573. ("zlib" ,zlib)))
  6574. (home-page "https://www.seqan.de")
  6575. (synopsis "Library for nucleotide sequence analysis")
  6576. (description
  6577. "SeqAn is a C++ library of efficient algorithms and data structures for
  6578. the analysis of sequences with the focus on biological data. It contains
  6579. algorithms and data structures for string representation and their
  6580. manipulation, online and indexed string search, efficient I/O of
  6581. bioinformatics file formats, sequence alignment, and more.")
  6582. (license license:bsd-3)))
  6583. (define-public seqan-2
  6584. (package
  6585. (inherit seqan)
  6586. (version "2.4.0")
  6587. (source (origin
  6588. (method url-fetch)
  6589. (uri (string-append "https://github.com/seqan/seqan/releases/"
  6590. "download/seqan-v" version
  6591. "/seqan-library-" version ".tar.xz"))
  6592. (sha256
  6593. (base32
  6594. "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
  6595. ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
  6596. ;; makes sense to split the outputs.
  6597. (outputs '("out" "doc"))
  6598. (build-system trivial-build-system)
  6599. (arguments
  6600. `(#:modules ((guix build utils))
  6601. #:builder
  6602. (begin
  6603. (use-modules (guix build utils))
  6604. (let ((tar (assoc-ref %build-inputs "tar"))
  6605. (xz (assoc-ref %build-inputs "xz"))
  6606. (out (assoc-ref %outputs "out"))
  6607. (doc (assoc-ref %outputs "doc")))
  6608. (setenv "PATH" (string-append tar "/bin:" xz "/bin"))
  6609. (invoke "tar" "xvf" (assoc-ref %build-inputs "source"))
  6610. (chdir (string-append "seqan-library-" ,version))
  6611. (copy-recursively "include" (string-append out "/include"))
  6612. (copy-recursively "share" (string-append doc "/share"))
  6613. #t))))
  6614. (native-inputs
  6615. `(("source" ,source)
  6616. ("tar" ,tar)
  6617. ("xz" ,xz)))))
  6618. (define-public seqan-1
  6619. (package (inherit seqan)
  6620. (name "seqan")
  6621. (version "1.4.2")
  6622. (source (origin
  6623. (method url-fetch)
  6624. (uri (string-append "https://packages.seqan.de/seqan-library/"
  6625. "seqan-library-" version ".tar.bz2"))
  6626. (sha256
  6627. (base32
  6628. "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
  6629. ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
  6630. ;; makes sense to split the outputs.
  6631. (outputs '("out" "doc"))
  6632. (build-system trivial-build-system)
  6633. (arguments
  6634. `(#:modules ((guix build utils))
  6635. #:builder
  6636. (begin
  6637. (use-modules (guix build utils))
  6638. (let ((tar (assoc-ref %build-inputs "tar"))
  6639. (bzip (assoc-ref %build-inputs "bzip2"))
  6640. (out (assoc-ref %outputs "out"))
  6641. (doc (assoc-ref %outputs "doc")))
  6642. (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
  6643. (invoke "tar" "xvf" (assoc-ref %build-inputs "source"))
  6644. (chdir (string-append "seqan-library-" ,version))
  6645. (copy-recursively "include" (string-append out "/include"))
  6646. (copy-recursively "share" (string-append doc "/share"))
  6647. #t))))
  6648. (native-inputs
  6649. `(("source" ,source)
  6650. ("tar" ,tar)
  6651. ("bzip2" ,bzip2)))))
  6652. (define-public seqmagick
  6653. (package
  6654. (name "seqmagick")
  6655. (version "0.8.0")
  6656. (source
  6657. (origin
  6658. (method url-fetch)
  6659. (uri (pypi-uri "seqmagick" version))
  6660. (sha256
  6661. (base32
  6662. "0pf98da7i59q47gwrbx0wjk6xlvbybiwphw80w7h4ydjj0579a2b"))))
  6663. (build-system python-build-system)
  6664. (inputs
  6665. `(("python-biopython" ,python-biopython)))
  6666. (native-inputs
  6667. `(("python-nose" ,python-nose)))
  6668. (home-page "https://github.com/fhcrc/seqmagick")
  6669. (synopsis "Tools for converting and modifying sequence files")
  6670. (description
  6671. "Bioinformaticians often have to convert sequence files between formats
  6672. and do little manipulations on them, and it's not worth writing scripts for
  6673. that. Seqmagick is a utility to expose the file format conversion in
  6674. BioPython in a convenient way. Instead of having a big mess of scripts, there
  6675. is one that takes arguments.")
  6676. (license license:gpl3)))
  6677. (define-public seqtk
  6678. (package
  6679. (name "seqtk")
  6680. (version "1.3")
  6681. (source (origin
  6682. (method git-fetch)
  6683. (uri (git-reference
  6684. (url "https://github.com/lh3/seqtk")
  6685. (commit (string-append "v" version))))
  6686. (file-name (git-file-name name version))
  6687. (sha256
  6688. (base32
  6689. "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
  6690. (build-system gnu-build-system)
  6691. (arguments
  6692. `(#:phases
  6693. (modify-phases %standard-phases
  6694. (delete 'configure)
  6695. (replace 'check
  6696. ;; There are no tests, so we just run a sanity check.
  6697. (lambda _ (invoke "./seqtk" "seq") #t))
  6698. (replace 'install
  6699. (lambda* (#:key outputs #:allow-other-keys)
  6700. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  6701. (install-file "seqtk" bin)
  6702. #t))))))
  6703. (inputs
  6704. `(("zlib" ,zlib)))
  6705. (home-page "https://github.com/lh3/seqtk")
  6706. (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
  6707. (description
  6708. "Seqtk is a fast and lightweight tool for processing sequences in the
  6709. FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
  6710. optionally compressed by gzip.")
  6711. (license license:expat)))
  6712. (define-public snap-aligner
  6713. (package
  6714. (name "snap-aligner")
  6715. (version "1.0beta.18")
  6716. (source (origin
  6717. (method git-fetch)
  6718. (uri (git-reference
  6719. (url "https://github.com/amplab/snap")
  6720. (commit (string-append "v" version))))
  6721. (file-name (git-file-name name version))
  6722. (sha256
  6723. (base32
  6724. "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
  6725. (build-system gnu-build-system)
  6726. (arguments
  6727. '(#:phases
  6728. (modify-phases %standard-phases
  6729. (delete 'configure)
  6730. (replace 'check (lambda _ (invoke "./unit_tests") #t))
  6731. (replace 'install
  6732. (lambda* (#:key outputs #:allow-other-keys)
  6733. (let* ((out (assoc-ref outputs "out"))
  6734. (bin (string-append out "/bin")))
  6735. (install-file "snap-aligner" bin)
  6736. (install-file "SNAPCommand" bin)
  6737. #t))))))
  6738. (native-inputs
  6739. `(("zlib" ,zlib)))
  6740. (home-page "http://snap.cs.berkeley.edu/")
  6741. (synopsis "Short read DNA sequence aligner")
  6742. (description
  6743. "SNAP is a fast and accurate aligner for short DNA reads. It is
  6744. optimized for modern read lengths of 100 bases or higher, and takes advantage
  6745. of these reads to align data quickly through a hash-based indexing scheme.")
  6746. ;; 32-bit systems are not supported by the unpatched code.
  6747. ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
  6748. ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
  6749. ;; systems without a lot of memory cannot make good use of this program.
  6750. (supported-systems '("x86_64-linux"))
  6751. (license license:asl2.0)))
  6752. (define-public sortmerna
  6753. (package
  6754. (name "sortmerna")
  6755. (version "2.1b")
  6756. (source
  6757. (origin
  6758. (method git-fetch)
  6759. (uri (git-reference
  6760. (url "https://github.com/biocore/sortmerna")
  6761. (commit version)))
  6762. (file-name (git-file-name name version))
  6763. (sha256
  6764. (base32
  6765. "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
  6766. (build-system gnu-build-system)
  6767. (outputs '("out" ;for binaries
  6768. "db")) ;for sequence databases
  6769. (arguments
  6770. `(#:phases
  6771. (modify-phases %standard-phases
  6772. (replace 'install
  6773. (lambda* (#:key outputs #:allow-other-keys)
  6774. (let* ((out (assoc-ref outputs "out"))
  6775. (bin (string-append out "/bin"))
  6776. (db (assoc-ref outputs "db"))
  6777. (share
  6778. (string-append db "/share/sortmerna/rRNA_databases")))
  6779. (install-file "sortmerna" bin)
  6780. (install-file "indexdb_rna" bin)
  6781. (for-each (lambda (file)
  6782. (install-file file share))
  6783. (find-files "rRNA_databases" ".*fasta"))
  6784. #t))))))
  6785. (inputs
  6786. `(("zlib" ,zlib)))
  6787. (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
  6788. (synopsis "Biological sequence analysis tool for NGS reads")
  6789. (description
  6790. "SortMeRNA is a biological sequence analysis tool for filtering, mapping
  6791. and @acronym{OTU, operational taxonomic unit} picking of @acronym{NGS, next
  6792. generation sequencing} reads. The core algorithm is based on approximate seeds
  6793. and allows for fast and sensitive analyses of nucleotide sequences. The main
  6794. application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
  6795. ;; The source includes x86 specific code
  6796. (supported-systems '("x86_64-linux" "i686-linux"))
  6797. (license license:lgpl3)))
  6798. (define-public star
  6799. (package
  6800. (name "star")
  6801. (version "2.7.8a")
  6802. (source (origin
  6803. (method git-fetch)
  6804. (uri (git-reference
  6805. (url "https://github.com/alexdobin/STAR")
  6806. (commit version)))
  6807. (file-name (git-file-name name version))
  6808. (sha256
  6809. (base32
  6810. "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans"))
  6811. (modules '((guix build utils)))
  6812. (snippet
  6813. '(begin
  6814. (substitute* "source/Makefile"
  6815. (("/bin/rm") "rm"))
  6816. ;; Remove pre-built binaries and bundled htslib sources.
  6817. (delete-file-recursively "bin/MacOSX_x86_64")
  6818. (delete-file-recursively "bin/Linux_x86_64")
  6819. (delete-file-recursively "bin/Linux_x86_64_static")
  6820. (delete-file-recursively "source/htslib")
  6821. #t))))
  6822. (build-system gnu-build-system)
  6823. (arguments
  6824. '(#:tests? #f ;no check target
  6825. #:make-flags '("STAR")
  6826. #:phases
  6827. (modify-phases %standard-phases
  6828. (add-after 'unpack 'enter-source-dir
  6829. (lambda _ (chdir "source") #t))
  6830. (add-after 'enter-source-dir 'make-reproducible
  6831. (lambda _
  6832. (substitute* "Makefile"
  6833. (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
  6834. (string-append pre "Built with Guix" post)))
  6835. #t))
  6836. ;; See https://github.com/alexdobin/STAR/pull/562
  6837. (add-after 'enter-source-dir 'add-missing-header
  6838. (lambda _
  6839. (substitute* "SoloReadFeature_inputRecords.cpp"
  6840. (("#include \"binarySearch2.h\"" h)
  6841. (string-append h "\n#include <math.h>")))
  6842. #t))
  6843. (add-after 'enter-source-dir 'do-not-use-bundled-htslib
  6844. (lambda _
  6845. (substitute* "Makefile"
  6846. (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
  6847. _ prefix) prefix))
  6848. (substitute* '("BAMfunctions.cpp"
  6849. "signalFromBAM.h"
  6850. "bam_cat.h"
  6851. "bam_cat.c"
  6852. "STAR.cpp"
  6853. "bamRemoveDuplicates.cpp")
  6854. (("#include \"htslib/([^\"]+\\.h)\"" _ header)
  6855. (string-append "#include <" header ">")))
  6856. (substitute* "IncludeDefine.h"
  6857. (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
  6858. (string-append "<" header ">")))
  6859. #t))
  6860. (replace 'install
  6861. (lambda* (#:key outputs #:allow-other-keys)
  6862. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  6863. (install-file "STAR" bin))
  6864. #t))
  6865. (delete 'configure))))
  6866. (native-inputs
  6867. `(("xxd" ,xxd)))
  6868. (inputs
  6869. `(("htslib" ,htslib)
  6870. ("zlib" ,zlib)))
  6871. (home-page "https://github.com/alexdobin/STAR")
  6872. (synopsis "Universal RNA-seq aligner")
  6873. (description
  6874. "The Spliced Transcripts Alignment to a Reference (STAR) software is
  6875. based on a previously undescribed RNA-seq alignment algorithm that uses
  6876. sequential maximum mappable seed search in uncompressed suffix arrays followed
  6877. by seed clustering and stitching procedure. In addition to unbiased de novo
  6878. detection of canonical junctions, STAR can discover non-canonical splices and
  6879. chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
  6880. sequences.")
  6881. ;; Only 64-bit systems are supported according to the README.
  6882. (supported-systems '("x86_64-linux" "mips64el-linux"))
  6883. ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
  6884. (license license:gpl3+)))
  6885. (define-public star-for-pigx
  6886. (package
  6887. (inherit star)
  6888. (name "star")
  6889. (version "2.7.3a")
  6890. (source (origin
  6891. (method git-fetch)
  6892. (uri (git-reference
  6893. (url "https://github.com/alexdobin/STAR")
  6894. (commit version)))
  6895. (file-name (git-file-name name version))
  6896. (sha256
  6897. (base32
  6898. "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
  6899. (modules '((guix build utils)))
  6900. (snippet
  6901. '(begin
  6902. (substitute* "source/Makefile"
  6903. (("/bin/rm") "rm"))
  6904. ;; Remove pre-built binaries and bundled htslib sources.
  6905. (delete-file-recursively "bin/MacOSX_x86_64")
  6906. (delete-file-recursively "bin/Linux_x86_64")
  6907. (delete-file-recursively "bin/Linux_x86_64_static")
  6908. (delete-file-recursively "source/htslib")
  6909. #t))))))
  6910. (define-public starlong
  6911. (package (inherit star)
  6912. (name "starlong")
  6913. (arguments
  6914. (substitute-keyword-arguments (package-arguments star)
  6915. ((#:make-flags flags)
  6916. `(list "STARlong"))
  6917. ((#:phases phases)
  6918. `(modify-phases ,phases
  6919. ;; Allow extra long sequence reads.
  6920. (add-after 'unpack 'make-extra-long
  6921. (lambda _
  6922. (substitute* "source/IncludeDefine.h"
  6923. (("(#define DEF_readNameLengthMax ).*" _ match)
  6924. (string-append match "900000\n")))
  6925. #t))
  6926. (replace 'install
  6927. (lambda* (#:key outputs #:allow-other-keys)
  6928. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  6929. (install-file "STARlong" bin))
  6930. #t))))))))
  6931. (define-public subread
  6932. (package
  6933. (name "subread")
  6934. (version "1.6.0")
  6935. (source (origin
  6936. (method url-fetch)
  6937. (uri (string-append "mirror://sourceforge/subread/subread-"
  6938. version "/subread-" version "-source.tar.gz"))
  6939. (sha256
  6940. (base32
  6941. "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
  6942. (build-system gnu-build-system)
  6943. (arguments
  6944. `(#:tests? #f ;no "check" target
  6945. ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
  6946. ;; optimizations by default, so we override these flags such that x86_64
  6947. ;; flags are only added when the build target is an x86_64 system.
  6948. #:make-flags
  6949. (list (let ((system ,(or (%current-target-system)
  6950. (%current-system)))
  6951. (flags '("-ggdb" "-fomit-frame-pointer"
  6952. "-ffast-math" "-funroll-loops"
  6953. "-fmessage-length=0"
  6954. "-O9" "-Wall" "-DMAKE_FOR_EXON"
  6955. "-DMAKE_STANDALONE"
  6956. "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
  6957. (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
  6958. (if (string-prefix? "x86_64" system)
  6959. (string-append "CCFLAGS=" (string-join (append flags flags64)))
  6960. (string-append "CCFLAGS=" (string-join flags))))
  6961. "-f" "Makefile.Linux"
  6962. "CC=gcc ${CCFLAGS}")
  6963. #:phases
  6964. (modify-phases %standard-phases
  6965. (add-after 'unpack 'enter-dir
  6966. (lambda _ (chdir "src") #t))
  6967. (replace 'install
  6968. (lambda* (#:key outputs #:allow-other-keys)
  6969. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  6970. (mkdir-p bin)
  6971. (copy-recursively "../bin" bin))
  6972. #t))
  6973. ;; no "configure" script
  6974. (delete 'configure))))
  6975. (inputs `(("zlib" ,zlib)))
  6976. (home-page "http://bioinf.wehi.edu.au/subread-package/")
  6977. (synopsis "Tool kit for processing next-gen sequencing data")
  6978. (description
  6979. "The subread package contains the following tools: subread aligner, a
  6980. general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
  6981. and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
  6982. features; exactSNP: a SNP caller that discovers SNPs by testing signals
  6983. against local background noises.")
  6984. (license license:gpl3+)))
  6985. (define-public stringtie
  6986. (package
  6987. (name "stringtie")
  6988. (version "1.2.1")
  6989. (source (origin
  6990. (method url-fetch)
  6991. (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
  6992. "stringtie-" version ".tar.gz"))
  6993. (sha256
  6994. (base32
  6995. "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
  6996. (modules '((guix build utils)))
  6997. (snippet
  6998. '(begin
  6999. (delete-file-recursively "samtools-0.1.18")
  7000. #t))))
  7001. (build-system gnu-build-system)
  7002. (arguments
  7003. `(#:tests? #f ;no test suite
  7004. #:phases
  7005. (modify-phases %standard-phases
  7006. ;; no configure script
  7007. (delete 'configure)
  7008. (add-before 'build 'use-system-samtools
  7009. (lambda _
  7010. (substitute* "Makefile"
  7011. (("stringtie: \\$\\{BAM\\}/libbam\\.a")
  7012. "stringtie: "))
  7013. (substitute* '("gclib/GBam.h"
  7014. "gclib/GBam.cpp")
  7015. (("#include \"(bam|sam|kstring).h\"" _ header)
  7016. (string-append "#include <samtools/" header ".h>")))
  7017. #t))
  7018. (add-after 'unpack 'remove-duplicate-typedef
  7019. (lambda _
  7020. ;; This typedef conflicts with the typedef in
  7021. ;; glibc-2.25/include/bits/types.h
  7022. (substitute* "gclib/GThreads.h"
  7023. (("typedef long long __intmax_t;") ""))
  7024. #t))
  7025. (replace 'install
  7026. (lambda* (#:key outputs #:allow-other-keys)
  7027. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  7028. (install-file "stringtie" bin)
  7029. #t))))))
  7030. (inputs
  7031. `(("samtools" ,samtools-0.1)
  7032. ("zlib" ,zlib)))
  7033. (home-page "http://ccb.jhu.edu/software/stringtie/")
  7034. (synopsis "Transcript assembly and quantification for RNA-Seq data")
  7035. (description
  7036. "StringTie is a fast and efficient assembler of RNA-Seq sequence
  7037. alignments into potential transcripts. It uses a novel network flow algorithm
  7038. as well as an optional de novo assembly step to assemble and quantitate
  7039. full-length transcripts representing multiple splice variants for each gene
  7040. locus. Its input can include not only the alignments of raw reads used by
  7041. other transcript assemblers, but also alignments of longer sequences that have
  7042. been assembled from those reads. To identify differentially expressed genes
  7043. between experiments, StringTie's output can be processed either by the
  7044. Cuffdiff or Ballgown programs.")
  7045. (license license:artistic2.0)))
  7046. (define-public taxtastic
  7047. (package
  7048. (name "taxtastic")
  7049. (version "0.8.11")
  7050. (source (origin
  7051. ;; The Pypi version does not include tests.
  7052. (method git-fetch)
  7053. (uri (git-reference
  7054. (url "https://github.com/fhcrc/taxtastic")
  7055. (commit (string-append "v" version))))
  7056. (file-name (git-file-name name version))
  7057. (sha256
  7058. (base32
  7059. "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
  7060. (build-system python-build-system)
  7061. (arguments
  7062. `(#:phases
  7063. (modify-phases %standard-phases
  7064. (add-after 'unpack 'prepare-directory
  7065. (lambda _
  7066. ;; The git checkout must be writable for tests.
  7067. (for-each make-file-writable (find-files "."))
  7068. ;; This test fails, but the error is not caught by the test
  7069. ;; framework, so the tests fail...
  7070. (substitute* "tests/test_taxit.py"
  7071. (("self.cmd_fails\\(''\\)")
  7072. "self.cmd_fails('nothing')"))
  7073. ;; This version file is expected to be created with git describe.
  7074. (mkdir-p "taxtastic/data")
  7075. (with-output-to-file "taxtastic/data/ver"
  7076. (lambda () (display ,version)))
  7077. #t))
  7078. (add-after 'unpack 'python37-compatibility
  7079. (lambda _
  7080. (substitute* "taxtastic/utils.py"
  7081. (("import csv") "import csv, errno")
  7082. (("os.errno") "errno"))
  7083. #t))
  7084. (replace 'check
  7085. ;; Note, this fails to run with "-v" as it tries to write to a
  7086. ;; closed output stream.
  7087. (lambda _ (invoke "python" "-m" "unittest") #t)))))
  7088. (propagated-inputs
  7089. `(("python-sqlalchemy" ,python-sqlalchemy)
  7090. ("python-decorator" ,python-decorator)
  7091. ("python-biopython" ,python-biopython)
  7092. ("python-pandas" ,python-pandas)
  7093. ("python-psycopg2" ,python-psycopg2)
  7094. ("python-fastalite" ,python-fastalite)
  7095. ("python-pyyaml" ,python-pyyaml)
  7096. ("python-six" ,python-six)
  7097. ("python-jinja2" ,python-jinja2)
  7098. ("python-dendropy" ,python-dendropy)))
  7099. (home-page "https://github.com/fhcrc/taxtastic")
  7100. (synopsis "Tools for taxonomic naming and annotation")
  7101. (description
  7102. "Taxtastic is software written in python used to build and maintain
  7103. reference packages i.e. collections of reference trees, reference alignments,
  7104. profiles, and associated taxonomic information.")
  7105. (license license:gpl3+)))
  ;; VCFtools, a tool set for working with VCF files, built from the upstream
  ;; release tarball with the GNU build system.
  (define-public vcftools
    (package
      (name "vcftools")
      (version "0.1.16")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/vcftools/vcftools/releases/download/v"
               version "/vcftools-" version ".tar.gz"))
         (sha256
          (base32 "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         ;; Both PREFIX and MANDIR point into the "out" output.
         (let ((out (assoc-ref %outputs "out")))
           (list "CFLAGS=-O2"           ; override "-m64" flag
                 (string-append "PREFIX=" out)
                 (string-append "MANDIR=" out "/share/man/man1")))
         ;; There is no "check" target.
         #:tests? #f))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("perl" ,perl)
         ("zlib" ,zlib)))
      (synopsis "Tools for working with VCF files")
      (description
       "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
      (home-page "https://vcftools.github.io/")
      ;; Both the README and https://vcftools.github.io/license.html declare
      ;; the license as LGPLv3.
      (license license:lgpl3)))
  ;; Infernal: RNA alignment inference with covariance models, built from the
  ;; upstream release tarball.
  (define-public infernal
    (package
      (name "infernal")
      (version "1.1.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://eddylab.org/software/infernal/"
                             "infernal-" version ".tar.gz"))
         (sha256
          (base32 "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("perl" ,perl)
         ("python" ,python)))           ; for tests
      ;; Infernal requires VMX or SSE capability for parallel instructions,
      ;; hence the restricted list of systems.
      (supported-systems '("i686-linux" "x86_64-linux"))
      (synopsis "Inference of RNA alignments")
      (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
      (home-page "http://eddylab.org/infernal/")
      (license license:bsd-3)))
  ;; SnapATAC R package, fetched from the upstream Git tag "v<version>".
  (define-public r-snapatac
    (package
      (name "r-snapatac")
      (version "2.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/r3fang/SnapATAC")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
      ;; The upstream R package name differs in case from the Guix name.
      (properties `((upstream-name . "SnapATAC")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-bigmemory" ,r-bigmemory)
         ("r-doparallel" ,r-doparallel)
         ("r-dosnow" ,r-dosnow)
         ("r-edger" ,r-edger)
         ("r-foreach" ,r-foreach)
         ("r-genomicranges" ,r-genomicranges)
         ("r-igraph" ,r-igraph)
         ("r-iranges" ,r-iranges)
         ("r-irlba" ,r-irlba)
         ("r-matrix" ,r-matrix)
         ("r-plyr" ,r-plyr)
         ("r-plot3d" ,r-plot3d)
         ("r-rann" ,r-rann)
         ("r-raster" ,r-raster)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rhdf5" ,r-rhdf5)
         ("r-rtsne" ,r-rtsne)
         ("r-scales" ,r-scales)
         ("r-viridis" ,r-viridis)))
      (synopsis "Single nucleus analysis package for ATAC-Seq")
      (description
       "This package provides a fast and accurate analysis toolkit for single
cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
reveal cell-type specific regulatory landscapes. However, the exceeding data
sparsity has posed unique challenges for the data analysis. This package
@code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing large-
scale single cell ATAC-seq data which includes quality control, normalization,
clustering analysis, differential analysis, motif inference and exploration of
single cell ATAC-seq sequencing data.")
      (home-page "https://github.com/r3fang/SnapATAC")
      (license license:gpl3)))
  ;; ShinyCell R package, pinned to a specific upstream Git commit; the
  ;; version string is derived from the base version, revision and commit.
  (define-public r-shinycell
    (let ((revision "1")
          (commit "aecbd56e66802f28e397f5ae1f19403aadd12163"))
      (package
        (name "r-shinycell")
        (version (git-version "2.0.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/SGDDNB/ShinyCell")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5"))))
        ;; The upstream R package name differs in case from the Guix name.
        (properties `((upstream-name . "ShinyCell")))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-data-table" ,r-data-table)
           ("r-ggplot2" ,r-ggplot2)
           ("r-glue" ,r-glue)
           ("r-gridextra" ,r-gridextra)
           ("r-hdf5r" ,r-hdf5r)
           ("r-matrix" ,r-matrix)
           ("r-r-utils" ,r-r-utils)
           ("r-rcolorbrewer" ,r-rcolorbrewer)
           ("r-readr" ,r-readr)
           ("r-reticulate" ,r-reticulate)))
        (synopsis "Shiny interactive web apps for single-cell data")
        (description
         "This package provides Shiny apps for interactive exploration of
single-cell data.")
        (home-page "https://github.com/SGDDNB/ShinyCell")
        (license license:gpl3))))
  ;; ArchR R package, pinned to a specific upstream Git commit; the version
  ;; string is derived from the base version, revision and commit.
  (define-public r-archr
    (let ((revision "1")
          (commit "46b519ffb6f73edf132497ac31650d19ef055dc1"))
      (package
        (name "r-archr")
        (version (git-version "1.0.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/GreenleafLab/ArchR")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "1zj3sdfhgn2q2256fmz61a92vw1wylyck632d7842d6knd0v92v8"))))
        ;; The upstream R package name differs in case from the Guix name.
        (properties `((upstream-name . "ArchR")))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-biocgenerics" ,r-biocgenerics)
           ("r-biostrings" ,r-biostrings)
           ("r-chromvar" ,r-chromvar)
           ("r-complexheatmap" ,r-complexheatmap)
           ("r-data-table" ,r-data-table)
           ("r-genomicranges" ,r-genomicranges)
           ("r-ggplot2" ,r-ggplot2)
           ("r-ggrepel" ,r-ggrepel)
           ("r-gridextra" ,r-gridextra)
           ("r-gtable" ,r-gtable)
           ("r-gtools" ,r-gtools)
           ("r-magrittr" ,r-magrittr)
           ("r-matrix" ,r-matrix)
           ("r-matrixstats" ,r-matrixstats)
           ("r-motifmatchr" ,r-motifmatchr)
           ("r-nabor" ,r-nabor)
           ("r-plyr" ,r-plyr)
           ("r-rcpp" ,r-rcpp)
           ("r-rhdf5" ,r-rhdf5)
           ("r-rsamtools" ,r-rsamtools)
           ("r-s4vectors" ,r-s4vectors)
           ("r-stringr" ,r-stringr)
           ("r-summarizedexperiment" ,r-summarizedexperiment)
           ("r-uwot" ,r-uwot)))
        (synopsis "Analyze single-cell regulatory chromatin in R")
        (description
         "This package is designed to streamline scATAC analyses in R.")
        (home-page "https://github.com/GreenleafLab/ArchR")
        (license license:gpl2+))))
  ;; SCDE R package, fetched from the upstream Git tag named after the
  ;; version.
  (define-public r-scde
    (package
      (name "r-scde")
      (version "1.99.2")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/hms-dbmi/scde")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ("r-mgcv" ,r-mgcv)
         ("r-rook" ,r-rook)
         ("r-rjson" ,r-rjson)
         ("r-cairo" ,r-cairo)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-edger" ,r-edger)
         ("r-quantreg" ,r-quantreg)
         ("r-nnet" ,r-nnet)
         ("r-rmtstat" ,r-rmtstat)
         ("r-extremes" ,r-extremes)
         ("r-pcamethods" ,r-pcamethods)
         ("r-biocparallel" ,r-biocparallel)
         ("r-flexmix" ,r-flexmix)))
      (synopsis "R package for analyzing single-cell RNA-seq data")
      (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
      (home-page "https://hms-dbmi.github.io/scde/")
      ;; For the license, see https://github.com/hms-dbmi/scde/issues/38
      (license license:gpl2)))
  ;; CENTIPEDE R package, fetched as a source tarball from R-Forge.
  (define-public r-centipede
    (package
      (name "r-centipede")
      (version "1.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://download.r-forge.r-project.org/"
                             "src/contrib/CENTIPEDE_" version ".tar.gz"))
         (sha256
          (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
      (build-system r-build-system)
      (synopsis "Predict transcription factor binding sites")
      (description
       "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
      (home-page "http://centipede.uchicago.edu/")
      (license (list license:gpl2+ license:gpl3+))))
  ;; deMULTIplex R package, pinned to a specific commit of the MULTI-seq
  ;; repository; the version string is derived from the base version, revision
  ;; and commit.
  (define-public r-demultiplex
    (let ((revision "1")
          (commit "6e2a1422c8e6f418cfb271997eebc91f9195f299"))
      (package
        (name "r-demultiplex")
        (version (git-version "1.0.2" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
        ;; The upstream R package name differs in case from the Guix name.
        (properties `((upstream-name . "deMULTIplex")))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-kernsmooth" ,r-kernsmooth)
           ("r-reshape2" ,r-reshape2)
           ("r-rtsne" ,r-rtsne)
           ("r-shortread" ,r-shortread)
           ("r-stringdist" ,r-stringdist)))
        (synopsis "MULTI-seq pre-processing and classification tools")
        (description
         "deMULTIplex is an R package for analyzing single-cell RNA sequencing
data generated with the MULTI-seq sample multiplexing method. The package
includes software to
@enumerate
@item Convert raw MULTI-seq sample barcode library FASTQs into a sample
barcode UMI count matrix, and
@item Classify cell barcodes into sample barcode groups.
@end enumerate
")
        (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
        (license license:cc0))))
  ;; VSEARCH, built from the upstream Git tag "v<version>" with the bundled
  ;; cityhash copy replaced by the cityhash input.
  (define-public vsearch
    (package
      (name "vsearch")
      (version "2.9.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/torognes/vsearch")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
         (patches (search-patches "vsearch-unbundle-cityhash.patch"))
         (snippet
          '(begin
             ;; Drop the bundled cityhash sources; the patch above adjusts
             ;; the vsearch source accordingly.
             (for-each delete-file
                       '("src/city.h"
                         "src/citycrc.h"
                         "src/city.cc"))
             #t))))
      (build-system gnu-build-system)
      (inputs
       `(("zlib" ,zlib)
         ("bzip2" ,bzip2)
         ("cityhash" ,cityhash)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)))
      (home-page "https://github.com/torognes/vsearch")
      (synopsis "Sequence search tools for metagenomics")
      (description
       "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
      ;; Building fails on other platforms because vsearch uses non-portable
      ;; SSE intrinsics.
      (supported-systems '("x86_64-linux"))
      ;; Dual licensed; also includes public domain source.
      (license (list license:gpl3 license:bsd-2))))
  ;; ParDRe, built from the SourceForge release tarball; there is no
  ;; configure step and the single "ParDRe" binary is installed by hand.
  (define-public pardre
    (package
      (name "pardre")
      ;; Upstream changed the 1.1.5 source in place, so "-1" is appended to
      ;; the version while the download URL keeps the plain "1.1.5".
      (version "1.1.5-1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                             "1.1.5" ".tar.gz"))
         (sha256
          (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             ;; Copy the built executable into the output's bin directory.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (install-file "ParDRe" bindir)
                 #t))))
         ;; no tests included
         #:tests? #f))
      (inputs
       `(("openmpi" ,openmpi)
         ("zlib" ,zlib)))
      (home-page "https://sourceforge.net/projects/pardre/")
      (synopsis "Parallel tool to remove duplicate DNA reads")
      (description
       "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
      (license license:gpl3+)))
  ;; ngshmmalign, built with CMake from the upstream release tarball.
  (define-public ngshmmalign
    (package
      (name "ngshmmalign")
      (version "0.1.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/"
                             "releases/download/" version
                             "/ngshmmalign-" version ".tar.bz2"))
         (sha256
          (base32 "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv"))))
      (build-system cmake-build-system)
      ;; Tests are disabled because there are none.
      (arguments
       '(#:tests? #false))
      (inputs
       `(("boost" ,boost)))
      (synopsis "Profile HMM aligner for NGS reads")
      (description
       "ngshmmalign is a profile HMM aligner for NGS reads designed particularly
for small genomes (such as those of RNA viruses like HIV-1 and HCV) that
experience substantial biological insertions and deletions.")
      (home-page "https://github.com/cbg-ethz/ngshmmalign/")
      (license license:gpl2+)))
  ;; PRINSEQ: Perl scripts for preprocessing FASTA/FASTQ sequence data.  The
  ;; configure and build phases are removed; the custom install phase patches
  ;; the scripts, copies them to bin and wraps them so they find their Perl
  ;; modules.
  (define-public prinseq
    (package
      (name "prinseq")
      (version "0.20.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://sourceforge/prinseq/standalone/"
                             "prinseq-lite-" version ".tar.gz"))
         (sha256
          (base32
           "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #false                ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (find-files "." "prinseq.*.pl"))
                      ;; `search-input-file' takes the inputs alist as its
                      ;; first argument; calling it with only the file name
                      ;; is an arity error.
                      (guile (search-input-file inputs "bin/guile")))
                 ;; Replace the bare "perl" used in embedded command strings
                 ;; with the absolute file name of the perl found on PATH.
                 (substitute* scripts
                   (("\"perl -pe")
                    (string-append "\"" (which "perl") " -pe")))
                 ;; Install every script as an executable and wrap it so that
                 ;; PERL5LIB is prefixed with the build-time PERL5LIB value.
                 (for-each (lambda (file)
                             (chmod file #o555)
                             (install-file file bin)
                             (wrap-script (string-append bin "/" (basename file))
                               #:guile guile
                               `("PERL5LIB" ":" prefix
                                 (,(getenv "PERL5LIB")))))
                           scripts)))))))
      (inputs
       `(("guile" ,guile-3.0)           ; for wrapper scripts
         ("perl" ,perl)
         ("perl-cairo" ,perl-cairo)
         ("perl-data-dumper" ,perl-data-dumper)
         ("perl-digest-md5" ,perl-digest-md5)
         ("perl-getopt-long" ,perl-getopt-long)
         ("perl-json" ,perl-json)
         ("perl-statistics-pca" ,perl-statistics-pca)))
      (home-page "http://prinseq.sourceforge.net/")
      (synopsis "Preprocess sequence data in FASTA or FASTQ formats")
      (description
       "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
metagenomic sequence data in FASTA or FASTQ formats. The tool is written in
Perl and can be helpful if you want to filter, reformat, or trim your sequence
data. It also generates basic statistics for your sequences.")
      (license license:gpl3+)))
  ;; ShoRAH, built from the upstream release tarball.  The standard check
  ;; phase is removed and re-added after the installed program has been
  ;; wrapped, because the patched end-to-end test script runs the scripts
  ;; from the output's bin directory.
  (define-public shorah
    (package
      (name "shorah")
      (version "1.99.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/cbg-ethz/shorah"
                             "/releases/download/v" version
                             "/shorah-" version ".tar.xz"))
         (sha256
          (base32
           "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-test-wrapper
             ;; Make the end-to-end test call the test script from the
             ;; output's bin directory instead of "${interpreter} ../...".
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (substitute* "examples/run_end2end_test"
                   (("\\$\\{interpreter\\} ../\\$\\{testscript\\}")
                    (string-append bin "/${testscript}"))))))
           (delete 'check)
           (add-after 'install 'wrap-programs
             ;; Wrap bin/shorah so PYTHONPATH is prefixed with the package's
             ;; own site-packages directory and the build-time
             ;; GUIX_PYTHONPATH value.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (site (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python))
                             "/site-packages"))
                      (pythonpath (getenv "GUIX_PYTHONPATH"))
                      (script (string-append out "/bin/shorah")))
                 (chmod script #o555)
                 (wrap-program script `("PYTHONPATH" ":" prefix (,site ,pythonpath))))))
           (add-after 'wrap-programs 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "make" "check")))))))
      (inputs
       `(("boost" ,boost)
         ("htslib" ,htslib)
         ("python" ,python)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("zlib" ,zlib)))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      ;; The home page was an empty string, which is not a valid URL; use the
      ;; project page that the source tarball is downloaded from.
      (home-page "https://github.com/cbg-ethz/shorah")
      (synopsis "Short reads assembly into haplotypes")
      (description
       "ShoRAH is a project for the analysis of next generation sequencing data.
It is designed to analyse genetically heterogeneous samples. Its tools
provide error correction, haplotype reconstruction and estimation of the
frequency of the different genetic variants present in a mixed sample.")
      (license license:gpl3+)))
  ;; Bio::Kseq Ruby gem, fetched from RubyGems; tests run via the "spec"
  ;; target.
  (define-public ruby-bio-kseq
    (package
      (name "ruby-bio-kseq")
      (version "0.0.2")
      (source
       (origin
         (method url-fetch)
         (uri (rubygems-uri "bio-kseq" version))
         (sha256
          (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
      (build-system ruby-build-system)
      (arguments
       `(#:test-target "spec"))
      (inputs
       `(("zlib" ,zlib)))
      (native-inputs
       `(("bundler" ,bundler)
         ("ruby-rspec" ,ruby-rspec)
         ("ruby-rake-compiler" ,ruby-rake-compiler)))
      (home-page "https://github.com/gusevfe/bio-kseq")
      (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
      (description
       "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
      (license license:expat)))
  ;; bio-locus Ruby gem, fetched from RubyGems.
  (define-public bio-locus
    (package
      (name "bio-locus")
      (version "0.0.7")
      (source
       (origin
         (method url-fetch)
         (uri (rubygems-uri "bio-locus" version))
         (sha256
          (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
      (build-system ruby-build-system)
      (native-inputs
       `(("ruby-rspec" ,ruby-rspec)))
      (home-page "https://github.com/pjotrp/bio-locus")
      (synopsis "Tool for fast querying of genome locations")
      (description
       "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
      (license license:expat)))
  ;; bio-blastxmlparser Ruby gem, fetched from RubyGems.
  (define-public bio-blastxmlparser
    (package
      (name "bio-blastxmlparser")
      (version "2.0.4")
      (source
       (origin
         (method url-fetch)
         (uri (rubygems-uri "bio-blastxmlparser" version))
         (sha256
          (base32
           "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
      (build-system ruby-build-system)
      (propagated-inputs
       `(("ruby-bio-logger" ,ruby-bio-logger)
         ("ruby-nokogiri" ,ruby-nokogiri)))
      ;; RSpec is a test framework needed only while building, so it is a
      ;; native input, as in the other Ruby packages of this module.
      (native-inputs
       `(("ruby-rspec" ,ruby-rspec)))
      (synopsis "Fast big data BLAST XML parser and library")
      (description
       "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
      (home-page "https://github.com/pjotrp/blastxmlparser")
      (license license:expat)))
  ;; BioRuby, fetched from RubyGems as the "bio" gem.  A custom phase rewrites
  ;; hard-coded tool names in the test files before the build.
  (define-public bioruby
    (package
      (name "bioruby")
      (version "1.5.2")
      (source
       (origin
         (method url-fetch)
         (uri (rubygems-uri "bio" version))
         (sha256
          (base32
           "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
      (build-system ruby-build-system)
      (propagated-inputs
       `(("ruby-libxml" ,ruby-libxml)))
      (native-inputs
       `(("which" ,which)))             ; required for test phase
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-before 'build 'patch-test-command
             ;; Replace /bin/sh, /bin/ls, "which" and /bin/echo in the test
             ;; files with the file names that `which' finds on PATH.
             (lambda _
               (substitute* '("test/functional/bio/test_command.rb")
                 (("/bin/sh") (which "sh")))
               (substitute* '("test/functional/bio/test_command.rb")
                 (("/bin/ls") (which "ls")))
               (substitute* '("test/functional/bio/test_command.rb")
                 (("which") (which "which")))
               ;; List elements are separated by whitespace only.  A comma
               ;; here would be read as `unquote' and would only work by
               ;; accident inside the enclosing quasiquote.
               (substitute* '("test/functional/bio/test_command.rb"
                              "test/data/command/echoarg2.sh")
                 (("/bin/echo") (which "echo")))
               #t)))))
      (synopsis "Ruby library, shell and utilities for bioinformatics")
      (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
      (home-page "http://bioruby.org/")
      ;; Code is released under Ruby license, except for setup
      ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
      (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
  ;; bio-vcf Ruby gem, fetched from RubyGems.
  (define-public bio-vcf
    (package
      (name "bio-vcf")
      (version "0.9.5")
      (source
       (origin
         (method url-fetch)
         (uri (rubygems-uri "bio-vcf" version))
         (sha256
          (base32 "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
      (build-system ruby-build-system)
      (native-inputs
       `(("ruby-cucumber" ,ruby-cucumber)))
      (home-page "https://github.com/vcflib/bio-vcf")
      (synopsis "Smart VCF parser DSL")
      (description
       "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
the VCF format. Record named fields can be queried with regular expressions.
Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
only very fast for genome-wide (WGS) data, it also comes with a filtering,
evaluation and rewrite language and can output any type of textual data,
including VCF header and contents in RDF and JSON.")
      (license license:expat)))
  ;; phantompeakqualtools, pinned to a specific upstream Git commit.  Nothing
  ;; is configured or compiled; the install phase copies the "run_spp.R"
  ;; script into share/scripts of the output.
  (define-public r-phantompeakqualtools
    (let ((revision "1")
          (commit "8d2b2d18c686d894ef5908b37da7adf72a07ef42"))
      (package
        (name "r-phantompeakqualtools")
        (version (git-version "1.2.2" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/kundajelab/phantompeakqualtools")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "00anrvcwsp02d98qhj1xpj85644h2pp4kfzq6dgbmwmdr6jvy7p4"))))
        (build-system gnu-build-system)
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'build)
             (replace 'install
               (lambda* (#:key inputs outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (scripts-dir (string-append out "/share/scripts")))
                   (install-file "run_spp.R" scripts-dir)))))
           ;; There are no tests.
           #:tests? #f))
        (inputs
         `(("r" ,r-minimal)))
        (propagated-inputs
         `(("r-catools" ,r-catools)
           ("r-snow" ,r-snow)
           ("r-snowfall" ,r-snowfall)
           ("r-bitops" ,r-bitops)
           ("r-rsamtools" ,r-rsamtools)
           ("r-spp" ,r-spp)
           ("gawk" ,gawk)
           ("samtools" ,samtools)
           ("boost" ,boost)
           ("gzip" ,gzip)))
        (synopsis "Informative enrichment for ChIP-seq data")
        (description "This package computes informative enrichment and quality
measures for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be
used to obtain robust estimates of the predominant fragment length or
characteristic tag shift values in these assays.")
        (home-page "https://github.com/kundajelab/phantompeakqualtools")
        (license license:bsd-3))))
  ;; R4RNA R package, fetched as a source tarball from the e-rna.org site.
  (define-public r-r4rna
    (package
      (name "r-r4rna")
      (version "0.1.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                             version ".tar.gz"))
         (sha256
          (base32 "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-optparse" ,r-optparse)
         ("r-rcolorbrewer" ,r-rcolorbrewer)))
      (synopsis "Analysis framework for RNA secondary structure")
      (description
       "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
      (home-page "https://www.e-rna.org/r-chie/index.cgi")
      (license license:gpl3+)))
  ;; Web interface for RCAS, built from the upstream release tarball.  The
  ;; installed executable is wrapped so it finds its Guile modules and R
  ;; libraries.
  (define-public rcas-web
    (package
      (name "rcas-web")
      (version "0.1.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                             "releases/download/v" version
                             "/rcas-web-" version ".tar.gz"))
         (sha256
          (base32 "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-before 'configure 'find-RCAS
             ;; Non-1.3.x versions of RCAS are not found by the configure
             ;; script, because its R expression ‘1.10.1 >= 1.3.4’ evaluates
             ;; to false.  Lower the required version to "0.0.0".
             (lambda _
               (substitute* "configure"
                 (("1\\.3\\.4") "0.0.0"))
               #t))
           (add-after 'install 'wrap-executable
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      ;; Guile 2.2 site directory of the named input.
                      (site-dir (lambda (input)
                                  (string-append (assoc-ref inputs input)
                                                 "/share/guile/site/2.2")))
                      (path (string-append (site-dir "guile-json")
                                           ":"
                                           (site-dir "guile-redis"))))
                 (wrap-program (string-append out "/bin/rcas-web")
                   `("GUILE_LOAD_PATH" ":" = (,path))
                   `("GUILE_LOAD_COMPILED_PATH" ":" = (,path))
                   `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
               #t)))))
      (inputs
       `(("r-minimal" ,r-minimal)
         ("r-rcas" ,r-rcas)
         ("guile" ,guile-2.2)
         ("guile-json" ,guile-json-1)
         ("guile-redis" ,guile2.2-redis)))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (synopsis "Web interface for RNA-centric annotation system (RCAS)")
      (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
      (home-page "https://github.com/BIMSBbioinfo/rcas-web")
      (license license:agpl3+)))
  ;; ChIPKernels R package, pinned to a specific upstream Git commit; the
  ;; version string is derived from the base version, revision and commit.
  (define-public r-chipkernels
    (let ((revision "1")
          (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
      (package
        (name "r-chipkernels")
        (version (git-version "1.1" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ManuSetty/ChIPKernels")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-iranges" ,r-iranges)
           ("r-xvector" ,r-xvector)
           ("r-biostrings" ,r-biostrings)
           ("r-bsgenome" ,r-bsgenome)
           ("r-gtools" ,r-gtools)
           ("r-genomicranges" ,r-genomicranges)
           ("r-sfsmisc" ,r-sfsmisc)
           ("r-kernlab" ,r-kernlab)
           ("r-s4vectors" ,r-s4vectors)
           ("r-biocgenerics" ,r-biocgenerics)))
        (synopsis "Build string kernels for DNA Sequence analysis")
        (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
        (home-page "https://github.com/ManuSetty/ChIPKernels")
        (license license:gpl2+))))
  7915. (define-public r-seqgl
  7916. (package
  7917. (name "r-seqgl")
  7918. (version "1.1.4")
  7919. (source
  7920. (origin
  7921. (method git-fetch)
  7922. (uri (git-reference
  7923. (url "https://github.com/ManuSetty/SeqGL")
  7924. (commit version)))
  7925. (file-name (git-file-name name version))
  7926. (sha256
  7927. (base32
  7928. "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
  7929. (build-system r-build-system)
  7930. (propagated-inputs
  7931. `(("r-biostrings" ,r-biostrings)
  7932. ("r-chipkernels" ,r-chipkernels)
  7933. ("r-genomicranges" ,r-genomicranges)
  7934. ("r-spams" ,r-spams)
  7935. ("r-wgcna" ,r-wgcna)
  7936. ("r-fastcluster" ,r-fastcluster)))
  7937. (home-page "https://github.com/ManuSetty/SeqGL")
  7938. (synopsis "Group lasso for Dnase/ChIP-seq data")
  7939. (description "SeqGL is a group lasso based algorithm to extract
  7940. transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
  7941. This package presents a method which uses group lasso to discriminate between
  7942. bound and non bound genomic regions to accurately identify transcription
  7943. factors bound at the specific regions.")
  7944. (license license:gpl2+)))
  7945. (define-public emboss
  7946. (package
  7947. (name "emboss")
  7948. (version "6.5.7")
  7949. (source (origin
  7950. (method url-fetch)
  7951. (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
  7952. (version-major+minor version) ".0/"
  7953. "EMBOSS-" version ".tar.gz"))
  7954. (sha256
  7955. (base32
  7956. "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
  7957. (build-system gnu-build-system)
  7958. (arguments
  7959. `(#:configure-flags
  7960. (list (string-append "--with-hpdf="
  7961. (assoc-ref %build-inputs "libharu")))
  7962. #:phases
  7963. (modify-phases %standard-phases
  7964. (add-after 'unpack 'fix-checks
  7965. (lambda _
  7966. ;; The PNGDRIVER tests check for the presence of libgd, libpng
  7967. ;; and zlib, but assume that they are all found at the same
  7968. ;; prefix.
  7969. (substitute* "configure.in"
  7970. (("CHECK_PNGDRIVER")
  7971. "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
  7972. AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
  7973. AM_CONDITIONAL(AMPNG, true)"))
  7974. #t))
  7975. (add-after 'fix-checks 'disable-update-check
  7976. (lambda _
  7977. ;; At build time there is no connection to the Internet, so
  7978. ;; looking for updates will not work.
  7979. (substitute* "Makefile.am"
  7980. (("\\$\\(bindir\\)/embossupdate") ""))
  7981. #t))
  7982. (add-after 'disable-update-check 'autogen
  7983. (lambda _ (invoke "autoreconf" "-vif") #t)))))
  7984. (inputs
  7985. `(("perl" ,perl)
  7986. ("libpng" ,libpng)
  7987. ("gd" ,gd)
  7988. ("libx11" ,libx11)
  7989. ("libharu" ,libharu)
  7990. ("zlib" ,zlib)))
  7991. (native-inputs
  7992. `(("autoconf" ,autoconf)
  7993. ("automake" ,automake)
  7994. ("libtool" ,libtool)
  7995. ("pkg-config" ,pkg-config)))
  7996. (home-page "http://emboss.sourceforge.net")
  7997. (synopsis "Molecular biology analysis suite")
  7998. (description "EMBOSS is the \"European Molecular Biology Open Software
  7999. Suite\". EMBOSS is an analysis package specially developed for the needs of
  8000. the molecular biology (e.g. EMBnet) user community. The software
  8001. automatically copes with data in a variety of formats and even allows
  8002. transparent retrieval of sequence data from the web. It also provides a
  8003. number of libraries for the development of software in the field of molecular
  8004. biology. EMBOSS also integrates a range of currently available packages and
  8005. tools for sequence analysis into a seamless whole.")
  8006. (license license:gpl2+)))
  8007. (define-public bits
  8008. (let ((revision "1")
  8009. (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
  8010. (package
  8011. (name "bits")
  8012. ;; The version is 2.13.0 even though no release archives have been
  8013. ;; published as yet.
  8014. (version (git-version "2.13.0" revision commit))
  8015. (source (origin
  8016. (method git-fetch)
  8017. (uri (git-reference
  8018. (url "https://github.com/arq5x/bits")
  8019. (commit commit)))
  8020. (file-name (git-file-name name version))
  8021. (sha256
  8022. (base32
  8023. "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
  8024. (build-system gnu-build-system)
  8025. (arguments
  8026. `(#:tests? #f ;no tests included
  8027. #:phases
  8028. (modify-phases %standard-phases
  8029. (delete 'configure)
  8030. (add-after 'unpack 'remove-cuda
  8031. (lambda _
  8032. (substitute* "Makefile"
  8033. ((".*_cuda") "")
  8034. (("(bits_test_intersections) \\\\" _ match) match))
  8035. #t))
  8036. (replace 'install
  8037. (lambda* (#:key outputs #:allow-other-keys)
  8038. (copy-recursively
  8039. "bin" (string-append (assoc-ref outputs "out") "/bin"))
  8040. #t)))))
  8041. (inputs
  8042. `(("gsl" ,gsl)
  8043. ("zlib" ,zlib)))
  8044. (home-page "https://github.com/arq5x/bits")
  8045. (synopsis "Implementation of binary interval search algorithm")
  8046. (description "This package provides an implementation of the
  8047. BITS (Binary Interval Search) algorithm, an approach to interval set
  8048. intersection. It is especially suited for the comparison of diverse genomic
  8049. datasets and the exploration of large datasets of genome
  8050. intervals (e.g. genes, sequence alignments).")
  8051. (license license:gpl2))))
  8052. (define-public piranha
  8053. ;; There is no release tarball for the latest version. The latest commit is
  8054. ;; older than one year at the time of this writing.
  8055. (let ((revision "1")
  8056. (commit "0466d364b71117d01e4471b74c514436cc281233"))
  8057. (package
  8058. (name "piranha")
  8059. (version (git-version "1.2.1" revision commit))
  8060. (source (origin
  8061. (method git-fetch)
  8062. (uri (git-reference
  8063. (url "https://github.com/smithlabcode/piranha")
  8064. (commit commit)))
  8065. (file-name (git-file-name name version))
  8066. (sha256
  8067. (base32
  8068. "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
  8069. (build-system gnu-build-system)
  8070. (arguments
  8071. `(#:test-target "test"
  8072. #:phases
  8073. (modify-phases %standard-phases
  8074. (add-after 'unpack 'copy-smithlab-cpp
  8075. (lambda* (#:key inputs #:allow-other-keys)
  8076. (for-each (lambda (file)
  8077. (install-file file "./src/smithlab_cpp/"))
  8078. (find-files (assoc-ref inputs "smithlab-cpp")))
  8079. #t))
  8080. (add-after 'install 'install-to-store
  8081. (lambda* (#:key outputs #:allow-other-keys)
  8082. (let* ((out (assoc-ref outputs "out"))
  8083. (bin (string-append out "/bin")))
  8084. (for-each (lambda (file)
  8085. (install-file file bin))
  8086. (find-files "bin" ".*")))
  8087. #t)))
  8088. #:configure-flags
  8089. (list (string-append "--with-bam_tools_headers="
  8090. (assoc-ref %build-inputs "bamtools") "/include/bamtools")
  8091. (string-append "--with-bam_tools_library="
  8092. (assoc-ref %build-inputs "bamtools") "/lib/bamtools"))))
  8093. (inputs
  8094. `(("bamtools" ,bamtools)
  8095. ("samtools" ,samtools-0.1)
  8096. ("gsl" ,gsl)
  8097. ("smithlab-cpp"
  8098. ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
  8099. (origin
  8100. (method git-fetch)
  8101. (uri (git-reference
  8102. (url "https://github.com/smithlabcode/smithlab_cpp")
  8103. (commit commit)))
  8104. (file-name (string-append "smithlab_cpp-" commit "-checkout"))
  8105. (sha256
  8106. (base32
  8107. "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
  8108. (native-inputs
  8109. `(("python" ,python-2)))
  8110. (home-page "https://github.com/smithlabcode/piranha")
  8111. (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
  8112. (description
  8113. "Piranha is a peak-caller for genomic data produced by CLIP-seq and
  8114. RIP-seq experiments. It takes input in BED or BAM format and identifies
  8115. regions of statistically significant read enrichment. Additional covariates
  8116. may optionally be provided to further inform the peak-calling process.")
  8117. (license license:gpl3+))))
  8118. (define-public pepr
  8119. (package
  8120. (name "pepr")
  8121. (version "1.0.9")
  8122. (source (origin
  8123. (method url-fetch)
  8124. (uri (pypi-uri "PePr" version))
  8125. (sha256
  8126. (base32
  8127. "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
  8128. (build-system python-build-system)
  8129. (arguments
  8130. `(#:python ,python-2 ; python2 only
  8131. #:tests? #f)) ; no tests included
  8132. (propagated-inputs
  8133. `(("python2-numpy" ,python2-numpy)
  8134. ("python2-scipy" ,python2-scipy)
  8135. ("python2-pysam" ,python2-pysam)))
  8136. (home-page "https://github.com/shawnzhangyx/PePr")
  8137. (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
  8138. (description
  8139. "PePr is a ChIP-Seq peak calling or differential binding analysis tool
  8140. that is primarily designed for data with biological replicates. It uses a
  8141. negative binomial distribution to model the read counts among the samples in
  8142. the same group, and look for consistent differences between ChIP and control
  8143. group or two ChIP groups run under different conditions.")
  8144. (license license:gpl3+)))
  8145. (define-public filevercmp
  8146. (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")
  8147. (revision "1"))
  8148. (package
  8149. (name "filevercmp")
  8150. (version (git-version "0" revision commit))
  8151. (source (origin
  8152. (method git-fetch)
  8153. (uri (git-reference
  8154. (url "https://github.com/ekg/filevercmp")
  8155. (commit commit)))
  8156. (file-name (git-file-name name commit))
  8157. (sha256
  8158. (base32
  8159. "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
  8160. (build-system gnu-build-system)
  8161. (arguments
  8162. `(#:tests? #f ; There are no tests to run.
  8163. #:phases
  8164. (modify-phases %standard-phases
  8165. (delete 'configure) ; There is no configure phase.
  8166. (replace 'install
  8167. (lambda* (#:key outputs #:allow-other-keys)
  8168. (let ((out (assoc-ref outputs "out")))
  8169. (install-file "filevercmp" (string-append out "/bin"))
  8170. (install-file "filevercmp.h" (string-append out "/include"))
  8171. #t))))))
  8172. (home-page "https://github.com/ekg/filevercmp")
  8173. (synopsis "This program compares version strings")
  8174. (description "This program compares version strings. It intends to be a
  8175. replacement for strverscmp.")
  8176. (license license:gpl3+))))
  8177. (define-public multiqc
  8178. (package
  8179. (name "multiqc")
  8180. (version "1.10.1")
  8181. (source
  8182. (origin
  8183. (method url-fetch)
  8184. (uri (pypi-uri "multiqc" version))
  8185. (sha256
  8186. (base32
  8187. "0y9sgjca3bp0kk3ngry4zf4q2diyzp5bvzsx5l23nsysfbfkigm4"))))
  8188. (build-system python-build-system)
  8189. (arguments
  8190. `(#:phases
  8191. (modify-phases %standard-phases
  8192. (replace 'check
  8193. (lambda* (#:key inputs outputs tests? #:allow-other-keys)
  8194. (when tests?
  8195. (setenv "HOME" "/tmp")
  8196. (let ((here (getcwd)))
  8197. (copy-recursively (assoc-ref inputs "tests") "/tmp/tests")
  8198. ;; ModuleNotFoundError: No module named 'multiqc.modules.ccs'
  8199. (delete-file "/tmp/tests/unit_tests/test_ccs.py")
  8200. (with-directory-excursion "/tmp/tests"
  8201. (setenv "PYTHONPATH" (string-append here ":" (getenv "PYTHONPATH")))
  8202. (invoke "python" "-munittest" "discover")))))))))
  8203. (propagated-inputs
  8204. `(("python-click" ,python-click)
  8205. ("python-coloredlogs" ,python-coloredlogs)
  8206. ("python-future" ,python-future)
  8207. ("python-jinja2" ,python-jinja2)
  8208. ("python-lzstring" ,python-lzstring)
  8209. ("python-markdown" ,python-markdown)
  8210. ("python-matplotlib" ,python-matplotlib)
  8211. ("python-networkx" ,python-networkx)
  8212. ("python-numpy" ,python-numpy)
  8213. ("python-pyyaml" ,python-pyyaml)
  8214. ("python-requests" ,python-requests)
  8215. ("python-rich" ,python-rich)
  8216. ("python-simplejson" ,python-simplejson)
  8217. ("python-spectra" ,python-spectra)))
  8218. (native-inputs
  8219. `(("python-pytest" ,python-pytest)
  8220. ("tests"
  8221. ,(let ((commit "02272d48a382beb27489fcf9e6308a0407dc3c2e"))
  8222. (origin
  8223. (method git-fetch)
  8224. (uri (git-reference
  8225. (url "https://github.com/ewels/MultiQC_TestData")
  8226. (commit commit)))
  8227. (file-name (git-file-name "multiqc-test-data" commit))
  8228. (sha256
  8229. (base32
  8230. "1bha64wanrigczw4yn81din56396n61j5gqdrkslhslmskcafi91")))))))
  8231. (home-page "https://multiqc.info")
  8232. (synopsis "Aggregate bioinformatics analysis reports")
  8233. (description
  8234. "MultiQC is a tool to aggregate bioinformatics results across many
  8235. samples into a single report. It contains modules for a large number of
  8236. common bioinformatics tools.")
  8237. (license license:gpl3+)))
  8238. (define-public variant-tools
  8239. (package
  8240. (name "variant-tools")
  8241. (version "3.1.2")
  8242. (source
  8243. (origin
  8244. (method git-fetch)
  8245. (uri (git-reference
  8246. (url "https://github.com/vatlab/varianttools")
  8247. ;; There is no tag corresponding to version 3.1.2
  8248. (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
  8249. (file-name (git-file-name name version))
  8250. (sha256
  8251. (base32
  8252. "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
  8253. (build-system python-build-system)
  8254. (inputs
  8255. `(("boost" ,boost)
  8256. ("c-blosc" ,c-blosc)
  8257. ("gsl" ,gsl)
  8258. ("hdf5" ,hdf5)
  8259. ("hdf5-blosc" ,hdf5-blosc)
  8260. ("python-cython" ,python-cython)
  8261. ("zlib" ,zlib)))
  8262. (propagated-inputs
  8263. `(("python-numpy" ,python-numpy)
  8264. ("python-pycurl" ,python-pycurl)
  8265. ("python-pyzmq" ,python-pyzmq)
  8266. ("python-scipy" ,python-scipy)
  8267. ("python-tables" ,python-tables)))
  8268. (home-page "https://vatlab.github.io/vat-docs/")
  8269. (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
  8270. (description
  8271. "Variant tools is a tool for the manipulation, annotation,
  8272. selection, simulation, and analysis of variants in the context of next-gen
  8273. sequencing analysis. Unlike some other tools used for next-gen sequencing
  8274. analysis, variant tools is project based and provides a whole set of tools to
  8275. manipulate and analyze genetic variants.")
  8276. (license license:gpl3+)))
  8277. (define-public r-raremetals2
  8278. (package
  8279. (name "r-raremetals2")
  8280. (version "0.1")
  8281. (source
  8282. (origin
  8283. (method url-fetch)
  8284. (uri (string-append "http://genome.sph.umich.edu/w/images/"
  8285. "b/b7/RareMETALS2_" version ".tar.gz"))
  8286. (sha256
  8287. (base32
  8288. "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
  8289. (properties `((upstream-name . "RareMETALS2")))
  8290. (build-system r-build-system)
  8291. (propagated-inputs
  8292. `(("r-seqminer" ,r-seqminer)
  8293. ("r-mvtnorm" ,r-mvtnorm)
  8294. ("r-mass" ,r-mass)
  8295. ("r-compquadform" ,r-compquadform)
  8296. ("r-getopt" ,r-getopt)))
  8297. (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
  8298. (synopsis "Analyze gene-level association tests for binary trait")
  8299. (description
  8300. "The R package rareMETALS2 is an extension of the R package rareMETALS.
  8301. It was designed to meta-analyze gene-level association tests for binary trait.
  8302. While rareMETALS offers a near-complete solution for meta-analysis of
  8303. gene-level tests for quantitative trait, it does not offer the optimal
  8304. solution for binary trait. The package rareMETALS2 offers improved features
  8305. for analyzing gene-level association tests in meta-analyses for binary
  8306. trait.")
  8307. (license license:gpl3)))
  8308. (define-public r-dropbead
  8309. (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
  8310. (revision "2"))
  8311. (package
  8312. (name "r-dropbead")
  8313. (version (string-append "0-" revision "." (string-take commit 7)))
  8314. (source
  8315. (origin
  8316. (method git-fetch)
  8317. (uri (git-reference
  8318. (url "https://github.com/rajewsky-lab/dropbead")
  8319. (commit commit)))
  8320. (file-name (git-file-name name version))
  8321. (sha256
  8322. (base32
  8323. "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
  8324. (build-system r-build-system)
  8325. (propagated-inputs
  8326. `(("r-ggplot2" ,r-ggplot2)
  8327. ("r-rcolorbrewer" ,r-rcolorbrewer)
  8328. ("r-gridextra" ,r-gridextra)
  8329. ("r-gplots" ,r-gplots)
  8330. ("r-plyr" ,r-plyr)))
  8331. (home-page "https://github.com/rajewsky-lab/dropbead")
  8332. (synopsis "Basic exploration and analysis of Drop-seq data")
  8333. (description "This package offers a quick and straight-forward way to
  8334. explore and perform basic analysis of single cell sequencing data coming from
  8335. droplet sequencing. It has been particularly tailored for Drop-seq.")
  8336. (license license:gpl3))))
  8337. (define-public r-cellchat
  8338. (let ((commit
  8339. "21edd226ca408e4c413408f98562d71ee0b54e5d")
  8340. (revision "1"))
  8341. (package
  8342. (name "r-cellchat")
  8343. (version (git-version "1.0.0" revision commit))
  8344. (source
  8345. (origin
  8346. (method git-fetch)
  8347. (uri (git-reference
  8348. (url "https://github.com/sqjin/CellChat")
  8349. (commit commit)))
  8350. (file-name (git-file-name name version))
  8351. (sha256
  8352. (base32
  8353. "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53"))
  8354. (snippet
  8355. '(for-each delete-file '("src/CellChat.so"
  8356. "src/CellChat_Rcpp.o"
  8357. "src/RcppExports.o")))))
  8358. (properties `((upstream-name . "CellChat")))
  8359. (build-system r-build-system)
  8360. (propagated-inputs
  8361. `(("r-biocgenerics" ,r-biocgenerics)
  8362. ("r-circlize" ,r-circlize)
  8363. ("r-colorspace" ,r-colorspace)
  8364. ("r-complexheatmap" ,r-complexheatmap)
  8365. ("r-cowplot" ,r-cowplot)
  8366. ("r-dplyr" ,r-dplyr)
  8367. ("r-expm" ,r-expm)
  8368. ("r-fnn" ,r-fnn)
  8369. ("r-forcats" ,r-forcats)
  8370. ("r-future" ,r-future)
  8371. ("r-future-apply" ,r-future-apply)
  8372. ("r-gg-gap" ,r-gg-gap)
  8373. ("r-ggalluvial" ,r-ggalluvial)
  8374. ("r-ggplot2" ,r-ggplot2)
  8375. ("r-ggrepel" ,r-ggrepel)
  8376. ("r-igraph" ,r-igraph)
  8377. ("r-irlba" ,r-irlba)
  8378. ("r-magrittr" ,r-magrittr)
  8379. ("r-matrix" ,r-matrix)
  8380. ("r-nmf" ,r-nmf)
  8381. ("r-patchwork" ,r-patchwork)
  8382. ("r-pbapply" ,r-pbapply)
  8383. ("r-rcolorbrewer" ,r-rcolorbrewer)
  8384. ("r-rcpp" ,r-rcpp)
  8385. ("r-rcppeigen" ,r-rcppeigen)
  8386. ("r-reshape2" ,r-reshape2)
  8387. ("r-reticulate" ,r-reticulate)
  8388. ("r-rspectra" ,r-rspectra)
  8389. ("r-rtsne" ,r-rtsne)
  8390. ("r-scales" ,r-scales)
  8391. ("r-shape" ,r-shape)
  8392. ("r-sna" ,r-sna)
  8393. ("r-stringr" ,r-stringr)
  8394. ("r-svglite" ,r-svglite)))
  8395. (native-inputs `(("r-knitr" ,r-knitr)))
  8396. (home-page "https://github.com/sqjin/CellChat")
  8397. (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data")
  8398. (description
  8399. "This package infers, visualizes and analyzes the cell-cell
  8400. communication networks from scRNA-seq data.")
  8401. (license license:gpl3))))
  8402. (define-public sambamba
  8403. (package
  8404. (name "sambamba")
  8405. (version "0.8.0")
  8406. (source
  8407. (origin
  8408. (method git-fetch)
  8409. (uri (git-reference
  8410. (url "https://github.com/biod/sambamba")
  8411. (commit (string-append "v" version))))
  8412. (file-name (git-file-name name version))
  8413. (sha256
  8414. (base32
  8415. "07dznzl6m8k7sw84jxw2kx6i3ymrapbmcmyh0fxz8wrybhw8fmwc"))))
  8416. (build-system gnu-build-system)
  8417. (arguments
  8418. `(#:tests? #f ; there is no test target
  8419. #:parallel-build? #f ; not supported
  8420. #:phases
  8421. (modify-phases %standard-phases
  8422. (delete 'configure)
  8423. (add-after 'unpack 'fix-ldc-version
  8424. (lambda _
  8425. (substitute* "Makefile"
  8426. ;; We use ldc2 instead of ldmd2 to compile sambamba.
  8427. (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
  8428. #t))
  8429. (add-after 'unpack 'unbundle-prerequisites
  8430. (lambda _
  8431. (substitute* "Makefile"
  8432. (("= lz4/lib/liblz4.a") "= -L-llz4")
  8433. (("ldc_version_info lz4-static") "ldc_version_info"))
  8434. #t))
  8435. (replace 'install
  8436. (lambda* (#:key outputs #:allow-other-keys)
  8437. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  8438. (mkdir-p bin)
  8439. (copy-file (string-append "bin/sambamba-" ,version)
  8440. (string-append bin "/sambamba"))
  8441. #t))))))
  8442. (native-inputs
  8443. `(("python" ,python)))
  8444. (inputs
  8445. `(("ldc" ,ldc)
  8446. ("lz4" ,lz4)
  8447. ("zlib" ,zlib)))
  8448. (home-page "https://github.com/biod/sambamba")
  8449. (synopsis "Tools for working with SAM/BAM data")
  8450. (description "Sambamba is a high performance modern robust and
  8451. fast tool (and library), written in the D programming language, for
  8452. working with SAM and BAM files. Current parallelised functionality is
  8453. an important subset of samtools functionality, including view, index,
  8454. sort, markdup, and depth.")
  8455. (license license:gpl2+)))
  8456. (define-public ritornello
  8457. (package
  8458. (name "ritornello")
  8459. (version "2.0.1")
  8460. (source (origin
  8461. (method git-fetch)
  8462. (uri (git-reference
  8463. (url "https://github.com/KlugerLab/Ritornello")
  8464. (commit (string-append "v" version))))
  8465. (file-name (git-file-name name version))
  8466. (sha256
  8467. (base32
  8468. "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
  8469. (build-system gnu-build-system)
  8470. (arguments
  8471. `(#:tests? #f ; there are no tests
  8472. #:phases
  8473. (modify-phases %standard-phases
  8474. (add-after 'unpack 'patch-samtools-references
  8475. (lambda* (#:key inputs #:allow-other-keys)
  8476. (substitute* '("src/SamStream.h"
  8477. "src/FLD.cpp")
  8478. (("<sam.h>") "<samtools/sam.h>"))
  8479. #t))
  8480. (delete 'configure)
  8481. (replace 'install
  8482. (lambda* (#:key inputs outputs #:allow-other-keys)
  8483. (let* ((out (assoc-ref outputs "out"))
  8484. (bin (string-append out "/bin/")))
  8485. (mkdir-p bin)
  8486. (install-file "bin/Ritornello" bin)
  8487. #t))))))
  8488. (inputs
  8489. `(("samtools" ,samtools-0.1)
  8490. ("fftw" ,fftw)
  8491. ("boost" ,boost)
  8492. ("zlib" ,zlib)))
  8493. (home-page "https://github.com/KlugerLab/Ritornello")
  8494. (synopsis "Control-free peak caller for ChIP-seq data")
  8495. (description "Ritornello is a ChIP-seq peak calling algorithm based on
  8496. signal processing that can accurately call binding events without the need to
  8497. do a pair total DNA input or IgG control sample. It has been tested for use
  8498. with narrow binding events such as transcription factor ChIP-seq.")
  8499. (license license:gpl3+)))
  8500. (define-public trim-galore
  8501. (package
  8502. (name "trim-galore")
  8503. (version "0.6.6")
  8504. (source
  8505. (origin
  8506. (method git-fetch)
  8507. (uri (git-reference
  8508. (url "https://github.com/FelixKrueger/TrimGalore")
  8509. (commit version)))
  8510. (file-name (git-file-name name version))
  8511. (sha256
  8512. (base32
  8513. "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
  8514. (build-system gnu-build-system)
  8515. (arguments
  8516. `(#:tests? #f ; no tests
  8517. #:phases
  8518. (modify-phases %standard-phases
  8519. (replace 'configure
  8520. (lambda _
  8521. ;; Trim Galore tries to figure out what version of Python
  8522. ;; cutadapt is using by looking at the shebang. Of course that
  8523. ;; doesn't work, because cutadapt is wrapped in a shell script.
  8524. (substitute* "trim_galore"
  8525. (("my \\$python_return.*")
  8526. "my $python_return = \"Python 3.999\";\n"))
  8527. #t))
  8528. (delete 'build)
  8529. (add-after 'unpack 'hardcode-tool-references
  8530. (lambda* (#:key inputs #:allow-other-keys)
  8531. (substitute* "trim_galore"
  8532. (("\\$path_to_cutadapt = 'cutadapt'")
  8533. (string-append "$path_to_cutadapt = '"
  8534. (assoc-ref inputs "cutadapt")
  8535. "/bin/cutadapt'"))
  8536. (("\\$compression_path = \"gzip\"")
  8537. (string-append "$compression_path = \""
  8538. (assoc-ref inputs "gzip")
  8539. "/bin/gzip\""))
  8540. (("\"gunzip")
  8541. (string-append "\""
  8542. (assoc-ref inputs "gzip")
  8543. "/bin/gunzip"))
  8544. (("\"pigz")
  8545. (string-append "\""
  8546. (assoc-ref inputs "pigz")
  8547. "/bin/pigz")))
  8548. #t))
  8549. (replace 'install
  8550. (lambda* (#:key outputs #:allow-other-keys)
  8551. (let ((bin (string-append (assoc-ref outputs "out")
  8552. "/bin")))
  8553. (mkdir-p bin)
  8554. (install-file "trim_galore" bin)
  8555. #t))))))
  8556. (inputs
  8557. `(("gzip" ,gzip)
  8558. ("perl" ,perl)
  8559. ("pigz" ,pigz)
  8560. ("cutadapt" ,cutadapt)))
  8561. (native-inputs
  8562. `(("unzip" ,unzip)))
  8563. (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
  8564. (synopsis "Wrapper around Cutadapt and FastQC")
  8565. (description "Trim Galore! is a wrapper script to automate quality and
  8566. adapter trimming as well as quality control, with some added functionality to
  8567. remove biased methylation positions for RRBS sequence files.")
  8568. (license license:gpl3+)))
  8569. (define-public gess
  8570. (package
  8571. (name "gess")
  8572. (version "1.0")
  8573. (source (origin
  8574. (method url-fetch)
  8575. (uri (string-append "http://compbio.uthscsa.edu/"
  8576. "GESS_Web/files/"
  8577. "gess-" version ".src.tar.gz"))
  8578. (sha256
  8579. (base32
  8580. "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
  8581. (build-system gnu-build-system)
  8582. (arguments
  8583. `(#:tests? #f ; no tests
  8584. #:phases
  8585. (modify-phases %standard-phases
  8586. (delete 'configure)
  8587. (delete 'build)
  8588. (replace 'install
  8589. (lambda* (#:key inputs outputs #:allow-other-keys)
  8590. (let* ((python (assoc-ref inputs "python"))
  8591. (out (assoc-ref outputs "out"))
  8592. (bin (string-append out "/bin/"))
  8593. (target (string-append
  8594. out "/lib/python"
  8595. ,(version-major+minor
  8596. (package-version python))
  8597. "/site-packages/gess/")))
  8598. (mkdir-p target)
  8599. (copy-recursively "." target)
  8600. ;; Make GESS.py executable
  8601. (chmod (string-append target "GESS.py") #o555)
  8602. ;; Add Python shebang to the top and make Matplotlib
  8603. ;; usable.
  8604. (substitute* (string-append target "GESS.py")
  8605. (("\"\"\"Description:" line)
  8606. (string-append "#!" (which "python") "
  8607. import matplotlib
  8608. matplotlib.use('Agg')
  8609. " line)))
  8610. ;; Make sure GESS has all modules in its path
  8611. (wrap-script (string-append target "GESS.py")
  8612. #:guile (search-input-file inputs "bin/guile")
  8613. `("GUIX_PYTHONPATH" ":" = (,target ,(getenv "GUIX_PYTHONPATH"))))
  8614. (mkdir-p bin)
  8615. (symlink (string-append target "GESS.py")
  8616. (string-append bin "GESS.py"))
  8617. #t))))))
  8618. (inputs
  8619. `(("python" ,python-2)
  8620. ("python2-pysam" ,python2-pysam)
  8621. ("python2-scipy" ,python2-scipy)
  8622. ("python2-numpy" ,python2-numpy)
  8623. ("python2-networkx" ,python2-networkx)
  8624. ("python2-biopython" ,python2-biopython)
  8625. ("guile" ,guile-3.0))) ; for the script wrapper
  8626. (home-page "https://compbio.uthscsa.edu/GESS_Web/")
  8627. (synopsis "Detect exon-skipping events from raw RNA-seq data")
  8628. (description
  8629. "GESS is an implementation of a novel computational method to detect de
  8630. novo exon-skipping events directly from raw RNA-seq data without the prior
  8631. knowledge of gene annotation information. GESS stands for the graph-based
  8632. exon-skipping scanner detection scheme.")
  8633. (license license:bsd-3)))
  8634. (define-public phylip
  8635. (package
  8636. (name "phylip")
  8637. (version "3.696")
  8638. (source
  8639. (origin
  8640. (method url-fetch)
  8641. (uri (string-append "http://evolution.gs.washington.edu/phylip/"
  8642. "download/phylip-" version ".tar.gz"))
  8643. (sha256
  8644. (base32
  8645. "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
  8646. (build-system gnu-build-system)
  8647. (arguments
  8648. `(#:tests? #f ; no check target
  8649. #:make-flags (list "-f" "Makefile.unx" "install")
  8650. #:parallel-build? #f ; not supported
  8651. #:phases
  8652. (modify-phases %standard-phases
  8653. (add-after 'unpack 'enter-dir
  8654. (lambda _ (chdir "src") #t))
  8655. (delete 'configure)
  8656. (replace 'install
  8657. (lambda* (#:key inputs outputs #:allow-other-keys)
  8658. (let ((target (string-append (assoc-ref outputs "out")
  8659. "/bin")))
  8660. (mkdir-p target)
  8661. (for-each (lambda (file)
  8662. (install-file file target))
  8663. (find-files "../exe" ".*")))
  8664. #t)))))
  8665. (home-page "http://evolution.genetics.washington.edu/phylip/")
  8666. (synopsis "Tools for inferring phylogenies")
  8667. (description "PHYLIP (the PHYLogeny Inference Package) is a package of
  8668. programs for inferring phylogenies (evolutionary trees).")
  8669. (license license:bsd-2)))
  8670. (define-public imp
  8671. (package
  8672. (name "imp")
  8673. (version "2.13.0")
  8674. (source
  8675. (origin
  8676. (method url-fetch)
  8677. (uri (string-append "https://integrativemodeling.org/"
  8678. version "/download/imp-" version ".tar.gz"))
  8679. (sha256
  8680. (base32
  8681. "1z1vcpwbylixk0zywngg5iw0jv083jj1bqphi817jpg3fb9fx2jj"))))
  8682. (build-system cmake-build-system)
  8683. (arguments
  8684. `( ;; CMake 3.17 or newer is required for the CMAKE_TEST_ARGUMENTS used
  8685. ;; below to have an effect.
  8686. #:cmake ,cmake
  8687. #:configure-flags
  8688. (let ((disabled-tests
  8689. '("expensive" ;exclude expensive tests
  8690. "IMP.modeller" ;fail to import its own modules
  8691. "IMP.parallel-test_sge.py" ;fail in build container
  8692. ;; The following test fails non-reproducibly on
  8693. ;; an inexact numbers assertion.
  8694. "IMP.em-medium_test_local_fitting.py")))
  8695. (list
  8696. (string-append
  8697. "-DCMAKE_CTEST_ARGUMENTS="
  8698. (string-join
  8699. (list "-L" "-tests?-" ;select only tests
  8700. "-E" (format #f "'(~a)'" (string-join disabled-tests "|")))
  8701. ";"))))))
  8702. (native-inputs
  8703. `(("python" ,python-wrapper)
  8704. ("swig" ,swig)))
  8705. (inputs
  8706. `(("boost" ,boost)
  8707. ("cgal" ,cgal)
  8708. ("gsl" ,gsl)
  8709. ("hdf5" ,hdf5)
  8710. ("fftw" ,fftw)
  8711. ("eigen" ,eigen)
  8712. ;; Enabling MPI causes the build to use all the available memory and
  8713. ;; fail (tested on a machine with 32 GiB of RAM).
  8714. ;;("mpi" ,openmpi)
  8715. ("opencv" ,opencv)))
  8716. (propagated-inputs
  8717. `(("python-numpy" ,python-numpy)
  8718. ("python-scipy" ,python-scipy)
  8719. ("python-pandas" ,python-pandas)
  8720. ("python-scikit-learn" ,python-scikit-learn)
  8721. ("python-networkx" ,python-networkx)))
  8722. (home-page "https://integrativemodeling.org")
  8723. (synopsis "Integrative modeling platform")
  8724. (description "IMP's broad goal is to contribute to a comprehensive
  8725. structural characterization of biomolecules ranging in size and complexity
  8726. from small peptides to large macromolecular assemblies, by integrating data
  8727. from diverse biochemical and biophysical experiments. IMP provides a C++ and
  8728. Python toolbox for solving complex modeling problems, and a number of
  8729. applications for tackling some common problems in a user-friendly way.")
  8730. ;; IMP is largely available under the GNU Lesser GPL; see the file
  8731. ;; COPYING.LGPL for the full text of this license. Some IMP modules are
  8732. ;; available under the GNU GPL (see the file COPYING.GPL).
  8733. (license (list license:lgpl2.1+
  8734. license:gpl3+))))
  8735. (define-public tadbit
  8736. (package
  8737. (name "tadbit")
  8738. (version "1.0.1")
  8739. (source (origin
  8740. (method git-fetch)
  8741. (uri (git-reference
  8742. (url "https://github.com/3DGenomes/TADbit")
  8743. (commit (string-append "v" version))))
  8744. (file-name (git-file-name name version))
  8745. (sha256
  8746. (base32
  8747. "0hqrlymh2a2bimcfdvlssy1x5h1lp3h1c5a7jj11hmcqczzqn3ni"))))
  8748. (build-system python-build-system)
  8749. (arguments
  8750. `(#:phases
  8751. (modify-phases %standard-phases
  8752. (add-after 'unpack 'fix-problems-with-setup.py
  8753. (lambda* (#:key outputs #:allow-other-keys)
  8754. ;; Don't attempt to install the bash completions to
  8755. ;; the home directory.
  8756. (rename-file "extras/.bash_completion"
  8757. "extras/tadbit")
  8758. (substitute* "setup.py"
  8759. (("\\(path.expanduser\\('~'\\)")
  8760. (string-append "(\""
  8761. (assoc-ref outputs "out")
  8762. "/etc/bash_completion.d\""))
  8763. (("extras/\\.bash_completion")
  8764. "extras/tadbit"))
  8765. #t))
  8766. (replace 'check
  8767. (lambda* (#:key inputs outputs #:allow-other-keys)
  8768. (add-installed-pythonpath inputs outputs)
  8769. (invoke "python3" "test/test_all.py")
  8770. #t)))))
  8771. (native-inputs
  8772. `(("glib" ,glib "bin") ;for gtester
  8773. ("pkg-config" ,pkg-config)))
  8774. (inputs
  8775. ;; TODO: add Chimera for visualization
  8776. `(("imp" ,imp)
  8777. ("mcl" ,mcl)
  8778. ("python-future" ,python-future)
  8779. ("python-h5py" ,python-h5py)
  8780. ("python-scipy" ,python-scipy)
  8781. ("python-numpy" ,python-numpy)
  8782. ("python-matplotlib" ,python-matplotlib)
  8783. ("python-pysam" ,python-pysam)))
  8784. (home-page "https://3dgenomes.github.io/TADbit/")
  8785. (synopsis "Analyze, model, and explore 3C-based data")
  8786. (description
  8787. "TADbit is a complete Python library to deal with all steps to analyze,
  8788. model, and explore 3C-based data. With TADbit the user can map FASTQ files to
  8789. obtain raw interaction binned matrices (Hi-C like matrices), normalize and
  8790. correct interaction matrices, identify and compare the so-called
  8791. @dfn{Topologically Associating Domains} (TADs), build 3D models from the
  8792. interaction matrices, and finally, extract structural properties from the
  8793. models. TADbit is complemented by TADkit for visualizing 3D models.")
  8794. (license license:gpl3+)))
  8795. (define-public kentutils
  8796. (package
  8797. (name "kentutils")
  8798. ;; 302.1.0 is out, but the only difference is the inclusion of
  8799. ;; pre-built binaries.
  8800. (version "302.0.0")
  8801. (source
  8802. (origin
  8803. (method git-fetch)
  8804. (uri (git-reference
  8805. (url "https://github.com/ENCODE-DCC/kentUtils")
  8806. (commit (string-append "v" version))))
  8807. (file-name (git-file-name name version))
  8808. (sha256
  8809. (base32
  8810. "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
  8811. (modules '((guix build utils)
  8812. (srfi srfi-26)
  8813. (ice-9 ftw)))
  8814. (snippet
  8815. '(begin
  8816. ;; Only the contents of the specified directories are free
  8817. ;; for all uses, so we remove the rest. "hg/autoSql" and
  8818. ;; "hg/autoXml" are nominally free, but they depend on a
  8819. ;; library that is built from the sources in "hg/lib",
  8820. ;; which is nonfree.
  8821. (let ((free (list "." ".."
  8822. "utils" "lib" "inc" "tagStorm"
  8823. "parasol" "htslib"))
  8824. (directory? (lambda (file)
  8825. (eq? 'directory (stat:type (stat file))))))
  8826. (for-each (lambda (file)
  8827. (and (directory? file)
  8828. (delete-file-recursively file)))
  8829. (map (cut string-append "src/" <>)
  8830. (scandir "src"
  8831. (lambda (file)
  8832. (not (member file free)))))))
  8833. ;; Only make the utils target, not the userApps target,
  8834. ;; because that requires libraries we won't build.
  8835. (substitute* "Makefile"
  8836. ((" userApps") " utils"))
  8837. ;; Only build libraries that are free.
  8838. (substitute* "src/makefile"
  8839. (("DIRS =.*") "DIRS =\n")
  8840. (("cd jkOwnLib.*") "")
  8841. ((" hgLib") "")
  8842. (("cd hg.*") ""))
  8843. (substitute* "src/utils/makefile"
  8844. ;; These tools depend on "jkhgap.a", which is part of the
  8845. ;; nonfree "src/hg/lib" directory.
  8846. (("raSqlQuery") "")
  8847. (("pslLiftSubrangeBlat") "")
  8848. ;; Do not build UCSC tools, which may require nonfree
  8849. ;; components.
  8850. (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
  8851. #t))))
  8852. (build-system gnu-build-system)
  8853. (arguments
  8854. `( ;; There is no global test target and the test target for
  8855. ;; individual tools depends on input files that are not
  8856. ;; included.
  8857. #:tests? #f
  8858. #:phases
  8859. (modify-phases %standard-phases
  8860. (add-after 'unpack 'fix-permissions
  8861. (lambda _ (make-file-writable "src/inc/localEnvironment.mk") #t))
  8862. (add-after 'unpack 'fix-paths
  8863. (lambda _
  8864. (substitute* "Makefile"
  8865. (("/bin/echo") (which "echo")))
  8866. #t))
  8867. (add-after 'unpack 'prepare-samtabix
  8868. (lambda* (#:key inputs #:allow-other-keys)
  8869. (copy-recursively (assoc-ref inputs "samtabix")
  8870. "samtabix")
  8871. #t))
  8872. (delete 'configure)
  8873. (replace 'install
  8874. (lambda* (#:key outputs #:allow-other-keys)
  8875. (let ((bin (string-append (assoc-ref outputs "out")
  8876. "/bin")))
  8877. (copy-recursively "bin" bin))
  8878. #t)))))
  8879. (native-inputs
  8880. `(("samtabix"
  8881. ,(let ((commit "10fd107909c1ac4d679299908be4262a012965ba"))
  8882. (origin
  8883. (method git-fetch)
  8884. (uri (git-reference
  8885. (url "http://genome-source.cse.ucsc.edu/samtabix.git")
  8886. (commit commit)))
  8887. (file-name (git-file-name "samtabix" (string-take commit 7)))
  8888. (sha256
  8889. (base32
  8890. "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma")))))))
  8891. (inputs
  8892. `(("zlib" ,zlib)
  8893. ("tcsh" ,tcsh)
  8894. ("perl" ,perl)
  8895. ("libpng" ,libpng)
  8896. ("mariadb-dev" ,mariadb "dev")
  8897. ("openssl" ,openssl)))
  8898. (home-page "https://genome.cse.ucsc.edu/index.html")
  8899. (synopsis "Assorted bioinformatics utilities")
  8900. (description "This package provides the kentUtils, a selection of
  8901. bioinformatics utilities used in combination with the UCSC genome
  8902. browser.")
  8903. ;; Only a subset of the sources are released under a non-copyleft
  8904. ;; free software license. All other sources are removed in a
  8905. ;; snippet. See this bug report for an explanation of how the
  8906. ;; license statements apply:
  8907. ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
  8908. (license (license:non-copyleft
  8909. "http://genome.ucsc.edu/license/"
  8910. "The contents of this package are free for all uses."))))
  8911. (define-public f-seq
  8912. (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
  8913. (revision "1"))
  8914. (package
  8915. (name "f-seq")
  8916. (version (git-version "1.1" revision commit))
  8917. (source (origin
  8918. (method git-fetch)
  8919. (uri (git-reference
  8920. (url "https://github.com/aboyle/F-seq")
  8921. (commit commit)))
  8922. (file-name (git-file-name name version))
  8923. (sha256
  8924. (base32
  8925. "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
  8926. (modules '((guix build utils)))
  8927. ;; Remove bundled Java library archives.
  8928. (snippet
  8929. '(begin
  8930. (for-each delete-file (find-files "lib" ".*"))
  8931. #t))))
  8932. (build-system ant-build-system)
  8933. (arguments
  8934. `(#:tests? #f ; no tests included
  8935. #:phases
  8936. (modify-phases %standard-phases
  8937. (replace 'install
  8938. (lambda* (#:key inputs outputs #:allow-other-keys)
  8939. (let* ((target (assoc-ref outputs "out"))
  8940. (bin (string-append target "/bin"))
  8941. (doc (string-append target "/share/doc/f-seq"))
  8942. (lib (string-append target "/lib")))
  8943. (mkdir-p target)
  8944. (mkdir-p doc)
  8945. (substitute* "bin/linux/fseq"
  8946. (("java") (which "java"))
  8947. (("\\$REALDIR/../lib/commons-cli-1.1.jar")
  8948. (search-input-file inputs
  8949. "/share/java/commons-cli.jar"))
  8950. (("REALDIR=.*")
  8951. (string-append "REALDIR=" bin "\n")))
  8952. (install-file "README.txt" doc)
  8953. (install-file "bin/linux/fseq" bin)
  8954. (install-file "build~/fseq.jar" lib)
  8955. (copy-recursively "lib" lib)
  8956. #t))))))
  8957. (inputs
  8958. `(("perl" ,perl)
  8959. ("java-commons-cli" ,java-commons-cli)))
  8960. (home-page "https://fureylab.web.unc.edu/software/fseq/")
  8961. (synopsis "Feature density estimator for high-throughput sequence tags")
  8962. (description
  8963. "F-Seq is a software package that generates a continuous tag sequence
  8964. density estimation allowing identification of biologically meaningful sites
  8965. such as transcription factor binding sites (ChIP-seq) or regions of open
  8966. chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
  8967. Browser.")
  8968. (license license:gpl3+))))
  8969. (define-public bismark
  8970. (package
  8971. (name "bismark")
  8972. (version "0.20.1")
  8973. (source
  8974. (origin
  8975. (method git-fetch)
  8976. (uri (git-reference
  8977. (url "https://github.com/FelixKrueger/Bismark")
  8978. (commit version)))
  8979. (file-name (git-file-name name version))
  8980. (sha256
  8981. (base32
  8982. "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
  8983. (build-system perl-build-system)
  8984. (arguments
  8985. `(#:tests? #f ; there are no tests
  8986. #:modules ((guix build utils)
  8987. (ice-9 popen)
  8988. (srfi srfi-26)
  8989. (guix build perl-build-system))
  8990. #:phases
  8991. (modify-phases %standard-phases
  8992. ;; The bundled plotly.js is minified.
  8993. (add-after 'unpack 'replace-plotly.js
  8994. (lambda* (#:key inputs #:allow-other-keys)
  8995. (let* ((file (assoc-ref inputs "plotly.js"))
  8996. (installed "plotly/plotly.js"))
  8997. (let ((minified (open-pipe* OPEN_READ "uglify-js" file)))
  8998. (call-with-output-file installed
  8999. (cut dump-port minified <>))))
  9000. #t))
  9001. (delete 'configure)
  9002. (delete 'build)
  9003. (replace 'install
  9004. (lambda* (#:key inputs outputs #:allow-other-keys)
  9005. (let* ((out (assoc-ref outputs "out"))
  9006. (bin (string-append out "/bin"))
  9007. (share (string-append out "/share/bismark"))
  9008. (docdir (string-append out "/share/doc/bismark"))
  9009. (docs '("Docs/Bismark_User_Guide.html"))
  9010. (scripts '("bismark"
  9011. "bismark_genome_preparation"
  9012. "bismark_methylation_extractor"
  9013. "bismark2bedGraph"
  9014. "bismark2report"
  9015. "coverage2cytosine"
  9016. "deduplicate_bismark"
  9017. "filter_non_conversion"
  9018. "bam2nuc"
  9019. "bismark2summary"
  9020. "NOMe_filtering")))
  9021. (substitute* "bismark2report"
  9022. (("\\$RealBin/plotly")
  9023. (string-append share "/plotly")))
  9024. (mkdir-p share)
  9025. (mkdir-p docdir)
  9026. (mkdir-p bin)
  9027. (for-each (lambda (file) (install-file file bin))
  9028. scripts)
  9029. (for-each (lambda (file) (install-file file docdir))
  9030. docs)
  9031. (copy-recursively "Docs/Images" (string-append docdir "/Images"))
  9032. (copy-recursively "plotly"
  9033. (string-append share "/plotly"))
  9034. ;; Fix references to gunzip
  9035. (substitute* (map (lambda (file)
  9036. (string-append bin "/" file))
  9037. scripts)
  9038. (("\"gunzip -c")
  9039. (string-append "\"" (assoc-ref inputs "gzip")
  9040. "/bin/gunzip -c")))
  9041. #t))))))
  9042. (inputs
  9043. `(("gzip" ,gzip)
  9044. ("perl-carp" ,perl-carp)
  9045. ("perl-getopt-long" ,perl-getopt-long)))
  9046. (native-inputs
  9047. `(("plotly.js"
  9048. ,(origin
  9049. (method url-fetch)
  9050. (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
  9051. "v1.39.4/dist/plotly.js"))
  9052. (sha256
  9053. (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
  9054. ("uglify-js" ,uglify-js)))
  9055. (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
  9056. (synopsis "Map bisulfite treated sequence reads and analyze methylation")
  9057. (description "Bismark is a program to map bisulfite treated sequencing
  9058. reads to a genome of interest and perform methylation calls in a single step.
  9059. The output can be easily imported into a genome viewer, such as SeqMonk, and
  9060. enables a researcher to analyse the methylation levels of their samples
  9061. straight away. Its main features are:
  9062. @itemize
  9063. @item Bisulfite mapping and methylation calling in one single step
  9064. @item Supports single-end and paired-end read alignments
  9065. @item Supports ungapped and gapped alignments
  9066. @item Alignment seed length, number of mismatches etc are adjustable
  9067. @item Output discriminates between cytosine methylation in CpG, CHG
  9068. and CHH context
  9069. @end itemize\n")
  9070. (license license:gpl3+)))
  9071. (define-public paml
  9072. (package
  9073. (name "paml")
  9074. (version "4.9e")
  9075. (source (origin
  9076. (method url-fetch)
  9077. (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
  9078. "paml" version ".tgz"))
  9079. (sha256
  9080. (base32
  9081. "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
  9082. (modules '((guix build utils)))
  9083. ;; Remove Windows binaries
  9084. (snippet
  9085. '(begin
  9086. (for-each delete-file (find-files "." "\\.exe$"))
  9087. ;; Some files in the original tarball have restrictive
  9088. ;; permissions, which makes repackaging fail
  9089. (for-each (lambda (file) (chmod file #o644)) (find-files "."))
  9090. #t))))
  9091. (build-system gnu-build-system)
  9092. (arguments
  9093. `(#:tests? #f ; there are no tests
  9094. #:make-flags '("CC=gcc")
  9095. #:phases
  9096. (modify-phases %standard-phases
  9097. (replace 'configure
  9098. (lambda _
  9099. (substitute* "src/BFdriver.c"
  9100. (("/bin/bash") (which "bash")))
  9101. (chdir "src")
  9102. #t))
  9103. (replace 'install
  9104. (lambda* (#:key outputs #:allow-other-keys)
  9105. (let ((tools '("baseml" "basemlg" "codeml"
  9106. "pamp" "evolver" "yn00" "chi2"))
  9107. (bin (string-append (assoc-ref outputs "out") "/bin"))
  9108. (docdir (string-append (assoc-ref outputs "out")
  9109. "/share/doc/paml")))
  9110. (mkdir-p bin)
  9111. (for-each (lambda (file) (install-file file bin)) tools)
  9112. (copy-recursively "../doc" docdir)
  9113. #t))))))
  9114. (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
  9115. (synopsis "Phylogentic analysis by maximum likelihood")
  9116. (description "PAML (for Phylogentic Analysis by Maximum Likelihood)
  9117. contains a few programs for model fitting and phylogenetic tree reconstruction
  9118. using nucleotide or amino-acid sequence data.")
  9119. ;; GPLv3 only
  9120. (license license:gpl3)))
  9121. (define-public segemehl
  9122. (package
  9123. (name "segemehl")
  9124. (version "0.3.4")
  9125. (source (origin
  9126. (method url-fetch)
  9127. (uri (string-append "https://www.bioinf.uni-leipzig.de/Software"
  9128. "/segemehl/downloads/segemehl-"
  9129. version ".tar.gz"))
  9130. (sha256
  9131. (base32
  9132. "0lbzbb7i8zadsn9b99plairhq6s2h1z8qdn6n7djclfis01nycz4"))))
  9133. (build-system gnu-build-system)
  9134. (arguments
  9135. `(#:make-flags
  9136. (list (string-append "CC=" ,(cc-for-target))
  9137. "all")
  9138. #:tests? #false ; there are none
  9139. #:phases
  9140. (modify-phases %standard-phases
  9141. (delete 'configure)
  9142. ;; There is no installation target
  9143. (replace 'install
  9144. (lambda* (#:key inputs outputs #:allow-other-keys)
  9145. (let* ((out (assoc-ref outputs "out"))
  9146. (bin (string-append out "/bin"))
  9147. (exes (list "segemehl.x" "haarz.x")))
  9148. (mkdir-p bin)
  9149. (for-each (lambda (exe)
  9150. (install-file exe bin))
  9151. exes)))))))
  9152. (inputs
  9153. `(("htslib" ,htslib)
  9154. ("ncurses" ,ncurses)
  9155. ("zlib" ,zlib)))
  9156. (native-inputs
  9157. `(("pkg-config" ,pkg-config)))
  9158. (home-page "https://www.bioinf.uni-leipzig.de/Software/segemehl")
  9159. (synopsis "Map short sequencer reads to reference genomes")
  9160. (description "Segemehl is software to map short sequencer reads to
  9161. reference genomes. Segemehl implements a matching strategy based on enhanced
  9162. suffix arrays (ESA). It accepts fasta and fastq queries (gzip'ed and
  9163. bgzip'ed). In addition to the alignment of reads from standard DNA- and
  9164. RNA-seq protocols, it also allows the mapping of bisulfite converted
  9165. reads (Lister and Cokus) and implements a split read mapping strategy. The
  9166. output of segemehl is a SAM or BAM formatted alignment file.")
  9167. (license license:gpl3+)))
  9168. (define-public kallisto
  9169. (package
  9170. (name "kallisto")
  9171. (version "0.46.2")
  9172. (source (origin
  9173. (method git-fetch)
  9174. (uri (git-reference
  9175. (url "https://github.com/pachterlab/kallisto")
  9176. (commit (string-append "v" version))))
  9177. (file-name (git-file-name name version))
  9178. (sha256
  9179. (base32
  9180. "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi"))
  9181. (modules '((guix build utils)))
  9182. (snippet
  9183. '(delete-file-recursively "ext/htslib/"))))
  9184. (build-system cmake-build-system)
  9185. (arguments
  9186. `(#:tests? #f ; no "check" target
  9187. #:phases
  9188. (modify-phases %standard-phases
  9189. (add-after 'unpack 'do-not-use-bundled-htslib
  9190. (lambda _
  9191. (substitute* "CMakeLists.txt"
  9192. (("^ExternalProject_Add" m)
  9193. (string-append "if (NEVER)\n" m))
  9194. (("^\\)")
  9195. (string-append ")\nendif(NEVER)"))
  9196. (("include_directories\\(\\$\\{htslib_PREFIX.*" m)
  9197. (string-append "# " m)))
  9198. (substitute* "src/CMakeLists.txt"
  9199. (("target_link_libraries\\(kallisto kallisto_core pthread \
  9200. \\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
  9201. "target_link_libraries(kallisto kallisto_core pthread hts)")
  9202. (("include_directories\\(\\.\\./ext/htslib\\)") ""))
  9203. #t)))))
  9204. (inputs
  9205. `(("hdf5" ,hdf5)
  9206. ("htslib" ,htslib-1.9)
  9207. ("zlib" ,zlib)))
  9208. (home-page "https://pachterlab.github.io/kallisto/")
  9209. (synopsis "Near-optimal RNA-Seq quantification")
  9210. (description
  9211. "Kallisto is a program for quantifying abundances of transcripts from
  9212. RNA-Seq data, or more generally of target sequences using high-throughput
  9213. sequencing reads. It is based on the novel idea of pseudoalignment for
  9214. rapidly determining the compatibility of reads with targets, without the need
  9215. for alignment. Pseudoalignment of reads preserves the key information needed
  9216. for quantification, and kallisto is therefore not only fast, but also as
  9217. accurate as existing quantification tools.")
  9218. (license license:bsd-2)))
  9219. (define-public libgff
  9220. (package
  9221. (name "libgff")
  9222. (version "2.0.0")
  9223. (source (origin
  9224. (method git-fetch)
  9225. (uri (git-reference
  9226. (url "https://github.com/COMBINE-lab/libgff")
  9227. (commit (string-append "v" version))))
  9228. (file-name (git-file-name name version))
  9229. (sha256
  9230. (base32
  9231. "0ds9r22y8bl1rj7bhl0003kgmm6aam7g8l41mnjfrzw15d9zf9k4"))))
  9232. (build-system cmake-build-system)
  9233. (arguments `(#:tests? #f)) ; no tests included
  9234. (home-page "https://github.com/COMBINE-lab/libgff")
  9235. (synopsis "Parser library for reading/writing GFF files")
  9236. (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
  9237. code that is used in the Cufflinks codebase. The goal of this library is to
  9238. provide this functionality without the necessity of drawing in a heavy-weight
  9239. dependency like SeqAn.")
  9240. (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
  9241. (define-public sailfish
  9242. (package
  9243. (name "sailfish")
  9244. (version "0.10.1")
  9245. (source (origin
  9246. (method git-fetch)
  9247. (uri (git-reference
  9248. (url "https://github.com/kingsfordgroup/sailfish")
  9249. (commit (string-append "v" version))))
  9250. (file-name (git-file-name name version))
  9251. (sha256
  9252. (base32
  9253. "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
  9254. (modules '((guix build utils)))
  9255. (snippet
  9256. '(begin
  9257. ;; Delete bundled headers for eigen3.
  9258. (delete-file-recursively "include/eigen3/")
  9259. #t))))
  9260. (build-system cmake-build-system)
  9261. (arguments
  9262. `(#:configure-flags
  9263. (list (string-append "-DBOOST_INCLUDEDIR="
  9264. (assoc-ref %build-inputs "boost")
  9265. "/include/")
  9266. (string-append "-DBOOST_LIBRARYDIR="
  9267. (assoc-ref %build-inputs "boost")
  9268. "/lib/")
  9269. (string-append "-DBoost_LIBRARIES="
  9270. "-lboost_iostreams "
  9271. "-lboost_filesystem "
  9272. "-lboost_system "
  9273. "-lboost_thread "
  9274. "-lboost_timer "
  9275. "-lboost_chrono "
  9276. "-lboost_program_options")
  9277. "-DBoost_FOUND=TRUE"
  9278. ;; Don't download RapMap---we already have it!
  9279. "-DFETCHED_RAPMAP=1")
  9280. ;; Tests must be run after installation and the location of the test
  9281. ;; data file must be overridden. But the tests fail. It looks like
  9282. ;; they are not really meant to be run.
  9283. #:tests? #f
  9284. #:phases
  9285. (modify-phases %standard-phases
  9286. ;; Boost cannot be found, even though it's right there.
  9287. (add-after 'unpack 'do-not-look-for-boost
  9288. (lambda* (#:key inputs #:allow-other-keys)
  9289. (substitute* "CMakeLists.txt"
  9290. (("find_package\\(Boost 1\\.53\\.0") "#"))
  9291. #t))
  9292. (add-after 'unpack 'do-not-assign-to-macro
  9293. (lambda _
  9294. (substitute* "include/spdlog/details/format.cc"
  9295. (("const unsigned CHAR_WIDTH = 1;") ""))
  9296. #t))
  9297. (add-after 'unpack 'prepare-rapmap
  9298. (lambda* (#:key inputs #:allow-other-keys)
  9299. (let ((src "external/install/src/rapmap/")
  9300. (include "external/install/include/rapmap/")
  9301. (rapmap (assoc-ref inputs "rapmap")))
  9302. (mkdir-p "/tmp/rapmap")
  9303. (invoke "tar" "xf"
  9304. (assoc-ref inputs "rapmap")
  9305. "-C" "/tmp/rapmap"
  9306. "--strip-components=1")
  9307. (mkdir-p src)
  9308. (mkdir-p include)
  9309. (for-each (lambda (file)
  9310. (install-file file src))
  9311. (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
  9312. (copy-recursively "/tmp/rapmap/include" include))
  9313. #t))
  9314. (add-after 'unpack 'use-system-libraries
  9315. (lambda* (#:key inputs #:allow-other-keys)
  9316. (substitute* '("src/SailfishIndexer.cpp"
  9317. "src/SailfishUtils.cpp"
  9318. "src/SailfishQuantify.cpp"
  9319. "src/FASTAParser.cpp"
  9320. "include/PCA.hpp"
  9321. "include/SailfishUtils.hpp"
  9322. "include/SailfishIndex.hpp"
  9323. "include/CollapsedEMOptimizer.hpp"
  9324. "src/CollapsedEMOptimizer.cpp")
  9325. (("#include \"jellyfish/config.h\"") ""))
  9326. (substitute* "src/CMakeLists.txt"
  9327. (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
  9328. (search-input-directory
  9329. inputs
  9330. (string-append "/include/jellyfish-" ,(package-version jellyfish))))
  9331. (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
  9332. (search-input-file inputs
  9333. "/lib/libjellyfish-2.0.a"))
  9334. (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
  9335. (search-input-file inputs
  9336. "/lib/libdivsufsort.so"))
  9337. (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
  9338. (search-input-file inputs
  9339. "/lib/libdivsufsort64.so")))
  9340. (substitute* "CMakeLists.txt"
  9341. ;; Don't prefer static libs
  9342. (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
  9343. (("find_package\\(Jellyfish.*") "")
  9344. (("ExternalProject_Add\\(libjellyfish") "message(")
  9345. (("ExternalProject_Add\\(libgff") "message(")
  9346. (("ExternalProject_Add\\(libsparsehash") "message(")
  9347. (("ExternalProject_Add\\(libdivsufsort") "message("))
  9348. ;; Ensure that Eigen headers can be found
  9349. (setenv "CPLUS_INCLUDE_PATH"
  9350. (string-append (search-input-directory
  9351. inputs "/include/eigen3")
  9352. ":"
  9353. (or (getenv "CPLUS_INCLUDE_PATH") ""))))))))
  9354. (inputs
  9355. `(("boost" ,boost)
  9356. ("eigen" ,eigen)
  9357. ("jemalloc" ,jemalloc)
  9358. ("jellyfish" ,jellyfish)
  9359. ("sparsehash" ,sparsehash)
  9360. ("rapmap" ,(origin
  9361. (method git-fetch)
  9362. (uri (git-reference
  9363. (url "https://github.com/COMBINE-lab/RapMap")
  9364. (commit (string-append "sf-v" version))))
  9365. (file-name (string-append "rapmap-sf-v" version "-checkout"))
  9366. (sha256
  9367. (base32
  9368. "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
  9369. (modules '((guix build utils)))
  9370. ;; These files are expected to be excluded.
  9371. (snippet
  9372. '(begin (delete-file-recursively "include/spdlog")
  9373. (for-each delete-file '("include/xxhash.h"
  9374. "src/xxhash.c"))
  9375. #t))))
  9376. ("libdivsufsort" ,libdivsufsort)
  9377. ("libgff" ,libgff)
  9378. ("tbb" ,tbb)
  9379. ("zlib" ,zlib)))
  9380. (native-inputs
  9381. `(("pkg-config" ,pkg-config)))
  9382. (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
  9383. (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
  9384. (description "Sailfish is a tool for genomic transcript quantification
  9385. from RNA-seq data. It requires a set of target transcripts (either from a
  9386. reference or de-novo assembly) to quantify. All you need to run sailfish is a
  9387. fasta file containing your reference transcripts and a (set of) fasta/fastq
  9388. file(s) containing your reads.")
  9389. (license license:gpl3+)))
  9390. (define libstadenio-for-salmon
  9391. (package
  9392. (name "libstadenio")
  9393. (version "1.14.8")
  9394. (source (origin
  9395. (method git-fetch)
  9396. (uri (git-reference
  9397. (url "https://github.com/COMBINE-lab/staden-io_lib")
  9398. (commit (string-append "v" version))))
  9399. (file-name (string-append name "-" version "-checkout"))
  9400. (sha256
  9401. (base32
  9402. "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
  9403. (build-system gnu-build-system)
  9404. (arguments '(#:parallel-tests? #f)) ; not supported
  9405. (inputs
  9406. `(("zlib" ,zlib)))
  9407. (native-inputs
  9408. `(("perl" ,perl))) ; for tests
  9409. (home-page "https://github.com/COMBINE-lab/staden-io_lib")
  9410. (synopsis "General purpose trace and experiment file library")
  9411. (description "This package provides a library of file reading and writing
  9412. code to provide a general purpose Trace file (and Experiment File) reading
  9413. interface.
  9414. The following file formats are supported:
  9415. @enumerate
  9416. @item SCF trace files
  9417. @item ABI trace files
  9418. @item ALF trace files
  9419. @item ZTR trace files
  9420. @item SFF trace archives
  9421. @item SRF trace archives
  9422. @item Experiment files
  9423. @item Plain text files
  9424. @item SAM/BAM sequence files
  9425. @item CRAM sequence files
  9426. @end enumerate\n")
  9427. (license license:bsd-3)))
  9428. (define-public salmon
  9429. (package
  9430. (name "salmon")
  9431. (version "1.4.0")
  9432. (source (origin
  9433. (method git-fetch)
  9434. (uri (git-reference
  9435. (url "https://github.com/COMBINE-lab/salmon")
  9436. (commit (string-append "v" version))))
  9437. (file-name (git-file-name name version))
  9438. (sha256
  9439. (base32
  9440. "1di7y2s8cjr9480lngcmaz3wcabc1lpkyanzbhir1nkhcjmj70h4"))
  9441. (modules '((guix build utils)))
  9442. (snippet
  9443. ;; Delete bundled headers for eigen3.
  9444. '(delete-file-recursively "include/eigen3/"))))
  9445. (build-system cmake-build-system)
  9446. (arguments
  9447. `(#:configure-flags
  9448. (list (string-append "-Dlibgff_DIR="
  9449. (assoc-ref %build-inputs "libgff") "/lib")
  9450. "-Dlibgff_FOUND=TRUE"
  9451. "-DTBB_FOUND=TRUE"
  9452. "-DTBB_VERSION=2020.3"
  9453. "-DTBB_LIBRARIES=tbb -ltbbmalloc"
  9454. "-DFETCHED_PUFFERFISH=TRUE"
  9455. "-DUSE_SHARED_LIBS=TRUE")
  9456. #:phases
  9457. (modify-phases %standard-phases
  9458. (add-after 'unpack 'prepare-pufferfish
  9459. (lambda* (#:key inputs #:allow-other-keys)
  9460. (copy-recursively (assoc-ref inputs "pufferfish")
  9461. "external/pufferfish")
  9462. ;; This test isn't working correctly, so compilation aborts.
  9463. (substitute* "external/pufferfish/include/string_view.hpp"
  9464. (("#if __has_include\\(<string_view>\\)")
  9465. "#if 0"))
  9466. (let ((headers "external/install/pufferfish/include/pufferfish")
  9467. (source "external/install/src/pufferfish"))
  9468. (mkdir-p headers)
  9469. (mkdir-p source)
  9470. (for-each (lambda (file)
  9471. (install-file (string-append "external/pufferfish/include/" file)
  9472. headers))
  9473. (list "ProgOpts.hpp" "BooPHF.hpp" "SpinLock.hpp"
  9474. "Kmer.hpp" "CanonicalKmer.hpp" "string_view.hpp"
  9475. "CanonicalKmerIterator.hpp"
  9476. "PufferfishBaseIndex.hpp"
  9477. "PufferfishIndex.hpp"
  9478. "PufferfishSparseIndex.hpp"
  9479. "PufferfishLossyIndex.hpp"
  9480. "PufferfishTypes.hpp"
  9481. "rank9b.hpp" "rank9sel.hpp" "macros.hpp"
  9482. "select.hpp" "Util.hpp"
  9483. "PairedAlignmentFormatter.hpp"
  9484. "SelectiveAlignmentUtils.hpp"
  9485. "PuffAligner.hpp" "MemCollector.hpp"
  9486. "MemChainer.hpp" "CommonTypes.hpp"
  9487. "SAMWriter.hpp" "PufferfishConfig.hpp"
  9488. "BulkChunk.hpp" "BinWriter.hpp"))
  9489. (for-each (lambda (dir)
  9490. (copy-recursively
  9491. (string-append "external/pufferfish/include/" dir)
  9492. (string-append headers "/" dir)))
  9493. (list "libdivide"
  9494. "ksw2pp"
  9495. "compact_vector"
  9496. "metro"
  9497. "chobo"
  9498. "sparsepp"
  9499. "simde"
  9500. "tsl"))
  9501. (copy-recursively
  9502. (string-append "external/pufferfish/src/metro/")
  9503. (string-append source "/metro"))
  9504. (install-file
  9505. (string-append "external/pufferfish/src/rank9b.cpp")
  9506. source)
  9507. ;; Do not complain about not having built libtbb
  9508. (substitute* "external/pufferfish/external/twopaco/CMakeLists.txt"
  9509. (("add_dependencies.*") "")))))
  9510. (add-after 'unpack 'do-not-phone-home
  9511. (lambda _
  9512. (substitute* "src/Salmon.cpp"
  9513. (("getVersionMessage\\(\\)") "\"\""))))
  9514. (add-after 'unpack 'use-system-libraries
  9515. (lambda* (#:key inputs #:allow-other-keys)
  9516. ;; Ensure that all headers can be found
  9517. (setenv "CPLUS_INCLUDE_PATH"
  9518. (string-append (or (getenv "CPLUS_INCLUDE_PATH") "")
  9519. ":"
  9520. (getcwd) "/external/install/pufferfish/include:"
  9521. (assoc-ref inputs "eigen")
  9522. "/include/eigen3"))))
  9523. (add-after 'unpack 'fix-error-message-in-tests
  9524. (lambda _
  9525. (substitute* "cmake/TestSalmonQuasi.cmake"
  9526. (("SALMON_QUASI_INDEX_COMMAND")
  9527. "SALMON_QUASI_INDEX_CMD")))))))
  9528. (inputs
  9529. `(("boost" ,boost)
  9530. ("bzip2" ,bzip2)
  9531. ("cereal" ,cereal)
  9532. ("curl" ,curl)
  9533. ("eigen" ,eigen)
  9534. ("jemalloc" ,jemalloc)
  9535. ("libgff" ,libgff)
  9536. ("pufferfish" ,(origin
  9537. (method git-fetch)
  9538. (uri (git-reference
  9539. (url "https://github.com/COMBINE-lab/pufferfish")
  9540. (commit (string-append "salmon-v" version))))
  9541. (file-name (git-file-name "pufferfish" version))
  9542. (sha256
  9543. (base32
  9544. "0qb4a2nl1d59qasr17sslgxnkjd5kbk5mns4cjshrmsvkrqp995n"))))
  9545. ("tbb" ,tbb)
  9546. ("libstadenio-for-salmon" ,libstadenio-for-salmon)
  9547. ("xz" ,xz)
  9548. ("zlib" ,zlib)))
  9549. (native-inputs
  9550. `(("pkg-config" ,pkg-config)))
  9551. (home-page "https://github.com/COMBINE-lab/salmon")
  9552. (synopsis "Quantification from RNA-seq reads using lightweight alignments")
  9553. (description "Salmon is a program to produce highly-accurate,
  9554. transcript-level quantification estimates from RNA-seq data. Salmon achieves
  9555. its accuracy and speed via a number of different innovations, including the
  9556. use of lightweight alignments (accurate but fast-to-compute proxies for
  9557. traditional read alignments) and massively-parallel stochastic collapsed
  9558. variational inference.")
  9559. (license license:gpl3+)))
  ;; Python library for reading and writing .loom files.  Built from the
  ;; upstream Git repository and tested with pytest.
  (define-public python-loompy
    (package
      (name "python-loompy")
      (version "2.0.17")
      ;; The tarball on Pypi does not include the tests.
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/linnarsson-lab/loompy")
                      (commit version)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run the test suite with pytest instead of the default test
           ;; runner.  Honor #:tests? so that the tests can be switched
           ;; off without having to edit this phase.
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "pytest" "tests")))))))
      (propagated-inputs
       `(("python-h5py" ,python-h5py)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-scipy" ,python-scipy)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://github.com/linnarsson-lab/loompy")
      (synopsis "Work with .loom files for single-cell RNA-seq data")
      (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations.  Loom also supports sparse
graphs.  This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
      (license license:bsd-3)))
  ;; Forester phylogenomics libraries, built from a pinned Git commit.
  ;; We cannot use the latest commit because it requires Java 9.
  (define-public java-forester
    (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
          (revision "1"))
      (package
        (name "java-forester")
        ;; Expands to "0-REVISION.COMMIT[0..7]".
        (version (git-version "0" revision commit))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/cmzmasek/forester")
                        (commit commit)))
                  (file-name (git-file-name name version))
                  (sha256
                   (base32
                    "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
                  (modules '((guix build utils)))
                  (snippet
                   '(begin
                      ;; Delete bundled jars and pre-built classes
                      (delete-file-recursively "forester/java/resources")
                      (delete-file-recursively "forester/java/classes")
                      (for-each delete-file (find-files "forester/java/" "\\.jar$"))
                      ;; Delete bundled applications
                      (delete-file-recursively "forester_applications")
                      #t))))
        (build-system ant-build-system)
        (arguments
         `(#:tests? #f                  ; there are none
           #:jdk ,icedtea-8
           #:modules ((guix build ant-build-system)
                      (guix build utils)
                      (guix build java-utils)
                      (sxml simple)
                      (sxml transform))
           #:phases
           (modify-phases %standard-phases
             ;; The Java sources and build.xml live in a sub-directory.
             (add-after 'unpack 'chdir
               (lambda _ (chdir "forester/java")))
             ;; Rewrite build.xml through SXML: every element is copied
             ;; unchanged except <unjar>, which is dropped.  The result is
             ;; written to a temporary file and then moved into place.
             (add-after 'chdir 'fix-dependencies
               (lambda _
                 (chmod "build.xml" #o664)
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml
                      (pre-post-order
                       (with-input-from-file "build.xml"
                         (lambda _ (xml->sxml #:trim-whitespace? #t)))
                       `(;; Remove all unjar tags to avoid repacking classes.
                         (unjar . ,(lambda _ '()))
                         (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                         (*text* . ,(lambda (_ txt) txt))))
                      port)))
                 (rename-file "build.xml.new" "build.xml")))
             ;; FIXME: itext is difficult to package as it depends on a few
             ;; unpackaged libraries.
             (add-after 'chdir 'remove-dependency-on-unpackaged-itext
               (lambda _
                 (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
                 ;; Replace the call into the deleted class with an exception.
                 (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                   (("pdf_written_to = PdfExporter.*")
                    "throw new IOException(\"PDF export is not available.\");"))))
             ;; There is no install target
             (replace 'install (install-jars ".")))))
        (propagated-inputs
         `(("java-commons-codec" ,java-commons-codec)
           ("java-openchart2" ,java-openchart2)))
        (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
        (synopsis "Phylogenomics libraries for Java")
        (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research.  It includes support for
reading, writing, and exporting phylogenetic trees.")
        (license license:lgpl2.1+))))
  ;; Forester 1.005, built from the sources jar published on Maven Central.
  ;; The two resource files missing from that archive are fetched separately
  ;; as native inputs.
  (define-public java-forester-1.005
    (package
      (name "java-forester")
      (version "1.005")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://repo1.maven.org/maven2/"
                                    "org/biojava/thirdparty/forester/"
                                    version "/forester-" version "-sources.jar"))
                (file-name (string-append name "-" version ".jar"))
                (sha256
                 (base32
                  "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; Write a copy of src/build.xml to ./build.xml with all <unjar>
           ;; elements dropped, copy the two resource inputs into the
           ;; current directory, and point the build file at those copies.
           (add-after 'unpack 'fix-dependencies
             (lambda* (#:key inputs #:allow-other-keys)
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "src/build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     `(;; Remove all unjar tags to avoid repacking classes.
                       (unjar . ,(lambda _ '()))
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))
                    port)))
               (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                          "synth_look_and_feel_1.xml")
               (copy-file (assoc-ref inputs "phyloxml.xsd")
                          "phyloxml.xsd")
               (substitute* "build.xml"
                 (("../resources/synth_laf/synth_look_and_feel_1.xml")
                  "synth_look_and_feel_1.xml")
                 (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                  "phyloxml.xsd"))))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'unpack 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               ;; The first replacement throws instead of calling the deleted
               ;; class and opens a Java block comment; the other two
               ;; replacements emit the matching "*/", presumably at the end
               ;; of the multi-line PdfExporter call in each file.
               (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                              "src/org/forester/archaeopteryx/MainFrameApplication.java")
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\"); /*")
                 ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
                 (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))))
           (add-after 'unpack 'delete-pre-built-classes
             (lambda _ (delete-file-recursively "src/classes")))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      ;; The source archive does not contain the resources.
      (native-inputs
       `(("phyloxml.xsd"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                                 "b61cc2dcede0bede317db362472333115756b8c6/"
                                 "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
             (file-name (string-append name "-phyloxml-" version ".xsd"))
             (sha256
              (base32
               "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
         ("synth_look_and_feel_1.xml"
          ,(origin
             (method url-fetch)
             (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                                 "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                                 "forester/java/classes/resources/"
                                 "synth_look_and_feel_1.xml"))
             (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
             (sha256
              (base32
               "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research.  It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+)))
  ;; Core module of BioJava.  The other java-biojava-* packages in this file
  ;; inherit from this definition.
  (define-public java-biojava-core
    (package
      (name "java-biojava-core")
      (version "4.2.11")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/biojava/biojava")
                      (commit (string-append "biojava-" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:jar-name "biojava-core.jar"
         #:source-dir "biojava-core/src/main/java/"
         #:test-dir "biojava-core/src/test"
         ;; These tests seem to require internet access.
         #:test-exclude (list "**/SearchIOTest.java"
                              "**/BlastXMLParserTest.java"
                              "**/GenbankCookbookTest.java"
                              "**/GenbankProxySequenceReaderTest.java")
         #:phases
         (modify-phases %standard-phases
           ;; Copy the module's resources next to the compiled classes.
           (add-before 'build 'copy-resources
             (lambda _
               (copy-recursively "biojava-core/src/main/resources"
                                 "build/classes")))
           ;; Likewise for the resources needed by the test suite.
           (add-before 'check 'copy-test-resources
             (lambda _
               (copy-recursively "biojava-core/src/test/resources"
                                 "build/test-classes"))))))
      (propagated-inputs
       `(("java-log4j-api" ,java-log4j-api)
         ("java-log4j-core" ,java-log4j-core)
         ("java-slf4j-api" ,java-slf4j-api)
         ("java-slf4j-simple" ,java-slf4j-simple)))
      (native-inputs
       `(("java-junit" ,java-junit)
         ("java-hamcrest-core" ,java-hamcrest-core)))
      (home-page "https://biojava.org")
      (synopsis "Core libraries of Java framework for processing biological data")
      (description "BioJava is a project dedicated to providing a Java framework
for processing biological data.  It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures.  The goal of the biojava project is to facilitate rapid
application development for bioinformatics.
This package provides the core libraries.")
      (license license:lgpl2.1+)))
  ;; The "biojava-phylo" module, built from the same source checkout as
  ;; java-biojava-core (source, version and license are inherited).
  (define-public java-biojava-phylo
    (package (inherit java-biojava-core)
      (name "java-biojava-phylo")
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:jar-name "biojava-phylo.jar"
         #:source-dir "biojava-phylo/src/main/java/"
         #:test-dir "biojava-phylo/src/test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy the module's resources next to the compiled classes.
           (add-before 'build 'copy-resources
             (lambda _
               (copy-recursively "biojava-phylo/src/main/resources"
                                 "build/classes")))
           ;; Likewise for the resources needed by the test suite.
           (add-before 'check 'copy-test-resources
             (lambda _
               (copy-recursively "biojava-phylo/src/test/resources"
                                 "build/test-classes"))))))
      (propagated-inputs
       `(("java-log4j-api" ,java-log4j-api)
         ("java-log4j-core" ,java-log4j-core)
         ("java-slf4j-api" ,java-slf4j-api)
         ("java-slf4j-simple" ,java-slf4j-simple)
         ("java-biojava-core" ,java-biojava-core)
         ("java-forester" ,java-forester)))
      (native-inputs
       `(("java-junit" ,java-junit)
         ("java-hamcrest-core" ,java-hamcrest-core)))
      (home-page "https://biojava.org")
      (synopsis "Biojava interface to the forester phylogenomics library")
      (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
  ;; The "biojava-alignment" module, built from the same source checkout as
  ;; java-biojava-core (source, version and license are inherited).
  (define-public java-biojava-alignment
    (package (inherit java-biojava-core)
      (name "java-biojava-alignment")
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:jar-name "biojava-alignment.jar"
         #:source-dir "biojava-alignment/src/main/java/"
         #:test-dir "biojava-alignment/src/test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy the module's resources next to the compiled classes.
           (add-before 'build 'copy-resources
             (lambda _
               (copy-recursively "biojava-alignment/src/main/resources"
                                 "build/classes")))
           ;; Likewise for the resources needed by the test suite.
           (add-before 'check 'copy-test-resources
             (lambda _
               (copy-recursively "biojava-alignment/src/test/resources"
                                 "build/test-classes"))))))
      (propagated-inputs
       `(("java-log4j-api" ,java-log4j-api)
         ("java-log4j-core" ,java-log4j-core)
         ("java-slf4j-api" ,java-slf4j-api)
         ("java-slf4j-simple" ,java-slf4j-simple)
         ("java-biojava-core" ,java-biojava-core)
         ("java-biojava-phylo" ,java-biojava-phylo)
         ("java-forester" ,java-forester)))
      (native-inputs
       `(("java-junit" ,java-junit)
         ("java-hamcrest-core" ,java-hamcrest-core)))
      (home-page "https://biojava.org")
      (synopsis "Biojava API for genetic sequence alignment")
      (description "The alignment module of BioJava provides an API that
contains
@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
  ;; Variant of java-biojava-core pinned to the 4.0.0 release; only the
  ;; version and the source origin differ from the inherited package.
  (define-public java-biojava-core-4.0
    (package (inherit java-biojava-core)
      (name "java-biojava-core")
      (version "4.0.0")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/biojava/biojava")
                      (commit (string-append "biojava-" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
  ;; The "biojava-phylo" module built from the 4.0 sources; it depends on
  ;; the 4.0 core package and on forester 1.005.
  (define-public java-biojava-phylo-4.0
    (package (inherit java-biojava-core-4.0)
      (name "java-biojava-phylo")
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:jar-name "biojava-phylo.jar"
         #:source-dir "biojava-phylo/src/main/java/"
         #:test-dir "biojava-phylo/src/test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy the module's resources next to the compiled classes.
           (add-before 'build 'copy-resources
             (lambda _
               (copy-recursively "biojava-phylo/src/main/resources"
                                 "build/classes")))
           ;; Likewise for the resources needed by the test suite.
           (add-before 'check 'copy-test-resources
             (lambda _
               (copy-recursively "biojava-phylo/src/test/resources"
                                 "build/test-classes"))))))
      (propagated-inputs
       `(("java-log4j-api" ,java-log4j-api)
         ("java-log4j-core" ,java-log4j-core)
         ("java-slf4j-api" ,java-slf4j-api)
         ("java-slf4j-simple" ,java-slf4j-simple)
         ("java-biojava-core" ,java-biojava-core-4.0)
         ("java-forester" ,java-forester-1.005)))
      (native-inputs
       `(("java-junit" ,java-junit)
         ("java-hamcrest-core" ,java-hamcrest-core)))
      (home-page "https://biojava.org")
      (synopsis "Biojava interface to the forester phylogenomics library")
      (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
  ;; The "biojava-alignment" module built from the 4.0 sources; it depends
  ;; on the 4.0 core and phylo packages and on forester 1.005.
  (define-public java-biojava-alignment-4.0
    (package (inherit java-biojava-core-4.0)
      (name "java-biojava-alignment")
      (build-system ant-build-system)
      (arguments
       `(#:jdk ,icedtea-8
         #:jar-name "biojava-alignment.jar"
         #:source-dir "biojava-alignment/src/main/java/"
         #:test-dir "biojava-alignment/src/test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy the module's resources next to the compiled classes.
           (add-before 'build 'copy-resources
             (lambda _
               (copy-recursively "biojava-alignment/src/main/resources"
                                 "build/classes")))
           ;; Likewise for the resources needed by the test suite.
           (add-before 'check 'copy-test-resources
             (lambda _
               (copy-recursively "biojava-alignment/src/test/resources"
                                 "build/test-classes"))))))
      (propagated-inputs
       `(("java-log4j-api" ,java-log4j-api)
         ("java-log4j-core" ,java-log4j-core)
         ("java-slf4j-api" ,java-slf4j-api)
         ("java-slf4j-simple" ,java-slf4j-simple)
         ("java-biojava-core" ,java-biojava-core-4.0)
         ("java-biojava-phylo" ,java-biojava-phylo-4.0)
         ("java-forester" ,java-forester-1.005)))
      (native-inputs
       `(("java-junit" ,java-junit)
         ("java-hamcrest-core" ,java-hamcrest-core)))
      (home-page "https://biojava.org")
      (synopsis "Biojava API for genetic sequence alignment")
      (description "The alignment module of BioJava provides an API that
contains
@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
  ;; Drop-seq tools, built with Ant from the upstream zip archive after the
  ;; bundled jars have been removed.  Java dependencies are symlinked into
  ;; "jar/lib" for the build and into share/java/lib of the output.
  (define-public dropseq-tools
    (package
      (name "dropseq-tools")
      (version "1.13")
      (source
       (origin
         (method url-fetch)
         (uri "http://mccarrolllab.com/download/1276/")
         (file-name (string-append "dropseq-tools-" version ".zip"))
         (sha256
          (base32
           "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
         ;; Delete bundled libraries
         (modules '((guix build utils)))
         (snippet
          '(begin
             (for-each delete-file (find-files "jar/lib" "\\.jar$"))
             (delete-file-recursively "3rdParty")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; test data are not included
         #:test-target "test"
         #:build-target "all"
         #:source-dir "public/src/"
         #:jdk ,icedtea-8
         #:make-flags
         (list (string-append "-Dpicard.executable.dir="
                              (assoc-ref %build-inputs "java-picard")
                              "/share/java/"))
         #:modules ((ice-9 match)
                    (srfi srfi-1)
                    (guix build utils)
                    (guix build java-utils)
                    (guix build ant-build-system))
         #:phases
         (modify-phases %standard-phases
           ;; FIXME: fails with "java.io.FileNotFoundException:
           ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
           (delete 'generate-jar-indices)
           ;; All dependencies must be linked to "lib", because that's where
           ;; they will be searched for when the Class-Path property of the
           ;; manifest is computed.
           (add-after 'unpack 'record-references
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "jar/lib")
               ;; Keep the directories of all inputs whose label starts with
               ;; "java-", except for the test-only "java-testng".
               (let ((dirs (filter-map (match-lambda
                                         ((name . dir)
                                          (and (string-prefix? "java-" name)
                                               (not (string=? name "java-testng"))
                                               dir)))
                                       inputs)))
                 (for-each (lambda (jar)
                             (symlink jar (string-append "jar/lib/" (basename jar))))
                           (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                       dirs)))))
           ;; There is no installation target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (share (string-append out "/share/java/"))
                      ;; SHARE already ends with a slash; do not add another.
                      (lib (string-append share "lib/"))
                      (scripts (list "BAMTagHistogram"
                                     "BAMTagofTagCounts"
                                     "BaseDistributionAtReadPosition"
                                     "CollapseBarcodesInPlace"
                                     "CollapseTagWithContext"
                                     "ConvertToRefFlat"
                                     "CreateIntervalsFiles"
                                     "DetectBeadSynthesisErrors"
                                     "DigitalExpression"
                                     "Drop-seq_alignment.sh"
                                     "FilterBAM"
                                     "FilterBAMByTag"
                                     "GatherGeneGCLength"
                                     "GatherMolecularBarcodeDistributionByGene"
                                     "GatherReadQualityMetrics"
                                     "PolyATrimmer"
                                     "ReduceGTF"
                                     "SelectCellsByNumTranscripts"
                                     "SingleCellRnaSeqMetricsCollector"
                                     "TagBamWithReadSequenceExtended"
                                     "TagReadWithGeneExon"
                                     "TagReadWithInterval"
                                     "TrimStartingSequence"
                                     "ValidateReference")))
                 (for-each mkdir-p (list bin share lib))
                 (install-file "dist/dropseq.jar" share)
                 (for-each (lambda (script)
                             (chmod script #o555)
                             (install-file script bin))
                           scripts)
                 ;; In the installed scripts, use the absolute file name of
                 ;; "java" and make jar_deploy_dir point at SHARE.
                 (substitute* (map (lambda (script)
                                     (string-append bin "/" script))
                                   scripts)
                   (("^java") (which "java"))
                   (("jar_deploy_dir=.*")
                    (string-append "jar_deploy_dir=" share "\n"))))))
           ;; FIXME: We do this after stripping jars because we don't want it to
           ;; copy all these jars and strip them.  We only want to install
           ;; links.  Arguably, this is a problem with the ant-build-system.
           (add-after 'strip-jar-timestamps 'install-links
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (share (string-append out "/share/java/"))
                      (lib (string-append share "lib/")))
                 ;; Each entry in "jar/lib" is a symlink created by
                 ;; 'record-references; re-create it in LIB with the same
                 ;; target.
                 (for-each (lambda (jar)
                             (symlink (readlink jar)
                                      (string-append lib (basename jar))))
                           (find-files "jar/lib" "\\.jar$"))))))))
      (inputs
       `(("jdk" ,icedtea-8)
         ("java-picard" ,java-picard-2.10.3)
         ("java-log4j-1.2-api" ,java-log4j-1.2-api)
         ("java-commons-math3" ,java-commons-math3)
         ("java-commons-jexl2" ,java-commons-jexl-2)
         ("java-commons-collections4" ,java-commons-collections4)
         ("java-commons-lang2" ,java-commons-lang)
         ("java-commons-io" ,java-commons-io)
         ("java-snappy-1.0.3-rc3" ,java-snappy-1)
         ("java-guava" ,java-guava)
         ("java-la4j" ,java-la4j)
         ("java-biojava-core" ,java-biojava-core-4.0)
         ("java-biojava-alignment" ,java-biojava-alignment-4.0)
         ("java-jdistlib" ,java-jdistlib)
         ("java-simple-xml" ,java-simple-xml)
         ("java-snakeyaml" ,java-snakeyaml)))
      (native-inputs
       `(("unzip" ,unzip)
         ("java-testng" ,java-testng)))
      (home-page "http://mccarrolllab.com/dropseq/")
      (synopsis "Tools for Drop-seq analyses")
      (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once.  This package provides tools to perform Drop-seq analyses.")
      (license license:expat)))
  ;; PiGx RNAseq pipeline, built from the upstream release tarball with the
  ;; GNU build system.
  (define-public pigx-rnaseq
    (package
      (name "pigx-rnaseq")
      (version "0.0.19")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                                    "releases/download/v" version
                                    "/pigx_rnaseq-" version ".tar.gz"))
                (sha256
                 (base32
                  "1ja3bda1appxrzbfy7wp7khy30mm7lic8xbq3gkbpc5bld3as9cm"))))
      (build-system gnu-build-system)
      (arguments
       `(#:parallel-tests? #f           ; not supported
         #:phases
         (modify-phases %standard-phases
           ;; "test.sh" runs the whole pipeline, which takes a long time and
           ;; might fail due to OOM.  The MultiQC is also resource intensive.
           (add-after 'unpack 'disable-resource-intensive-test
             (lambda _
               ;; NOTE(review): both patterns are anchored at the start of a
               ;; line, so they depend on the exact leading whitespace used
               ;; in Makefile.in -- confirm against the release tarball.
               (substitute* "Makefile.in"
                 (("^ tests/test_multiqc/test.sh") "")
                 (("^ test.sh") ""))))
           (add-before 'check 'set-timezone
             ;; The readr package is picky about timezones.
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR"
                       (search-input-directory inputs
                                               "share/zoneinfo")))))))
      (inputs
       `(("coreutils" ,coreutils)
         ("sed" ,sed)
         ("gzip" ,gzip)
         ("snakemake" ,snakemake)
         ("multiqc" ,multiqc)
         ("star" ,star-for-pigx)
         ("hisat2" ,hisat2)
         ("fastp" ,fastp)
         ("htseq" ,htseq)
         ("samtools" ,samtools)
         ("r-minimal" ,r-minimal)
         ("r-rmarkdown" ,r-rmarkdown)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggpubr" ,r-ggpubr)
         ("r-ggrepel" ,r-ggrepel)
         ("r-gprofiler2" ,r-gprofiler2)
         ("r-deseq2" ,r-deseq2)
         ("r-dt" ,r-dt)
         ("r-knitr" ,r-knitr)
         ("r-pheatmap" ,r-pheatmap)
         ("r-corrplot" ,r-corrplot)
         ("r-reshape2" ,r-reshape2)
         ("r-plotly" ,r-plotly)
         ("r-scales" ,r-scales)
         ("r-summarizedexperiment" ,r-summarizedexperiment)
         ("r-crosstalk" ,r-crosstalk)
         ("r-tximport" ,r-tximport)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-rjson" ,r-rjson)
         ("salmon" ,salmon)
         ("pandoc" ,pandoc)
         ("pandoc-citeproc" ,pandoc-citeproc)
         ("python-wrapper" ,python-wrapper)
         ("python-deeptools" ,python-deeptools)
         ("python-pyyaml" ,python-pyyaml)))
      ;; tzdata provides the "share/zoneinfo" directory looked up by the
      ;; 'set-timezone phase.
      (native-inputs
       `(("tzdata" ,tzdata)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Analysis pipeline for RNA sequencing experiments")
      (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments.  It is easy to use and produces high
quality reports.  The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment.  In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
      (license license:gpl3+)))
  ;; PiGx ChIPseq pipeline, built from the upstream release tarball with the
  ;; GNU build system.  The test suite is disabled.
  (define-public pigx-chipseq
    (package
      (name "pigx-chipseq")
      (version "0.0.52")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                                    "releases/download/v" version
                                    "/pigx_chipseq-" version ".tar.gz"))
                (sha256
                 (base32
                  "097cvc8kr3r1nq0sgjpirzmixwjl074qp4qq3sx4ngfqi06af6r9"))))
      (build-system gnu-build-system)
      ;; parts of the tests rely on access to the network
      (arguments '(#:tests? #f))
      (inputs
       `(("grep" ,grep)
         ("coreutils" ,coreutils)
         ("r-minimal" ,r-minimal)
         ("r-argparser" ,r-argparser)
         ("r-biocparallel" ,r-biocparallel)
         ("r-biostrings" ,r-biostrings)
         ("r-chipseq" ,r-chipseq)
         ("r-corrplot" ,r-corrplot)
         ("r-data-table" ,r-data-table)
         ("r-deseq2" ,r-deseq2)
         ("r-dplyr" ,r-dplyr)
         ("r-dt" ,r-dt)
         ("r-genomation" ,r-genomation)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-gprofiler2" ,r-gprofiler2)
         ("r-heatmaply" ,r-heatmaply)
         ("r-htmlwidgets" ,r-htmlwidgets)
         ("r-jsonlite" ,r-jsonlite)
         ("r-pheatmap" ,r-pheatmap)
         ("r-plotly" ,r-plotly)
         ("r-rmarkdown" ,r-rmarkdown)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rsubread" ,r-rsubread)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-s4vectors" ,r-s4vectors)
         ("r-stringr" ,r-stringr)
         ("r-tibble" ,r-tibble)
         ("r-tidyr" ,r-tidyr)
         ("python-wrapper" ,python-wrapper)
         ("python-pyyaml" ,python-pyyaml)
         ("python-magic" ,python-magic)
         ("python-xlrd" ,python-xlrd)
         ("trim-galore" ,trim-galore)
         ("macs" ,macs)
         ("multiqc" ,multiqc)
         ("perl" ,perl)
         ("pandoc" ,pandoc)
         ("pandoc-citeproc" ,pandoc-citeproc)
         ("fastqc" ,fastqc)
         ("bowtie" ,bowtie)
         ("idr" ,idr)
         ("snakemake" ,snakemake)
         ("samtools" ,samtools)
         ("bedtools" ,bedtools)
         ("kentutils" ,kentutils)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Analysis pipeline for ChIP sequencing experiments")
      (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments.  It is easy to use and
produces high quality reports.  The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment.  In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
      (license license:gpl3+)))
  ;; PiGx BSseq pipeline, built from the upstream release tarball with the
  ;; GNU build system.  The test suite is currently disabled.
  (define-public pigx-bsseq
    (package
      (name "pigx-bsseq")
      (version "0.1.5")
      (source (origin
                (method url-fetch)
                (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                                    "releases/download/v" version
                                    "/pigx_bsseq-" version ".tar.gz"))
                (sha256
                 (base32
                  "05al5dacfp1vf1x3cq20jhd6w4xj5vaxslzaka6yrpg0av8sh3k3"))))
      (build-system gnu-build-system)
      (arguments
       `(;; TODO: tests currently require 12+GB of RAM.  See
         ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/164
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           (add-before 'check 'set-timezone
             ;; The readr package is picky about timezones.
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR"
                       (search-input-directory inputs
                                               "share/zoneinfo")))))))
      ;; tzdata provides the "share/zoneinfo" directory looked up by the
      ;; 'set-timezone phase.
      (native-inputs
       `(("tzdata" ,tzdata)))
      ;; Each input label appears exactly once ("r-ggbio" used to be listed
      ;; twice).
      (inputs
       `(("coreutils" ,coreutils)
         ("sed" ,sed)
         ("grep" ,grep)
         ("r-minimal" ,r-minimal)
         ("r-annotationhub" ,r-annotationhub)
         ("r-dt" ,r-dt)
         ("r-genomation" ,r-genomation)
         ("r-ggbio" ,r-ggbio)
         ("r-ggrepel" ,r-ggrepel)
         ("r-matrixstats" ,r-matrixstats)
         ("r-methylkit" ,r-methylkit)
         ("r-reshape2" ,r-reshape2)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-rmarkdown" ,r-rmarkdown)
         ("r-bookdown" ,r-bookdown)
         ("r-ggplot2" ,r-ggplot2)
         ("pandoc" ,pandoc)
         ("pandoc-citeproc" ,pandoc-citeproc)
         ("python-wrapper" ,python-wrapper)
         ("python-pyyaml" ,python-pyyaml)
         ("snakemake" ,snakemake)
         ("bismark" ,bismark)
         ("bowtie" ,bowtie)
         ("bwa-meth" ,bwa-meth)
         ("fastqc" ,fastqc)
         ("methyldackel" ,methyldackel)
         ("multiqc" ,multiqc)
         ("trim-galore" ,trim-galore)
         ("cutadapt" ,cutadapt)
         ("samblaster" ,samblaster)
         ("samtools" ,samtools)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
      (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
      (license license:gpl3+)))
  ;; PiGx scRNAseq: single-cell RNA sequencing pipeline, built from the
  ;; upstream release tarball with the GNU build system.
  (define-public pigx-scrnaseq
    (package
      (name "pigx-scrnaseq")
      (version "1.1.7")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/BIMSBbioinfo/pigx_scrnaseq/releases/download/v"
               version "/pigx_scrnaseq-" version ".tar.gz"))
         (sha256
          (base32 "1h5mcxzwj3cidlkvy9ly5wmi48vwfsjf8dxjfirknqxr9a92hwlx"))))
      (build-system gnu-build-system)
      (inputs
       `(;; Command line tools, Java and Python modules.
         ("coreutils" ,coreutils)
         ("perl" ,perl)
         ("fastqc" ,fastqc)
         ("flexbar" ,flexbar)
         ("java" ,icedtea-8)
         ("jellyfish" ,jellyfish)
         ("python-wrapper" ,python-wrapper)
         ("python-pyyaml" ,python-pyyaml)
         ("python-pandas" ,python-pandas)
         ("python-magic" ,python-magic)
         ("python-numpy" ,python-numpy)
         ("python-loompy" ,python-loompy)
         ("pandoc" ,pandoc)
         ("pandoc-citeproc" ,pandoc-citeproc)
         ("samtools" ,samtools)
         ("snakemake" ,snakemake)
         ;; The "star" label is bound to the star-for-pigx variant.
         ("star" ,star-for-pigx)
         ;; R and R packages.
         ("r-minimal" ,r-minimal)
         ("r-argparser" ,r-argparser)
         ("r-cowplot" ,r-cowplot)
         ("r-data-table" ,r-data-table)
         ("r-delayedarray" ,r-delayedarray)
         ("r-delayedmatrixstats" ,r-delayedmatrixstats)
         ("r-dplyr" ,r-dplyr)
         ("r-dropbead" ,r-dropbead)
         ("r-dt" ,r-dt)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicfiles" ,r-genomicfiles)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-hdf5array" ,r-hdf5array)
         ("r-pheatmap" ,r-pheatmap)
         ("r-rmarkdown" ,r-rmarkdown)
         ("r-rsamtools" ,r-rsamtools)
         ("r-rtracklayer" ,r-rtracklayer)
         ("r-rtsne" ,r-rtsne)
         ("r-scater" ,r-scater)
         ("r-scran" ,r-scran)
         ("r-seurat" ,r-seurat)
         ("r-singlecellexperiment" ,r-singlecellexperiment)
         ("r-stringr" ,r-stringr)
         ("r-yaml" ,r-yaml)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
      (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
      (license license:gpl3+)))
  ;; PiGx SARS-CoV-2 wastewater pipeline, built from the upstream release
  ;; tarball with the GNU build system.
  (define-public pigx-sars-cov2-ww
    (package
      (name "pigx-sars-cov2-ww")
      (version "0.0.3")
      (source
       (origin
         (method url-fetch)
         ;; The repository name (pigx_sarscov2_ww) and the tarball name
         ;; (pigx_sars-cov2-ww) are spelled differently.
         (uri (string-append
               "https://github.com/BIMSBbioinfo/pigx_sarscov2_ww/releases/download/v"
               version "/pigx_sars-cov2-ww-" version ".tar.gz"))
         (sha256
          (base32 "1hhdbwsnl0d37lrmisw5hr630xr8s41qvxflm05anh11rj8n22yw"))))
      (build-system gnu-build-system)
      (inputs
       `(("bash-minimal" ,bash-minimal)
         ("bwa" ,bwa)
         ("ensembl-vep" ,ensembl-vep)
         ("fastqc" ,fastqc)
         ("kraken2" ,kraken2)
         ("krona-tools" ,krona-tools)
         ("lofreq" ,lofreq)
         ("multiqc" ,multiqc)
         ("prinseq" ,prinseq)
         ("python-pyyaml" ,python-pyyaml)
         ("python-wrapper" ,python-wrapper)
         ("r-base64url" ,r-base64url)
         ("r-dplyr" ,r-dplyr)
         ("r-dt" ,r-dt)
         ("r-ggplot2" ,r-ggplot2)
         ("r-magrittr" ,r-magrittr)
         ("r-minimal" ,r-minimal)
         ("r-plotly" ,r-plotly)
         ("r-qpcr" ,r-qpcr)
         ("r-reshape2" ,r-reshape2)
         ("r-rmarkdown" ,r-rmarkdown)
         ("r-stringr" ,r-stringr)
         ("r-tidyr" ,r-tidyr)
         ("samtools" ,samtools)
         ("snakemake" ,snakemake)
         ("wget" ,wget)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Analysis pipeline for wastewater sequencing")
      (description "PiGx SARS-CoV-2 is a pipeline for analysing data from
sequenced wastewater samples and identifying given variants-of-concern of
SARS-CoV-2. The pipeline can be used for continuous sampling. The output
report will provide an intuitive visual overview about the development of
variant abundance over time and location.")
      (license license:gpl3+)))
  ;; Umbrella package whose inputs are the individual PiGx pipelines
  ;; defined in this file.
  (define-public pigx
    (package
      (name "pigx")
      (version "0.0.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/BIMSBbioinfo/pigx/releases/download/v"
               version "/pigx-" version ".tar.gz"))
         (sha256
          (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
      (build-system gnu-build-system)
      (inputs
       `(("python" ,python)
         ;; The four pipelines named in the description below.
         ("pigx-bsseq" ,pigx-bsseq)
         ("pigx-chipseq" ,pigx-chipseq)
         ("pigx-rnaseq" ,pigx-rnaseq)
         ("pigx-scrnaseq" ,pigx-scrnaseq)))
      (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
      (synopsis "Analysis pipelines for genomics")
      (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:
@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize
All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
      (license license:gpl3+)))
  ;; Genrich peak caller, fetched from the upstream git tag "v<version>".
  (define-public genrich
    (package
      (name "genrich")
      (version "0.5")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jsh58/Genrich")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
      (build-system gnu-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         (modify-phases %standard-phases
           ;; The configure phase is dropped and the install phase is
           ;; replaced by a plain copy of the built executable.
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "Genrich" bin))
               #t)))))
      (home-page "https://github.com/jsh58/Genrich")
      (synopsis "Detecting sites of genomic enrichment")
      (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
      (license license:expat)))
  ;; Mantis is pinned to a specific upstream commit; the version string is
  ;; derived from "0", the revision and that commit via git-version.
  (define-public mantis
    (let ((revision "1")
          (commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84"))
      (package
        (name "mantis")
        (version (git-version "0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/splatlab/mantis")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
        (build-system cmake-build-system)
        (arguments
         '(#:tests? #f))                ; there are none
        (inputs
         `(("sdsl-lite" ,sdsl-lite)
           ("openssl" ,openssl)
           ("zlib" ,zlib)))
        ;; uses __uint128_t and inline assembly
        (supported-systems '("x86_64-linux"))
        (home-page "https://github.com/splatlab/mantis")
        (synopsis "Large-scale sequence-search index data structure")
        (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
        (license license:bsd-3))))
  (define-public sjcount
    ;; There is no tag for version 3.2, nor is there a release archive.
    (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
          (revision "1"))
      (package
        (name "sjcount")
        (version (git-version "3.2" revision commit))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/pervouchine/sjcount-full")
                        (commit commit)))
                  ;; Use git-file-name like the other git checkouts in this
                  ;; file instead of building "NAME-VERSION-checkout" by hand.
                  (file-name (git-file-name name version))
                  (sha256
                   (base32
                    "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; requires a 1.4G test file
           ;; SAMTOOLS_DIR is pointed at the lib directory of the
           ;; "samtools" input.
           #:make-flags
           (list (string-append "SAMTOOLS_DIR="
                                (assoc-ref %build-inputs "samtools")
                                "/lib/"))
           #:phases
           (modify-phases %standard-phases
             ;; Instead of running a configure script, patch the makefile:
             ;; replace its SAMTOOLS_DIR include flag with the include
             ;; directory of the "samtools" input and add -lpthread after
             ;; -lz.
             (replace 'configure
               (lambda* (#:key inputs #:allow-other-keys)
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}")
                    (string-append "-I" (assoc-ref inputs "samtools")
                                   "/include/samtools"))
                   (("-lz ") "-lz -lpthread "))
                 #t))
             ;; Copy the three built tools to the output's bin directory.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (for-each (lambda (tool)
                             (install-file tool
                                           (string-append (assoc-ref outputs "out")
                                                          "/bin")))
                           '("j_count" "b_count" "sjcount"))
                 #t)))))
        (inputs
         `(("samtools" ,samtools-0.1)
           ("zlib" ,zlib)))
        (home-page "https://github.com/pervouchine/sjcount-full/")
        (synopsis "Annotation-agnostic splice junction counting pipeline")
        (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
        (license license:gpl3+))))
  ;; minimap2 built from the release tarball.  Upstream's makefile is used
  ;; for building; installation is done by hand in a replaced 'install
  ;; phase, which also writes a pkg-config file.
  (define-public minimap2
    (package
      (name "minimap2")
      (version "2.18")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/lh3/minimap2/"
                             "releases/download/v" version "/"
                             "minimap2-" version ".tar.bz2"))
         (sha256
          (base32
           "1d7fvdqcqd6wns875rkyd7f34ii15gc9l1sivd2wbbpcb0fi0mbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         ;; srfi-26 provides `cut', used in the 'install phase.
         #:modules ((guix build utils)
                    (guix build gnu-build-system)
                    (srfi srfi-26))
         #:make-flags
         (list (string-append "CC=" ,(cc-for-target))
               ;; Pick the make argument matching the system we build for.
               ;; NOTE(review): on any other system an empty string is
               ;; passed to make -- confirm that this is accepted.
               (let ((system ,(or (%current-target-system)
                                  (%current-system))))
                 (cond
                  ((string-prefix? "x86_64" system)
                   "all")
                  ((or (string-prefix? "i586" system)
                       (string-prefix? "i686" system))
                   "sse2only=1")
                  ((string-prefix? "armhf" system)
                   "arm_neon=1")
                  ((string-prefix? "aarch64" system)
                   "aarch64=1")
                  (else ""))))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (lib (string-append out "/lib"))
                      (inc (string-append out "/include"))
                      (man (string-append out "/share/man/man1")))
                 (install-file "minimap2" bin)
                 (install-file "libminimap2.a" lib)
                 (install-file "minimap2.1" man)
                 ;; Install every header found in the source tree.  This is
                 ;; done for the side effect only, hence `for-each' rather
                 ;; than `map'.
                 (for-each (cut install-file <> inc)
                           (find-files "." "\\.h$"))
                 ;; Not this file.
                 (delete-file (string-append inc "/emmintrin.h"))
                 (mkdir-p (string-append lib "/pkgconfig"))
                 ;; Each "~@" followed by a newline emits the newline and
                 ;; skips the indentation of the next source line.
                 (with-output-to-file (string-append lib "/pkgconfig/minimap2.pc")
                   (lambda _
                     (format #t "prefix=~a~@
                             exec_prefix=${prefix}~@
                             libdir=${exec_prefix}/lib~@
                             includedir=${prefix}/include~@
                             ~@
                             Name: libminimap2~@
                             Version: ~a~@
                             Description: A versatile pairwise aligner for genomic and spliced nucleotide sequence~@
                             Libs: -L${libdir} -lminimap2~@
                             Cflags: -I${includedir}~%"
                             out ,version))))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://lh3.github.io/minimap2/")
      (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
      (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:
@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
      (license license:expat)))
  ;; Python bindings for minimap2, fetched from PyPI under the name "mappy".
  (define-public python-mappy
    (package
      (name "python-mappy")
      (version "2.18")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "mappy" version))
                (sha256
                 (base32
                  "1a05p7rkmxa6qhm108na8flzj2v45jab06drk59kzk1ip2sgvzqq"))))
      (build-system python-build-system)
      (native-inputs
       `(("python-cython" ,python-cython)))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/lh3/minimap2")
      (synopsis "Python binding for minimap2")
      ;; "transcribed", not "transcribe": the adjective qualifies the
      ;; nucleotide sequences.
      (description "This package provides a convenient interface to minimap2,
a fast and accurate C program to align genomic and transcribed nucleotide
sequences.")
      (license license:expat)))
  ;; miniasm assembler, fetched from the upstream git tag "v<version>".
  (define-public miniasm
    (package
      (name "miniasm")
      (version "0.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lh3/miniasm")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the two built executables to the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("miniasm" "minidot"))
                 #t))))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/lh3/miniasm")
      (synopsis "Ultrafast de novo assembly for long noisy reads")
      (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads. It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format. Different from
mainstream assemblers, miniasm does not have a consensus step. It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
      (license license:expat)))
  ;; Bandage assembly graph viewer, built with qmake under the Qt build
  ;; system.
  (define-public bandage
    (package
      (name "bandage")
      (version "0.8.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Bandage")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
      (build-system qt-build-system)
      (native-inputs
       `(("imagemagick" ,imagemagick)))
      (inputs
       `(("qtbase" ,qtbase-5)
         ("qtsvg" ,qtsvg)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'configure
             (lambda _
               (invoke "qmake" "Bandage.pro")))
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 ;; Make the test script run the executable that was just
                 ;; built in the top-level build directory.
                 (let ((executable (string-append (getcwd) "/Bandage")))
                   (substitute* "tests/bandage_command_line_tests.sh"
                     (("^bandagepath=.*")
                      (string-append "bandagepath=" executable "\n"))))
                 ;; Run the script from within "tests", with
                 ;; XDG_RUNTIME_DIR set to that directory.
                 (with-directory-excursion "tests"
                   (setenv "XDG_RUNTIME_DIR" (getcwd))
                   (invoke "./bandage_command_line_tests.sh")))
               #t))
           ;; Installation is a plain copy of the executable.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "Bandage" bin)
                 #t))))))
      (home-page "https://rrwick.github.io/Bandage/")
      (synopsis
       "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
      (description "Bandage is a program for visualising de novo assembly graphs.
It allows users to interact with the assembly graphs made by de novo assemblers
such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not
only assembled contigs but also the connections between those contigs, which
were previously not easily accessible. Bandage visualises assembly graphs, with
connections, using graph layout algorithms. Nodes in the drawn graph, which
represent contigs, can be automatically labelled with their ID, length or depth.
Users can interact with the graph by moving, labelling and colouring nodes.
Sequence information can also be extracted directly from the graph viewer. By
displaying connections between contigs, Bandage opens up new possibilities for
analysing and improving de novo assemblies that are not possible by looking at
contigs alone.")
      (license (list license:gpl2+      ; bundled ogdf
                     license:gpl3+))))
  ;; libmaus2, fetched from the upstream git tag
  ;; "<version>-release-<timestamp>".
  (define-public libmaus2
    (package
      (name "libmaus2")
      (version "2.0.786")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://gitlab.com/german.tischler/libmaus2")
               (commit (string-append version "-release-20210531143054"))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1rxakmwjcx2yq5sjh3v849f7dfw4xzc2fyzf6s28s3p95z84w564"))))
      (build-system gnu-build-system)
      ;; The test suite attempts to execute ../test-driver, which does not exist.
      (arguments
       '(#:tests? #false))
      (native-inputs
       `(("gcc" ,gcc-10)                ;Code has C++17 requirements
         ("pkg-config" ,pkg-config)))
      (propagated-inputs
       `(("zlib" ,zlib)))
      (home-page "https://gitlab.com/german.tischler/libmaus2")
      (synopsis "Collection of data structures and algorithms useful for bioinformatics")
      (description "libmaus2 is a collection of data structures and
algorithms. It contains:
@itemize
@item I/O classes (single byte and UTF-8);
@item @code{bitio} classes (input, output and various forms of bit level
manipulation);
@item text indexing classes (suffix and LCP array, fulltext and minute (FM),
etc.);
@item BAM sequence alignment files input/output (simple and collating);
and many lower level support classes.
@end itemize\n")
      ;; The code is explicitly available under the terms of either GPLv2 or
      ;; GPLv3 according to the AUTHORS file, though most files have a GPLv3+
      ;; license header.
      (license (list license:gpl2+ license:gpl3+))))
  ;; biobambam2, fetched from the upstream git tag
  ;; "<version>-release-<timestamp>" and configured against the libmaus2
  ;; input.
  (define-public biobambam2
    (package
      (name "biobambam2")
      (version "2.0.182")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://gitlab.com/german.tischler/biobambam2")
                      (commit (string-append version "-release-20210412001032"))))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0b7w7a2a7hpkgrdn0n7hy4pilzrj82zqrh7q4bg1l0cd6bqr60m5"))))
      (build-system gnu-build-system)
      (arguments
       ;; The test suite attempts to execute ../test-driver, which does not exist.
       `(#:tests? #false
         ;; Tell the configure script where the "libmaus2" input lives.
         #:configure-flags
         (list (string-append "--with-libmaus2="
                              (assoc-ref %build-inputs "libmaus2")))))
      (inputs
       `(("libmaus2" ,libmaus2)
         ("xerces-c" ,xerces-c)))
      (native-inputs
       `(("gcc" ,gcc-10)                ;Code has C++17 requirements
         ("pkg-config" ,pkg-config)))
      (home-page "https://gitlab.com/german.tischler/biobambam2")
      (synopsis "Tools for processing BAM files")
      ;; The bamcollate2 item used to read "reordered such that alignment or
      ;; collated by query name", which is not a sentence.
      (description "This package contains some tools for processing BAM files
including:
@itemize
@item bamsormadup: parallel sorting and duplicate marking
@item bamcollate2: reads BAM and writes BAM reordered such that alignments are
collated by query name
@item bammarkduplicates: reads BAM and writes BAM with duplicate alignments
marked using the BAM flags field
@item bammaskflags: reads BAM and writes BAM while masking (removing) bits
from the flags column
@item bamrecompress: reads BAM and writes BAM with a defined compression
setting. This tool is capable of multi-threading.
@item bamsort: reads BAM and writes BAM resorted by coordinates or query name
@item bamtofastq: reads BAM and writes FastQ; output can be collated or
uncollated by query name.
@end itemize
")
      ;; The COPYING file states that the code is distributed under version 3 of
      ;; the GPL, but the license headers include the "or later" clause.
      (license license:gpl3+)))
  ;; ciRcus R package, fetched from the upstream git tag "v<version>".
  (define-public r-circus
    (package
      (name "r-circus")
      (version "0.1.5")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/BIMSBbioinfo/ciRcus")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-annotationhub" ,r-annotationhub)
         ("r-biomart" ,r-biomart)
         ("r-data-table" ,r-data-table)
         ("r-dbi" ,r-dbi)
         ("r-genomicfeatures" ,r-genomicfeatures)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-hash" ,r-hash)
         ("r-iranges" ,r-iranges)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rmysql" ,r-rmysql)
         ("r-s4vectors" ,r-s4vectors)
         ("r-stringr" ,r-stringr)
         ("r-summarizedexperiment" ,r-summarizedexperiment)))
      (native-inputs
       `(("r-knitr" ,r-knitr)))
      (home-page "https://github.com/BIMSBbioinfo/ciRcus")
      (synopsis "Annotation, analysis and visualization of circRNA data")
      ;; Spelling fix: "features" was misspelled as "featrues".
      (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
      (license license:artistic2.0)))
  (define-public gffread
    ;; We cannot use the tagged release because it is not in sync with gclib.
    ;; See https://github.com/gpertea/gffread/issues/26
    (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
          (revision "1"))
      (package
        (name "gffread")
        (version (git-version "0.9.12" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/gpertea/gffread")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; no check target
           ;; GCLDIR names the directory that the 'copy-gclib-source phase
           ;; below populates.
           #:make-flags
           (list "GCLDIR=gclib")
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             ;; Copy the "gclib-source" input into ./gclib.
             (add-after 'unpack 'copy-gclib-source
               (lambda* (#:key inputs #:allow-other-keys)
                 (mkdir-p "gclib")
                 (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
                 #t))
             ;; There is no install target
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin")))
                   (install-file "gffread" bin))
                 #t)))))
        (native-inputs
         `(("gclib-source"
            ;; Only the version (for the file name) and the commit are
            ;; needed here; the previously bound `revision' was never used.
            ,(let ((version "0.10.3")
                   (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
               (origin
                 (method git-fetch)
                 (uri (git-reference
                       (url "https://github.com/gpertea/gclib")
                       (commit commit)))
                 (file-name (git-file-name "gclib" version))
                 (sha256
                  (base32
                   "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
        (home-page "https://github.com/gpertea/gffread/")
        (synopsis "Parse and convert GFF/GTF files")
        (description
         "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
        ;; gffread is under Expat, but gclib is under Artistic 2.0
        (license (list license:expat
                       license:artistic2.0)))))
  (define-public find-circ
    ;; The last release was in 2015. The license was clarified in 2017, so we
    ;; take the latest commit.
    (let ((revision "1")
          (commit "8655dca54970fcf7e92e22fbf57e1188724dda7d"))
      (package
        (name "find-circ")
        (version (git-version "1.2" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/marvin-jens/find_circ")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
        (build-system gnu-build-system)
        (inputs
         `(("python2" ,python-2)
           ("python2-pysam" ,python2-pysam)
           ("python2-numpy" ,python2-numpy)))
        (arguments
         `(#:tests? #f                  ; there are none
           #:phases
           ;; There is no actual build system.
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'build)
             ;; Copy each script to bin and wrap it so that the
             ;; GUIX_PYTHONPATH seen at build time is prefixed to
             ;; GUIX_PYTHONPATH when the script runs.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin"))
                        (path (getenv "GUIX_PYTHONPATH"))
                        (wrapper-spec
                         (list "GUIX_PYTHONPATH" ":" 'prefix (list path))))
                   (for-each (lambda (script)
                               (install-file script bin)
                               (wrap-program (string-append bin "/" script)
                                 wrapper-spec))
                             (list "cmp_bed.py"
                                   "find_circ.py"
                                   "maxlength.py"
                                   "merge_bed.py"
                                   "unmapped2anchors.py")))
                 #t)))))
        (home-page "https://github.com/marvin-jens/find_circ")
        (synopsis "circRNA detection from RNA-seq reads")
        (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
        (license license:gpl3))))
  ;; FIt-SNE, fetched from the upstream git tag "v<version>" and compiled
  ;; with a single compiler invocation.
  (define-public fit-sne
    (package
      (name "fit-sne")
      (version "1.2.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/KlugerLab/FIt-SNE")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1imq4577awc226wvygf94kpz156qdfw8xl0w0f7ss4w10lhmpmf5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #false                ; there are none
         #:phases
         ;; There is no build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'build
             (lambda _
               ;; Use the C++ compiler for the target system rather than a
               ;; hard-coded "g++", so that cross builds pick the cross
               ;; compiler.
               (invoke ,(cxx-for-target) "-std=c++11" "-O3"
                       "src/sptree.cpp"
                       "src/tsne.cpp"
                       "src/nbodyfft.cpp"
                       "-o" "bin/fast_tsne"
                       "-pthread" "-lfftw3" "-lm"
                       "-Wno-address-of-packed-member")))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (share (string-append out "/share/fit-sne")))
                 ;; Install everything found in the local "bin" directory.
                 (for-each (lambda (file) (install-file file bin))
                           (find-files "bin"))
                 ;; Point the R wrapper script at the output directory
                 ;; before installing it.
                 (substitute* "fast_tsne.R"
                   (("^FAST_TSNE_SCRIPT_DIR.*")
                    (string-append "FAST_TSNE_SCRIPT_DIR = \"" out "\"\n")))
                 (install-file "fast_tsne.R" share)))))))
      (inputs
       `(("fftw" ,fftw)))
      (home-page "https://github.com/KlugerLab/FIt-SNE")
      (synopsis "Fast Fourier Transform-accelerated interpolation-based t-SNE")
      (description "@dfn{t-Stochastic Neighborhood Embedding} (t-SNE) is a
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This
implementation differs in these ways:
@itemize
@item Instead of approximating the N-body simulation using Barnes-Hut, we
interpolate onto an equispaced grid and use FFT to perform the convolution.
@item Instead of computing nearest neighbors using vantage-point trees, we
approximate nearest neighbors using the Annoy library. The neighbor lookups
are multithreaded to take advantage of machines with multiple cores.
@end itemize
")
      ;; See LICENSE.txt for details on what license applies to what files.
      (license (list license:bsd-4 license:expat license:asl2.0))))
  (define-public python-scanpy
    (package
      (name "python-scanpy")
      (version "1.8.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/theislab/scanpy")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0w1qmv3djqi8q0sn5hv34ivzs157fwjjb9nflfnagnhpxmw8vx5g"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The package is built with flit instead of setup.py.
           (replace 'build
             (lambda _
               (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" ,version)
               ;; ZIP does not support timestamps before 1980.
               (setenv "SOURCE_DATE_EPOCH" "315532800")
               (invoke "flit" "build")))
           ;; Install every wheel that the build phase left in "dist".
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (add-installed-pythonpath inputs outputs)
               (let ((out (assoc-ref outputs "out")))
                 (for-each (lambda (wheel)
                             ;; Print the file name through an explicit
                             ;; directive: used as the control string itself,
                             ;; any "~" in the name would be interpreted by
                             ;; 'format'.
                             (format #true "~a~%" wheel)
                             (invoke "python" "-m" "pip" "install"
                                     wheel (string-append "--prefix=" out)))
                           (find-files "dist" "\\.whl$")))))
           (replace 'check
             (lambda* (#:key tests? inputs #:allow-other-keys)
               ;; Honor #:tests? so that the test suite can be disabled.
               (when tests?
                 ;; These tests require Internet access.
                 (delete-file-recursively "scanpy/tests/notebooks")
                 (delete-file "scanpy/tests/test_clustering.py")
                 (delete-file "scanpy/tests/test_datasets.py")
                 (delete-file "scanpy/tests/test_score_genes.py")
                 (delete-file "scanpy/tests/test_highly_variable_genes.py")

                 ;; TODO: I can't get the plotting tests to work, even with Xvfb.
                 (delete-file "scanpy/tests/test_embedding_plots.py")
                 (delete-file "scanpy/tests/test_preprocessing.py")
                 (delete-file "scanpy/tests/test_read_10x.py")

                 ;; TODO: these fail with TypingError and "Use of unsupported
                 ;; NumPy function 'numpy.split'".
                 (delete-file "scanpy/tests/test_metrics.py")

                 ;; The following test requires 'scanorama', which isn't
                 ;; packaged yet.
                 (delete-file "scanpy/tests/external/test_scanorama_integrate.py")

                 ;; Put the source tree and the anndata sources (for
                 ;; anndata.tests) in front of any existing PYTHONPATH.
                 ;; 'getenv' returns #f for an unset variable, which
                 ;; 'string-append' would reject, so only append it when set.
                 (let ((inherited (getenv "PYTHONPATH")))
                   (setenv "PYTHONPATH"
                           (string-append
                            (getcwd) ":"
                            (assoc-ref inputs "python-anndata:source")
                            (if inherited
                                (string-append ":" inherited)
                                ""))))
                 (invoke "pytest" "-vv"
                         "-k"
                         ;; Plot tests that fail.
                         (string-append "not test_dotplot_matrixplot_stacked_violin"
                                        " and not test_violin_without_raw"
                                        " and not test_correlation"
                                        " and not test_scatterplots"
                                        " and not test_scatter_embedding_add_outline_vmin_vmax_norm"
                                        " and not test_paga"
                                        " and not test_paga_compare"

                                        ;; These try to connect to the network
                                        " and not test_plot_rank_genes_groups_gene_symbols"
                                        " and not test_pca_chunked"
                                        " and not test_pca_sparse"
                                        " and not test_pca_reproducible"))))))))
      (propagated-inputs
       `(("python-anndata" ,python-anndata)
         ("python-h5py" ,python-h5py)
         ("python-igraph" ,python-igraph)
         ("python-joblib" ,python-joblib)
         ("python-legacy-api-wrap" ,python-legacy-api-wrap)
         ("python-louvain" ,python-louvain-0.6)
         ("python-matplotlib" ,python-matplotlib)
         ("python-natsort" ,python-natsort)
         ("python-networkx" ,python-networkx)
         ("python-numba" ,python-numba)
         ("python-packaging" ,python-packaging)
         ("python-pandas" ,python-pandas)
         ("python-patsy" ,python-patsy)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("python-seaborn" ,python-seaborn)
         ("python-sinfo" ,python-sinfo)
         ("python-statsmodels" ,python-statsmodels)
         ("python-tables" ,python-tables)
         ("python-pytoml" ,python-pytoml)
         ("python-tqdm" ,python-tqdm)
         ("python-umap-learn" ,python-umap-learn)))
      (native-inputs
       `(;; This package needs anndata.tests, which is not installed.
         ("python-anndata:source" ,(package-source python-anndata))
         ("python-flit" ,python-flit)
         ("python-leidenalg" ,python-leidenalg)
         ("python-pytest" ,python-pytest)
         ("python-setuptools-scm" ,python-setuptools-scm)))
      (home-page "https://github.com/theislab/scanpy")
      ;; Synopses must not end with a period.
      (synopsis "Single-Cell Analysis in Python")
      (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
      (license license:bsd-3)))
  (define-public python-bbknn
    (package
      (name "python-bbknn")
      (version "1.3.6")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "bbknn" version))
         (sha256
          (base32 "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
      (build-system python-build-system)
      ;; The test suite is switched off for now.
      ;; TODO: Enable after migration to scikit-learn.
      (arguments
       (list #:tests? #false))
      (propagated-inputs
       `(("python-annoy" ,python-annoy)
         ("python-cython" ,python-cython)
         ("python-numpy" ,python-numpy)
         ("python-scipy" ,python-scipy)
         ("python-umap-learn" ,python-umap-learn)))
      (home-page "https://github.com/Teichlab/bbknn")
      (synopsis "Batch balanced KNN")
      (description
       "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
      (license license:expat)))
  (define-public python-drep
    (package
      (name "python-drep")
      (version "3.2.0")
      ;; Release tarball from PyPI.
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "drep" version))
         (sha256
          (base32 "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
      (build-system python-build-system)
      (propagated-inputs
       `(("python-biopython" ,python-biopython)
         ("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-pytest" ,python-pytest)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-seaborn" ,python-seaborn)
         ("python-tqdm" ,python-tqdm)))
      (home-page "https://github.com/MrOlm/drep")
      (synopsis
       "De-replication of microbial genomes assembled from multiple samples")
      (description
       "dRep is a Python program for rapidly comparing large numbers of genomes.
dRep can also \"de-replicate\" a genome set by identifying groups of highly
similar genomes and choosing the best representative genome for each genome
set.")
      (license license:expat)))
  (define-public instrain
    (package
      (name "instrain")
      (version "1.5.2")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "inStrain" version))
         (sha256
          (base32 "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Turn the two sibling-module imports in the docker helper script
           ;; into explicit relative imports.
           (add-after 'unpack 'patch-relative-imports
             (lambda _
               (substitute* "docker/run_instrain.py"
                 (("from s3_utils") "from .s3_utils")
                 (("from job_utils") "from .job_utils")))))))
      (inputs
       `(("python-biopython" ,python-biopython)
         ("python-boto3" ,python-boto3)
         ("python-h5py" ,python-h5py)
         ("python-lmfit" ,python-lmfit)
         ("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numba" ,python-numba)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-psutil" ,python-psutil)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-seaborn" ,python-seaborn)
         ("python-tqdm" ,python-tqdm)
         ;; drep is needed for deprecated plot utilities
         ("python-drep" ,python-drep)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://github.com/MrOlm/inStrain")
      (synopsis "Calculation of strain-level metrics")
      (description
       "inStrain is a Python program for analysis of co-occurring genome
populations from metagenomes that allows highly accurate genome comparisons,
analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
with gene localization and synonymous non-synonymous identification.")
      ;; The tool itself says that the license is "MIT", but the repository
      ;; contains a LICENSE file with the GPLv3.
      ;; See https://github.com/MrOlm/inStrain/issues/51
      (license license:expat)))
  (define-public gffcompare
    (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
          (revision "1"))
      (package
        (name "gffcompare")
        (version (git-version "0.10.15" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/gpertea/gffcompare/")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                    ; no check target
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             ;; Copy the gclib sources to "../gclib" before building (see
             ;; 'README.md' of gffcompare).
             (add-before 'build 'copy-gclib-source
               (lambda* (#:key inputs #:allow-other-keys)
                 (mkdir "../gclib")
                 (copy-recursively
                  (assoc-ref inputs "gclib-source") "../gclib")
                 #t))
             ;; Install the single executable by hand.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                   (install-file "gffcompare" bin)
                   #t))))))
        (native-inputs
         `(("gclib-source"                ; see 'README.md' of gffcompare
            ;; This must be 'let*': with plain 'let' the REVISION and COMMIT
            ;; used to compute VERSION would be the enclosing gffcompare
            ;; bindings, so the gclib file name would carry the wrong commit.
            ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                    (revision "1")
                    (name "gclib")
                    (version (git-version "0.10.3" revision commit)))
               (origin
                 (method git-fetch)
                 (uri (git-reference
                       (url "https://github.com/gpertea/gclib/")
                       (commit commit)))
                 (file-name (git-file-name name version))
                 (sha256
                  (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
        (home-page "https://github.com/gpertea/gffcompare/")
        (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
        (description
         "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
        (license
         (list
          license:expat                   ;license for gffcompare
          license:artistic2.0)))))        ;license for gclib
  (define-public intervaltree
    ;; The package is pinned to a specific commit.
    (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5"))
      (package
        (name "intervaltree")
        (version (git-version "0.0.0" "1" commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ekg/intervaltree/")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
        (build-system gnu-build-system)
        (arguments
         '(#:tests? #f                    ; No tests.
           ;; The installation prefix is handed to make directly.
           #:make-flags
           (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
                 "DESTDIR=\"\"")
           #:phases
           (modify-phases %standard-phases
             ;; There is no configure phase.
             (delete 'configure))))
        (home-page "https://github.com/ekg/intervaltree")
        (synopsis "Minimal C++ interval tree implementation")
        (description
         "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
        (license license:expat))))
  (define-public python-intervaltree
    (package
      (name "python-intervaltree")
      (version "3.0.2")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "intervaltree" version))
         (sha256
          (base32 "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; pytest seems to have a check to make sure the user is testing
           ;; their checked-out code and not an installed, potentially
           ;; out-of-date copy.  This is harmless here, since we just
           ;; installed the package, so we disable the check to avoid
           ;; skipping tests entirely.
           (add-before 'check 'import-mismatch-error-workaround
             (lambda _
               (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
               #true)))))
      (propagated-inputs
       `(("python-sortedcontainers" ,python-sortedcontainers)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://github.com/chaimleib/intervaltree")
      (synopsis "Editable interval tree data structure")
      (description
       "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
      (license license:asl2.0)))
  (define-public python-pypairix
    (package
      (name "python-pypairix")
      (version "0.3.7")
      ;; The tarball on pypi does not include the makefile to build the
      ;; programs.
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/4dn-dcic/pairix")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run make ahead of the Python build to build the programs.
           (add-before 'build 'build-programs
             (lambda _
               (invoke "make")))
           ;; Copy the contents of "bin" into the output's bin directory.
           (add-after 'install 'install-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" target)
                 #true))))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/4dn-dcic/pairix")
      (synopsis "Support for querying pairix-indexed bgzipped text files")
      (description
       "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
      (license license:expat)))
  (define-public python-pyfaidx
    (package
      (name "python-pyfaidx")
      (version "0.5.8")
      ;; Release tarball from PyPI.
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "pyfaidx" version))
         (sha256
          (base32 "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
      (build-system python-build-system)
      (propagated-inputs
       `(("python-six" ,python-six)))
      (home-page "http://mattshirley.com")
      (synopsis "Random access to fasta subsequences")
      (description
       "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
      (license license:bsd-3)))
  ;; Python 2 variant, derived from the package above.
  (define-public python2-pyfaidx (package-with-python2 python-pyfaidx))
  (define-public python-cooler
    (package
      (name "python-cooler")
      (version "0.8.11")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "cooler" version))
         (sha256
          (base32 "1i96fmpsimj4wrx51rxn8lw2gqxf5a2pvrj5rwdd6ivnm3pmhyrn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'patch-tests
             (lambda _
               ;; Two tests need network access; the same skip decorator is
               ;; prepended to both definitions.
               (let ((skip-marker
                      (string-append
                       "@pytest.mark.skip(reason=\"requires network "
                       "access to genome.ucsc.edu\")\n")))
                 (substitute* "tests/test_create.py"
                   (("def test_roundtrip")
                    (string-append skip-marker "def test_roundtrip")))
                 (substitute* "tests/test_util.py"
                   (("def test_fetch_chromsizes")
                    (string-append skip-marker "def test_fetch_chromsizes"))))
               ;; This test depends on ipytree, which contains a lot of
               ;; minified JavaScript.  It is renamed with a leading
               ;; underscore.
               (substitute* "tests/test_fileops.py"
                 (("def test_print_trees")
                  "def _test_print_trees"))))
           ;; Run the test suite through pytest unless tests are disabled.
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "python" "-m" "pytest" "-v")))))))
      (propagated-inputs
       `(("python-asciitree" ,python-asciitree)
         ("python-biopython" ,python-biopython)
         ("python-click" ,python-click)
         ("python-cytoolz" ,python-cytoolz)
         ("python-dask" ,python-dask)
         ("python-h5py" ,python-h5py)
         ("python-multiprocess" ,python-multiprocess)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-pypairix" ,python-pypairix)
         ("python-pysam" ,python-pysam)
         ("python-pyyaml" ,python-pyyaml)
         ("python-scipy" ,python-scipy)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)
         ("python-sparse" ,python-sparse)))
      (native-inputs
       `(("python-codecov" ,python-codecov)
         ("python-mock" ,python-mock)
         ("python-pytest" ,python-pytest)
         ("python-pytest-cov" ,python-pytest-cov)
         ("python-pytest-flake8" ,python-pytest-flake8)))
      ;; Almost all the projects of the Mirnylab are moved under Open2C umbrella
      (home-page "https://github.com/open2c/cooler")
      (synopsis "Sparse binary format for genomic interaction matrices")
      (description
       "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
      (license license:bsd-3)))
  (define-public python-hicmatrix
    (package
      (name "python-hicmatrix")
      (version "15")
      (source
       (origin
         ;; Pypi sources do not contain any test, so fetch from git instead.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/deeptools/HiCMatrix")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1dshjxgb16sdfg9k1bhw2yhyngac04k4ca7aqy8g3i3pprr068r5"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Run the test suite through pytest unless tests are disabled.
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "python" "-m" "pytest" "-v")))))))
      (propagated-inputs
       `(("python-cooler" ,python-cooler)
         ("python-intervaltree" ,python-intervaltree)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-scipy" ,python-scipy)
         ("python-tables" ,python-tables)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://github.com/deeptools/HiCMatrix/")
      (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
      (description
       "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
      (license license:gpl3+)))
  (define-public python-hicexplorer
    (package
      (name "python-hicexplorer")
      (version "2.1.4")
      (source
       (origin
         ;; The latest version is not available on Pypi.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/deeptools/HiCExplorer")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Replace every "==" in setup.py with ">=".
           (add-after 'unpack 'loosen-up-requirements
             (lambda _
               (substitute* "setup.py"
                 (("==") ">="))
               #t)))))
      (propagated-inputs
       `(("python-biopython" ,python-biopython)
         ("python-configparser" ,python-configparser)
         ("python-cooler" ,python-cooler)
         ("python-future" ,python-future)
         ("python-intervaltree" ,python-intervaltree)
         ("python-jinja2" ,python-jinja2)
         ("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-pybigwig" ,python-pybigwig)
         ("python-pysam" ,python-pysam)
         ("python-scipy" ,python-scipy)
         ("python-six" ,python-six)
         ("python-tables" ,python-tables)
         ("python-unidecode" ,python-unidecode)))
      (home-page "https://hicexplorer.readthedocs.io")
      (synopsis "Process, analyze and visualize Hi-C data")
      (description
       "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
      (license license:gpl3)))
  (define-public python-pygenometracks
    (package
      (name "python-pygenometracks")
      (version "3.3")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "pyGenomeTracks" version))
         (sha256
          (base32 "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #false                  ; there are none
         #:phases
         (modify-phases %standard-phases
           ;; Turn the exact matplotlib version pin in setup.py into a
           ;; minimum version.
           (add-after 'unpack 'relax-requirements
             (lambda _
               (substitute* "setup.py"
                 (("matplotlib ==3.1.1") "matplotlib >=3.1.1"))
               #true)))))
      (propagated-inputs
       `(("python-future" ,python-future)
         ("python-gffutils" ,python-gffutils)
         ("python-hicmatrix" ,python-hicmatrix)
         ("python-intervaltree" ,python-intervaltree)
         ("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-pysam" ,python-pysam)
         ("python-tqdm" ,python-tqdm)))
      (native-inputs
       `(("python-pytest" ,python-pytest)))
      (home-page "https://pygenometracks.readthedocs.io")
      (synopsis "Program and library to plot beautiful genome browser tracks")
      (description
       "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
      (license license:gpl3+)))
  (define-public python-iced
    (package
      (name "python-iced")
      (version "0.5.8")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "iced" version))
         (sha256
          (base32 "1avcjmpyyvhgbj5qca4l70ipiz7j3xmcw9p6rd9c06j99faa0r71"))))
      (build-system python-build-system)
      ;; Tests are disabled because there are none.
      (arguments
       (list #:tests? #f))
      (propagated-inputs
       `(("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-scipy" ,python-scipy)
         ("python-scikit-learn" ,python-scikit-learn)))
      (home-page "https://github.com/hiclib/iced")
      (synopsis "ICE normalization")
      (description
       "This is a package for normalizing Hi-C contact counts
efficiently.")
      (license license:bsd-3)))
  (define-public python-hic2cool
    (package
      (name "python-hic2cool")
      (version "0.8.3")
      ;; pypi sources do not contain the test_data directory and no test can be
      ;; run
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/4dn-dcic/hic2cool")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0dlnf0qfcp4jrc1nyya32a035c13xicyq16bwfnwhbb9s47mz7gl"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Two of the test-data files need to be writable.
           (add-after 'unpack 'make-test-data-writable
             (lambda _
               (for-each make-file-writable
                         '("test_data/hic2cool_0.4.2_single_res.cool"
                           "test_data/hic2cool_0.7.0_multi_res.mcool")))))))
      (propagated-inputs
       `(("python-cooler" ,python-cooler)
         ("python-h5py" ,python-h5py)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/4dn-dcic/hic2cool")
      (synopsis "Converter for .hic and .cool files")
      (description
       "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
      (license license:expat)))
  (define-public r-pore
    (package
      (name "r-pore")
      (version "0.24")
      (source
       (origin
         (method url-fetch)
         ;; The tarball is fetched from the SourceForge mirrors.
         (uri (string-append "mirror://sourceforge/rpore/" version
                             "/poRe_" version ".tar.gz"))
         (sha256
          (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
      ;; The upstream name is recorded separately from the package name.
      (properties `((upstream-name . "poRe")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-bit64" ,r-bit64)
         ("r-data-table" ,r-data-table)
         ("r-rhdf5" ,r-rhdf5)
         ("r-shiny" ,r-shiny)
         ("r-svdialogs" ,r-svdialogs)))
      (home-page "https://sourceforge.net/projects/rpore/")
      (synopsis "Visualize Nanopore sequencing data")
      (description
       "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
      ;; This is free software but the license variant is unclear:
      ;; <https://github.com/mw55309/poRe_docs/issues/10>.
      (license license:bsd-3)))
  (define-public r-xbioc
    ;; The package is pinned to a specific commit.
    (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
          (revision "1"))
      (package
        (name "r-xbioc")
        (version (git-version "0.1.16" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/renozao/xbioc")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
        (build-system r-build-system)
        (propagated-inputs
         `(("r-annotationdbi" ,r-annotationdbi)
           ("r-assertthat" ,r-assertthat)
           ("r-biobase" ,r-biobase)
           ("r-biocmanager" ,r-biocmanager)
           ("r-digest" ,r-digest)
           ("r-pkgmaker" ,r-pkgmaker)
           ("r-plyr" ,r-plyr)
           ("r-reshape2" ,r-reshape2)
           ("r-stringr" ,r-stringr)))
        (home-page "https://github.com/renozao/xbioc/")
        (synopsis "Extra base functions for Bioconductor")
        (description
         "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
        (license license:gpl3+))))
  11838. (define-public r-cssam
  11839. (let ((revision "1")
  11840. (commit "9ec58c982fa551af0d80b1a266890d92954833f2"))
  11841. (package
  11842. (name "r-cssam")
  11843. (version (git-version "1.4" revision commit))
  11844. (source (origin
  11845. (method git-fetch)
  11846. (uri (git-reference
  11847. (url "https://github.com/shenorrLab/csSAM")
  11848. (commit commit)))
  11849. (file-name (git-file-name name version))
  11850. (sha256
  11851. (base32
  11852. "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
  11853. (build-system r-build-system)
  11854. (propagated-inputs
  11855. `(("r-formula" ,r-formula)
  11856. ("r-ggplot2" ,r-ggplot2)
  11857. ("r-pkgmaker" ,r-pkgmaker)
  11858. ("r-plyr" ,r-plyr)
  11859. ("r-rngtools" ,r-rngtools)
  11860. ("r-scales" ,r-scales)))
  11861. (home-page "https://github.com/shenorrLab/csSAM/")
  11862. (synopsis "Cell type-specific statistical analysis of microarray")
  11863. (description "This package implements the method csSAM that computes
  11864. cell-specific differential expression from measured cell proportions using
  11865. SAM.")
  11866. ;; Any version
  11867. (license license:lgpl2.1+))))
  11868. (define-public r-bseqsc
  11869. (let ((revision "1")
  11870. (commit "fef3f3e38dcf3df37103348b5780937982b43b98"))
  11871. (package
  11872. (name "r-bseqsc")
  11873. (version (git-version "1.0" revision commit))
  11874. (source (origin
  11875. (method git-fetch)
  11876. (uri (git-reference
  11877. (url "https://github.com/shenorrLab/bseqsc")
  11878. (commit commit)))
  11879. (file-name (git-file-name name version))
  11880. (sha256
  11881. (base32
  11882. "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
  11883. (build-system r-build-system)
  11884. (propagated-inputs
  11885. `(("r-abind" ,r-abind)
  11886. ("r-annotationdbi" ,r-annotationdbi)
  11887. ("r-biobase" ,r-biobase)
  11888. ("r-cssam" ,r-cssam)
  11889. ("r-dplyr" ,r-dplyr)
  11890. ("r-e1071" ,r-e1071)
  11891. ("r-edger" ,r-edger)
  11892. ("r-ggplot2" ,r-ggplot2)
  11893. ("r-nmf" ,r-nmf)
  11894. ("r-openxlsx" ,r-openxlsx)
  11895. ("r-pkgmaker" ,r-pkgmaker)
  11896. ("r-plyr" ,r-plyr)
  11897. ("r-preprocesscore" ,r-preprocesscore)
  11898. ("r-rngtools" ,r-rngtools)
  11899. ("r-scales" ,r-scales)
  11900. ("r-stringr" ,r-stringr)
  11901. ("r-xbioc" ,r-xbioc)))
  11902. (home-page "https://github.com/shenorrLab/bseqsc")
  11903. (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
  11904. (description "BSeq-sc is a bioinformatics analysis pipeline that
  11905. leverages single-cell sequencing data to estimate cell type proportion and
  11906. cell type-specific gene expression differences from RNA-seq data from bulk
  11907. tissue samples. This is a companion package to the publication \"A
  11908. single-cell transcriptomic map of the human and mouse pancreas reveals inter-
  11909. and intra-cell population structure.\" Baron et al. Cell Systems (2016)
  11910. @url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
  11911. (license license:gpl2+))))
  11912. (define-public porechop
  11913. ;; The recommended way to install is to clone the git repository
  11914. ;; https://github.com/rrwick/Porechop#installation
  11915. (let ((commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861")
  11916. (revision "1"))
  11917. (package
  11918. (name "porechop")
  11919. (version (git-version "0.2.3" revision commit))
  11920. (source
  11921. (origin
  11922. (method git-fetch)
  11923. (uri (git-reference
  11924. (url "https://github.com/rrwick/Porechop")
  11925. (commit commit)))
  11926. (file-name (git-file-name name version))
  11927. (sha256
  11928. (base32 "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
  11929. (build-system python-build-system)
  11930. (home-page "https://github.com/rrwick/porechop")
  11931. (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
  11932. (description
  11933. "The porechop package is a tool for finding and removing adapters from Oxford
  11934. Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
  11935. has an adapter in its middle, it is treated as chimeric and chopped into
  11936. separate reads. Porechop performs thorough alignments to effectively find
  11937. adapters, even at low sequence identity. Porechop also supports demultiplexing
  11938. of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
  11939. Barcoding Kit or Rapid Barcoding Kit.")
  11940. (license license:gpl3+))))
  11941. (define-public poretools
  11942. ;; The latest release was in 2016 and the latest commit is from 2017
  11943. ;; the recommended way to install is to clone the git repository
  11944. ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  11945. (let ((commit "e426b1f09e86ac259a00c261c79df91510777407")
  11946. (revision "1"))
  11947. (package
  11948. (name "poretools")
  11949. (version (git-version "0.6.0" revision commit))
  11950. (source
  11951. (origin
  11952. (method git-fetch)
  11953. (uri (git-reference
  11954. (url "https://github.com/arq5x/poretools")
  11955. (commit commit)))
  11956. (file-name (git-file-name name version))
  11957. (sha256
  11958. (base32 "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
  11959. (build-system python-build-system)
  11960. ;; requires python >=2.7, <3.0, and the same for python dependencies
  11961. (arguments `(#:python ,python-2))
  11962. (inputs
  11963. `(("hdf5" ,hdf5)))
  11964. (propagated-inputs
  11965. `(("python-dateutil" ,python2-dateutil)
  11966. ("python-h5py" ,python2-h5py)
  11967. ("python-matplotlib" ,python2-matplotlib)
  11968. ("python-pandas" ,python2-pandas)
  11969. ("python-seaborn" ,python2-seaborn)))
  11970. (home-page "https://poretools.readthedocs.io")
  11971. (synopsis "Toolkit for working with nanopore sequencing data")
  11972. (description
  11973. "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
  11974. This @code{poretools} package is a flexible toolkit for exploring datasets
  11975. generated by nanopore sequencing devices for the purposes of quality control and
  11976. downstream analysis. Poretools operates directly on the native FAST5, a variant
  11977. of the Hierarchical Data Format (HDF5) standard.")
  11978. (license license:expat))))
  11979. (define-public jamm
  11980. (package
  11981. (name "jamm")
  11982. (version "1.0.7.6")
  11983. (source
  11984. (origin
  11985. (method git-fetch)
  11986. (uri (git-reference
  11987. (url "https://github.com/mahmoudibrahim/JAMM")
  11988. (commit (string-append "JAMMv" version))))
  11989. (file-name (git-file-name name version))
  11990. (sha256
  11991. (base32
  11992. "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
  11993. (build-system gnu-build-system)
  11994. (arguments
  11995. `(#:tests? #f ; there are none
  11996. #:phases
  11997. (modify-phases %standard-phases
  11998. (delete 'configure)
  11999. (delete 'build)
  12000. (replace 'install
  12001. (lambda* (#:key inputs outputs #:allow-other-keys)
  12002. (let* ((out (assoc-ref outputs "out"))
  12003. (libexec (string-append out "/libexec/jamm"))
  12004. (bin (string-append out "/bin")))
  12005. (substitute* '("JAMM.sh"
  12006. "SignalGenerator.sh")
  12007. (("^sPath=.*")
  12008. (string-append "sPath=\"" libexec "\"\n")))
  12009. (for-each (lambda (file)
  12010. (install-file file libexec))
  12011. (list "bincalculator.r"
  12012. "peakfinder.r"
  12013. "peakhelper.r"
  12014. "signalmaker.r"
  12015. "xcorr.r"
  12016. "xcorrhelper.r"
  12017. ;; Perl scripts
  12018. "peakfilter.pl"
  12019. "readshifter.pl"))
  12020. (for-each
  12021. (lambda (script)
  12022. (chmod script #o555)
  12023. (install-file script bin)
  12024. (wrap-program (string-append bin "/" script)
  12025. `("PATH" ":" prefix
  12026. (,(string-append (assoc-ref inputs "coreutils") "/bin")
  12027. ,(string-append (assoc-ref inputs "gawk") "/bin")
  12028. ,(string-append (assoc-ref inputs "perl") "/bin")
  12029. ,(string-append (assoc-ref inputs "r-minimal") "/bin")))
  12030. `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))
  12031. `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
  12032. (list "JAMM.sh" "SignalGenerator.sh")))
  12033. #t)))))
  12034. (inputs
  12035. `(("bash" ,bash)
  12036. ("coreutils" ,coreutils)
  12037. ("gawk" ,gawk)
  12038. ("perl" ,perl)
  12039. ("r-minimal" ,r-minimal)
  12040. ;;("r-parallel" ,r-parallel)
  12041. ("r-signal" ,r-signal)
  12042. ("r-mclust" ,r-mclust)))
  12043. (home-page "https://github.com/mahmoudibrahim/JAMM")
  12044. (synopsis "Peak finder for NGS datasets")
  12045. (description
  12046. "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
  12047. ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
  12048. boundaries accurately. JAMM is applicable to both broad and narrow
  12049. datasets.")
  12050. (license license:gpl3+)))
  12051. (define-public ngless
  12052. (package
  12053. (name "ngless")
  12054. (version "1.1.0")
  12055. (source
  12056. (origin
  12057. (method git-fetch)
  12058. (uri (git-reference
  12059. (url "https://gitlab.com/ngless/ngless.git")
  12060. (commit (string-append "v" version))))
  12061. (file-name (git-file-name name version))
  12062. (sha256
  12063. (base32
  12064. "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
  12065. (build-system haskell-build-system)
  12066. (arguments
  12067. `(#:haddock? #f ; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
  12068. ; error: parse error on input import
  12069. ; import Options.Applicative
  12070. #:phases
  12071. (modify-phases %standard-phases
  12072. (add-after 'unpack 'create-Versions.hs
  12073. (lambda _
  12074. (substitute* "Makefile"
  12075. (("BWA_VERSION = .*")
  12076. (string-append "BWA_VERSION = "
  12077. ,(package-version bwa) "\n"))
  12078. (("SAM_VERSION = .*")
  12079. (string-append "SAM_VERSION = "
  12080. ,(package-version samtools) "\n"))
  12081. (("PRODIGAL_VERSION = .*")
  12082. (string-append "PRODIGAL_VERSION = "
  12083. ,(package-version prodigal) "\n"))
  12084. (("MINIMAP2_VERSION = .*")
  12085. (string-append "MINIMAP2_VERSION = "
  12086. ,(package-version minimap2) "\n")))
  12087. (invoke "make" "NGLess/Dependencies/Versions.hs")
  12088. #t))
  12089. (add-after 'create-Versions.hs 'create-cabal-file
  12090. (lambda _ (invoke "hpack") #t))
  12091. ;; These tools are expected to be installed alongside ngless.
  12092. (add-after 'install 'link-tools
  12093. (lambda* (#:key inputs outputs #:allow-other-keys)
  12094. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  12095. (symlink (search-input-file inputs "/bin/prodigal")
  12096. (string-append bin "ngless-" ,version "-prodigal"))
  12097. (symlink (search-input-file inputs "/bin/minimap2")
  12098. (string-append bin "ngless-" ,version "-minimap2"))
  12099. (symlink (search-input-file inputs "/bin/samtools")
  12100. (string-append bin "ngless-" ,version "-samtools"))
  12101. (symlink (search-input-file inputs "/bin/bwa")
  12102. (string-append bin "ngless-" ,version "-bwa"))
  12103. #t))))))
  12104. (inputs
  12105. `(("prodigal" ,prodigal)
  12106. ("bwa" ,bwa)
  12107. ("samtools" ,samtools)
  12108. ("minimap2" ,minimap2)
  12109. ("ghc-aeson" ,ghc-aeson)
  12110. ("ghc-ansi-terminal" ,ghc-ansi-terminal)
  12111. ("ghc-async" ,ghc-async)
  12112. ("ghc-atomic-write" ,ghc-atomic-write)
  12113. ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
  12114. ("ghc-conduit" ,ghc-conduit)
  12115. ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
  12116. ("ghc-conduit-extra" ,ghc-conduit-extra)
  12117. ("ghc-configurator" ,ghc-configurator)
  12118. ("ghc-convertible" ,ghc-convertible)
  12119. ("ghc-data-default" ,ghc-data-default)
  12120. ("ghc-diagrams-core" ,ghc-diagrams-core)
  12121. ("ghc-diagrams-lib" ,ghc-diagrams-lib)
  12122. ("ghc-diagrams-svg" ,ghc-diagrams-svg)
  12123. ("ghc-double-conversion" ,ghc-double-conversion)
  12124. ("ghc-edit-distance" ,ghc-edit-distance)
  12125. ("ghc-either" ,ghc-either)
  12126. ("ghc-errors" ,ghc-errors)
  12127. ("ghc-extra" ,ghc-extra)
  12128. ("ghc-filemanip" ,ghc-filemanip)
  12129. ("ghc-file-embed" ,ghc-file-embed)
  12130. ("ghc-gitrev" ,ghc-gitrev)
  12131. ("ghc-hashtables" ,ghc-hashtables)
  12132. ("ghc-http-conduit" ,ghc-http-conduit)
  12133. ("ghc-inline-c" ,ghc-inline-c)
  12134. ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
  12135. ("ghc-intervalmap" ,ghc-intervalmap)
  12136. ("ghc-missingh" ,ghc-missingh)
  12137. ("ghc-optparse-applicative" ,ghc-optparse-applicative)
  12138. ("ghc-regex" ,ghc-regex)
  12139. ("ghc-safe" ,ghc-safe)
  12140. ("ghc-safeio" ,ghc-safeio)
  12141. ("ghc-strict" ,ghc-strict)
  12142. ("ghc-tar" ,ghc-tar)
  12143. ("ghc-tar-conduit" ,ghc-tar-conduit)
  12144. ("ghc-unliftio" ,ghc-unliftio)
  12145. ("ghc-unliftio-core" ,ghc-unliftio-core)
  12146. ("ghc-vector" ,ghc-vector)
  12147. ("ghc-yaml" ,ghc-yaml)
  12148. ("ghc-zlib" ,ghc-zlib)))
  12149. (propagated-inputs
  12150. `(("r-r6" ,r-r6)
  12151. ("r-hdf5r" ,r-hdf5r)
  12152. ("r-iterators" ,r-iterators)
  12153. ("r-itertools" ,r-itertools)
  12154. ("r-matrix" ,r-matrix)))
  12155. (native-inputs
  12156. `(("ghc-hpack" ,ghc-hpack)
  12157. ("ghc-quickcheck" ,ghc-quickcheck)
  12158. ("ghc-test-framework" ,ghc-test-framework)
  12159. ("ghc-test-framework-hunit",ghc-test-framework-hunit)
  12160. ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
  12161. ("ghc-test-framework-th" ,ghc-test-framework-th)))
  12162. (home-page "https://gitlab.com/ngless/ngless")
  12163. (synopsis "DSL for processing next-generation sequencing data")
  12164. (description "Ngless is a domain-specific language for
  12165. @dfn{next-generation sequencing} (NGS) data processing.")
  12166. (license license:expat)))
  12167. (define-public filtlong
  12168. ;; The recommended way to install is to clone the git repository
  12169. ;; https://github.com/rrwick/Filtlong#installation
  12170. ;; and the lastest release is more than nine months old
  12171. (let ((commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab")
  12172. (revision "1"))
  12173. (package
  12174. (name "filtlong")
  12175. (version (git-version "0.2.0" revision commit))
  12176. (source
  12177. (origin
  12178. (method git-fetch)
  12179. (uri (git-reference
  12180. (url "https://github.com/rrwick/Filtlong")
  12181. (commit commit)))
  12182. (file-name (git-file-name name version))
  12183. (sha256
  12184. (base32 "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
  12185. (build-system gnu-build-system)
  12186. (arguments
  12187. `(#:tests? #f ; no check target
  12188. #:phases
  12189. (modify-phases %standard-phases
  12190. (delete 'configure)
  12191. (replace 'install
  12192. (lambda* (#:key outputs #:allow-other-keys)
  12193. (let* ((out (assoc-ref outputs "out"))
  12194. (bin (string-append out "/bin"))
  12195. (scripts (string-append out "/share/filtlong/scripts")))
  12196. (install-file "bin/filtlong" bin)
  12197. (install-file "scripts/histogram.py" scripts)
  12198. (install-file "scripts/read_info_histograms.sh" scripts))
  12199. #t))
  12200. (add-after 'install 'wrap-program
  12201. (lambda* (#:key inputs outputs #:allow-other-keys)
  12202. (let* ((out (assoc-ref outputs "out"))
  12203. (path (getenv "GUIX_PYTHONPATH")))
  12204. (wrap-program (string-append out
  12205. "/share/filtlong/scripts/histogram.py")
  12206. `("GUIX_PYTHONPATH" ":" prefix (,path))))
  12207. #t))
  12208. (add-before 'check 'patch-tests
  12209. (lambda _
  12210. (substitute* "scripts/read_info_histograms.sh"
  12211. (("awk") (which "gawk")))
  12212. #t)))))
  12213. (inputs
  12214. `(("gawk" ,gawk) ;for read_info_histograms.sh
  12215. ("python" ,python-2) ;required for histogram.py
  12216. ("zlib" ,zlib)))
  12217. (home-page "https://github.com/rrwick/Filtlong/")
  12218. (synopsis "Tool for quality filtering of Nanopore and PacBio data")
  12219. (description
  12220. "The Filtlong package is a tool for filtering long reads by quality.
  12221. It can take a set of long reads and produce a smaller, better subset. It uses
  12222. both read length (longer is better) and read identity (higher is better) when
  12223. choosing which reads pass the filter.")
  12224. (license (list license:gpl3 ;filtlong
  12225. license:asl2.0))))) ;histogram.py
  12226. (define-public nanopolish
  12227. ;; The recommended way to install is to clone the git repository
  12228. ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  12229. ;; Also, the differences between release and current version seem to be
  12230. ;; significant.
  12231. (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
  12232. (revision "1"))
  12233. (package
  12234. (name "nanopolish")
  12235. (version (git-version "0.11.1" revision commit))
  12236. (source
  12237. (origin
  12238. (method git-fetch)
  12239. (uri (git-reference
  12240. (url "https://github.com/jts/nanopolish")
  12241. (commit commit)
  12242. (recursive? #t)))
  12243. (file-name (git-file-name name version))
  12244. (sha256
  12245. (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
  12246. (modules '((guix build utils)))
  12247. (snippet
  12248. '(begin
  12249. (delete-file-recursively "htslib")
  12250. #t))))
  12251. (build-system gnu-build-system)
  12252. (arguments
  12253. `(#:make-flags
  12254. `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
  12255. #:tests? #f ; no check target
  12256. #:phases
  12257. (modify-phases %standard-phases
  12258. (add-after 'unpack 'find-eigen
  12259. (lambda* (#:key inputs #:allow-other-keys)
  12260. (setenv "CPATH"
  12261. (string-append
  12262. (search-input-directory inputs "/include/eigen3")
  12263. ":" (or (getenv "CPATH") "")))))
  12264. (delete 'configure)
  12265. (replace 'install
  12266. (lambda* (#:key outputs #:allow-other-keys)
  12267. (let* ((out (assoc-ref outputs "out"))
  12268. (bin (string-append out "/bin"))
  12269. (scripts (string-append out "/share/nanopolish/scripts")))
  12270. (install-file "nanopolish" bin)
  12271. (for-each (lambda (file) (install-file file scripts))
  12272. (find-files "scripts" ".*"))
  12273. #t)))
  12274. (add-after 'install 'wrap-programs
  12275. (lambda* (#:key inputs outputs #:allow-other-keys)
  12276. (let ((pythonpath (getenv "GUIX_PYTHONPATH"))
  12277. (perl5lib (getenv "PERL5LIB"))
  12278. (scripts (string-append (assoc-ref outputs "out")
  12279. "/share/nanopolish/scripts"))
  12280. (guile (search-input-file inputs "bin/guile")))
  12281. (for-each (lambda (file)
  12282. (wrap-program file `("GUIX_PYTHONPATH" ":" prefix (,pythonpath))))
  12283. (find-files scripts "\\.py"))
  12284. (for-each (lambda (file)
  12285. (wrap-script file #:guile guile
  12286. `("PERL5LIB" ":" prefix (,perl5lib))))
  12287. (find-files scripts "\\.pl"))))))))
  12288. (inputs
  12289. `(("guile" ,guile-3.0) ; for wrappers
  12290. ("eigen" ,eigen)
  12291. ("hdf5" ,hdf5)
  12292. ("htslib" ,htslib)
  12293. ("perl" ,perl)
  12294. ("bioperl" ,bioperl-minimal)
  12295. ("perl-getopt-long" ,perl-getopt-long)
  12296. ("python" ,python-wrapper)
  12297. ("python-biopython" ,python-biopython)
  12298. ("python-numpy" ,python-numpy)
  12299. ("python-pysam" ,python-pysam)
  12300. ("python-scikit-learn" , python-scikit-learn)
  12301. ("python-scipy" ,python-scipy)
  12302. ("zlib" ,zlib)))
  12303. (home-page "https://github.com/jts/nanopolish")
  12304. (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
  12305. (description
  12306. "This package analyses the Oxford Nanopore sequencing data at signal-level.
  12307. Nanopolish can calculate an improved consensus sequence for a draft genome
  12308. assembly, detect base modifications, call SNPs (Single nucleotide
  12309. polymorphisms) and indels with respect to a reference genome and more.")
  12310. (license license:expat))))
  12311. (define-public cnvkit
  12312. (package
  12313. (name "cnvkit")
  12314. (version "0.9.5")
  12315. (source
  12316. (origin
  12317. (method git-fetch)
  12318. (uri (git-reference
  12319. (url "https://github.com/etal/cnvkit")
  12320. (commit (string-append "v" version))))
  12321. (file-name (git-file-name name version))
  12322. (sha256
  12323. (base32 "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
  12324. (build-system python-build-system)
  12325. (propagated-inputs
  12326. `(("python-biopython" ,python-biopython)
  12327. ("python-future" ,python-future)
  12328. ("python-matplotlib" ,python-matplotlib)
  12329. ("python-numpy" ,python-numpy)
  12330. ("python-reportlab" ,python-reportlab)
  12331. ("python-pandas" ,python-pandas)
  12332. ("python-pysam" ,python-pysam)
  12333. ("python-pyfaidx" ,python-pyfaidx)
  12334. ("python-scipy" ,python-scipy)
  12335. ;; R packages
  12336. ("r-dnacopy" ,r-dnacopy)))
  12337. (home-page "https://cnvkit.readthedocs.org/")
  12338. (synopsis "Copy number variant detection from targeted DNA sequencing")
  12339. (description
  12340. "CNVkit is a Python library and command-line software toolkit to infer
  12341. and visualize copy number from high-throughput DNA sequencing data. It is
  12342. designed for use with hybrid capture, including both whole-exome and custom
  12343. target panels, and short-read sequencing platforms such as Illumina and Ion
  12344. Torrent.")
  12345. (license license:asl2.0)))
  12346. (define-public python-pyfit-sne
  12347. (package
  12348. (name "python-pyfit-sne")
  12349. (version "1.0.1")
  12350. (source
  12351. (origin
  12352. (method git-fetch)
  12353. (uri (git-reference
  12354. (url "https://github.com/KlugerLab/pyFIt-SNE")
  12355. (commit version)))
  12356. (file-name (git-file-name name version))
  12357. (sha256
  12358. (base32 "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
  12359. (build-system python-build-system)
  12360. (propagated-inputs
  12361. `(("python-numpy" ,python-numpy)))
  12362. (inputs
  12363. `(("fftw" ,fftw)))
  12364. (native-inputs
  12365. `(("python-cython" ,python-cython)))
  12366. (home-page "https://github.com/KlugerLab/pyFIt-SNE")
  12367. (synopsis "FFT-accelerated Interpolation-based t-SNE")
  12368. (description
  12369. "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
  12370. method for dimensionality reduction and visualization of high dimensional
  12371. datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
  12372. approximate the gradient at each iteration of gradient descent. This package
  12373. is a Cython wrapper for FIt-SNE.")
  12374. (license license:bsd-4)))
  12375. (define-public bbmap
  12376. (package
  12377. (name "bbmap")
  12378. (version "38.90")
  12379. (source (origin
  12380. (method url-fetch)
  12381. (uri (string-append
  12382. "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
  12383. (sha256
  12384. (base32
  12385. "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
  12386. (build-system ant-build-system)
  12387. (arguments
  12388. `(#:build-target "dist"
  12389. #:tests? #f ; there are none
  12390. #:make-flags
  12391. (list (string-append "-Dmpijar="
  12392. (assoc-ref %build-inputs "java-openmpi")
  12393. "/lib/mpi.jar"))
  12394. #:modules ((guix build ant-build-system)
  12395. (guix build utils)
  12396. (guix build java-utils))
  12397. #:phases
  12398. (modify-phases %standard-phases
  12399. (add-after 'build 'build-jni-library
  12400. (lambda _
  12401. (with-directory-excursion "jni"
  12402. (invoke "make" "-f" "makefile.linux"))))
  12403. ;; There is no install target
  12404. (replace 'install (install-jars "dist"))
  12405. (add-after 'install 'install-scripts-and-documentation
  12406. (lambda* (#:key outputs #:allow-other-keys)
  12407. (substitute* "calcmem.sh"
  12408. (("\\| awk ") (string-append "| " (which "awk") " ")))
  12409. (let* ((scripts (find-files "." "\\.sh$"))
  12410. (out (assoc-ref outputs "out"))
  12411. (bin (string-append out "/bin"))
  12412. (doc (string-append out "/share/doc/bbmap"))
  12413. (jni (string-append out "/lib/jni")))
  12414. (substitute* scripts
  12415. (("\\$DIR\"\"docs") doc)
  12416. (("^CP=.*")
  12417. (string-append "CP=" out "/share/java/BBTools.jar\n"))
  12418. (("^NATIVELIBDIR.*")
  12419. (string-append "NATIVELIBDIR=" jni "\n"))
  12420. (("CMD=\"java")
  12421. (string-append "CMD=\"" (which "java"))))
  12422. (for-each (lambda (script) (install-file script bin)) scripts)
  12423. ;; Install JNI library
  12424. (install-file "jni/libbbtoolsjni.so" jni)
  12425. ;; Install documentation
  12426. (install-file "docs/readme.txt" doc)
  12427. (copy-recursively "docs/guides" doc))
  12428. #t)))
  12429. #:jdk ,openjdk11))
  12430. (inputs
  12431. `(("gawk" ,gawk)
  12432. ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
  12433. ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
  12434. ("java-openmpi" ,java-openmpi)))
  12435. (home-page "https://sourceforge.net/projects/bbmap/")
  12436. (synopsis "Aligner and other tools for short sequencing reads")
  12437. (description
  12438. "This package provides bioinformatic tools to align, deduplicate,
  12439. reformat, filter and normalize DNA and RNA-seq data. It includes the
  12440. following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
  12441. a kmer-based error-correction and normalization tool; Dedupe, a tool to
  12442. simplify assemblies by removing duplicate or contained subsequences that share
  12443. a target percent identity; Reformat, to convert reads between
  12444. fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
  12445. 500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
  12446. to an artifact/contaminant file.")
  12447. (license license:bsd-3)))
  12448. (define-public velvet
  12449. (package
  12450. (name "velvet")
  12451. (version "1.2.10")
  12452. (source (origin
  12453. (method url-fetch)
  12454. (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
  12455. "velvet_" version ".tgz"))
  12456. (sha256
  12457. (base32
  12458. "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
  12459. ;; Delete bundled libraries
  12460. (modules '((guix build utils)))
  12461. (snippet
  12462. '(begin
  12463. (delete-file "Manual.pdf")
  12464. (delete-file-recursively "third-party")
  12465. #t))))
  12466. (build-system gnu-build-system)
  12467. (arguments
  12468. `(#:make-flags '("OPENMP=t")
  12469. #:test-target "test"
  12470. #:phases
  12471. (modify-phases %standard-phases
  12472. (delete 'configure)
  12473. (add-after 'unpack 'fix-zlib-include
  12474. (lambda _
  12475. (substitute* "src/binarySequences.c"
  12476. (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
  12477. #t))
  12478. (replace 'install
  12479. (lambda* (#:key outputs #:allow-other-keys)
  12480. (let* ((out (assoc-ref outputs "out"))
  12481. (bin (string-append out "/bin"))
  12482. (doc (string-append out "/share/doc/velvet")))
  12483. (mkdir-p bin)
  12484. (mkdir-p doc)
  12485. (install-file "velveth" bin)
  12486. (install-file "velvetg" bin)
  12487. (install-file "Manual.pdf" doc)
  12488. (install-file "Columbus_manual.pdf" doc)
  12489. #t))))))
  12490. (inputs
  12491. `(("openmpi" ,openmpi)
  12492. ("zlib" ,zlib)))
  12493. (native-inputs
  12494. `(("texlive" ,(texlive-updmap.cfg (list texlive-latex-graphics
  12495. texlive-hyperref)))))
  12496. (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
  12497. (synopsis "Nucleic acid sequence assembler for very short reads")
  12498. (description
  12499. "Velvet is a de novo genomic assembler specially designed for short read
  12500. sequencing technologies, such as Solexa or 454. Velvet currently takes in
  12501. short read sequences, removes errors then produces high quality unique
  12502. contigs. It then uses paired read information, if available, to retrieve the
  12503. repeated areas between contigs.")
  12504. (license license:gpl2+)))
  12505. (define-public python-velocyto
  12506. (package
  12507. (name "python-velocyto")
  12508. (version "0.17.17")
  12509. (source
  12510. (origin
  12511. (method url-fetch)
  12512. (uri (pypi-uri "velocyto" version))
  12513. (sha256
  12514. (base32
  12515. "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
  12516. (build-system python-build-system)
  12517. (native-inputs
  12518. `(("python-joblib" ,python-joblib)))
  12519. (propagated-inputs
  12520. `(("python-click" ,python-click)
  12521. ("python-cython" ,python-cython)
  12522. ("python-h5py" ,python-h5py)
  12523. ("python-loompy" ,python-loompy)
  12524. ("python-matplotlib" ,python-matplotlib)
  12525. ("python-numba" ,python-numba)
  12526. ("python-numpy" ,python-numpy)
  12527. ("python-pandas" ,python-pandas)
  12528. ("python-pysam" ,python-pysam)
  12529. ("python-scikit-learn" ,python-scikit-learn)
  12530. ("python-scipy" ,python-scipy)))
  12531. (home-page "https://github.com/velocyto-team/velocyto.py")
  12532. (synopsis "RNA velocity analysis for single cell RNA-seq data")
  12533. (description
  12534. "Velocyto is a library for the analysis of RNA velocity. Velocyto
  12535. includes a command line tool and an analysis pipeline.")
  12536. (license license:bsd-2)))
  ;; Arriba: gene fusion detection on top of STAR alignments.  The release
  ;; tarball is built with a plain Makefile, so the 'configure phase is replaced
  ;; by source patching and the 'install phase is written by hand.
  (define-public arriba
    (package
      (name "arriba")
      (version "1.0.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/suhrig/arriba/releases/"
                             "download/v" version "/arriba_v" version ".tar.gz"))
         (sha256
          (base32
           "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; there are none
         #:phases
         (modify-phases %standard-phases
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Point the Makefile at the htslib input: use its installed
               ;; headers and link against its shared library instead of the
               ;; static archive the Makefile expects under $(HTSLIB).
               (let ((htslib (assoc-ref inputs "htslib")))
                 (substitute* "Makefile"
                   (("-I\\$\\(HTSLIB\\)/htslib")
                    (string-append "-I" htslib "/include/htslib"))
                   ((" \\$\\(HTSLIB\\)/libhts.a")
                    (string-append " " htslib "/lib/libhts.so"))))
               ;; Replace the bare STAR and samtools invocations in the
               ;; wrapper script with the absolute file names found on PATH.
               (substitute* "run_arriba.sh"
                 (("^STAR ") (string-append (which "STAR") " "))
                 (("samtools --version-only")
                  (string-append (which "samtools") " --version-only"))
                 (("samtools index")
                  (string-append (which "samtools") " index"))
                 (("samtools sort")
                  (string-append (which "samtools") " sort")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "arriba" bin)
                 (install-file "run_arriba.sh" bin)
                 (install-file "draw_fusions.R" bin)
                 ;; Wrap the R script so that it runs with the R_LIBS_SITE
                 ;; value that is set in the build environment.
                 (wrap-program (string-append bin "/draw_fusions.R")
                   `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
               #t)))))
      (inputs
       `(("htslib" ,htslib)
         ("r-minimal" ,r-minimal)
         ("r-circlize" ,r-circlize)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-genomicranges" ,r-genomicranges)
         ("samtools" ,samtools)
         ("star" ,star)
         ("zlib" ,zlib)))
      (home-page "https://github.com/suhrig/arriba")
      ;; No trailing whitespace in the synopsis.
      (synopsis "Gene fusion detection from RNA-Seq data")
      (description
       "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
      ;; All code is under the Expat license with the exception of
      ;; "draw_fusions.R", which is under GPLv3.
      (license (list license:expat license:gpl3))))
  ;; AdapterRemoval: adapter trimming and read merging for HTS reads.  It is
  ;; built straight from its Makefile; the installation prefix is passed as a
  ;; make flag.
  (define-public adapterremoval
    (package
      (name "adapterremoval")
      (version "2.3.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/MikkelSchubert/adapterremoval")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
      (build-system gnu-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The 'configure phase is simply dropped.
           (delete 'configure))
         #:test-target "test"
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list "COLOR_BUILD=no"
                 (string-append "PREFIX=" out)))))
      (home-page "https://adapterremoval.readthedocs.io/")
      (synopsis "Rapid sequence adapter trimming, identification, and read merging")
      (description
       "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
      (license license:gpl3+)))
  ;; pplacer: phylogenetic placement, built with OCaml 4.07.  The source is
  ;; patched after unpacking: myocamlbuild.ml is adjusted, the files under
  ;; cdd_src are overwritten with those from the cddlib source, and the calls
  ;; to 'git' are replaced by fixed version strings.
  (define-public pplacer
    (let ((commit "807f6f3"))
      (package
        (name "pplacer")
        ;; The commit should be updated with each version change.
        (version "1.1.alpha19")
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/matsen/pplacer")
                 (commit (string-append "v" version))))
           (file-name (git-file-name name version))
           (sha256
            (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
        (build-system ocaml-build-system)
        (arguments
         `(#:ocaml ,ocaml-4.07
           #:findlib ,ocaml4.07-findlib
           #:modules ((guix build ocaml-build-system)
                      (guix build utils)
                      (ice-9 ftw))
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (add-after 'unpack 'fix-build-with-latest-ocaml
               (lambda _
                 (substitute* "myocamlbuild.ml"
                   ;; Add the "-unsafe-string" compile flag just before the
                   ;; "c_pam" dependency declaration.
                   (("dep \\[\"c_pam\"\\]" matched)
                    (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                   matched))
                   ;; Define small string helpers ahead of 'run_and_read';
                   ;; they are used by the replacement below.
                   (("let run_and_read" matched)
                    (string-append "
let split s ch =
let x = ref [] in
let rec go s =
let pos = String.index s ch in
x := (String.before s pos)::!x;
go (String.after s (pos + 1))
in
try go s
with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
try String.before s (String.index s ' ')
with Not_found -> s
" matched))
                   (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" matched)
                    (string-append "List.map before_space (split_nl & " matched ")"))
                   ((" blank_sep_strings &") "")
                   ((" Lexing.from_string &") ""))
                 #t))
             (add-after 'unpack 'replace-bundled-cddlib
               (lambda* (#:key inputs #:allow-other-keys)
                 (let* ((tarball (assoc-ref inputs "cddlib-src"))
                        (unpack-dir "cddlib_guix"))
                   (mkdir unpack-dir)
                   (with-directory-excursion unpack-dir
                     (invoke "tar" "xvf" tarball))
                   ;; 'scandir' lists "." and ".." first, so the third entry
                   ;; is taken as the directory extracted from the tarball.
                   (let ((lib-src-dir
                          (string-append unpack-dir "/"
                                         (list-ref (scandir unpack-dir) 2)
                                         "/lib-src")))
                     (for-each make-file-writable (find-files "cdd_src" ".*"))
                     (for-each
                      (lambda (source-file)
                        (copy-file source-file
                                   (string-append "cdd_src/"
                                                  (basename source-file))))
                      (find-files lib-src-dir ".*[ch]$")))
                   #t)))
             (add-after 'unpack 'fix-makefile
               (lambda _
                 ;; Remove system calls to 'git'.
                 (substitute* "Makefile"
                   (("^DESCRIPT:=pplacer-.*")
                    (string-append
                     "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
                 (substitute* "myocamlbuild.ml"
                   (("git describe --tags --long .*\\\" with")
                    (string-append
                     "echo -n v" ,version "-" ,commit "\" with")))
                 #t))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 ;; Copy everything under "bin" into the output.
                 (copy-recursively
                  "bin"
                  (string-append (assoc-ref outputs "out") "/bin"))
                 #t)))))
        (inputs
         `(("zlib" ,zlib "static")
           ("gsl" ,gsl)
           ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
           ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
           ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
           ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
           ("ocaml-sqlite3" ,(package-with-ocaml4.07 ocaml-sqlite3))
           ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
           ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
           ("ocaml-gsl" ,ocaml4.07-gsl-1)))
        (native-inputs
         `(("cddlib-src" ,(package-source cddlib))
           ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
           ("pkg-config" ,pkg-config)))
        (propagated-inputs
         `(("pplacer-scripts" ,pplacer-scripts)))
        (home-page "https://matsen.fhcrc.org/pplacer/")
        (synopsis "Phylogenetic placement of biological sequences")
        (description
         "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
        (license license:gpl3))))
  ;; This package is installed alongside 'pplacer'.  It is a separate package so
  ;; that it can use the python-build-system for the scripts that are
  ;; distributed alongside the main OCaml binaries.
  (define pplacer-scripts
    (package
      (inherit pplacer)
      (name "pplacer-scripts")
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; The Python code lives in the "scripts" sub-directory.
           (add-after 'unpack 'enter-scripts-dir
             (lambda _ (chdir "scripts") #t))
           (replace 'check
             (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
           (add-after 'install 'wrap-executables
             (lambda* (#:key inputs outputs #:allow-other-keys)
               ;; Prefix PATH for the two wrapped scripts: refpkg_align.py
               ;; gets both the hmmer and infernal "bin" directories,
               ;; hrefpkg_query.py only the hmmer one.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (hmmer-bin (string-append (assoc-ref inputs "hmmer")
                                                "/bin"))
                      (infernal-bin (string-append (assoc-ref inputs "infernal")
                                                   "/bin")))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,hmmer-bin ,infernal-bin)))
                 (wrap-program (string-append bin "/hrefpkg_query.py")
                   `("PATH" ":" prefix (,hmmer-bin))))
               #t)))))
      (inputs
       `(("infernal" ,infernal)
         ("hmmer" ,hmmer)))
      (propagated-inputs
       `(("python-biopython" ,python2-biopython)
         ("taxtastic" ,taxtastic)))
      (synopsis "Pplacer Python scripts")))
  ;; CheckM: quality assessment of genome bins, fetched from PyPI as
  ;; "checkm-genome".
  (define-public checkm
    (package
      (name "checkm")
      (version "1.1.3")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "checkm-genome" version))
         (sha256
          (base32
           "0i2nnki639hgjag17wlva2x0ymn37b4krqsf6akxddykhfbkdnkz"))))
      (build-system python-build-system)
      (inputs
       `(("python-dendropy" ,python-dendropy)
         ("python-matplotlib" ,python-matplotlib)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scipy" ,python-scipy)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Set HOME to /tmp ahead of the 'check phase.
           (add-before 'check 'set-HOME
             (lambda _ (setenv "HOME" "/tmp"))))
         ;; Some tests fail for unknown reasons.
         #:tests? #f))
      (home-page "https://ecogenomics.github.io/CheckM/")
      (synopsis "Assess the quality of putative genome bins")
      (description
       "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
      (license license:gpl3+)))
  ;; Deprecated name that now refers to 'checkm'.
  (define-public python2-checkm-genome
    (deprecated-package
     "python2-checkm-genome"
     checkm))
  ;; UMI-tools: handling of Unique Molecular Identifiers in sequencing reads,
  ;; fetched from PyPI as "umi_tools".
  (define-public umi-tools
    (package
      (name "umi-tools")
      (version "1.0.0")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "umi_tools" version))
         (sha256
          (base32
           "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
      (build-system python-build-system)
      (inputs
       `(("python-pandas" ,python-pandas)
         ("python-future" ,python-future)
         ("python-scipy" ,python-scipy)
         ("python-matplotlib" ,python-matplotlib)
         ("python-regex" ,python-regex)
         ("python-pysam" ,python-pysam)))
      (native-inputs
       `(("python-cython" ,python-cython)))
      (home-page "https://github.com/CGATOxford/UMI-tools")
      ;; UMI stands for "unique molecular identifier", as spelled out in the
      ;; description below.
      (synopsis "Tools for analyzing unique molecular identifiers")
      ;; Every command name is wrapped in a well-formed @code{...}; a bare
      ;; "@{" is not valid Texinfo.
      (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
      (license license:expat)))
  ;; ataqv: ATAC-seq quality control toolkit.  It is built from its Makefile;
  ;; the install prefix and the Boost and htslib locations are passed as make
  ;; flags.
  (define-public ataqv
    (package
      (name "ataqv")
      (version "1.0.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ParkerLab/ataqv")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("lcov" ,lcov)))
      (inputs
       `(("boost" ,boost)
         ("htslib" ,htslib)
         ("ncurses" ,ncurses)
         ("zlib" ,zlib)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure))
         #:test-target "test"
         #:make-flags
         (let ((out (assoc-ref %outputs "out"))
               (boost-dir (assoc-ref %build-inputs "boost"))
               (htslib-dir (assoc-ref %build-inputs "htslib")))
           (list (string-append "prefix=" out)
                 (string-append "BOOST_ROOT=" boost-dir)
                 (string-append "HTSLIB_ROOT=" htslib-dir)))))
      (home-page "https://github.com/ParkerLab/ataqv")
      (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
      (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
      (license license:gpl3+)))
  ;; PSIplot: R package for plotting percent spliced-in values, fetched from
  ;; the upstream Git repository at the tag "v" + version.
  (define-public r-psiplot
    (package
      (name "r-psiplot")
      (version "2.3.0")
      (home-page "https://github.com/kcha/psiplot")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/kcha/psiplot")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-mass" ,r-mass)
         ("r-dplyr" ,r-dplyr)
         ("r-tidyr" ,r-tidyr)
         ("r-purrr" ,r-purrr)
         ("r-readr" ,r-readr)
         ("r-magrittr" ,r-magrittr)
         ("r-ggplot2" ,r-ggplot2)))
      (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
      (description
       "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
      (license license:expat)))
  ;; ont_fast5_api: Python interface to Oxford Nanopore fast5 (HDF5) files,
  ;; fetched from the upstream Git repository at the "release_" + version tag.
  (define-public python-ont-fast5-api
    (package
      (name "python-ont-fast5-api")
      (version "1.4.4")
      (home-page "https://github.com/nanoporetech/ont_fast5_api")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/nanoporetech/ont_fast5_api")
               (commit (string-append "release_" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
      (build-system python-build-system)
      (propagated-inputs
       `(("python-numpy" ,python-numpy)
         ("python-six" ,python-six)
         ("python-h5py" ,python-h5py)
         ("python-progressbar33" ,python-progressbar33)))
      (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
      (description
       "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
      (license license:mpl2.0)))
  ;; tbsp: SNP-based trajectory inference.  The package is pinned to a specific
  ;; upstream commit and versioned with 'git-version'.
  (define-public tbsp
    (let ((revision "1")
          (commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907"))
      (package
        (name "tbsp")
        (version (git-version "1.0.0" revision commit))
        (home-page "https://github.com/phoenixding/tbsp/")
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/phoenixding/tbsp")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
        (build-system python-build-system)
        ;; No tests are included.
        (arguments
         '(#:tests? #f))
        (inputs
         `(("python-matplotlib" ,python-matplotlib)
           ("python-networkx" ,python-networkx)
           ("python-numpy" ,python-numpy)
           ("python-pybigwig" ,python-pybigwig)
           ("python-biopython" ,python-biopython)
           ("python-scikit-learn" ,python-scikit-learn)
           ("python-scipy" ,python-scipy)))
        (synopsis "SNP-based trajectory inference")
        (description
         "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
        (license license:expat))))
  ;; tabixpp: C++ wrapper around tabix.  The bundled htslib copy is removed in
  ;; a source snippet and the build uses the htslib input instead.  Besides the
  ;; "tabix++" program, a shared and a static library, the header and a
  ;; hand-written pkg-config file are installed.
  (define-public tabixpp
    (package
      (name "tabixpp")
      (version "1.1.0")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/tabixpp")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
                (modules '((guix build utils)))
                ;; Nothing is unquoted in the snippet, so a plain quote is
                ;; enough.
                (snippet
                 '(begin
                    (delete-file-recursively "htslib") #t))))
      (build-system gnu-build-system)
      (inputs
       `(("htslib" ,htslib)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           ;; The build phase needs overriding the location of htslib.
           (replace 'build
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib-ref (assoc-ref inputs "htslib")))
                 (invoke "make"
                         (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                         (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                         "HTS_HEADERS=" ; No need to check for headers here.
                         ;; LIBPATH holds linker search directories, so it
                         ;; must name htslib's "lib" directory rather than
                         ;; its "include" directory.
                         (string-append "LIBPATH=-L. -L" htslib-ref "/lib"))
                 ;; Additionally build a shared and a static library from
                 ;; the object file produced by 'make'.
                 (invoke "g++" "-shared" "-o" "libtabixpp.so" "tabix.o" "-lhts")
                 (invoke "ar" "rcs" "libtabixpp.a" "tabix.o"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (lib (string-append out "/lib"))
                      (bin (string-append out "/bin")))
                 (install-file "tabix++" bin)
                 (install-file "libtabixpp.so" lib)
                 (install-file "libtabixpp.a" lib)
                 (install-file "tabix.hpp" (string-append out "/include"))
                 ;; Write a pkg-config file for the library.  In the format
                 ;; string, "~@" at the end of a line emits the newline and
                 ;; skips the indentation of the following line.
                 (mkdir-p (string-append lib "/pkgconfig"))
                 (with-output-to-file (string-append lib "/pkgconfig/tabixpp.pc")
                   (lambda _
                     (format #t "prefix=~a~@
                             exec_prefix=${prefix}~@
                             libdir=${exec_prefix}/lib~@
                             includedir=${prefix}/include~@
                             ~@
                             ~@
                             Name: libtabixpp~@
                             Version: ~a~@
                             Description: C++ wrapper around tabix project~@
                             Libs: -L${libdir} -ltabixpp~@
                             Cflags: -I${includedir}~%"
                             out ,version)))
                 #t))))))
      (home-page "https://github.com/ekg/tabixpp")
      (synopsis "C++ wrapper around tabix project")
      (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
      (license license:expat)))
  ;; smithwaterman: Smith-Waterman-Gotoh alignment program and library, pinned
  ;; to an upstream commit.  On top of what the Makefile builds, a shared
  ;; library is linked from the object files and a pkg-config file is written
  ;; at install time.
  (define-public smithwaterman
    (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
      (package
        (name "smithwaterman")
        (version (git-version "0.0.0" "2" commit))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/ekg/smithwaterman/")
                        (commit commit)))
                  (file-name (git-file-name name version))
                  (sha256
                   (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
        (build-system gnu-build-system)
        (arguments
         `(#:make-flags '("libsw.a" "all")
           #:tests? #f ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure) ; There is no configure phase.
             (add-after 'unpack 'patch-source
               (lambda _
                 ;; Add -fPIC wherever the Makefile has "-c ", so that the
                 ;; objects can later be linked into a shared library.
                 (substitute* "Makefile"
                   (("-c ") "-c -fPIC "))
                 #t))
             (add-after 'build 'build-dynamic
               (lambda _
                 (let ((objects '("smithwaterman.o" "SmithWatermanGotoh.o"
                                  "disorder.o" "BandedSmithWaterman.o"
                                  "LeftAlign.o" "Repeats.o" "IndelAllele.o")))
                   (apply invoke "g++"
                          "-shared" "-o" "libsmithwaterman.so"
                          objects))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (lib (string-append out "/lib"))
                        (bin (string-append out "/bin"))
                        (include (string-append out "/include/smithwaterman"))
                        (pc-file (string-append lib "/pkgconfig/smithwaterman.pc")))
                   (install-file "smithwaterman" bin)
                   (for-each
                    (lambda (header)
                      (install-file header include))
                    (find-files "." "\\.h$"))
                   (install-file "libsmithwaterman.so" lib)
                   (install-file "libsw.a" lib)
                   (mkdir-p (string-append lib "/pkgconfig"))
                   ;; In the format string, "~@" at the end of a line emits
                   ;; the newline and skips the following indentation.
                   (call-with-output-file pc-file
                     (lambda (port)
                       (format port "prefix=~a~@
                               exec_prefix=${prefix}~@
                               libdir=${exec_prefix}/lib~@
                               includedir=${prefix}/include/smithwaterman~@
                               ~@
                               ~@
                               Name: smithwaterman~@
                               Version: ~a~@
                               Description: smith-waterman-gotoh alignment algorithm~@
                               Libs: -L${libdir} -lsmithwaterman~@
                               Cflags: -I${includedir}~%"
                               out ,version))))
                 #t)))))
        (home-page "https://github.com/ekg/smithwaterman")
        (synopsis "Implementation of the Smith-Waterman algorithm")
        (description "Implementation of the Smith-Waterman algorithm.")
        ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
        (license (list license:gpl2 license:expat)))))
  ;; multichoose: multiset combination generation.  The Makefile has no
  ;; install target, so the two programs and their headers are installed by
  ;; hand.
  (define-public multichoose
    (package
      (name "multichoose")
      (version "1.0.3")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/multichoose/")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; Tests require node.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (include (string-append out "/include")))
                 ;; TODO: There are Python modules for these programs too.
                 (for-each (lambda (program)
                             (install-file program bin))
                           '("multichoose" "multipermute"))
                 (for-each (lambda (header)
                             (install-file header include))
                           '("multichoose.h" "multipermute.h")))
               #t)))))
      (home-page "https://github.com/ekg/multichoose")
      (synopsis "Efficient loopless multiset combination generation algorithm")
      (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
      (license license:expat)))
  ;; fsom: self-organizing map code, pinned to an upstream commit.  Only the
  ;; "fsom" file produced by the build is installed, into "bin".
  (define-public fsom
    (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
      (package
        (name "fsom")
        (version (git-version "0.0.0" "1" commit))
        (source (origin
                  (method git-fetch)
                  (uri (git-reference
                        (url "https://github.com/ekg/fsom/")
                        (commit commit)))
                  (file-name (git-file-name name version))
                  (sha256
                   (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
        (build-system gnu-build-system)
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             (delete 'configure) ; There is no configure phase.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin")))
                   (install-file "fsom" bin))
                 #t)))
           ;; There are no tests to run.
           #:tests? #f))
        (home-page "https://github.com/ekg/fsom")
        (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
        (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
        (license license:gpl3))))
  ;; fastahack: FASTA indexing and sequence extraction.  In addition to the
  ;; program, a shared library is linked from the object files and installed
  ;; together with the headers and a hand-written pkg-config file.
  (define-public fastahack
    (package
      (name "fastahack")
      (version "1.0.0")
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/fastahack/")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (add-after 'unpack 'patch-source
             (lambda _
               ;; Add -fPIC wherever the Makefile has "-c ", so that the
               ;; objects can later be linked into a shared library.
               (substitute* "Makefile"
                 (("-c ") "-c -fPIC "))
               #t))
           (add-after 'build 'build-dynamic
             (lambda _
               (let ((objects '("Fasta.o" "FastaHack.o" "split.o" "disorder.o")))
                 (apply invoke "g++"
                        "-shared" "-o" "libfastahack.so"
                        objects))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (lib (string-append out "/lib"))
                      (include (string-append out "/include/fastahack"))
                      (pc-file (string-append lib "/pkgconfig/fastahack.pc")))
                 (mkdir-p include)
                 (for-each
                  (lambda (header)
                    (install-file header include))
                  (find-files "." "\\.h$"))
                 (install-file "fastahack" bin)
                 (install-file "libfastahack.so" lib)
                 (mkdir-p (string-append lib "/pkgconfig"))
                 ;; In the format string, "~@" at the end of a line emits the
                 ;; newline and skips the following indentation.
                 (call-with-output-file pc-file
                   (lambda (port)
                     (format port "prefix=~a~@
                             exec_prefix=${prefix}~@
                             libdir=${exec_prefix}/lib~@
                             includedir=${prefix}/include/fastahack~@
                             ~@
                             ~@
                             Name: fastahack~@
                             Version: ~a~@
                             Description: Indexing and sequence extraction from FASTA files~@
                             Libs: -L${libdir} -lfastahack~@
                             Cflags: -I${includedir}~%"
                             out ,version))))
               #t)))))
      (home-page "https://github.com/ekg/fastahack")
      (synopsis "Indexing and sequence extraction from FASTA files")
      (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
      (license (list license:expat license:gpl2))))
  ;; vcflib: library and tools for VCF files.  The source snippet rewrites
  ;; CMakeLists.txt and the #include lines to use the packaged fastahack and
  ;; smithwaterman, then deletes the bundled sub-directories.  Four of those
  ;; sub-directories are re-created at build time from the sources of the
  ;; corresponding packages.
  (define-public vcflib
    (package
      (name "vcflib")
      (version "1.0.2")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/vcflib/vcflib")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1k1z3876kbzifj1sqfzsf3lgb4rw779hvkg6ryxbyq5bc2paj9kh"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the CMake lines mentioning the bundled fastahack and
             ;; smithwaterman, and look both up with pkg-config instead.
             (substitute* "CMakeLists.txt"
               ((".*fastahack.*") "")
               ((".*smithwaterman.*") "")
               (("(pkg_check_modules\\(TABIXPP)" text)
                (string-append
                 "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
                 "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
                 text))
               (("\\$\\{TABIXPP_LIBRARIES\\}" text)
                (string-append "${FASTAHACK_LIBRARIES} "
                               "${SMITHWATERMAN_LIBRARIES} "
                               text)))
             ;; Rewrite the header references to the installed locations.
             (substitute* (find-files "." "\\.(h|c)(pp)?$")
               (("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
               (("\"convert.h\"") "<smithwaterman/convert.h>")
               (("\"disorder.h\"") "<smithwaterman/disorder.h>")
               (("Fasta.h") "fastahack/Fasta.h"))
             (for-each delete-file-recursively
                       '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                         "libVCFH" "multichoose" "smithwaterman"))
             #t))))
      (build-system cmake-build-system)
      (native-inputs
       `(("pkg-config" ,pkg-config)
         ;; Submodules.
         ;; This package builds against the .o files so we need to extract the source.
         ("filevercmp-src" ,(package-source filevercmp))
         ("fsom-src" ,(package-source fsom))
         ("intervaltree-src" ,(package-source intervaltree))
         ("multichoose-src" ,(package-source multichoose))))
      (inputs
       `(("bzip2" ,bzip2)
         ("htslib" ,htslib)
         ("fastahack" ,fastahack)
         ("perl" ,perl)
         ("python" ,python)
         ("smithwaterman" ,smithwaterman)
         ("tabixpp" ,tabixpp)
         ("xz" ,xz)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f ; no tests
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'build-shared-library
             (lambda _
               ;; Build libvcflib as a shared library and make the test
               ;; Makefile refer to it.
               (substitute* "CMakeLists.txt"
                 (("vcflib STATIC") "vcflib SHARED"))
               (substitute* "test/Makefile"
                 (("libvcflib.a") "libvcflib.so"))
               #t))
           (add-after 'unpack 'unpack-submodule-sources
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Populate DIRECTORY from the input named LABEL: copy it when
               ;; the input is a directory, otherwise extract it with tar,
               ;; dropping the leading path component.
               (define (extract label directory)
                 (let ((origin-path (assoc-ref inputs label)))
                   (mkdir directory)
                   (with-directory-excursion directory
                     (if (file-is-directory? origin-path)
                         (copy-recursively origin-path ".")
                         (invoke "tar" "xvf"
                                 origin-path
                                 "--strip-components=1")))))
               (and
                (extract "filevercmp-src" "filevercmp")
                (extract "fsom-src" "fsom")
                (extract "intervaltree-src" "intervaltree")
                (extract "multichoose-src" "multichoose"))
               #t))
           ;; This pkg-config file is provided by other distributions.
           (add-after 'install 'install-pkg-config-file
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (pkgconfig (string-append out "/lib/pkgconfig"))
                      (pc-file (string-append pkgconfig "/vcflib.pc")))
                 (mkdir-p pkgconfig)
                 ;; In the format string, "~@" at the end of a line emits the
                 ;; newline and skips the following indentation.
                 (call-with-output-file pc-file
                   (lambda (port)
                     (format port "prefix=~a~@
                             exec_prefix=${prefix}~@
                             libdir=${exec_prefix}/lib~@
                             includedir=${prefix}/include~@
                             ~@
                             Name: vcflib~@
                             Version: ~a~@
                             Requires: smithwaterman, fastahack, tabixpp~@
                             Description: C++ library for parsing and manipulating VCF files~@
                             Libs: -L${libdir} -lvcflib~@
                             Cflags: -I${includedir}~%"
                             out ,version)))
                 #t))))))
      (home-page "https://github.com/vcflib/vcflib/")
      (synopsis "Library for parsing and manipulating VCF files")
      (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
      (license license:expat)))
  13371. (define-public freebayes
  13372. (package
  13373. (name "freebayes")
  13374. (version "1.3.5")
  13375. (source (origin
  13376. (method git-fetch)
  13377. (uri (git-reference
  13378. (url "https://github.com/freebayes/freebayes")
  13379. (commit (string-append "v" version))))
  13380. (file-name (git-file-name name version))
  13381. (sha256
  13382. (base32 "1l0z88gq57kva677a6xri5g9k2d9h9lk5yk1q2xmq64wqhv7dvc3"))
  13383. (modules '((guix build utils)))
  13384. (snippet
  13385. '(begin
  13386. (delete-file-recursively "contrib/htslib")
  13387. #t))))
  13388. (build-system meson-build-system)
  13389. (inputs
  13390. `(("fastahack" ,fastahack)
  13391. ("htslib" ,htslib)
  13392. ("smithwaterman" ,smithwaterman)
  13393. ("tabixpp" ,tabixpp)
  13394. ("vcflib" ,vcflib)))
  13395. (native-inputs
  13396. `(("bash-tap" ,bash-tap)
  13397. ("bc" ,bc)
  13398. ("grep" ,grep) ; Built with perl support.
  13399. ("parallel" ,parallel)
  13400. ("perl" ,perl)
  13401. ("pkg-config" ,pkg-config)
  13402. ("samtools" ,samtools)
  13403. ("simde" ,simde)
  13404. ;; This submodule is needed to run the tests.
  13405. ("test-simple-bash-src"
  13406. ,(origin
  13407. (method git-fetch)
  13408. (uri (git-reference
  13409. (url "https://github.com/ingydotnet/test-simple-bash/")
  13410. (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
  13411. (file-name "test-simple-bash-src-checkout")
  13412. (sha256
  13413. (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
  13414. (arguments
  13415. `(#:phases
  13416. (modify-phases %standard-phases
  13417. (add-after 'unpack 'patch-source
  13418. (lambda* (#:key inputs #:allow-other-keys)
  13419. (let ((bash-tap (assoc-ref inputs "bash-tap")))
  13420. (substitute* (find-files "test/t")
  13421. (("BASH_TAP_ROOT=bash-tap")
  13422. (string-append "BASH_TAP_ROOT=" bash-tap "/bin"))
  13423. (("bash-tap/bash-tap-bootstrap")
  13424. (string-append bash-tap "/bin/bash-tap-bootstrap"))
  13425. (("source.*bash-tap-bootstrap")
  13426. (string-append "source " bash-tap "/bin/bash-tap-bootstrap")))
  13427. (substitute* '("src/BedReader.cpp"
  13428. "src/BedReader.h")
  13429. (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
  13430. (substitute* "meson.build"
  13431. ;; Our pkg-config file is vcflib.pc
  13432. (("libvcflib") "vcflib")
  13433. (("vcflib_inc,") ""))
  13434. #t)))
  13435. (add-after 'unpack 'unpack-submodule-sources
  13436. (lambda* (#:key inputs #:allow-other-keys)
  13437. (mkdir-p "test/test-simple-bash")
  13438. (copy-recursively (assoc-ref inputs "test-simple-bash-src")
  13439. "test/test-simple-bash")
  13440. #t))
  13441. ;; The slow tests take longer than the specified timeout.
  13442. ,@(if (any (cute string=? <> (%current-system))
  13443. '("armhf-linux" "aarch64-linux"))
  13444. '((replace 'check
  13445. (lambda* (#:key tests? #:allow-other-keys)
  13446. (when tests?
  13447. (invoke "meson" "test" "--timeout-multiplier" "5"))
  13448. #t)))
  13449. '()))))
  13450. (home-page "https://github.com/freebayes/freebayes")
  13451. (synopsis "Haplotype-based variant detector")
  13452. (description "FreeBayes is a Bayesian genetic variant detector designed to
  13453. find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
  13454. indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
  13455. complex events (composite insertion and substitution events) smaller than the
  13456. length of a short-read sequencing alignment.")
  13457. (license license:expat)))
  13458. (define-public samblaster
  13459. (package
  13460. (name "samblaster")
  13461. (version "0.1.24")
  13462. (source (origin
  13463. (method git-fetch)
  13464. (uri (git-reference
  13465. (url "https://github.com/GregoryFaust/samblaster")
  13466. (commit (string-append "v." version))))
  13467. (file-name (git-file-name name version))
  13468. (sha256
  13469. (base32
  13470. "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
  13471. (build-system gnu-build-system)
  13472. (arguments
  13473. `(#:tests? #f ; there are none
  13474. #:phases
  13475. (modify-phases %standard-phases
  13476. (delete 'configure) ; There is no configure phase.
  13477. (replace 'install
  13478. (lambda* (#:key outputs #:allow-other-keys)
  13479. (install-file "samblaster"
  13480. (string-append (assoc-ref outputs "out") "/bin"))
  13481. #t)))))
  13482. (home-page "https://github.com/GregoryFaust/samblaster")
  13483. (synopsis "Mark duplicates in paired-end SAM files")
  13484. (description "Samblaster is a fast and flexible program for marking
  13485. duplicates in read-id grouped paired-end SAM files. It can also optionally
  13486. output discordant read pairs and/or split read mappings to separate SAM files,
  13487. and/or unmapped/clipped reads to a separate FASTQ file. When marking
  13488. duplicates, samblaster will require approximately 20MB of memory per 1M read
  13489. pairs.")
  13490. (license license:expat)))
  13491. (define-public r-velocyto
  13492. (let ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
  13493. (revision "1"))
  13494. (package
  13495. (name "r-velocyto")
  13496. (version (git-version "0.6" revision commit))
  13497. (source
  13498. (origin
  13499. (method git-fetch)
  13500. (uri (git-reference
  13501. (url "https://github.com/velocyto-team/velocyto.R")
  13502. (commit commit)))
  13503. (file-name (git-file-name name version))
  13504. (sha256
  13505. (base32
  13506. "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
  13507. (build-system r-build-system)
  13508. (inputs
  13509. `(("boost" ,boost)))
  13510. (propagated-inputs
  13511. `(("r-hdf5r" ,r-hdf5r)
  13512. ("r-mass" ,r-mass)
  13513. ("r-mgcv" ,r-mgcv)
  13514. ("r-pcamethods" ,r-pcamethods)
  13515. ("r-rcpp" ,r-rcpp)
  13516. ("r-rcpparmadillo" ,r-rcpparmadillo)
  13517. ;; Suggested packages
  13518. ("r-rtsne" ,r-rtsne)
  13519. ("r-cluster" ,r-cluster)
  13520. ("r-abind" ,r-abind)
  13521. ("r-h5" ,r-h5)
  13522. ("r-biocgenerics" ,r-biocgenerics)
  13523. ("r-genomicalignments" ,r-genomicalignments)
  13524. ("r-rsamtools" ,r-rsamtools)
  13525. ("r-edger" ,r-edger)
  13526. ("r-igraph" ,r-igraph)))
  13527. (home-page "https://velocyto.org")
  13528. (synopsis "RNA velocity estimation in R")
  13529. (description
  13530. "This package provides basic routines for estimation of gene-specific
  13531. transcriptional derivatives and visualization of the resulting velocity
  13532. patterns.")
  13533. (license license:gpl3))))
  13534. (define-public methyldackel
  13535. (package
  13536. (name "methyldackel")
  13537. (version "0.5.1")
  13538. (source (origin
  13539. (method git-fetch)
  13540. (uri (git-reference
  13541. (url "https://github.com/dpryan79/MethylDackel")
  13542. (commit version)))
  13543. (file-name (git-file-name name version))
  13544. (sha256
  13545. (base32
  13546. "1sfhf2ap75qxpnmy1ifgmxqs18rq8mah9mcgkby73vc6h0sw99ws"))))
  13547. (build-system gnu-build-system)
  13548. (arguments
  13549. `(#:test-target "test"
  13550. #:make-flags
  13551. (list "CC=gcc"
  13552. (string-append "prefix="
  13553. (assoc-ref %outputs "out") "/bin/"))
  13554. #:phases
  13555. (modify-phases %standard-phases
  13556. (replace 'configure
  13557. (lambda* (#:key outputs #:allow-other-keys)
  13558. (substitute* "Makefile"
  13559. (("-lhts ") "-lhts -lBigWig ")
  13560. (("install MethylDackel \\$\\(prefix\\)" match)
  13561. (string-append "install -d $(prefix); " match)))
  13562. #t)))))
  13563. (inputs
  13564. `(("curl" ,curl) ; XXX: needed by libbigwig
  13565. ("htslib" ,htslib-1.9)
  13566. ("libbigwig" ,libbigwig)
  13567. ("zlib" ,zlib)))
  13568. ;; Needed for tests
  13569. (native-inputs
  13570. `(("python" ,python-wrapper)))
  13571. (home-page "https://github.com/dpryan79/MethylDackel")
  13572. (synopsis "Universal methylation extractor for BS-seq experiments")
  13573. (description
  13574. "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
  13575. file containing some form of BS-seq alignments and extract per-base
  13576. methylation metrics from them. MethylDackel requires an indexed fasta file
  13577. containing the reference genome as well.")
  13578. ;; See https://github.com/dpryan79/MethylDackel/issues/85
  13579. (license license:expat)))
;; This package bundles PCRE 8.02 and cannot be built with the current
;; version.
;;
;; PHAST is built from its Makefiles.  The 'configure' phase is replaced by
;; one that patches those Makefiles for the clapack input, and the 'check'
;; phase only runs the "msa_view" target of the test Makefile.
(define-public phast
  (package
    (name "phast")
    (version "1.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/CshlSiepelLab/phast")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10lpbllvny923jjbbyrpxahhd1m5h7sbj9gx7rd123rg10mlidki"))))
    (build-system gnu-build-system)
    (arguments
     ;; NOTE(review): "CC=gcc" is hard-coded; confirm whether the compiler
     ;; for the target should be used here for cross-compilation.
     `(#:make-flags
       (list "CC=gcc"
             (string-append "DESTDIR=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Fix syntax
             ;; NOTE(review): the pattern and the replacement below differ
             ;; only in whitespace (presumably spaces versus a tab, which
             ;; make requires) -- verify the exact characters against the
             ;; upstream file before touching this line.
             (substitute* "test/Makefile"
               ((" ") " "))
             ;; Look for CLAPACK in the "clapack" input instead of /usr/lib.
             (substitute* "Makefile"
               (("CLAPACKPATH=/usr/lib")
                (string-append "CLAPACKPATH="
                               (assoc-ref inputs "clapack") "/lib")))
             ;; Renaming the libraries is not necessary with our version of
             ;; CLAPACK.
             (substitute* "src/lib/Makefile"
               (("ifdef CLAPACKPATH") "ifdef UNNECESSARY"))
             ;; Adjust library names and switch from static archives to
             ;; shared objects.
             (substitute* "src/make-include.mk"
               (("-lblaswr") "-lblas")
               (("-ltmg") "-ltmglib")
               (("liblapack.a") "liblapack.so")
               (("libblas.a") "libblas.so")
               (("libf2c.a") "libf2c.so"))
             ;; Rewrite the /opt and /usr/ path prefixes; presumably these
             ;; are installation paths that get combined with DESTDIR --
             ;; TODO confirm.
             (substitute* "src/Makefile"
               (("/opt") "/share")
               (("/usr/") "/"))
             #t))
         ;; NOTE(review): this phase runs unconditionally; it does not
         ;; consult the #:tests? argument.
         (replace 'check
           (lambda _
             ;; Put the "bin" directory of the build tree first on PATH.
             (setenv "PATH"
                     (string-append (getcwd) "/bin:" (getenv "PATH")))
             ;; Disable broken test
             (substitute* "test/Makefile"
               ((".*if.*hmrc_summary" m) (string-append "#" m)))
             ;; Only run the msa_view tests because the others fail for
             ;; unknown reasons.
             (invoke "make" "-C" "test" "msa_view"))))))
    (inputs
     `(("clapack" ,clapack)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://compgen.cshl.edu/phast/")
    (synopsis "Phylogenetic analysis with space/time models")
    (description
     "Phylogenetic Analysis with Space/Time models (PHAST) is a collection of
command-line programs and supporting libraries for comparative and
evolutionary genomics. Best known as the search engine behind the
Conservation tracks in the University of California, Santa Cruz (UCSC) Genome
Browser, PHAST also includes several tools for phylogenetic modeling,
functional element identification, as well as utilities for manipulating
alignments, trees and genomic annotations.")
    (license license:bsd-3)))
  13650. (define-public python-gffutils
  13651. ;; The latest release is older more than a year than the latest commit
  13652. (let ((commit "4034c54600813b1402945e12faa91b3a53162cf1")
  13653. (revision "1"))
  13654. (package
  13655. (name "python-gffutils")
  13656. (version (git-version "0.9" revision commit))
  13657. (source
  13658. (origin
  13659. (method git-fetch)
  13660. (uri (git-reference
  13661. (url "https://github.com/daler/gffutils")
  13662. (commit commit)))
  13663. (file-name (git-file-name name version))
  13664. (sha256
  13665. (base32
  13666. "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
  13667. (build-system python-build-system)
  13668. (arguments
  13669. `(#:phases
  13670. (modify-phases %standard-phases
  13671. (replace 'check
  13672. (lambda _
  13673. ;; Tests need to access the HOME directory
  13674. (setenv "HOME" "/tmp")
  13675. (invoke "nosetests" "-a" "!slow")))
  13676. (add-after 'unpack 'make-gz-files-writable
  13677. (lambda _
  13678. (for-each make-file-writable
  13679. (find-files "." "\\.gz"))
  13680. #t)))))
  13681. (propagated-inputs
  13682. `(("python-argcomplete" ,python-argcomplete)
  13683. ("python-argh" ,python-argh)
  13684. ("python-biopython" ,python-biopython)
  13685. ("python-pybedtools" ,python-pybedtools)
  13686. ("python-pyfaidx" ,python-pyfaidx)
  13687. ("python-simplejson" ,python-simplejson)
  13688. ("python-six" ,python-six)))
  13689. (native-inputs
  13690. `(("python-nose" , python-nose)))
  13691. (home-page "https://github.com/daler/gffutils")
  13692. (synopsis "Tool for manipulation of GFF and GTF files")
  13693. (description
  13694. "python-gffutils is a Python package for working with and manipulating
  13695. the GFF and GTF format files typically used for genomic annotations. The
  13696. files are loaded into a SQLite database, allowing much more complex
  13697. manipulation of hierarchical features (e.g., genes, transcripts, and exons)
  13698. than is possible with plain-text methods alone.")
  13699. (license license:expat))))
  13700. (define-public indelfixer
  13701. (package
  13702. (name "indelfixer")
  13703. (version "1.1")
  13704. (source (origin
  13705. (method git-fetch)
  13706. (uri (git-reference
  13707. (url "https://github.com/cbg-ethz/InDelFixer/")
  13708. (commit (string-append "v" version))))
  13709. (file-name (git-file-name name version))
  13710. (sha256
  13711. (base32
  13712. "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79"))))
  13713. (build-system ant-build-system)
  13714. (arguments
  13715. `(#:jar-name "InDelFixer.jar"
  13716. #:source-dir "src/main/java"
  13717. #:test-dir "src/test"))
  13718. (inputs
  13719. `(("java-commons-lang2" ,java-commons-lang)
  13720. ("java-args4j" ,java-args4j)))
  13721. (native-inputs
  13722. `(("java-junit" ,java-junit)))
  13723. (home-page "https://github.com/cbg-ethz/InDelFixer/")
  13724. (synopsis "Iterative and sensitive NGS sequence aligner")
  13725. (description "InDelFixer is a sensitive aligner for 454, Illumina and
  13726. PacBio data, employing a full Smith-Waterman alignment against a reference.
  13727. This Java command line application aligns Next-Generation Sequencing (NGS) and
  13728. third-generation reads to a set of reference sequences, by a prior fast k-mer
  13729. matching and removes indels, causing frame shifts. In addition, only a
  13730. specific region can be considered. An iterative refinement of the alignment
  13731. can be performed, by alignment against the consensus sequence with wobbles.
  13732. The output is in SAM format.")
  13733. (license license:gpl3+)))
  13734. (define-public libsbml
  13735. (package
  13736. (name "libsbml")
  13737. (version "5.18.0")
  13738. (source (origin
  13739. (method url-fetch)
  13740. (uri (string-append "mirror://sourceforge/sbml/libsbml/"
  13741. version "/stable/libSBML-"
  13742. version "-core-src.tar.gz"))
  13743. (sha256
  13744. (base32
  13745. "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
  13746. (build-system cmake-build-system)
  13747. (arguments
  13748. `(#:test-target "test"
  13749. #:configure-flags
  13750. (list "-DWITH_CHECK=ON"
  13751. (string-append "-DLIBXML_LIBRARY="
  13752. (assoc-ref %build-inputs "libxml2")
  13753. "/lib/libxml2.so")
  13754. (string-append "-DLIBXML_INCLUDE_DIR="
  13755. (assoc-ref %build-inputs "libxml2")
  13756. "/include/libxml2"))))
  13757. (propagated-inputs
  13758. `(("libxml2" ,libxml2)))
  13759. (native-inputs
  13760. `(("check" ,check-0.14)
  13761. ("swig" ,swig)))
  13762. (home-page "http://sbml.org/Software/libSBML")
  13763. (synopsis "Process SBML files and data streams")
  13764. (description "LibSBML is a library to help you read, write, manipulate,
  13765. translate, and validate SBML files and data streams. The @dfn{Systems Biology
  13766. Markup Language} (SBML) is an interchange format for computer models of
  13767. biological processes. SBML is useful for models of metabolism, cell
  13768. signaling, and more. It continues to be evolved and expanded by an
  13769. international community.")
  13770. (license license:lgpl2.1+)))
  13771. (define-public kraken2
  13772. (package
  13773. (name "kraken2")
  13774. (version "2.1.1")
  13775. (source (origin
  13776. (method git-fetch)
  13777. (uri (git-reference
  13778. (url "https://github.com/DerrickWood/kraken2")
  13779. (commit (string-append "v" version))))
  13780. (file-name (git-file-name name version))
  13781. (sha256
  13782. (base32
  13783. "0h7a7vygd7y5isbrnc6srwq6xj1rmyd33pm8mmcgfkmlxlg5vkg3"))))
  13784. (build-system gnu-build-system)
  13785. (arguments
  13786. `(#:tests? #false ; there are none
  13787. #:make-flags (list "-C" "src"
  13788. (string-append "KRAKEN2_DIR="
  13789. (assoc-ref %outputs "out") "/bin"))
  13790. #:phases
  13791. (modify-phases %standard-phases
  13792. (delete 'configure)
  13793. (add-before 'install 'install-scripts
  13794. (lambda* (#:key outputs #:allow-other-keys)
  13795. (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
  13796. (replacements `(("KRAKEN2_DIR" . ,bin)
  13797. ("VERSION" . ,,version))))
  13798. (mkdir-p bin)
  13799. (with-directory-excursion "scripts"
  13800. (let ((scripts (find-files "." ".*")))
  13801. (substitute* scripts
  13802. (("#####=([^=]+)=#####" _ key)
  13803. (or (assoc-ref replacements key)
  13804. (error (format #false "unknown key: ~a~%" key)))))
  13805. (substitute* "kraken2"
  13806. (("compression_program = \"bzip2\"")
  13807. (string-append "compression_program = \""
  13808. (which "bzip2")
  13809. "\""))
  13810. (("compression_program = \"gzip\"")
  13811. (string-append "compression_program = \""
  13812. (which "gzip")
  13813. "\"")))
  13814. (substitute* '("download_genomic_library.sh"
  13815. "download_taxonomy.sh"
  13816. "16S_gg_installation.sh"
  13817. "16S_silva_installation.sh"
  13818. "16S_rdp_installation.sh")
  13819. (("wget") (which "wget")))
  13820. (substitute* '("download_taxonomy.sh"
  13821. "download_genomic_library.sh"
  13822. "rsync_from_ncbi.pl")
  13823. (("rsync -")
  13824. (string-append (which "rsync") " -")))
  13825. (substitute* "mask_low_complexity.sh"
  13826. (("which") (which "which")))
  13827. (substitute* '("mask_low_complexity.sh"
  13828. "download_genomic_library.sh"
  13829. "16S_silva_installation.sh")
  13830. (("sed -e ")
  13831. (string-append (which "sed") " -e ")))
  13832. (substitute* '("rsync_from_ncbi.pl"
  13833. "16S_rdp_installation.sh"
  13834. "16S_silva_installation.sh"
  13835. "16S_gg_installation.sh"
  13836. "download_taxonomy.sh"
  13837. "download_genomic_library.sh")
  13838. (("gunzip") (which "gunzip")))
  13839. (for-each (lambda (script)
  13840. (chmod script #o555)
  13841. (install-file script bin))
  13842. scripts)))))))))
  13843. (inputs
  13844. `(("gzip" ,gzip)
  13845. ("perl" ,perl)
  13846. ("rsync" ,rsync)
  13847. ("sed" ,sed)
  13848. ("wget" ,wget)
  13849. ("which" ,which)))
  13850. (home-page "https://github.com/DerrickWood/kraken2")
  13851. (synopsis "Taxonomic sequence classification system")
  13852. (description "Kraken is a taxonomic sequence classifier that assigns
  13853. taxonomic labels to DNA sequences. Kraken examines the k-mers within a query
  13854. sequence and uses the information within those k-mers to query a
  13855. database. That database maps k-mers to the lowest common ancestor (LCA) of all
  13856. genomes known to contain a given k-mer.")
  13857. (license license:expat)))
  13858. (define-public lofreq
  13859. (package
  13860. (name "lofreq")
  13861. (version "2.1.5")
  13862. (source (origin
  13863. (method git-fetch)
  13864. (uri (git-reference
  13865. (url "https://github.com/CSB5/lofreq")
  13866. (commit (string-append "v" version))))
  13867. (file-name (git-file-name name version))
  13868. (sha256
  13869. (base32
  13870. "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl"))))
  13871. (build-system gnu-build-system)
  13872. (arguments
  13873. '(#:test-target "bug-tests"
  13874. #:tests? #false)) ; test data are not included
  13875. (inputs
  13876. `(("htslib" ,htslib)
  13877. ("python" ,python-wrapper)
  13878. ("zlib" ,zlib)))
  13879. (native-inputs
  13880. `(("autoconf" ,autoconf)
  13881. ("automake" ,automake)
  13882. ("which" ,which)))
  13883. (home-page "https://csb5.github.io/lofreq/")
  13884. (synopsis "Sensitive variant calling from sequencing data ")
  13885. (description "LoFreq is a fast and sensitive variant-caller for inferring
  13886. SNVs and indels from next-generation sequencing data. It makes full use of
  13887. base-call qualities and other sources of errors inherent in
  13888. sequencing (e.g. mapping or base/indel alignment uncertainty), which are
  13889. usually ignored by other methods or only used for filtering.")
  13890. (license license:expat)))
  13891. (define-public ivar
  13892. (package
  13893. (name "ivar")
  13894. (version "1.3.1")
  13895. (source (origin
  13896. (method git-fetch)
  13897. (uri (git-reference
  13898. (url "https://github.com/andersen-lab/ivar")
  13899. (commit (string-append "v" version))))
  13900. (file-name (git-file-name name version))
  13901. (sha256
  13902. (base32
  13903. "044xa0hm3b8fga64csrdx05ih8w7kwmvcdrdrhkg8j11ml4bi4xv"))))
  13904. (build-system gnu-build-system)
  13905. (inputs
  13906. `(("htslib" ,htslib)
  13907. ("zlib" ,zlib)))
  13908. (native-inputs
  13909. `(("autoconf" ,autoconf)
  13910. ("automake" ,automake)))
  13911. (home-page "https://andersen-lab.github.io/ivar/html/")
  13912. (synopsis "Tools for amplicon-based sequencing")
  13913. (description "iVar is a computational package that contains functions
  13914. broadly useful for viral amplicon-based sequencing. ")
  13915. (license license:gpl3+)))
  13916. (define-public python-pyliftover
  13917. (package
  13918. (name "python-pyliftover")
  13919. (version "0.4")
  13920. ;; The version of pypi does not include test data.
  13921. (source (origin
  13922. (method git-fetch)
  13923. (uri (git-reference
  13924. (url "https://github.com/konstantint/pyliftover")
  13925. (commit version)))
  13926. (file-name (git-file-name name version))
  13927. (sha256
  13928. (base32
  13929. "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3"))))
  13930. (build-system python-build-system)
  13931. (arguments `(#:tests? #false)) ; the tests access the web
  13932. (native-inputs
  13933. `(("python-pytest" ,python-pytest)))
  13934. (home-page "https://github.com/konstantint/pyliftover")
  13935. (synopsis "Python implementation of UCSC liftOver genome coordinate conversion")
  13936. (description
  13937. "PyLiftover is a library for quick and easy conversion of genomic (point)
  13938. coordinates between different assemblies.")
  13939. (license license:expat)))
  13940. (define-public python-cgatcore
  13941. (package
  13942. (name "python-cgatcore")
  13943. (version "0.6.7")
  13944. ;; The version of pypi does not include test data.
  13945. (source (origin
  13946. (method git-fetch)
  13947. (uri (git-reference
  13948. (url "https://github.com/cgat-developers/cgat-core")
  13949. (commit (string-append "v" version))))
  13950. (file-name (git-file-name name version))
  13951. (sha256
  13952. (base32
  13953. "17vk88v1bx7x02ibzkc9i7ir4b5p1hcjr38jpsfzyzxr68352d5k"))))
  13954. (build-system python-build-system)
  13955. (arguments
  13956. `(#:phases
  13957. (modify-phases %standard-phases
  13958. (add-after 'unpack 'fix-references
  13959. (lambda _
  13960. (substitute* "cgatcore/pipeline/execution.py"
  13961. (("#!/bin/bash") (string-append "#!" (which "bash")))
  13962. (("executable=\"/bin/bash\"")
  13963. (string-append "executable=\"" (which "bash") "\""))
  13964. (("\\\\time") (which "time")))))
  13965. (delete 'check)
  13966. (add-after 'install 'check
  13967. (lambda* (#:key tests? inputs outputs #:allow-other-keys)
  13968. (when tests?
  13969. (add-installed-pythonpath inputs outputs)
  13970. ;; Requires network access
  13971. (delete-file "tests/test_pipeline_execution.py")
  13972. (invoke "python" "-m" "pytest" "-v")))))))
  13973. (native-inputs
  13974. `(("python-pytest" ,python-pytest)
  13975. ("lsof" ,lsof)
  13976. ("hostname" ,inetutils)
  13977. ("openssl" ,openssl)))
  13978. (inputs
  13979. `(("time" ,time)))
  13980. (propagated-inputs
  13981. `(("python-apsw" ,python-apsw)
  13982. ("python-gevent" ,python-gevent)
  13983. ("python-pandas" ,python-pandas)
  13984. ("python-paramiko" ,python-paramiko)
  13985. ("python-pyyaml" ,python-pyyaml)
  13986. ("python-ruffus" ,python-ruffus)
  13987. ("python-sqlalchemy" ,python-sqlalchemy)))
  13988. (home-page "https://github.com/cgat-developers/cgat-core")
  13989. (synopsis "Computational genomics analysis toolkit")
  13990. (description
  13991. "CGAT-core is a set of libraries and helper functions used to enable
  13992. researchers to design and build computational workflows for the analysis of
  13993. large-scale data-analysis.")
  13994. (license license:expat)))
  13995. (define-public perl-cworld-dekker
  13996. (package
  13997. (name "perl-cworld-dekker")
  13998. (version "1.01")
  13999. (source (origin
  14000. (method git-fetch)
  14001. (uri (git-reference
  14002. (url "https://github.com/dekkerlab/cworld-dekker.git")
  14003. (commit (string-append "v" version))))
  14004. (file-name (git-file-name name version))
  14005. (sha256
  14006. (base32
  14007. "1dvh23fx52m59y6304xi2j2pl2hiqadlqg8jyv2pm14j1hy71ych"))))
  14008. (build-system perl-build-system)
  14009. (arguments
  14010. `(#:modules ((guix build perl-build-system)
  14011. (guix build utils)
  14012. (srfi srfi-26))
  14013. #:phases
  14014. (modify-phases %standard-phases
  14015. (add-after 'unpack 'hardcode-references
  14016. (lambda* (#:key inputs #:allow-other-keys)
  14017. (let ((bedtools (assoc-ref inputs "bedtools"))
  14018. (r (assoc-ref inputs "r-minimal")))
  14019. (substitute* '("scripts/python/getEigenVectors.py"
  14020. "scripts/python/matrix2EigenVectors.py")
  14021. (("bedtools intersect")
  14022. (string-append bedtools "/bin/bedtools intersect")))
  14023. (substitute* "lib/cworld/dekker.pm"
  14024. (("bedtools --version")
  14025. (string-append bedtools "/bin/bedtools --version")))
  14026. (substitute* '("scripts/perl/correlateMatrices.pl"
  14027. "scripts/perl/matrix2scaling.pl"
  14028. "scripts/perl/matrix2distance.pl"
  14029. "scripts/perl/coverageCorrect.pl"
  14030. "scripts/perl/matrix2anchorPlot.pl"
  14031. "scripts/python/matrix2EigenVectors.py"
  14032. "scripts/python/matrix2insulation-lite.py"
  14033. "scripts/perl/matrix2compartment.pl"
  14034. "scripts/perl/anchorPurge.pl"
  14035. "scripts/perl/applyCorrection.pl"
  14036. "scripts/perl/compareInsulation.pl"
  14037. "scripts/perl/fillMissingData.pl"
  14038. "scripts/perl/matrix2loess.pl"
  14039. "scripts/python/getEigenVectors.py"
  14040. "scripts/perl/aggregateBED.pl"
  14041. "scripts/perl/collapseMatrix.pl"
  14042. "scripts/perl/matrix2direction.pl"
  14043. "scripts/perl/singletonRemoval.pl"
  14044. "lib/cworld/dekker.pm"
  14045. "scripts/perl/matrix2insulation.pl")
  14046. (("(`|\")Rscript" _ pre)
  14047. (string-append pre r "/bin/Rscript"))))))
  14048. (add-after 'install 'install-scripts
  14049. (lambda* (#:key outputs #:allow-other-keys)
  14050. (let* ((out (assoc-ref outputs "out"))
  14051. (share (string-append out "/share/cworld-dekker")))
  14052. (mkdir-p share)
  14053. (copy-recursively "scripts" share)
  14054. ;; Make all scripts executable and wrap them.
  14055. (let ((r (find-files share "\\.R$"))
  14056. (py (find-files share "\\.py$"))
  14057. (pl (find-files share "\\.pl$"))
  14058. (wrap (lambda* (script var #:optional (extra ""))
  14059. (let ((path (string-append (getenv var)
  14060. extra)))
  14061. (wrap-program script
  14062. `(,var ":" prefix (,path)))))))
  14063. (for-each (cut chmod <> #o555) (append r py pl))
  14064. (for-each (cut wrap <> "PERL5LIB"
  14065. (string-append ":" out
  14066. "/lib/perl5/site_perl"))
  14067. pl)
  14068. (for-each (cut wrap <> "PYTHONPATH") py))))))))
  14069. (inputs
  14070. `(("libgd" ,gd)
  14071. ("perl-gd" ,perl-gd)
  14072. ("bedtools" ,bedtools)
  14073. ("python" ,python-wrapper)
  14074. ("python-scipy" ,python-scipy)
  14075. ("python-numpy" ,python-numpy)
  14076. ("python-matplotlib" ,python-matplotlib)
  14077. ("python-h5py" ,python-h5py)
  14078. ("python-scikit-learn" ,python-scikit-learn)
  14079. ("r-minimal" ,r-minimal)))
  14080. (native-inputs
  14081. `(("perl-module-build" ,perl-module-build)))
  14082. (home-page "https://github.com/dekkerlab/cworld-dekker")
  14083. (synopsis "Utility and analysis scripts for 3C, 4C, 5C, and Hi-C data")
  14084. (description "This package is a collection of Perl, Python, and R
  14085. scripts for manipulating 3C/4C/5C/Hi-C data.")
  14086. (license license:asl2.0)))
  ;; Ensembl Variant Effect Predictor.  Nothing is configured or compiled:
  ;; the 'install phase copies the Perl modules of VEP and of the Ensembl API
  ;; repositories into the output and wraps the command-line scripts, and the
  ;; test suite is run afterwards against the installed files.
  (define-public ensembl-vep
    (let ((api-version "103"))
      ;; Return the origin of the Ensembl API repository REPOSITORY, checked
      ;; out at the "release/API-VERSION" reference.  CHECKSUM is the base32
      ;; hash of that checkout.
      (define (api-module repository checksum)
        (origin
          (method git-fetch)
          (uri (git-reference
                (url (string-append "https://github.com/Ensembl/"
                                    repository ".git"))
                (commit (string-append "release/" api-version))))
          (file-name (string-append repository "-" api-version "-checkout"))
          (sha256 (base32 checksum))))
      (package
        (name "ensembl-vep")
        (version (string-append api-version ".1"))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/Ensembl/ensembl-vep.git")
                 (commit (string-append "release/" version))))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c"))))
        (build-system gnu-build-system)
        (arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (ice-9 match))
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'build)
             ;; Tests need to run after installation
             (delete 'check)
             (replace 'install
               (lambda* (#:key inputs outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin (string-append out "/bin"))
                        (site-perl (string-append out "/lib/perl5/site_perl"))
                        (ensembl-root (string-append site-perl "/Bio/EnsEMBL"))
                        ;; Each entry is (NAME SUBDIR): the input labelled
                        ;; "api-module-NAME" has its modules/Bio/EnsEMBL/SUBDIR
                        ;; tree copied to the same place below ENSEMBL-ROOT.
                        (api-modules '(("ensembl" "/")
                                       ("ensembl-variation" "/Variation")
                                       ("ensembl-funcgen" "/Funcgen")
                                       ("ensembl-io" "/")))
                        ;; Each entry is (SCRIPT NEW-NAME); when NEW-NAME is
                        ;; #f the script keeps its own base name.
                        (programs '(("convert_cache.pl" "vep_convert_cache.pl")
                                    ("INSTALL.pl" "vep_install.pl")
                                    ("haplo" #f)
                                    ("variant_recoder" #f)
                                    ("filter_vep" #f)
                                    ("vep" #f))))
                   ;; Install the Ensembl API modules first ...
                   (for-each
                    (lambda (entry)
                      (match entry
                        ((module subdir)
                         (let ((target (string-append ensembl-root subdir)))
                           (mkdir-p target)
                           (copy-recursively
                            (string-append
                             (assoc-ref inputs
                                        (string-append "api-module-" module))
                             "/modules/Bio/EnsEMBL" subdir)
                            target)))))
                    api-modules)
                   ;; ... then the modules shipped with VEP itself.
                   (copy-recursively "modules/" site-perl)
                   (mkdir-p bin)
                   ;; Install every script read-only and executable, wrapped
                   ;; so that it finds both the build-time PERL5LIB and the
                   ;; modules installed above.
                   (for-each
                    (lambda (entry)
                      (match entry
                        ((script alias)
                         (let ((installed
                                (string-append bin "/"
                                               (or alias (basename script)))))
                           (copy-file script installed)
                           (chmod installed #o555)
                           (wrap-program installed
                             (list "PERL5LIB" ":" 'prefix
                                   (list (getenv "PERL5LIB") site-perl)))))))
                    programs)
                   ;; Fix path to tools
                   (with-directory-excursion ensembl-root
                     (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm"
                                    "VEP/BaseRunner.pm"
                                    "VEP/Utils.pm"
                                    "VEP/AnnotationSource/Cache/VariationTabix.pm"
                                    "VEP/AnnotationSource/Cache/BaseSerialized.pm"
                                    "Variation/Utils/BaseVepTabixPlugin.pm"
                                    "Variation/Utils/VEP.pm"
                                    "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm")
                       (("`which")
                        (string-append "`"
                                       (assoc-ref inputs "which")
                                       "/bin/which")))))))
             (add-after 'install 'check
               (lambda* (#:key tests? inputs outputs #:allow-other-keys)
                 (when tests?
                   (let ((test-dir "/tmp/t"))
                     ;; Let the tests load the freshly installed modules.
                     (setenv "PERL5LIB"
                             (string-append (getenv "PERL5LIB")
                                            ":"
                                            (assoc-ref outputs "out")
                                            "/lib/perl5/site_perl"))
                     ;; Run the tests from a writable copy of the "t"
                     ;; directory of the source.
                     (copy-recursively
                      (string-append (assoc-ref inputs "source") "/t")
                      test-dir)
                     (for-each make-file-writable (find-files test-dir))
                     ;; TODO: haplo needs Set/IntervalTree.pm
                     (invoke "perl" "-e" "
use Test::Harness; use Test::Exception;
my $dirname = \"/tmp\";
opendir TEST, \"$dirname\\/t\";
my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST;
@test_files = grep {!/Haplo/} @test_files;
runtests(@test_files);
")))))))
        (inputs
         `(("bioperl-minimal" ,bioperl-minimal)
           ("perl-bio-db-hts" ,perl-bio-db-hts)
           ("perl-dbi" ,perl-dbi)
           ("perl-dbd-mysql" ,perl-dbd-mysql)
           ("perl-libwww" ,perl-libwww)
           ("perl-http-tiny" ,perl-http-tiny)
           ("perl-json" ,perl-json)
           ("which" ,which)))
        (propagated-inputs
         `(("kentutils" ,kentutils)))
        (native-inputs
         `(("unzip" ,unzip)
           ("perl" ,perl)
           ;; The "api-module-" labels are looked up by the 'install phase.
           ("api-module-ensembl"
            ,(api-module "ensembl"
                         "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v"))
           ("api-module-ensembl-variation"
            ,(api-module "ensembl-variation"
                         "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq"))
           ("api-module-ensembl-funcgen"
            ,(api-module "ensembl-funcgen"
                         "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n"))
           ("api-module-ensembl-io"
            ,(api-module "ensembl-io"
                         "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix"))
           ("perl-test-harness" ,perl-test-harness)
           ("perl-test-exception" ,perl-test-exception)))
        (home-page "http://www.ensembl.org/vep")
        (synopsis "Predict functional effects of genomic variants")
        (description
         "This package provides a Variant Effect Predictor, which predicts
the functional effects of genomic variants. It also provides
Haplosaurus, which uses phased genotype data to predict
whole-transcript haplotype sequences, and Variant Recoder, which
translates between different variant encodings.")
        (license license:asl2.0))))
  ;; Signac, packaged from a pinned upstream commit: the version string is
  ;; derived from the base version "1.1.1", REVISION and COMMIT.
  (define-public r-signac
    (let ((revision "1")
          (commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c"))
      (package
        (name "r-signac")
        (version (git-version "1.1.1" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/timoast/signac/")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "1yihhrv7zs87ax61la1nb4y12lg3knraw4b20k5digbcwm8488lb"))))
        (build-system r-build-system)
        ;; The upstream name differs from the Guix package name.
        (properties '((upstream-name . "Signac")))
        (inputs
         (list (list "zlib" zlib)))
        (propagated-inputs
         `(("r-annotationfilter" ,r-annotationfilter)
           ("r-biocgenerics" ,r-biocgenerics)
           ("r-biostrings" ,r-biostrings)
           ("r-biovizbase" ,r-biovizbase)
           ("r-data-table" ,r-data-table)
           ("r-dplyr" ,r-dplyr)
           ("r-fastmatch" ,r-fastmatch)
           ("r-future" ,r-future)
           ("r-future-apply" ,r-future-apply)
           ("r-genomeinfodb" ,r-genomeinfodb)
           ("r-genomicranges" ,r-genomicranges)
           ("r-ggbio" ,r-ggbio)
           ("r-ggforce" ,r-ggforce)
           ("r-ggplot2" ,r-ggplot2)
           ("r-ggrepel" ,r-ggrepel)
           ("r-ggseqlogo" ,r-ggseqlogo)
           ("r-iranges" ,r-iranges)
           ("r-irlba" ,r-irlba)
           ("r-lsa" ,r-lsa)
           ("r-matrix" ,r-matrix)
           ("r-patchwork" ,r-patchwork)
           ("r-pbapply" ,r-pbapply)
           ("r-rcpp" ,r-rcpp)
           ("r-rcpproll" ,r-rcpproll)
           ("r-rsamtools" ,r-rsamtools)
           ("r-s4vectors" ,r-s4vectors)
           ("r-scales" ,r-scales)
           ("r-seurat" ,r-seurat)
           ("r-seuratobject" ,r-seuratobject)
           ("r-stringi" ,r-stringi)
           ("r-tidyr" ,r-tidyr)))
        (home-page "https://github.com/timoast/signac/")
        (synopsis "Analysis of single-cell chromatin data")
        (description
         "This package provides a framework for the analysis and exploration of
single-cell chromatin data. The Signac package contains functions for
quantifying single-cell chromatin data, computing per-cell quality control
metrics, dimension reduction and normalization, visualization, and DNA
sequence motif analysis.")
        (license license:expat))))
  ;; Tombo, fetched from PyPI where it is published as "ont-tombo".
  (define-public tombo
    (package
      (name "tombo")
      (version "1.5.1")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "ont-tombo" version))
         (sha256
          (base32
           "1023hadgcsgi53kz53ql45207hfizf9sw57z0qij3ay1bx68zbpm"))))
      (build-system python-build-system)
      ;; The package mainly consists of a command-line tool, but also has a
      ;; Python-API.  Thus these must be propagated.
      (propagated-inputs
       `(("python-future" ,python-future)
         ("python-h5py" ,python-h5py)
         ("python-mappy" ,python-mappy)
         ("python-numpy" ,python-numpy)
         ("python-scipy" ,python-scipy)
         ("python-tqdm" ,python-tqdm)
         ("python-rpy2" ,python-rpy2)))
      ;; Only needed while building.
      (native-inputs
       (list (list "python-cython" python-cython)
             (list "python-nose2" python-nose2)))
      (home-page "https://github.com/nanoporetech/tombo")
      (synopsis "Analysis of raw nanopore sequencing data")
      (description "Tombo is a suite of tools primarily for the identification of
modified nucleotides from nanopore sequencing data. Tombo also provides tools
for the analysis and visualization of raw nanopore signal.")
      ;; Some parts may be BSD-3-licensed.
      (license license:mpl2.0)))
  ;; PyVCF, built from a pinned commit of the upstream repository.
  (define-public python-pyvcf
    (package
      (name "python-pyvcf")
      (version "0.6.8")
      ;; Use git, because the PyPI tarballs lack test data.
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jamescasbon/PyVCF.git")
               ;; Latest release is not tagged.
               (commit "bfcedb9bad1a14074ac4526ffdb610611e073810")))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0c7lsssns3zp8fh2ibllzzra003srg9vbxqzmq6654akbzdb7lrf"))))
      (build-system python-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'remove-installed-tests
             ;; Do not install test files.
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((installed-tests
                      (string-append (site-packages inputs outputs)
                                     "vcf/test")))
                 (delete-file-recursively installed-tests)
                 #t))))))
      (native-inputs
       (list (list "python-cython" python-cython)))
      (propagated-inputs
       (list (list "python-pysam" python-pysam)
             (list "python-rpy2" python-rpy2)))
      (home-page "https://github.com/jamescasbon/PyVCF")
      (synopsis "Variant Call Format parser for Python")
      (description "This package provides a @acronym{VCF,Variant Call Format}
parser for Python.")
      (license license:expat)))
  ;; NanoSV, fetched from PyPI where it is published as "NanoSV".
  (define-public nanosv
    (package
      (name "nanosv")
      (version "1.2.4")
      (source (origin
                (method url-fetch)
                (uri (pypi-uri "NanoSV" version))
                (sha256
                 (base32
                  "1wl2daj0bwrl8fx5xi8j8hfs3mp3vg3qycy66538n032v1qkc6xg"))))
      (build-system python-build-system)
      (inputs
       `(("python-configparser" ,python-configparser)
         ("python-pysam" ,python-pysam)
         ("python-pyvcf" ,python-pyvcf)))
      (home-page "https://github.com/mroosmalen/nanosv")
      ;; A synopsis is not a sentence, so it must not end with a period.
      (synopsis "Structural variation detection tool for Oxford Nanopore data")
      (description "NanoSV is a software package that can be used to identify
structural genomic variations in long-read sequencing data, such as data
produced by Oxford Nanopore Technologies’ MinION, GridION or PromethION
instruments, or Pacific Biosciences RSII or Sequel sequencers.")
      (license license:expat)))
  ;; Python bindings for Straw, fetched from PyPI where the project is
  ;; published as "strawC".
  (define-public python-strawc
    (package
      (name "python-strawc")
      (version "0.0.2.1")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "strawC" version))
         (sha256
          (base32
           "1z1gy8n56lhriy6hdkh9r82ndikndipq2cy2wh8q185qig4rimr6"))))
      (build-system python-build-system)
      (inputs
       `(("curl" ,curl)
         ("zlib" ,zlib)))
      (propagated-inputs
       `(("pybind11" ,pybind11)))
      (home-page "https://github.com/aidenlab/straw")
      (synopsis "Stream data from .hic files")
      (description "Straw is a library which allows rapid streaming of contact
data from @file{.hic} files. This package provides Python bindings.")
      (license license:expat)))
  ;; Python bindings to the UCSC Big Binary file library.  The test suite is
  ;; disabled, but a custom 'check phase is kept so that the tests can be run
  ;; against the installed package when they are enabled.
  (define-public python-pybbi
    (package
      (name "python-pybbi")
      (version "0.3.0")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "pybbi" version))
         (sha256
          (base32
           "1hvy2f28i2b41l1pq15vciqbj538n0lichp8yr6413jmgg06xdsk"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #false                ; tests require network access
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'set-cc
             (lambda _ (setenv "CC" "gcc")))
           (replace 'check
             (lambda* (#:key inputs outputs tests? #:allow-other-keys)
               (when tests?
                 (add-installed-pythonpath inputs outputs)
                 ;; Run pytest from a copy of the tests under /tmp rather
                 ;; than from the source directory.
                 (copy-recursively "tests" "/tmp/tests")
                 (with-directory-excursion "/tmp/tests"
                   (invoke "python" "-m" "pytest" "-v"))))))))
      (native-inputs
       `(("pkg-config" ,pkg-config)
         ("python-pkgconfig" ,python-pkgconfig)
         ("python-pytest" ,python-pytest)))
      (inputs
       `(("libpng" ,libpng)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (propagated-inputs
       `(("python-cython" ,python-cython)
         ("python-numpy" ,python-numpy)
         ("python-pandas" ,python-pandas)
         ("python-six" ,python-six)))
      (home-page "https://github.com/nvictus/pybbi")
      (synopsis "Python bindings to UCSC Big Binary file library")
      (description
       "This package provides Python bindings to the UCSC Big
Binary (bigWig/bigBed) file library. This provides read-level access to local
and remote bigWig and bigBed files but no write capabilities. The main
feature is fast retrieval of range queries into numpy arrays.")
      (license license:expat)))
  ;; DNA Features Viewer, fetched from PyPI as "dna_features_viewer".
  (define-public python-dna-features-viewer
    (package
      (name "python-dna-features-viewer")
      (version "3.0.3")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "dna_features_viewer" version))
         (sha256
          (base32
           "0vci6kg2id6r6rh3cifq7ccnh7j0mb8iqg3hji6rva0ayrdqzafc"))))
      (build-system python-build-system)
      ;; There are no tests to run.
      (arguments (list #:tests? #f))
      (propagated-inputs
       (list (list "python-biopython" python-biopython)
             (list "python-matplotlib" python-matplotlib)))
      (home-page
       "https://github.com/Edinburgh-Genome-Foundry/DnaFeaturesViewer")
      (synopsis "Plot features from DNA sequences")
      (description
       "DNA Features Viewer is a Python library to visualize DNA features,
e.g. from GenBank or Gff files, or Biopython SeqRecords.")
      (license license:expat)))
  ;; CoolBox, fetched from PyPI as "coolbox".
  (define-public python-coolbox
    (package
      (name "python-coolbox")
      (version "0.3.8")
      (source
       (origin
         (method url-fetch)
         (uri (pypi-uri "coolbox" version))
         (sha256
          (base32
           "0gqp76285w9klswr47y6kxbzwhv033b26jfa179kccfhiaq5p2xa"))))
      (build-system python-build-system)
      (arguments '(#:tests? #false))    ; there are none
      (inputs
       `(("pybind11" ,pybind11)))
      (propagated-inputs
       `(("python-cooler" ,python-cooler)
         ("python-dna-features-viewer" ,python-dna-features-viewer)
         ("python-fire" ,python-fire)
         ("python-h5py" ,python-h5py)
         ("python-intervaltree" ,python-intervaltree)
         ("python-ipywidgets" ,python-ipywidgets)
         ("jupyter" ,jupyter)
         ("python-matplotlib" ,python-matplotlib)
         ("python-nbformat" ,python-nbformat)
         ("python-numpy" ,python-numpy)
         ("python-numpydoc" ,python-numpydoc)
         ("python-pandas" ,python-pandas)
         ("python-pybbi" ,python-pybbi)
         ("python-pytest" ,python-pytest)
         ("python-scipy" ,python-scipy)
         ("python-statsmodels" ,python-statsmodels)
         ("python-strawc" ,python-strawc)
         ("python-svgutils" ,python-svgutils)
         ("python-termcolor" ,python-termcolor)
         ("python-voila" ,python-voila)))
      (home-page "https://github.com/GangCaoLab/CoolBox")
      (synopsis "Genomic data visualization toolkit")
      (description
       "CoolBox is a toolkit for visual analysis of genomics data. It aims to
be highly compatible with the Python ecosystem, easy to use and highly
customizable with a well-designed user interface. It can be used in various
visualization situations, for example, to produce high-quality genome track
plots or fetch commonly used genomic data files with a Python script or command
line, interactively explore genomic data within a Jupyter environment or web
browser.")
      (license license:gpl3+)))
  ;; Scregseg, built from the upstream tag "v" followed by the version.
  (define-public scregseg
    (package
      (name "scregseg")
      (version "0.1.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/BIMSBbioinfo/scregseg")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1k8hllr5if6k2mm2zj391fv40sfc008cjm04l9vgfsdppb80i112"))))
      (build-system python-build-system)
      (arguments
       '(#:tests? #false                ; tests require network access
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'do-not-fail-to-find-sklearn
             (lambda _
               ;; XXX: I have no idea why it cannot seem to find sklearn.
               ;; Drop the 'sklearn' entry from setup.py as a workaround;
               ;; python-scikit-learn is still a propagated input.
               (substitute* "setup.py"
                 (("'sklearn',") "")))))))
      (native-inputs
       (list (list "python-cython" python-cython)))
      (propagated-inputs
       `(("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("python-numpy" ,python-numpy)
         ("python-hmmlearn" ,python-hmmlearn)
         ("python-pandas" ,python-pandas)
         ("python-numba" ,python-numba)
         ("python-anndata" ,python-anndata)
         ("python-scanpy" ,python-scanpy)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pysam" ,python-pysam)
         ("python-matplotlib" ,python-matplotlib)
         ("python-seaborn" ,python-seaborn)
         ("python-coolbox" ,python-coolbox)))
      (home-page "https://github.com/BIMSBbioinfo/scregseg")
      (synopsis "Single-cell regulatory landscape segmentation")
      (description "Scregseg (Single-Cell REGulatory landscape SEGmentation) is a
tool that facilitates the analysis of single cell ATAC-seq data by an
HMM-based segmentation algorithm. Scregseg uses an HMM with
Dirichlet-Multinomial emission probabilities to segment the genome either
according to distinct relative cross-cell accessibility profiles or (after
collapsing the single-cell tracks to pseudo-bulk tracks) to capture distinct
cross-cluster accessibility profiles.")
      (license license:gpl3+)))
  ;; Megadepth.  The build uses the upstream "CMakeLists.txt.ci" file with
  ;; the static targets commented out, and only the dynamically linked
  ;; executable is installed, under the name "megadepth".
  (define-public megadepth
    (package
      (name "megadepth")
      (version "1.1.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ChristopherWilks/megadepth")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hj69d2dgmk2zwgazik7xzc04fxxlk93p888kpgc52fmhd95qph7"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #false ; some tests seem to require connection to
                         ; www.ebi.ac.uk; this may be caused by htslib.
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'prepare-CMakeLists.txt
             (lambda _
               (rename-file "CMakeLists.txt.ci" "CMakeLists.txt")
               (substitute* "CMakeLists.txt"
                 ;; Insert the version directly instead of reading the
                 ;; VERSION file through a shell command.
                 (("`cat ../VERSION`") ,version)
                 ;; Comment out the static executable and library targets.
                 (("target_link_libraries\\(megadepth_static") "#")
                 (("target_link_libraries\\(megadepth_statlib") "#")
                 (("add_executable\\(megadepth_static") "#")
                 (("add_executable\\(megadepth_statlib") "#"))
               (substitute* "tests/test.sh"
                 ;; Disable remote test
                 (("./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam") "#")
                 ;; Prior to installation the binary's name differs from what
                 ;; the test script assumes.
                 (("./megadepth") "../build/megadepth_dynamic"))))
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 ;; The test script is run from the source directory.
                 (with-directory-excursion "../source"
                   (invoke "bash" "tests/test.sh" "use-local-test-data")))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (mkdir-p bin)
                 (copy-file "megadepth_dynamic"
                            (string-append bin "/megadepth"))))))))
      (native-inputs
       `(("diffutils" ,diffutils)
         ("perl" ,perl)
         ("grep" ,grep)))
      (inputs
       `(("curl" ,curl)
         ("htslib" ,htslib)
         ("libdeflate" ,libdeflate)
         ("libbigwig" ,libbigwig)
         ("zlib" ,zlib)))
      (home-page "https://github.com/ChristopherWilks/megadepth")
      (synopsis "BigWig and BAM/CRAM related utilities")
      (description "Megadepth is an efficient tool for extracting coverage
related information from RNA and DNA-seq BAM and BigWig files. It supports
reading whole-genome coverage from BAM files and writing either indexed TSV or
BigWig files, as well as efficient region coverage summary over intervals from
both types of files.")
      (license license:expat)))
  ;; ASCAT, built from the "ASCAT" subdirectory of the upstream repository.
  (define-public r-ascat
    (package
      (name "r-ascat")
      (version "2.5.2")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Crick-CancerGenomics/ascat.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0cc0y3as6cb64iwnx0pgbajiig7m4z723mns9d5i4j09ccid3ccm"))))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Enter the "ASCAT" subdirectory before the later phases run.
           (add-after 'unpack 'move-to-ascat-dir
             (lambda _
               (chdir "ASCAT"))))))
      (propagated-inputs
       (list (list "r-rcolorbrewer" r-rcolorbrewer)))
      (home-page "https://github.com/VanLoo-lab/ascat/")
      (synopsis "Allele-Specific Copy Number Analysis of Tumors in R")
      (description "This package provides the @acronym{ASCAT,Allele-Specific Copy
Number Analysis of Tumors} R package that can be used to infer tumour purity,
ploidy and allele-specific copy number profiles.")
      (license license:gpl3)))
  ;; Battenberg, built from the upstream tag "v" followed by the version.
  (define-public r-battenberg
    (package
      (name "r-battenberg")
      (version "2.2.9")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Wedge-lab/battenberg.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0nmcq4c7y5g8h8lxsq9vadz9bj4qgqn118alip520ny6czaxki4h"))))
      (build-system r-build-system)
      (propagated-inputs
       (list (list "r-devtools" r-devtools)
             (list "r-readr" r-readr)
             (list "r-doparallel" r-doparallel)
             (list "r-ggplot2" r-ggplot2)
             (list "r-rcolorbrewer" r-rcolorbrewer)
             (list "r-gridextra" r-gridextra)
             (list "r-gtools" r-gtools)
             (list "r-ascat" r-ascat)))
      (home-page "https://github.com/Wedge-lab/battenberg")
      (synopsis "Subclonal copy number estimation in R")
      (description "This package contains the Battenberg R package for subclonal
copy number estimation, as described by
@url{doi:10.1016/j.cell.2012.04.023,Nik-Zainal et al.}")
      (license license:gpl3)))
  ;; CaTCH, packaged from a pinned upstream commit and built from the
  ;; "CaTCH" subdirectory of the checkout.
  (define-public r-catch
    (let ((revision "1")
          (commit "196ddd5a51b1a5f5daa01de53fdaad9b7505e084"))
      (package
        (name "r-catch")
        (version (git-version "1.0" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/zhanyinx/CaTCH")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "11c7f1fc8f57wnwk1hrgr5y814m80zj8gkz5021vxyxy2v02cqgd"))))
        (build-system r-build-system)
        (arguments
         '(#:phases
           (modify-phases %standard-phases
             ;; Enter the "CaTCH" subdirectory before the later phases run.
             (add-after 'unpack 'chdir
               (lambda _
                 (chdir "CaTCH"))))))
        (home-page "https://github.com/zhanyinx/CaTCH_R")
        (synopsis "Call a hierarchy of domains based on Hi-C data")
        (description "This package allows building the hierarchy of domains
starting from Hi-C data. Each hierarchical level is identified by a minimum
value of physical insulation between neighboring domains.")
        (license license:gpl2+))))
  ;; Spectre, packaged from a pinned upstream commit: the version string is
  ;; derived from the base version "0.5.5", REVISION and COMMIT.
  (define-public r-spectre
    (let ((commit "f6648ab3eb9499300d86502b5d60ec370ae9b61a")
          (revision "1"))
      (package
        (name "r-spectre")
        (version (git-version "0.5.5" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ImmuneDynamics/Spectre")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "0g38grrhbqqa4bmcilvdyawbkcnax6k4vffx2giywp18mbirmj0x"))))
        (properties `((upstream-name . "Spectre")))
        (build-system r-build-system)
        ;; Each input is listed exactly once, in alphabetical order.
        (propagated-inputs
         `(("r-biobase" ,r-biobase)
           ("r-biocmanager" ,r-biocmanager)
           ("r-caret" ,r-caret)
           ("r-class" ,r-class)
           ("r-colorramps" ,r-colorramps)
           ("r-data-table" ,r-data-table)
           ("r-devtools" ,r-devtools)
           ("r-dplyr" ,r-dplyr)
           ("r-exactextractr" ,r-exactextractr)
           ("r-factoextra" ,r-factoextra)
           ("r-flowcore" ,r-flowcore)
           ("r-flowsom" ,r-flowsom)
           ("r-flowviz" ,r-flowviz)
           ("r-fnn" ,r-fnn)
           ("r-ggplot2" ,r-ggplot2)
           ("r-ggpointdensity" ,r-ggpointdensity)
           ("r-ggpubr" ,r-ggpubr)
           ("r-ggraph" ,r-ggraph)
           ("r-ggthemes" ,r-ggthemes)
           ("r-gridextra" ,r-gridextra)
           ("r-gtools" ,r-gtools)
           ("r-hdf5array" ,r-hdf5array)
           ("r-irlba" ,r-irlba)
           ("r-pheatmap" ,r-pheatmap)
           ("r-plyr" ,r-plyr)
           ("r-qs" ,r-qs)
           ("r-raster" ,r-raster)
           ("r-rcolorbrewer" ,r-rcolorbrewer)
           ("r-rgeos" ,r-rgeos)
           ("r-rhdf5" ,r-rhdf5)
           ("r-rstudioapi" ,r-rstudioapi)
           ("r-rsvd" ,r-rsvd)
           ("r-rtsne" ,r-rtsne)
           ("r-s2" ,r-s2)
           ("r-scales" ,r-scales)
           ("r-sf" ,r-sf)
           ("r-sp" ,r-sp)
           ("r-stars" ,r-stars)
           ("r-stringr" ,r-stringr)
           ("r-tidygraph" ,r-tidygraph)
           ("r-tidyr" ,r-tidyr)
           ("r-tiff" ,r-tiff)
           ("r-umap" ,r-umap)))
        (home-page "https://github.com/ImmuneDynamics/Spectre")
        (synopsis "High-dimensional cytometry and imaging analysis")
        (description
         "This package provides a computational toolkit in R for the
integration, exploration, and analysis of high-dimensional single-cell
cytometry and imaging data.")
        (license license:expat))))
  ;; CytoNorm, packaged from a pinned upstream commit: the version string is
  ;; derived from the base version "0.0.7", REVISION and COMMIT.
  (define-public r-cytonorm
    (let ((revision "1")
          (commit "e4b9d343ee65db3c422800f1db3e77c25abde987"))
      (package
        (name "r-cytonorm")
        (version (git-version "0.0.7" revision commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/saeyslab/CytoNorm")
                 (commit commit)))
           (file-name (git-file-name name version))
           (sha256
            (base32
             "0h2rdy15i4zymd4dv60n5w0frbsdbmzpv99dgm0l2dn041qv7fah"))))
        (build-system r-build-system)
        ;; The upstream name differs from the Guix package name.
        (properties '((upstream-name . "CytoNorm")))
        (propagated-inputs
         (list (list "r-cytoml" r-cytoml)
               (list "r-dplyr" r-dplyr)
               (list "r-emdist" r-emdist)
               (list "r-flowcore" r-flowcore)
               (list "r-flowsom" r-flowsom)
               (list "r-flowworkspace" r-flowworkspace)
               (list "r-ggplot2" r-ggplot2)
               (list "r-gridextra" r-gridextra)
               (list "r-pheatmap" r-pheatmap)
               (list "r-stringr" r-stringr)))
        (home-page "https://github.com/saeyslab/CytoNorm")
        (synopsis "Normalize cytometry data measured across multiple batches")
        (description
         "This package can be used to normalize cytometry samples when a control
sample is taken along in each of the batches. This is done by first
identifying multiple clusters/cell types, learning the batch effects from the
control samples and applying quantile normalization on all markers of
interest.")
        (license license:gpl2+))))