pypi.scm 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014 David Thompson <davet@gnu.org>
  3. ;;; Copyright © 2015 Cyril Roelandt <tipecaml@gmail.com>
  4. ;;; Copyright © 2015, 2016, 2017, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
  5. ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
  6. ;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
  7. ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
  8. ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
  9. ;;; Copyright © 2020 Lars-Dominik Braun <ldb@leibniz-psychology.org>
  10. ;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
  11. ;;; Copyright © 2020 Martin Becze <mjbecze@riseup.net>
  12. ;;; Copyright © 2021 Xinglu Chen <public@yoctocell.xyz>
  13. ;;; Copyright © 2021 Marius Bakke <marius@gnu.org>
  14. ;;;
  15. ;;; This file is part of GNU Guix.
  16. ;;;
  17. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  18. ;;; under the terms of the GNU General Public License as published by
  19. ;;; the Free Software Foundation; either version 3 of the License, or (at
  20. ;;; your option) any later version.
  21. ;;;
  22. ;;; GNU Guix is distributed in the hope that it will be useful, but
  23. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  24. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  25. ;;; GNU General Public License for more details.
  26. ;;;
  27. ;;; You should have received a copy of the GNU General Public License
  28. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  29. (define-module (guix import pypi)
  30. #:use-module (ice-9 match)
  31. #:use-module (ice-9 regex)
  32. #:use-module (ice-9 receive)
  33. #:use-module ((ice-9 rdelim) #:select (read-line))
  34. #:use-module (srfi srfi-1)
  35. #:use-module (srfi srfi-11)
  36. #:use-module (srfi srfi-26)
  37. #:use-module (srfi srfi-34)
  38. #:use-module (srfi srfi-35)
  39. #:use-module (guix utils)
  40. #:use-module (guix memoization)
  41. #:use-module (guix diagnostics)
  42. #:use-module (guix i18n)
  43. #:use-module ((guix build utils)
  44. #:select ((package-name->name+version
  45. . hyphen-package-name->name+version)
  46. find-files
  47. invoke))
  48. #:use-module (guix import utils)
  49. #:use-module ((guix download) #:prefix download:)
  50. #:use-module (guix import json)
  51. #:use-module (json)
  52. #:use-module (guix packages)
  53. #:use-module (guix upstream)
  54. #:use-module ((guix licenses) #:prefix license:)
  55. #:use-module (guix build-system python)
  56. #:export (parse-requires.txt
  57. parse-wheel-metadata
  58. specification->requirement-name
  59. guix-package->pypi-name
  60. pypi-recursive-import
  61. pypi->guix-package
  62. %pypi-updater))
  63. ;; The PyPI API (notice the rhyme) is "documented" at:
  64. ;; <https://warehouse.readthedocs.io/api-reference/json/>.
  65. (define non-empty-string-or-false
  66. (match-lambda
  67. ("" #f)
  68. ((? string? str) str)
  69. ((or 'null #f) #f)))
  70. ;; PyPI project.
  71. (define-json-mapping <pypi-project> make-pypi-project pypi-project?
  72. json->pypi-project
  73. (info pypi-project-info "info" json->project-info) ;<project-info>
  74. (last-serial pypi-project-last-serial "last_serial") ;integer
  75. (releases pypi-project-releases "releases" ;string/<distribution>* pairs
  76. (match-lambda
  77. (((versions . dictionaries) ...)
  78. (map (lambda (version vector)
  79. (cons version
  80. (map json->distribution
  81. (vector->list vector))))
  82. versions dictionaries))))
  83. (distributions pypi-project-distributions "urls" ;<distribution>*
  84. (lambda (vector)
  85. (map json->distribution (vector->list vector)))))
;; Project metadata: the record built by 'json->project-info', which
;; <pypi-project> uses as the converter for its "info" key.
;; NOTE(review): fields declared without an explicit JSON key presumably use
;; the field name itself as the key -- confirm against 'define-json-mapping'.
(define-json-mapping <project-info> make-project-info project-info?
  json->project-info
  (name         project-info-name)                   ;string
  (author       project-info-author)                 ;string
  (maintainer   project-info-maintainer)             ;string
  ;; The "classifiers" value is converted with 'vector->list'.
  (classifiers  project-info-classifiers             ;list of strings
                "classifiers" vector->list)
  (description  project-info-description)            ;string
  (summary      project-info-summary)                ;string
  (keywords     project-info-keywords)               ;string
  (license      project-info-license)                ;string
  ;; An empty "download_url", 'null or #f is normalized to #f.
  (download-url project-info-download-url            ;string | #f
                "download_url" non-empty-string-or-false)
  (home-page    project-info-home-page               ;string
                "home_page")
  (url          project-info-url "project_url")      ;string
  (release-url  project-info-release-url "release_url") ;string
  (version      project-info-version))               ;string
;; Distribution: a URL along with cryptographic hashes and metadata.
;; One such record is built per downloadable file listed under a project's
;; "releases" and "urls" keys (see the <pypi-project> converters).
(define-json-mapping <distribution> make-distribution distribution?
  json->distribution
  (url            distribution-url)                  ;string
  (digests        distribution-digests)              ;list of string pairs
  (file-name      distribution-file-name "filename") ;string
  (has-signature? distribution-has-signature? "has_sig") ;Boolean
  ;; The package type is what 'source-release' ("sdist") and 'wheel-release'
  ;; ("bdist_wheel") compare against.
  (package-type   distribution-package-type "packagetype") ;"bdist_wheel" | ...
  ;; Note: unlike its siblings, this accessor is named
  ;; 'distribution-package-python-version'.
  (python-version distribution-package-python-version
                  "python_version"))
  115. (define (pypi-fetch name)
  116. "Return a <pypi-project> record for package NAME, or #f on failure."
  117. (and=> (json-fetch (string-append "https://pypi.org/pypi/" name "/json"))
  118. json->pypi-project))
;; For packages found on PyPI that lack a source distribution.
;; 'source-release' raises this condition when no "sdist" distribution is
;; found; its PACKAGE field is set to the <pypi-project> record concerned.
(define-condition-type &missing-source-error &error
  missing-source-error?
  (package  missing-source-error-package))
  123. (define (latest-version project)
  124. "Return the latest version of PROJECT, a <pypi-project> record."
  125. (project-info-version (pypi-project-info project)))
  126. (define* (source-release pypi-package
  127. #:optional (version (latest-version pypi-package)))
  128. "Return the source release of VERSION for PYPI-PACKAGE, a <pypi-project>
  129. record, by default the latest version."
  130. (let ((releases (or (assoc-ref (pypi-project-releases pypi-package) version)
  131. '())))
  132. (or (find (lambda (release)
  133. (string=? "sdist" (distribution-package-type release)))
  134. releases)
  135. (raise (condition (&missing-source-error
  136. (package pypi-package)))))))
  137. (define* (wheel-release pypi-package
  138. #:optional (version (latest-version pypi-package)))
  139. "Return the url of the wheel for the latest release of pypi-package,
  140. or #f if there isn't any."
  141. (let ((releases (assoc-ref (pypi-project-releases pypi-package) version)))
  142. (find (lambda (release)
  143. (string=? "bdist_wheel" (distribution-package-type release)))
  144. releases)))
  145. (define (python->package-name name)
  146. "Given the NAME of a package on PyPI, return a Guix-compliant name for the
  147. package."
  148. (if (string-prefix? "python-" name)
  149. (snake-case name)
  150. (string-append "python-" (snake-case name))))
  151. (define (guix-package->pypi-name package)
  152. "Given a Python PACKAGE built from pypi.org, return the name of the
  153. package on PyPI."
  154. (define (url->pypi-name url)
  155. (hyphen-package-name->name+version
  156. (basename (file-sans-extension url))))
  157. (or (assoc-ref (package-properties package) 'upstream-name)
  158. (match (and=> (package-source package) origin-uri)
  159. ((? string? url)
  160. (url->pypi-name url))
  161. ((lst ...)
  162. (any url->pypi-name lst))
  163. (#f #f))))
  164. (define (wheel-url->extracted-directory wheel-url)
  165. (match (string-split (basename wheel-url) #\-)
  166. ((name version _ ...)
  167. (string-append name "-" version ".dist-info"))))
  168. (define (maybe-inputs package-inputs input-type)
  169. "Given a list of PACKAGE-INPUTS, tries to generate the 'inputs' field of a
  170. package definition. INPUT-TYPE, a symbol, is used to populate the name of
  171. the input field."
  172. (match package-inputs
  173. (()
  174. '())
  175. ((package-inputs ...)
  176. `((,input-type (list ,@package-inputs))))))
  177. (define %requirement-name-regexp
  178. ;; Regexp to match the requirement name in a requirement specification.
  179. ;; Some grammar, taken from PEP-0508 (see:
  180. ;; https://www.python.org/dev/peps/pep-0508/).
  181. ;; Using this grammar makes the PEP-0508 regexp easier to understand for
  182. ;; humans. The use of a regexp is preferred to more primitive string
  183. ;; manipulations because we can more directly match what upstream uses
  184. ;; (again, per PEP-0508). The regexp approach is also easier to extend,
  185. ;; should we want to implement more completely the grammar of PEP-0508.
  186. ;; The unified rule can be expressed as:
  187. ;; specification = wsp* ( url_req | name_req ) wsp*
  188. ;; where url_req is:
  189. ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?
  190. ;; and where name_req is:
  191. ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?
  192. ;; Thus, we need only matching NAME, which is expressed as:
  193. ;; identifer_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
  194. ;; identifier = letterOrDigit identifier_end*
  195. ;; name = identifier
  196. (let* ((letter-or-digit "[A-Za-z0-9]")
  197. (identifier-end (string-append "(" letter-or-digit "|"
  198. "[-_.]*" letter-or-digit ")"))
  199. (identifier (string-append "^" letter-or-digit identifier-end "*"))
  200. (name identifier))
  201. (make-regexp name)))
  202. (define (specification->requirement-name spec)
  203. "Given a specification SPEC, return the requirement name."
  204. (match:substring
  205. (or (regexp-exec %requirement-name-regexp spec)
  206. (error (G_ "Could not extract requirement name in spec:") spec))))
  207. (define (test-section? name)
  208. "Return #t if the section name contains 'test' or 'dev'."
  209. (any (cut string-contains-ci name <>)
  210. '("test" "dev")))
  211. (define (parse-requires.txt requires.txt)
  212. "Given REQUIRES.TXT, a path to a Setuptools requires.txt file, return a list
  213. of lists of requirements.
  214. The first list contains the required dependencies while the second the
  215. optional test dependencies. Note that currently, optional, non-test
  216. dependencies are omitted since these can be difficult or expensive to
  217. satisfy."
  218. (define (comment? line)
  219. ;; Return #t if the given LINE is a comment, #f otherwise.
  220. (string-prefix? "#" (string-trim line)))
  221. (define (section-header? line)
  222. ;; Return #t if the given LINE is a section header, #f otherwise.
  223. (string-prefix? "[" (string-trim line)))
  224. (call-with-input-file requires.txt
  225. (lambda (port)
  226. (let loop ((required-deps '())
  227. (test-deps '())
  228. (inside-test-section? #f)
  229. (optional? #f))
  230. (let ((line (read-line port)))
  231. (cond
  232. ((eof-object? line)
  233. ;; Duplicates can occur, since the same requirement can be
  234. ;; listed multiple times with different conditional markers, e.g.
  235. ;; pytest >= 3 ; python_version >= "3.3"
  236. ;; pytest < 3 ; python_version < "3.3"
  237. (map (compose reverse delete-duplicates)
  238. (list required-deps test-deps)))
  239. ((or (string-null? line) (comment? line))
  240. (loop required-deps test-deps inside-test-section? optional?))
  241. ((section-header? line)
  242. ;; Encountering a section means that all the requirements
  243. ;; listed below are optional. Since we want to pick only the
  244. ;; test dependencies from the optional dependencies, we must
  245. ;; track those separately.
  246. (loop required-deps test-deps (test-section? line) #t))
  247. (inside-test-section?
  248. (loop required-deps
  249. (cons (specification->requirement-name line)
  250. test-deps)
  251. inside-test-section? optional?))
  252. ((not optional?)
  253. (loop (cons (specification->requirement-name line)
  254. required-deps)
  255. test-deps inside-test-section? optional?))
  256. (optional?
  257. ;; Skip optional items.
  258. (loop required-deps test-deps inside-test-section? optional?))
  259. (else
  260. (warning (G_ "parse-requires.txt reached an unexpected \
  261. condition on line ~a~%") line))))))))
  262. (define (parse-wheel-metadata metadata)
  263. "Given METADATA, a Wheel metadata file, return a list of lists of
  264. requirements.
  265. Refer to the documentation of PARSE-REQUIRES.TXT for a description of the
  266. returned value."
  267. ;; METADATA is a RFC-2822-like, header based file.
  268. (define (requires-dist-header? line)
  269. ;; Return #t if the given LINE is a Requires-Dist header.
  270. (string-match "^Requires-Dist: " line))
  271. (define (requires-dist-value line)
  272. (string-drop line (string-length "Requires-Dist: ")))
  273. (define (extra? line)
  274. ;; Return #t if the given LINE is an "extra" requirement.
  275. (string-match "extra == '(.*)'" line))
  276. (define (test-requirement? line)
  277. (and=> (match:substring (extra? line) 1) test-section?))
  278. (call-with-input-file metadata
  279. (lambda (port)
  280. (let loop ((required-deps '())
  281. (test-deps '()))
  282. (let ((line (read-line port)))
  283. (cond
  284. ((eof-object? line)
  285. (map (compose reverse delete-duplicates)
  286. (list required-deps test-deps)))
  287. ((and (requires-dist-header? line) (not (extra? line)))
  288. (loop (cons (specification->requirement-name
  289. (requires-dist-value line))
  290. required-deps)
  291. test-deps))
  292. ((and (requires-dist-header? line) (test-requirement? line))
  293. (loop required-deps
  294. (cons (specification->requirement-name (requires-dist-value line))
  295. test-deps)))
  296. (else
  297. (loop required-deps test-deps)))))))) ;skip line
  298. (define (guess-requirements source-url wheel-url archive)
  299. "Given SOURCE-URL, WHEEL-URL and an ARCHIVE of the package, return a list
  300. of the required packages specified in the requirements.txt file. ARCHIVE will
  301. be extracted in a temporary directory."
  302. (define (read-wheel-metadata wheel-archive)
  303. ;; Given WHEEL-ARCHIVE, a ZIP Python wheel archive, return the package's
  304. ;; requirements, or #f if the metadata file contained therein couldn't be
  305. ;; extracted.
  306. (let* ((dirname (wheel-url->extracted-directory wheel-url))
  307. (metadata (string-append dirname "/METADATA")))
  308. (call-with-temporary-directory
  309. (lambda (dir)
  310. (if (zero?
  311. (parameterize ((current-error-port (%make-void-port "rw+"))
  312. (current-output-port (%make-void-port "rw+")))
  313. (system* "unzip" wheel-archive "-d" dir metadata)))
  314. (parse-wheel-metadata (string-append dir "/" metadata))
  315. (begin
  316. (warning
  317. (G_ "Failed to extract file: ~a from wheel.~%") metadata)
  318. #f))))))
  319. (define (guess-requirements-from-wheel)
  320. ;; Return the package's requirements using the wheel, or #f if an error
  321. ;; occurs.
  322. (call-with-temporary-output-file
  323. (lambda (temp port)
  324. (if wheel-url
  325. (and (url-fetch wheel-url temp)
  326. (read-wheel-metadata temp))
  327. #f))))
  328. (define (guess-requirements-from-source)
  329. ;; Return the package's requirements by guessing them from the source.
  330. (if (compressed-file? source-url)
  331. (call-with-temporary-directory
  332. (lambda (dir)
  333. (parameterize ((current-error-port (%make-void-port "rw+"))
  334. (current-output-port (%make-void-port "rw+")))
  335. (if (string=? "zip" (file-extension source-url))
  336. (invoke "unzip" archive "-d" dir)
  337. (invoke "tar" "xf" archive "-C" dir)))
  338. (let ((requires.txt-files
  339. (find-files dir (lambda (abs-file-name _)
  340. (string-match "\\.egg-info/requires.txt$"
  341. abs-file-name)))))
  342. (match requires.txt-files
  343. (()
  344. (warning (G_ "Cannot guess requirements from source archive:\
  345. no requires.txt file found.~%"))
  346. (list '() '()))
  347. (else (parse-requires.txt (first requires.txt-files)))))))
  348. (begin
  349. (warning (G_ "Unsupported archive format; \
  350. cannot determine package dependencies from source archive: ~a~%")
  351. (basename source-url))
  352. (list '() '()))))
  353. ;; First, try to compute the requirements using the wheel, else, fallback to
  354. ;; reading the "requires.txt" from the egg-info directory from the source
  355. ;; archive.
  356. (or (guess-requirements-from-wheel)
  357. (guess-requirements-from-source)))
  358. (define (compute-inputs source-url wheel-url archive)
  359. "Given the SOURCE-URL and WHEEL-URL of an already downloaded ARCHIVE, return
  360. a pair of lists, each consisting of a list of name/variable pairs, for the
  361. propagated inputs and the native inputs, respectively. Also
  362. return the unaltered list of upstream dependency names."
  363. (define (strip-argparse deps)
  364. (remove (cut string=? "argparse" <>) deps))
  365. (define (requirement->package-name/sort deps)
  366. (map string->symbol
  367. (sort (map python->package-name deps) string-ci<?)))
  368. (define process-requirements
  369. (compose requirement->package-name/sort strip-argparse))
  370. (let ((dependencies (guess-requirements source-url wheel-url archive)))
  371. (values (map process-requirements dependencies)
  372. (concatenate dependencies))))
  373. (define (make-pypi-sexp name version source-url wheel-url home-page synopsis
  374. description license)
  375. "Return the `package' s-expression for a python package with the given NAME,
  376. VERSION, SOURCE-URL, HOME-PAGE, SYNOPSIS, DESCRIPTION, and LICENSE."
  377. (define (maybe-upstream-name name)
  378. (if (string-match ".*\\-[0-9]+" name)
  379. `((properties ,`'(("upstream-name" . ,name))))
  380. '()))
  381. (call-with-temporary-output-file
  382. (lambda (temp port)
  383. (and (url-fetch source-url temp)
  384. (receive (guix-dependencies upstream-dependencies)
  385. (compute-inputs source-url wheel-url temp)
  386. (match guix-dependencies
  387. ((required-inputs native-inputs)
  388. (when (string-suffix? ".zip" source-url)
  389. (set! native-inputs (cons
  390. '("unzip" ,unzip)
  391. native-inputs)))
  392. (values
  393. `(package
  394. (name ,(python->package-name name))
  395. (version ,version)
  396. (source
  397. (origin
  398. (method url-fetch)
  399. (uri (pypi-uri
  400. ;; PyPI URL are case sensitive, but sometimes
  401. ;; a project named using mixed case has a URL
  402. ;; using lower case, so we must work around this
  403. ;; inconsistency. For actual examples, compare
  404. ;; the URLs of the "Deprecated" and "uWSGI" PyPI
  405. ;; packages.
  406. ,(if (string-contains source-url name)
  407. name
  408. (string-downcase name))
  409. version
  410. ;; Some packages have been released as `.zip`
  411. ;; instead of the more common `.tar.gz`. For
  412. ;; example, see "path-and-address".
  413. ,@(if (string-suffix? ".zip" source-url)
  414. '(".zip")
  415. '())))
  416. (sha256
  417. (base32
  418. ,(guix-hash-url temp)))))
  419. ,@(maybe-upstream-name name)
  420. (build-system python-build-system)
  421. ,@(maybe-inputs required-inputs 'propagated-inputs)
  422. ,@(maybe-inputs native-inputs 'native-inputs)
  423. (home-page ,home-page)
  424. (synopsis ,synopsis)
  425. (description ,(beautify-description description))
  426. (license ,(license->symbol license)))
  427. upstream-dependencies))))))))
  428. (define pypi->guix-package
  429. (memoize
  430. (lambda* (package-name #:key repo version)
  431. "Fetch the metadata for PACKAGE-NAME from pypi.org, and return the
  432. `package' s-expression corresponding to that package, or #f on failure."
  433. (let* ((project (pypi-fetch package-name))
  434. (info (and=> project pypi-project-info))
  435. (version (or version (and=> project latest-version))))
  436. (and project
  437. (guard (c ((missing-source-error? c)
  438. (let ((package (missing-source-error-package c)))
  439. (leave (G_ "no source release for pypi package ~a ~a~%")
  440. (project-info-name info) version))))
  441. (make-pypi-sexp (project-info-name info) version
  442. (and=> (source-release project version)
  443. distribution-url)
  444. (and=> (wheel-release project version)
  445. distribution-url)
  446. (project-info-home-page info)
  447. (project-info-summary info)
  448. (project-info-summary info)
  449. (string->license
  450. (project-info-license info)))))))))
  451. (define* (pypi-recursive-import package-name #:optional version)
  452. (recursive-import package-name
  453. #:version version
  454. #:repo->guix-package pypi->guix-package
  455. #:guix-name python->package-name))
  456. (define (string->license str)
  457. "Convert the string STR into a license object."
  458. (match str
  459. ("GNU LGPL" license:lgpl2.0)
  460. ("GPL" license:gpl3)
  461. ((or "BSD" "BSD-3" "BSD License") license:bsd-3)
  462. ("BSD-2-Clause" license:bsd-2)
  463. ((or "MIT" "MIT license" "MIT License" "Expat license") license:expat)
  464. ("Public domain" license:public-domain)
  465. ((or "Apache License, Version 2.0" "Apache 2.0") license:asl2.0)
  466. ("MPL 2.0" license:mpl2.0)
  467. (_ #f)))
  468. (define pypi-package?
  469. (url-predicate
  470. (lambda (url)
  471. (or (string-prefix? "https://pypi.org/" url)
  472. (string-prefix? "https://pypi.python.org/" url)
  473. (string-prefix? "https://pypi.org/packages" url)
  474. (string-prefix? "https://files.pythonhosted.org/packages" url)))))
  475. (define (latest-release package)
  476. "Return an <upstream-source> for the latest release of PACKAGE."
  477. (let* ((pypi-name (guix-package->pypi-name package))
  478. (pypi-package (pypi-fetch pypi-name)))
  479. (and pypi-package
  480. (guard (c ((missing-source-error? c) #f))
  481. (let* ((info (pypi-project-info pypi-package))
  482. (version (project-info-version info))
  483. (dist (source-release pypi-package))
  484. (url (distribution-url dist)))
  485. (upstream-source
  486. (urls (list url))
  487. (signature-urls
  488. (if (distribution-has-signature? dist)
  489. (list (string-append url ".asc"))
  490. #f))
  491. (input-changes
  492. (changed-inputs package
  493. (pypi->guix-package pypi-name)))
  494. (package (package-name package))
  495. (version version)))))))
;; The updater exported by this module.  It is named 'pypi', uses
;; 'pypi-package?' as its predicate and 'latest-release' to compute the
;; latest upstream release.
;; NOTE(review): how the PRED and LATEST fields are used is defined by
;; 'upstream-updater' in (guix upstream) -- confirm there.
(define %pypi-updater
  (upstream-updater
   (name 'pypi)
   (description "Updater for PyPI packages")
   (pred pypi-package?)
   (latest latest-release)))