123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """PubMed (Scholar publications)
- """
- from datetime import datetime
- from urllib.parse import urlencode
- from lxml import etree
- from searx.network import get
- from searx.utils import (
- eval_xpath_getindex,
- eval_xpath_list,
- extract_text,
- )
# Engine metadata: where the service lives, which API is used and what it returns.
about = dict(
    website="https://www.ncbi.nlm.nih.gov/pubmed/",
    wikidata_id="Q1540899",
    official_api_documentation=dict(
        url="https://www.ncbi.nlm.nih.gov/home/develop/api/",
        comment="More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/",
    ),
    use_official_api=True,
    require_api_key=False,
    results="XML",
)

categories = ["science", "scientific publications"]

# ESearch URL template; ``request`` fills in ``{query}`` (an already
# URL-encoded ``term=...`` pair), ``{offset}`` and ``{hits}``.
base_url = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    "?db=pubmed&{query}&retstart={offset}&retmax={hits}"
)

# Engine dependent config: page size used by ``request`` to compute the
# offset and the ``retmax`` value.
number_of_results = 10

# Prefix that ``response`` prepends to a PMID to build the result link.
pubmed_url = "https://www.ncbi.nlm.nih.gov/pubmed/"
def request(query, params):
    """Store the ESearch URL for *query* in ``params['url']`` and return *params*.

    The result offset is derived from the 1-based ``params['pageno']`` and
    the module-level ``number_of_results``; the query is URL-encoded as the
    ``term`` parameter before being substituted into ``base_url``.
    """
    # Page 1 starts at offset 0, page 2 at ``number_of_results``, and so on.
    first_hit = number_of_results * (params['pageno'] - 1)

    params['url'] = base_url.format(
        query=urlencode({'term': query}),
        offset=first_hit,
        hits=number_of_results,
    )
    return params
def _joined_pmids(search_root):
    """Return the IDs listed in an ESearch result as a comma-separated string.

    ``<Id>`` elements without text are skipped; the result is ``''`` when the
    search matched nothing.
    """
    id_nodes = search_root.xpath('//eSearchResult/IdList/Id')
    return ','.join(node.text.strip() for node in id_nodes if node.text and node.text.strip())


def _first_text(node, xpath_spec):
    """Return the first text node matched by *xpath_spec*, or ``None`` if there is none."""
    return extract_text(eval_xpath_getindex(node, xpath_spec, 0, default=None), allow_none=True)


def _author_names(medline):
    """Return ``"ForeName LastName"`` strings for the authors of one citation.

    Missing or empty name parts are treated as ``''``; authors for which both
    parts are empty are left out instead of being reported as ``''``.
    """
    names = []
    for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
        fore = eval_xpath_getindex(author, './ForeName', 0, default=None)
        last = eval_xpath_getindex(author, './LastName', 0, default=None)
        # ``.text`` is None for an empty element, so guard both levels.
        fore_text = '' if fore is None else (fore.text or '')
        last_text = '' if last is None else (last.text or '')
        full_name = (fore_text + ' ' + last_text).strip()
        if full_name:
            names.append(full_name)
    return names


def _accepted_date(entry):
    """Return the ``accepted`` history date of *entry* as a ``datetime``, or ``None``.

    ``None`` is returned when the entry has no such date, when one of
    Year/Month/Day is missing or empty, or when the values do not form a
    valid calendar date.
    """
    accepted = eval_xpath_getindex(
        entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
    )
    if accepted is None:
        return None

    parts = [
        eval_xpath_getindex(accepted, './Year', 0, default=None),
        eval_xpath_getindex(accepted, './Month', 0, default=None),
        eval_xpath_getindex(accepted, './Day', 0, default=None),
    ]
    if any(part is None or not part.text for part in parts):
        return None

    try:
        return datetime.strptime('-'.join(part.text.strip() for part in parts), '%Y-%m-%d')
    except ValueError as exc:
        # The date is optional metadata: report the problem for debugging and
        # keep the result rather than failing the whole response.
        import logging  # pylint: disable=import-outside-toplevel

        logging.getLogger(__name__).debug('pubmed: ignoring malformed accepted date: %s', exc)
        return None


def response(resp):  # pylint: disable=too-many-locals
    """Turn an ESearch response into a list of ``paper.html`` results.

    ``resp.content`` is parsed as the ESearch XML to collect the matching
    IDs.  A second request (EFetch) then retrieves the records for those IDs,
    and every ``PubmedArticle`` in that reply becomes one result dict with
    the keys ``template``, ``url``, ``title``, ``content``, ``journal``,
    ``issn``, ``authors``, ``doi`` and, when a valid "accepted" date is
    present, ``publishedDate``.

    Returns an empty list, without issuing the second request, when the
    search returned no IDs.
    """
    results = []

    pmids_string = _joined_pmids(etree.XML(resp.content))
    if not pmids_string:
        # Nothing matched: an EFetch call with an empty ``id`` is pointless.
        return results

    retrieve_url = (
        'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
        'db=pubmed&retmode=xml&id={pmids_string}'
    ).format(pmids_string=pmids_string)
    search_results = etree.XML(get(retrieve_url).content)

    for entry in eval_xpath_list(search_results, '//PubmedArticle'):
        medline = eval_xpath_getindex(entry, './MedlineCitation', 0)

        # Join every text fragment of the title: ``.text`` alone stops at the
        # first child element and is None when the title starts with one.
        title_node = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0)
        title = ' '.join(''.join(title_node.itertext()).split())

        pmid = eval_xpath_getindex(medline, './/PMID', 0).text

        # Only the first text node of the abstract is used as the snippet.
        content = _first_text(medline, './/Abstract/AbstractText//text()')
        issn = _first_text(medline, './Article/Journal/ISSN/text()')

        res_dict = {
            'template': 'paper.html',
            'url': pubmed_url + pmid,
            'title': title,
            'content': content or "",
            'journal': _first_text(medline, './Article/Journal/Title/text()'),
            # Report no ISSN as an empty list, not as ``[None]``.
            'issn': [issn] if issn else [],
            'authors': _author_names(medline),
            'doi': _first_text(medline, './/ELocationID[@EIdType="doi"]/text()'),
        }

        published = _accepted_date(entry)
        if published is not None:
            res_dict['publishedDate'] = published

        results.append(res_dict)

    return results
|