123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """Semantic Scholar (Science)
- """
- from json import dumps, loads
- from datetime import datetime
- from flask_babel import gettext
# Engine metadata: where the service lives and how its API is consumed.
about = dict(
    website='https://www.semanticscholar.org/',
    wikidata_id='Q22908627',
    official_api_documentation='https://api.semanticscholar.org/',
    use_official_api=True,
    require_api_key=False,
    results='JSON',
)

# Search categories this engine is listed under.
categories = [
    'science',
    'scientific publications',
]

# The engine sends the requested page number with every query.
paging = True

# Endpoint receiving the JSON search request.
search_url = 'https://www.semanticscholar.org/api/1/search'

# Prefix used to build a paper link from its id when no other link is known.
paper_url = 'https://www.semanticscholar.org/paper'
def request(query, params):
    """Prepare the outgoing search request.

    Mutates ``params`` in place: sets the target URL, switches the HTTP
    method to POST, marks the body as JSON and stores the serialized search
    payload under ``params['data']``.

    :param query: the search terms, sent as ``queryString``.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['headers']`` must already be a mutable mapping.
    :return: the same ``params`` object.
    """
    params.update(url=search_url, method='POST')
    params['headers']['content-type'] = 'application/json'

    # Key order is kept as-is so the serialized body stays unchanged.
    body = dict(
        queryString=query,
        page=params['pageno'],
        pageSize=10,
        sort="relevance",
        getQuerySuggestions=False,
        authors=[],
        coAuthors=[],
        venues=[],
        performTitleMatch=True,
    )
    params['data'] = dumps(body)
    return params
def response(resp):
    """Turn a Semantic Scholar search response into a list of result dicts.

    Optional fields may be absent *or* explicitly ``null`` in the JSON body;
    both cases are treated as "no value" instead of raising. A publication
    date that is missing or not in ``YYYY-MM-DD`` form yields
    ``publishedDate=None`` rather than aborting the whole page.

    :param resp: response object whose ``text`` attribute holds the JSON body.
    :return: list of dicts using the ``paper.html`` template, one per entry
        of the ``results`` array.
    :raises KeyError: if the body has no ``results`` key, or an entry lacks
        its ``title`` (or its ``id`` when no link at all is available).
    """
    res = loads(resp.text)
    results = []

    for result in res['results']:
        # `or []` / `or {}` instead of a `.get` default: a default only
        # covers a missing key, not a key whose JSON value is null.
        alternate_links = result.get('alternatePaperLinks') or []

        # url: primary link, then first plain link, then first alternate
        # link, and finally a link built from the paper id.
        url = (result.get('primaryPaperLink') or {}).get('url')
        if not url and result.get('links'):
            url = result['links'][0]
        if not url and alternate_links:
            url = alternate_links[0].get('url')
        if not url:
            url = paper_url + '/%s' % result['id']

        # publishedDate: one bad date must not discard every other result.
        publishedDate = None
        pub_date = result.get('pubDate')
        if pub_date:
            try:
                publishedDate = datetime.strptime(pub_date, "%Y-%m-%d")
            except (TypeError, ValueError):
                publishedDate = None

        # authors: each entry is a sequence whose first item carries the name
        authors = [author[0]['name'] for author in result.get('authors') or []]

        # pick for the first alternate link, but not from the crawler
        pdf_url = None
        for doc in alternate_links:
            if doc.get('linkType') not in ('crawler', 'doi'):
                pdf_url = doc.get('url')
                break

        # comments: only built when all three citation figures are present
        comments = None
        stats = result.get('citationStats')
        if stats and all(
            key in stats
            for key in ('numCitations', 'firstCitationVelocityYear', 'lastCitationVelocityYear')
        ):
            comments = gettext(
                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
            ).format(
                numCitations=stats['numCitations'],
                firstCitationVelocityYear=stats['firstCitationVelocityYear'],
                lastCitationVelocityYear=stats['lastCitationVelocityYear'],
            )

        venue = (result.get('venue') or {}).get('text')
        journal = (result.get('journal') or {}).get('name')

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': result['title']['text'],
                # papers without an abstract get empty content
                'content': (result.get('paperAbstract') or {}).get('text', ''),
                'journal': venue or journal,
                'doi': (result.get('doiInfo') or {}).get('doi'),
                'tags': result.get('fieldsOfStudy'),
                'authors': authors,
                'pdf_url': pdf_url,
                'publishedDate': publishedDate,
                'comments': comments,
            }
        )

    return results
|