semantic_scholar.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Semantic Scholar (Science)
"""

from json import dumps, loads
from datetime import datetime

from flask_babel import gettext
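
# engine metadata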
about = {
    "website": 'https://www.semanticscholar.org/',
    "wikidata_id": 'Q22908627',
    "official_api_documentation": 'https://api.semanticscholar.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
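
# engine configuration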
categories = ['science', 'scientific publications']
paging = True
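
# note: this is the website's internal search API, not the public
# api.semanticscholar.org endpoint listed in about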
search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper'


def request(query, params):
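    """Build a POST request with a JSON payload for the search API."""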
    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['content-type'] = 'application/json'
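    # one page of 10 results per request; query suggestions are disabled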
    params['data'] = dumps(
        {
            "queryString": query,
            "page": params['pageno'],
            "pageSize": 10,
            "sort": "relevance",
            "getQuerySuggestions": False,
            "authors": [],
            "coAuthors": [],
            "venues": [],
            "performTitleMatch": True,
        }
    )
    return params


def response(resp):
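    """Parse the JSON response and map each paper to a 'paper.html' result."""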
    res = loads(resp.text)
    results = []

    for result in res['results']:
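        # prefer the primary paper link; fall back to the first generic link,
        # then the first alternate link, and finally a URL built from the paper id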
        url = result.get('primaryPaperLink', {}).get('url')
        if not url and result.get('links'):
            url = result.get('links')[0]
        if not url:
            alternatePaperLinks = result.get('alternatePaperLinks')
            if alternatePaperLinks:
                url = alternatePaperLinks[0].get('url')
        if not url:
            url = paper_url + '/%s' % result['id']

        # publishedDate: only set when the API reports a publication date
        if 'pubDate' in result:
            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
        else:
            publishedDate = None

        # authors: each entry in 'authors' is a list; take the name of its first element
        authors = [author[0]['name'] for author in result.get('authors', [])]

        # pdf_url: pick the first alternate link that is neither from the
        # crawler nor a plain DOI link
        pdf_url = None
        for doc in result.get('alternatePaperLinks', []):
            if doc['linkType'] not in ('crawler', 'doi'):
                pdf_url = doc['url']
                break

        # comments: a localized summary of the citation statistics
        comments = None
        if 'citationStats' in result:
            comments = gettext(
                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
            ).format(
                numCitations=result['citationStats']['numCitations'],
                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
            )

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': result['title']['text'],
                'content': result['paperAbstract']['text'],
                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
                'doi': result.get('doiInfo', {}).get('doi'),
                'tags': result.get('fieldsOfStudy'),
                'authors': authors,
                'pdf_url': pdf_url,
                'publishedDate': publishedDate,
                'comments': comments,
            }
        )

    return results