semantic_scholar.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Semantic Scholar (Science)
"""
  5. from json import dumps, loads
  6. from datetime import datetime
  7. about = {
  8. "website": 'https://www.semanticscholar.org/',
  9. "wikidata_id": 'Q22908627',
  10. "official_api_documentation": 'https://api.semanticscholar.org/',
  11. "use_official_api": True,
  12. "require_api_key": False,
  13. "results": 'JSON',
  14. }
  15. paging = True
  16. search_url = 'https://www.semanticscholar.org/api/1/search'
  17. paper_url = 'https://www.semanticscholar.org/paper'
  18. def request(query, params):
  19. params['url'] = search_url
  20. params['method'] = 'POST'
  21. params['headers']['content-type'] = 'application/json'
  22. params['data'] = dumps({
  23. "queryString": query,
  24. "page": params['pageno'],
  25. "pageSize": 10,
  26. "sort": "relevance",
  27. "useFallbackRankerService": False,
  28. "useFallbackSearchCluster": False,
  29. "getQuerySuggestions": False,
  30. "authors": [],
  31. "coAuthors": [],
  32. "venues": [],
  33. "performTitleMatch": True,
  34. })
  35. return params
  36. def response(resp):
  37. res = loads(resp.text)
  38. results = []
  39. for result in res['results']:
  40. item = {}
  41. metadata = []
  42. url = result.get('primaryPaperLink', {}).get('url')
  43. if not url and result.get('links'):
  44. url = result.get('links')[0]
  45. if not url:
  46. alternatePaperLinks = result.get('alternatePaperLinks')
  47. if alternatePaperLinks:
  48. url = alternatePaperLinks[0].get('url')
  49. if not url:
  50. url = paper_url + '/%s' % result['id']
  51. item['url'] = url
  52. item['title'] = result['title']['text']
  53. item['content'] = result['paperAbstract']['text']
  54. metadata = result.get('fieldsOfStudy') or []
  55. venue = result.get('venue', {}).get('text')
  56. if venue:
  57. metadata.append(venue)
  58. if metadata:
  59. item['metadata'] = ', '.join(metadata)
  60. pubDate = result.get('pubDate')
  61. if pubDate:
  62. item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
  63. results.append(item)
  64. return results