loc.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Library of Congress: query Photo, Print and Drawing from API endpoint_
  3. ``photos``.
  4. .. _endpoint: https://www.loc.gov/apis/json-and-yaml/requests/endpoints/
  5. .. note::
   Besides the ``photos`` endpoint_ there are more endpoints available; we are
   looking forward to contributions implementing more endpoints.
  8. """
  9. from urllib.parse import urlencode
  10. from searx.network import raise_for_httperror
  11. about = {
  12. "website": 'https://www.loc.gov/pictures/',
  13. "wikidata_id": 'Q131454',
  14. "official_api_documentation": 'https://www.loc.gov/api',
  15. "use_official_api": True,
  16. "require_api_key": False,
  17. "results": 'JSON',
  18. }
  19. categories = ['images']
  20. paging = True
  21. endpoint = 'photos'
  22. base_url = 'https://www.loc.gov'
  23. search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
  24. def request(query, params):
  25. search_path = search_string.format(
  26. endpoint=endpoint,
  27. query=urlencode({'q': query}),
  28. page=params['pageno'],
  29. )
  30. params['url'] = base_url + search_path
  31. params['raise_for_httperror'] = False
  32. return params
  33. def response(resp):
  34. results = []
  35. json_data = resp.json()
  36. json_results = json_data.get('results')
  37. if not json_results:
  38. # when a search term has none results, loc sends a JSON in a HTTP 404
  39. # response and the HTTP status code is set in the 'status' element.
  40. if json_data.get('status') == 404:
  41. return results
  42. raise_for_httperror(resp)
  43. for result in json_results:
  44. url = result["item"].get("link")
  45. if not url:
  46. continue
  47. img_list = result.get('image_url')
  48. if not img_list:
  49. continue
  50. title = result['title']
  51. if title.startswith('['):
  52. title = title.strip('[]')
  53. content_items = [
  54. result['item'].get('created_published_date'),
  55. result['item'].get('summary', [None])[0],
  56. result['item'].get('notes', [None])[0],
  57. result['item'].get('part_of', [None])[0],
  58. ]
  59. author = None
  60. if result['item'].get('creators'):
  61. author = result['item']['creators'][0]['title']
  62. results.append(
  63. {
  64. 'template': 'images.html',
  65. 'url': url,
  66. 'title': title,
  67. 'content': ' / '.join([i for i in content_items if i]),
  68. 'img_src': img_list[-1],
  69. 'thumbnail_src': img_list[0],
  70. 'author': author,
  71. }
  72. )
  73. return results