wikicommons.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Wikimedia Commons (images)"""

import datetime
from urllib.parse import urlencode

from searx.utils import html_to_text, humanize_bytes

# about
about = {
    "website": 'https://commons.wikimedia.org/',
    "wikidata_id": 'Q565',
    "official_api_documentation": 'https://commons.wikimedia.org/w/api.php',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['images']
search_type = 'images'
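# `search_type` selects which Commons media class this engine instance searches
# (see `search_types` below); it is typically overridden per engine entry in the
# instance configuration, e.g. to 'videos', 'audio' or 'files'.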

base_url = "https://commons.wikimedia.org"
search_prefix = (
    '?action=query'
    '&format=json'
    '&generator=search'
    '&gsrnamespace=6'
    '&gsrprop=snippet'
    '&prop=info|imageinfo'
    '&iiprop=url|size|mime'
    '&iiurlheight=180'  # needed for the thumb url
)
paging = True
number_of_results = 10
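
# Map from this engine's `search_type` to the MediaWiki `filetype:` filter
# values used in the generator search.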
search_types = {
    'images': 'bitmap|drawing',
    'videos': 'video',
    'audio': 'audio',
    'files': 'multimedia|office|archive|3d',
}


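# Build the MediaWiki API request URL for the given query and result page.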
def request(query, params):
    language = 'en'
    if params['language'] != 'all':
        language = params['language'].split('-')[0]

    if search_type not in search_types:
        raise ValueError(f"Unsupported search type: {search_type}")

    filetype = search_types[search_type]
    args = {
        'uselang': language,
        'gsrlimit': number_of_results,
        'gsroffset': number_of_results * (params["pageno"] - 1),
        'gsrsearch': f"filetype:{filetype} {query}",
    }

    params["url"] = f"{base_url}/w/api.php{search_prefix}&{urlencode(args, safe=':|')}"
    return params


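# Parse the API JSON into result dicts, choosing result fields and template
# according to the configured search_type.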
def response(resp):
    results = []
    json = resp.json()

    if not json.get("query", {}).get("pages"):
        return results

    for item in json["query"]["pages"].values():
        imageinfo = item["imageinfo"][0]
        title = item["title"].replace("File:", "").rsplit('.', 1)[0]
        result = {
            'url': imageinfo["descriptionurl"],
            'title': title,
            'content': html_to_text(item["snippet"]),
        }
        if search_type == "images":
            result['template'] = 'images.html'
            result['img_src'] = imageinfo["url"]
            result['thumbnail_src'] = imageinfo["thumburl"]
            result['resolution'] = f'{imageinfo["width"]} x {imageinfo["height"]}'
        else:
            result['thumbnail'] = imageinfo["thumburl"]

        if search_type == "videos":
            result['template'] = 'videos.html'
            if imageinfo.get('duration'):
                result['length'] = datetime.timedelta(seconds=int(imageinfo['duration']))
            result['iframe_src'] = imageinfo['url']
        elif search_type == "files":
            result['template'] = 'files.html'
            result['metadata'] = imageinfo['mime']
            result['size'] = humanize_bytes(imageinfo['size'])
        elif search_type == "audio":
            result['iframe_src'] = imageinfo['url']
        results.append(result)

    return results
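

# Illustrative sketch only, not part of the engine: a minimal way to drive the
# two hooks above by hand for a quick manual check, assuming the third-party
# `requests` package is installed. The `params` keys used here ('language',
# 'pageno') are the ones request() actually reads; everything else is left to
# the engine framework in normal operation.
if __name__ == '__main__':
    import requests  # assumption: only needed for this manual check

    params = request('Eiffel Tower', {'language': 'en-US', 'pageno': 1})
    resp = requests.get(params['url'], timeout=10)
    for item in response(resp):
        print(item['title'], '->', item['url'])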