flickr_noapi.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Flickr (Images)
  4. """
  5. from json import loads
  6. from time import time
  7. import re
  8. from urllib.parse import urlencode
  9. from searx.engines import logger
  10. from searx.utils import ecma_unescape, html_to_text
  11. logger = logger.getChild('flickr-noapi')
  12. # about
  13. about = {
  14. "website": 'https://www.flickr.com',
  15. "wikidata_id": 'Q103204',
  16. "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
  17. "use_official_api": False,
  18. "require_api_key": False,
  19. "results": 'HTML',
  20. }
  21. categories = ['images']
  22. url = 'https://www.flickr.com/'
  23. search_url = url + 'search?{query}&page={page}'
  24. time_range_url = '&min_upload_date={start}&max_upload_date={end}'
  25. photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
  26. modelexport_re = re.compile(r"^\s*modelExport:\s*({.*}),$", re.M)
  27. image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
  28. paging = True
  29. time_range_support = True
  30. time_range_dict = {'day': 60 * 60 * 24,
  31. 'week': 60 * 60 * 24 * 7,
  32. 'month': 60 * 60 * 24 * 7 * 4,
  33. 'year': 60 * 60 * 24 * 7 * 52}
  34. def build_flickr_url(user_id, photo_id):
  35. return photo_url.format(userid=user_id, photoid=photo_id)
  36. def _get_time_range_url(time_range):
  37. if time_range in time_range_dict:
  38. return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range]))
  39. return ''
  40. def request(query, params):
  41. params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
  42. + _get_time_range_url(params['time_range']))
  43. return params
def response(resp):
    """Parse the Flickr search HTML page into a list of image results.

    Flickr embeds its result data as a JavaScript ``modelExport: {...},``
    JSON blob inside the page; this extracts that blob, walks the 'legend'
    index table into 'main' to reach each photo record, and builds one
    ``images.html`` result dict per photo.
    """
    results = []
    # locate the embedded "modelExport: {...}," JSON blob in the raw HTML
    matches = modelexport_re.search(resp.text)
    if matches is None:
        return results
    match = matches.group(1)
    model_export = loads(match)
    if 'legend' not in model_export:
        return results
    # 'legend' is a list of index paths; each entry addresses one photo
    # record inside model_export['main'] (5-level nested lookup below)
    legend = model_export['legend']
    # handle empty page
    if not legend or not legend[0]:
        return results
    for index in legend:
        # index[1] and index[4] are list positions (JSON gives them as
        # strings); the remaining components are dict keys
        photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][int(index[4])]
        author = ecma_unescape(photo.get('realname', ''))
        source = ecma_unescape(photo.get('username', '')) + ' @ Flickr'
        title = ecma_unescape(photo.get('title', ''))
        content = html_to_text(ecma_unescape(photo.get('description', '')))
        img_src = None
        # From the biggest to the lowest format
        for image_size in image_sizes:
            if image_size in photo['sizes']:
                img_src = photo['sizes'][image_size]['url']
                # e.g. "jpg 1024x768" — presumably all sizes are JPEG;
                # the format label is hard-coded, not read from the data
                img_format = 'jpg ' \
                    + str(photo['sizes'][image_size]['width']) \
                    + 'x' \
                    + str(photo['sizes'][image_size]['height'])
                break
        # skip photos with no usable size at all (img_format would also
        # be unset past this point, so the continue keeps it safe)
        if not img_src:
            logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
            continue
        # For a bigger thumbnail, keep only the url_z, not the url_n
        if 'n' in photo['sizes']:
            thumbnail_src = photo['sizes']['n']['url']
        elif 'z' in photo['sizes']:
            thumbnail_src = photo['sizes']['z']['url']
        else:
            thumbnail_src = img_src
        if 'ownerNsid' not in photo:
            # should not happen, disowned photo? Show it anyway
            url = img_src
        else:
            url = build_flickr_url(photo['ownerNsid'], photo['id'])
        result = {
            'url': url,
            'img_src': img_src,
            'thumbnail_src': thumbnail_src,
            'source': source,
            'img_format': img_format,
            'template': 'images.html'
        }
        # re-encode/decode with errors='ignore' to strip characters that
        # cannot round-trip through the default codec
        result['author'] = author.encode(errors='ignore').decode()
        result['source'] = source.encode(errors='ignore').decode()
        result['title'] = title.encode(errors='ignore').decode()
        result['content'] = content.encode(errors='ignore').decode()
        results.append(result)
    return results