tineye.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. """
  2. Tineye - Reverse search images
  3. """
  4. from urllib.parse import urlencode
  5. from datetime import datetime
  6. from flask_babel import gettext
  7. from searx import logger
# Metadata about the TinEye service this engine module queries.
about = {
    "website": "https://tineye.com",
    "wikidata_id": "Q2382535",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

# Engine settings.
categories = ['images']
paging = True  # request() passes params['pageno'] as the ``page`` URL argument
safesearch = False

# URL parts: request() concatenates them and fills in {page} and {query}.
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'

# Child logger named 'tineye'; this rebinds the imported ``logger`` name.
logger = logger.getChild('tineye')
# Translatable messages (wrapped in gettext) that response() logs in place of
# the raw TinEye error text for specific ``suggestions`` keys.

# Used by response() when the suggestion key is "Invalid image URL".
FORMAT_NOT_SUPPORTED = gettext(
    "Could not read that image url. This may be due to an unsupported file"
    " format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
)
"""TinEye error message"""

# Used by response() when the suggestion key is 'NO_SIGNATURE_ERROR'.
NO_SIGNATURE_ERROR = gettext(
    "The image is too simple to find matches. TinEye requires a basic level of"
    " visual detail to successfully identify matches."
)
"""TinEye error message"""

# Used by response() when the suggestion key is 'Download Error'.
DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
"""TinEye error message"""
  33. def request(query, params):
  34. params['url'] = base_url +\
  35. search_string.format(
  36. query=urlencode({'url': query}),
  37. page=params['pageno'])
  38. params['headers'].update({
  39. 'Connection': 'keep-alive',
  40. 'Accept-Encoding': 'gzip, defalte, br',
  41. 'Host': 'tineye.com',
  42. 'DNT': '1',
  43. 'TE': 'trailers',
  44. })
  45. query = urlencode({'url': query})
  46. # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
  47. params['url'] = base_url + search_string.format(query=query, page=params['pageno'])
  48. return params
  49. def parse_tineye_match(match_json):
  50. """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
  51. object.
  52. Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__
  53. - `image_url`, link to the result image.
  54. - `domain`, domain this result was found on.
  55. - `score`, a number (0 to 100) that indicates how closely the images match.
  56. - `width`, image width in pixels.
  57. - `height`, image height in pixels.
  58. - `size`, image area in pixels.
  59. - `format`, image format.
  60. - `filesize`, image size in bytes.
  61. - `overlay`, overlay URL.
  62. - `tags`, whether this match belongs to a collection or stock domain.
  63. - `backlinks`, a list of Backlink objects pointing to the original websites
  64. and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
  65. <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):
  66. - `url`, the image URL to the image.
  67. - `backlink`, the original website URL.
  68. - `crawl_date`, the date the image was crawled.
  69. """
  70. # HINT: there exists an alternative backlink dict in the domains list / e.g.::
  71. #
  72. # match_json['domains'][0]['backlinks']
  73. backlinks = []
  74. if "backlinks" in match_json:
  75. for backlink_json in match_json["backlinks"]:
  76. if not isinstance(backlink_json, dict):
  77. continue
  78. crawl_date = backlink_json.get("crawl_date")
  79. if crawl_date:
  80. crawl_date = datetime.fromisoformat(crawl_date[:-3])
  81. else:
  82. crawl_date = datetime.min
  83. backlinks.append({
  84. 'url': backlink_json.get("url"),
  85. 'backlink': backlink_json.get("backlink"),
  86. 'crawl_date': crawl_date,
  87. 'image_name': backlink_json.get("image_name")}
  88. )
  89. return {
  90. 'image_url': match_json.get("image_url"),
  91. 'domain': match_json.get("domain"),
  92. 'score': match_json.get("score"),
  93. 'width': match_json.get("width"),
  94. 'height': match_json.get("height"),
  95. 'size': match_json.get("size"),
  96. 'image_format': match_json.get("format"),
  97. 'filesize': match_json.get("filesize"),
  98. 'overlay': match_json.get("overlay"),
  99. 'tags': match_json.get("tags"),
  100. 'backlinks': backlinks,
  101. }
  102. def response(resp):
  103. """Parse HTTP response from TinEye."""
  104. results = []
  105. try:
  106. json_data = resp.json()
  107. except Exception as exc: # pylint: disable=broad-except
  108. msg = "can't parse JSON response // %s" % exc
  109. logger.error(msg)
  110. json_data = {'error': msg}
  111. # handle error codes from Tineye
  112. if resp.is_error:
  113. if resp.status_code in (400, 422):
  114. message = 'HTTP status: %s' % resp.status_code
  115. error = json_data.get('error')
  116. s_key = json_data.get('suggestions', {}).get('key', '')
  117. if error and s_key:
  118. message = "%s (%s)" % (error, s_key)
  119. elif error:
  120. message = error
  121. if s_key == "Invalid image URL":
  122. # test https://docs.searxng.org/_static/searxng-wordmark.svg
  123. message = FORMAT_NOT_SUPPORTED
  124. elif s_key == 'NO_SIGNATURE_ERROR':
  125. # test https://pngimg.com/uploads/dot/dot_PNG4.png
  126. message = NO_SIGNATURE_ERROR
  127. elif s_key == 'Download Error':
  128. # test https://notexists
  129. message = DOWNLOAD_ERROR
  130. logger.error(message)
  131. return results
  132. resp.raise_for_status()
  133. # append results from matches
  134. for match_json in json_data['matches']:
  135. tineye_match = parse_tineye_match(match_json)
  136. if not tineye_match['backlinks']:
  137. continue
  138. backlink = tineye_match['backlinks'][0]
  139. results.append(
  140. {
  141. 'template': 'images.html',
  142. 'url': backlink['backlink'],
  143. 'thumbnail_src': tineye_match['image_url'],
  144. 'source': backlink['url'],
  145. 'title': backlink['image_name'],
  146. 'img_src': backlink['url'],
  147. 'format': tineye_match['image_format'],
  148. 'widht': tineye_match['width'],
  149. 'height': tineye_match['height'],
  150. 'publishedDate': backlink['crawl_date'],
  151. }
  152. )
  153. # append number of results
  154. number_of_results = json_data.get('num_matches')
  155. if number_of_results:
  156. results.append({'number_of_results': number_of_results})
  157. return results