SaschaMester
/
Searx
镜像自地址 https://github.com/asciimoo/searx


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
							"""
TinEye - Reverse image search
"""

from urllib.parse import urlencode

from datetime import datetime
from flask_babel import gettext

from searx import logger

# Descriptive metadata about the TinEye service queried by this module.
about = dict(
    website="https://tineye.com",
    wikidata_id="Q2382535",
    use_official_api=False,
    require_api_key=False,
    results="JSON",
)


# Engine settings: result category, paging support and safe-search support.
categories = ["images"]
paging = True
safesearch = False

# Base URL of the TinEye site and the path template of its JSON result
# endpoint; ``{page}`` and ``{query}`` are filled in with ``str.format``
# by ``request``.
base_url = "https://tineye.com"
search_string = "/result_json/?page={page}&{query}"

# Child logger 'tineye' of the logger imported from ``searx``.
logger = logger.getChild("tineye")

#: TinEye error message: the image URL could not be read (unsupported format).
FORMAT_NOT_SUPPORTED = gettext(
    "Could not read that image url. "
    "This may be due to an unsupported file format. "
    "TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
)

#: TinEye error message: the image lacks the visual detail needed for matching.
NO_SIGNATURE_ERROR = gettext(
    "The image is too simple to find matches. "
    "TinEye requires a basic level of visual detail to successfully identify matches."
)

#: TinEye error message: the image could not be downloaded.
DOWNLOAD_ERROR = gettext("The image could not be downloaded.")


def request(query, params):
    """Build the TinEye request for a reverse image search.

    The ``query`` string is URL-encoded as the ``url`` argument of TinEye's
    ``/result_json/`` endpoint, together with the requested page number.

    :param query: value sent to TinEye as the ``url`` argument (the URL of
        the image to look up).
    :param params: request parameters; ``params['pageno']`` is read, while
        ``params['headers']`` and ``params['url']`` are updated in place.
    :return: the same ``params`` object that was passed in.
    """
    params['headers'].update({
        'Connection': 'keep-alive',
        # 'deflate' is the registered content coding name (was misspelled)
        'Accept-Encoding': 'gzip, deflate, br',
        'Host': 'tineye.com',
        'DNT': '1',
        'TE': 'trailers',
    })

    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(
        query=urlencode({'url': query}),
        page=params['pageno'],
    )

    return params


def _parse_crawl_date(crawl_date):
    """Convert a TinEye ``crawl_date`` value into a :py:obj:`datetime`.

    Returns ``datetime.min`` when the value is missing, not a string, or can
    not be parsed, so that one odd backlink does not abort the whole result
    list.
    """
    if not crawl_date or not isinstance(crawl_date, str):
        return datetime.min

    # Historical behaviour first: the last three characters are dropped before
    # parsing (presumably surplus sub-second digits -- TODO confirm against the
    # live API).  If that fails, try the untouched string.
    for candidate in (crawl_date[:-3], crawl_date):
        try:
            return datetime.fromisoformat(candidate)
        except ValueError:
            continue
    return datetime.min


def parse_tineye_match(match_json):
    """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
    object.

    Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__

    - `image_url`, link to the result image.
    - `domain`, domain this result was found on.
    - `score`, a number (0 to 100) that indicates how closely the images match.
    - `width`, image width in pixels.
    - `height`, image height in pixels.
    - `size`, image area in pixels.
    - `image_format`, image format (taken from the `format` field of the JSON).
    - `filesize`, image size in bytes.
    - `overlay`, overlay URL.
    - `tags`, whether this match belongs to a collection or stock domain.

    - `backlinks`, a list of Backlink objects pointing to the original websites
      and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
      <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):

      - `url`, the image URL to the image.
      - `backlink`, the original website URL.
      - `crawl_date`, the date the image was crawled (:py:obj:`datetime`;
        ``datetime.min`` when the date is missing or can not be parsed).
      - `image_name`, the `image_name` field of the backlink.

    Fields missing from ``match_json`` are returned as ``None``; entries of
    `backlinks` that are not a :py:obj:`dict` are skipped.
    """

    # HINT: there exists an alternative backlink dict in the domains list / e.g.::
    #
    #     match_json['domains'][0]['backlinks']

    backlinks = []
    # ``or []`` also covers an explicit ``"backlinks": null`` in the JSON
    for backlink_json in match_json.get("backlinks") or []:
        if not isinstance(backlink_json, dict):
            continue

        backlinks.append({
            'url': backlink_json.get("url"),
            'backlink': backlink_json.get("backlink"),
            'crawl_date': _parse_crawl_date(backlink_json.get("crawl_date")),
            'image_name': backlink_json.get("image_name"),
        })

    return {
        'image_url': match_json.get("image_url"),
        'domain': match_json.get("domain"),
        'score': match_json.get("score"),
        'width': match_json.get("width"),
        'height': match_json.get("height"),
        'size': match_json.get("size"),
        'image_format': match_json.get("format"),
        'filesize': match_json.get("filesize"),
        'overlay': match_json.get("overlay"),
        'tags': match_json.get("tags"),
        'backlinks': backlinks,
    }


def response(resp):
    """Parse HTTP response from TinEye.

    :param resp: HTTP response object; ``json()``, ``is_error``,
        ``status_code`` and ``raise_for_status()`` are used.
    :return: a list of result dicts (``images.html`` template), one per match
        that has at least one backlink, optionally followed by a
        ``{'number_of_results': ...}`` item.  For HTTP status 400 and 422 the
        error is logged and an empty list is returned.  For any other error
        status ``resp.raise_for_status()`` is called.
    """
    results = []

    try:
        json_data = resp.json()
    except Exception as exc:  # pylint: disable=broad-except
        msg = "can't parse JSON response // %s" % exc
        logger.error(msg)
        json_data = {'error': msg}

    if not isinstance(json_data, dict):
        # valid JSON, but not the object the code below expects
        msg = "unexpected JSON response of type %s" % type(json_data).__name__
        logger.error(msg)
        json_data = {'error': msg}

    # handle error codes from Tineye

    if resp.is_error:
        if resp.status_code in (400, 422):

            message = 'HTTP status: %s' % resp.status_code
            error = json_data.get('error')
            suggestions = json_data.get('suggestions')
            s_key = suggestions.get('key', '') if isinstance(suggestions, dict) else ''

            if error and s_key:
                message = "%s (%s)" % (error, s_key)
            elif error:
                message = error

            # well-known suggestion keys get a translated, user-friendly message
            if s_key == "Invalid image URL":
                # test https://docs.searxng.org/_static/searxng-wordmark.svg
                message = FORMAT_NOT_SUPPORTED
            elif s_key == 'NO_SIGNATURE_ERROR':
                # test https://pngimg.com/uploads/dot/dot_PNG4.png
                message = NO_SIGNATURE_ERROR
            elif s_key == 'Download Error':
                # test https://notexists
                message = DOWNLOAD_ERROR

            logger.error(message)

            return results

        resp.raise_for_status()

    # append results from matches; a missing / null 'matches' field (e.g. after
    # a JSON parse failure) simply yields no results instead of a KeyError
    for match_json in json_data.get('matches') or []:
        if not isinstance(match_json, dict):
            continue

        tineye_match = parse_tineye_match(match_json)
        if not tineye_match['backlinks']:
            continue

        # only the first backlink of a match is shown
        backlink = tineye_match['backlinks'][0]
        results.append(
            {
                'template': 'images.html',
                'url': backlink['backlink'],
                'thumbnail_src': tineye_match['image_url'],
                'source': backlink['url'],
                'title': backlink['image_name'],
                'img_src': backlink['url'],
                'format': tineye_match['image_format'],
                'width': tineye_match['width'],
                'height': tineye_match['height'],
                'publishedDate': backlink['crawl_date'],
            }
        )

    # append number of results
    number_of_results = json_data.get('num_matches')
    if number_of_results:
        results.append({'number_of_results': number_of_results})

    return results