123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
- ########################################################################
- # Searx-Qt - Lightweight desktop application for Searx.
- # Copyright (C) 2020-2022 CYBERDEViL
- #
- # This file is part of Searx-Qt.
- #
- # Searx-Qt is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Searx-Qt is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
- import time
- import urllib.parse
- from copy import deepcopy
- from searxqt.core.requests import JsonResult, ErrorType
- from searxqt.core import jsonVerify
- from searxqt.core.handler import HandlerProto, NetworkTypes
- from searxqt.translations import _
# Values used for the expected json structure verification (the
# `ExpectedStructure` class attributes below are built from these).
# Each name spells out the Python type(s) handed to jsonVerify.
v_str = jsonVerify.Value(str)
v_int = jsonVerify.Value(int)
v_float = jsonVerify.Value(float)
v_bool = jsonVerify.Value(bool)
v_intFloat = jsonVerify.MultiValue((int, float))
v_noneStr = jsonVerify.MultiValue((jsonVerify.NoneType, str))
v_noneStrInt = jsonVerify.MultiValue((jsonVerify.NoneType, str, int))
v_intStr = jsonVerify.MultiValue((int, str))
v_intFloatNone = jsonVerify.MultiValue((int, float, jsonVerify.NoneType))
v_strFloat = jsonVerify.MultiValue((str, float))
# Used for keys whose value has too many possible shapes to describe.
v_ignore = jsonVerify.IgnoreValue()
class SearchResult(JsonResult):
    """Result of a search request.

    `ExpectedStructure` describes the JSON layout of a search response
    (presumably consumed by JsonResult's verification, which is not
    visible in this file). `verifyFurther` additionally marks a response
    without any content as `ErrorType.NoResults`.
    """

    ExpectedStructure = {
        "query": v_str,
        "number_of_results": v_intFloat,
        "results": [{
            "url": v_str,
            "title": v_str,
            "engine": v_str,
            "parsed_url": [
                v_str,
                v_str,
                v_str,
                v_str,
                v_str,
                v_str
            ],
            "engines": [v_str],
            "positions": [v_int],
            "content": v_str,
            "score": v_float,
            "category": v_str,
            "pretty_url": v_str,
            # NOTE(review): a literal `False` instead of a jsonVerify
            # value (e.g. v_bool) - confirm this is intended.
            "is_onion": False,
            "publishedDate": v_str,
            "pubdate": v_str,
            "thumbnail_src": v_str,
            "template": v_str,
            "author": v_noneStr,
            "source": v_str,
            "img_format": v_str,
            "img_src": v_noneStr,

            # Files
            "seed": v_intStr,
            "leech": v_intStr,
            "magnetlink": v_str,
            "torrentfile": v_str,
            "filesize": v_intFloatNone,
            "infohash": v_str,
            "files": v_noneStrInt,
            "link": v_str,

            # Videos
            "thumbnail": v_str,
            "embedded": v_str,
            "length": v_str,

            # IT
            "code_language": v_str,
            "codelines": [[v_int, v_str]],
            "repository": v_str,

            # Location
            "longitude": v_strFloat,
            "latitude": v_strFloat,
            "boundingbox": [v_strFloat],
            # Ignored; too many types/structures. TODO
            "geojson": v_ignore,
            # Ignored; too many types/structures. TODO
            "address": v_ignore,
            # Ignored; too many types/structures. TODO
            "osm": v_ignore,
        }],
        "answers": [v_str],
        "corrections": [v_str],
        "infoboxes": [{
            "infobox": v_str,
            "id": v_str,
            "content": v_str,  # TODO some instances return empty list..
            "img_src": v_noneStr,
            "attributes": [
                {
                    "label": v_str,
                    "value": v_str,
                    "entity": v_str
                }
            ],
            "urls": [
                {
                    "title": v_str,
                    "url": v_str,
                    "official": v_bool,
                    "entity": v_str
                }
            ],
            "relatedTopics": v_ignore,  # [v_strDict],
            "engine": v_str,
            "engines": [v_str]
        }],
        "suggestions": [v_str],
        "unresponsive_engines": [[v_str, v_str]]
    }

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        super().__init__(url, response, err=err, errType=errType)

    def verifyFurther(self):
        """ Flag the result as `ErrorType.NoResults` when the response
        carries no content.

        Runs the base class check first. Only a result that is still
        `ErrorType.Success` afterwards is inspected; it stays successful
        when at least one of 'results', 'answers', 'corrections',
        'infoboxes' or 'suggestions' is non-empty.
        """
        JsonResult.verifyFurther(self)

        # An earlier error must not be overwritten.
        if self._errType != ErrorType.Success:
            return

        contentKeys = (
            'results',
            'answers',
            'corrections',
            'infoboxes',
            'suggestions'
        )
        payload = self.json()
        if any(len(payload.get(key, [])) for key in contentKeys):
            return

        self._errType = ErrorType.NoResults
        self._err = "NoResults: got: `{0}`".format(self.json())
class SearxConfigResult(JsonResult):
    """Result of a request for an instance's configuration.

    `ExpectedStructure` describes the JSON layout of the configuration
    document (presumably consumed by JsonResult's verification, which is
    not visible in this file).
    """

    ExpectedStructure = {
        "autocomplete": v_str,
        "brand": {
            "CONTACT_URL": v_str,
            "DOCS_URL": v_str,
            "GIT_URL": v_str,
            "GIT_BRANCH": v_str
        },
        "categories": [v_str],
        "default_doi_resolver": v_str,
        "default_locale": v_str,
        "default_theme": v_str,
        "doi_resolvers": [v_str],
        "engines": [
            {
                "categories": [v_str],
                "enabled": v_bool,
                "language_support": v_bool,
                "name": v_str,
                "paging": v_bool,
                "safesearch": v_bool,
                "shortcut": v_str,
                "supported_languages": [v_str],
                "time_range_support": v_bool,
                "timeout": v_intFloat
            }
        ],
        "instance_name": v_str,
        "locales": {
            "": v_str
        },
        "plugins": [
            {
                "enabled": v_bool,
                "name": v_str
            }
        ],
        "safe_search": v_int,
        "version": v_str
    }

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        super().__init__(url, response, err=err, errType=errType)
class Categories:
    """Enabled/disabled state for every known search category.

    `types` maps a category key to a tuple of
    (translated label, request parameter name).
    """

    types = {
        'general': (_('General'), 'category_general'),
        'files': (_('Files'), 'category_files'),
        'images': (_('Images'), 'category_images'),
        'videos': (_('Videos'), 'category_videos'),
        'it': (_('IT'), 'category_it'),
        'map': (_('Location'), 'category_map'),
        'music': (_('Music'), 'category_music'),
        'news': (_('News'), 'category_news'),
        'science': (_('Science'), 'category_science'),
        'social media': (_('Social'), 'category_social media'),
        'onions': (_('Onions'), 'category_onions'),
        'shopping': (_('Shopping'), 'category_shopping')
    }

    def __init__(self):
        self._options = {}
        self.__makeOptions()

    def __makeOptions(self):
        """ (Re)build the state table with every category disabled.
        """
        self._options.clear()
        self._options.update(dict.fromkeys(self.types, False))

    def reset(self):
        """ Disable all categories.
        """
        self.__makeOptions()

    def get(self, key):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @return: Enabled / disabled state of the category
        @rtype: bool
        """
        return self._options[key]

    def set(self, key, state):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @param state: Enabled / disabled state
        @type state: bool
        """
        self._options[key] = state

    def dict(self):
        """ Returns the enabled categories as request data; the request
        parameter name of each enabled category mapped to 'on'.

        @rtype: dict
        """
        return {
            self.types[key][1]: 'on'
            for key, enabled in self._options.items()
            if enabled
        }

    def enabledKeys(self):
        """ Returns a list with the keys (from Categories.types) of all
        enabled categories.
        """
        return [key for key, enabled in self._options.items() if enabled]
class Engines(list):
    """List of engine names to search with."""

    def __init__(self):
        super().__init__()

    def dict(self):
        """ Returns the engines as request data: the names joined with a
        comma under the 'engines' key, or an empty dict when the list is
        empty.

        @rtype: dict
        """
        return {'engines': ",".join(self)} if self else {}
class SearX:
    """Holds the parameters of a search (instance url, query, language,
    page number, time range, categories and engines) and performs the
    search through the given request handler.
    """

    Periods = {
        '': _('Anytime'),
        'day': _('Last day'),
        'week': _('Last week'),
        'month': _('Last month'),
        'year': _('Last year')
    }

    # https://github.com/asciimoo/searx/blob/master/searx/languages.py
    Languages = {
        '': _('No language'),
        'all': _('Default language'),
        'af-NA': 'Afrikaans - af-NA',
        'ca-AD': 'Català - ca-AD',
        'da-DK': 'Dansk - da-DK',
        'de': 'Deutsch - de',
        'de-AT': 'Deutsch (Österreich) - de-AT',
        'de-CH': 'Deutsch (Schweiz) - de-CH',
        'de-DE': 'Deutsch (Deutschland) - de-DE',
        'et-EE': 'Eesti - et-EE',
        'en': 'English - en',
        'en-AU': 'English (Australia) - en-AU',
        'en-CA': 'English (Canada) - en-CA',
        'en-GB': 'English (United Kingdom) - en-GB',
        'en-IE': 'English (Ireland) - en-IE',
        'en-IN': 'English (India) - en-IN',
        'en-NZ': 'English (New Zealand) - en-NZ',
        'en-PH': 'English (Philippines) - en-PH',
        'en-SG': 'English (Singapore) - en-SG',
        'en-US': 'English (United States) - en-US',
        'es': 'Español - es',
        'es-AR': 'Español (Argentina) - es-AR',
        'es-CL': 'Español (Chile) - es-CL',
        'es-ES': 'Español (España) - es-ES',
        'es-MX': 'Español (México) - es-MX',
        'fr': 'Français - fr',
        'fr-BE': 'Français (Belgique) - fr-BE',
        'fr-CA': 'Français (Canada) - fr-CA',
        'fr-CH': 'Français (Suisse) - fr-CH',
        'fr-FR': 'Français (France) - fr-FR',
        'hr-HR': 'Hrvatski - hr-HR',
        'id-ID': 'Indonesia - id-ID',
        'it-IT': 'Italiano - it-IT',
        'sw-KE': 'Kiswahili - sw-KE',
        'lv-LV': 'Latviešu - lv-LV',
        'lt-LT': 'Lietuvių - lt-LT',
        'hu-HU': 'Magyar - hu-HU',
        'ms-MY': 'Melayu - ms-MY',
        'nl': 'Nederlands - nl',
        'nl-BE': 'Nederlands (België) - nl-BE',
        'nl-NL': 'Nederlands (Nederland) - nl-NL',
        'nb-NO': 'Norsk Bokmål - nb-NO',
        'pl-PL': 'Polski - pl-PL',
        'pt': 'Português - pt',
        'pt-BR': 'Português (Brasil) - pt-BR',
        'pt-PT': 'Português (Portugal) - pt-PT',
        'ro-RO': 'Română - ro-RO',
        'sk-SK': 'Slovenčina - sk-SK',
        'sl-SI': 'Slovenščina - sl-SI',
        'sr-RS': 'Srpski - sr-RS',
        'fi-FI': 'Suomi - fi-FI',
        'sv-SE': 'Svenska - sv-SE',
        'vi-VN': 'Tiếng Việt - vi-VN',
        'tr-TR': 'Türkçe - tr-TR',
        'is-IS': 'Íslenska - is-IS',
        'cs-CZ': 'Čeština - cs-CZ',
        'el-GR': 'Ελληνικά - el-GR',
        'be-BY': 'Беларуская - be-BY',
        'bg-BG': 'Български - bg-BG',
        'ru-RU': 'Русский - ru-RU',
        'uk-UA': 'Українська - uk-UA',
        'hy-AM': 'Հայերեն - hy-AM',
        'he-IL': 'עברית - he-IL',
        'ar-SA': 'العربية - ar-SA',
        'fa-IR': 'فارسی - fa-IR',
        'th-TH': 'ไทย - th-TH',
        'zh': '中文 - zh',
        'zh-CN': '中文 (中国) - zh-CN',
        'zh-TW': '中文 (台灣) - zh-TW',
        'ja-JP': '日本語 - ja-JP',
        'ko-KR': '한국어 - ko-KR'
    }

    def __init__(self, requestHandler):
        """
        @param requestHandler: Object whose `get` method performs the
                               actual request (see `search`).
        """
        self._requestHandler = requestHandler
        self._url = ""
        self._categories = Categories()
        self._engines = Engines()
        self._lang = ''
        self._pageno = ''  # int formatted as string, '' while unset
        self._timeRange = ''  # '', 'day', 'week', 'month' or 'year'

        self._kwargs = {
            'data': {
                'q': '',
                'format': 'json'
            },
        }

    @property
    def categories(self):
        """
        @return: Category states of this search
        @rtype: Categories
        """
        return self._categories

    @property
    def engines(self):
        """
        @return: Engines to search with
        @rtype: Engines
        """
        return self._engines

    @property
    def url(self):
        """
        @return: Instance url
        @rtype: str
        """
        return self._url

    @url.setter
    def url(self, url):
        """
        @param url: Instance url
        @type url: str
        """
        self._url = url

    @property
    def query(self):
        """
        @return: Search query
        @rtype: str
        """
        return self._kwargs['data']['q']

    @query.setter
    def query(self, q):
        """
        @param q: Search query
        @type q: str
        """
        self._kwargs['data']['q'] = q

    @property
    def lang(self):
        """
        @return: Language code
        @rtype: str
        """
        return self._lang

    @lang.setter
    def lang(self, lang):
        """
        @param lang: Language code
        @type lang: str
        """
        self._lang = lang

    @property
    def pageno(self):
        """
        @return: Page number; 1 when no page number has been set yet.
        @rtype: int
        """
        # The page number is stored as a string and starts out empty;
        # int('') would raise ValueError, so fall back to the first page.
        if not self._pageno:
            return 1
        return int(self._pageno)

    @pageno.setter
    def pageno(self, i):
        """
        @param i: Page number
        @type i: int
        """
        self._pageno = str(i)

    @property
    def timeRange(self):
        """
        @return: Search time range ('', 'day', 'week', 'month' or 'year')
        @rtype: str
        """
        return self._timeRange

    @timeRange.setter
    def timeRange(self, value):
        """
        @param value: Key from SearX.Periods
        @type value: str
        """
        self._timeRange = value

    @property
    def requestKwargs(self):
        """ Returns the data that will be sent with the search request:
        the search query and format plus, when set, the enabled
        engines/categories, language, page number and time range.

        The returned dict is a copy; changing it does not alter the
        stored query data.

        @rtype: dict
        """
        data = deepcopy(self._kwargs['data'])

        # After testing found that searx will honor only engines when
        # both engines and categories are set.
        if self.engines:
            data.update(self.engines.dict())
        elif self.categories:
            data.update(self.categories.dict())

        # The Searx search API names this parameter 'language'.
        if self.lang:
            data['language'] = self.lang

        # Read the raw value: an unset page number must not be sent, and
        # page 0 is left out as well.
        if self._pageno:
            page = int(self._pageno)
            if page:
                data['pageno'] = page

        # The Searx search API names this parameter 'time_range'.
        if self.timeRange:
            data['time_range'] = self.timeRange

        return data

    def reset(self):
        """ Reset url, time range, language, page number, categories and
        engines. The search query is left untouched.
        """
        self.url = ""
        self.timeRange = ""
        self.lang = ""
        self.pageno = 1
        self.categories.reset()
        self.engines.clear()

    def search(self):
        """ Perform the search operation with the currently set values.

        @returns: The result of this search.
        @rtype: SearchResult
        """
        return self._requestHandler.get(
            self.url,
            data=self.requestKwargs,
            ResultType=SearchResult
        )
class SearxConfigHandler(HandlerProto):
    """Handler that fills in instance data from the configuration
    document each instance serves itself.
    """

    def __init__(self, requestsHandler):
        HandlerProto.__init__(self, requestsHandler)

    def updateInstance(self, url):
        """ Request the configuration of the instance and store its
        version, network type, engines and update time.

        Engines that are not yet present in `self.engines` are added
        there as well.

        @param url: url of the instance to update; it has to be present
                    in `self.instances`.
        @type url: str

        @return: True when the config request succeeded and the instance
                 was updated, False otherwise.
        @rtype: bool
        """
        # Append 'config' to the instance url instead of joining with
        # the absolute path '/config'; the latter would throw away the
        # path of instances that are served from a sub-path
        # (https://host/searx/ has to become https://host/searx/config).
        configUrl = urllib.parse.urljoin(url.rstrip("/") + "/", "config")
        result = self.requestsHandler.get(
            configUrl,
            ResultType=SearxConfigResult
        )
        if not result:
            return False

        instance = self.instances[url]
        config = result.json()

        # Update instance version
        instance.update({
            "version": config.get("version", "")
        })

        # Update instance network_type to use our own network type
        # definitions as class NetworkTypes (core/handler.py)
        instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})

        # Update Engines
        #
        # What we get:
        #     "engines": [
        #         categories (list, str)
        #         enabled (bool)
        #         language_support (bool)
        #         name (str)
        #         paging (bool)
        #         safesearch (bool)
        #         shortcut (str)
        #         supported_languages (list, str)
        #         time_range_support (bool)
        #         timeout (float)
        #     ]
        #
        # What instanceModel wants
        #     "engines" : {
        #         "not evil": {
        #             "error_rate": 15,
        #             "errors": [
        #                 0
        #             ]
        #         }
        #     }
        #
        # What enginesModel wants
        #     "engines": {
        #         "1337x": {
        #             "categories": [
        #                 "videos"
        #             ],
        #             "language_support": true,
        #             "paging": true,
        #             "safesearch": false,
        #             "shortcut": "1337x",
        #             "time_range_support": false
        #         },
        instanceEngines = {}
        unknownEngines = {}
        for engine in config.get('engines', []):
            name = engine.get('name', "")
            if not name:
                # Engines without a name cannot be keyed; skip them.
                continue

            instanceEngines[name] = {}

            # Engines that are already known keep their stored data.
            if name not in self.engines:
                unknownEngines[name] = {
                    "categories": list(engine.get('categories', [])),
                    "language_support": engine.get(
                        'language_support',
                        False
                    ),
                    "paging": engine.get('paging', False),
                    "safesearch": engine.get('safesearch', False),
                    "shortcut": engine.get('shortcut', ""),
                    "time_range_support": engine.get(
                        'time_range_support',
                        False
                    )
                }

        instance.update({
            "engines": instanceEngines
        })
        self.engines.update(unknownEngines)

        # Update instance lastUpdated
        instance.update({
            "lastUpdated": time.time()
        })
        return True

    def addInstance(self, url):
        """ Register a new, still empty, instance.

        @param url: url of the instance to add.
        @type url: str

        @return: True when the instance was added, False when an
                 instance with this url is already present.
        @rtype: bool
        """
        if url in self.instances:
            return False
        self._instances[url] = {}
        return True

    def removeInstance(self, url):
        """ Remove an instance. The url has to be present (presumably a
        KeyError is raised otherwise - depends on the type of
        `self._instances`, which is not defined here).

        @param url: url of the instance to remove.
        @type url: str
        """
        del self._instances[url]

    def removeMultiInstances(self, urls):
        """ Remove instance(s) by url in a single call. Every url has to
        be present, as with `removeInstance`.

        @param urls: list with urls of instances to remove.
        @type urls: list
        """
        for url in urls:
            del self._instances[url]
|