searx.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. ########################################################################
  2. # Searx-qt - Lightweight desktop application for SearX.
  3. # Copyright (C) 2020 CYBERDEViL
  4. #
  5. # This file is part of Searx-qt.
  6. #
  7. # Searx-qt is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # Searx-qt is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. #
  20. ########################################################################
  21. import time
  22. import urllib.parse
  23. from copy import deepcopy
  24. from searxqt.core.requests import JsonResult, ErrorType
  25. from searxqt.core.handler import HandlerProto, NetworkTypes
  26. from searxqt.translations import _
  class SearchResult(JsonResult):
      """ JsonResult for a search request that additionally treats a
      response without any results as an error.
      """

      def __init__(self, response=None, err="", errType=None):
          """
          @param response: Forwarded unchanged to JsonResult.
          @param err: Error message, forwarded unchanged to JsonResult.
          @type err: str
          @param errType: Error type, forwarded unchanged to JsonResult.
          """
          JsonResult.__init__(
              self,
              response=response,
              err=err,
              errType=errType
          )

      def verifyFurther(self):
          """ Run the JsonResult verification first. When that left no
          error type set and the JSON holds no entries under 'results',
          flag this result as ErrorType.NoResults.
          """
          JsonResult.verifyFurther(self)

          # An earlier check already flagged an error; leave it untouched.
          if self._errType is not None:
              return

          payload = self.json()
          if len(payload.get('results', {})) == 0:
              self._errType = ErrorType.NoResults
              self._err = "NoResults: got: `{0}`".format(payload)
  class Categories:
      """ Keeps an enabled/disabled flag for every known search category.
      """

      # category key -> (label passed through `_`, name emitted by dict())
      types = {
          'general': (_('General'), 'category_general'),
          'files': (_('Files'), 'category_files'),
          'images': (_('Images'), 'category_images'),
          'videos': (_('Videos'), 'category_videos'),
          'it': (_('IT'), 'category_it'),
          'map': (_('Location'), 'category_map'),
          'music': (_('Music'), 'category_music'),
          'news': (_('News'), 'category_news'),
          'science': (_('Science'), 'category_science'),
          'social media': (_('Social'), 'category_social media'),
          'onions': (_('Onions'), 'category_onions')
      }

      def __init__(self):
          self._options = {}
          self.__makeOptions()

      def __makeOptions(self):
          """ Refill the options in place with every category disabled.
          """
          self._options.clear()
          self._options.update(dict.fromkeys(self.types, False))

      def reset(self):
          """ Disable all categories.
          """
          self.__makeOptions()

      def get(self, key):
          """
          @param key: One of the keys in Categories.types
          @type key: str

          @return: Enabled / disabled state stored for the key
          @rtype: bool
          """
          return self._options[key]

      def set(self, key, state):
          """
          @param key: One of the keys in Categories.types
          @type key: str

          @param state: Enabled / disabled state
          @type state: bool
          """
          self._options[key] = state

      def dict(self):
          """ Returns a dict that maps the second element of the
          Categories.types entry of every enabled category to 'on'.

          @rtype: dict
          """
          return {
              self.types[name][1]: 'on'
              for name, enabled in self._options.items()
              if enabled
          }

      def enabledKeys(self):
          """ Returns a list with the keys (from Categories.types) of
          all categories that are currently enabled.

          @rtype: list
          """
          return [name for name, enabled in self._options.items() if enabled]
  class Engines(list):
      """ A list of engine names that can render itself as request data.
      """

      def __init__(self):
          super().__init__()

      def dict(self):
          """ Returns an empty dict when the list is empty, else a dict
          with the items comma-joined under the 'engines' key.

          @rtype: dict
          """
          return {'engines': ",".join(self)} if self else {}
  class SearX:
      """ Holds the settings of a search (instance url, query, language,
      page number, time range, categories and engines) and performs the
      search through the given request handler.
      """

      # Time range key -> label
      Periods = {
          '': _('Anytime'),
          'day': _('Last day'),
          'week': _('Last week'),
          'month': _('Last month'),
          'year': _('Last year')
      }

      # https://github.com/asciimoo/searx/blob/master/searx/languages.py
      Languages = {
          '': _('No language'),
          'all': _('Default language'),
          'af-NA': 'Afrikaans - af-NA',
          'ca-AD': 'Català - ca-AD',
          'da-DK': 'Dansk - da-DK',
          'de': 'Deutsch - de',
          'de-AT': 'Deutsch (Österreich) - de-AT',
          'de-CH': 'Deutsch (Schweiz) - de-CH',
          'de-DE': 'Deutsch (Deutschland) - de-DE',
          'et-EE': 'Eesti - et-EE',
          'en': 'English - en',
          'en-AU': 'English (Australia) - en-AU',
          'en-CA': 'English (Canada) - en-CA',
          'en-GB': 'English (United Kingdom) - en-GB',
          'en-IE': 'English (Ireland) - en-IE',
          'en-IN': 'English (India) - en-IN',
          'en-NZ': 'English (New Zealand) - en-NZ',
          'en-PH': 'English (Philippines) - en-PH',
          'en-SG': 'English (Singapore) - en-SG',
          'en-US': 'English (United States) - en-US',
          'es': 'Español - es',
          'es-AR': 'Español (Argentina) - es-AR',
          'es-CL': 'Español (Chile) - es-CL',
          'es-ES': 'Español (España) - es-ES',
          'es-MX': 'Español (México) - es-MX',
          'fr': 'Français - fr',
          'fr-BE': 'Français (Belgique) - fr-BE',
          'fr-CA': 'Français (Canada) - fr-CA',
          'fr-CH': 'Français (Suisse) - fr-CH',
          'fr-FR': 'Français (France) - fr-FR',
          'hr-HR': 'Hrvatski - hr-HR',
          'id-ID': 'Indonesia - id-ID',
          'it-IT': 'Italiano - it-IT',
          'sw-KE': 'Kiswahili - sw-KE',
          'lv-LV': 'Latviešu - lv-LV',
          'lt-LT': 'Lietuvių - lt-LT',
          'hu-HU': 'Magyar - hu-HU',
          'ms-MY': 'Melayu - ms-MY',
          'nl': 'Nederlands - nl',
          'nl-BE': 'Nederlands (België) - nl-BE',
          'nl-NL': 'Nederlands (Nederland) - nl-NL',
          'nb-NO': 'Norsk Bokmål - nb-NO',
          'pl-PL': 'Polski - pl-PL',
          'pt': 'Português - pt',
          'pt-BR': 'Português (Brasil) - pt-BR',
          'pt-PT': 'Português (Portugal) - pt-PT',
          'ro-RO': 'Română - ro-RO',
          'sk-SK': 'Slovenčina - sk-SK',
          'sl-SI': 'Slovenščina - sl-SI',
          'sr-RS': 'Srpski - sr-RS',
          'fi-FI': 'Suomi - fi-FI',
          'sv-SE': 'Svenska - sv-SE',
          'vi-VN': 'Tiếng Việt - vi-VN',
          'tr-TR': 'Türkçe - tr-TR',
          'is-IS': 'Íslenska - is-IS',
          'cs-CZ': 'Čeština - cs-CZ',
          'el-GR': 'Ελληνικά - el-GR',
          'be-BY': 'Беларуская - be-BY',
          'bg-BG': 'Български - bg-BG',
          'ru-RU': 'Русский - ru-RU',
          'uk-UA': 'Українська - uk-UA',
          'hy-AM': 'Հայերեն - hy-AM',
          'he-IL': 'עברית - he-IL',
          'ar-SA': 'العربية - ar-SA',
          'fa-IR': 'فارسی - fa-IR',
          'th-TH': 'ไทย - th-TH',
          'zh': '中文 - zh',
          'zh-CN': '中文 (中国) - zh-CN',
          'zh-TW': '中文 (台灣) - zh-TW',
          'ja-JP': '日本語 - ja-JP',
          'ko-KR': '한국어 - ko-KR'
      }

      def __init__(self, requestHandler):
          """
          @param requestHandler: Object whose get() method is used by
                                 search() to perform the request.
          """
          self._requestHandler = requestHandler
          self._url = ""
          self._categories = Categories()
          self._engines = Engines()
          self._lang = ''
          self._pageno = ''  # int formated as string, '' while unset
          self._timeRange = ''  # '', 'day', 'week', 'month' or 'year'
          self._kwargs = {
              'data': {
                  'q': '',
                  'format': 'json'
              },
          }

      @property
      def categories(self):
          """
          @return: The categories of this search
          @rtype: Categories
          """
          return self._categories

      @property
      def engines(self):
          """
          @return: The engines of this search
          @rtype: Engines
          """
          return self._engines

      @property
      def url(self):
          """
          @return: Instance url
          @rtype: str
          """
          return self._url

      @url.setter
      def url(self, url):
          """
          @param url: Instance url
          @type url: str
          """
          self._url = url

      @property
      def query(self):
          """
          @return: Search query
          @rtype: str
          """
          return self._kwargs['data']['q']

      @query.setter
      def query(self, q):
          """
          @param q: Search query
          @type q: str
          """
          # NOTE(review): looks like a leftover debug print that writes
          # every query to stdout - confirm and consider removing it.
          print("Set query: {}".format(q))
          self._kwargs['data']['q'] = q

      @property
      def lang(self):
          """
          @return: Language code
          @rtype: str
          """
          return self._lang

      @lang.setter
      def lang(self, lang):
          """
          @param lang: Language code
          @type lang: str
          """
          self._lang = lang

      @property
      def pageno(self):
          """ Page number; 1 while no page number has been set yet.

          @return: Page number
          @rtype: int
          """
          # '' marks "never set": int('') would raise ValueError, which
          # made requestKwargs/search() fail on a fresh instance.
          if not self._pageno:
              return 1
          return int(self._pageno)

      @pageno.setter
      def pageno(self, i):
          """
          @param i: Page number
          @type i: int
          """
          self._pageno = str(i)

      @property
      def timeRange(self):
          """
          @return: Search time range ('', 'day', 'week', 'month' or 'year')
          @rtype: str
          """
          return self._timeRange

      @timeRange.setter
      def timeRange(self, value):
          """
          @param value: Key from SearX.Periods
          @type value: str
          """
          self._timeRange = value

      @property
      def requestKwargs(self):
          """ Returns a fresh dict with the data that will be send with
          the request used for the search operation: the search query
          and format, the enabled engines or categories and, when set,
          language, page-number and time range.

          @rtype: dict
          """
          kwargs = deepcopy(self._kwargs['data'])

          # After testing found that searx will honor only engines when
          # both engines and categories are set.
          if self.engines:
              kwargs.update(self.engines.dict())
          else:
              kwargs.update(self.categories.dict())

          # The searx search API names these parameters 'language' and
          # 'time_range'; other names are not picked up by the instance.
          if self.lang:
              kwargs.update({'language': self.lang})

          if self.pageno:
              kwargs.update({'pageno': self.pageno})

          if self.timeRange:
              kwargs.update({'time_range': self.timeRange})

          return kwargs

      def reset(self):
          """ Clear url, time range and language, set the page number
          back to 1, disable all categories and remove all engines. The
          search query is left as it is.
          """
          self.url = ""
          self.timeRange = ""
          self.lang = ""
          self.pageno = 1
          self.categories.reset()
          self.engines.clear()

      def search(self):
          """ Perform search operation with current set values.

          @returns: The result of this search.
          @rtype: SearchResult
          """
          return self._requestHandler.get(
              self.url,
              data=self.requestKwargs,
              ResultType=SearchResult
          )
  class SearxConfigHandler(HandlerProto):
      """ Keeps instance and engine data up to date with the data an
      instance publishes on its config page.
      """

      def __init__(self, requestsHandler):
          """
          @param requestsHandler: Forwarded unchanged to HandlerProto.
          """
          HandlerProto.__init__(self, requestsHandler)

      @staticmethod
      def _configUrl(url):
          """ Returns the url of the config page that belongs to the
          given instance url.

          The config path is appended to the path of the instance url
          (query and fragment are dropped), so an instance that lives
          below a sub-path, like 'https://host/searx/', keeps that
          sub-path instead of being requested at 'https://host/config'.

          @param url: Instance url
          @type url: str

          @rtype: str
          """
          parts = urllib.parse.urlsplit(url)
          path = parts.path
          if not path.endswith('/'):
              path += '/'
          return urllib.parse.urlunsplit(
              (parts.scheme, parts.netloc, path + "config", '', '')
          )

      def updateInstance(self, url):
          """ Request the config of the instance and store the version,
          network type, engines and update time it yields.

          @param url: Instance url, has to be a key in self.instances.
          @type url: str

          @return: True when the request result evaluated as true and
                   the instance got updated, else False.
          @rtype: bool
          """
          newUrl = self._configUrl(url)
          result = self.requestsHandler.get(newUrl)

          if not result:
              return False

          instance = self.instances[url]
          j = result.json()

          # Update instance version
          instance.update({
              "version": j.get("version", "")
          })

          # Update instance network_type to use our own network type
          # definitions as class NetworkTypes (core/handler.py)
          instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})

          # Update Engines
          #
          # What we get:
          #   "engines": [
          #       categories (list, str)
          #       enabled (bool)
          #       language_support (bool)
          #       name (str)
          #       paging (bool)
          #       safesearch (bool)
          #       shortcut (str)
          #       supported_languages (list, str)
          #       time_range_support (bool)
          #       timeout (float)
          #   ]
          #
          # What instanceModel wants:
          #   "engines" : {
          #       "not evil": {
          #           "error_rate": 15,
          #           "errors": [0]
          #       }
          #   }
          #
          # What enginesModel wants:
          #   "engines": {
          #       "1337x": {
          #           "categories": ["videos"],
          #           "language_support": true,
          #           "paging": true,
          #           "safesearch": false,
          #           "shortcut": "1337x",
          #           "time_range_support": false
          #       },
          #   }
          newInstanceEngines = {}
          newEnginesEngines = {}
          for engine in j.get('engines', []):
              name = engine.get('name', "")
              if not name:
                  # Nothing to key this engine on; skip it.
                  continue

              newInstanceEngines[name] = {}

              # Details of engines we already know are left untouched.
              if name not in self.engines:
                  newEnginesEngines[name] = {
                      "categories": list(engine.get('categories', [])),
                      "language_support": engine.get(
                          'language_support',
                          False
                      ),
                      "paging": engine.get('paging', False),
                      "safesearch": engine.get('safesearch', False),
                      "shortcut": engine.get('shortcut', ""),
                      "time_range_support": engine.get(
                          'time_range_support',
                          False
                      )
                  }

          instance.update({
              "engines": dict(newInstanceEngines)
          })
          self.engines.update(newEnginesEngines)

          # Update instance lastUpdated
          instance.update({
              "lastUpdated": time.time()
          })

          return True

      def addInstance(self, url):
          """ Add an empty entry for the instance when it is not present
          yet.

          @param url: url of the instance to add.
          @type url: str

          @return: True when the instance got added, False when the url
                   was already present.
          @rtype: bool
          """
          if url not in self.instances:
              self._instances[url] = {}
              return True
          return False

      def removeInstance(self, url):
          """ Remove an instance. An unknown url raises KeyError.

          @param url: url of the instance to remove.
          @type url: str
          """
          del self._instances[url]

      def removeMultiInstances(self, urls):
          """ Remove instance(s) by url without emitting changed for every
          instance that got removed. An unknown url raises KeyError.

          @param urls: list with urls of instances to remove.
          @type urls: list
          """
          for url in urls:
              del self._instances[url]