# searx.py
########################################################################
#  Searx-qt - Lightweight desktop application for SearX.
#  Copyright (C) 2020  CYBERDEViL
#
#  This file is part of Searx-qt.
#
#  Searx-qt is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  Searx-qt is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
########################################################################
import time
import urllib.parse
from copy import deepcopy

from searxqt.core import jsonVerify
from searxqt.core.handler import HandlerProto, NetworkTypes
from searxqt.core.requests import JsonResult, ErrorType
from searxqt.translations import _
  28. # Values used for the expected json structure verification.
  29. v_str = jsonVerify.Value(str)
  30. v_int = jsonVerify.Value(int)
  31. v_float = jsonVerify.Value(float)
  32. v_bool = jsonVerify.Value(bool)
  33. v_intFloat = jsonVerify.MultiValue((int, float))
  34. v_noneStr = jsonVerify.MultiValue((jsonVerify.NoneType, str))
  35. v_intStr = jsonVerify.MultiValue((int, str))
  36. v_intFloat = jsonVerify.MultiValue((int, float))
  37. v_strFloat = jsonVerify.MultiValue((str, float))
  38. v_ignore = jsonVerify.IgnoreValue()
  39. class SearchResult(JsonResult):
  40. ExpectedStructure = {
  41. "query": v_str,
  42. "number_of_results": v_intFloat,
  43. "results": [{
  44. "url": v_str,
  45. "title": v_str,
  46. "engine": v_str,
  47. "parsed_url": [
  48. v_str,
  49. v_str,
  50. v_str,
  51. v_str,
  52. v_str,
  53. v_str
  54. ],
  55. "engines": [v_str],
  56. "positions": [v_int],
  57. "content": v_str,
  58. "score": v_float,
  59. "category": v_str,
  60. "pretty_url": v_str,
  61. "is_onion": False,
  62. "publishedDate": v_str,
  63. "pubdate": v_str,
  64. "thumbnail_src": v_str,
  65. "template": v_str,
  66. "author": v_noneStr,
  67. "source": v_str,
  68. "img_format": v_str,
  69. "img_src": v_noneStr,
  70. # Files
  71. "seed": v_intStr,
  72. "leech": v_intStr,
  73. "magnetlink": v_str,
  74. "torrentfile": v_str,
  75. "filesize": v_intFloat,
  76. "infohash": v_str,
  77. "files": v_noneStr,
  78. "link": v_str,
  79. # Videos
  80. "thumbnail": v_str,
  81. "embedded": v_str,
  82. "length": v_str,
  83. # IT
  84. "code_language": v_str,
  85. "codelines": [[v_int, v_str]],
  86. "repository": v_str,
  87. # Location
  88. "longitude": v_strFloat,
  89. "latitude": v_strFloat,
  90. "boundingbox": [v_strFloat],
  91. # Ignore; to many types/structures. TODO
  92. "geojson": v_ignore,
  93. # Ignore; to many types/structures. TODO
  94. "address": v_ignore,
  95. # Ignore; to many types/structures. TODO
  96. "osm": v_ignore,
  97. }],
  98. "answers": [v_str],
  99. "corrections": [v_str],
  100. "infoboxes": [{
  101. "infobox": v_str,
  102. "id": v_str,
  103. "content": v_str,
  104. "img_src": v_noneStr,
  105. "attributes": [
  106. {
  107. "label": v_str,
  108. "value": v_str,
  109. "entity": v_str
  110. }
  111. ],
  112. "urls": [
  113. {
  114. "title": v_str,
  115. "url": v_str,
  116. "official": v_bool,
  117. "entity": v_str
  118. }
  119. ],
  120. "relatedTopics": [v_str],
  121. "engine": v_str,
  122. "engines": [v_str]
  123. }],
  124. "suggestions": [v_str],
  125. "unresponsive_engines": [[v_str, v_str]]
  126. }
  127. def __init__(self, url, response, err="", errType=None):
  128. JsonResult.__init__(self, url, response, err=err, errType=errType)
  129. def verifyFurther(self):
  130. JsonResult.verifyFurther(self)
  131. if self._errType is None and not len(self.json().get('results', {})):
  132. self._errType = ErrorType.NoResults
  133. self._err = "NoResults: got: `{0}`".format(self.json())
  134. class SearxConfigResult(JsonResult):
  135. ExpectedStructure = {
  136. "autocomplete": v_str,
  137. "brand": {
  138. "CONTACT_URL": v_str,
  139. "DOCS_URL": v_str,
  140. "GIT_URL": v_str,
  141. "GIT_BRANCH": v_str
  142. },
  143. "categories": [v_str],
  144. "default_doi_resolver": v_str,
  145. "default_locale": v_str,
  146. "default_theme": v_str,
  147. "doi_resolvers": [v_str],
  148. "engines": [
  149. {
  150. "categories": [v_str],
  151. "enabled": v_bool,
  152. "language_support": v_bool,
  153. "name": v_str,
  154. "paging": v_bool,
  155. "safesearch": v_bool,
  156. "shortcut": v_str,
  157. "supported_languages": [v_str],
  158. "time_range_support": v_bool,
  159. "timeout": v_intFloat
  160. }
  161. ],
  162. "instance_name": v_str,
  163. "locales": {
  164. "": v_str
  165. },
  166. "plugins": [
  167. {
  168. "enabled": v_bool,
  169. "name": v_str
  170. }
  171. ],
  172. "safe_search": v_int,
  173. "version": v_str
  174. }
  175. def __init__(self, url, response, err="", errType=None):
  176. JsonResult.__init__(self, url, response, err=err, errType=errType)
  177. class Categories:
  178. types = {
  179. 'general': (_('General'), 'category_general'),
  180. 'files': (_('Files'), 'category_files'),
  181. 'images': (_('Images'), 'category_images'),
  182. 'videos': (_('Videos'), 'category_videos'),
  183. 'it': (_('IT'), 'category_it'),
  184. 'map': (_('Location'), 'category_map'),
  185. 'music': (_('Music'), 'category_music'),
  186. 'news': (_('News'), 'category_news'),
  187. 'science': (_('Science'), 'category_science'),
  188. 'social media': (_('Social'), 'category_social media'),
  189. 'onions': (_('Onions'), 'category_onions')
  190. }
  191. def __init__(self):
  192. self._options = {}
  193. self.__makeOptions()
  194. def __makeOptions(self):
  195. self._options.clear()
  196. for key, t in self.types.items():
  197. self._options.update({key: False})
  198. def reset(self):
  199. self.__makeOptions()
  200. def get(self, key):
  201. return self._options[key]
  202. def set(self, key, state):
  203. """
  204. @param key: One of the keys in Categories.types
  205. @type key: str
  206. @param state: Enabled / disabled state
  207. @type state: bool
  208. """
  209. self._options[key] = state
  210. def dict(self):
  211. newDict = {}
  212. for key, state in self._options.items():
  213. if state:
  214. newDict.update({self.types[key][1]: 'on'})
  215. return newDict
  216. def enabledKeys(self):
  217. """ Returns a list with enabled engine strings (key from
  218. Categories.types)
  219. """
  220. return [key for key, state in self._options.items() if state]
  221. class Engines(list):
  222. def __init__(self):
  223. list.__init__(self)
  224. def dict(self):
  225. if not self:
  226. return {}
  227. return {
  228. 'engines': ",".join(self)
  229. }
  230. class SearX:
  231. Periods = {
  232. '': _('Anytime'),
  233. 'day': _('Last day'),
  234. 'week': _('Last week'),
  235. 'month': _('Last month'),
  236. 'year': _('Last year')
  237. }
  238. # https://github.com/asciimoo/searx/blob/master/searx/languages.py
  239. Languages = {
  240. '': _('No language'),
  241. 'all': _('Default language'),
  242. 'af-NA': 'Afrikaans - af-NA',
  243. 'ca-AD': 'Català - ca-AD',
  244. 'da-DK': 'Dansk - da-DK',
  245. 'de': 'Deutsch - de',
  246. 'de-AT': 'Deutsch (Österreich) - de-AT',
  247. 'de-CH': 'Deutsch (Schweiz) - de-CH',
  248. 'de-DE': 'Deutsch (Deutschland) - de-DE',
  249. 'et-EE': 'Eesti - et-EE',
  250. 'en': 'English - en',
  251. 'en-AU': 'English (Australia) - en-AU',
  252. 'en-CA': 'English (Canada) - en-CA',
  253. 'en-GB': 'English (United Kingdom) - en-GB',
  254. 'en-IE': 'English (Ireland) - en-IE',
  255. 'en-IN': 'English (India) - en-IN',
  256. 'en-NZ': 'English (New Zealand) - en-NZ',
  257. 'en-PH': 'English (Philippines) - en-PH',
  258. 'en-SG': 'English (Singapore) - en-SG',
  259. 'en-US': 'English (United States) - en-US',
  260. 'es': 'Español - es',
  261. 'es-AR': 'Español (Argentina) - es-AR',
  262. 'es-CL': 'Español (Chile) - es-CL',
  263. 'es-ES': 'Español (España) - es-ES',
  264. 'es-MX': 'Español (México) - es-MX',
  265. 'fr': 'Français - fr',
  266. 'fr-BE': 'Français (Belgique) - fr-BE',
  267. 'fr-CA': 'Français (Canada) - fr-CA',
  268. 'fr-CH': 'Français (Suisse) - fr-CH',
  269. 'fr-FR': 'Français (France) - fr-FR',
  270. 'hr-HR': 'Hrvatski - hr-HR',
  271. 'id-ID': 'Indonesia - id-ID',
  272. 'it-IT': 'Italiano - it-IT',
  273. 'sw-KE': 'Kiswahili - sw-KE',
  274. 'lv-LV': 'Latviešu - lv-LV',
  275. 'lt-LT': 'Lietuvių - lt-LT',
  276. 'hu-HU': 'Magyar - hu-HU',
  277. 'ms-MY': 'Melayu - ms-MY',
  278. 'nl': 'Nederlands - nl',
  279. 'nl-BE': 'Nederlands (België) - nl-BE',
  280. 'nl-NL': 'Nederlands (Nederland) - nl-NL',
  281. 'nb-NO': 'Norsk Bokmål - nb-NO',
  282. 'pl-PL': 'Polski - pl-PL',
  283. 'pt': 'Português - pt',
  284. 'pt-BR': 'Português (Brasil) - pt-BR',
  285. 'pt-PT': 'Português (Portugal) - pt-PT',
  286. 'ro-RO': 'Română - ro-RO',
  287. 'sk-SK': 'Slovenčina - sk-SK',
  288. 'sl-SI': 'Slovenščina - sl-SI',
  289. 'sr-RS': 'Srpski - sr-RS',
  290. 'fi-FI': 'Suomi - fi-FI',
  291. 'sv-SE': 'Svenska - sv-SE',
  292. 'vi-VN': 'Tiếng Việt - vi-VN',
  293. 'tr-TR': 'Türkçe - tr-TR',
  294. 'is-IS': 'Íslenska - is-IS',
  295. 'cs-CZ': 'Čeština - cs-CZ',
  296. 'el-GR': 'Ελληνικά - el-GR',
  297. 'be-BY': 'Беларуская - be-BY',
  298. 'bg-BG': 'Български - bg-BG',
  299. 'ru-RU': 'Русский - ru-RU',
  300. 'uk-UA': 'Українська - uk-UA',
  301. 'hy-AM': 'Հայերեն - hy-AM',
  302. 'he-IL': 'עברית - he-IL',
  303. 'ar-SA': 'العربية - ar-SA',
  304. 'fa-IR': 'فارسی - fa-IR',
  305. 'th-TH': 'ไทย - th-TH',
  306. 'zh': '中文 - zh',
  307. 'zh-CN': '中文 (中国) - zh-CN',
  308. 'zh-TW': '中文 (台灣) - zh-TW',
  309. 'ja-JP': '日本語 - ja-JP',
  310. 'ko-KR': '한국어 - ko-KR'
  311. }
  312. def __init__(self, requestHandler):
  313. self._requestHandler = requestHandler
  314. self._url = ""
  315. self._categories = Categories()
  316. self._engines = Engines()
  317. self._lang = ''
  318. self._pageno = '' # int formatted as string
  319. self._timeRange = '' # '', 'day', 'week', 'month' or 'year'
  320. self._kwargs = {
  321. 'data': {
  322. 'q': '',
  323. 'format': 'json'
  324. },
  325. }
  326. @property
  327. def categories(self): return self._categories
  328. @property
  329. def engines(self): return self._engines
  330. @property
  331. def url(self):
  332. """
  333. @return: Instance url
  334. @rtype: str
  335. """
  336. return self._url
  337. @url.setter
  338. def url(self, url):
  339. """
  340. @param url: Instance url
  341. @type url: str
  342. """
  343. self._url = url
  344. @property
  345. def query(self):
  346. """
  347. @return: Search query
  348. @rtype: str
  349. """
  350. return self._kwargs['data']['q']
  351. @query.setter
  352. def query(self, q):
  353. """
  354. @param q: Search query
  355. @type q: str
  356. """
  357. self._kwargs['data']['q'] = q
  358. @property
  359. def lang(self):
  360. """
  361. @return: Language code
  362. @rtype: str
  363. """
  364. return self._lang
  365. @lang.setter
  366. def lang(self, lang):
  367. """
  368. @param lang: Language code
  369. @type lang: str
  370. """
  371. self._lang = lang
  372. @property
  373. def pageno(self):
  374. """
  375. @return: Page number
  376. @rtype: int
  377. """
  378. return int(self._pageno)
  379. @pageno.setter
  380. def pageno(self, i):
  381. """
  382. @param i: Page number
  383. @type i: int
  384. """
  385. self._pageno = str(i)
  386. @property
  387. def timeRange(self):
  388. """
  389. @return: Search time range ('', 'day', 'week', 'month' or 'year')
  390. @rtype: str
  391. """
  392. return self._timeRange
  393. @timeRange.setter
  394. def timeRange(self, value):
  395. """
  396. @param value: Key from SearX.Periods
  397. @type value: str
  398. """
  399. self._timeRange = value
  400. @property
  401. def requestKwargs(self):
  402. """ Returns current data that will be send with the POST
  403. request used for the search operation. The search query,
  404. language, page-number and enabled categories/engines.
  405. @rtype: dict
  406. """
  407. kwargs = deepcopy(self._kwargs['data'])
  408. # After testing found that searx will honor only engines when
  409. # both engines and categories are set.
  410. if self.engines:
  411. kwargs.update(self.engines.dict())
  412. elif self.categories:
  413. kwargs.update(self.categories.dict())
  414. if self.lang:
  415. kwargs.update({'lang': self.lang})
  416. if self.pageno:
  417. kwargs.update({'pageno': self.pageno})
  418. if self.timeRange:
  419. kwargs.update({'timeRange': self.timeRange})
  420. return kwargs
  421. def reset(self):
  422. self.url = ""
  423. self.timeRange = ""
  424. self.lang = ""
  425. self.pageno = 1
  426. self.categories.reset()
  427. self.engines.clear()
  428. def search(self):
  429. """ Preform search operation with current set values.
  430. @returns: The result of this search.
  431. @rtype: SearchResult
  432. """
  433. return self._requestHandler.get(
  434. self.url,
  435. data=self.requestKwargs,
  436. ResultType=SearchResult
  437. )
  438. class SearxConfigHandler(HandlerProto):
  439. def __init__(self, requestsHandler):
  440. HandlerProto.__init__(self, requestsHandler)
  441. def updateInstance(self, url):
  442. newUrl = urllib.parse.urljoin(url, "/config")
  443. result = self.requestsHandler.get(newUrl, ResultType=SearxConfigResult)
  444. if result:
  445. instance = self.instances[url]
  446. j = result.json()
  447. """ Update instance version
  448. """
  449. instance.update({
  450. "version": j.get("version", "")
  451. })
  452. """ Update instance network_type to use our own network type
  453. definitions as class NetworkTypes (core/handler.py)
  454. """
  455. instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})
  456. """ Update Engines
  457. What we get:
  458. "engines": [
  459. categories (list, str)
  460. enabled (bool)
  461. language_support (bool)
  462. name (str)
  463. paging (bool)
  464. safesearch (bool)
  465. shortcut (str)
  466. supported_languages (list, str)
  467. time_range_support (bool)
  468. timeout (float)
  469. ]
  470. What instanceModel wants
  471. "engines" : {
  472. "not evil": {
  473. "error_rate": 15,
  474. "errors": [
  475. 0
  476. ]
  477. }
  478. }
  479. What enginesModel wants
  480. "engines": {
  481. "1337x": {
  482. "categories": [
  483. "videos"
  484. ],
  485. "language_support": true,
  486. "paging": true,
  487. "safesearch": false,
  488. "shortcut": "1337x",
  489. "time_range_support": false
  490. },
  491. """
  492. newInstanceEngines = {}
  493. newEnginesEngines = {}
  494. for engine in j.get('engines', []):
  495. name = engine.get('name', "")
  496. if not name:
  497. continue
  498. newInstanceEngines.update({
  499. name: {}
  500. })
  501. if name not in self.engines:
  502. newEnginesEngines.update({
  503. name: {
  504. "categories": list(engine.get('categories', [])),
  505. "language_support": engine.get(
  506. 'language_support',
  507. False
  508. ),
  509. "paging": engine.get('paging', False),
  510. "safesearch": engine.get('safesearch', False),
  511. "shortcut": engine.get('shortcut', ""),
  512. "time_range_support": engine.get(
  513. 'time_range_support',
  514. False
  515. )
  516. }
  517. })
  518. instance.update({
  519. "engines": dict(newInstanceEngines)
  520. })
  521. self.engines.update(newEnginesEngines)
  522. """ Update instance lastUpdated
  523. """
  524. instance.update({
  525. "lastUpdated": time.time()
  526. })
  527. return True
  528. return False
  529. def addInstance(self, url):
  530. if url not in self.instances:
  531. self._instances[url] = {}
  532. return True
  533. return False
  534. def removeInstance(self, url):
  535. """
  536. @param url: url of the instance to remove.
  537. @type url: str
  538. """
  539. del self._instances[url]
  540. def removeMultiInstances(self, urls):
  541. """ Remove instance(s) by url without emitting changed for every
  542. instance that got removed.
  543. @param urls: list with urls of instances to remove.
  544. @type urls: list
  545. """
  546. for url in urls:
  547. del self._instances[url]