searx.py

########################################################################
# Searx-Qt - Lightweight desktop application for Searx.
# Copyright (C) 2020-2022 CYBERDEViL
#
# This file is part of Searx-Qt.
#
# Searx-Qt is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Searx-Qt is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
########################################################################
import time
import urllib.parse

from bs4 import BeautifulSoup

from searxqt.core.schema import Schemas
from searxqt.core.http import HttpRequest, HttpReponse, HttpJsonReponse, ErrorType
from searxqt.core.handler import HandlerProto, NetworkTypes
from searxqt.utils.string import parseFilesize
from searxqt.translations import _

## API result (format=json)
class SearchResult(HttpJsonReponse):
    Schema = Schemas['searxng_query']

    def verifyFurther(self):
        # One of the following keys has to be non-empty, else we count it as
        # no (usable) result.
        validKeys = [
            'results',
            'answers',
            'corrections',
            'infoboxes',
            'suggestions'
        ]
        if self.error == ErrorType.Success:
            data = self.json()
            valid = False
            for key in validKeys:
                if len(data.get(key, [])):
                    valid = True
                    break

            if not valid:
                self.setError(ErrorType.NoResults,
                              f"NoResults: got: `{self.json()}`")

    def verifyContent(self, httpThread):
        HttpJsonReponse.verifyContent(self, httpThread)
        self.verifyFurther()
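# Illustrative sketch (not part of the original source): the JSON payload that
# verifyFurther() inspects looks roughly like the structure below; when all
# five validKeys are empty, the response is flagged as ErrorType.NoResults.
#
#   {
#       "results": [{"title": "...", "url": "...", "content": "...", ...}],
#       "answers": [],
#       "corrections": [],
#       "infoboxes": [],
#       "suggestions": []
#   }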
## HTML result that will be parsed into JSON
class SearchResult2(SearchResult):
    Schema = Schemas['searxng_query']

    def __init__(self, response, callback):
        ## @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py
        self._linktoken = None

        SearchResult.__init__(self, response, callback)

    @property
    def linktoken(self):
        return self._linktoken

    def makeUrlAbsolute(self, url):
        """! Returns an absolute URL. It will prepend the SearXNG instance's
        scheme and location when they are missing."""
        parsedUrl = urllib.parse.urlparse(url)
        instanceUrl = urllib.parse.urlparse(self.request.url)
        if not parsedUrl.netloc:
            url = f"{instanceUrl.netloc}{url}"
        if not parsedUrl.scheme:
            url = f"{instanceUrl.scheme}://{url}"
        return url
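    # Illustrative example (not part of the original source), assuming the
    # request was made against https://searx.example.org:
    #
    #   self.makeUrlAbsolute("/client8uw9qw2jc3yhiq2c.css")
    #   # -> "https://searx.example.org/client8uw9qw2jc3yhiq2c.css"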
    def verifyContent(self, httpThread):
        HttpReponse.verifyContent(self, httpThread)
        if self.error != ErrorType.Success:
            return

        self._json = self.parseHtml()
        self.verifySchema()

        if self.error != ErrorType.Success:
            return

        self.verifyFurther()

        # This was the first request; the dummy css still needs to be
        # requested first..
        if self.error == ErrorType.InvalidSchema and self.linktoken:
            self.setError(ErrorType.NoResults, "")

    def parseHtml(self):
        if self.error != ErrorType.Success:
            return {}

        jsonResult = {
            'results': [],
            'answers': [],
            'corrections': [],
            'infoboxes': [],
            'suggestions': [],
            'unresponsive_engines': []
        }
        soup = BeautifulSoup(self.content, "html.parser")

        # Find css bot detection file
        # <link rel="stylesheet" href="/client8uw9qw2jc3yhiq2c.css" type="text/css">
        for link in soup.find_all("link", {"rel": "stylesheet"}, href=True):
            href = link.get("href")
            if href.startswith("/client"):
                self._linktoken = self.makeUrlAbsolute(href)
                break

        ##########################################################################
        ## 'results' key
        ##########################################################################
        for result in soup.find_all("article", {"class": "result"}):
            """
            <article class="result result-default category-general qwant duckduckgo google">
              <a href="https://linuxize.com/post/curl-post-request/" class="url_wrapper" rel="noreferrer">
                <span class="url_o1">
                  <span class="url_i1">https://linuxize.com</span>
                </span>
                <span class="url_o2">
                  <span class="url_i2"> › post › curl-post-request</span>
                </span>
              </a>
              <h3>
                <a href="https://linuxize.com/post/curl-post-request/" rel="noreferrer">
                  How to make a <span class="highlight">POST</span>
                  <span class="highlight">request</span>
                  with <span class="highlight">curl</span>
                </a>
              </h3>
              <p class="content">
                Learn how to use <span class="highlight">curl</span>, a command-line utility for transferring data from or to a remote server, to make <span class="highlight">POST</span> requests. See examples of sending data, files, and JSON data with <span class="highlight">curl</span> options and options.
              </p>
              <div class="engines">
                <span>qwant</span>
                <span>duckduckgo</span>
                <span>google</span>
                <a href="https://web.archive.org/web/https://linuxize.com/post/curl-post-request/" class="cache_link" rel="noreferrer">
                  <svg SVG_STUFF .../></svg>
                  cached
                </a>
                &lrm;
              </div>
              <div class="break"></div>
            </article>
            """
            """
            <article class="result result-torrent category-files solidtorrents">
              <a href="https://solidtorrents.to/torrents/STUFF .../" class="url_wrapper" rel="noreferrer">
                <span class="url_o1">
                  <span class="url_i1">https://solidtorrents.to</span>
                </span>
                <span class="url_o2">
                  <span class="url_i2"> › torrents › SOME_NAME › SOME_HASH</span>
                </span>
              </a>
              <h3>
                <a href="https://solidtorrents.to/torrents/SOME_NAME/SOME_HASH/" rel="noreferrer">
                  <span class="highlight">SOME</span>-<span class="highlight">NAME</span>
                </a>
              </h3>
              <time class="published_date" datetime="2018-10-20 00:00:00" >Oct 20, 2018</time>
              <div class="highlight">Other/Archive</div>
              <p class="altlink">
                &bull;
                <a href="magnet:MAGNET_LINK ..." class="magnetlink" rel="noreferrer"><svg SVG_STUFF .../></svg>magnet link</a>
              </p>
              <p class="altlink">
                &bull;
                <a href="https://itorrents.org/torrent/TORRENT_LINK ..." class="torrentfile" rel="noreferrer">torrent file</a>
              </p>
              <p class="stat">
                &bull; Seeder
                <span class="badge">407</span>
                &bull; Leecher
                <span class="badge">748</span>
              </p>
              <p class="stat"> Filesize
                <span class="badge">2.88 GiB</span>
              </p>
              <div class="engines">
                <span>solidtorrents</span>
                <a href="https://web.archive.org/web/https://solidtorrents.to/torrents/TORRENT_STUFF ..." class="cache_link" rel="noreferrer"><svg SVG_STUFF .../></svg>cached</a>
                &lrm;
              </div>
              <div class="break"></div>
            </article>
            """
            title = ''
            url = ''
            content = ''
            engines = []
            publishedDate = ''
            magnetlink = ''
            torrentfile = ''
            filesize = 0
            files = 0  # TODO unused for now
            seed = None
            leech = None

            # !! GET Title
            try:
                title = result.h3.a.get_text().lstrip().rstrip()
            except AttributeError:
                print("Failed to get title")

            # !! GET URL
            try:
                url = result.h3.a.get("href")
            except AttributeError:
                print("Failed to get url")

            # !! GET Content
            felem = result.find("p", {"class": "content"})
            if felem:
                content = felem.get_text().lstrip().rstrip()

            # !! GET Engines
            felem = result.find("div", {"class": "engines"})
            if felem:
                for engine in felem.find_all("span"):
                    engines.append(engine.get_text().rstrip().lstrip())

            ## !! Get publishDate
            felem = result.find("time", {"class": "published_date"})
            if felem:
                publishedDate = felem.get("datetime", "")

            ## !! Get magnetlink
            felem = result.find("a", {"class": "magnetlink"})
            if felem:
                magnetlink = felem.get('href')

            ## !! Get torrentfile
            felem = result.find("a", {"class": "torrentfile"})
            if felem:
                torrentfile = felem.get('href')

            ## !! Get filesize
            for felem in result.find_all("span", {"class": "badge"}):
                if felem.previousSibling:
                    precedingText = felem.previousSibling
                    if "Filesize" in precedingText:
                        filesize = parseFilesize(felem.get_text().rstrip().lstrip())
                    elif "Seeder" in precedingText:
                        seed = felem.get_text()
                    elif "Leecher" in precedingText:
                        leech = felem.get_text()

            # !! Add result
            resultData = {
                'title': title,
                'url': url,
                'content': content,
                'engines': [engine for engine in engines],

                # Optional
                'publishedDate': publishedDate,

                # File attributes
                'magnetlink': magnetlink,
                'torrentfile': torrentfile,
                'filesize': filesize,
                'files': files,
                'img_format': ''  # TODO
            }

            if seed is not None:
                resultData.update({'seed': seed})
            if leech is not None:
                resultData.update({'leech': leech})

            jsonResult['results'].append(resultData)
        ##########################################################################
        ## 'suggestions' key
        ##########################################################################
        """
        <div id="sidebar">
          <div id="suggestions" role="complementary" aria-labelledby="suggestions-title">
            <details class="sidebar-collapsable">
              <summary class="title" id="suggestions-title">Suggestions</summary>
              <div class="wrapper">
                <form method="POST" action="/search">
                  <input type="hidden" name="q" value="curl post request json">
                  <input type="hidden" name="category_general" value="1">
                  <input type="hidden" name="language" value="auto">
                  <input type="hidden" name="time_range" value="">
                  <input type="hidden" name="safesearch" value="0">
                  <input type="hidden" name="theme" value="simple">
                  <input type="submit" class="suggestion" role="link" value="&bull; curl post request json">
        """
        felem = soup.find("div", {"id": "suggestions"})
        if felem:
            for suggestion in felem.find_all("input", {"name": "q"}):
                jsonResult['suggestions'].append(suggestion.get("value"))

        ##########################################################################
        ## 'answers' key
        ##########################################################################
        """
        <h4 class="title" id="answers-title">Answers : </h4>
        <div class="answer">
          <span>LONG TEXT ...</span>
          <a href="some url ..." class="answer-url">url text ...</a>
        </div>
        """
        for answer in soup.find_all("div", {"class": "answer"}):
            felem = answer.find("span")
            if felem:
                jsonResult['answers'].append(felem.get_text())

        ##########################################################################
        ## 'corrections' key
        ##########################################################################
        """ TODO """
        ##########################################################################
        ## 'infoboxes' key
        ##########################################################################
        """
        <details open="" class="sidebar-collapsable">
          <summary class="title">Info</summary>
          <aside class="infobox" aria-label="Banana">
            <h2 class="title"><bdi>Banana</bdi></h2>
            <img src="/image_proxy?url=long_image_url" title="Banana" alt="Banana">
            <p><bdi>LONG TEXT HERE ...</bdi></p>
            <div class="urls">
              <ul>
                <li class="url"><bdi><a href="https://en.wikipedia.org/wiki/Banana" rel="noreferrer">Wikipedia</a></bdi></li>
                <li class="url"><bdi><a href="http://www.wikidata.org/entity/Q503" rel="noreferrer">Wikidata</a></bdi></li>
              </ul>
            </div>
          </aside>
        </details>
        """
        """
        <details open="" class="sidebar-collapsable">
          <summary class="title">Info</summary>
          <aside class="infobox" aria-label="Water">
            <h2 class="title"><bdi>Water</bdi></h2>
            <img src="/image_proxy?url=long url .." title="Water" alt="Water">
            <p><bdi>LONG TEXT ...</bdi></p>
            <div class="attributes">
              <dl>
                <dt><bdi>Chemical formula :</bdi></dt>
                <dd><bdi>H₂O</bdi></dd>
              </dl>
            </div>
            <div class="urls">
              <ul>
                <li class="url"><bdi><a href="https://en.wikipedia.org/wiki/Water" rel="noreferrer">Wikipedia</a></bdi></li>
                <li class="url"><bdi><a href="http://www.wikidata.org/entity/Q283" rel="noreferrer">Wikidata</a></bdi></li>
              </ul>
            </div>
          </aside>
        </details>
        """
        """
        infoboxes = []
        ibox = {
            'infobox': 'str',
            'id': 'uri',
            'content': 'str',
            'img_src': 'uri' | null,
            'urls': [
                {
                    'title': 'str',
                    'url': 'uri',
                    'entity': 'str',
                    'official': true
                }
            ],
            'attributes': [
                {
                    'label': 'str',
                    'value': 'str',
                    'entity': 'str'
                }
            ],
            'engines': ['str'],
            'engine': 'str'
        }
        """
        for infobox in soup.find_all("aside", {"class": "infobox"}):
            title = ""
            id = ""
            content = ""
            img_src = ""
            urls = []
            attributes = []
            engines = []

            # Title
            felem = infobox.find("h2", {"class": "title"})
            if felem:
                title = felem.get_text().rstrip().lstrip()

            # ID
            # TODO

            # Content
            felem = infobox.find("p")
            if felem:
                felem = felem.find("bdi")
                if felem:
                    content = felem.get_text().rstrip().lstrip()

            # Image
            felem = infobox.find("img")
            if felem:
                img_src = felem.get("src")

            # URLs
            for felem in infobox.find_all("li", {"class": "url"}):
                felem = felem.find("a")
                if felem:
                    urls.append({
                        'title': felem.get_text().lstrip().rstrip(),
                        'url': felem.get("href", ""),
                        'entity': '',  # TODO
                        'official': False  # TODO
                    })

            # Attributes
            """
            <div class="attributes">
              <dl>
                <dt><bdi>Chemical formula :</bdi></dt>
                <dd><bdi>H₂O</bdi></dd>
              </dl>
            </div>
            """
            felem = infobox.find("div", {"class": "attributes"})
            if felem:
                for item in felem.find_all("dl"):
                    label = ""
                    value = ""
                    entity = ""  # TODO
                    try:
                        label = item.dt.bdi.get_text().rstrip().lstrip()
                        value = item.dd.bdi.get_text().rstrip().lstrip()
                    except AttributeError:
                        continue

                    attributes.append({
                        "label": label,
                        "value": value,
                        "entity": entity
                    })

            # Engines
            for url in urls:
                engines.append(url['title'].lower())

            jsonResult['infoboxes'].append({
                "infobox": title,
                "id": id,
                "content": content,
                "img_src": img_src,
                "urls": urls,
                "attributes": attributes,
                "engines": engines
            })
        ##########################################################################
        ## 'unresponsive_engines' key
        ##########################################################################
        """
        <div id="engines_msg">
          <details class="sidebar-collapsable" open="">
            <summary class="title" id="engines_msg-title">Messages from the search engines</summary>
            <div class="dialog-error" role="alert">
              <svg class="ion-icon-big" etc..></svg>
              <div>
                <p>
                  <strong>Error!</strong>
                  Engines cannot retrieve results:
                </p>
                <p>
                  brave (<a href="/stats?engine=brave" title="View error logs and submit a bug report">Suspended: too many requests</a>)
                </p>
                <p>
                  qwant (<a href="/stats?engine=qwant" title="View error logs and submit a bug report">Suspended: too many requests</a>)
                </p>
              </div>
            </div>
          </details>
        </div>
        """
        felem = soup.find("div", {"id": "engines_msg"})
        if felem:
            for errDialog in felem.find_all("div", {"class": "dialog-error"}):
                for p in errDialog.find_all("p"):
                    a = p.find("a")
                    if not a:
                        continue
                    engine, msg = p.get_text().split(" ", 1)
                    jsonResult['unresponsive_engines'].append([engine, msg])

        return jsonResult
class SearxConfigResult(HttpJsonReponse):
    Schema = Schemas['searxng_config']


class Categories:
    types = {
        'general': (_('General'), 'category_general'),
        'files': (_('Files'), 'category_files'),
        'images': (_('Images'), 'category_images'),
        'videos': (_('Videos'), 'category_videos'),
        'it': (_('IT'), 'category_it'),
        'map': (_('Location'), 'category_map'),
        'music': (_('Music'), 'category_music'),
        'news': (_('News'), 'category_news'),
        'science': (_('Science'), 'category_science'),
        'social media': (_('Social'), 'category_social media'),
        'onions': (_('Onions'), 'category_onions'),
        'shopping': (_('Shopping'), 'category_shopping')
    }

    def __init__(self):
        self._options = {}
        self.__makeOptions()

    def __makeOptions(self):
        self._options.clear()
        for key, t in self.types.items():
            self._options.update({key: False})

    def reset(self):
        self.__makeOptions()

    def get(self, key):
        return self._options[key]

    def set(self, key, state):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @param state: Enabled / disabled state
        @type state: bool
        """
        self._options[key] = state

    def dict(self):
        newDict = {}
        for key, state in self._options.items():
            if state:
                newDict.update({self.types[key][1]: 'on'})
        return newDict

    def enabledKeys(self):
        """ Returns a list with enabled category keys (keys from
        Categories.types)
        """
        return [key for key, state in self._options.items() if state]
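# Illustrative usage sketch (not part of the original module):
#
#   categories = Categories()
#   categories.set('general', True)
#   categories.set('it', True)
#   categories.enabledKeys()  # -> ['general', 'it']
#   categories.dict()         # -> {'category_general': 'on', 'category_it': 'on'}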
class Engines(list):
    def __init__(self):
        list.__init__(self)

    def dict(self):
        if not self:
            return {}
        return {
            'engines': ",".join(self)
        }
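# Illustrative usage sketch (not part of the original module):
#
#   engines = Engines()
#   engines.extend(['duckduckgo', 'qwant'])
#   engines.dict()  # -> {'engines': 'duckduckgo,qwant'}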
class SearX:
    Periods = {
        '': _('Anytime'),
        'day': _('Last day'),
        'week': _('Last week'),
        'month': _('Last month'),
        'year': _('Last year')
    }

    # https://github.com/asciimoo/searx/blob/master/searx/languages.py
    Languages = {
        '': _('No language'),
        'all': _('Default language'),
        'af-NA': 'Afrikaans - af-NA',
        'ca-AD': 'Català - ca-AD',
        'da-DK': 'Dansk - da-DK',
        'de': 'Deutsch - de',
        'de-AT': 'Deutsch (Österreich) - de-AT',
        'de-CH': 'Deutsch (Schweiz) - de-CH',
        'de-DE': 'Deutsch (Deutschland) - de-DE',
        'et-EE': 'Eesti - et-EE',
        'en': 'English - en',
        'en-AU': 'English (Australia) - en-AU',
        'en-CA': 'English (Canada) - en-CA',
        'en-GB': 'English (United Kingdom) - en-GB',
        'en-IE': 'English (Ireland) - en-IE',
        'en-IN': 'English (India) - en-IN',
        'en-NZ': 'English (New Zealand) - en-NZ',
        'en-PH': 'English (Philippines) - en-PH',
        'en-SG': 'English (Singapore) - en-SG',
        'en-US': 'English (United States) - en-US',
        'es': 'Español - es',
        'es-AR': 'Español (Argentina) - es-AR',
        'es-CL': 'Español (Chile) - es-CL',
        'es-ES': 'Español (España) - es-ES',
        'es-MX': 'Español (México) - es-MX',
        'fr': 'Français - fr',
        'fr-BE': 'Français (Belgique) - fr-BE',
        'fr-CA': 'Français (Canada) - fr-CA',
        'fr-CH': 'Français (Suisse) - fr-CH',
        'fr-FR': 'Français (France) - fr-FR',
        'hr-HR': 'Hrvatski - hr-HR',
        'id-ID': 'Indonesia - id-ID',
        'it-IT': 'Italiano - it-IT',
        'sw-KE': 'Kiswahili - sw-KE',
        'lv-LV': 'Latviešu - lv-LV',
        'lt-LT': 'Lietuvių - lt-LT',
        'hu-HU': 'Magyar - hu-HU',
        'ms-MY': 'Melayu - ms-MY',
        'nl': 'Nederlands - nl',
        'nl-BE': 'Nederlands (België) - nl-BE',
        'nl-NL': 'Nederlands (Nederland) - nl-NL',
        'nb-NO': 'Norsk Bokmål - nb-NO',
        'pl-PL': 'Polski - pl-PL',
        'pt': 'Português - pt',
        'pt-BR': 'Português (Brasil) - pt-BR',
        'pt-PT': 'Português (Portugal) - pt-PT',
        'ro-RO': 'Română - ro-RO',
        'sk-SK': 'Slovenčina - sk-SK',
        'sl-SI': 'Slovenščina - sl-SI',
        'sr-RS': 'Srpski - sr-RS',
        'fi-FI': 'Suomi - fi-FI',
        'sv-SE': 'Svenska - sv-SE',
        'vi-VN': 'Tiếng Việt - vi-VN',
        'tr-TR': 'Türkçe - tr-TR',
        'is-IS': 'Íslenska - is-IS',
        'cs-CZ': 'Čeština - cs-CZ',
        'el-GR': 'Ελληνικά - el-GR',
        'be-BY': 'Беларуская - be-BY',
        'bg-BG': 'Български - bg-BG',
        'ru-RU': 'Русский - ru-RU',
        'uk-UA': 'Українська - uk-UA',
        'hy-AM': 'Հայերեն - hy-AM',
        'he-IL': 'עברית - he-IL',
        'ar-SA': 'العربية - ar-SA',
        'fa-IR': 'فارسی - fa-IR',
        'th-TH': 'ไทย - th-TH',
        'zh': '中文 - zh',
        'zh-CN': '中文 (中国) - zh-CN',
        'zh-TW': '中文 (台灣) - zh-TW',
        'ja-JP': '日本語 - ja-JP',
        'ko-KR': '한국어 - ko-KR'
    }
    def __init__(self, httpThread, httpSettings):
        self._httpThread = httpThread
        self._httpSettings = httpSettings

        self._url = ""
        self._categories = Categories()
        self._engines = Engines()
        self._query = ""
        self._lang = ""
        self._pageno = ""  # int formatted as string
        self._timeRange = ""  # '', 'day', 'week', 'month' or 'year'
        self._safesearch = False
        self._parseHtml = True

    @property
    def categories(self): return self._categories

    @property
    def engines(self): return self._engines

    @property
    def url(self):
        """
        @return: Instance url
        @rtype: str
        """
        return self._url

    @url.setter
    def url(self, url):
        """
        @param url: Instance url
        @type url: str
        """
        self._url = url

    @property
    def query(self):
        """
        @return: Search query
        @rtype: str
        """
        return self._query

    @query.setter
    def query(self, q):
        """
        @param q: Search query
        @type q: str
        """
        self._query = q

    @property
    def lang(self):
        """
        @return: Language code
        @rtype: str
        """
        return self._lang

    @lang.setter
    def lang(self, lang):
        """
        @param lang: Language code
        @type lang: str
        """
        self._lang = lang

    @property
    def pageno(self):
        """
        @return: Page number
        @rtype: int
        """
        return int(self._pageno)

    @pageno.setter
    def pageno(self, i):
        """
        @param i: Page number
        @type i: int
        """
        self._pageno = str(i)

    @property
    def timeRange(self):
        """
        @return: Search time range ('', 'day', 'week', 'month' or 'year')
        @rtype: str
        """
        return self._timeRange

    @timeRange.setter
    def timeRange(self, value):
        """
        @param value: Key from SearX.Periods
        @type value: str
        """
        self._timeRange = value

    @property
    def safeSearch(self):
        """
        @return: Whether safe search is enabled or not.
        @rtype: bool
        """
        return self._safesearch

    @safeSearch.setter
    def safeSearch(self, state):
        """
        @param state: Enable/disable safe search.
        @type state: bool
        """
        self._safesearch = state

    @property
    def parseHtml(self):
        """
        @return: Whether parsing HTML is enabled; the JSON API will not be
                 used when this returns True.
        @rtype: bool
        """
        return self._parseHtml

    @parseHtml.setter
    def parseHtml(self, state):
        """
        @param state: Enable/disable parsing HTML instead of using the JSON API
        @type state: bool
        """
        self._parseHtml = state
    @property
    def requestKwargs(self):
        """ Returns the data that will be sent with the POST request used
        for the search operation: the search query, language, page number
        and enabled categories/engines.
        @rtype: dict
        """
        data = {
            "q": self.query,
            "safesearch": "1" if self.safeSearch else "0"
        }

        # Choose what resource to use (JSON API or HTML parser)
        if self.parseHtml:
            data.update({"theme": "simple"})
        else:
            data.update({"format": "json"})

        # Testing showed that searx will honor only the engines when both
        # engines and categories are set.
        if self.engines:
            data.update(self.engines.dict())
        elif self.categories:
            data.update(self.categories.dict())

        if self.lang:
            data.update({"language": self.lang})

        if self.pageno:
            data.update({"pageno": self.pageno})

        if self.timeRange:
            data.update({"time_range": self.timeRange})

        return data
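    # Illustrative sketch (not part of the original module) of the POST data
    # this property produces for the query "curl post request" on page 1 with
    # HTML parsing enabled and only the 'general' category selected:
    #
    #   {
    #       'q': 'curl post request',
    #       'safesearch': '0',
    #       'theme': 'simple',
    #       'category_general': 'on',
    #       'pageno': 1
    #   }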
    def reset(self):
        self.url = ""
        self.timeRange = ""
        self.lang = ""
        self.pageno = 1
        self.categories.reset()
        self.engines.clear()
        self.query = ""

    def searchFinishedCb(self, response):
        pass  # TODO reimplement

    def search(self):
        """ Perform a search operation with the currently set values. The
        result (a SearchResult) is delivered asynchronously through
        handleLinkToken and searchFinishedCb.
        """
        rtype = SearchResult
        if self.parseHtml:
            rtype = SearchResult2

        request = HttpRequest(urllib.parse.urljoin(self.url, "/search"),
                              self._httpSettings.newRequestSettings(),
                              self.requestKwargs)
        response = rtype(request, self.handleLinkToken)
        self._httpThread.get(response)

    def _linkTokenReponse(self, response):
        # Failed to get the dummy css
        if response.error != ErrorType.Success:
            self.searchFinishedCb(response)  # TODO HttpReponse is returned here
            return

        # Redo the original request
        request = HttpRequest(urllib.parse.urljoin(self.url, "/search"),
                              response.request.settings,
                              data=self.requestKwargs)
        response = SearchResult2(request, self.searchFinishedCb)
        self._httpThread.get(response)

    def handleLinkToken(self, response):
        """! Searx-Qt is not a bot
        @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py
        @note variables in https://searx.instance/config:
            - bool ["limiter"]["botdetection.ip_limit.link_token"]
            - bool ["limiter"]["botdetection.ip_limit.pass_searxng_org"]
        """
        # Not relevant
        if response.error != ErrorType.NoResults or not self.parseHtml:
            self.searchFinishedCb(response)
            return

        # No linktoken found
        if response.linktoken is None:
            self.searchFinishedCb(response)
            return

        # Request the dummy css
        request = HttpRequest(response.linktoken,
                              response.request.settings)
        response = HttpReponse(request, self._linkTokenReponse)
        self._httpThread.get(response)
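# Rough sketch (not part of the original module) of the HTML bot-detection
# flow implemented above, assuming an instance at https://searx.example.org:
#
#   1. SearX.search() POSTs to https://searx.example.org/search and routes the
#      SearchResult2 response to handleLinkToken().
#   2. When the parsed page yields no usable results but a "/client...css"
#      stylesheet link was found, handleLinkToken() GETs that dummy css (the
#      SearXNG link_token check) with _linkTokenReponse as callback.
#   3. _linkTokenReponse() then repeats the original /search request and hands
#      the new SearchResult2 to searchFinishedCb().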
class SearxConfigHandler(HandlerProto):
    def __init__(self, httpThread, httpSettings):
        HandlerProto.__init__(self, httpThread, httpSettings)

    def updateInstanceFinished(self, response):
        pass  # Reimplement this TODO

    def _requestFinished(self, response):
        if response.error != ErrorType.Success:
            self.updateInstanceFinished(response)
            return

        url = response.request.url
        instance = self.instances[url]
        j = response.json()

        """ Update instance version
        """
        instance.update({
            "version": j.get("version", "")
        })

        """ Update instance network_type to use our own network type
        definitions from class NetworkTypes (core/handler.py)
        """
        instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})

        """ Update Engines

        What we get:
            "engines": [
                categories (list, str)
                enabled (bool)
                language_support (bool)
                name (str)
                paging (bool)
                safesearch (bool)
                shortcut (str)
                supported_languages (list, str)
                time_range_support (bool)
                timeout (float)
            ]

        What instanceModel wants:
            "engines": {
                "not evil": {
                    "error_rate": 15,
                    "errors": [
                        0
                    ]
                }
            }

        What enginesModel wants:
            "engines": {
                "1337x": {
                    "categories": [
                        "videos"
                    ],
                    "language_support": true,
                    "paging": true,
                    "safesearch": false,
                    "shortcut": "1337x",
                    "time_range_support": false
                },
        """
        newInstanceEngines = {}
        newEnginesEngines = {}
        for engine in j.get('engines', []):
            name = engine.get('name', "")
            if not name:
                continue

            newInstanceEngines.update({
                name: {}
            })

            if name not in self.engines:
                newEnginesEngines.update({
                    name: {
                        "categories": list(engine.get('categories', [])),
                        "language_support": engine.get(
                            'language_support',
                            False
                        ),
                        "paging": engine.get('paging', False),
                        "safesearch": engine.get('safesearch', False),
                        "shortcut": engine.get('shortcut', ""),
                        "time_range_support": engine.get(
                            'time_range_support',
                            False
                        )
                    }
                })

        instance.update({
            "engines": dict(newInstanceEngines)
        })
        self.engines.update(newEnginesEngines)

        """ Update instance lastUpdated
        """
        instance.update({
            "lastUpdated": time.time()
        })

        self.updateInstanceFinished(response)
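    # Illustrative sketch (not part of the original module): a /config engine
    # entry such as
    #
    #   {"name": "1337x", "categories": ["videos"], "shortcut": "1337x",
    #    "paging": True, "safesearch": False, "language_support": True,
    #    "time_range_support": False}
    #
    # becomes newInstanceEngines["1337x"] = {} for the instance entry and,
    # when the engine is not yet known, the trimmed enginesModel entry built
    # in the loop above.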
    def updateInstance(self, url):
        newUrl = urllib.parse.urljoin(url, "/config")
        request = HttpRequest(newUrl,
                              self.httpSettings.newRequestSettings())
        response = SearxConfigResult(request, self._requestFinished)
        self._httpThread.get(response)
    def addInstance(self, url):
        if url not in self.instances:
            self._instances[url] = {}
            return True
        return False

    def removeInstance(self, url):
        """
        @param url: url of the instance to remove.
        @type url: str
        """
        del self._instances[url]

    def removeMultiInstances(self, urls):
        """ Remove instance(s) by url without emitting changed for every
        instance that got removed.

        @param urls: list with urls of instances to remove.
        @type urls: list
        """
        for url in urls:
            del self._instances[url]