- ########################################################################
- # Searx-Qt - Lightweight desktop application for Searx.
- # Copyright (C) 2020-2022 CYBERDEViL
- #
- # This file is part of Searx-Qt.
- #
- # Searx-Qt is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Searx-Qt is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
- import time
- import urllib.parse
- from bs4 import BeautifulSoup
- from searxqt.core.schema import Schemas
- from searxqt.core.http import HttpRequest, HttpReponse, HttpJsonReponse, ErrorType
- from searxqt.core.handler import HandlerProto, NetworkTypes
- from searxqt.utils.string import parseFilesize
- from searxqt.translations import _
- ## API result (format=json)
- class SearchResult(HttpJsonReponse):
- Schema = Schemas['searxng_query']
- def verifyFurther(self):
- # At least one of the following keys has to be non-empty, otherwise we
- # count it as no (usable) result.
- validKeys = [
- 'results',
- 'answers',
- 'corrections',
- 'infoboxes',
- 'suggestions'
- ]
- if self.error == ErrorType.Success:
- data = self.json()
- valid = False
- for key in validKeys:
- if len(data.get(key, [])):
- valid = True
- break
- if not valid:
- self.setError(ErrorType.NoResults,
- f"NoResults: got: `{self.json()}`")
- def verifyContent(self, httpThread):
- HttpJsonReponse.verifyContent(self, httpThread)
- self.verifyFurther()
- ## HTML result that will be parsed into JSON
- class SearchResult2(SearchResult):
- Schema = Schemas['searxng_query']
- def __init__(self, response, callback):
- ## @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py
- self._linktoken = None
- SearchResult.__init__(self, response, callback)
- @property
- def linktoken(self):
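- """ URL of the bot-detection dummy CSS found in the HTML response,
- or None when no such link was found. """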
- return self._linktoken
- def makeUrlAbsolute(self, url):
- """! Returns a absolute URL. It will add the SearXNG instance its
- schema and location in front when they are missing."""
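- # Example (hypothetical instance URL): "/client8uw9qw2jc3yhiq2c.css" on
- # "https://searx.example.org/search" becomes
- # "https://searx.example.org/client8uw9qw2jc3yhiq2c.css".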
- parsedUrl = urllib.parse.urlparse(url)
- instanceUrl = urllib.parse.urlparse(self.request.url)
- if not parsedUrl.netloc:
- url = f"{instanceUrl.netloc}{url}"
- if not parsedUrl.scheme:
- url = f"{instanceUrl.scheme}://{url}"
- return url
- def verifyContent(self, httpThread):
- HttpReponse.verifyContent(self, httpThread)
- if self.error != ErrorType.Success:
- return
- self._json = self.parseHtml()
- self.verifySchema()
- if self.error != ErrorType.Success:
- return
- self.verifyFurther()
- # First request; the dummy CSS (link token) has to be requested first.
- if self.error == ErrorType.InvalidSchema and self.linktoken:
- self.setError(ErrorType.NoResults, "")
- def parseHtml(self):
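- """ Parses the HTML of the SearXNG 'simple' theme into a dict with the
- same shape as the JSON API response (results, answers, corrections,
- infoboxes, suggestions, unresponsive_engines). """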
- if self.error != ErrorType.Success:
- return {}
- jsonResult = {
- 'results': [],
- 'answers': [],
- 'corrections': [],
- 'infoboxes': [],
- 'suggestions': [],
- 'unresponsive_engines': []
- }
- soup = BeautifulSoup(self.content, "html.parser")
- # Find css bot detection file
- # <link rel="stylesheet" href="/client8uw9qw2jc3yhiq2c.css" type="text/css">
- for link in soup.find_all("link", {"rel": "stylesheet"}, href=True):
- href = link.get("href")
- if href.startswith("/client"):
- self._linktoken = self.makeUrlAbsolute(href)
- break
- #######################################################################
- ## 'results' key
- ##########################################################################
- for result in soup.find_all("article", {"class": "result"}):
- """
- <article class="result result-default category-general qwant duckduckgo google">
- <a href="https://linuxize.com/post/curl-post-request/" class="url_wrapper" rel="noreferrer">
- <span class="url_o1">
- <span class="url_i1">https://linuxize.com</span>
- </span>
- <span class="url_o2">
- <span class="url_i2"> › post › curl-post-request</span>
- </span>
- </a>
- <h3>
- <a href="https://linuxize.com/post/curl-post-request/" rel="noreferrer">
- How to make a <span class="highlight">POST</span>
- <span class="highlight">request</span>
- with <span class="highlight">curl</span>
- </a>
- </h3>
- <p class="content">
- Learn how to use <span class="highlight">curl</span>, a command-line utility for transferring data from or to a remote server, to make <span class="highlight">POST</span> requests. See examples of sending data, files, and JSON data with <span class="highlight">curl</span> options and options.
- </p>
- <div class="engines">
- <span>qwant</span>
- <span>duckduckgo</span>
- <span>google</span>
- <a href="https://web.archive.org/web/https://linuxize.com/post/curl-post-request/" class="cache_link" rel="noreferrer">
- <svg SVG_STUFF .../></svg>
- cached
- </a>
- ‎
- </div>
- <div class="break"></div>
- </article>
- """
- """
- <article class="result result-torrent category-files solidtorrents">
- <a href="https://solidtorrents.to/torrents/STUFF .../" class="url_wrapper" rel="noreferrer">
- <span class="url_o1">
- <span class="url_i1">https://solidtorrents.to</span>
- </span>
- <span class="url_o2">
- <span class="url_i2"> › torrents › SOME_NAME › SOME_HASH</span>
- </span>
- </a>
- <h3>
- <a href="https://solidtorrents.to/torrents/SOME_NAME/SOME_HASH/" rel="noreferrer">
- <span class="highlight">SOME</span>-<span class="highlight">NAME</span>
- </a>
- </h3>
- <time class="published_date" datetime="2018-10-20 00:00:00" >Oct 20, 2018</time>
- <div class="highlight">Other/Archive</div>
- <p class="altlink">
- •
- <a href="magnet:MAGNET_LINK ..." class="magnetlink" rel="noreferrer"><svg SVG_STUFF .../></svg>magnet link</a>
- </p>
- <p class="altlink">
- •
- <a href="https://itorrents.org/torrent/TORRENT_LINK ..." class="torrentfile" rel="noreferrer">torrent file</a>
- </p>
- <p class="stat">
- • Seeder
- <span class="badge">407</span>
- • Leecher
- <span class="badge">748</span>
- </p>
- <p class="stat"> Filesize
- <span class="badge">2.88 GiB</span>
- </p>
- <div class="engines">
- <span>solidtorrents</span>
- <a href="https://web.archive.org/web/https://solidtorrents.to/torrents/TORRENT_STUFF ..." class="cache_link" rel="noreferrer"><svg SVG_STUFF .../></svg>cached</a>
- ‎
- </div>
- <div class="break"></div>
- </article>
- """
- title = ''
- url = ''
- content = ''
- engines = []
- publishedDate = ''
- magnetlink = ''
- torrentfile = ''
- filesize = 0
- files = 0 # TODO unused for now
- seed = None
- leech = None
- # !! GET Title
- try:
- title = result.h3.a.get_text().lstrip().rstrip()
- except AttributeError:
- print("Failed to get title")
- # !! GET URL
- try:
- url = result.h3.a.get("href")
- except AttributeError:
- print("Failed to get url")
- # !! GET Content
- felem = result.find("p", {"class": "content"})
- if felem:
- content = felem.get_text().lstrip().rstrip()
- # !! GET Engines
- felem = result.find("div", {"class": "engines"})
- if felem:
- for engine in felem.find_all("span"):
- engines.append(engine.get_text().rstrip().lstrip())
- ## !! Get publishedDate
- felem = result.find("time", {"class": "published_date"})
- if felem:
- publishedDate = felem.get("datetime", "")
- ## !! Get magnetlink
- felem = result.find("a", {"class": "magnetlink"})
- if felem:
- magnetlink = felem.get('href')
- ## !! Get torrentfile
- felem = result.find("a", {"class": "torrentfile"})
- if felem:
- torrentfile = felem.get('href')
- ## !! Get filesize
- for felem in result.find_all("span", {"class": "badge"}):
- if felem.previousSibling:
- precedingText = felem.previousSibling
- if "Filesize" in precedingText:
- filesize = parseFilesize(felem.get_text().rstrip().lstrip())
- elif "Seeder" in precedingText:
- seed = felem.get_text()
- elif "Leecher" in precedingText:
- leech = felem.get_text()
- # !! Add result
- resultData = {
- 'title': title,
- 'url': url,
- 'content': content,
- 'engines': [engine for engine in engines],
- # Optional
- 'publishedDate': publishedDate,
- # File attributes
- 'magnetlink': magnetlink,
- 'torrentfile': torrentfile,
- 'filesize': filesize,
- 'files': files,
- 'img_format': '' # TODO
- }
- if seed is not None:
- resultData.update({'seed': seed})
- if leech is not None:
- resultData.update({'leech': leech})
- jsonResult['results'].append(resultData)
- ##########################################################################
- ## 'suggestions' key
- ##########################################################################
- """
- <div id="sidebar">
- <div id="suggestions" role="complementary" aria-labelledby="suggestions-title">
- <details class="sidebar-collapsable">
- <summary class="title" id="suggestions-title">Suggestions</summary>
- <div class="wrapper">
- <form method="POST" action="/search">
- <input type="hidden" name="q" value="curl post request json">
- <input type="hidden" name="category_general" value="1">
- <input type="hidden" name="language" value="auto">
- <input type="hidden" name="time_range" value="">
- <input type="hidden" name="safesearch" value="0">
- <input type="hidden" name="theme" value="simple">
- <input type="submit" class="suggestion" role="link" value="• curl post request json">
- """
- felem = soup.find("div", {"id": "suggestions"})
- if felem:
- for suggestion in felem.find_all("input", {"name": "q"}):
- jsonResult['suggestions'].append(suggestion.get("value"))
- ##########################################################################
- ## 'answers' key
- ##########################################################################
- """
- <h4 class="title" id="answers-title">Answers : </h4>
- <div class="answer">
- <span>LONG TEXT ...</span>
- <a href="some url ..." class="answer-url">url text ...</a>
- </div>
- """
- for answer in soup.find_all("div", {"class": "answer"}):
- felem = answer.find("span")
- if felem:
- jsonResult['answers'].append(felem.get_text())
- ##########################################################################
- ## 'corrections' key
- ##########################################################################
- """ TODO """
- ##########################################################################
- ## 'infoboxes' key
- ##########################################################################
- """
- <details open="" class="sidebar-collapsable">
- <summary class="title">Info</summary>
- <aside class="infobox" aria-label="Banana">
- <h2 class="title"><bdi>Banana</bdi></h2>
- <img src="/image_proxy?url=long_image_url" title="Banana" alt="Banana">
- <p><bdi>LONG TEXT HERE ...</bdi></p>
- <div class="urls">
- <ul>
- <li class="url"><bdi><a href="https://en.wikipedia.org/wiki/Banana" rel="noreferrer">Wikipedia</a></bdi></li>
- <li class="url"><bdi><a href="http://www.wikidata.org/entity/Q503" rel="noreferrer">Wikidata</a></bdi></li>
- </ul>
- </div>
- </aside>
- </details>
- """
- """
- <details open="" class="sidebar-collapsable">
- <summary class="title">Info</summary>
- <aside class="infobox" aria-label="Water">
- <h2 class="title"><bdi>Water</bdi></h2>
- <img src="/image_proxy?url=long url .." title="Water" alt="Water">
- <p><bdi>LONG TEXT ...</bdi></p>
- <div class="attributes">
- <dl>
- <dt><bdi>Chemical formula :</bdi></dt>
- <dd><bdi>H₂O</bdi></dd>
- </dl>
- </div>
- <div class="urls">
- <ul>
- <li class="url"><bdi><a href="https://en.wikipedia.org/wiki/Water" rel="noreferrer">Wikipedia</a></bdi></li>
- <li class="url"><bdi><a href="http://www.wikidata.org/entity/Q283" rel="noreferrer">Wikidata</a></bdi></li>
- </ul>
- </div>
- </aside>
- </details>
- """
- """
- infoboxes = []
- ibox = {
- 'infobox': 'str',
- 'id': 'uri',
- 'content': 'str',
- 'img_src': 'uri' | null
- 'urls': [
- {
- 'title': 'str',
- 'url': 'uri',
- 'entity': 'str',
- 'official': true
- }
- ],
- 'attributes': [
- {
- 'label': 'str',
- 'value': 'str',
- 'entity': 'str'
- }
- ],
- 'engines': ['str'],
- 'engine': 'str'
- }
- """
- for infobox in soup.find_all("aside", {"class": "infobox"}):
- title = ""
- id = ""
- content = ""
- img_src = ""
- urls = []
- attributes = []
- engines = []
- # Title
- felem = infobox.find("h2", {"class": "title"})
- if felem:
- title = felem.get_text().rstrip().lstrip()
- # ID
- # TODO
- # Content
- felem = infobox.find("p")
- if felem:
- felem = felem.find("bdi")
- if felem:
- content = felem.get_text().rstrip().lstrip()
- # Image
- felem = infobox.find("img")
- if felem:
- img_src = felem.get("src")
- # URLs
- for felem in infobox.find_all("li", {"class": "url"}):
- felem = felem.find("a")
- if felem:
- urls.append({
- 'title': felem.get_text().lstrip().rstrip(),
- 'url': felem.get("href", ""),
- 'entity': '', # TODO
- 'official': False # TODO
- })
- # Attributes
- """
- <div class="attributes">
- <dl>
- <dt><bdi>Chemical formula :</bdi></dt>
- <dd><bdi>H₂O</bdi></dd>
- </dl>
- </div>
- """
- felem = infobox.find("div", {"class": "attributes"})
- if felem:
- for item in felem.find_all("dl"):
- label = ""
- value = ""
- entity = "" # TODO
- try:
- label = item.dt.bdi.get_text().rstrip().lstrip()
- value = item.dd.bdi.get_text().rstrip().lstrip()
- except AttributeError:
- continue
- attributes.append({
- "label": label,
- "value": value,
- "entity": entity
- })
- # Engines
- for url in urls:
- engines.append(url['title'].lower())
- jsonResult['infoboxes'].append({
- "infobox": title,
- "id": id,
- "content": content,
- "img_src": img_src,
- "urls": urls,
- "attributes": attributes,
- "engines": engines
- })
- ##########################################################################
- ## 'unresponsive_engines' key
- ##########################################################################
- """
- <div id="engines_msg">
- <details class="sidebar-collapsable" open="">
- <summary class="title" id="engines_msg-title">Messages from the search engines</summary>
- <div class="dialog-error" role="alert">
- <svg class="ion-icon-big" etc..></svg>
- <div>
- <p>
- <strong>Error!</strong>
- Engines cannot retrieve results:
- </p>
- <p>
- brave (<a href="/stats?engine=brave" title="View error logs and submit a bug report">Suspended: too many requests</a>)
- </p>
- <p>
- qwant (<a href="/stats?engine=qwant" title="View error logs and submit a bug report">Suspended: too many requests</a>)
- </p>
- </div>
- </div>
- </details>
- </div>
- """
- felem = soup.find("div", {"id": "engines_msg"})
- if felem:
- for errDialog in felem.find_all("div", {"class": "dialog-error"}):
- for p in errDialog.find_all("p"):
- a = p.find("a")
- if not a:
- continue
- engine, msg = p.get_text().split(" ", 1)
- jsonResult['unresponsive_engines'].append([engine, msg])
- return jsonResult
- class SearxConfigResult(HttpJsonReponse):
- Schema = Schemas['searxng_config']
- class Categories:
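- """ Tracks which search categories are enabled and converts them into
- POST form data for a SearXNG query.
- Minimal usage sketch:
-     categories = Categories()
-     categories.set('images', True)
-     categories.dict()         # {'category_images': 'on'}
-     categories.enabledKeys()  # ['images']
- """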
- types = {
- 'general': (_('General'), 'category_general'),
- 'files': (_('Files'), 'category_files'),
- 'images': (_('Images'), 'category_images'),
- 'videos': (_('Videos'), 'category_videos'),
- 'it': (_('IT'), 'category_it'),
- 'map': (_('Location'), 'category_map'),
- 'music': (_('Music'), 'category_music'),
- 'news': (_('News'), 'category_news'),
- 'science': (_('Science'), 'category_science'),
- 'social media': (_('Social'), 'category_social media'),
- 'onions': (_('Onions'), 'category_onions'),
- 'shopping': (_('Shopping'), 'category_shopping')
- }
- def __init__(self):
- self._options = {}
- self.__makeOptions()
- def __makeOptions(self):
- self._options.clear()
- for key, t in self.types.items():
- self._options.update({key: False})
- def reset(self):
- self.__makeOptions()
- def get(self, key):
- return self._options[key]
- def set(self, key, state):
- """
- @param key: One of the keys in Categories.types
- @type key: str
- @param state: Enabled / disabled state
- @type state: bool
- """
- self._options[key] = state
- def dict(self):
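- """ Returns a dict that maps the form key of every enabled category
- to 'on', ready to be merged into the POST data. """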
- newDict = {}
- for key, state in self._options.items():
- if state:
- newDict.update({self.types[key][1]: 'on'})
- return newDict
- def enabledKeys(self):
- """ Returns a list with enabled engine strings (key from
- Categories.types)
- """
- return [key for key, state in self._options.items() if state]
- class Engines(list):
- def __init__(self):
- list.__init__(self)
- def dict(self):
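- """ Returns {'engines': 'name1,name2,...'} for the POST data, or an
- empty dict when no engines are selected. """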
- if not self:
- return {}
- return {
- 'engines': ",".join(self)
- }
- class SearX:
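- """ Performs a search on a single SearXNG instance, either through the
- JSON API (format=json) or by parsing the HTML of the 'simple' theme.
- Minimal usage sketch (httpThread/httpSettings are assumed to be the
- application's HTTP thread and settings objects):
-     searx = SearX(httpThread, httpSettings)
-     searx.url = 'https://searx.example.org'
-     searx.query = 'curl post request'
-     searx.categories.set('general', True)
-     searx.search()  # result arrives asynchronously in searchFinishedCb()
- """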
- Periods = {
- '': _('Anytime'),
- 'day': _('Last day'),
- 'week': _('Last week'),
- 'month': _('Last month'),
- 'year': _('Last year')
- }
- # https://github.com/asciimoo/searx/blob/master/searx/languages.py
- Languages = {
- '': _('No language'),
- 'all': _('Default language'),
- 'af-NA': 'Afrikaans - af-NA',
- 'ca-AD': 'Català - ca-AD',
- 'da-DK': 'Dansk - da-DK',
- 'de': 'Deutsch - de',
- 'de-AT': 'Deutsch (Österreich) - de-AT',
- 'de-CH': 'Deutsch (Schweiz) - de-CH',
- 'de-DE': 'Deutsch (Deutschland) - de-DE',
- 'et-EE': 'Eesti - et-EE',
- 'en': 'English - en',
- 'en-AU': 'English (Australia) - en-AU',
- 'en-CA': 'English (Canada) - en-CA',
- 'en-GB': 'English (United Kingdom) - en-GB',
- 'en-IE': 'English (Ireland) - en-IE',
- 'en-IN': 'English (India) - en-IN',
- 'en-NZ': 'English (New Zealand) - en-NZ',
- 'en-PH': 'English (Philippines) - en-PH',
- 'en-SG': 'English (Singapore) - en-SG',
- 'en-US': 'English (United States) - en-US',
- 'es': 'Español - es',
- 'es-AR': 'Español (Argentina) - es-AR',
- 'es-CL': 'Español (Chile) - es-CL',
- 'es-ES': 'Español (España) - es-ES',
- 'es-MX': 'Español (México) - es-MX',
- 'fr': 'Français - fr',
- 'fr-BE': 'Français (Belgique) - fr-BE',
- 'fr-CA': 'Français (Canada) - fr-CA',
- 'fr-CH': 'Français (Suisse) - fr-CH',
- 'fr-FR': 'Français (France) - fr-FR',
- 'hr-HR': 'Hrvatski - hr-HR',
- 'id-ID': 'Indonesia - id-ID',
- 'it-IT': 'Italiano - it-IT',
- 'sw-KE': 'Kiswahili - sw-KE',
- 'lv-LV': 'Latviešu - lv-LV',
- 'lt-LT': 'Lietuvių - lt-LT',
- 'hu-HU': 'Magyar - hu-HU',
- 'ms-MY': 'Melayu - ms-MY',
- 'nl': 'Nederlands - nl',
- 'nl-BE': 'Nederlands (België) - nl-BE',
- 'nl-NL': 'Nederlands (Nederland) - nl-NL',
- 'nb-NO': 'Norsk Bokmål - nb-NO',
- 'pl-PL': 'Polski - pl-PL',
- 'pt': 'Português - pt',
- 'pt-BR': 'Português (Brasil) - pt-BR',
- 'pt-PT': 'Português (Portugal) - pt-PT',
- 'ro-RO': 'Română - ro-RO',
- 'sk-SK': 'Slovenčina - sk-SK',
- 'sl-SI': 'Slovenščina - sl-SI',
- 'sr-RS': 'Srpski - sr-RS',
- 'fi-FI': 'Suomi - fi-FI',
- 'sv-SE': 'Svenska - sv-SE',
- 'vi-VN': 'Tiếng Việt - vi-VN',
- 'tr-TR': 'Türkçe - tr-TR',
- 'is-IS': 'Íslenska - is-IS',
- 'cs-CZ': 'Čeština - cs-CZ',
- 'el-GR': 'Ελληνικά - el-GR',
- 'be-BY': 'Беларуская - be-BY',
- 'bg-BG': 'Български - bg-BG',
- 'ru-RU': 'Русский - ru-RU',
- 'uk-UA': 'Українська - uk-UA',
- 'hy-AM': 'Հայերեն - hy-AM',
- 'he-IL': 'עברית - he-IL',
- 'ar-SA': 'العربية - ar-SA',
- 'fa-IR': 'فارسی - fa-IR',
- 'th-TH': 'ไทย - th-TH',
- 'zh': '中文 - zh',
- 'zh-CN': '中文 (中国) - zh-CN',
- 'zh-TW': '中文 (台灣) - zh-TW',
- 'ja-JP': '日本語 - ja-JP',
- 'ko-KR': '한국어 - ko-KR'
- }
- def __init__(self, httpThread, httpSettings):
- self._httpThread = httpThread
- self._httpSettings = httpSettings
- self._url = ""
- self._categories = Categories()
- self._engines = Engines()
- self._query = ""
- self._lang = ""
- self._pageno = "" # int formatted as string
- self._timeRange = "" # '', 'day', 'week', 'month' or 'year'
- self._safesearch = False
- self._parseHtml = True
- @property
- def categories(self): return self._categories
- @property
- def engines(self): return self._engines
- @property
- def url(self):
- """
- @return: Instance url
- @rtype: str
- """
- return self._url
- @url.setter
- def url(self, url):
- """
- @param url: Instance url
- @type url: str
- """
- self._url = url
- @property
- def query(self):
- """
- @return: Search query
- @rtype: str
- """
- return self._query
- @query.setter
- def query(self, q):
- """
- @param q: Search query
- @type q: str
- """
- self._query = q
- @property
- def lang(self):
- """
- @return: Language code
- @rtype: str
- """
- return self._lang
- @lang.setter
- def lang(self, lang):
- """
- @param lang: Language code
- @type lang: str
- """
- self._lang = lang
- @property
- def pageno(self):
- """
- @return: Page number
- @rtype: int
- """
- return int(self._pageno)
- @pageno.setter
- def pageno(self, i):
- """
- @param i: Page number
- @type i: int
- """
- self._pageno = str(i)
- @property
- def timeRange(self):
- """
- @return: Search time range ('', 'day', 'week', 'month' or 'year')
- @rtype: str
- """
- return self._timeRange
- @timeRange.setter
- def timeRange(self, value):
- """
- @param value: Key from SearX.Periods
- @type value: str
- """
- self._timeRange = value
- @property
- def safeSearch(self):
- """
- @return: Whether safe search is enabled or not.
- @rtype: bool
- """
- return self._safesearch
- @safeSearch.setter
- def safeSearch(self, state):
- """
- @param state: Enable/disable safe search.
- @type state: bool
- """
- self._safesearch = state
- @property
- def parseHtml(self):
- """
- @return: Whether parsing HTML is enabled; the JSON API will not be
- used when this returns True.
- @rtype: bool
- """
- return self._parseHtml
- @parseHtml.setter
- def parseHtml(self, state):
- """
- @param state: Enable/disable parsing HTML instead of using the JSON API
- @type state: bool
- """
- self._parseHtml = state
- @property
- def requestKwargs(self):
- """ Returns current data that will be send with the POST
- request used for the search operation. The search query,
- language, page-number and enabled categories/engines.
- @rtype: dict
- """
- data = {
- "q": self.query,
- "safesearch": "1" if self.safeSearch else "0"
- }
- # Choose what resource to use (JSON API or HTML parser)
- if self.parseHtml:
- data.update({"theme": "simple"})
- else:
- data.update({"format": "json"})
- # Testing showed that searx honors only the engines when both
- # engines and categories are set.
- if self.engines:
- data.update(self.engines.dict())
- elif self.categories:
- data.update(self.categories.dict())
- if self.lang:
- data.update({"language": self.lang})
- if self.pageno:
- data.update({"pageno": self.pageno})
- if self.timeRange:
- data.update({"time_range": self.timeRange})
- return data
- def reset(self):
- self.url = ""
- self.timeRange = ""
- self.lang = ""
- self.pageno = 1
- self.categories.reset()
- self.engines.clear()
- self.query = ""
- def searchFinishedCb(self, response):
- pass # TODO reimplement
- def search(self):
- """ Preform search operation with current set values.
- @returns: The result of this search.
- @rtype: SearchResult
- """
- rtype = SearchResult
- if self.parseHtml:
- rtype = SearchResult2
- request = HttpRequest(urllib.parse.urljoin(self.url, "/search"),
- self._httpSettings.newRequestSettings(),
- self.requestKwargs)
- response = rtype(request, self.handleLinkToken)
- self._httpThread.get(response)
- def _linkTokenReponse(self, response):
- # Failed to get dummy css
- if response.error != ErrorType.Success:
- self.searchFinishedCb(response) # TODO HttpReponse is returned here
- return
- # Redo original request
- request = HttpRequest(urllib.parse.urljoin(self.url, "/search"),
- response.request.settings,
- data=self.requestKwargs)
- response = SearchResult2(request, self.searchFinishedCb)
- self._httpThread.get(response)
- def handleLinkToken(self, response):
- """! Searx-Qt is not a bot
- @see https://github.com/searxng/searxng/blob/master/searx/botdetection/link_token.py
- @note variables in https://searx.instance/config:
- - bool ["limiter"]["botdetection.ip_limit.link_token"]
- - bool ["limiter"]["botdetection.ip_limit.pass_searxng_org"]
- """
- # Not relevant
- if response.error != ErrorType.NoResults or not self.parseHtml:
- self.searchFinishedCb(response)
- return
- # No linktoken found
- if response.linktoken is None:
- self.searchFinishedCb(response)
- return
- # Request the dummy css
- request = HttpRequest(response.linktoken,
- response.request.settings)
- response = HttpReponse(request, self._linkTokenReponse)
- self._httpThread.get(response)
- class SearxConfigHandler(HandlerProto):
- def __init__(self, httpThread, httpSettings):
- HandlerProto.__init__(self, httpThread, httpSettings)
- def updateInstanceFinished(self, response):
- pass # Reimplement this TODO
- def _requestFinished(self, response):
- if response.error != ErrorType.Success:
- self.updateInstanceFinished(response)
- return
- url = response.request.url
- instance = self.instances[url]
- j = response.json()
- """ Update instance version
- """
- instance.update({
- "version": j.get("version", "")
- })
- """ Update instance network_type to use our own network type
- definitions as class NetworkTypes (core/handler.py)
- """
- instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})
- """ Update Engines
- What we get:
- "engines": [
- categories (list, str)
- enabled (bool)
- language_support (bool)
- name (str)
- paging (bool)
- safesearch (bool)
- shortcut (str)
- supported_languages (list, str)
- time_range_support (bool)
- timeout (float)
- ]
- What instanceModel wants
- "engines" : {
- "not evil": {
- "error_rate": 15,
- "errors": [
- 0
- ]
- }
- }
- What enginesModel wants
- "engines": {
- "1337x": {
- "categories": [
- "videos"
- ],
- "language_support": true,
- "paging": true,
- "safesearch": false,
- "shortcut": "1337x",
- "time_range_support": false
- },
- """
- newInstanceEngines = {}
- newEnginesEngines = {}
- for engine in j.get('engines', []):
- name = engine.get('name', "")
- if not name:
- continue
- newInstanceEngines.update({
- name: {}
- })
- if name not in self.engines:
- newEnginesEngines.update({
- name: {
- "categories": list(engine.get('categories', [])),
- "language_support": engine.get(
- 'language_support',
- False
- ),
- "paging": engine.get('paging', False),
- "safesearch": engine.get('safesearch', False),
- "shortcut": engine.get('shortcut', ""),
- "time_range_support": engine.get(
- 'time_range_support',
- False
- )
- }
- })
- instance.update({
- "engines": dict(newInstanceEngines)
- })
- self.engines.update(newEnginesEngines)
- """ Update instance lastUpdated
- """
- instance.update({
- "lastUpdated": time.time()
- })
- self.updateInstanceFinished(response)
- def updateInstance(self, url):
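- """ Requests /config from the given instance and updates the stored
- instance data (version, network_type, engines, lastUpdated) when the
- request finishes.
- @param url: Base url of the instance.
- @type url: str
- """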
- newUrl = urllib.parse.urljoin(url, "/config")
- request = HttpRequest(newUrl,
- self.httpSettings.newRequestSettings())
- response = SearxConfigResult(request, self._requestFinished)
- self._httpThread.get(response)
- def addInstance(self, url):
- if url not in self.instances:
- self._instances[url] = {}
- return True
- return False
- def removeInstance(self, url):
- """
- @param url: url of the instance to remove.
- @type url: str
- """
- del self._instances[url]
- def removeMultiInstances(self, urls):
- """ Remove instance(s) by url without emitting changed for every
- instance that got removed.
- @param urls: list with urls of instances to remove.
- @type urls: list
- """
- for url in urls:
- del self._instances[url]
|