12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138 |
- ########################################################################
- # Searx-Qt - Lightweight desktop application for Searx.
- # Copyright (C) 2020-2022 CYBERDEViL
- #
- # This file is part of Searx-Qt.
- #
- # Searx-Qt is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Searx-Qt is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
import logging
import time
import urllib.parse

from bs4 import BeautifulSoup

from searxqt.core.handler import HandlerProto, NetworkTypes
from searxqt.core.requests import JsonResult, ErrorType, Schemas
from searxqt.translations import _
from searxqt.utils.string import parseFilesize
- ## API result (format=json)
class SearchResult(JsonResult):
    """! Result of a search request answered in JSON (format=json).

    On top of the checks done by JsonResult, a response that carries no
    usable data at all is flagged as ErrorType.NoResults.
    """
    Schema = Schemas['searxng_query']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        """! All arguments are handed to JsonResult unchanged."""
        JsonResult.__init__(self, url, response, err=err, errType=errType)

    def verifyFurther(self):
        """! Flag the result as NoResults when every usable key is empty.

        Runs JsonResult.verifyFurther() first and only inspects the data
        when the result is still considered a success afterwards.
        """
        JsonResult.verifyFurther(self)

        if self._errType != ErrorType.Success:
            return

        # At least one of these keys has to hold something, otherwise the
        # response counts as having no (usable) result.
        usableKeys = (
            'results',
            'answers',
            'corrections',
            'infoboxes',
            'suggestions'
        )
        data = self.json()
        if any(len(data.get(key, [])) for key in usableKeys):
            return

        self._errType = ErrorType.NoResults
        self._err = f"NoResults: got: `{self.json()}`"
def fixUrlScheme(url):
    """! Adds 'https://' when the scheme is missing.

    Scheme-relative URLs ('//host/path') only get 'https:' prepended, so no
    doubled slashes are produced. An empty value is returned unchanged
    instead of being turned into a bare 'https://'.

    @param url: URL that may lack a scheme.
    @type url: str

    @return: `url` with a scheme.
    @rtype: str
    """
    if not url:
        return url

    if url.startswith("//"):
        return f"https:{url}"

    if not urllib.parse.urlparse(url).scheme:
        return f"https://{url}"

    return url
- ## HTML result that will be parsed into JSON
class SearchResult2(SearchResult):
    """! Search result built by scraping the HTML page of an instance.

    `parse()` turns the HTML into a dict with the keys 'results',
    'answers', 'corrections', 'infoboxes', 'suggestions' and
    'unresponsive_engines'; `json()` returns that dict.
    """
    Schema = Schemas['searxng_query']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        """! All arguments are handed to SearchResult unchanged."""
        # Assigned before the parent constructor runs so json() always has
        # something to return.
        self.__json = {}
        SearchResult.__init__(self, url, response, err=err, errType=errType)

    def verifyFurther(self):
        """! Parse the HTML first, then run the SearchResult checks on it."""
        self.__json = self.parse()
        SearchResult.verifyFurther(self)

    def json(self):
        """
        @return: The data produced by the last `parse()` run.
        @rtype: dict
        """
        return self.__json

    @staticmethod
    def _debug(msg):
        """! Emit a debug message about an element that could not be parsed."""
        logging.getLogger(__name__).debug(msg)

    def makeUrlAbsolute(self, url):
        """! Returns a absolute URL. It will add the SearXNG instance its
        schema and location in front when they are missing.

        The URL is resolved against `self.url()`, so scheme-relative URLs
        ('//host/path') and URLs that are already absolute (including
        'data:' URIs) stay intact. An empty value is returned unchanged.

        @param url: Absolute or relative URL found in the HTML.
        @type url: str

        @return: Absolute URL.
        @rtype: str
        """
        if not url:
            return url
        return urllib.parse.urljoin(self.url(), url)

    def parseImagesResult(self, result):
        """! Parse image results from HTML.

        Relevant parts of the markup this method looks for:

            <article class="result result-images category-images">
              <a href="IMAGE_URL" rel="noreferrer">
                <img alt="TITLE" class="image_thumbnail"
                     src="/image_proxy?url=THUMB_URL&h=HASH"/>
                <span class="title">TITLE</span>
                <span class="source">wallup.net</span>
              </a>
              <div class="detail">
                <a class="result-images-source" href="IMAGE_URL">...</a>
                <div class="result-images-labels">
                  <p class="result-content"> </p>
                  <p class="result-format"> </p>
                  <p class="result-engine"><span>Engine:</span>qwant images</p>
                  <p class="result-url">
                    <span>View source:</span>
                    <a href="PAGE_URL" rel="noreferrer">PAGE_URL</a>
                  </p>
                </div>
              </div>
            </article>

        Anything that cannot be found is left empty and reported with a
        debug message.

        @param result: Element of one image result.

        @return: dict with the keys 'title', 'url', 'content', 'engines',
                 'img_format', 'img_src', 'thumbnail_src', 'source' and
                 'category'.
        @rtype: dict
        """
        title = ''          # image title
        url = ''            # url to the website of the image
        content = ''        # probably same as the title
        engines = []        # engine name(s) with spaces replaced by '-'
        img_format = ''     # size/format of the image in string format
        img_src = ''        # url of the image itself
        thumbnail_src = ''  # url to thumbnail
        source = ''         # where does the image come from?
        category = 'images'

        # !! GET title and thumbnail_src; both come from <a><img .../></a>
        try:
            img = result.a.img
        except AttributeError:
            img = None

        if img is not None:
            title = img.get('alt', '')
            # Make sure the thumbnail url is absolute
            thumbnail_src = self.makeUrlAbsolute(img.get('src', ''))
        else:
            self._debug("Failed to get img title")
            self._debug("Failed to get img thumbnail url")

        # !! GET url
        felem = result.find("p", {"class": "result-url"})
        if felem is None:
            self._debug("Failed to get img url (2)")
        elif felem.a is None:
            self._debug("Failed to get img url (1)")
        else:
            url = felem.a.get('href', '')

        # !! GET img_src
        felem = result.find("a", {"class": "result-images-source"})
        href = felem.get('href') if felem is not None else None
        if href:
            img_src = fixUrlScheme(href)  # Make sure it has a scheme
        else:
            self._debug("Failed to get img_src")

        # !! GET content
        felem = result.find("p", {"class": "result-content"})
        if felem is not None:
            content = felem.get_text()
        else:
            self._debug("Failed to get img content")

        # !! GET img_format
        felem = result.find("p", {"class": "result-format"})
        if felem is not None:
            img_format = felem.get_text()
        else:
            self._debug("Failed to get img format")

        # !! GET source
        felem = result.find("span", {"class": "source"})
        if felem is not None:
            source = felem.get_text()
        else:
            self._debug("Failed to get img source")

        # !! GET engines
        # The engine name is the text that follows the <span> label.
        felem = result.find("p", {"class": "result-engine"})
        if felem is not None:
            for label in felem.find_all("span"):
                sibling = label.next_sibling
                if sibling is None:
                    continue
                name = sibling.get_text().strip()
                if name:
                    engines.append(name.replace(' ', '-'))
        else:
            self._debug("Failed to get img engines")

        return {
            'title': title,
            'url': url,
            'content': content,
            'engines': engines,
            'img_format': img_format,
            'img_src': img_src,
            'thumbnail_src': thumbnail_src,
            'source': source,
            'category': category
        }

    def _parseDefaultResult(self, result):
        """! Parse one non-image result (e.g. general or torrent) from HTML.

        Relevant parts of the markup this method looks for:

            <article class="result result-default category-general ...">
              <h3><a href="URL" rel="noreferrer">TITLE</a></h3>
              <time class="published_date"
                    datetime="2018-10-20 00:00:00">Oct 20, 2018</time>
              <p class="content">CONTENT</p>
              <p class="altlink"><a href="magnet:..." class="magnetlink">
                magnet link</a></p>
              <p class="altlink"><a href="https://..." class="torrentfile">
                torrent file</a></p>
              <p class="stat">
                Seeder <span class="badge">407</span>
                Leecher <span class="badge">748</span>
              </p>
              <p class="stat"> Filesize <span class="badge">2.88 GiB</span></p>
              <div class="engines">
                <span>qwant</span>
                <span>duckduckgo</span>
              </div>
            </article>

        @param result: Element of one result.

        @return: dict with the keys 'title', 'url', 'content', 'engines',
                 'publishedDate', 'magnetlink', 'torrentfile', 'filesize',
                 'files' and 'img_format'; 'seed' and 'leech' are only
                 present when they were found.
        @rtype: dict
        """
        title = ''
        url = ''
        content = ''
        engines = []
        publishedDate = ''
        magnetlink = ''
        torrentfile = ''
        filesize = 0
        files = 0  # TODO unused for now
        seed = None
        leech = None

        # !! GET title and url; both come from <h3><a href=URL>TITLE</a></h3>
        try:
            heading = result.h3.a
        except AttributeError:
            heading = None

        if heading is not None:
            title = heading.get_text().strip()
            url = heading.get("href", '')
        else:
            self._debug("Failed to get title")
            self._debug("Failed to get url")

        # !! GET content
        felem = result.find("p", {"class": "content"})
        if felem is not None:
            content = felem.get_text().strip()

        # !! GET engines
        felem = result.find("div", {"class": "engines"})
        if felem is not None:
            engines = [
                span.get_text().strip() for span in felem.find_all("span")
            ]

        # !! GET publishedDate
        felem = result.find("time", {"class": "published_date"})
        if felem is not None:
            publishedDate = felem.get("datetime", "")

        # !! GET magnetlink
        felem = result.find("a", {"class": "magnetlink"})
        if felem is not None:
            magnetlink = felem.get('href')

        # !! GET torrentfile
        felem = result.find("a", {"class": "torrentfile"})
        if felem is not None:
            torrentfile = felem.get('href')

        # !! GET filesize, seed and leech
        # Each value sits in a 'badge' span; the text right in front of the
        # span tells which value it is.
        for badge in result.find_all("span", {"class": "badge"}):
            precedingText = badge.previous_sibling
            if not precedingText:
                continue
            if "Filesize" in precedingText:
                filesize = parseFilesize(badge.get_text().strip())
            elif "Seeder" in precedingText:
                seed = badge.get_text()
            elif "Leecher" in precedingText:
                leech = badge.get_text()

        resultData = {
            'title': title,
            'url': url,
            'content': content,
            'engines': engines,

            # Optional
            'publishedDate': publishedDate,

            # File attributes
            'magnetlink': magnetlink,
            'torrentfile': torrentfile,
            'filesize': filesize,
            'files': files,
            'img_format': ''  # TODO
        }

        if seed is not None:
            resultData['seed'] = seed

        if leech is not None:
            resultData['leech'] = leech

        return resultData

    def _parseResults(self, soup):
        """! Collect the data for the 'results' key.

        A result element may be an 'article' or a 'div' (also depending on
        the category of the result); all 'article' elements are handled
        before the 'div' elements.

        @return: list with one dict per result.
        @rtype: list
        """
        results = []
        for tagName in ("article", "div"):
            for result in soup.find_all(tagName, {"class": "result"}):
                if "result-images" in result.get("class", []):
                    results.append(self.parseImagesResult(result))
                else:
                    results.append(self._parseDefaultResult(result))
        return results

    def _parseSuggestions(self, soup):
        """! Collect the data for the 'suggestions' key.

        Relevant parts of the markup:

            <div id="suggestions" role="complementary">
              ...
              <form method="POST" action="/search">
                <input type="hidden" name="q" value="curl post request json">
                ...
                <input type="submit" class="suggestion" role="link"
                       value="curl post request json">

        @return: The 'value' of every input named 'q' inside the
                 suggestions element.
        @rtype: list
        """
        felem = soup.find("div", {"id": "suggestions"})
        if felem is None:
            return []
        return [
            suggestion.get("value")
            for suggestion in felem.find_all("input", {"name": "q"})
        ]

    def _parseAnswers(self, soup):
        """! Collect the data for the 'answers' key.

        Relevant parts of the markup:

            <div class="answer">
              <span>LONG TEXT ...</span>
              <a href="some url ..." class="answer-url">url text ...</a>
            </div>

        @return: Text of the first span of every answer element.
        @rtype: list
        """
        answers = []
        for answer in soup.find_all("div", {"class": "answer"}):
            felem = answer.find("span")
            if felem is not None:
                answers.append(felem.get_text())
        return answers

    def _parseInfobox(self, infobox):
        """! Parse one infobox from HTML.

        Relevant parts of the markup:

            <aside class="infobox" aria-label="Water">
              <h2 class="title"><bdi>Water</bdi></h2>
              <img src="/image_proxy?url=..." title="Water" alt="Water">
              <p><bdi>LONG TEXT ...</bdi></p>
              <div class="attributes">
                <dl>
                  <dt><bdi>Chemical formula :</bdi></dt>
                  <dd><bdi>H2O</bdi></dd>
                </dl>
              </div>
              <div class="urls">
                <ul>
                  <li class="url"><bdi>
                    <a href="https://en.wikipedia.org/wiki/Water"
                       rel="noreferrer">Wikipedia</a>
                  </bdi></li>
                </ul>
              </div>
            </aside>

        @param infobox: The 'aside' element of one infobox.

        @return: dict with the keys 'infobox', 'id', 'content', 'img_src',
                 'urls', 'attributes' and 'engines'.
        @rtype: dict
        """
        title = ""
        infoboxId = ""  # TODO
        content = ""
        img_src = ""
        urls = []
        attributes = []

        # Title
        felem = infobox.find("h2", {"class": "title"})
        if felem is not None:
            title = felem.get_text().strip()

        # Content
        felem = infobox.find("p")
        if felem is not None:
            felem = felem.find("bdi")
            if felem is not None:
                content = felem.get_text().strip()

        # Image
        felem = infobox.find("img")
        if felem is not None:
            img_src = self.makeUrlAbsolute(felem.get("src", ""))

        # URLs
        for item in infobox.find_all("li", {"class": "url"}):
            link = item.find("a")
            if link is None:
                continue
            urls.append({
                'title': link.get_text().strip(),
                'url': link.get("href", ""),
                'entity': '',      # TODO
                'official': False  # TODO
            })

        # Attributes
        felem = infobox.find("div", {"class": "attributes"})
        if felem is not None:
            for item in felem.find_all("dl"):
                try:
                    label = item.dt.bdi.get_text().strip()
                    value = item.dd.bdi.get_text().strip()
                except AttributeError:
                    # Incomplete label/value pair, skip it.
                    continue
                attributes.append({
                    "label": label,
                    "value": value,
                    "entity": ""  # TODO
                })

        # Engines; the lowercase link titles are used as engine names.
        engines = [link['title'].lower() for link in urls]

        return {
            "infobox": title,
            "id": infoboxId,
            "content": content,
            "img_src": img_src,
            "urls": urls,
            "attributes": attributes,
            "engines": engines
        }

    def _parseUnresponsiveEngines(self, soup):
        """! Collect the data for the 'unresponsive_engines' key.

        Relevant parts of the markup:

            <div id="engines_msg">
              <details class="sidebar-collapsable" open="">
                <div class="dialog-error" role="alert">
                  <div>
                    <p><strong>Error!</strong>
                       Engines cannot retrieve results:</p>
                    <p>brave (<a href="/stats?engine=brave">
                       Suspended: too many requests</a>)</p>
                  </div>
                </div>
              </details>
            </div>

        Only paragraphs that contain a link are treated as engine messages.

        @return: list of [engine, message] lists.
        @rtype: list
        """
        unresponsive = []
        felem = soup.find("div", {"id": "engines_msg"})
        if felem is None:
            return unresponsive

        for errDialog in felem.find_all("div", {"class": "dialog-error"}):
            for p in errDialog.find_all("p"):
                if p.find("a") is None:
                    continue
                # First word is the engine, the rest is the message. Split
                # on any whitespace so surrounding whitespace or a missing
                # message can't break the unpacking.
                parts = p.get_text().strip().split(None, 1)
                if not parts:
                    continue
                msg = parts[1] if len(parts) > 1 else ""
                unresponsive.append([parts[0], msg])
        return unresponsive

    def parse(self):
        """! Parse the HTML content of this result.

        @return: Empty dict when `errorType()` is not ErrorType.Success,
                 else a dict with the keys 'results', 'answers',
                 'corrections', 'infoboxes', 'suggestions' and
                 'unresponsive_engines'.
        @rtype: dict
        """
        if self.errorType() != ErrorType.Success:
            return {}

        soup = BeautifulSoup(self.content(), "html.parser")

        return {
            'results': self._parseResults(soup),
            'answers': self._parseAnswers(soup),
            'corrections': [],  # TODO
            'infoboxes': [
                self._parseInfobox(infobox)
                for infobox in soup.find_all("aside", {"class": "infobox"})
            ],
            'suggestions': self._parseSuggestions(soup),
            'unresponsive_engines': self._parseUnresponsiveEngines(soup)
        }
class SearxConfigResult(JsonResult):
    """! JSON result of a config request, checked with the
    'searxng_config' schema."""
    Schema = Schemas['searxng_config']

    def __init__(self, url, response, err="", errType=ErrorType.Success):
        """! All arguments are handed to JsonResult unchanged."""
        super().__init__(url, response, err=err, errType=errType)
class Categories:
    """! Enabled/disabled state for each known search category."""

    # key: (translated label, name of the request parameter)
    types = {
        'general': (_('General'), 'category_general'),
        'files': (_('Files'), 'category_files'),
        'images': (_('Images'), 'category_images'),
        'videos': (_('Videos'), 'category_videos'),
        'it': (_('IT'), 'category_it'),
        'map': (_('Location'), 'category_map'),
        'music': (_('Music'), 'category_music'),
        'news': (_('News'), 'category_news'),
        'science': (_('Science'), 'category_science'),
        'social media': (_('Social'), 'category_social media'),
        'onions': (_('Onions'), 'category_onions'),
        'shopping': (_('Shopping'), 'category_shopping')
    }

    def __init__(self):
        self._options = {}
        self.__makeOptions()

    def __makeOptions(self):
        """! (Re)fill the options with every category disabled."""
        self._options.clear()
        self._options.update({key: False for key in self.types})

    def reset(self):
        """! Disable all categories."""
        self.__makeOptions()

    def get(self, key):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @return: Enabled / disabled state
        @rtype: bool
        """
        return self._options[key]

    def set(self, key, state):
        """
        @param key: One of the keys in Categories.types
        @type key: str

        @param state: Enabled / disabled state
        @type state: bool
        """
        self._options[key] = state

    def dict(self):
        """
        @return: The request parameter name of every enabled category,
                 each mapped to 'on'.
        @rtype: dict
        """
        return {
            self.types[key][1]: 'on'
            for key, enabled in self._options.items()
            if enabled
        }

    def enabledKeys(self):
        """
        @return: Keys (from Categories.types) of the enabled categories.
        @rtype: list
        """
        enabled = []
        for key, state in self._options.items():
            if state:
                enabled.append(key)
        return enabled
class Engines(list):
    """! List of engine names that can be turned into request data."""

    def __init__(self):
        super().__init__()

    def dict(self):
        """
        @return: Empty dict when the list is empty, else a dict with the
                 key 'engines' holding the names joined by a comma.
        @rtype: dict
        """
        return {'engines': ",".join(self)} if self else {}
class SearX:
    """! Holds the parameters of a search and performs the request.

    The parameters (query, language, page number, time range, safe search,
    categories and engines) are turned into request data by
    `requestKwargs`; `search()` sends them to `url` through the request
    handler that was given to the constructor.
    """

    # Request value of 'time_range': label
    Periods = {
        '': _('Anytime'),
        'day': _('Last day'),
        'week': _('Last week'),
        'month': _('Last month'),
        'year': _('Last year')
    }

    # Request value of 'language': label
    # https://github.com/asciimoo/searx/blob/master/searx/languages.py
    Languages = {
        '': _('No language'),
        'all': _('Default language'),
        'af-NA': 'Afrikaans - af-NA',
        'ca-AD': 'Català - ca-AD',
        'da-DK': 'Dansk - da-DK',
        'de': 'Deutsch - de',
        'de-AT': 'Deutsch (Österreich) - de-AT',
        'de-CH': 'Deutsch (Schweiz) - de-CH',
        'de-DE': 'Deutsch (Deutschland) - de-DE',
        'et-EE': 'Eesti - et-EE',
        'en': 'English - en',
        'en-AU': 'English (Australia) - en-AU',
        'en-CA': 'English (Canada) - en-CA',
        'en-GB': 'English (United Kingdom) - en-GB',
        'en-IE': 'English (Ireland) - en-IE',
        'en-IN': 'English (India) - en-IN',
        'en-NZ': 'English (New Zealand) - en-NZ',
        'en-PH': 'English (Philippines) - en-PH',
        'en-SG': 'English (Singapore) - en-SG',
        'en-US': 'English (United States) - en-US',
        'es': 'Español - es',
        'es-AR': 'Español (Argentina) - es-AR',
        'es-CL': 'Español (Chile) - es-CL',
        'es-ES': 'Español (España) - es-ES',
        'es-MX': 'Español (México) - es-MX',
        'fr': 'Français - fr',
        'fr-BE': 'Français (Belgique) - fr-BE',
        'fr-CA': 'Français (Canada) - fr-CA',
        'fr-CH': 'Français (Suisse) - fr-CH',
        'fr-FR': 'Français (France) - fr-FR',
        'hr-HR': 'Hrvatski - hr-HR',
        'id-ID': 'Indonesia - id-ID',
        'it-IT': 'Italiano - it-IT',
        'sw-KE': 'Kiswahili - sw-KE',
        'lv-LV': 'Latviešu - lv-LV',
        'lt-LT': 'Lietuvių - lt-LT',
        'hu-HU': 'Magyar - hu-HU',
        'ms-MY': 'Melayu - ms-MY',
        'nl': 'Nederlands - nl',
        'nl-BE': 'Nederlands (België) - nl-BE',
        'nl-NL': 'Nederlands (Nederland) - nl-NL',
        'nb-NO': 'Norsk Bokmål - nb-NO',
        'pl-PL': 'Polski - pl-PL',
        'pt': 'Português - pt',
        'pt-BR': 'Português (Brasil) - pt-BR',
        'pt-PT': 'Português (Portugal) - pt-PT',
        'ro-RO': 'Română - ro-RO',
        'sk-SK': 'Slovenčina - sk-SK',
        'sl-SI': 'Slovenščina - sl-SI',
        'sr-RS': 'Srpski - sr-RS',
        'fi-FI': 'Suomi - fi-FI',
        'sv-SE': 'Svenska - sv-SE',
        'vi-VN': 'Tiếng Việt - vi-VN',
        'tr-TR': 'Türkçe - tr-TR',
        'is-IS': 'Íslenska - is-IS',
        'cs-CZ': 'Čeština - cs-CZ',
        'el-GR': 'Ελληνικά - el-GR',
        'be-BY': 'Беларуская - be-BY',
        'bg-BG': 'Български - bg-BG',
        'ru-RU': 'Русский - ru-RU',
        'uk-UA': 'Українська - uk-UA',
        'hy-AM': 'Հայերեն - hy-AM',
        'he-IL': 'עברית - he-IL',
        'ar-SA': 'العربية - ar-SA',
        'fa-IR': 'فارسی - fa-IR',
        'th-TH': 'ไทย - th-TH',
        'zh': '中文 - zh',
        'zh-CN': '中文 (中国) - zh-CN',
        'zh-TW': '中文 (台灣) - zh-TW',
        'ja-JP': '日本語 - ja-JP',
        'ko-KR': '한국어 - ko-KR'
    }

    def __init__(self, requestHandler):
        """
        @param requestHandler: Object whose `get()` method is used by
                               `search()` to perform the request.
        """
        self._requestHandler = requestHandler
        self._url = ""
        self._categories = Categories()
        self._engines = Engines()
        self._query = ""
        self._lang = ""
        # int formatted as string. Starts at page 1 (the value reset()
        # uses); an empty string here made the `pageno` getter, and with it
        # `requestKwargs`, raise ValueError on int('').
        self._pageno = "1"
        self._timeRange = ""  # '', 'day', 'week', 'month' or 'year'
        self._safesearch = False
        self._parseHtml = True

    @property
    def categories(self):
        """
        @return: The categories of this search.
        @rtype: Categories
        """
        return self._categories

    @property
    def engines(self):
        """
        @return: The engines of this search.
        @rtype: Engines
        """
        return self._engines

    @property
    def url(self):
        """
        @return: Instance url
        @rtype: str
        """
        return self._url

    @url.setter
    def url(self, url):
        """
        @param url: Instance url
        @type url: str
        """
        self._url = url

    @property
    def query(self):
        """
        @return: Search query
        @rtype: str
        """
        return self._query

    @query.setter
    def query(self, q):
        """
        @param q: Search query
        @type q: str
        """
        self._query = q

    @property
    def lang(self):
        """
        @return: Language code
        @rtype: str
        """
        return self._lang

    @lang.setter
    def lang(self, lang):
        """
        @param lang: Language code
        @type lang: str
        """
        self._lang = lang

    @property
    def pageno(self):
        """
        @return: Page number
        @rtype: int
        """
        return int(self._pageno)

    @pageno.setter
    def pageno(self, i):
        """
        @param i: Page number
        @type i: int
        """
        self._pageno = str(i)

    @property
    def timeRange(self):
        """
        @return: Search time range ('', 'day', 'week', 'month' or 'year')
        @rtype: str
        """
        return self._timeRange

    @timeRange.setter
    def timeRange(self, value):
        """
        @param value: Key from SearX.Periods
        @type value: str
        """
        self._timeRange = value

    @property
    def safeSearch(self):
        """
        @return: Whether safe search is enabled or not.
        @rtype: bool
        """
        return self._safesearch

    @safeSearch.setter
    def safeSearch(self, state):
        """
        @param state: Enable/disable safe search.
        @type state: bool
        """
        self._safesearch = state

    @property
    def parseHtml(self):
        """
        @return: Whether parsing HTML is enabled, this will not use the
                 JSON API when it returns True.
        @rtype: bool
        """
        return self._parseHtml

    @parseHtml.setter
    def parseHtml(self, state):
        """
        @param state: Enable/disable parsing HTML instead of using the
                      JSON API
        @type state: bool
        """
        self._parseHtml = state

    @property
    def requestKwargs(self):
        """ Returns the current data that will be sent with the request
        used for the search operation: the search query, safe search,
        result format, language, page-number, time range and the enabled
        categories/engines.

        @rtype: dict
        """
        data = {
            "q": self.query,
            "safesearch": "1" if self.safeSearch else "0"
        }

        # Choose what resource to use (JSON API or HTML parser)
        if self.parseHtml:
            data.update({"theme": "simple"})
        else:
            data.update({"format": "json"})

        # After testing found that searx will honor only engines when
        # both engines and categories are set.
        if self.engines:
            data.update(self.engines.dict())
        elif self.categories:
            data.update(self.categories.dict())

        if self.lang:
            data.update({"language": self.lang})

        if self.pageno:
            data.update({"pageno": self.pageno})

        if self.timeRange:
            data.update({"time_range": self.timeRange})

        return data

    def reset(self):
        """ Reset url, time range, language, page number (to 1),
        categories, engines and query. Safe search and the HTML parsing
        option are left untouched.
        """
        self.url = ""
        self.timeRange = ""
        self.lang = ""
        self.pageno = 1
        self.categories.reset()
        self.engines.clear()
        self.query = ""

    def search(self):
        """ Perform search operation with current set values.

        The result is a SearchResult2 when `parseHtml` is enabled, else a
        SearchResult.

        @returns: The result of this search.
        @rtype: SearchResult
        """
        rtype = SearchResult2 if self.parseHtml else SearchResult

        return self._requestHandler.get(
            self.url,
            data=self.requestKwargs,
            ResultType=rtype
        )
class SearxConfigHandler(HandlerProto):
    """! Keeps instance and engine data up to date by requesting the
    '/config' resource of an instance."""

    def __init__(self, requestsHandler):
        HandlerProto.__init__(self, requestsHandler)

    def updateInstance(self, url):
        """! Request the config of an instance and store what it reports.

        Updates 'version', 'network_type', 'engines' and 'lastUpdated' of
        the instance and adds engines that are not known yet to
        `self.engines`.

        @param url: url of the instance to update.
        @type url: str

        @return: True when the request succeeded and the data was updated,
                 else False.
        @rtype: bool
        """
        configUrl = urllib.parse.urljoin(url, "/config")
        result = self.requestsHandler.get(
            configUrl,
            ResultType=SearxConfigResult
        )

        if not result:
            return False

        instance = self.instances[url]
        config = result.json()

        # Instance version
        instance.update({"version": config.get("version", "")})

        # Use our own network type definitions (class NetworkTypes from
        # core/handler.py) for the network_type of the instance.
        instance.update({"network_type": NetworkTypes.netTypeFromUrl(url)})

        # Engines
        #
        # What we get per engine:
        #   categories (list, str), enabled (bool), language_support (bool),
        #   name (str), paging (bool), safesearch (bool), shortcut (str),
        #   supported_languages (list, str), time_range_support (bool),
        #   timeout (float)
        #
        # What the instance data wants:
        #   "engines": {"not evil": {"error_rate": 15, "errors": [0]}}
        #
        # What the engines data wants:
        #   "engines": {"1337x": {"categories": ["videos"],
        #                         "language_support": true,
        #                         "paging": true,
        #                         "safesearch": false,
        #                         "shortcut": "1337x",
        #                         "time_range_support": false}}
        instanceEngines = {}
        unknownEngines = {}

        for engine in config.get('engines', []):
            name = engine.get('name', "")
            if not name:
                continue

            instanceEngines[name] = {}

            # Engines that are already known are left as they are.
            if name in self.engines:
                continue

            unknownEngines[name] = {
                "categories": list(engine.get('categories', [])),
                "language_support": engine.get('language_support', False),
                "paging": engine.get('paging', False),
                "safesearch": engine.get('safesearch', False),
                "shortcut": engine.get('shortcut', ""),
                "time_range_support": engine.get(
                    'time_range_support',
                    False
                )
            }

        instance.update({"engines": dict(instanceEngines)})
        self.engines.update(unknownEngines)

        # Instance lastUpdated
        instance.update({"lastUpdated": time.time()})

        return True

    def addInstance(self, url):
        """
        @param url: url of the instance to add.
        @type url: str

        @return: True when the instance was added, False when it was
                 already present.
        @rtype: bool
        """
        if url in self.instances:
            return False

        self._instances[url] = {}
        return True

    def removeInstance(self, url):
        """
        @param url: url of the instance to remove.
        @type url: str
        """
        del self._instances[url]

    def removeMultiInstances(self, urls):
        """ Remove instance(s) by url without emitting changed for every
        instance that got removed.

        @param urls: list with urls of instances to remove.
        @type urls: list
        """
        for instanceUrl in urls:
            del self._instances[instanceUrl]
|