requests.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. ########################################################################
  2. # Searx-Qt - Lightweight desktop application for Searx.
  3. # Copyright (C) 2020-2022 CYBERDEViL
  4. #
  5. # This file is part of Searx-Qt.
  6. #
  7. # Searx-Qt is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # Searx-Qt is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. #
  20. ########################################################################
  21. import requests
  22. from requests.exceptions import (
  23. HTTPError,
  24. ConnectionError,
  25. Timeout,
  26. ProxyError,
  27. SSLError,
  28. RequestException
  29. )
  30. import json
  31. from jsonschema import validate as JsonValidate
  32. from jsonschema.exceptions import ValidationError, SchemaError
  33. import random
  34. from searxqt.core import log
  35. from searxqt.core.images import ImagesSettings
  36. HAVE_SOCKS = False
  37. try:
  38. import socks
  39. HAVE_SOCKS = True
  40. del socks
  41. except ImportError:
  42. log.debug("pysocks not installed! No socks proxy support.")
  43. class ErrorType:
  44. Success = 0
  45. HttpError = 1
  46. ConnectionError = 2
  47. Timeout = 3
  48. WrongStatus = 4
  49. DecodeError = 5
  50. NoResults = 6
  51. ProxyError = 7
  52. SSLError = 8
  53. InvalidSchema = 9
  54. ContentSizeExceeded = 10
  55. CorruptImage = 11
  56. Other = 12
  57. ErrorTypeStr = {
  58. ErrorType.Success: "Success",
  59. ErrorType.HttpError: "HttpError",
  60. ErrorType.ConnectionError: "ConnectionError",
  61. ErrorType.Timeout: "Timeout",
  62. ErrorType.WrongStatus: "WrongStatus",
  63. ErrorType.DecodeError: "DecodeError",
  64. ErrorType.NoResults: "NoResults",
  65. ErrorType.ProxyError: "ProxyError",
  66. ErrorType.SSLError: "SSLError",
  67. ErrorType.InvalidSchema: "InvalidSchema",
  68. ErrorType.ContentSizeExceeded: "ContentSizeExceeded",
  69. ErrorType.CorruptImage: "CorruptImage",
  70. ErrorType.Other: "Other"
  71. }
  72. # Global json schema container so we won't have to read a json schema file
  73. # from disk everytime we need to verify some json data.
  74. Schemas = {}
  75. # Loads an json schema into the global 'Schemas' container.
  76. # @param key: A name to store the json schema to, existing keys will be
  77. # overwritten!
  78. # @type key: string
  79. # @param filepath: Path where the json schema file is located (including
  80. # filename)
  81. # @type filepath: string
  82. def json_schema_load(key, filepath):
  83. # may raise an json.JSONDecodeError or UnicodeDecodeError when the schema
  84. # json is invalid, or an OSError when it cannot access the given filepath.
  85. with open(filepath, 'r') as f:
  86. data = json.load(f)
  87. # An exception should have been raised when open() or json.load() has
  88. # failed, so at this point the schema json looks valid.
  89. # @note: Existing keys will be overwritten/reloaded.
  90. Schemas.update({key: data})
  91. class Result:
  92. def __init__(self, url, response, err="", errType=ErrorType.Success,
  93. acceptCodes=None):
  94. self._url = url # url used for request.
  95. self._response = response
  96. self._err = err
  97. self._errType = errType
  98. acceptCodes = acceptCodes
  99. if not acceptCodes:
  100. acceptCodes = [200]
  101. if errType == ErrorType.Success and response.status_code not in acceptCodes:
  102. self._errType = ErrorType.WrongStatus
  103. self._err = f"WrongStatus: {self._response.status_code}"
  104. else:
  105. self.verifyFurther()
  106. def __bool__(self):
  107. return not self.failed()
  108. def url(self):
  109. return self._url
  110. def errorType(self): return self._errType
  111. def error(self): return self._err
  112. def content(self):
  113. """ In case json.loads failed and we want to debug.
  114. """
  115. if self._response is None:
  116. return b''
  117. return self._response.content
  118. def text(self):
  119. if self._response is None:
  120. return ''
  121. return self._response.text
  122. def failed(self):
  123. if self._errType is not ErrorType.Success:
  124. return True
  125. return False
  126. def statusCode(self):
  127. if self._response is not None:
  128. return self._response.status_code
  129. return 0
  130. def verifyFurther(self):
  131. pass
  132. # JsonResult should not be used directly, it should be subclassed. The subclass
  133. # is responsible for setting JsonResult.Schema
  134. class JsonResult(Result):
  135. Schema = {}
  136. def __init__(self, url, response, err="", errType=ErrorType.Success,
  137. acceptCodes=None):
  138. Result.__init__(
  139. self,
  140. url,
  141. response,
  142. err=err,
  143. errType=errType,
  144. acceptCodes=acceptCodes
  145. )
  146. def verifyFurther(self):
  147. try:
  148. self.json()
  149. except json.JSONDecodeError as err:
  150. self._errType = ErrorType.DecodeError
  151. self._err = f"DecodeError: `{err}`"
  152. except UnicodeDecodeError as err:
  153. # This could happen when the response encoding isn't plain ? (gzip)
  154. # Or we just have malformed data/crap.
  155. self._errType = ErrorType.DecodeError
  156. self._err = f"DecodeError: `{err}`"
  157. try:
  158. JsonValidate(instance=self.json(), schema=self.Schema)
  159. except ValidationError as err:
  160. self._errType = ErrorType.InvalidSchema
  161. self._err = f"InvalidSchema: `{err}`"
  162. except SchemaError as err:
  163. self._errType = ErrorType.InvalidSchema
  164. self._err = f"InvalidSchema: `{err}`"
  165. def json(self):
  166. if self.errorType() != ErrorType.Success:
  167. return {}
  168. return json.loads(self._response.content)
  169. if ImagesSettings.supported:
  170. from io import BytesIO
  171. from PIL import Image, UnidentifiedImageError
  172. else:
  173. from searxqt.core.dummy import BytesIO, Image, UnidentifiedImageError
  174. class ImageResult(Result):
  175. def verifyFurther(self):
  176. content = BytesIO(self.content())
  177. try:
  178. Image.open(content, mode='r')
  179. except UnidentifiedImageError as err:
  180. self._errType = ErrorType.CorruptImage
  181. self._err = f"CorruptImage: `{err}` for: {self.url()}"
  182. except OSError as err:
  183. self._errType = ErrorType.CorruptImage
  184. self._err = f"CorruptImage: `{err}` for: {self.url()}"
  185. class ProxyProtocol:
  186. HTTP = 1
  187. SOCKS4 = 2
  188. SOCKS5 = 4
  189. ProxyProtocolString = {
  190. 0: "none",
  191. 1: "http",
  192. 2: "socks4",
  193. 4: "socks5"
  194. }
  195. class RequestSettings:
  196. def __init__(self):
  197. # Settings
  198. self._useragents = ["searx-qt"]
  199. self._randomUserAgent = False # Use a random useragent for each
  200. # request.
  201. self._verifySSL = True # Verify SSL certificates (HTTPS).
  202. self._timeout = 10 # Connection timeout in seconds.
  203. self._maxSize = 10 * 1024 * 1024 # Maximum content receive size in KiB.
  204. self._chunkSize = 500 * 1024 # Receive chunk size.
  205. self._proxyEnabled = False # Enable the use of a proxy.
  206. self._proxyDNS = True # Only available for socks
  207. self._proxyHost = "" # Proxy string user:pass@host:port
  208. self._proxyProtocol = 0 # Proxy protocol, example:
  209. # ProxyProtocol.SOCKS5H
  210. self._extraHeaders = {} # Extra header values
  211. # Compiled settings
  212. self._headers = {} # headers kwarg (will be included in self._kwargs)
  213. # kwargs passed to requests.get or requests.post.
  214. # This will be compiled each time a setting has
  215. # changed, so it won't have to be compiled each
  216. # time a request is made.
  217. self._kwargs = {
  218. "verify": True,
  219. "timeout": 10,
  220. "headers": self._headers
  221. }
  222. self._compileKwargs()
  223. def getData(self):
  224. return {
  225. "useragents": self._useragents,
  226. "randomUserAgent": self._randomUserAgent,
  227. "verifySSL": self._verifySSL,
  228. "timeout": self._timeout,
  229. "maxSize": self._maxSize,
  230. "chunkSize": self._chunkSize,
  231. "proxyEnabled": self.proxyEnabled,
  232. "proxyDNS": self._proxyDNS,
  233. "proxyHost": self._proxyHost,
  234. "proxyProtocol": self._proxyProtocol,
  235. "extraHeaders": self._extraHeaders
  236. }
  237. def setData(self, data):
  238. self._useragents.clear()
  239. for useragent in data.get("useragents", []):
  240. self._useragents.append(useragent)
  241. self._randomUserAgent = data.get("randomUserAgent", False)
  242. self._verifySSL = data.get("verifySSL", True)
  243. self._timeout = data.get("timeout", 10)
  244. self._proxyEnabled = data.get("proxyEnabled", False)
  245. self._proxyDNS = data.get("proxyDNS", True)
  246. self._proxyHost = data.get("proxyHost", "")
  247. self._proxyProtocol = data.get("proxyProtocol", 0)
  248. self._extraHeaders = data.get("extraHeaders", {})
  249. self.updateRequestKwargs()
  250. """ Settings """
  251. @property
  252. def headers(self):
  253. return self._headers
  254. @property
  255. def extraHeaders(self):
  256. return self._extraHeaders
  257. @property
  258. def verifySSL(self):
  259. return self._verifySSL
  260. @verifySSL.setter
  261. def verifySSL(self, state):
  262. self._verifySSL = state
  263. @property
  264. def timeout(self):
  265. return self._timeout
  266. @timeout.setter
  267. def timeout(self, state):
  268. self._timeout = state
  269. @property
  270. def maxSize(self):
  271. return self._maxSize
  272. @maxSize.setter
  273. def maxSize(self, size):
  274. self._maxSize = size
  275. @property
  276. def chunkSize(self):
  277. return self._chunkSize
  278. @chunkSize.setter
  279. def chunkSize(self, size):
  280. self._chunkSize = size
  281. @property
  282. def proxyEnabled(self):
  283. return self._proxyEnabled
  284. @proxyEnabled.setter
  285. def proxyEnabled(self, state):
  286. self._proxyEnabled = state
  287. @property
  288. def proxyHost(self):
  289. return self._proxyHost
  290. @proxyHost.setter
  291. def proxyHost(self, host):
  292. self._proxyHost = host
  293. @property
  294. def proxyProtocol(self):
  295. return self._proxyProtocol
  296. # @type protocol: class ProxyProtocol
  297. @proxyProtocol.setter
  298. def proxyProtocol(self, protocol):
  299. self._proxyProtocol = protocol
  300. @property
  301. def proxyDNS(self):
  302. return self._proxyDNS
  303. @proxyDNS.setter
  304. def proxyDNS(self, state):
  305. self._proxyDNS = state
  306. # Use this to add/remove/clear useragents, it returns a list with strings
  307. @property
  308. def useragents(self):
  309. return self._useragents
  310. @property
  311. def randomUserAgent(self):
  312. return self._randomUserAgent
  313. @randomUserAgent.setter
  314. def randomUserAgent(self, state):
  315. self._randomUserAgent = state
  316. """ Make python-requests compatible """
  317. @property
  318. def requestsKwargs(self):
  319. return self._kwargs
  320. # Update requests kwargs (call this each time after you changed one or
  321. # multiple settings)
  322. def updateRequestKwargs(self):
  323. self._compileKwargs()
  324. # Get requests kwargs for a new request.
  325. def kwargsForNewRequest(self):
  326. useragent = self._getUseragent()
  327. if useragent:
  328. self._headers.update({"User-Agent": useragent})
  329. elif "User-Agent" in self._headers:
  330. del self._headers["User-Agent"]
  331. return self._kwargs
  332. def _compileProxies(self):
  333. dnsStr = ""
  334. if self.proxyProtocol in [ProxyProtocol.SOCKS4, ProxyProtocol.SOCKS5]:
  335. if self.proxyDNS:
  336. dnsStr = "h"
  337. protoStr = ProxyProtocolString[self.proxyProtocol]
  338. proxyStr = f"{protoStr}{dnsStr}://{self.proxyHost}"
  339. return {
  340. "http": proxyStr,
  341. "https": proxyStr
  342. }
  343. def _compileKwargs(self):
  344. kwargs = {
  345. "verify": self.verifySSL,
  346. "timeout": self.timeout,
  347. "headers": self.headers
  348. }
  349. self._headers.clear()
  350. self._headers.update(self.extraHeaders)
  351. if self.proxyEnabled:
  352. kwargs.update({"proxies": self._compileProxies()})
  353. self._kwargs.clear()
  354. self._kwargs.update(kwargs)
  355. def _getUseragent(self):
  356. if not self.useragents:
  357. return ""
  358. # Return first useragent string
  359. if len(self.useragents) == 1 or not self.randomUserAgent:
  360. return self.useragents[0]
  361. # Return random useragent
  362. return random.choice(self.useragents)
  363. class RequestSettingsWithParent(RequestSettings):
  364. # This is read-only when in parent mode
  365. def __init__(self, parentSettings):
  366. self._parentSettings = parentSettings
  367. self._useParent = False
  368. self._current = self
  369. RequestSettings.__init__(self)
  370. @property
  371. def useParent(self):
  372. """! When it returns `True` the settings this object holds will be
  373. ignored and values from the parent will be returned instead, in
  374. this case this object should be treated as read-only. When it
  375. returns `False` this object will behave like a normal
  376. `RequestSettings` instance.
  377. """
  378. return self._useParent
  379. @useParent.setter
  380. def useParent(self, state):
  381. self._useParent = state
  382. self._current = self._parentSettings if state else self
  383. def getData(self):
  384. data = RequestSettings.getData(self)
  385. data.update({"useParent": self.useParent})
  386. return data
  387. def setData(self, data):
  388. self.useParent = data.get("useParent", False)
  389. RequestSettings.setData(self, data)
  390. """ Override Settings """
  391. @RequestSettings.headers.getter
  392. def headers(self):
  393. return self._current._headers
  394. @RequestSettings.extraHeaders.getter
  395. def extraHeaders(self):
  396. return self._current._extraHeaders
  397. @RequestSettings.verifySSL.getter
  398. def verifySSL(self):
  399. return self._current._verifySSL
  400. @RequestSettings.timeout.getter
  401. def timeout(self):
  402. return self._current._timeout
  403. @RequestSettings.proxyEnabled.getter
  404. def proxyEnabled(self):
  405. return self._current._proxyEnabled
  406. @RequestSettings.proxyHost.getter
  407. def proxyHost(self):
  408. return self._current._proxyHost
  409. @RequestSettings.proxyProtocol.getter
  410. def proxyProtocol(self):
  411. return self._current._proxyProtocol
  412. @RequestSettings.proxyDNS.getter
  413. def proxyDNS(self):
  414. return self._current._proxyDNS
  415. @RequestSettings.useragents.getter
  416. def useragents(self):
  417. return self._current._useragents
  418. @RequestSettings.randomUserAgent.getter
  419. def randomUserAgent(self):
  420. return self._current._randomUserAgent
  421. class RequestsHandler:
  422. def __init__(self, settings=None):
  423. """! Handles remote requests.
  424. @param settings `RequestSettings` object or `None`. When `None` is
  425. given it will create a new `RequestSettings` object,
  426. else the given `settings` object will be used.
  427. """
  428. if settings is None:
  429. self._settings = RequestSettings()
  430. else:
  431. self._settings = settings
  432. @property
  433. def settings(self):
  434. return self._settings
  435. def failSafeRequestFactory(func):
  436. def failSafeRequest(self, url, data=None, ResultType=None):
  437. response = None
  438. err = ""
  439. errType = ErrorType.Success
  440. if not ResultType:
  441. # When 'ResultType' isn't specified, set 'JsonResult' as
  442. # default.
  443. ResultType = JsonResult
  444. log.debug("<NEW Request>", self)
  445. log.debug("# ------------------------", self)
  446. log.debug(f"# ResultType : {ResultType}", self)
  447. requestKwargs = self._settings.kwargsForNewRequest()
  448. """
  449. Request exceptions
  450. https://docs.python-requests.org/en/master/_modules/requests/exceptions/
  451. """
  452. try:
  453. response = func(self, url, data=data, **requestKwargs)
  454. chunkSize = self.settings.chunkSize
  455. maxSize = self.settings.maxSize
  456. curSize = 0
  457. headerContentSize = response.headers.get("Content-Length", None)
  458. if headerContentSize is not None:
  459. if int(headerContentSize) > maxSize:
  460. e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (1)"
  461. log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
  462. errType = ErrorType.ContentSizeExceeded
  463. err = e
  464. if errType == ErrorType.Success:
  465. response._content = b""
  466. for chunk in response.iter_content(chunkSize):
  467. # The server might have its own chunk size that is
  468. # smaller then ours, so thats why we add the lenght
  469. # of the received content instead of adding our
  470. # chunkSize.
  471. curSize += len(chunk)
  472. if curSize > maxSize:
  473. e = f"Maximum content size limit of '{maxSize}' bytes exceeded. (2)"
  474. log.debug(f"Request failed! ContentSizeExceeded: {e}", self)
  475. errType = ErrorType.ContentSizeExceeded
  476. err = e
  477. break
  478. response._content += chunk
  479. response.close()
  480. except HTTPError as e:
  481. # HTTPError is subclass of RequestException
  482. log.debug(f"Request failed! HTTPError: {e}", self)
  483. errType = ErrorType.HttpError
  484. err = str(e)
  485. except Timeout as e:
  486. # Timeout is subclass of RequestException
  487. log.debug(f"Request failed! Timeout: {e}", self)
  488. errType = ErrorType.Timeout
  489. err = str(e)
  490. except ProxyError as e:
  491. # ProxyError is subclass of ConnectionError
  492. log.debug(f"Request failed! ProxyError: {e}", self)
  493. errType = ErrorType.ProxyError
  494. err = str(e)
  495. except SSLError as e:
  496. # SSLError is subclass of ConnectionError
  497. log.debug(f"Request failed! SSLError: {e}", self)
  498. errType = ErrorType.SSLError
  499. err = str(e)
  500. except ConnectionError as e:
  501. # ConnectionError is subclass of RequestException
  502. log.debug(f"Request failed! ConnectionError: {e}", self)
  503. errType = ErrorType.ConnectionError
  504. err = str(e)
  505. except RequestException as e:
  506. # This should catch all other
  507. log.debug(f"Request failed! RequestException: {e}", self)
  508. errType = ErrorType.Other
  509. err = str(e)
  510. log.debug("# ------------------------\n", self)
  511. return ResultType(url, response, err=err, errType=errType)
  512. return failSafeRequest
  513. @failSafeRequestFactory
  514. def get(self, url, data=None, ResultType=None, **settingsKwargs):
  515. log.debug("# Type : GET", self)
  516. log.debug(f"# URL : {url}", self)
  517. log.debug(f"# Data : {data}", self)
  518. log.debug(f"# Kwargs : {settingsKwargs}", self)
  519. return requests.get(url, data=data, stream=True, **settingsKwargs)
  520. @failSafeRequestFactory
  521. def post(self, url, data=None, ResultType=None, **settingsKwargs):
  522. log.debug("# Type : POST", self)
  523. log.debug(f"# URL : {url}", self)
  524. log.debug(f"# Data : {data}", self)
  525. log.debug(f"# Kwargs : {settingsKwargs}", self)
  526. return requests.post(url, data=data, stream=True, **settingsKwargs)