from __future__ import annotations

from urllib.parse import urlparse
from typing import Iterator
from http.cookies import Morsel

try:
    from curl_cffi.requests import Session, Response
    from .curl_cffi import StreamResponse, StreamSession, FormData
    has_curl_cffi = True
except ImportError:
    from typing import Type as Response
    from .aiohttp import StreamResponse, StreamSession, FormData
    has_curl_cffi = False
try:
    import webview
    import asyncio
    has_webview = True
except ImportError:
    has_webview = False
try:
    import nodriver
    from nodriver.cdp.network import CookieParam
    from nodriver import Browser
    has_nodriver = True
except ImportError:
    has_nodriver = False
try:
    from platformdirs import user_config_dir
    has_platformdirs = True
except ImportError:
    has_platformdirs = False

from .. import debug
from .raise_for_status import raise_for_status
from ..webdriver import WebDriver, WebDriverSession
from ..webdriver import bypass_cloudflare, get_driver_cookies
from ..errors import MissingRequirementsError
from ..typing import Cookies
from .defaults import DEFAULT_HEADERS, WEBVIEW_HAEDERS

if not has_curl_cffi:
    class Session:
        def __init__(self, **kwargs):
            raise MissingRequirementsError('Install "curl_cffi" package | pip install -U curl_cffi')

async def get_args_from_webview(url: str) -> dict:
    if not has_webview:
        raise MissingRequirementsError('Install "webview" package')
    window = webview.create_window("", url, hidden=True)
    await asyncio.sleep(2)
    body = None
    while body is None:
        try:
            await asyncio.sleep(1)
            # Wait until the page's JS bootstrap has removed the "no-js" class from <body>.
            body = window.dom.get_element("body:not(.no-js)")
        except Exception:
            pass
    headers = {
        **WEBVIEW_HAEDERS,
        "User-Agent": window.evaluate_js("this.navigator.userAgent"),
        "Accept-Language": window.evaluate_js("this.navigator.language"),
        "Referer": window.real_url
    }
    # Each item from get_cookies() holds a single (name, Morsel) pair; unpack it
    # and keep only the cookie value.
    cookies = [list(*cookie.items()) for cookie in window.get_cookies()]
    cookies = {name: cookie.value for name, cookie in cookies}
    window.destroy()
    return {"headers": headers, "cookies": cookies}
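
# Illustrative usage sketch (not part of this module): the dict returned by
# get_args_from_webview can be unpacked into a StreamSession. Assumes a running
# asyncio event loop and a working pywebview backend; the URLs are placeholders.
#
#     async def fetch_with_webview_args():
#         args = await get_args_from_webview("https://example.com")
#         async with StreamSession(**args) as session:
#             async with session.get("https://example.com/api") as response:
#                 await raise_for_status(response)
#                 return await response.text()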

def get_args_from_browser(
    url: str,
    webdriver: WebDriver = None,
    proxy: str = None,
    timeout: int = 120,
    do_bypass_cloudflare: bool = True,
    virtual_display: bool = False
) -> dict:
    """
    Collect cookies and headers from a WebDriver session for the given URL.

    Args:
        url (str): The URL to navigate to using the WebDriver.
        webdriver (WebDriver, optional): The WebDriver instance to use.
        proxy (str, optional): Proxy server to use for the WebDriver.
        timeout (int, optional): Timeout in seconds for the WebDriver.
        do_bypass_cloudflare (bool, optional): Whether to run the Cloudflare bypass before collecting headers.
        virtual_display (bool, optional): Whether to run the browser in a virtual display.

    Returns:
        dict: A dict with "cookies" and "headers" taken from the WebDriver session.
    """
    with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=virtual_display) as driver:
        if do_bypass_cloudflare:
            bypass_cloudflare(driver, url, timeout)
        headers = {
            **DEFAULT_HEADERS,
            'referer': url,
        }
        if not hasattr(driver, "requests"):
            # Plain driver: read the user agent directly from the page.
            headers["user-agent"] = driver.execute_script("return navigator.userAgent")
        else:
            # Driver with request capture (e.g. selenium-wire): copy the browser's
            # own request headers for the first request made to the target URL.
            for request in driver.requests:
                if request.url.startswith(url):
                    for key, value in request.headers.items():
                        if key in (
                            "accept-encoding",
                            "accept-language",
                            "user-agent",
                            "sec-ch-ua",
                            "sec-ch-ua-platform",
                            "sec-ch-ua-arch",
                            "sec-ch-ua-full-version",
                            "sec-ch-ua-platform-version",
                            "sec-ch-ua-bitness"
                        ):
                            headers[key] = value
                    break
        cookies = get_driver_cookies(driver)
    return {
        'cookies': cookies,
        'headers': headers,
    }
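
# Illustrative sketch (assumption, not part of this module): the returned
# "cookies"/"headers" dict is meant to be passed on to an HTTP client such as a
# curl_cffi Session. The URL is a placeholder.
#
#     args = get_args_from_browser("https://example.com", timeout=60)
#     session = Session(**args, impersonate="chrome")  # requires curl_cffi
#     response = session.get("https://example.com/api")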

def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
    if not has_curl_cffi:
        raise MissingRequirementsError('Install "curl_cffi" package | pip install -U curl_cffi')
    args = get_args_from_browser(url, webdriver, proxy, timeout)
    return Session(
        **args,
        proxies={"https": proxy, "http": proxy},
        timeout=timeout,
        impersonate="chrome"
    )
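
# Illustrative usage (placeholder URL; requires curl_cffi and a local WebDriver):
#
#     session = get_session_from_browser("https://example.com", timeout=120)
#     response = session.get("https://example.com/api")
#     response.raise_for_status()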

def get_cookie_params_from_dict(cookies: Cookies, url: str = None, domain: str = None) -> list[CookieParam]:
    return [CookieParam.from_json({
        "name": key,
        "value": value,
        "url": url,
        "domain": domain
    }) for key, value in cookies.items()]

async def get_args_from_nodriver(
    url: str,
    proxy: str = None,
    timeout: int = 120,
    cookies: Cookies = None
) -> dict:
    if not has_nodriver:
        raise MissingRequirementsError('Install "nodriver" package | pip install -U nodriver')
    if debug.logging:
        print(f"Open nodriver with url: {url}")
    browser = await nodriver.start(
        browser_args=None if proxy is None else [f"--proxy-server={proxy}"],
    )
    domain = urlparse(url).netloc
    if cookies is None:
        cookies = {}
    else:
        # Pre-seed the browser with the caller's cookies before loading the page.
        await browser.cookies.set_all(get_cookie_params_from_dict(cookies, url=url, domain=domain))
    page = await browser.get(url)
    # Read back the cookies set for this URL via CDP and merge them into the dict.
    for c in await page.send(nodriver.cdp.network.get_cookies([url])):
        cookies[c.name] = c.value
    user_agent = await page.evaluate("window.navigator.userAgent")
    await page.wait_for("body:not(.no-js)", timeout=timeout)
    await page.close()
    browser.stop()
    return {
        "impersonate": "chrome",
        "cookies": cookies,
        "headers": {
            **DEFAULT_HEADERS,
            "user-agent": user_agent,
            "referer": url,
        },
        "proxy": proxy
    }
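
# Illustrative sketch: the dict returned by get_args_from_nodriver is shaped like
# keyword arguments for StreamSession ("impersonate", "cookies", "headers",
# "proxy"), so it can be unpacked directly. Placeholder URL; assumes nodriver and
# a local Chromium install.
#
#     async def fetch_with_nodriver_args():
#         args = await get_args_from_nodriver("https://example.com", timeout=60)
#         async with StreamSession(**args) as session:
#             async with session.get("https://example.com/api") as response:
#                 await raise_for_status(response)
#                 return await response.text()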

def merge_cookies(cookies: Iterator[Morsel], response: Response) -> Cookies:
    if cookies is None:
        cookies = {}
    for cookie in response.cookies.jar:
        cookies[cookie.name] = cookie.value
    return cookies
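
# Illustrative sketch (assumes `session` is a curl_cffi Session; placeholder
# URLs): merge_cookies carries cookies across sequential requests.
#
#     cookies = None
#     response = session.get("https://example.com/login")
#     cookies = merge_cookies(cookies, response)
#     response = session.get("https://example.com/api", cookies=cookies)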

async def get_nodriver(proxy: str = None, user_data_dir="nodriver", **kwargs) -> Browser:
    if not has_nodriver:
        raise MissingRequirementsError('Install "nodriver" package | pip install -U nodriver')
    user_data_dir = user_config_dir(f"g4f-{user_data_dir}") if has_platformdirs else None
    debug.log(f"Open nodriver with user_dir: {user_data_dir}")
    return await nodriver.start(
        user_data_dir=user_data_dir,
        browser_args=None if proxy is None else [f"--proxy-server={proxy}"],
        **kwargs
    )
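
# Illustrative usage sketch (assumes nodriver plus a local Chromium install):
#
#     async def open_page():
#         browser = await get_nodriver(proxy=None)
#         try:
#             page = await browser.get("https://example.com")
#             await page.wait_for("body", timeout=30)
#         finally:
#             browser.stop()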