__init__.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. from __future__ import annotations
  2. import os
  3. import time
  4. import random
  5. from urllib.parse import urlparse
  6. from typing import Iterator
  7. from http.cookies import Morsel
  8. from pathlib import Path
  9. import asyncio
  10. try:
  11. from curl_cffi.requests import Session, Response
  12. from .curl_cffi import StreamResponse, StreamSession, FormData
  13. has_curl_cffi = True
  14. except ImportError:
  15. from typing import Type as Response
  16. from .aiohttp import StreamResponse, StreamSession, FormData
  17. has_curl_cffi = False
  18. try:
  19. import webview
  20. has_webview = True
  21. except ImportError:
  22. has_webview = False
  23. try:
  24. import nodriver
  25. from nodriver.cdp.network import CookieParam
  26. from nodriver.core.config import find_chrome_executable
  27. from nodriver import Browser, Tab, util
  28. has_nodriver = True
  29. except ImportError:
  30. from typing import Type as Browser
  31. from typing import Type as Tab
  32. has_nodriver = False
  33. try:
  34. from platformdirs import user_config_dir
  35. has_platformdirs = True
  36. except ImportError:
  37. has_platformdirs = False
  38. from .. import debug
  39. from .raise_for_status import raise_for_status
  40. from ..errors import MissingRequirementsError
  41. from ..typing import Cookies
  42. from ..cookies import get_cookies_dir
  43. from .defaults import DEFAULT_HEADERS, WEBVIEW_HAEDERS
  44. if not has_curl_cffi:
  45. class Session:
  46. def __init__(self, **kwargs):
  47. raise MissingRequirementsError('Install "curl_cffi" package | pip install -U curl_cffi')
  48. async def get_args_from_webview(url: str) -> dict:
  49. if not has_webview:
  50. raise MissingRequirementsError('Install "webview" package')
  51. window = webview.create_window("", url, hidden=True)
  52. await asyncio.sleep(2)
  53. body = None
  54. while body is None:
  55. try:
  56. await asyncio.sleep(1)
  57. body = window.dom.get_element("body:not(.no-js)")
  58. except:
  59. ...
  60. headers = {
  61. **WEBVIEW_HAEDERS,
  62. "User-Agent": window.evaluate_js("this.navigator.userAgent"),
  63. "Accept-Language": window.evaluate_js("this.navigator.language"),
  64. "Referer": window.real_url
  65. }
  66. cookies = [list(*cookie.items()) for cookie in window.get_cookies()]
  67. cookies = {name: cookie.value for name, cookie in cookies}
  68. window.destroy()
  69. return {"headers": headers, "cookies": cookies}
  70. def get_cookie_params_from_dict(cookies: Cookies, url: str = None, domain: str = None) -> list[CookieParam]:
  71. [CookieParam.from_json({
  72. "name": key,
  73. "value": value,
  74. "url": url,
  75. "domain": domain
  76. }) for key, value in cookies.items()]
  77. async def get_args_from_nodriver(
  78. url: str,
  79. proxy: str = None,
  80. timeout: int = 120,
  81. wait_for: str = None,
  82. callback: callable = None,
  83. cookies: Cookies = None,
  84. browser: Browser = None
  85. ) -> dict:
  86. if browser is None:
  87. browser, stop_browser = await get_nodriver(proxy=proxy, timeout=timeout)
  88. else:
  89. def stop_browser():
  90. ...
  91. try:
  92. if debug.logging:
  93. print(f"Open nodriver with url: {url}")
  94. domain = urlparse(url).netloc
  95. if cookies is None:
  96. cookies = {}
  97. else:
  98. await browser.cookies.set_all(get_cookie_params_from_dict(cookies, url=url, domain=domain))
  99. page = await browser.get(url)
  100. user_agent = await page.evaluate("window.navigator.userAgent")
  101. await page.wait_for("body:not(.no-js)", timeout=timeout)
  102. if wait_for is not None:
  103. await page.wait_for(wait_for, timeout=timeout)
  104. if callback is not None:
  105. await callback(page)
  106. for c in await page.send(nodriver.cdp.network.get_cookies([url])):
  107. cookies[c.name] = c.value
  108. await page.close()
  109. return {
  110. "impersonate": "chrome",
  111. "cookies": cookies,
  112. "headers": {
  113. **DEFAULT_HEADERS,
  114. "user-agent": user_agent,
  115. "referer": url,
  116. },
  117. "proxy": proxy,
  118. }
  119. finally:
  120. stop_browser()
  121. def merge_cookies(cookies: Iterator[Morsel], response: Response) -> Cookies:
  122. if cookies is None:
  123. cookies = {}
  124. if hasattr(response.cookies, "jar"):
  125. for cookie in response.cookies.jar:
  126. cookies[cookie.name] = cookie.value
  127. else:
  128. for key, value in response.cookies.items():
  129. cookies[key] = value
  130. async def get_nodriver(
  131. proxy: str = None,
  132. user_data_dir = "nodriver",
  133. timeout: int = 120,
  134. browser_executable_path=None,
  135. **kwargs
  136. ) -> tuple[Browser, callable]:
  137. if not has_nodriver:
  138. raise MissingRequirementsError('Install "nodriver" and "platformdirs" package | pip install -U nodriver platformdirs')
  139. user_data_dir = user_config_dir(f"g4f-{user_data_dir}") if has_platformdirs else None
  140. if browser_executable_path is None:
  141. try:
  142. browser_executable_path = find_chrome_executable()
  143. except FileNotFoundError:
  144. # Default to Edge if Chrome is not available.
  145. browser_executable_path = "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
  146. if not os.path.exists(browser_executable_path):
  147. browser_executable_path = None
  148. lock_file = Path(get_cookies_dir()) / ".nodriver_is_open"
  149. # Implement a short delay (milliseconds) to prevent race conditions.
  150. await asyncio.sleep(0.1 * random.randint(0, 50))
  151. if lock_file.exists():
  152. opend_at = float(lock_file.read_text())
  153. time_open = time.time() - opend_at
  154. if timeout * 2 > time_open:
  155. debug.log(f"Nodriver: Browser is already in use since {time_open} secs.")
  156. for _ in range(timeout):
  157. if lock_file.exists():
  158. await asyncio.sleep(1)
  159. else:
  160. break
  161. lock_file.write_text(str(time.time()))
  162. debug.log(f"Open nodriver with user_dir: {user_data_dir}")
  163. try:
  164. browser = await nodriver.start(
  165. user_data_dir=user_data_dir,
  166. browser_args=None if proxy is None else [f"--proxy-server={proxy}"],
  167. browser_executable_path=browser_executable_path,
  168. **kwargs
  169. )
  170. except:
  171. if util.get_registered_instances():
  172. browser = util.get_registered_instances().pop()
  173. else:
  174. raise
  175. def on_stop():
  176. try:
  177. if browser.connection:
  178. browser.stop()
  179. finally:
  180. lock_file.unlink(missing_ok=True)
  181. return browser, on_stop