DDG.py

from __future__ import annotations

import asyncio
import json
import random
import time

from aiohttp import ClientSession, ClientTimeout
from yarl import URL

from ..typing import AsyncResult, Messages, Cookies
from ..requests.raise_for_status import raise_for_status
from ..providers.response import FinishReason, JsonConversation
from ..errors import ModelNotSupportedError, ResponseStatusError, RateLimitError, TimeoutError, ConversationLimitError
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
from .helper import format_prompt, get_last_user_message
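
# DuckDuckGo AI Chat (duckchat) provider. Requests are throttled (see sleep), retried with
# exponential backoff, and authenticated with the x-vqd-4 / x-vqd-hash-1 tokens issued by
# the status endpoint (see fetch_vqd_and_hash).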


class DuckDuckGoSearchException(Exception):
    """Base exception class for duckduckgo_search."""


class Conversation(JsonConversation):
    """Tracks the VQD tokens, message history, and cookies of one chat session."""
    vqd: str = None
    vqd_hash_1: str = None
    message_history: Messages = []
    cookies: dict = {}

    def __init__(self, model: str):
        self.model = model


class DDG(AsyncGeneratorProvider, ProviderModelMixin):
    label = "DuckDuckGo AI Chat"
    url = "https://duckduckgo.com/aichat"
    api_endpoint = "https://duckduckgo.com/duckchat/v1/chat"
    status_url = "https://duckduckgo.com/duckchat/v1/status"

    working = True
    supports_stream = True
    supports_system_message = True
    supports_message_history = True

    default_model = "gpt-4o-mini"
    models = [default_model, "meta-llama/Llama-3.3-70B-Instruct-Turbo", "claude-3-haiku-20240307", "o3-mini", "mistralai/Mistral-Small-24B-Instruct-2501"]
    model_aliases = {
        "gpt-4": "gpt-4o-mini",
        "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "claude-3-haiku": "claude-3-haiku-20240307",
        "mixtral-small-24b": "mistralai/Mistral-Small-24B-Instruct-2501",
    }

    # Shared throttle and retry settings
    last_request_time = 0
    max_retries = 3
    base_delay = 2

    @classmethod
    def validate_model(cls, model: str) -> str:
        """Validates and returns the correct model name"""
        if not model:
            return cls.default_model
        if model in cls.model_aliases:
            model = cls.model_aliases[model]
        if model not in cls.models:
            raise ModelNotSupportedError(f"Model {model} not supported. Available models: {cls.models}")
        return model

    @classmethod
    async def sleep(cls, multiplier=1.0):
        """Implements rate limiting between requests"""
        now = time.time()
        if cls.last_request_time > 0:
            # Enforce a minimum gap of 1.5 seconds between requests, scaled by multiplier
            delay = max(0.0, 1.5 - (now - cls.last_request_time)) * multiplier
            if delay > 0:
                await asyncio.sleep(delay)
        cls.last_request_time = time.time()

    @classmethod
    async def get_default_cookies(cls, session: ClientSession) -> dict:
        """Obtains default cookies needed for API requests"""
        try:
            await cls.sleep()
            # Make initial request to get cookies
            async with session.get(cls.url) as response:
                # We also manually set required cookies
                cookies = {}
                cookies_dict = {'dcs': '1', 'dcm': '3'}
                url_obj = URL(cls.url)
                for name, value in cookies_dict.items():
                    cookies[name] = value
                    session.cookie_jar.update_cookies({name: value}, url_obj)
                return cookies
        except Exception:
            return {}

    @classmethod
    async def fetch_vqd_and_hash(cls, session: ClientSession, retry_count: int = 0) -> tuple[str, str]:
        """Fetches the required VQD token and hash for the chat session with retries."""
        headers = {
            "accept": "text/event-stream",
            "accept-language": "en-US,en;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/json",
            "pragma": "no-cache",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
            "origin": "https://duckduckgo.com",
            "referer": "https://duckduckgo.com/",
            "x-vqd-accept": "1",
        }

        # Make sure we have cookies first
        if len(session.cookie_jar) == 0:
            await cls.get_default_cookies(session)

        try:
            await cls.sleep(multiplier=1.0 + retry_count * 0.5)
            async with session.get(cls.status_url, headers=headers) as response:
                await raise_for_status(response)

                vqd = response.headers.get("x-vqd-4", "")
                vqd_hash_1 = response.headers.get("x-vqd-hash-1", "")

                if vqd:
                    # x-vqd-hash-1 may be missing; an empty hash is handled downstream
                    return vqd, vqd_hash_1

                response_text = await response.text()
                raise RuntimeError(f"Failed to fetch VQD token and hash: {response.status} {response_text}")
        except Exception as e:
            if retry_count < cls.max_retries:
                # Exponential backoff with jitter before retrying
                wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                await asyncio.sleep(wait_time)
                return await cls.fetch_vqd_and_hash(session, retry_count + 1)
            raise RuntimeError(f"Failed to fetch VQD token and hash after {cls.max_retries} attempts: {str(e)}")
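
    # The chat endpoint expects the current x-vqd-4 token (and, when available, the
    # x-vqd-hash-1 value) on every request; refreshed values returned in the response
    # headers are stored back on the Conversation so follow-up turns stay valid.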

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        timeout: int = 60,
        cookies: Cookies = None,
        conversation: Conversation = None,
        return_conversation: bool = False,
        **kwargs
    ) -> AsyncResult:
        model = cls.validate_model(model)
        retry_count = 0

        while retry_count <= cls.max_retries:
            try:
                session_timeout = ClientTimeout(total=timeout)
                async with ClientSession(timeout=session_timeout, cookies=cookies) as session:
                    if conversation is None:
                        # Get initial cookies if not provided
                        if not cookies:
                            await cls.get_default_cookies(session)
                        conversation = Conversation(model)
                        vqd, vqd_hash_1 = await cls.fetch_vqd_and_hash(session)
                        conversation.vqd = vqd
                        conversation.vqd_hash_1 = vqd_hash_1
                        conversation.message_history = [{"role": "user", "content": format_prompt(messages)}]
                    else:
                        last_message = get_last_user_message(messages.copy())
                        conversation.message_history.append({"role": "user", "content": last_message})

                    headers = {
                        "accept": "text/event-stream",
                        "accept-language": "en-US,en;q=0.9",
                        "content-type": "application/json",
                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
                        "origin": "https://duckduckgo.com",
                        "referer": "https://duckduckgo.com/",
                        "x-vqd-4": conversation.vqd,
                    }
                    # Add the x-vqd-hash-1 header if available
                    if conversation.vqd_hash_1:
                        headers["x-vqd-hash-1"] = conversation.vqd_hash_1

                    data = {
                        "model": model,
                        "messages": conversation.message_history,
                    }

                    await cls.sleep(multiplier=1.0 + retry_count * 0.5)
                    async with session.post(cls.api_endpoint, json=data, headers=headers, proxy=proxy) as response:
                        # Handle 429 errors specifically
                        if response.status == 429:
                            response_text = await response.text()
                            if retry_count < cls.max_retries:
                                retry_count += 1
                                wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                                await asyncio.sleep(wait_time)
                                # Get fresh tokens and cookies
                                cookies = await cls.get_default_cookies(session)
                                continue
                            else:
                                raise RateLimitError(f"Rate limited after {cls.max_retries} retries")

                        await raise_for_status(response)
                        reason = None
                        full_message = ""

                        # Parse the server-sent event stream line by line
                        async for line in response.content:
                            line = line.decode("utf-8").strip()
                            if line.startswith("data:"):
                                try:
                                    message = json.loads(line[5:].strip())
                                except json.JSONDecodeError:
                                    continue

                                if "action" in message and message["action"] == "error":
                                    error_type = message.get("type", "")
                                    if message.get("status") == 429:
                                        if error_type == "ERR_CONVERSATION_LIMIT":
                                            raise ConversationLimitError(error_type)
                                        raise RateLimitError(error_type)
                                    raise DuckDuckGoSearchException(error_type)

                                if "message" in message:
                                    if message["message"]:
                                        yield message["message"]
                                        full_message += message["message"]
                                        reason = "length"
                                    else:
                                        reason = "stop"

                        if return_conversation:
                            conversation.message_history.append({"role": "assistant", "content": full_message})
                            conversation.vqd = response.headers.get("x-vqd-4", conversation.vqd)
                            conversation.vqd_hash_1 = response.headers.get("x-vqd-hash-1", conversation.vqd_hash_1)
                            conversation.cookies = {
                                n: c.value
                                for n, c in session.cookie_jar.filter_cookies(URL(cls.url)).items()
                            }
                            yield conversation

                        if reason is not None:
                            yield FinishReason(reason)

                    # If we got here, the request was successful
                    break
            except (RateLimitError, ResponseStatusError) as e:
                if "429" in str(e) and retry_count < cls.max_retries:
                    retry_count += 1
                    wait_time = cls.base_delay * (2 ** retry_count) * (1 + random.random())
                    await asyncio.sleep(wait_time)
                else:
                    raise
            except asyncio.TimeoutError as e:
                raise TimeoutError(f"Request timed out: {str(e)}")
            except Exception:
                raise
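

# Illustrative usage sketch (an assumption, not part of the provider itself): because of the
# relative imports above, this module only runs as part of its parent package, e.g. via
# `python -m <package_path>.DDG`; the exact package path is not shown here.
if __name__ == "__main__":
    async def _demo():
        demo_messages = [{"role": "user", "content": "Hello!"}]
        async for chunk in DDG.create_async_generator(model=DDG.default_model, messages=demo_messages):
            # Text deltas are plain strings; Conversation and FinishReason objects are skipped.
            if isinstance(chunk, str):
                print(chunk, end="", flush=True)

    asyncio.run(_demo())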