# Provider module for the Qwen QVQ-72B preview Hugging Face Space.
from __future__ import annotations

import json

from aiohttp import ClientSession, FormData

from ...typing import AsyncResult, Messages, ImagesType
from ...requests import raise_for_status
from ...errors import ResponseError
from ..base_provider import AsyncGeneratorProvider, ProviderModelMixin
from ..helper import format_prompt, get_random_string
from ...image import to_bytes, is_accepted_format
  10. class Qwen_QVQ_72B(AsyncGeneratorProvider, ProviderModelMixin):
  11. url = "https://qwen-qvq-72b-preview.hf.space"
  12. api_endpoint = "/gradio_api/call/generate"
  13. working = True
  14. default_model = "qwen-qvq-72b-preview"
  15. models = [default_model]
  16. model_aliases = {"qvq-72b": default_model}
  17. vision_models = models
  18. @classmethod
  19. async def create_async_generator(
  20. cls, model: str, messages: Messages,
  21. images: ImagesType = None,
  22. api_key: str = None,
  23. proxy: str = None,
  24. **kwargs
  25. ) -> AsyncResult:
  26. headers = {
  27. "Accept": "application/json",
  28. }
  29. if api_key is not None:
  30. headers["Authorization"] = f"Bearer {api_key}"
  31. async with ClientSession(headers=headers) as session:
  32. if images:
  33. data = FormData()
  34. data_bytes = to_bytes(images[0][0])
  35. data.add_field("files", data_bytes, content_type=is_accepted_format(data_bytes), filename=images[0][1])
  36. url = f"{cls.url}/gradio_api/upload?upload_id={get_random_string()}"
  37. async with session.post(url, data=data, proxy=proxy) as response:
  38. await raise_for_status(response)
  39. image = await response.json()
  40. data = {"data": [{"path": image[0]}, format_prompt(messages)]}
  41. else:
  42. data = {"data": [None, format_prompt(messages)]}
  43. async with session.post(f"{cls.url}{cls.api_endpoint}", json=data, proxy=proxy) as response:
  44. await raise_for_status(response)
  45. event_id = (await response.json()).get("event_id")
  46. async with session.get(f"{cls.url}{cls.api_endpoint}/{event_id}") as event_response:
  47. await raise_for_status(event_response)
  48. event = None
  49. text_position = 0
  50. async for chunk in event_response.content:
  51. if chunk.startswith(b"event: "):
  52. event = chunk[7:].decode(errors="replace").strip()
  53. if chunk.startswith(b"data: "):
  54. if event == "error":
  55. raise ResponseError(f"GPU token limit exceeded: {chunk.decode(errors='replace')}")
  56. if event in ("complete", "generating"):
  57. try:
  58. data = json.loads(chunk[6:])
  59. except (json.JSONDecodeError, KeyError, TypeError) as e:
  60. raise RuntimeError(f"Failed to read response: {chunk.decode(errors='replace')}", e)
  61. if event == "generating":
  62. if isinstance(data[0], str):
  63. yield data[0][text_position:]
  64. text_position = len(data[0])
  65. else:
  66. break