har_file.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. from __future__ import annotations
  2. import base64
  3. import json
  4. import os
  5. import re
  6. import time
  7. import uuid
  8. import random
  9. from urllib.parse import unquote
  10. from copy import deepcopy
  11. from .crypt import decrypt, encrypt
  12. from ...requests import StreamSession
  13. from ...cookies import get_cookies_dir
  14. from ...errors import NoValidHarFileError
  15. from ... import debug
# Arkose public-key endpoint; readHAR compares each HAR entry's request URL
# against this exact value to find the captured Arkose request.
arkose_url = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
# Conversation endpoints. Not referenced in this part of the file;
# presumably consumed by modules importing them -- TODO confirm.
backend_url = "https://chatgpt.com/backend-api/conversation"
backend_anon_url = "https://chatgpt.com/backend-anon/conversation"
# Site root; readHAR scans HAR entries whose request URL starts with this
# prefix for tokens, headers and cookies.
start_url = "https://chatgpt.com/"
# Not referenced in this part of the file.
conversation_url = "https://chatgpt.com/c/"
  21. class RequestConfig:
  22. cookies: dict = None
  23. headers: dict = None
  24. access_token: str = None
  25. proof_token: list = None
  26. turnstile_token: str = None
  27. arkose_request: arkReq = None
  28. arkose_token: str = None
  29. data_build: str = "prod-db8e51e8414e068257091cf5003a62d3d4ee6ed0"
  30. class arkReq:
  31. def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
  32. self.arkURL = arkURL
  33. self.arkBx = arkBx
  34. self.arkHeader = arkHeader
  35. self.arkBody = arkBody
  36. self.arkCookies = arkCookies
  37. self.userAgent = userAgent
  38. def get_har_files():
  39. if not os.access(get_cookies_dir(), os.R_OK):
  40. raise NoValidHarFileError("har_and_cookies dir is not readable")
  41. harPath = []
  42. for root, _, files in os.walk(get_cookies_dir()):
  43. for file in files:
  44. if file.endswith(".har"):
  45. harPath.append(os.path.join(root, file))
  46. break
  47. if not harPath:
  48. raise NoValidHarFileError("No .har file found")
  49. harPath.sort(key=lambda x: os.path.getmtime(x))
  50. return harPath
  51. def readHAR(request_config: RequestConfig):
  52. for path in get_har_files():
  53. with open(path, 'rb') as file:
  54. try:
  55. harFile = json.loads(file.read())
  56. except json.JSONDecodeError:
  57. # Error: not a HAR file!
  58. continue
  59. for v in harFile['log']['entries']:
  60. v_headers = get_headers(v)
  61. if arkose_url == v['request']['url']:
  62. request_config.arkose_request = parseHAREntry(v)
  63. elif v['request']['url'].startswith(start_url):
  64. try:
  65. match = re.search(r'"accessToken":"(.*?)"', v["response"]["content"]["text"])
  66. if match:
  67. request_config.access_token = match.group(1)
  68. except KeyError:
  69. pass
  70. try:
  71. if "openai-sentinel-proof-token" in v_headers:
  72. request_config.headers = v_headers
  73. request_config.proof_token = json.loads(base64.b64decode(
  74. v_headers["openai-sentinel-proof-token"].split("gAAAAAB", 1)[-1].encode()
  75. ).decode())
  76. if "openai-sentinel-turnstile-token" in v_headers:
  77. request_config.turnstile_token = v_headers["openai-sentinel-turnstile-token"]
  78. if "authorization" in v_headers:
  79. request_config.access_token = v_headers["authorization"].split(" ")[1]
  80. request_config.cookies = {c['name']: c['value'] for c in v['request']['cookies']}
  81. except Exception as e:
  82. debug.log(f"Error on read headers: {e}")
  83. def get_headers(entry) -> dict:
  84. return {h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')}
  85. def parseHAREntry(entry) -> arkReq:
  86. tmpArk = arkReq(
  87. arkURL=entry['request']['url'],
  88. arkBx="",
  89. arkHeader=get_headers(entry),
  90. arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']},
  91. arkCookies={c['name']: c['value'] for c in entry['request']['cookies']},
  92. userAgent=""
  93. )
  94. tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '')
  95. bda = tmpArk.arkBody["bda"]
  96. bw = tmpArk.arkHeader['x-ark-esync-value']
  97. tmpArk.arkBx = decrypt(bda, tmpArk.userAgent + bw)
  98. return tmpArk
  99. def genArkReq(chatArk: arkReq) -> arkReq:
  100. tmpArk: arkReq = deepcopy(chatArk)
  101. if tmpArk is None or not tmpArk.arkBody or not tmpArk.arkHeader:
  102. raise RuntimeError("The .har file is not valid")
  103. bda, bw = getBDA(tmpArk)
  104. tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
  105. tmpArk.arkBody['rnd'] = str(random.random())
  106. tmpArk.arkHeader['x-ark-esync-value'] = bw
  107. return tmpArk
  108. async def sendRequest(tmpArk: arkReq, proxy: str = None) -> str:
  109. async with StreamSession(headers=tmpArk.arkHeader, cookies=tmpArk.arkCookies, proxies={"https": proxy}) as session:
  110. async with session.post(tmpArk.arkURL, data=tmpArk.arkBody) as response:
  111. data = await response.json()
  112. arkose = data.get("token")
  113. if "sup=1|rid=" not in arkose:
  114. return RuntimeError("No valid arkose token generated")
  115. return arkose
  116. def getBDA(arkReq: arkReq):
  117. bx = arkReq.arkBx
  118. bx = re.sub(r'"key":"n","value":"\S*?"', f'"key":"n","value":"{getN()}"', bx)
  119. oldUUID_search = re.search(r'"key":"4b4b269e68","value":"(\S*?)"', bx)
  120. if oldUUID_search:
  121. oldUUID = oldUUID_search.group(1)
  122. newUUID = str(uuid.uuid4())
  123. bx = bx.replace(oldUUID, newUUID)
  124. bw = getBw(getBt())
  125. encrypted_bx = encrypt(bx, arkReq.userAgent + bw)
  126. return encrypted_bx, bw
  127. def getBt() -> int:
  128. return int(time.time())
  129. def getBw(bt: int) -> str:
  130. return str(bt - (bt % 21600))
  131. def getN() -> str:
  132. timestamp = str(int(time.time()))
  133. return base64.b64encode(timestamp.encode()).decode()
  134. async def get_request_config(request_config: RequestConfig, proxy: str) -> RequestConfig:
  135. readHAR(request_config)
  136. if request_config.arkose_request is not None:
  137. request_config.arkose_token = await sendRequest(genArkReq(request_config.arkose_request), proxy)
  138. if request_config.proof_token is None:
  139. raise NoValidHarFileError("No proof_token found in .har files")
  140. return request_config