cookies.py 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325
  1. import base64
  2. import collections
  3. import contextlib
  4. import http.cookiejar
  5. import http.cookies
  6. import io
  7. import json
  8. import os
  9. import re
  10. import shutil
  11. import struct
  12. import subprocess
  13. import sys
  14. import tempfile
  15. import time
  16. import urllib.request
  17. from datetime import datetime, timedelta, timezone
  18. from enum import Enum, auto
  19. from hashlib import pbkdf2_hmac
  20. from .aes import (
  21. aes_cbc_decrypt_bytes,
  22. aes_gcm_decrypt_and_verify_bytes,
  23. unpad_pkcs7,
  24. )
  25. from .compat import functools
  26. from .dependencies import (
  27. _SECRETSTORAGE_UNAVAILABLE_REASON,
  28. secretstorage,
  29. sqlite3,
  30. )
  31. from .minicurses import MultilinePrinter, QuietMultilinePrinter
  32. from .utils import (
  33. Popen,
  34. error_to_str,
  35. expand_path,
  36. is_path_like,
  37. sanitize_url,
  38. str_or_none,
  39. try_call,
  40. write_string,
  41. )
  42. from .utils._utils import _YDLLogger
  43. from .utils.networking import normalize_url
  44. CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
  45. SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
  46. class YDLLogger(_YDLLogger):
  47. def warning(self, message, only_once=False): # compat
  48. return super().warning(message, once=only_once)
  49. class ProgressBar(MultilinePrinter):
  50. _DELAY, _timer = 0.1, 0
  51. def print(self, message):
  52. if time.time() - self._timer > self._DELAY:
  53. self.print_at_line(f'[Cookies] {message}', 0)
  54. self._timer = time.time()
  55. def progress_bar(self):
  56. """Return a context manager with a print method. (Optional)"""
  57. # Do not print to files/pipes, loggers, or when --no-progress is used
  58. if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
  59. return
  60. file = self._ydl._out_files.error
  61. try:
  62. if not file.isatty():
  63. return
  64. except BaseException:
  65. return
  66. return self.ProgressBar(file, preserve_output=False)
  67. def _create_progress_bar(logger):
  68. if hasattr(logger, 'progress_bar'):
  69. printer = logger.progress_bar()
  70. if printer:
  71. return printer
  72. printer = QuietMultilinePrinter()
  73. printer.print = lambda _: None
  74. return printer
  75. def load_cookies(cookie_file, browser_specification, ydl):
  76. cookie_jars = []
  77. if browser_specification is not None:
  78. browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
  79. cookie_jars.append(
  80. extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
  81. if cookie_file is not None:
  82. is_filename = is_path_like(cookie_file)
  83. if is_filename:
  84. cookie_file = expand_path(cookie_file)
  85. jar = YoutubeDLCookieJar(cookie_file)
  86. if not is_filename or os.access(cookie_file, os.R_OK):
  87. jar.load()
  88. cookie_jars.append(jar)
  89. return _merge_cookie_jars(cookie_jars)
  90. def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
  91. if browser_name == 'firefox':
  92. return _extract_firefox_cookies(profile, container, logger)
  93. elif browser_name == 'safari':
  94. return _extract_safari_cookies(profile, logger)
  95. elif browser_name in CHROMIUM_BASED_BROWSERS:
  96. return _extract_chrome_cookies(browser_name, profile, keyring, logger)
  97. else:
  98. raise ValueError(f'unknown browser: {browser_name}')
  99. def _extract_firefox_cookies(profile, container, logger):
  100. logger.info('Extracting cookies from firefox')
  101. if not sqlite3:
  102. logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
  103. 'Please use a python interpreter compiled with sqlite3 support')
  104. return YoutubeDLCookieJar()
  105. if profile is None:
  106. search_root = _firefox_browser_dir()
  107. elif _is_path(profile):
  108. search_root = profile
  109. else:
  110. search_root = os.path.join(_firefox_browser_dir(), profile)
  111. cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
  112. if cookie_database_path is None:
  113. raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
  114. logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
  115. container_id = None
  116. if container not in (None, 'none'):
  117. containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
  118. if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
  119. raise FileNotFoundError(f'could not read containers.json in {search_root}')
  120. with open(containers_path, encoding='utf8') as containers:
  121. identities = json.load(containers).get('identities', [])
  122. container_id = next((context.get('userContextId') for context in identities if container in (
  123. context.get('name'),
  124. try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
  125. )), None)
  126. if not isinstance(container_id, int):
  127. raise ValueError(f'could not find firefox container "{container}" in containers.json')
  128. with tempfile.TemporaryDirectory(prefix='hypervideo_dl') as tmpdir:
  129. cursor = None
  130. try:
  131. cursor = _open_database_copy(cookie_database_path, tmpdir)
  132. if isinstance(container_id, int):
  133. logger.debug(
  134. f'Only loading cookies from firefox container "{container}", ID {container_id}')
  135. cursor.execute(
  136. 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
  137. (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
  138. elif container == 'none':
  139. logger.debug('Only loading cookies not belonging to any container')
  140. cursor.execute(
  141. 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
  142. else:
  143. cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
  144. jar = YoutubeDLCookieJar()
  145. with _create_progress_bar(logger) as progress_bar:
  146. table = cursor.fetchall()
  147. total_cookie_count = len(table)
  148. for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
  149. progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
  150. cookie = http.cookiejar.Cookie(
  151. version=0, name=name, value=value, port=None, port_specified=False,
  152. domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
  153. path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
  154. comment=None, comment_url=None, rest={})
  155. jar.set_cookie(cookie)
  156. logger.info(f'Extracted {len(jar)} cookies from firefox')
  157. return jar
  158. finally:
  159. if cursor is not None:
  160. cursor.connection.close()
  161. def _firefox_browser_dir():
  162. if sys.platform in ('cygwin', 'win32'):
  163. return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
  164. elif sys.platform == 'darwin':
  165. return os.path.expanduser('~/Library/Application Support/Firefox')
  166. return os.path.expanduser('~/.mozilla/firefox')
  167. def _get_chromium_based_browser_settings(browser_name):
  168. # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
  169. if sys.platform in ('cygwin', 'win32'):
  170. appdata_local = os.path.expandvars('%LOCALAPPDATA%')
  171. appdata_roaming = os.path.expandvars('%APPDATA%')
  172. browser_dir = {
  173. 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
  174. 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
  175. 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
  176. 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
  177. 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
  178. 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
  179. }[browser_name]
  180. elif sys.platform == 'darwin':
  181. appdata = os.path.expanduser('~/Library/Application Support')
  182. browser_dir = {
  183. 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
  184. 'chrome': os.path.join(appdata, 'Google/Chrome'),
  185. 'chromium': os.path.join(appdata, 'Chromium'),
  186. 'edge': os.path.join(appdata, 'Microsoft Edge'),
  187. 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
  188. 'vivaldi': os.path.join(appdata, 'Vivaldi'),
  189. }[browser_name]
  190. else:
  191. config = _config_home()
  192. browser_dir = {
  193. 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
  194. 'chrome': os.path.join(config, 'google-chrome'),
  195. 'chromium': os.path.join(config, 'chromium'),
  196. 'edge': os.path.join(config, 'microsoft-edge'),
  197. 'opera': os.path.join(config, 'opera'),
  198. 'vivaldi': os.path.join(config, 'vivaldi'),
  199. }[browser_name]
  200. # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
  201. # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
  202. keyring_name = {
  203. 'brave': 'Brave',
  204. 'chrome': 'Chrome',
  205. 'chromium': 'Chromium',
  206. 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
  207. 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
  208. 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
  209. }[browser_name]
  210. browsers_without_profiles = {'opera'}
  211. return {
  212. 'browser_dir': browser_dir,
  213. 'keyring_name': keyring_name,
  214. 'supports_profiles': browser_name not in browsers_without_profiles
  215. }
  216. def _extract_chrome_cookies(browser_name, profile, keyring, logger):
  217. logger.info(f'Extracting cookies from {browser_name}')
  218. if not sqlite3:
  219. logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
  220. 'Please use a python interpreter compiled with sqlite3 support')
  221. return YoutubeDLCookieJar()
  222. config = _get_chromium_based_browser_settings(browser_name)
  223. if profile is None:
  224. search_root = config['browser_dir']
  225. elif _is_path(profile):
  226. search_root = profile
  227. config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
  228. else:
  229. if config['supports_profiles']:
  230. search_root = os.path.join(config['browser_dir'], profile)
  231. else:
  232. logger.error(f'{browser_name} does not support profiles')
  233. search_root = config['browser_dir']
  234. cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
  235. if cookie_database_path is None:
  236. raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
  237. logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
  238. decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
  239. with tempfile.TemporaryDirectory(prefix='hypervideo_dl') as tmpdir:
  240. cursor = None
  241. try:
  242. cursor = _open_database_copy(cookie_database_path, tmpdir)
  243. cursor.connection.text_factory = bytes
  244. column_names = _get_column_names(cursor, 'cookies')
  245. secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
  246. cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
  247. jar = YoutubeDLCookieJar()
  248. failed_cookies = 0
  249. unencrypted_cookies = 0
  250. with _create_progress_bar(logger) as progress_bar:
  251. table = cursor.fetchall()
  252. total_cookie_count = len(table)
  253. for i, line in enumerate(table):
  254. progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
  255. is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
  256. if not cookie:
  257. failed_cookies += 1
  258. continue
  259. elif not is_encrypted:
  260. unencrypted_cookies += 1
  261. jar.set_cookie(cookie)
  262. if failed_cookies > 0:
  263. failed_message = f' ({failed_cookies} could not be decrypted)'
  264. else:
  265. failed_message = ''
  266. logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
  267. counts = decryptor._cookie_counts.copy()
  268. counts['unencrypted'] = unencrypted_cookies
  269. logger.debug(f'cookie version breakdown: {counts}')
  270. return jar
  271. finally:
  272. if cursor is not None:
  273. cursor.connection.close()
  274. def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
  275. host_key = host_key.decode()
  276. name = name.decode()
  277. value = value.decode()
  278. path = path.decode()
  279. is_encrypted = not value and encrypted_value
  280. if is_encrypted:
  281. value = decryptor.decrypt(encrypted_value)
  282. if value is None:
  283. return is_encrypted, None
  284. return is_encrypted, http.cookiejar.Cookie(
  285. version=0, name=name, value=value, port=None, port_specified=False,
  286. domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
  287. path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
  288. comment=None, comment_url=None, rest={})
  289. class ChromeCookieDecryptor:
  290. """
  291. Overview:
  292. Linux:
  293. - cookies are either v10 or v11
  294. - v10: AES-CBC encrypted with a fixed key
  295. - also attempts empty password if decryption fails
  296. - v11: AES-CBC encrypted with an OS protected key (keyring)
  297. - also attempts empty password if decryption fails
  298. - v11 keys can be stored in various places depending on the activate desktop environment [2]
  299. Mac:
  300. - cookies are either v10 or not v10
  301. - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
  302. - not v10: 'old data' stored as plaintext
  303. Windows:
  304. - cookies are either v10 or not v10
  305. - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
  306. - not v10: encrypted with DPAPI
  307. Sources:
  308. - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
  309. - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
  310. - KeyStorageLinux::CreateService
  311. """
  312. _cookie_counts = {}
  313. def decrypt(self, encrypted_value):
  314. raise NotImplementedError('Must be implemented by sub classes')
  315. def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
  316. if sys.platform == 'darwin':
  317. return MacChromeCookieDecryptor(browser_keyring_name, logger)
  318. elif sys.platform in ('win32', 'cygwin'):
  319. return WindowsChromeCookieDecryptor(browser_root, logger)
  320. return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
  321. class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
  322. def __init__(self, browser_keyring_name, logger, *, keyring=None):
  323. self._logger = logger
  324. self._v10_key = self.derive_key(b'peanuts')
  325. self._empty_key = self.derive_key(b'')
  326. self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
  327. self._browser_keyring_name = browser_keyring_name
  328. self._keyring = keyring
  329. @functools.cached_property
  330. def _v11_key(self):
  331. password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
  332. return None if password is None else self.derive_key(password)
  333. @staticmethod
  334. def derive_key(password):
  335. # values from
  336. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
  337. return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
  338. def decrypt(self, encrypted_value):
  339. """
  340. following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
  341. with an empty password. The failure detection is not the same as what chromium uses so the
  342. results won't be perfect
  343. References:
  344. - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
  345. - a bugfix to try an empty password as a fallback
  346. """
  347. version = encrypted_value[:3]
  348. ciphertext = encrypted_value[3:]
  349. if version == b'v10':
  350. self._cookie_counts['v10'] += 1
  351. return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)
  352. elif version == b'v11':
  353. self._cookie_counts['v11'] += 1
  354. if self._v11_key is None:
  355. self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
  356. return None
  357. return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)
  358. else:
  359. self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
  360. self._cookie_counts['other'] += 1
  361. return None
  362. class MacChromeCookieDecryptor(ChromeCookieDecryptor):
  363. def __init__(self, browser_keyring_name, logger):
  364. self._logger = logger
  365. password = _get_mac_keyring_password(browser_keyring_name, logger)
  366. self._v10_key = None if password is None else self.derive_key(password)
  367. self._cookie_counts = {'v10': 0, 'other': 0}
  368. @staticmethod
  369. def derive_key(password):
  370. # values from
  371. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
  372. return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
  373. def decrypt(self, encrypted_value):
  374. version = encrypted_value[:3]
  375. ciphertext = encrypted_value[3:]
  376. if version == b'v10':
  377. self._cookie_counts['v10'] += 1
  378. if self._v10_key is None:
  379. self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
  380. return None
  381. return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
  382. else:
  383. self._cookie_counts['other'] += 1
  384. # other prefixes are considered 'old data' which were stored as plaintext
  385. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
  386. return encrypted_value
  387. class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
  388. def __init__(self, browser_root, logger):
  389. self._logger = logger
  390. self._v10_key = _get_windows_v10_key(browser_root, logger)
  391. self._cookie_counts = {'v10': 0, 'other': 0}
  392. def decrypt(self, encrypted_value):
  393. version = encrypted_value[:3]
  394. ciphertext = encrypted_value[3:]
  395. if version == b'v10':
  396. self._cookie_counts['v10'] += 1
  397. if self._v10_key is None:
  398. self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
  399. return None
  400. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
  401. # kNonceLength
  402. nonce_length = 96 // 8
  403. # boringssl
  404. # EVP_AEAD_AES_GCM_TAG_LEN
  405. authentication_tag_length = 16
  406. raw_ciphertext = ciphertext
  407. nonce = raw_ciphertext[:nonce_length]
  408. ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
  409. authentication_tag = raw_ciphertext[-authentication_tag_length:]
  410. return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
  411. else:
  412. self._cookie_counts['other'] += 1
  413. # any other prefix means the data is DPAPI encrypted
  414. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
  415. return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
  416. def _extract_safari_cookies(profile, logger):
  417. if sys.platform != 'darwin':
  418. raise ValueError(f'unsupported platform: {sys.platform}')
  419. if profile:
  420. cookies_path = os.path.expanduser(profile)
  421. if not os.path.isfile(cookies_path):
  422. raise FileNotFoundError('custom safari cookies database not found')
  423. else:
  424. cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
  425. if not os.path.isfile(cookies_path):
  426. logger.debug('Trying secondary cookie location')
  427. cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
  428. if not os.path.isfile(cookies_path):
  429. raise FileNotFoundError('could not find safari cookies database')
  430. with open(cookies_path, 'rb') as f:
  431. cookies_data = f.read()
  432. jar = parse_safari_cookies(cookies_data, logger=logger)
  433. logger.info(f'Extracted {len(jar)} cookies from safari')
  434. return jar
  435. class ParserError(Exception):
  436. pass
  437. class DataParser:
  438. def __init__(self, data, logger):
  439. self._data = data
  440. self.cursor = 0
  441. self._logger = logger
  442. def read_bytes(self, num_bytes):
  443. if num_bytes < 0:
  444. raise ParserError(f'invalid read of {num_bytes} bytes')
  445. end = self.cursor + num_bytes
  446. if end > len(self._data):
  447. raise ParserError('reached end of input')
  448. data = self._data[self.cursor:end]
  449. self.cursor = end
  450. return data
  451. def expect_bytes(self, expected_value, message):
  452. value = self.read_bytes(len(expected_value))
  453. if value != expected_value:
  454. raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
  455. def read_uint(self, big_endian=False):
  456. data_format = '>I' if big_endian else '<I'
  457. return struct.unpack(data_format, self.read_bytes(4))[0]
  458. def read_double(self, big_endian=False):
  459. data_format = '>d' if big_endian else '<d'
  460. return struct.unpack(data_format, self.read_bytes(8))[0]
  461. def read_cstring(self):
  462. buffer = []
  463. while True:
  464. c = self.read_bytes(1)
  465. if c == b'\x00':
  466. return b''.join(buffer).decode()
  467. else:
  468. buffer.append(c)
  469. def skip(self, num_bytes, description='unknown'):
  470. if num_bytes > 0:
  471. self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
  472. elif num_bytes < 0:
  473. raise ParserError(f'invalid skip of {num_bytes} bytes')
  474. def skip_to(self, offset, description='unknown'):
  475. self.skip(offset - self.cursor, description)
  476. def skip_to_end(self, description='unknown'):
  477. self.skip_to(len(self._data), description)
  478. def _mac_absolute_time_to_posix(timestamp):
  479. return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
  480. def _parse_safari_cookies_header(data, logger):
  481. p = DataParser(data, logger)
  482. p.expect_bytes(b'cook', 'database signature')
  483. number_of_pages = p.read_uint(big_endian=True)
  484. page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
  485. return page_sizes, p.cursor
  486. def _parse_safari_cookies_page(data, jar, logger):
  487. p = DataParser(data, logger)
  488. p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
  489. number_of_cookies = p.read_uint()
  490. record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
  491. if number_of_cookies == 0:
  492. logger.debug(f'a cookies page of size {len(data)} has no cookies')
  493. return
  494. p.skip_to(record_offsets[0], 'unknown page header field')
  495. with _create_progress_bar(logger) as progress_bar:
  496. for i, record_offset in enumerate(record_offsets):
  497. progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
  498. p.skip_to(record_offset, 'space between records')
  499. record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
  500. p.read_bytes(record_length)
  501. p.skip_to_end('space in between pages')
  502. def _parse_safari_cookies_record(data, jar, logger):
  503. p = DataParser(data, logger)
  504. record_size = p.read_uint()
  505. p.skip(4, 'unknown record field 1')
  506. flags = p.read_uint()
  507. is_secure = bool(flags & 0x0001)
  508. p.skip(4, 'unknown record field 2')
  509. domain_offset = p.read_uint()
  510. name_offset = p.read_uint()
  511. path_offset = p.read_uint()
  512. value_offset = p.read_uint()
  513. p.skip(8, 'unknown record field 3')
  514. expiration_date = _mac_absolute_time_to_posix(p.read_double())
  515. _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
  516. try:
  517. p.skip_to(domain_offset)
  518. domain = p.read_cstring()
  519. p.skip_to(name_offset)
  520. name = p.read_cstring()
  521. p.skip_to(path_offset)
  522. path = p.read_cstring()
  523. p.skip_to(value_offset)
  524. value = p.read_cstring()
  525. except UnicodeDecodeError:
  526. logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
  527. return record_size
  528. p.skip_to(record_size, 'space at the end of the record')
  529. cookie = http.cookiejar.Cookie(
  530. version=0, name=name, value=value, port=None, port_specified=False,
  531. domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
  532. path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
  533. comment=None, comment_url=None, rest={})
  534. jar.set_cookie(cookie)
  535. return record_size
  536. def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
  537. """
  538. References:
  539. - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
  540. - this data appears to be out of date but the important parts of the database structure is the same
  541. - there are a few bytes here and there which are skipped during parsing
  542. """
  543. if jar is None:
  544. jar = YoutubeDLCookieJar()
  545. page_sizes, body_start = _parse_safari_cookies_header(data, logger)
  546. p = DataParser(data[body_start:], logger)
  547. for page_size in page_sizes:
  548. _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
  549. p.skip_to_end('footer')
  550. return jar
  551. class _LinuxDesktopEnvironment(Enum):
  552. """
  553. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
  554. DesktopEnvironment
  555. """
  556. OTHER = auto()
  557. CINNAMON = auto()
  558. DEEPIN = auto()
  559. GNOME = auto()
  560. KDE3 = auto()
  561. KDE4 = auto()
  562. KDE5 = auto()
  563. KDE6 = auto()
  564. PANTHEON = auto()
  565. UKUI = auto()
  566. UNITY = auto()
  567. XFCE = auto()
  568. LXQT = auto()
  569. class _LinuxKeyring(Enum):
  570. """
  571. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
  572. SelectedLinuxBackend
  573. """
  574. KWALLET = auto() # KDE4
  575. KWALLET5 = auto()
  576. KWALLET6 = auto()
  577. GNOMEKEYRING = auto()
  578. BASICTEXT = auto()
  579. SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
  580. def _get_linux_desktop_environment(env, logger):
  581. """
  582. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
  583. GetDesktopEnvironment
  584. """
  585. xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
  586. desktop_session = env.get('DESKTOP_SESSION', None)
  587. if xdg_current_desktop is not None:
  588. xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
  589. if xdg_current_desktop == 'Unity':
  590. if desktop_session is not None and 'gnome-fallback' in desktop_session:
  591. return _LinuxDesktopEnvironment.GNOME
  592. else:
  593. return _LinuxDesktopEnvironment.UNITY
  594. elif xdg_current_desktop == 'Deepin':
  595. return _LinuxDesktopEnvironment.DEEPIN
  596. elif xdg_current_desktop == 'GNOME':
  597. return _LinuxDesktopEnvironment.GNOME
  598. elif xdg_current_desktop == 'X-Cinnamon':
  599. return _LinuxDesktopEnvironment.CINNAMON
  600. elif xdg_current_desktop == 'KDE':
  601. kde_version = env.get('KDE_SESSION_VERSION', None)
  602. if kde_version == '5':
  603. return _LinuxDesktopEnvironment.KDE5
  604. elif kde_version == '6':
  605. return _LinuxDesktopEnvironment.KDE6
  606. elif kde_version == '4':
  607. return _LinuxDesktopEnvironment.KDE4
  608. else:
  609. logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
  610. return _LinuxDesktopEnvironment.KDE4
  611. elif xdg_current_desktop == 'Pantheon':
  612. return _LinuxDesktopEnvironment.PANTHEON
  613. elif xdg_current_desktop == 'XFCE':
  614. return _LinuxDesktopEnvironment.XFCE
  615. elif xdg_current_desktop == 'UKUI':
  616. return _LinuxDesktopEnvironment.UKUI
  617. elif xdg_current_desktop == 'LXQt':
  618. return _LinuxDesktopEnvironment.LXQT
  619. else:
  620. logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
  621. elif desktop_session is not None:
  622. if desktop_session == 'deepin':
  623. return _LinuxDesktopEnvironment.DEEPIN
  624. elif desktop_session in ('mate', 'gnome'):
  625. return _LinuxDesktopEnvironment.GNOME
  626. elif desktop_session in ('kde4', 'kde-plasma'):
  627. return _LinuxDesktopEnvironment.KDE4
  628. elif desktop_session == 'kde':
  629. if 'KDE_SESSION_VERSION' in env:
  630. return _LinuxDesktopEnvironment.KDE4
  631. else:
  632. return _LinuxDesktopEnvironment.KDE3
  633. elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
  634. return _LinuxDesktopEnvironment.XFCE
  635. elif desktop_session == 'ukui':
  636. return _LinuxDesktopEnvironment.UKUI
  637. else:
  638. logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
  639. else:
  640. if 'GNOME_DESKTOP_SESSION_ID' in env:
  641. return _LinuxDesktopEnvironment.GNOME
  642. elif 'KDE_FULL_SESSION' in env:
  643. if 'KDE_SESSION_VERSION' in env:
  644. return _LinuxDesktopEnvironment.KDE4
  645. else:
  646. return _LinuxDesktopEnvironment.KDE3
  647. return _LinuxDesktopEnvironment.OTHER
  648. def _choose_linux_keyring(logger):
  649. """
  650. SelectBackend in [1]
  651. There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
  652. `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
  653. does not appear to be called anywhere other than in tests, so the user would have to create this file manually
  654. and so would be aware enough to tell hypervideo to use the BASIC_TEXT keyring.
  655. References:
  656. - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
  657. """
  658. desktop_environment = _get_linux_desktop_environment(os.environ, logger)
  659. logger.debug(f'detected desktop environment: {desktop_environment.name}')
  660. if desktop_environment == _LinuxDesktopEnvironment.KDE4:
  661. linux_keyring = _LinuxKeyring.KWALLET
  662. elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
  663. linux_keyring = _LinuxKeyring.KWALLET5
  664. elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
  665. linux_keyring = _LinuxKeyring.KWALLET6
  666. elif desktop_environment in (
  667. _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
  668. ):
  669. linux_keyring = _LinuxKeyring.BASICTEXT
  670. else:
  671. linux_keyring = _LinuxKeyring.GNOMEKEYRING
  672. return linux_keyring
  673. def _get_kwallet_network_wallet(keyring, logger):
  674. """ The name of the wallet used to store network passwords.
  675. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
  676. KWalletDBus::NetworkWallet
  677. which does a dbus call to the following function:
  678. https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
  679. Wallet::NetworkWallet
  680. """
  681. default_wallet = 'kdewallet'
  682. try:
  683. if keyring == _LinuxKeyring.KWALLET:
  684. service_name = 'org.kde.kwalletd'
  685. wallet_path = '/modules/kwalletd'
  686. elif keyring == _LinuxKeyring.KWALLET5:
  687. service_name = 'org.kde.kwalletd5'
  688. wallet_path = '/modules/kwalletd5'
  689. elif keyring == _LinuxKeyring.KWALLET6:
  690. service_name = 'org.kde.kwalletd6'
  691. wallet_path = '/modules/kwalletd6'
  692. else:
  693. raise ValueError(keyring)
  694. stdout, _, returncode = Popen.run([
  695. 'dbus-send', '--session', '--print-reply=literal',
  696. f'--dest={service_name}',
  697. wallet_path,
  698. 'org.kde.KWallet.networkWallet'
  699. ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  700. if returncode:
  701. logger.warning('failed to read NetworkWallet')
  702. return default_wallet
  703. else:
  704. logger.debug(f'NetworkWallet = "{stdout.strip()}"')
  705. return stdout.strip()
  706. except Exception as e:
  707. logger.warning(f'exception while obtaining NetworkWallet: {e}')
  708. return default_wallet
  709. def _get_kwallet_password(browser_keyring_name, keyring, logger):
  710. logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
  711. if shutil.which('kwallet-query') is None:
  712. logger.error('kwallet-query command not found. KWallet and kwallet-query '
  713. 'must be installed to read from KWallet. kwallet-query should be'
  714. 'included in the kwallet package for your distribution')
  715. return b''
  716. network_wallet = _get_kwallet_network_wallet(keyring, logger)
  717. try:
  718. stdout, _, returncode = Popen.run([
  719. 'kwallet-query',
  720. '--read-password', f'{browser_keyring_name} Safe Storage',
  721. '--folder', f'{browser_keyring_name} Keys',
  722. network_wallet
  723. ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  724. if returncode:
  725. logger.error(f'kwallet-query failed with return code {returncode}. '
  726. 'Please consult the kwallet-query man page for details')
  727. return b''
  728. else:
  729. if stdout.lower().startswith(b'failed to read'):
  730. logger.debug('failed to read password from kwallet. Using empty string instead')
  731. # this sometimes occurs in KDE because chrome does not check hasEntry and instead
  732. # just tries to read the value (which kwallet returns "") whereas kwallet-query
  733. # checks hasEntry. To verify this:
  734. # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
  735. # while starting chrome.
  736. # this was identified as a bug later and fixed in
  737. # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
  738. # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
  739. return b''
  740. else:
  741. logger.debug('password found')
  742. return stdout.rstrip(b'\n')
  743. except Exception as e:
  744. logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
  745. return b''
  746. def _get_gnome_keyring_password(browser_keyring_name, logger):
  747. if not secretstorage:
  748. logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
  749. return b''
  750. # the Gnome keyring does not seem to organise keys in the same way as KWallet,
  751. # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
  752. # and presumably searches for its key in the list. It appears that we must do the same.
  753. # https://github.com/jaraco/keyring/issues/556
  754. with contextlib.closing(secretstorage.dbus_init()) as con:
  755. col = secretstorage.get_default_collection(con)
  756. for item in col.get_all_items():
  757. if item.get_label() == f'{browser_keyring_name} Safe Storage':
  758. return item.get_secret()
  759. else:
  760. logger.error('failed to read from keyring')
  761. return b''
  762. def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
  763. # note: chrome/chromium can be run with the following flags to determine which keyring backend
  764. # it has chosen to use
  765. # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
  766. # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
  767. # will not be sufficient in all cases.
  768. keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
  769. logger.debug(f'Chosen keyring: {keyring.name}')
  770. if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
  771. return _get_kwallet_password(browser_keyring_name, keyring, logger)
  772. elif keyring == _LinuxKeyring.GNOMEKEYRING:
  773. return _get_gnome_keyring_password(browser_keyring_name, logger)
  774. elif keyring == _LinuxKeyring.BASICTEXT:
  775. # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
  776. return None
  777. assert False, f'Unknown keyring {keyring}'
  778. def _get_mac_keyring_password(browser_keyring_name, logger):
  779. logger.debug('using find-generic-password to obtain password from OSX keychain')
  780. try:
  781. stdout, _, returncode = Popen.run(
  782. ['security', 'find-generic-password',
  783. '-w', # write password to stdout
  784. '-a', browser_keyring_name, # match 'account'
  785. '-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
  786. stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  787. if returncode:
  788. logger.warning('find-generic-password failed')
  789. return None
  790. return stdout.rstrip(b'\n')
  791. except Exception as e:
  792. logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
  793. return None
  794. def _get_windows_v10_key(browser_root, logger):
  795. """
  796. References:
  797. - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
  798. """
  799. path = _find_most_recently_used_file(browser_root, 'Local State', logger)
  800. if path is None:
  801. logger.error('could not find local state file')
  802. return None
  803. logger.debug(f'Found local state file at "{path}"')
  804. with open(path, encoding='utf8') as f:
  805. data = json.load(f)
  806. try:
  807. # kOsCryptEncryptedKeyPrefName in [1]
  808. base64_key = data['os_crypt']['encrypted_key']
  809. except KeyError:
  810. logger.error('no encrypted key in Local State')
  811. return None
  812. encrypted_key = base64.b64decode(base64_key)
  813. # kDPAPIKeyPrefix in [1]
  814. prefix = b'DPAPI'
  815. if not encrypted_key.startswith(prefix):
  816. logger.error('invalid key')
  817. return None
  818. return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
  819. def pbkdf2_sha1(password, salt, iterations, key_length):
  820. return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
  821. def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
  822. for key in keys:
  823. plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
  824. try:
  825. return plaintext.decode()
  826. except UnicodeDecodeError:
  827. pass
  828. logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
  829. return None
  830. def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
  831. try:
  832. plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
  833. except ValueError:
  834. logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
  835. return None
  836. try:
  837. return plaintext.decode()
  838. except UnicodeDecodeError:
  839. logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
  840. return None
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling the Windows function CryptUnprotectData.

    Returns the decrypted bytes, or None (after logging a warning) if the call
    reports failure. Relies on `ctypes.windll`, which only exists on Windows.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    # imported here rather than at module level; ctypes.wintypes is only needed for this call
    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        # a byte count followed by a pointer to the data
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    buffer = ctypes.create_string_buffer(ciphertext)
    # NOTE(review): sizeof(buffer) presumably includes the NUL terminator that
    # create_string_buffer appends, i.e. len(ciphertext) + 1 - confirm this is intended
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out)  # pDataOut
    )
    if not ret:
        logger.warning('failed to decrypt with DPAPI', only_once=True)
        return None

    # copy the output into a bytes object before the output buffer is freed
    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result
  869. def _config_home():
  870. return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
  871. def _open_database_copy(database_path, tmpdir):
  872. # cannot open sqlite databases if they are already in use (e.g. by the browser)
  873. database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
  874. shutil.copy(database_path, database_copy_path)
  875. conn = sqlite3.connect(database_copy_path)
  876. return conn.cursor()
  877. def _get_column_names(cursor, table_name):
  878. table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
  879. return [row[1].decode() for row in table_info]
  880. def _find_most_recently_used_file(root, filename, logger):
  881. # if there are multiple browser profiles, take the most recently used one
  882. i, paths = 0, []
  883. with _create_progress_bar(logger) as progress_bar:
  884. for curr_root, dirs, files in os.walk(root):
  885. for file in files:
  886. i += 1
  887. progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
  888. if file == filename:
  889. paths.append(os.path.join(curr_root, file))
  890. return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
  891. def _merge_cookie_jars(jars):
  892. output_jar = YoutubeDLCookieJar()
  893. for jar in jars:
  894. for cookie in jar:
  895. output_jar.set_cookie(cookie)
  896. if jar.filename is not None:
  897. output_jar.filename = jar.filename
  898. return output_jar
  899. def _is_path(value):
  900. return os.path.sep in value
  901. def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
  902. if browser_name not in SUPPORTED_BROWSERS:
  903. raise ValueError(f'unsupported browser: "{browser_name}"')
  904. if keyring not in (None, *SUPPORTED_KEYRINGS):
  905. raise ValueError(f'unsupported keyring: "{keyring}"')
  906. if profile is not None and _is_path(expand_path(profile)):
  907. profile = expand_path(profile)
  908. return browser_name, profile, keyring, container
  909. class LenientSimpleCookie(http.cookies.SimpleCookie):
  910. """More lenient version of http.cookies.SimpleCookie"""
  911. # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
  912. # We use Morsel's legal key chars to avoid errors on setting values
  913. _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
  914. _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
  915. _RESERVED = {
  916. "expires",
  917. "path",
  918. "comment",
  919. "domain",
  920. "max-age",
  921. "secure",
  922. "httponly",
  923. "version",
  924. "samesite",
  925. }
  926. _FLAGS = {"secure", "httponly"}
  927. # Added 'bad' group to catch the remaining value
  928. _COOKIE_PATTERN = re.compile(r"""
  929. \s* # Optional whitespace at start of cookie
  930. (?P<key> # Start of group 'key'
  931. [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
  932. ) # End of group 'key'
  933. ( # Optional group: there may not be a value.
  934. \s*=\s* # Equal Sign
  935. ( # Start of potential value
  936. (?P<val> # Start of group 'val'
  937. "(?:[^\\"]|\\.)*" # Any doublequoted string
  938. | # or
  939. \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
  940. | # or
  941. [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
  942. ) # End of group 'val'
  943. | # or
  944. (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
  945. ) # End of potential value
  946. )? # End of optional value group
  947. \s* # Any number of spaces.
  948. (\s+|;|$) # Ending either at space, semicolon, or EOS.
  949. """, re.ASCII | re.VERBOSE)
  950. def load(self, data):
  951. # Workaround for https://github.com/hypervideo/hypervideo/issues/4776
  952. if not isinstance(data, str):
  953. return super().load(data)
  954. morsel = None
  955. for match in self._COOKIE_PATTERN.finditer(data):
  956. if match.group('bad'):
  957. morsel = None
  958. continue
  959. key, value = match.group('key', 'val')
  960. is_attribute = False
  961. if key.startswith('$'):
  962. key = key[1:]
  963. is_attribute = True
  964. lower_key = key.lower()
  965. if lower_key in self._RESERVED:
  966. if morsel is None:
  967. continue
  968. if value is None:
  969. if lower_key not in self._FLAGS:
  970. morsel = None
  971. continue
  972. value = True
  973. else:
  974. value, _ = self.value_decode(value)
  975. morsel[key] = value
  976. elif is_attribute:
  977. morsel = None
  978. elif value is not None:
  979. morsel = self.get(key, http.cookies.Morsel())
  980. real_value, coded_value = self.value_decode(value)
  981. morsel.set(key, real_value, coded_value)
  982. self[key] = morsel
  983. else:
  984. morsel = None
  985. class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
  986. """
  987. See [1] for cookie file format.
  988. 1. https://curl.haxx.se/docs/http-cookies.html
  989. """
  990. _HTTPONLY_PREFIX = '#HttpOnly_'
  991. _ENTRY_LEN = 7
  992. _HEADER = '''# Netscape HTTP Cookie File
  993. # This file is generated by hypervideo. Do not edit.
  994. '''
  995. _CookieFileEntry = collections.namedtuple(
  996. 'CookieFileEntry',
  997. ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
  998. def __init__(self, filename=None, *args, **kwargs):
  999. super().__init__(None, *args, **kwargs)
  1000. if is_path_like(filename):
  1001. filename = os.fspath(filename)
  1002. self.filename = filename
  1003. @staticmethod
  1004. def _true_or_false(cndn):
  1005. return 'TRUE' if cndn else 'FALSE'
  1006. @contextlib.contextmanager
  1007. def open(self, file, *, write=False):
  1008. if is_path_like(file):
  1009. with open(file, 'w' if write else 'r', encoding='utf-8') as f:
  1010. yield f
  1011. else:
  1012. if write:
  1013. file.truncate(0)
  1014. yield file
  1015. def _really_save(self, f, ignore_discard, ignore_expires):
  1016. now = time.time()
  1017. for cookie in self:
  1018. if (not ignore_discard and cookie.discard
  1019. or not ignore_expires and cookie.is_expired(now)):
  1020. continue
  1021. name, value = cookie.name, cookie.value
  1022. if value is None:
  1023. # cookies.txt regards 'Set-Cookie: foo' as a cookie
  1024. # with no name, whereas http.cookiejar regards it as a
  1025. # cookie with no value.
  1026. name, value = '', name
  1027. f.write('%s\n' % '\t'.join((
  1028. cookie.domain,
  1029. self._true_or_false(cookie.domain.startswith('.')),
  1030. cookie.path,
  1031. self._true_or_false(cookie.secure),
  1032. str_or_none(cookie.expires, default=''),
  1033. name, value
  1034. )))
  1035. def save(self, filename=None, ignore_discard=True, ignore_expires=True):
  1036. """
  1037. Save cookies to a file.
  1038. Code is taken from CPython 3.6
  1039. https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
  1040. if filename is None:
  1041. if self.filename is not None:
  1042. filename = self.filename
  1043. else:
  1044. raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
  1045. # Store session cookies with `expires` set to 0 instead of an empty string
  1046. for cookie in self:
  1047. if cookie.expires is None:
  1048. cookie.expires = 0
  1049. with self.open(filename, write=True) as f:
  1050. f.write(self._HEADER)
  1051. self._really_save(f, ignore_discard, ignore_expires)
  1052. def load(self, filename=None, ignore_discard=True, ignore_expires=True):
  1053. """Load cookies from a file."""
  1054. if filename is None:
  1055. if self.filename is not None:
  1056. filename = self.filename
  1057. else:
  1058. raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
  1059. def prepare_line(line):
  1060. if line.startswith(self._HTTPONLY_PREFIX):
  1061. line = line[len(self._HTTPONLY_PREFIX):]
  1062. # comments and empty lines are fine
  1063. if line.startswith('#') or not line.strip():
  1064. return line
  1065. cookie_list = line.split('\t')
  1066. if len(cookie_list) != self._ENTRY_LEN:
  1067. raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
  1068. cookie = self._CookieFileEntry(*cookie_list)
  1069. if cookie.expires_at and not cookie.expires_at.isdigit():
  1070. raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
  1071. return line
  1072. cf = io.StringIO()
  1073. with self.open(filename) as f:
  1074. for line in f:
  1075. try:
  1076. cf.write(prepare_line(line))
  1077. except http.cookiejar.LoadError as e:
  1078. if f'{line.strip()} '[0] in '[{"':
  1079. raise http.cookiejar.LoadError(
  1080. 'Cookies file must be Netscape formatted, not JSON. See '
  1081. 'https://github.com/hypervideo/hypervideo/wiki/FAQ#how-do-i-pass-cookies-to-hypervideo')
  1082. write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
  1083. continue
  1084. cf.seek(0)
  1085. self._really_load(cf, filename, ignore_discard, ignore_expires)
  1086. # Session cookies are denoted by either `expires` field set to
  1087. # an empty string or 0. MozillaCookieJar only recognizes the former
  1088. # (see [1]). So we need force the latter to be recognized as session
  1089. # cookies on our own.
  1090. # Session cookies may be important for cookies-based authentication,
  1091. # e.g. usually, when user does not check 'Remember me' check box while
  1092. # logging in on a site, some important cookies are stored as session
  1093. # cookies so that not recognizing them will result in failed login.
  1094. # 1. https://bugs.python.org/issue17164
  1095. for cookie in self:
  1096. # Treat `expires=0` cookies as session cookies
  1097. if cookie.expires == 0:
  1098. cookie.expires = None
  1099. cookie.discard = True
  1100. def get_cookie_header(self, url):
  1101. """Generate a Cookie HTTP header for a given url"""
  1102. cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
  1103. self.add_cookie_header(cookie_req)
  1104. return cookie_req.get_header('Cookie')
  1105. def get_cookies_for_url(self, url):
  1106. """Generate a list of Cookie objects for a given url"""
  1107. # Policy `_now` attribute must be set before calling `_cookies_for_request`
  1108. # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
  1109. self._policy._now = self._now = int(time.time())
  1110. return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))
  1111. def clear(self, *args, **kwargs):
  1112. with contextlib.suppress(KeyError):
  1113. return super().clear(*args, **kwargs)